Skip to content

Commit

Permalink
Merge pull request #37 from microbiomedata/re_iding
Browse files Browse the repository at this point in the history
Re-iding tool for Napa compliance
  • Loading branch information
mbthornton-lbl authored Jan 16, 2024
2 parents a73554a + e7596c7 commit 6bc6154
Show file tree
Hide file tree
Showing 37 changed files with 86,800 additions and 605 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ htmlcov/
.coverage
attic
.idea/

2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@

# Runs pytest over ./tests with terminal coverage reporting for nmdc_automation.
# NOTE(review): this hunk is a rendered diff (1 addition, 1 deletion) whose +/-
# markers were lost; the PYTHONPATH line looks like the removed recipe and the
# `poetry run` line its replacement — confirm against the repository.
test:
PYTHONPATH=$(shell pwd) pytest --cov-report term --cov=nmdc_automation ./tests
poetry run pytest --cov-report term --cov=nmdc_automation ./tests
35 changes: 35 additions & 0 deletions configs/napa_config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Cromwell service endpoints. Both values point at localhost, so they look like
# local-development defaults — NOTE(review): confirm the values per deployment.
[cromwell]
# Path ends in /api/workflows/v1.
cromwell_url = "http://localhost:8088/api/workflows/v1"
# NOTE(review): uses port 9999 while cromwell_url uses 8088 — confirm which
# service this key is meant to target.
cromwell_api = "http://localhost:9999"

# Filesystem locations. The "/path/to/..." values are placeholders and have to
# be replaced with real paths before this config is used.
[directories]
stage_dir = "/path/to/stage/dir"
template_dir = "/path/to/template/dir"
# The only entry in this table that is not a "/path/to/..." placeholder.
data_dir = "/tmp"
raw_dir = "/path/to/raw/data/files"

# Site identification.
# NOTE(review): both values read like placeholder labels rather than real
# identifiers — confirm what the consumer expects here.
[site]
resource = "Resource Name"
site = "Processing Site"

# NMDC endpoints on microbiomedata.org.
[nmdc]
# Root URL under the data host's /data/ path.
url_root = "https://data.microbiomedata.org/data/"
# Base URL of the API host.
api_url = "https://api.microbiomedata.org/"

# Napa API endpoint and login.
[napa]
base_url = "https://api-napa.microbiomedata.org/"
# Never commit real credentials to version control. The values below are
# placeholders in the same style as the [credentials] table; supply the real
# username/password through an untracked local copy of this file.
# The password that was previously committed here must be treated as
# compromised and rotated.
username = "xxxxxx"
password = "xxxxxxxx"


# State-file locations.
[state]
# NOTE(review): "State File" looks like a placeholder label, not a path — confirm.
watch_state = "State File"
agent_state = "/tmp/agent.state"
# Placeholder path; replace before use.
activity_id_state = "/Path/to/activity_id_state"

# Location of the workflow definitions file.
[workflows]
# Relative path — presumably resolved against the process working directory;
# TODO confirm against the code that loads it.
workflows_config = "./configs/workflows.yaml"

# Client credentials. Both values are "x" placeholders; real values must be
# supplied outside version control.
[credentials]
client_id = "xxxxxx"
client_secret = "xxxxxxxx"
187 changes: 187 additions & 0 deletions configs/re_iding_worklfows.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
Workflows:
# Reads QC workflow entry: rqcfilter.wdl from the ReadsQC repository.
# NOTE(review): indentation was lost in this rendering; the nesting below is
# reconstructed from key order (children of Inputs / Activity / Outputs) —
# confirm against the repository file.
# Brace placeholders such as {id}, {activity_id} and {outputs.stats.*} are kept
# as literal strings; presumably the consumer substitutes them at run time.
- Name: Reads QC
  Type: nmdc:ReadQcAnalysisActivity
  Enabled: true
  Git_repo: https://github.com/microbiomedata/ReadsQC
  Version: '1.0.2'
  WDL: rqcfilter.wdl
  Collection: read_qc_analysis_activity_set
  ActivityRange: ReadQcAnalysisActivity
  Filter Input Objects:
  - Metagenome Raw Reads
  # Neither predecessor name is defined as a workflow in this file.
  Predecessors:
  - Sequencing
  - Sequencing Interleaved
  Input_prefix: nmdc_rqcfilter
  Inputs:
    # Quoted so the embedded colon can never be read as a mapping indicator.
    input_files: 'do:Metagenome Raw Reads'
    proj: '{activity_id}'
  Activity:
    name: 'Read QC Activity for {id}'
    input_read_bases: '{outputs.stats.input_read_bases}'
    input_read_count: '{outputs.stats.input_read_count}'
    output_read_bases: '{outputs.stats.output_read_bases}'
    output_read_count: '{outputs.stats.output_read_count}'
    type: nmdc:ReadQcAnalysisActivity
  Outputs:
  - output: filtered_final
    name: Reads QC result fastq (clean data)
    suffix: '_filtered.fastq.gz'
    data_object_type: Filtered Sequencing Reads
    description: 'Reads QC for {id}'
  - output: filtered_stats_final
    name: Reads QC summary statistics
    suffix: '_filterStats.txt'
    data_object_type: QC Statistics
    description: 'Reads QC summary for {id}'
  - output: rqc_info
    name: File containing read filtering information
    suffix: '_readsQC.info'
    data_object_type: Read Filtering Info File
    description: 'Read filtering info for {id}'

# Metagenome Assembly workflow entry: jgi_assembly.wdl from the metaAssembly
# repository. It takes the "Filtered Sequencing Reads" data object as input.
# NOTE(review): indentation was lost in this rendering; the nesting below is
# reconstructed from key order — confirm against the repository file.
- Name: Metagenome Assembly
  Type: nmdc:MetagenomeAssembly
  Enabled: true
  Git_repo: https://github.com/microbiomedata/metaAssembly
  Version: '1.0.2'
  WDL: jgi_assembly.wdl
  Collection: metagenome_assembly_set
  ActivityRange: MetagenomeAssembly
  # "Reads QC Interleave" is not defined as a workflow in this file.
  Predecessors:
  - Reads QC
  - Reads QC Interleave
  Input_prefix: jgi_metaASM
  Inputs:
    # Quoted so the embedded colon can never be read as a mapping indicator.
    input_file: 'do:Filtered Sequencing Reads'
    rename_contig_prefix: '{activity_id}'
    proj: '{activity_id}'
  Activity:
    name: 'Metagenome Assembly Activity for {id}'
    type: nmdc:MetagenomeAssembly
    # Each statistic below is templated from the identically named
    # {outputs.stats.<key>} placeholder.
    asm_score: '{outputs.stats.asm_score}'
    contig_bp: '{outputs.stats.contig_bp}'
    contigs: '{outputs.stats.contigs}'
    ctg_l50: '{outputs.stats.ctg_l50}'
    ctg_l90: '{outputs.stats.ctg_l90}'
    ctg_logsum: '{outputs.stats.ctg_logsum}'
    ctg_max: '{outputs.stats.ctg_max}'
    ctg_n50: '{outputs.stats.ctg_n50}'
    ctg_n90: '{outputs.stats.ctg_n90}'
    ctg_powsum: '{outputs.stats.ctg_powsum}'
    gap_pct: '{outputs.stats.gap_pct}'
    gc_avg: '{outputs.stats.gc_avg}'
    gc_std: '{outputs.stats.gc_std}'
    scaf_bp: '{outputs.stats.scaf_bp}'
    scaf_l50: '{outputs.stats.scaf_l50}'
    scaf_l90: '{outputs.stats.scaf_l90}'
    scaf_l_gt50k: '{outputs.stats.scaf_l_gt50k}'
    scaf_logsum: '{outputs.stats.scaf_logsum}'
    scaf_max: '{outputs.stats.scaf_max}'
    scaf_n50: '{outputs.stats.scaf_n50}'
    scaf_n90: '{outputs.stats.scaf_n90}'
    scaf_n_gt50k: '{outputs.stats.scaf_n_gt50k}'
    scaf_pct_gt50k: '{outputs.stats.scaf_pct_gt50k}'
    scaf_powsum: '{outputs.stats.scaf_powsum}'
    scaffolds: '{outputs.stats.scaffolds}'
  Outputs:
  - output: contig
    name: Final assembly contigs fasta
    suffix: '_contigs.fna'
    data_object_type: Assembly Contigs
    description: 'Assembly contigs for {id}'
  - output: scaffold
    name: Final assembly scaffolds fasta
    suffix: '_scaffolds.fna'
    data_object_type: Assembly Scaffolds
    description: 'Assembly scaffolds for {id}'
  - output: covstats
    name: Assembled contigs coverage information
    suffix: '_covstats.txt'
    data_object_type: Assembly Coverage Stats
    description: 'Coverage Stats for {id}'
  - output: agp
    name: An AGP format file that describes the assembly
    suffix: '_assembly.agp'
    data_object_type: Assembly AGP
    description: 'AGP for {id}'
  - output: bam
    name: Sorted bam file of reads mapping back to the final assembly
    suffix: '_pairedMapped_sorted.bam'
    data_object_type: Assembly Coverage BAM
    description: 'Sorted Bam for {id}'
  - output: asminfo
    name: File containing assembly info
    suffix: '_metaAsm.info'
    data_object_type: Assembly Info File
    description: 'Assembly info for {id}'

# Readbased Analysis workflow entry: ReadbasedAnalysis.wdl from the
# ReadbasedAnalysis repository. It takes the "Filtered Sequencing Reads" data
# object as input and declares GOTTCHA2, Centrifuge and Kraken2 outputs.
# NOTE(review): indentation was lost in this rendering; the nesting below is
# reconstructed from key order — confirm against the repository file.
- Name: Readbased Analysis
  Type: nmdc:ReadBasedTaxonomyAnalysisActivity
  Enabled: true
  Git_repo: https://github.com/microbiomedata/ReadbasedAnalysis
  # NOTE(review): carries a "v" prefix while the other workflows in this file
  # use a bare "1.0.2" — confirm which form the consumer expects.
  Version: 'v1.0.2'
  WDL: ReadbasedAnalysis.wdl
  Collection: read_based_taxonomy_analysis_activity_set
  ActivityRange: ReadBasedTaxonomyAnalysisActivity
  # "Reads QC Interleave" is not defined as a workflow in this file.
  Predecessors:
  - Reads QC
  - Reads QC Interleave
  Input_prefix: ReadbasedAnalysis
  Inputs:
    # Quoted so the embedded colon can never be read as a mapping indicator.
    input_file: 'do:Filtered Sequencing Reads'
    proj: '{activity_id}'
  Activity:
    name: 'Readbased Taxonomy Analysis Activity for {id}'
    type: nmdc:ReadBasedTaxonomyAnalysisActivity
  # Keys are ordered name / suffix / data_object_type / description to match
  # the other workflow entries in this file; mapping order carries no meaning.
  Outputs:
  - output: final_gottcha2_report_tsv
    name: GOTTCHA2 classification report file
    suffix: '_gottcha2_report.tsv'
    data_object_type: GOTTCHA2 Classification Report
    description: 'GOTTCHA2 Classification for {id}'
  - output: final_gottcha2_full_tsv
    name: GOTTCHA2 report file
    # NOTE(review): ends in "_tsv" where every sibling suffix ends in ".tsv";
    # left unchanged because it may need to match existing file names — confirm.
    suffix: '_gottcha2_full_tsv'
    data_object_type: GOTTCHA2 Report Full
    description: 'GOTTCHA2 Full Report for {id}'
  - output: final_gottcha2_krona_html
    name: GOTTCHA2 krona plot HTML file
    suffix: '_gottcha2_krona.html'
    data_object_type: GOTTCHA2 Krona Plot
    description: 'GOTTCHA2 Krona for {id}'
  - output: final_centrifuge_classification_tsv
    name: Centrifuge output read classification file
    suffix: '_centrifuge_classification.tsv'
    data_object_type: Centrifuge Taxonomic Classification
    description: 'Centrifuge Classification for {id}'
  - output: final_centrifuge_report_tsv
    name: Centrifuge output report file
    suffix: '_centrifuge_report.tsv'
    data_object_type: Centrifuge Classification Report
    description: 'Centrifuge Report for {id}'
  - output: final_centrifuge_krona_html
    # Spelling fixed: was "Centrifug".
    name: Centrifuge krona plot HTML file
    suffix: '_centrifuge_krona.html'
    data_object_type: Centrifuge Krona Plot
    description: 'Centrifuge Krona for {id}'
  - output: final_kraken2_classification_tsv
    name: Kraken2 output read classification file
    suffix: '_kraken2_classification.tsv'
    data_object_type: Kraken2 Taxonomic Classification
    description: 'Kraken2 Classification for {id}'
  - output: final_kraken2_report_tsv
    name: Kraken2 output report file
    suffix: '_kraken2_report.tsv'
    data_object_type: Kraken2 Classification Report
    description: 'Kraken2 Report for {id}'
  - output: final_kraken2_krona_html
    name: Kraken2 Krona plot HTML file
    suffix: '_kraken2_krona.html'
    data_object_type: Kraken2 Krona Plot
    description: 'Kraken2 Krona for {id}'
  - output: info_file
    name: File containing reads based analysis information
    # NOTE(review): the only suffix in this file without a leading underscore;
    # left unchanged — confirm whether "_profiler.info" was intended.
    suffix: 'profiler.info'
    data_object_type: Read Based Analysis Info File
    description: 'Read based analysis info for {id}'
2 changes: 1 addition & 1 deletion nmdc_automation/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .nmdcapi import NmdcRuntimeApi
from .nmdcapi import NmdcRuntimeApi, NmdcRuntimeUserApi
Loading

0 comments on commit 6bc6154

Please sign in to comment.