Skip to content

Commit

Permalink
add script and process to extract unmerged seqs from the rds file
Browse files Browse the repository at this point in the history
- mimics logic from old NGS16S pipeline script as described in #17
  • Loading branch information
Hoogestraat committed May 18, 2022
1 parent 018dae0 commit cc27b5b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
28 changes: 28 additions & 0 deletions bin/get_dropped_chim.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env Rscript

suppressPackageStartupMessages(library(argparse, quietly = TRUE))
suppressPackageStartupMessages(library(tidyr, quietly = TRUE))
suppressPackageStartupMessages(library(readr, quietly = TRUE))
suppressPackageStartupMessages(library(dplyr, quietly = TRUE))
#' Write the sequence variants that were dropped as chimeras.
#'
#' Reads an RDS file holding a list with `seqtab` and `seqtab.nochim`
#' elements, keeps the rows of `seqtab` whose sequence does not occur in
#' `seqtab.nochim`, and writes them as a two-column csv (`weight`,
#' `sequence`) sorted by decreasing weight.
#'
#' @param arguments Character vector of command line arguments: the
#'   positional path to the RDS file and an optional `-o/--outfile`.
#' @return The result of `write_csv()`; called for its side effect of
#'   writing the output file.
main <- function(arguments) {
  parser <- ArgumentParser(
    description = "Write weight and sequence of svs identified as chimeras")
  parser$add_argument("rdata", help = "RDS files containing dada2 output")
  parser$add_argument(
    "-o", "--outfile", default = "chim_dropped.csv",
    help = "output csv file with weight and sequence of svs identified as chimeras")

  args <- parser$parse_args(arguments)

  obj <- readRDS(args$rdata)

  # `$` partial-matches on lists, so `obj$seqtab` would silently return
  # `seqtab.nochim` if `seqtab` were absent. Check explicitly and use `[[`.
  missing_tabs <- setdiff(c("seqtab", "seqtab.nochim"), names(obj))
  if (length(missing_tabs) > 0) {
    stop(
      "missing element(s) in ", args$rdata, ": ",
      paste(missing_tabs, collapse = ", "),
      call. = FALSE
    )
  }

  # as.data.frame(as.table(<matrix>)) yields the long-format columns
  # Var1 (row name), Var2 (column name, i.e. the sequence) and Freq.
  # Keep the names as character so the join key is not a factor whose
  # level set differs between the two tables.
  seqtab <- as.data.frame(
    as.table(obj[["seqtab"]]), stringsAsFactors = FALSE)
  seqtab_nochim <- as.data.frame(
    as.table(obj[["seqtab.nochim"]]), stringsAsFactors = FALSE)

  seqtab %>%
    anti_join(seqtab_nochim, by = "Var2") %>%
    arrange(desc(Freq)) %>%
    select(weight = Freq, sequence = Var2) %>%
    write_csv(args$outfile)
}

main(commandArgs(trailingOnly = TRUE))
22 changes: 22 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ process dada_get_unmerged {

output:
file("unmerged_*.fasta") into dada_unmerged
val sampleid into dada_unmerged_samples
file dada_rds into dada_unmerged_rds

publishDir "${params.output}/dada/${sampleid}/", overwrite: true, mode: 'copy'

Expand All @@ -317,6 +319,26 @@ process dada_get_unmerged {
"""
}

// Runs get_dropped_chim.R on a dada RDS file and publishes the resulting
// chim_dropped.csv to ${params.output}/dada/${sampleid}/.
process dada_get_dropped_chimeras {

label 'med_cpu_mem'

input:
// NOTE(review): sampleid and dada_rds arrive on two separate channels;
// pairing them presumably relies on both emitting in the same order — confirm.
val sampleid from dada_unmerged_samples
// Staged into the task directory but not referenced by the script below.
file("dada_params.json") from maybe_local(params.dada_params)
file dada_rds from dada_unmerged_rds

output:
file("chim_dropped.csv") into dada_chim_dropped

// Copy the output into the per-sample dada directory, overwriting any existing file.
publishDir "${params.output}/dada/${sampleid}/", overwrite: true, mode: 'copy'

"""
get_dropped_chim.R ${dada_rds} \
--outfile chim_dropped.csv
"""
}


process combined_overlaps {

Expand Down

0 comments on commit cc27b5b

Please sign in to comment.