Skip to content

Commit

Permalink
add pseudopooling option to dada2
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Palmer committed Jan 27, 2020
1 parent af0520b commit 536833b
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 6 deletions.
2 changes: 1 addition & 1 deletion amptk/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
VERSION = (1, 4, 1)
VERSION = (1, 4, 2)

__version__ = '.'.join(map(str, VERSION))
18 changes: 13 additions & 5 deletions amptk/dada2.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def main(args):
parser.add_argument('--chimera_method', default='consensus', choices=['consensus', 'pooled', 'per-sample'], help='bimera removal method')
parser.add_argument('--uchime_ref', help='Run UCHIME REF [ITS,16S,LSU,COI,custom]')
parser.add_argument('--pool', action='store_true', help='Pool all sequences together for DADA2')
parser.add_argument('--pseudopool', action='store_true', help='Use DADA2 pseudopooling')
parser.add_argument('--debug', action='store_true', help='Keep all intermediate files')
parser.add_argument('-u','--usearch', dest="usearch", default='usearch9', help='USEARCH9 EXE')
parser.add_argument('--cpus', type=int, help="Number of CPUs. Default: auto")
Expand Down Expand Up @@ -172,16 +173,23 @@ def main(args):
os.remove(os.path.join(filtfolder, y))

#now run DADA2 on filtered folder
amptklib.log.info("Running DADA2 pipeline")
dada2log = base+'.dada2.Rscript.log'
dada2out = base+'.dada2.csv'
#check pooling vs notpooled, default is not pooled.
#check pooling, pseudopooling, or not pooled; default is not pooled.
if args.pool:
POOL = 'TRUE'
amptklib.log.info("Running DADA2 pipeline using pooling of samples")
elif args.pseudopool:
POOL = 'PSEUDO'
amptklib.log.info("Running DADA2 pipeline using pseudopooling of samples")
else:
POOL = 'FALSE'
amptklib.log.info("Running DADA2 pipeline on each sample")
dada2log = base+'.dada2.Rscript.log'
dada2out = base+'.dada2.csv'

dada2cmd = ['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES, args.chimera_method]
amptklib.log.debug(' '.join(dada2cmd))
with open(dada2log, 'w') as logfile:
subprocess.call(['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES, args.chimera_method], stdout = logfile, stderr = logfile)
subprocess.call(dada2cmd, stdout = logfile, stderr = logfile)

#check for results
if not os.path.isfile(dada2out):
Expand Down
8 changes: 8 additions & 0 deletions amptk/dada2_pipeline_nofilt.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,28 @@ names(derepSeqs) <- sample.names
#Sample inference
# Runs dada() on the dereplicated reads. The platform (args[3]) selects the
# alignment settings and the pooling flag (args[4]) selects the pool mode.
print("-------------")
print("Sample inference")
print(args[4])

# Fail fast on a platform this step has no settings for. Without this check
# neither branch below would assign dadaSeqs, and the script would only stop
# later with an unrelated "object 'dadaSeqs' not found" error.
if (!(args[3] %in% c("illumina", "ion"))) {
  stop(
    "Unsupported platform for DADA2 sample inference: ", args[3],
    " (expected 'illumina' or 'ion')",
    call. = FALSE
  )
}

# Translate the command-line pooling flag into the value handed to
# dada(pool = ...): 'TRUE' -> TRUE, 'PSEUDO' -> "pseudo", anything else -> FALSE.
if (args[4] == "TRUE") {
  pool_mode <- TRUE
} else if (args[4] == "PSEUDO") {
  pool_mode <- "pseudo"
} else {
  pool_mode <- FALSE
}

if (args[3] == "illumina") {
  dadaSeqs <- dada(
    derepSeqs,
    err = NULL, selfConsist = TRUE, pool = pool_mode,
    BAND_SIZE = 32, USE_QUALS = TRUE, multithread = CORES
  )
} else {
  # 'ion': same call as above plus HOMOPOLYMER_GAP_PENALTY = -1.
  dadaSeqs <- dada(
    derepSeqs,
    err = NULL, selfConsist = TRUE, pool = pool_mode,
    HOMOPOLYMER_GAP_PENALTY = -1, BAND_SIZE = 32,
    USE_QUALS = TRUE, multithread = CORES
  )
}

# Dump the attributes of the dada() result to the log output.
features <- attributes(dadaSeqs)
print(features)

#make sequence table
seqtab <- makeSequenceTable(dadaSeqs, orderBy = "abundance")

Expand Down
1 change: 1 addition & 0 deletions scripts/amptk
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ Arguments: -i, --fastq Input FASTQ file (Required)
--platform Sequencing platform. [ion, illumina, 454]. Default: ion
--chimera_method DADA2 de novo chimera method. Default: consensus [consensus,pooled,per-sample]
--pool Pool all samples together for DADA2. Default: off
--pseudopool Pseudopooling samples for DADA2. Default: off
--uchime_ref Run Ref Chimera filtering. Default: off [ITS, LSU, COI, 16S, custom path]
--cpus Number of CPUs to use. Default: all
--debug Keep intermediate files.
Expand Down

0 comments on commit 536833b

Please sign in to comment.