diff --git a/amptk/__version__.py b/amptk/__version__.py index 94c0ade..5d3d4d9 100644 --- a/amptk/__version__.py +++ b/amptk/__version__.py @@ -1,3 +1,3 @@ -VERSION = (1, 4, 0) +VERSION = (1, 4, 1) __version__ = '.'.join(map(str, VERSION)) \ No newline at end of file diff --git a/amptk/amptklib.py b/amptk/amptklib.py index ab8ccc4..7e726e2 100644 --- a/amptk/amptklib.py +++ b/amptk/amptklib.py @@ -456,6 +456,12 @@ def runSubprocess4(cmd, logfile, logfile2): if stderr[0] != None: logfile.debug(stderr) +def runSubprocess5(cmd): + #function where no logfile and stdout/stderr to fnull + FNULL = open(os.devnull, 'w') + #print(' '.join(cmd)) + subprocess.call(cmd, stdout=FNULL, stderr=FNULL) + def getSize(filename): st = os.stat(filename) return st.st_size diff --git a/amptk/dada2.py b/amptk/dada2.py index 4d542ba..5ba3f84 100755 --- a/amptk/dada2.py +++ b/amptk/dada2.py @@ -71,6 +71,7 @@ def main(args): parser.add_argument('-e','--maxee', default='1.0', help='MaxEE quality filtering') parser.add_argument('-p','--pct_otu', default='97', help="Biological OTU Clustering Percent") parser.add_argument('--platform', default='ion', choices=['ion', 'illumina', '454'], help='Sequencing platform') + parser.add_argument('--chimera_method', default='consensus', choices=['consensus', 'pooled', 'per-sample'], help='bimera removal method') parser.add_argument('--uchime_ref', help='Run UCHIME REF [ITS,16S,LSU,COI,custom]') parser.add_argument('--pool', action='store_true', help='Pool all sequences together for DADA2') parser.add_argument('--debug', action='store_true', help='Keep all intermediate files') @@ -180,7 +181,7 @@ def main(args): else: POOL = 'FALSE' with open(dada2log, 'w') as logfile: - subprocess.call(['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES], stdout = logfile, stderr = logfile) + subprocess.call(['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES, args.chimera_method], stdout = logfile, stderr = logfile) #check for results if not os.path.isfile(dada2out): diff --git a/amptk/dada2_pipeline_nofilt.R b/amptk/dada2_pipeline_nofilt.R index 0a04ece..9a42379 100755 --- a/amptk/dada2_pipeline_nofilt.R +++ b/amptk/dada2_pipeline_nofilt.R @@ -57,7 +57,7 @@ if (args[3] == 'illumina') { if (args[4] == 'TRUE') { dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=TRUE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=TRUE, multithread=CORES) } else { - dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=FALSE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=FALSE, multithread=CORES) + dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=FALSE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=TRUE, multithread=CORES) } } @@ -65,7 +65,7 @@ if (args[3] == 'illumina') { seqtab <- makeSequenceTable(dadaSeqs, orderBy = "abundance") #remove chimeras -seqtab.nochim <- removeBimeraDenovo(seqtab, verbose=TRUE) +seqtab.nochim <- removeBimeraDenovo(seqtab, method=args[6], verbose=TRUE, multithread=CORES) #transpose transTable <- t(seqtab.nochim) diff --git a/amptk/install.py b/amptk/install.py index aa51cbd..eec4cc2 100755 --- a/amptk/install.py +++ b/amptk/install.py @@ -35,7 +35,8 @@ def main(args): parentdir = os.path.join(os.path.dirname(amptklib.__file__)) for x in args.input: - if os.path.isfile(os.path.join(parentdir, 'DB', x+'.udb')): + udbfile = os.path.join(parentdir, 'DB', x+'.udb') + if os.path.isfile(udbfile): if not args.force: print("A formated database was found, to overwrite use '--force'. You can add more custom databases by using the `amptk database` command.") sys.exit(1) @@ -54,7 +55,11 @@ def main(args): shutil.move(os.path.join(x,file), os.path.join(parentdir, 'DB', file)) shutil.rmtree(x) os.remove(x+'.amptk.tar.gz') - print("%s taxonomy database installed" % x) + print('Extracting FASTA files for {:}'.format(x)) + extracted = os.path.join(parentdir, 'DB', x+'.extracted.fa') + cmd = ['vsearch', '--udb2fasta', udbfile, '--output', extracted] + amptklib.runSubprocess5(cmd) + print("{:} taxonomy database installed to {:}".format(x, os.path.join(parentdir, 'DB'))) if __name__ == "__main__": main() \ No newline at end of file diff --git a/scripts/amptk b/scripts/amptk index 29a61e1..f648f93 100755 --- a/scripts/amptk +++ b/scripts/amptk @@ -301,6 +301,7 @@ Arguments: -i, --fastq Input FASTQ file (Required) -e, --maxee Expected error quality trimming. Default: 1.0 -p, --pct_otu OTU Clustering Radius (percent). Default: 97 --platform Sequencing platform. [ion, illumina, 454]. Default: ion + --chimera_method DADA2 de novo chimera method. Default: consensus [consensus,pooled,per-sample] --pool Pool all samples together for DADA2. Default: off --uchime_ref Run Ref Chimera filtering. Default: off [ITS, LSU, COI, 16S, custom path] --cpus Number of CPUs to use. Default: all