bump to v1.4.1; fix install issue, fix dada2 for ion, add chimera filter options to dada2
nextgenusfs · Aug 1, 2019 · c901ef2 · c901ef2
1 parent b8eb31d
commit c901ef2
Show file tree

Hide file tree

Showing 6 changed files with 19 additions and 6 deletions.
diff --git a/amptk/__version__.py b/amptk/__version__.py
@@ -1,3 +1,3 @@
-VERSION = (1, 4, 0)
+VERSION = (1, 4, 1)
 
 __version__ = '.'.join(map(str, VERSION))
diff --git a/amptk/amptklib.py b/amptk/amptklib.py
@@ -456,6 +456,12 @@ def runSubprocess4(cmd, logfile, logfile2):
         if stderr[0] != None:
             logfile.debug(stderr)
 
+def runSubprocess5(cmd):
+    """Run cmd (an argument list) with no logfile; stdout and stderr are discarded."""
+    # the with-block closes the os.devnull handle once the child process has finished
+    with open(os.devnull, 'w') as FNULL:
+        subprocess.call(cmd, stdout=FNULL, stderr=FNULL)
+
 def getSize(filename):
     st = os.stat(filename)
     return st.st_size

diff --git a/amptk/dada2.py b/amptk/dada2.py
@@ -71,6 +71,7 @@ def main(args):
 	parser.add_argument('-e','--maxee', default='1.0', help='MaxEE quality filtering')
 	parser.add_argument('-p','--pct_otu', default='97', help="Biological OTU Clustering Percent")
 	parser.add_argument('--platform', default='ion', choices=['ion', 'illumina', '454'], help='Sequencing platform')
+	parser.add_argument('--chimera_method', default='consensus', choices=['consensus', 'pooled', 'per-sample'], help='bimera removal method')
 	parser.add_argument('--uchime_ref', help='Run UCHIME REF [ITS,16S,LSU,COI,custom]')
 	parser.add_argument('--pool', action='store_true', help='Pool all sequences together for DADA2')
 	parser.add_argument('--debug', action='store_true', help='Keep all intermediate files')
@@ -180,7 +181,7 @@ def main(args):
 	else:
 		POOL = 'FALSE'
 	with open(dada2log, 'w') as logfile:
-		subprocess.call(['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES], stdout = logfile, stderr = logfile)
+		subprocess.call(['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES, args.chimera_method], stdout = logfile, stderr = logfile)
 
 	#check for results
 	if not os.path.isfile(dada2out):

diff --git a/amptk/dada2_pipeline_nofilt.R b/amptk/dada2_pipeline_nofilt.R
@@ -57,15 +57,15 @@ if (args[3] == 'illumina') {
 	if (args[4] == 'TRUE') {
 		dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=TRUE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=TRUE, multithread=CORES)
 	} else {
-		dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=FALSE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=FALSE, multithread=CORES)
+		dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=FALSE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=TRUE, multithread=CORES)
 	}
 }
 
 #make sequence table
 seqtab <- makeSequenceTable(dadaSeqs, orderBy = "abundance")
 
 #remove chimeras
-seqtab.nochim <- removeBimeraDenovo(seqtab, verbose=TRUE)
+seqtab.nochim <- removeBimeraDenovo(seqtab, method=args[6], verbose=TRUE, multithread=CORES)
 
 #transpose
 transTable <- t(seqtab.nochim)

diff --git a/amptk/install.py b/amptk/install.py
@@ -35,7 +35,8 @@ def main(args):
 	parentdir = os.path.join(os.path.dirname(amptklib.__file__))
 
 	for x in args.input:
-		if os.path.isfile(os.path.join(parentdir, 'DB', x+'.udb')):
+		udbfile = os.path.join(parentdir, 'DB', x+'.udb')
+		if os.path.isfile(udbfile):
 			if not args.force:
 				print("A formated database was found, to overwrite use '--force'. You can add more custom databases by using the `amptk database` command.")
 				sys.exit(1)
@@ -54,7 +55,11 @@ def main(args):
 			shutil.move(os.path.join(x,file), os.path.join(parentdir, 'DB', file))
 		shutil.rmtree(x)
 		os.remove(x+'.amptk.tar.gz')
-		print("%s taxonomy database installed" % x)
+		print('Extracting FASTA files for {:}'.format(x))
+		extracted = os.path.join(parentdir, 'DB', x+'.extracted.fa')
+		cmd = ['vsearch', '--udb2fasta', udbfile, '--output', extracted]
+		amptklib.runSubprocess5(cmd)
+		print("{:} taxonomy database installed to {:}".format(x, os.path.join(parentdir, 'DB')))
 
 if __name__ == "__main__":
 	main()
diff --git a/scripts/amptk b/scripts/amptk
@@ -301,6 +301,7 @@ Arguments:   -i, --fastq         Input FASTQ file (Required)
              -e, --maxee         Expected error quality trimming. Default: 1.0
              -p, --pct_otu       OTU Clustering Radius (percent). Default: 97
              --platform          Sequencing platform. [ion, illumina, 454]. Default: ion
+             --chimera_method    DADA2 de novo chimera method. Default: consensus [consensus,pooled,per-sample]
              --pool              Pool all samples together for DADA2. Default: off
              --uchime_ref        Run Ref Chimera filtering. Default: off [ITS, LSU, COI, 16S, custom path]
              --cpus              Number of CPUs to use. Default: all