Skip to content

Commit

Permalink
bump to v1.4.1; fix install issue, fix dada2 for ion, add chimera fil…
Browse files Browse the repository at this point in the history
…ter options to dada2
  • Loading branch information
Jon Palmer committed Aug 1, 2019
1 parent b8eb31d commit c901ef2
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 6 deletions.
2 changes: 1 addition & 1 deletion amptk/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
VERSION = (1, 4, 0)
VERSION = (1, 4, 1)

__version__ = '.'.join(map(str, VERSION))
6 changes: 6 additions & 0 deletions amptk/amptklib.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,12 @@ def runSubprocess4(cmd, logfile, logfile2):
if stderr[0] != None:
logfile.debug(stderr)

def runSubprocess5(cmd):
    """Run an external command, discarding everything it prints.

    No logfile is involved: both stdout and stderr of the child process
    are redirected to the null device.

    Args:
        cmd: The command and its arguments as a list, passed unchanged to
            ``subprocess.call``.

    Returns:
        None. The child's exit status is intentionally not reported.
    """
    # Scope the null-device handle with ``with`` so it is closed as soon as
    # the child exits, instead of leaking one open handle per call.
    with open(os.devnull, 'w') as FNULL:
        subprocess.call(cmd, stdout=FNULL, stderr=FNULL)

def getSize(filename):
    """Return the size of ``filename`` in bytes, as reported by ``os.stat``."""
    return os.stat(filename).st_size
Expand Down
3 changes: 2 additions & 1 deletion amptk/dada2.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def main(args):
parser.add_argument('-e','--maxee', default='1.0', help='MaxEE quality filtering')
parser.add_argument('-p','--pct_otu', default='97', help="Biological OTU Clustering Percent")
parser.add_argument('--platform', default='ion', choices=['ion', 'illumina', '454'], help='Sequencing platform')
parser.add_argument('--chimera_method', default='consensus', choices=['consensus', 'pooled', 'per-sample'], help='bimera removal method')
parser.add_argument('--uchime_ref', help='Run UCHIME REF [ITS,16S,LSU,COI,custom]')
parser.add_argument('--pool', action='store_true', help='Pool all sequences together for DADA2')
parser.add_argument('--debug', action='store_true', help='Keep all intermediate files')
Expand Down Expand Up @@ -180,7 +181,7 @@ def main(args):
else:
POOL = 'FALSE'
with open(dada2log, 'w') as logfile:
subprocess.call(['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES], stdout = logfile, stderr = logfile)
subprocess.call(['Rscript', '--vanilla', dada2script, filtfolder, dada2out, args.platform, POOL, CORES, args.chimera_method], stdout = logfile, stderr = logfile)

#check for results
if not os.path.isfile(dada2out):
Expand Down
4 changes: 2 additions & 2 deletions amptk/dada2_pipeline_nofilt.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ if (args[3] == 'illumina') {
if (args[4] == 'TRUE') {
dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=TRUE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=TRUE, multithread=CORES)
} else {
dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=FALSE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=FALSE, multithread=CORES)
dadaSeqs <- dada(derepSeqs, err=NULL, selfConsist=TRUE, pool=FALSE, HOMOPOLYMER_GAP_PENALTY=-1, BAND_SIZE=32, USE_QUALS=TRUE, multithread=CORES)
}
}

#make sequence table
seqtab <- makeSequenceTable(dadaSeqs, orderBy = "abundance")

#remove chimeras
seqtab.nochim <- removeBimeraDenovo(seqtab, verbose=TRUE)
seqtab.nochim <- removeBimeraDenovo(seqtab, method=args[6], verbose=TRUE, multithread=CORES)

#transpose
transTable <- t(seqtab.nochim)
Expand Down
9 changes: 7 additions & 2 deletions amptk/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ def main(args):
parentdir = os.path.join(os.path.dirname(amptklib.__file__))

for x in args.input:
if os.path.isfile(os.path.join(parentdir, 'DB', x+'.udb')):
udbfile = os.path.join(parentdir, 'DB', x+'.udb')
if os.path.isfile(udbfile):
if not args.force:
print("A formated database was found, to overwrite use '--force'. You can add more custom databases by using the `amptk database` command.")
sys.exit(1)
Expand All @@ -54,7 +55,11 @@ def main(args):
shutil.move(os.path.join(x,file), os.path.join(parentdir, 'DB', file))
shutil.rmtree(x)
os.remove(x+'.amptk.tar.gz')
print("%s taxonomy database installed" % x)
print('Extracting FASTA files for {:}'.format(x))
extracted = os.path.join(parentdir, 'DB', x+'.extracted.fa')
cmd = ['vsearch', '--udb2fasta', udbfile, '--output', extracted]
amptklib.runSubprocess5(cmd)
print("{:} taxonomy database installed to {:}".format(x, os.path.join(parentdir, 'DB')))

if __name__ == "__main__":
    # main(args) takes one required positional argument, so calling it bare
    # raises TypeError when this module is executed directly.
    # NOTE(review): assumes main() expects the command-line argument list
    # (without the program name) -- confirm against the start of main().
    main(sys.argv[1:])
1 change: 1 addition & 0 deletions scripts/amptk
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ Arguments: -i, --fastq Input FASTQ file (Required)
-e, --maxee Expected error quality trimming. Default: 1.0
-p, --pct_otu OTU Clustering Radius (percent). Default: 97
--platform Sequencing platform. [ion, illumina, 454]. Default: ion
--chimera_method DADA2 de novo chimera method. Default: consensus [consensus,pooled,per-sample]
--pool Pool all samples together for DADA2. Default: off
--uchime_ref Run Ref Chimera filtering. Default: off [ITS, LSU, COI, 16S, custom path]
--cpus Number of CPUs to use. Default: all
Expand Down

0 comments on commit c901ef2

Please sign in to comment.