Skip to content

Commit

Permalink
bug fix for -t in remove and select, support degenerate nucleotides
Browse files Browse the repository at this point in the history
  • Loading branch information
Jon Palmer authored and Jon Palmer committed Aug 1, 2017
1 parent b8d2581 commit 7f0af85
Show file tree
Hide file tree
Showing 12 changed files with 202 additions and 166 deletions.
4 changes: 3 additions & 1 deletion Dockerfile-base
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ RUN wget https://github.com/torognes/vsearch/archive/v2.4.3.tar.gz && \
make install && \
cd ..

RUN git clone git://github.com/nextgenusfs/amptk.git && \
RUN wget https://github.com/nextgenusfs/amptk/archive/0.10.2.tar.gz && \
tar xzf 0.10.2.tar.gz && \
mv amptk-0.10.2 amptk && \
cd amptk && \
make && \
cd ..
Expand Down
3 changes: 2 additions & 1 deletion amptk.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def download(url, name):
sys.stdout.write(status)
f.close()

version = '0.10.1'
version = '0.10.2'

default_help = """
Usage: amptk <command> <arguments>
Expand Down Expand Up @@ -762,6 +762,7 @@ def download(url, name):
-m, --mapping_file QIIME-like mapping file
-t, --taxonomy Taxonomy calculated elsewhere. 2 Column file.
--method Taxonomy method. Default: hybrid [utax, sintax, usearch, hybrid, rdp, blast]
--add2db Add FASTA files to DB on the fly.
--fasta_db Alternative database of fasta sequences to use for global alignment.
--utax_db UTAX formatted database. Default: ITS2.udb [See configured DB's below]
--utax_cutoff UTAX confidence value threshold. Default: 0.8 [0 to 0.9]
Expand Down
11 changes: 2 additions & 9 deletions bin/amptk-fastq2sra.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,6 @@ class col:
parser.add_argument('-a','--append', help='Append a name to all sample names for a run, i.e. --append run1 would yield Sample_run1')
args=parser.parse_args()

def FindBarcode(Seq, BarcodeDict):
    """Find the barcode that the sequence starts with.

    Args:
        Seq: read sequence (string) to test.
        BarcodeDict: mapping of barcode label -> barcode sequence.

    Returns:
        A ``(Barcode, BarcodeLabel)`` tuple for the first barcode (in the
        dictionary's iteration order) that is a prefix of ``Seq``, or
        ``("", "")`` when no barcode matches.
    """
    for BarcodeLabel, Barcode in BarcodeDict.items():
        # An empty barcode is a prefix of every sequence; skip it so a blank
        # entry cannot claim every read and return an ambiguous ("", label).
        if Barcode and Seq.startswith(Barcode):
            return Barcode, BarcodeLabel
    return "", ""

log_name = args.out + '.amptk-sra.log'
if os.path.isfile(log_name):
os.remove(log_name)
Expand Down Expand Up @@ -238,12 +231,12 @@ def FindBarcode(Seq, BarcodeDict):
#look for forward primer
if args.require_primer != 'off': #only keep reads containing the forward and/or reverse primer, but don't trim the primers off
#now search for forward primer
foralign = edlib.align(FwdPrimer, seq, mode="HW", k=args.primer_mismatch)
foralign = edlib.align(FwdPrimer, seq, mode="HW", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
if foralign["editDistance"] < 0:
continue
if args.require_primer == 'both':
#now search for reverse primer
revalign = edlib.align(ReverseCompRev, seq, mode="HW", task="locations", k=args.primer_mismatch)
revalign = edlib.align(ReverseCompRev, seq, mode="HW", task="locations", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
if revalign["editDistance"] < 0: #reverse primer was not found
continue
#check size
Expand Down
4 changes: 2 additions & 2 deletions bin/amptk-process_illumina_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def processRead(input):
for title, seq, qual in FastqGeneralIterator(open(input)):
Total += 1
#first thing is look for forward primer, if found trim it off
foralign = edlib.align(FwdPrimer, seq, mode="HW", k=args.primer_mismatch)
foralign = edlib.align(FwdPrimer, seq, mode="HW", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
#if require primer is on, the forward primer must be found when the amplicon is longer than the read length
#if it is shorter than the read length, the primer can't be enforced because it could have been removed by staggered trimming in fastq_mergepairs
if args.primer == 'on' and len(seq) > ReadLen:
Expand All @@ -83,7 +83,7 @@ def processRead(input):
Seq = seq
Qual = qual
#now look for reverse primer
revalign = edlib.align(RevPrimer, Seq, mode="HW", task="locations", k=args.primer_mismatch)
revalign = edlib.align(RevPrimer, Seq, mode="HW", task="locations", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
if revalign["editDistance"] >= 0:
RevPrimerFound += 1
RevCutPos = revalign["locations"][0][0]
Expand Down
4 changes: 2 additions & 2 deletions bin/amptk-process_illumina_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def processRead(input):
NoBC += 1
continue
#first thing is look for forward primer, if found trim it off
foralign = edlib.align(FwdPrimer, seq, mode="HW", k=args.primer_mismatch)
foralign = edlib.align(FwdPrimer, seq, mode="HW", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
#if require primer is on, the forward primer must be found when the amplicon is longer than the read length
#if it is shorter than the read length, the primer can't be enforced because it could have been removed by staggered trimming in fastq_mergepairs
if args.primer == 'on' and len(seq) > ReadLen:
Expand All @@ -88,7 +88,7 @@ def processRead(input):
Seq = seq
Qual = qual
#now look for reverse primer
revalign = edlib.align(RevPrimer, Seq, mode="HW", task="locations", k=args.primer_mismatch)
revalign = edlib.align(RevPrimer, Seq, mode="HW", task="locations", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
if revalign["editDistance"] >= 0:
RevPrimerFound += 1
RevCutPos = revalign["locations"][0][0]
Expand Down
6 changes: 3 additions & 3 deletions bin/amptk-process_ion.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ def processRead(input):
Seq = seq[BarcodeLength:]
Qual = qual[BarcodeLength:]
#now search for forward primer
foralign = edlib.align(FwdPrimer, Seq, mode="HW", k=args.primer_mismatch)
foralign = edlib.align(FwdPrimer, Seq, mode="HW", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
if foralign["editDistance"] < 0:
NoPrimer += 1
continue
ForTrim = foralign["locations"][0][1]+1
#now search for reverse primer
revalign = edlib.align(RevPrimer, Seq, mode="HW", task="locations", k=args.primer_mismatch)
revalign = edlib.align(RevPrimer, Seq, mode="HW", task="locations", k=args.primer_mismatch, additionalEqualities=amptklib.degenNuc)
if revalign["editDistance"] >= 0: #reverse primer was found
RevPrimerFound += 1
#location to trim sequences
Expand Down Expand Up @@ -169,7 +169,7 @@ def processRead(input):
#check if mapping file passed, use this if present, otherwise use command line arguments
if args.mapping_file:
if not os.path.isfile(args.mapping_file):
amptklib.error("Mapping file is not valid: %s" % args.mapping_file)
amptklib.log.error("Mapping file is not valid: %s" % args.mapping_file)
sys.exit(1)
mapdata = amptklib.parseMappingFile(args.mapping_file, barcode_file)
#forward primer in first item in tuple, reverse in second
Expand Down
Loading

0 comments on commit 7f0af85

Please sign in to comment.