From 5a4379ae973b6a9ae1424e9f60958d26eb6c3fce Mon Sep 17 00:00:00 2001 From: Andre Corvelo Date: Mon, 13 Feb 2017 17:49:50 -0500 Subject: [PATCH] major revision --- README.md | 279 +++++++++++++++++------------------ bin/txM_fastalen | 4 +- bin/txM_fq2gem | 4 +- bin/txM_fqhtrim | 4 +- bin/txM_fqintlv | 60 ++++---- bin/txM_fqltrim | 4 +- bin/txM_fqmate | 98 +++++++++++++ bin/txM_fqminlen | 4 +- bin/txM_fqsample | 114 +++++++++++++++ bin/txM_gem2fq | 4 +- bin/txM_gitax | 4 +- bin/txM_lca | 4 +- bin/txM_mapout | 4 +- bin/txM_mergintlv | 4 +- bin/txM_report | 117 +++++---------- bin/txM_rescore | 105 ++++++++++++++ bin/txM_sge | 10 +- bin/txM_summary | 6 +- taxMaps | 361 ++++++++++++++++++++-------------------------- taxMaps-index | 4 +- taxMaps-taxtbl | 4 +- 21 files changed, 703 insertions(+), 495 deletions(-) create mode 100755 bin/txM_fqmate create mode 100755 bin/txM_fqsample create mode 100755 bin/txM_rescore diff --git a/README.md b/README.md index d57958b..73d11ac 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # taxMaps taxMaps is an ultra-efficient, customizable and fully scalable taxonomic classification tool for short-read data designed to deal with large DNA/RNA metagenomics samples. Its performance and comprehensiveness makes it highly suitable for unbiased contamination detection in large-scale sequencing operations, microbiome studies comprising a large number of samples, and for applications where the analysis delivery time is a critical factor, such as pathogen identification from clinical or environmental samples. -* Version: 0.1 +* Version: 0.2 * Author: Andre Corvelo, [New York Genome Center](https://www.nygenome.org) taxMaps is freely available for academic and non-commercial research purposes ([`LICENSE.txt`](https://github.com/nygenome/taxmaps/blob/master/LICENSE.txt)). @@ -10,57 +10,57 @@ taxMaps is freely available for academic and non-commercial research purposes ([ * Flexible input - `.bam`, `.fastq` (compressed/uncompressed, interleaved or not) or any custom command that streams `.fastq` to `STDOUT` * Thorough preprocessing - quality trim, multiple adapter removal, hard trim (5' and 3'), low complexity filter. * Mapping prioritization - through the use of multiple custom indexes (useful for spike-in and host subtraction). -* Performance - taxMaps takes full advantage of the outstanding features and performance of [GEM](http://www.nature.com/nmeth/journal/v9/n12/full/nmeth.2221.html). When using pre-compressed indexes, this state-of-the-art aligner can map in a complete (all best hits retrieved) and sensitive (up to 20% edit distance) manner up to 20 million reads per hour against a single-file nucleotide database encompassing more than 117 Gbases, including NCBI’s nt database, microbial RefSeq and thousands of microbial and viral genomes, on a single 16 CPU machine. +* Performance - taxMaps takes full advantage of the outstanding features and performance of [GEM](http://www.nature.com/nmeth/journal/v9/n12/full/nmeth.2221.html). When using pre-compressed indexes, this state-of-the-art aligner can map in a very sensitive (up to 20% edit distance) manner several million reads per hour against a single-file nucleotide database encompassing more than 370 Gbases, including NCBI’s nt database, microbial RefSeq and thousands of microbial and viral genomes, on a single 16 CPU machine. * Detailed mapping reports - including coverage and edit-distance histograms per taxa, which can be visualized interactively. * SGE support. 
# Manual ## List of contents -* [Installation](https://github.com/nygenome/taxmaps#installation) - * [Dependencies](https://github.com/nygenome/taxmaps#dependencies) - * [Download](https://github.com/nygenome/taxmaps#download) -* [Building the taxonomic table](https://github.com/nygenome/taxmaps#building-the-taxonomic-table) -* [Preprocessing and indexing your database](https://github.com/nygenome/taxmaps#preprocessing-and-indexing-your-database) - * [Pre-built indexes](https://github.com/nygenome/taxmaps/blob/master/README.md#pre-built-indexes) -* [Running taxMaps](https://github.com/nygenome/taxmaps#running-taxmaps) - * [Basic usage](https://github.com/nygenome/taxmaps#basic-usage) - * [Output directory and sample prefix](https://github.com/nygenome/taxmaps#output-directory-and-sample-prefix) - * [Input](https://github.com/nygenome/taxmaps#input) - * [FASTQ](https://github.com/nygenome/taxmaps/blob/master/README.md#fastq) - * [BAM](https://github.com/nygenome/taxmaps/blob/master/README.md#bam) - * [Custom command](https://github.com/nygenome/taxmaps/blob/master/README.md#custom-command) - * [Preprocessing](https://github.com/nygenome/taxmaps#preprocessing) - * [Quality scores encoding](https://github.com/nygenome/taxmaps/blob/master/README.md#quality-scores-encoding) - * [Quality trimming](https://github.com/nygenome/taxmaps/blob/master/README.md#quality-trimming) - * [Adapter removal](https://github.com/nygenome/taxmaps/blob/master/README.md#adapter-removal) - * [Low complexity filtering](https://github.com/nygenome/taxmaps/blob/master/README.md#low-complexity-filtering) - * [Hard-trimming your data](https://github.com/nygenome/taxmaps/blob/master/README.md#hard-trimming-your-data) - * [Minimum read length](https://github.com/nygenome/taxmaps/blob/master/README.md#minimum-read-length) - * [Mapping](https://github.com/nygenome/taxmaps#mapping) - * [Index and length file](https://github.com/nygenome/taxmaps/blob/master/README.md#index-and-length-file) - * [Maximum edit distance](https://github.com/nygenome/taxmaps/blob/master/README.md#maximum-edit-distance) - * [Number of CPUs](https://github.com/nygenome/taxmaps/blob/master/README.md#numbers-of-cpus) - * [Custom index names](https://github.com/nygenome/taxmaps/blob/master/README.md#custom-index-names) - * [Multiple indexes](https://github.com/nygenome/taxmaps/blob/master/README.md#multiple-indexes) - * [Taxonomic classification](https://github.com/nygenome/taxmaps#taxonomic-classification) - * [Reporting](https://github.com/nygenome/taxmaps#reporting) - * [Coverage histograms](https://github.com/nygenome/taxmaps/blob/master/README.md#coverage-histograms) - * [Lowest taxonomic level](https://github.com/nygenome/taxmaps/blob/master/README.md#lowest-taxonomic-level) - * [Minimum reporting evidence](https://github.com/nygenome/taxmaps/blob/master/README.md#minimum-reporting-evidence) - * [Group-specific reports](https://github.com/nygenome/taxmaps/blob/master/README.md#group-specific-reports) - * [Output](https://github.com/nygenome/taxmaps#output) - * [Output directory](https://github.com/nygenome/taxmaps/blob/master/README.md#output-directory) - * [Merged mapping file](https://github.com/nygenome/taxmaps/blob/master/README.md#merged-mapping-file) - * [Summary table](https://github.com/nygenome/taxmaps/blob/master/README.md#summary-table) - * [Report table](https://github.com/nygenome/taxmaps/blob/master/README.md#report-table) - * [Interactive abundance 
chart](https://github.com/nygenome/taxmaps/blob/master/README.md#interactive-abundance-chart) - * [SGE](https://github.com/nygenome/taxmaps#sge) - * [Dry run](https://github.com/nygenome/taxmaps#dry-run) -* [Quick reference](https://github.com/nygenome/taxmaps#quick-reference) - * [./taxMaps](https://github.com/nygenome/taxmaps#taxmaps-1) - * [./taxMaps-taxtbl](https://github.com/nygenome/taxmaps#taxmaps-taxtbl) - * [./taxMaps-index](https://github.com/nygenome/taxmaps#taxmaps-index) - * [./bin](https://github.com/nygenome/taxmaps#bin) +* [Installation](#installation) + * [Dependencies](#dependencies) + * [Download](#download) +* [Databases](#databases) + * [Pre-built indexes](README.md#pre-built-indexes) + * [Custom indexes](README.md#custom-indexes) + * [Building the taxonomic table](README.md#building-the-taxonomic-table) + * [Preprocessing and indexing your custom database](README.md#preprocessing-and-indexing-your-custom-database) +* [Running taxMaps](#running-taxmaps) + * [Basic usage](#basic-usage) + * [Output directory and sample prefix](#output-directory-and-sample-prefix) + * [Input](#input) + * [FASTQ](README.md#fastq) + * [BAM](README.md#bam) + * [Custom command](README.md#custom-command) + * [Downsampling](README.md#downsampling) + * [Preprocessing](#preprocessing) + * [Quality scores encoding](README.md#quality-scores-encoding) + * [Quality trimming](README.md#quality-trimming) + * [Adapter removal](README.md#adapter-removal) + * [Low complexity filtering](README.md#low-complexity-filtering) + * [Hard-trimming your data](README.md#hard-trimming-your-data) + * [Minimum read length](README.md#minimum-read-length) + * [Mapping](#mapping) + * [Index and length file](README.md#index-and-length-file) + * [Maximum edit distance](README.md#maximum-edit-distance) + * [Number of CPUs](README.md#numbers-of-cpus) + * [Multiple indexes](README.md#multiple-indexes) + * [Taxonomic classification](#taxonomic-classification) + * [Reporting](#reporting) + * [Coverage histograms](README.md#coverage-histograms) + * [Minimum reporting evidence](README.md#minimum-reporting-evidence) + * [Output](#output) + * [Output directory](README.md#output-directory) + * [Merged mapping file](README.md#merged-mapping-file) + * [Summary table](README.md#summary-table) + * [Report table](README.md#report-table) + * [Interactive abundance chart](README.md#interactive-abundance-chart) + * [SGE](#sge) + * [Dry run](#dry-run) +* [Quick reference](#quick-reference) + * [./taxMaps](#taxmaps-1) + * [./taxMaps-taxtbl](#taxmaps-taxtbl) + * [./taxMaps-index](#taxmaps-index) + * [./bin](#bin) ## Installation #### Dependencies @@ -111,7 +111,16 @@ $ export PATH=$PATH:{path_to_taxMaps} Adding line above to `.bash_rc` or `.bash_profile` configuration files will make it permanent. -## Building the taxonomic table + +## Databases + +### Pre-built indexes +taxMaps uses pre-built indexes that are available via FTP on the [NYGC public FTP](http://tinyurl.com/krn3e7o) site. These indexes have been compressed using a kmer LCA pre-assignation for several different kmer sizes, which greatly improves mapping efficiency against large databases. To ensure maximum mapping fidelity, you should pick a kmer size just above your read length or hard-trim down your reads to match the kmer size used in compression. For this, you can use the corresponding `taxMaps` option `-L`. You should also use the `taxonomy.tbl` file provided. + +### Custom indexes +We strongly recommend the use of pre-built compressed indexes. 
While we provide scripts to generate taxMaps-compatible indexes, we do not offer support for them. + +#### Building the taxonomic table taxMaps requires a taxonomic table file for several of its operations. To generate this table you need to download [taxdump.tar.gz](http://tinyurl.com/lj5wkh8) from the NCBI FTP site and extract its contents with the following command: ``` @@ -126,7 +135,7 @@ $ taxMaps-taxtbl -n names.dmp -t nodes.dmp > taxonomy.tbl will generate a tab-delimited file named `taxonomy.tbl`, which contains one taxonomic entity per line with the following field order: `TaxId`, `Rank`, `Name` and `PathFromRoot`. -## Preprocessing and indexing your database +#### Preprocessing and indexing your custom database To generate your indexed database you need to download [gi_taxid_nucl.dmp.gz](http://tinyurl.com/oez34rw) from the NCBI FTP site and extract it using the following command: ``` @@ -151,9 +160,6 @@ At the end of this process, among others, you should have 4 files: Generating an index can take from a few minutes to several hours, depending on the size of your database. You can speed it up by running `taxMaps-index` on multiple threads by using the option `-T`. You can also submit your job to a SGE cluster by specifying a partition/queue with the option `-Q`. The number of slots can be specified using the option `-S`. -### Pre-built indexes -You can also use pre-built indexes which are available via FTP on the NYGC public [FTP](http://tinyurl.com/krn3e7o) site. These indexes have been compressed using a kmer LCA pre-assignation for several different kmer sizes, which greatly improves mapping efficiency against large databases. To ensure maximum mapping fidelity, you should pick a kmer size just above your read length or hard-trim down your reads to match the kmer size used in compression. For this, you can use the corresponding `taxMaps` option `-L`. You should also use the `taxonomy.tbl` file provided. - ## Running taxMaps To run taxMaps, first make sure you have all the required files: @@ -195,6 +201,13 @@ Finally, taxMaps can take custom input commands as long as they write fastq to ` ``` $ taxMaps -i "zcat /data/project_x/sample*.fq.gz" ... ``` + +##### Downsampling +If you wish to downsample your data, you can set the downsampling probability to a float number between 0 and 1 through the option `-D`. For instance, if you want taxMaps to classify 10% of your data, you should use: + +``` +$ taxMaps -f reads.fq -D 0.1 ... +```
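For illustration, here is a minimal Python sketch of the per-record sampling behind `-D` (it mirrors the logic of the `txM_fqsample` helper added by this patch; the `downsample` function name is illustrative only):

```
import sys
from itertools import islice
from random import random

def downsample(fastq, prob, paired=False):
    # A FASTQ record is 4 lines; with interleaved pairs, take 8 lines at a time
    # so that mates are always kept or dropped together.
    unit = 8 if paired else 4
    while True:
        rec = list(islice(fastq, unit))
        if not rec:
            break
        if random() < prob:              # keep each record/pair with probability `prob`
            sys.stdout.write(''.join(rec))

# keep ~10% of the pairs of an interleaved file:
# with open('reads.fq') as fq:
#     downsample(fq, 0.1, paired=True)
```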
### Preprocessing @@ -204,7 +217,7 @@ With taxMaps you can process your data prior to mapping. Controlling the quality taxMaps expects quality scores to be encoded in Phred+33 format (see [FASTQ encoding](http://en.wikipedia.org/wiki/FASTQ_format#Encoding)). If your quality data is encoded in Phred+64 format, you should add the `--phred64` flag to your command (usually not required for Illumina 1.8+, Sanger, Roche/454, Ion Torrent, PacBio data). ##### Quality trimming -Using the option `-q` you can set the threshold for 3' quality-based trimming of your read data. taxMaps will pass this option to `cutadapt`. +Using the option `-q` you can set the threshold for 3' quality-based trimming of your read data. taxMaps will pass this option to `cutadapt`. If you wish to remove low quality bases from both ends of the reads, just specify the quality threshold values for the 5' and 3' ends, separated by `,` (e.g. `-q 20,20`). ##### Adapter removal To remove adapter sequence, you can pass a string of `cutadapt` options by using the option `-a`. For instance, if you want to remove the adapter sequence `AGATCGGAAGAGC` from both ends of your reads in two passes, you should specify `-a "-b AGATCGGAAGAGC -n 2"`. You can also trim multiple adapter sequences - see [cutadapt user manual](https://cutadapt.readthedocs.org/en/latest/guide.html). You **should not** pass any cutadapt options related to input and output as these will likely cause taxMaps to crash (e.g., `-a "-b AGATCGGAAGAGC -o output.fq"`). @@ -224,7 +237,7 @@ Finally, with the option `-l` you can control the minimum length a read should h In taxMaps, you must specify the database index file with the option `-d`. Just make sure that for every index `{db}.gem` you have the corresponding `{db}.len` in the same directory. ##### Maximum edit distance -By default taxMaps allows up to 10% edit distance. This threshold can be increased in situations where a greater sensitivity is desired (e.g., viral detection). With the option `-e` you can set the maximum edit distance allowed. You must provide a number between 0 and 1 and edit distance will be considered as a fraction of the read length. For instance, if your read length is 150bp, the following command: +By default taxMaps allows up to 20% edit distance. This threshold can be decreased in situations where a greater precision is desired. With the option `-e` you can set the maximum edit distance allowed. You must provide a number between 0 and 1 and edit distance will be considered as a fraction of the read length. For instance, if your read length is 150bp, the following command: ``` $ taxMaps -f reads.fq -d ncbi.gem -e 0.2 ... @@ -240,31 +253,22 @@ $ taxMaps -f reads.fq -d ncbi.gem -c 16 ... ``` will use `16` CPUs. -##### Custom index names -Internally taxMaps sets the name of the database as the basename without extension of the index specified by `-d`. You can however set a custom name by using `-n`. This parameter will be used in naming your mapping result files. With the following command: - -``` -$ taxMaps -f reads.fq -d GRCh38.gem -e 0.2 -c 16 -n human ... -``` -your mapping results for `GRCh38.gem` will be have the extension `.human.map`. - - ##### Multiple indexes If you want to map against more than one index in a sequential manner, you should provide a `,`-delimited list of index files: ``` $ taxMaps -f reads.fq -d phix.gem,human.gem,ncbi.gem ... ``` -The command above will first map all reads against `phix.gem`. Then, unmapped reads will be mapped against `human.gem` and, finally, reads that were not mapped will be mapped against the `ncbi.gem` index. When mapping against more than one index, you can still control the abovementioned parameters in a index-specific manner by providing `,`-delimited lists of values for `-e`, `-c` and `-n`: +The command above will first map all reads against `phix.gem`. Then, unmapped reads will be mapped against `human.gem` and, finally, reads that were not mapped will be mapped against the `ncbi.gem` index. When mapping against more than one index, you can still control the abovementioned parameters in an index-specific manner by providing `,`-delimited lists of values for `-e` and `-c`: ``` $ taxMaps -f reads.fq -d phix.gem,human.gem,ncbi.gem \ - -e 0.1,0.2,0.2 -c 4,16,16 -n phix,human,ncbi ... + -e 0.1,0.2,0.2 -c 4,16,16 ... ``` ### Taxonomic classification -To run taxMaps, you need to specify through the `-t` option, the path to the taxonomic table generated using `taxMaps-taxtbl`. +To run taxMaps, you need to specify, through the `-t` option, the path to the taxonomic table downloaded from the [NYGC public FTP](http://tinyurl.com/krn3e7o). For read classification you can choose between three LCA modes: * 'single-end' (`-m s`) - reads are classified independently. This is the default option.
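For intuition, here is a minimal Python sketch of the path-based LCA idea used for read classification, assuming root-to-node paths taken from the `PathFromRoot` column of `taxonomy.tbl` (toy TaxIds; this is only a sketch, not the `txM_lca` implementation):

```
def lca(taxids, path_from_root):
    # Lowest common ancestor of a set of TaxIds, given their root-to-node paths.
    paths = [path_from_root[t] for t in taxids]
    ancestor = None
    for level in zip(*paths):        # walk all paths in parallel, from the root down
        if len(set(level)) != 1:     # stop at the first level where the paths diverge
            break
        ancestor = level[0]
    return ancestor

# toy example: Alphaproteobacteria (28211) vs Firmicutes (1239) -> Bacteria (2)
paths = {'28211': ['1', '2', '1224', '28211'], '1239': ['1', '2', '1239']}
print(lca(['28211', '1239'], paths))   # '2'
```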
@@ -286,9 +290,6 @@ $ taxMaps -f reads.fq -d phix.gem,human.gem,ncbi.gem --cov -x 9606,374840 ... will not compute any coverage histogram for phiX (`374840`) and human (`9606`) sequences. -##### Lowest taxonomic level -By default taxMaps will explore the entire taxonomic tree but you can limit the depth of your reports by setting the lowest reported level with `-r`. This option is turn OFF by default in taxMaps, which will report all leaves. Possible values are: `superkingdom`, `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species` and `subspecies`. - ##### Minimum reporting evidence In taxMaps you can also exclude from your report, taxa with few reads assigned. These are usually non-specific hits or mapping artifacts. By default, taxMaps will not report taxa with less than 10 reads assigned per million reads in your sample (`-z 0.00001`). You can also specify a minimum number of reads regardless of the proportion. For that you should use integer values: @@ -298,15 +299,6 @@ $ taxMaps -f reads.fq -d ncbi.gem -z 15 ... ``` The command above will report taxa with at least `15` reads classified as such. -##### Group-specific reports -Apart from the "main" report, you can also generate group specific reports. For that, you should specify a list of TaxIds using the option `-u`. taxMaps will generate additional reports where every taxon listed will be at the root. For instance, the following command: - -``` -$ taxMaps -f reads.fq -d ncbi.gem -u 2,10239 ... -``` - -will generate additional reports for bacteria (TaxId = `2`) and viruses (TaxId = `10239`). - ### Output ##### Output directory @@ -314,17 +306,15 @@ By default, taxMaps will run on the current working directory, but it is strongl The output directory contains the following subdirectories: -* `read_data/` - contains either links to your read data file(s) or a `.sh` script with custom input command specified using the option `-i`. -* `indexes/` - contains the links to the specified indexes as well as to the corresponding length files. -* `taxonomy/` - where you can find a link to the taxonomy table specified by `-t`. -* `map/` - in this directory you will find all the `.map` files and a `.map.lca` file with the mapping results plus the taxonomic classification for every read (see below for format) . -* `out/` - this is where summary results will be written to. It also contains the graphical and tabular reports. -* `log/` - where you can find the log files for `cutadapt`, `prinseq`, `GEM` and `samtools`(if your input data is in `bam` format). +* `txM.{prefix}.base/` - contains either links to your read data file(s) or a `.sh` script with custom input command specified using the option `-i`, all the links to the specified indexes and corresponding length files, and the taxonomy table specified by `-t`. +* `txM.{prefix}.map/` - in this directory you will find all the `.map` files and a `.map.lca` file with the mapping results plus the taxonomic classification for every read (see below for format). +* `txM.{prefix}.out/` - this is where summary results will be written to. It also contains the graphical and tabular reports.
+* `txM.{prefix}.log/` - where you can find the log files for `cutadapt`, `prinseq`, `GEM` and `samtools` (if your input data is in `bam` format). In the output directory you will also find a `.sh` file with all commands that taxMaps will run. Also, if you used the `-Q` option for SGE, you will find a `.sge` custom specification file and an additional subdirectory will be created upon submission, containing several files related to the SGE run. ##### Merged mapping file -After mapping, taxMaps will merge all the `.map` files and perform the taxonomic classification of every read, based on the all its best hits. These results can be found in `{prefix}.merged.map.lca` located in `{out_dir}/map/`. The format is an extended [GEM alignment format](http://algorithms.cnag.cat/wiki/FAQ:The_GEM_alignment_format): +After mapping, taxMaps will merge all the `.map` files and perform the taxonomic classification of every read, based on all its best hits. These results can be found in `{prefix}.merged.map.lca` located in `{out_dir}/txM.{prefix}.map/`. The format is an extended [GEM alignment format](http://algorithms.cnag.cat/wiki/FAQ:The_GEM_alignment_format): 1. `Name` - Read name. 2. `Sequence` - Read sequence. @@ -342,7 +332,7 @@ After mapping, taxMaps will merge all the `.map` files and perform the taxonomic **\*** The last 4 fields are exclusive for paired-end classification modes. ##### Summary table -taxMaps then summarizes all mapping information into `{prefix}.merged.map.lca.summary` located in `{out_dir}/out/`. This file consists of one-line records for every taxonomic entity identified in the sample or with at least one hit. Here are the fields: +taxMaps then summarizes all mapping information into `{prefix}.merged.map.lca.summary` located in `{out_dir}/txM.{prefix}.out/`. This file consists of one-line records for every taxonomic entity identified in the sample or with at least one hit. Here are the fields: 1. `TaxId` - NCBI taxon identifier. 2. `TaxRank` - NCBI taxon rank. @@ -361,7 +351,7 @@ taxMaps then summarizes all mapping information into `{prefix}.merged.map.lca.su 15. `CoverageHistogram` - The coverage histogram string follows a *log* progression, with the first value representing the number of uncovered bases, the second - the number of 1x covered bases, the third - the number of 2-3x covered bases, the fourth - the number of 4-7x covered bases and so on... The following string `10:6432:456:0:120:0...` means that there were `10` uncovered bases, `6432` bases at 1x coverage, `456` at 2-3x, `0` between 4-7x and `120` between 8 -15x coverage.
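To make the binning concrete, here is a small Python sketch that expands such a histogram string into per-depth-range base counts (the log2 binning is inferred from the description above; function names are illustrative):

```
from math import floor, log2

def cov_bin(depth):
    # Bin index in the log-scaled histogram: 0 = uncovered, 1 = 1x, 2 = 2-3x, 3 = 4-7x, ...
    return 0 if depth == 0 else int(floor(log2(depth))) + 1

def expand(hist_str):
    # Turn '10:6432:456:0:120' into (bases, depth range) pairs.
    counts = [int(c) for c in hist_str.split(':')]
    ranges = ['0x'] + ['%d-%dx' % (2 ** i, 2 ** (i + 1) - 1) for i in range(len(counts) - 1)]
    return list(zip(counts, ranges))

print(expand('10:6432:456:0:120'))
# [(10, '0x'), (6432, '1-1x'), (456, '2-3x'), (0, '4-7x'), (120, '8-15x')]
```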
##### Report table -In this tab-delimited file (`out/{prefix}.tbl`) you will find the read counts for the classic taxonomic ranks (`kingdom`, `phylum`, `class`, etc...). Read counts for a given taxon are not limited to reads classified as that taxon, but also include all reads mapping to the taxa under it. These are the fields in the report table: +In this tab-delimited file (`{out_dir}/txM.{prefix}.out/{prefix}.tbl`) you will find the read counts for the classic taxonomic ranks (`kingdom`, `phylum`, `class`, etc...). Read counts for a given taxon are not limited to reads classified as that taxon, but also include all reads mapping to the taxa under it. These are the fields in the report table: 1. `TaxId` - NCBI taxon identifier. 2. `TaxRank` - NCBI taxon rank @@ -371,15 +361,11 @@ In this tab-delimited file (`out/{prefix}.tbl`) you will find the read counts fo 6. `ReadCounts` - Number of reads classified under `TaxName` 7. `PercentFromTotal` - Corresponding percentage relative to the root of the report. -If you requested specific reports for certain `TaxId`s with the option `-u`, taxMaps will generate a file for each with the extension `.{TaxId}.tbl` . In these case, the last field (`PercentFromTotal`) will refer to the percentage of reads out of the total reads classified under each of the specified `TaxId`s. - ##### Interactive abundance chart -Finally, taxMaps generates an independent `.html` document that allows you to explore in an interactive manner your results (`out/{prefix}.krona.html`). For that, you only need a Web browser and internet access. In the interactive HTML5 chart generated using [Krona](http://sourceforge.net/p/krona/home/krona/), taxomic entities are displayed as nested sectors from the top level (center) to the bottom (outward) of the hierarchy. Here is a screenshot: - -![image](https://github.com/nygenome/taxmaps/blob/master/doc/img/example_krona.png) +Finally, taxMaps generates an independent `.html` document that allows you to explore your results in an interactive manner (`{out_dir}/txM.{prefix}.out/{prefix}.krona.html`). For that, you only need a Web browser and internet access. In the interactive HTML5 chart generated using [Krona](http://sourceforge.net/p/krona/home/krona/), taxonomic entities are displayed as nested sectors from the top level (center) to the bottom (outward) of the hierarchy. Here is a screenshot: -In case you have requested [group-specific reports](https://github.com/nygenome/taxmaps/blob/master/README.md#group-specific-reports) through the option `-u`, taxMaps will generate a `out/{prefix}.krona.{TaxId}.html` for each `TaxId` specified. +![image](doc/img/example_krona.png) For more information about Krona, see the project [website](http://sourceforge.net/p/krona/home/krona/). @@ -406,46 +392,44 @@ You can do a dry run of taxMaps by using the flag `--dry`. taxMaps will create t `-h`,`--help` show this help message and exit *Input* -`-i` Input command (use absolute paths!). Quoted, with fq on stdout. Interleaved for paired modes (Mandatory). [`STR`] -`-f` Input `.fq`, `.fastq`, `.fq.gz` or `.fastq.gz` files. Interleaved for paired modes (Mandatory). [`FILE`] -`-b` Input `.bam` file. Requires `.bam.bai` in the same folder (Mandatory). [`FILE`] -`-1` Input `.fq`, `.fastq`, `.fq.gz`, `.fastq.gz` read 1 file. In sync with, and of the same sort as, -2 input file for paired modes (Mandatory). [`FILE`] -`-2` Input `.fq`, `.fastq`, `.fq.gz`, `.fastq.gz` read 2 file. In sync with, and of the same sort as, -1 input file for paired modes (Mandatory). [`FILE`] +`-i` Input command (absolute paths!). Quoted. Interleaved `fq` on `STDOUT`. [`STR, Mandatory | -f | (-1 & -2) | -b`] +`-f` Input `fq`, `fastq`, `fq.gz` or `fastq.gz`. Interleaved. [`CSV_FILE_LIST, Mandatory | -i | (-1 & -2) | -b`] +`-b` Input `.bam` file. Requires `.bam.bai` in the same folder. [`FILE, Mandatory | -i | -f | (-1 & -2)`] +`-1` Input `fq`, `fastq`, `fq.gz` or `fastq.gz` read 1. In sync with, and of the same sort as `-2`. [`CSV_FILE_LIST, Mandatory | -f | -i | -b`] +`-2` Input `fq`, `fastq`, `fq.gz` or `fastq.gz` read 2. In sync with, and of the same sort as `-1`. [`CSV_FILE_LIST, Mandatory | -f | -i | -b`] +`-D` Downsampling probability. [`FLOAT, 1`] *Preprocessing* -`-q` Cutadapt quality cutoff value (default = None). [`INT`] -`-a` Adapters string. Quoted. e.g., "-a AGATCGGAAGAGC -n2 " (default = None). [`STR`] -`-l` Minimum read length for mapping (default = 50).
[`INT`] -`-L` Maximum read length (hard trimmming; default = None). [`INT`] -`-w` 5p trim length (hard trimmming; default = None). [`INT`] -`-C` Entropy cutoff for low complexity filtering. Use 0 for no filtering (default = 70). [`INT`] -`-N` Filter reads with more than N% of 'N' characters. Use 100 for no filtering (default = 4). [`INT`] -`--phred64` Quality scores in Phred+64 format (default = False) [`FLAG`] +`-q` Cutadapt quality cutoff value (default = None). [`INT, `] +`-a` Adapters string. Quoted. e.g., "-a AGATCGGAAGAGC -n2 " (default = None). [`STR, `] +`-l` Minimum read length for mapping. [`INT, 50`] +`-L` Maximum read length (hard trimming). [`INT, `] +`-w` 5p trim length (hard trimming). [`INT, `] +`-C` Entropy cutoff for low complexity filtering. Use 0 for no filtering. [`INT, 70`] +`-N` Filter reads with more than N% of 'N' characters. Use 100 for no filtering. [`INT, 4`] +`--phred64` Quality scores in Phred+64 format. [`FLAG, OFF`] *Mapping* -`-d` Index file(s). CSV (Mandatory). [`FILE_LIST`] -`-e` Edit distance(s). CSV. Value(s) between 0 and 1 (default = 0.1). [`FLOAT_LIST`] -`-c` Number of CPUs. CSV (default = 1). [`INT_LIST`] -`-n` Index name(s). CSV (default = index basename(s) without extension). [`STR_LIST`] +`-d` Index file(s). CSV. [`FILE_LIST, Mandatory`] +`-e` Edit distance(s). CSV. Value(s) between 0 and 1. [`FLOAT_LIST, 0.2`] +`-c` Number of CPUs. CSV. [`INT_LIST, 1`] *Taxonomy* -`-t` Taxonomic table file (Mandatory). [`FILE`] -`-m` Taxa determination mode ('s' single-end; 'p' paired-end; 'P' paired-end strict; default = 's'). [`STR`] +`-t` Taxonomic table file. [`FILE, Mandatory`] +`-m` Taxa determination mode ('s' single-end; 'p' paired-end; 'P' paired-end strict). [`STR, s`] *Reporting* -`--cov` Compute coverage histograms (default = False). [`FLAG`] -`-x` Excluded taxids. CSV (only if --cov flag ON). [`STR_LIST`] -`-r` Lowest taxonomic level e.g., 'species' (default = None). [`STR`] -`-z` Reporting cutoff (INT or FLOAT < 1; default = 0.00001). [`FLOAT`] -`-u` Create additional reports for these taxids. CSV (default = None). [`STR_LIST`] +`--cov` Compute coverage histograms. [`FLAG, OFF`] +`-x` Excluded taxids. CSV (only if --cov flag ON). [`STR_LIST, `] +`-z` Reporting cutoff (INT or FLOAT < 1). [`FLOAT, 0.00001`] *Miscellaneous* -`-p` Sample prefix (default = sample). [`STR`] -`-o` Output directory (default = '.'). [`DIR`] -`-Q` Cluster queue/partition (default = None). [`STR`] -`-S` Cluster slots (default = 20). [`INT`] +`-p` Sample prefix. [`STR, sample`] +`-o` Output directory. [`DIR, .`] +`-Q` Cluster queue/partition. [`STR, `] +`-S` Cluster slots. [`INT, 20`] -`--dry` Dry run (default = False) [`FLAG`] +`--dry` Dry run. [`FLAG, OFF`] ### ./taxMaps-taxtbl `taxMaps-taxtbl` is a script that generates the taxonomic table required by `taxMaps` containing all taxids, corresponding descriptions and paths-from-root. It writes to `stdout`. @@ -454,8 +438,8 @@ You can do a dry run of taxMaps by using the flag `--dry`. taxMaps will create t `--version` show program's version number and exit. `-h`,`--help` show this help message and exit. -`-n` NCBI Taxonomy names.dmp file (Mandatory). [`FILE`] -`-t` NCBI Taxonomy nodes.dmp file (Mandatory). [`FILE`] +`-n` NCBI Taxonomy names.dmp file. [`FILE, Mandatory`] +`-t` NCBI Taxonomy nodes.dmp file. [`FILE, Mandatory`] ### ./taxMaps-index `taxMaps-index` is a script that formats the FASTA headers and generates the GEM indexes required by `taxMaps`.
@@ -464,31 +448,34 @@ You can do a dry run of taxMaps by using the flag `--dry`. taxMaps will create t `--version` show program's version number and exit. `-h`,`--help` show this help message and exit. -`-i` Input fasta file (Mandatory). [`FILE`] -`-c` Correspondence file gi -> tax file (Mandatory). [`FILE`] -`-t` Taxonomy table file (Mandatory). [`FILE`] -`-p` Prefix for output files (Mandatory). [`STR`] +`-i` Input fasta file. [`FILE, Mandatory`] +`-c` Correspondence file gi -> tax file. [`FILE, Mandatory`] +`-t` Taxonomy table file. [`FILE, Mandatory`] +`-p` Prefix for output files. [`STR, Mandatory`] -`-T` Number of threads (default = 1). [`INT`] -`-Q` Cluster queue/partition (default = None). [`STR`] -`-S` Cluster slots (default = 1). [`INT`] +`-T` Number of threads. [`INT, 1`] +`-Q` Cluster queue/partition. [`STR, `] +`-S` Cluster slots. [`INT, 1`] -`--dry` Dry run (default = False). [`FLAG`] +`--dry` Dry run. [`FLAG, OFF`] ### ./bin -`txM_fastalen` computes the length of FASTA sequences. -`txM_fq2gem` converts FASTQ into GEM mapper format. -`txM_fqhtrim` trims from the 3' end of the reads down to the specified length. -`txM_fqintlv` interleaves read1 and read2 FASTQ files -`txM_fqltrim` trims the specified number of bases from the 5' end of reads. -`txM_fqminlen` filters reads taht are shorter than the specified read length. -`txM_gem2fq` converts GEM mapper format into FASTQ. -`txM_gitax` formats FASTA header for taxMaps. -`txM_lca` given a GEM mapper output, computes the LCA for all reads, either in sigle-end or paired-end mode. -`txM_mapout` filters mapped reads. -`txM_mergintlv` merges and interleaves multiple GEM mapper files assuming the pairing order is consistent across files. -`txM_report` generates a tables and plots with the number of reads mapping for all represented taxa. -`txM_sge` submits jobs specified in a custom format to a SGE cluster. -`txM_summary` generates a summary per taxon with hits, including edit distance and coverage histogram. +`txM_fastalen` Computes the length of `FASTA` sequences. +`txM_fq2gem` Converts `FASTQ` into GEM mapper format. +`txM_fqhtrim` Trims from the 3' end of the reads down to the specified length. +`txM_fqintlv` Interleaves read1 and read2 FASTQ files. +`txM_fqltrim` Trims the specified number of bases from the 5' end of reads. +`txM_fqmate` Checks for presence of mates when extracting from `BAM`. +`txM_fqminlen` Filters reads taht are shorter than the specified read length. +`txM_fqsample` Downsamples `FASTQ`. +`txM_gem2fq` Converts GEM mapper format into `FASTQ`. +`txM_gitax` Formats `FASTA` header for taxMaps. +`txM_lca` Given a GEM mapper output, computes the LCA for all reads, either in sigle-end or paired-end mode. +`txM_mapout` Filters mapped reads. +`txM_mergintlv` Merges and interleaves multiple GEM mapper files assuming the pairing order is consistent across files. +`txM_report` Generates a tables and plots with the number of reads mapping for all represented taxa. +`txM_rescore` Rescores alignments from edit to Levenshtein distance. +`txM_sge` Submits jobs specified in a custom format to a SGE cluster. +`txM_summary` Generates a summary per taxon with hits, including edit distance and coverage histogram. diff --git a/bin/txM_fastalen b/bin/txM_fastalen index ceec338..9e6db72 100755 --- a/bin/txM_fastalen +++ b/bin/txM_fastalen @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. 
-# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog [options]", version="%prog v0.1") +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_fq2gem b/bin/txM_fq2gem index 6b31679..7235616 100755 --- a/bin/txM_fq2gem +++ b/bin/txM_fq2gem @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog [options]", version="%prog v0.1") +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_fqhtrim b/bin/txM_fqhtrim index bf163ca..8ce5a8b 100755 --- a/bin/txM_fqhtrim +++ b/bin/txM_fqhtrim @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage = "\n%prog [options]", version = "%prog 0.1") +parser = OptionParser(usage = "\n%prog [options]", version = "%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_fqintlv b/bin/txM_fqintlv index 3211266..e7da7d9 100755 --- a/bin/txM_fqintlv +++ b/bin/txM_fqintlv @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. 
-# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -23,7 +23,9 @@ ### MODULES #################################################################### from optparse import OptionParser -import sys, gzip +from sys import stdin, stdout, stderr +from subprocess import Popen, PIPE, STDOUT +from itertools import islice ################################################################### /MODULES ### ################################################################################ @@ -33,6 +35,14 @@ import sys, gzip ################################################################################ ### FUNCTIONS ################################################################## +def p2o(ps): + while True: + r1 = [line for line in islice(ps[0].stdout, 4)] + if not r1: + break + r2 = [line for line in islice(ps[1].stdout, 4)] + stdout.write(''.join(r1+r2)) + ################################################################# /FUNCTIONS ### ################################################################################ @@ -41,32 +51,28 @@ import sys, gzip ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage = "\n%prog [options]", version = "%prog 0.1") +parser = OptionParser(usage = "\n%prog -1 reads.r1.fq -2 read.r2.fq", version = "%prog v0.2") parser.add_option( "-1", metavar = "FILE", type = "string", - dest = "r1_file", + dest = "r1_files", default = None, - help = "Read 1 filename (mandatory, .fq, .fastq, .fq.gz, .fastq.gz)" + help = "Read 1 filenames. CSVs (mandatory, .fq, .fastq, .fq.gz, .fastq.gz - all of the same type)" ) parser.add_option( "-2", metavar = "FILE", type = "str", - dest = "r2_file", + dest = "r2_files", default = None, - help = "Read 2 filename. (mandatory, .fq, .fastq, .fq.gz, .fastq.gz)" + help = "Read 2 filenames. 
CSVs (mandatory, .fq, .fastq, .fq.gz, .fastq.gz - all of the same type)" ) (opt, args) = parser.parse_args() - -if opt.r1_file == None or opt.r2_file == None: - parser.print_help() - sys.exit(-1) - + ######################################################### /ARGUMENTS,OPTIONS ### ################################################################################ @@ -82,27 +88,15 @@ if opt.r1_file == None or opt.r2_file == None: ################################################################################ ### MAIN ####################################################################### - -if __name__ == "__main__": - if opt.r1_file[-3:] == '.gz': - r1_file = gzip.open(opt.r1_file, 'r') - else: - r1_file = open(opt.r1_file, 'r') - if opt.r2_file[-3:] == '.gz': - r2_file = gzip.open(opt.r2_file, 'r') - else: - r2_file = open(opt.r2_file, 'r') - while True: - for t in xrange(4): - l1 = r1_file.readline() - sys.stdout.write(l1) - for t in xrange(4): - l2 = r2_file.readline() - sys.stdout.write(l2) - if not l1 or not l2: - break - r1_file.close() - r2_file.close() + +if __name__ == '__main__': + r1s = opt.r1_files.split(',') + r2s = opt.r2_files.split(',') + read_cmd = ['cat','zcat'][r1s[0][-3:]=='.gz'] + ps = [] + ps.append(Popen([read_cmd] + r1s, stdin=None, stdout=PIPE, shell=False)) + ps.append(Popen([read_cmd] + r2s, stdin=None, stdout=PIPE, shell=False)) + p2o(ps) ###################################################################### /MAIN ### ################################################################################ diff --git a/bin/txM_fqltrim b/bin/txM_fqltrim index 82389a5..f57fb72 100755 --- a/bin/txM_fqltrim +++ b/bin/txM_fqltrim @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage = "\n%prog [options]", version = "%prog 0.1") +parser = OptionParser(usage = "\n%prog [options]", version = "%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_fqmate b/bin/txM_fqmate new file mode 100755 index 0000000..c23ad0e --- /dev/null +++ b/bin/txM_fqmate @@ -0,0 +1,98 @@ +#!/usr/bin/env python + +################################################################################ +### COPYRIGHT ################################################################## + +# New York Genome Center + +# SOFTWARE COPYRIGHT NOTICE AGREEMENT +# This software and its documentation are copyright (2014) by the New York +# Genome Center. All rights are reserved. This software is supplied without +# any warranty or guaranteed support whatsoever. The New York Genome Center +# cannot be responsible for its use, misuse, or functionality. 
+ +# Version: 0.2 +# Author: Andre Corvelo + +################################################################# /COPYRIGHT ### +################################################################################ + + + +################################################################################ +### MODULES #################################################################### + +from optparse import OptionParser +import sys +from itertools import islice + + +################################################################### /MODULES ### +################################################################################ + + + +################################################################################ +### FUNCTIONS ################################################################## + +################################################################# /FUNCTIONS ### +################################################################################ + + + +################################################################################ +### ARGUMENTS,OPTIONS ########################################################## + +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") + +parser.add_option( + "-i", + metavar = "FILE", + type = "string", + dest = "input_file", + default = 'stdin', + help = "Input FASTQ file (default = 'stdin')" + ) + +(opt, args) = parser.parse_args() + +######################################################### /ARGUMENTS,OPTIONS ### +################################################################################ + + + +################################################################################ +### CONSTANTS ################################################################## + +################################################################# /CONSTANTS ### +################################################################################ + + + +################################################################################ +### MAIN ####################################################################### + +if __name__ == "__main__": + if opt.input_file != 'stdin': + fq = open(opt.input_file, 'r') + else: + fq = sys.stdin + + r0 = None + rid0 = None + while True: + r = [line for line in islice(fq, 4)] + if not r: + break + rid = r[0].replace(' ', '/').split('/')[0] + if rid == rid0: + sys.stdout.write(''.join(r0 + r)) + else: + rid0 = rid + r0 = r + + if opt.input_file != 'stdin': + fq.close() + +###################################################################### /MAIN ### +################################################################################ diff --git a/bin/txM_fqminlen b/bin/txM_fqminlen index 51a92bc..5313117 100755 --- a/bin/txM_fqminlen +++ b/bin/txM_fqminlen @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. 
-# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog [options]", version="%prog 0.1") +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_fqsample b/bin/txM_fqsample new file mode 100755 index 0000000..c55821b --- /dev/null +++ b/bin/txM_fqsample @@ -0,0 +1,114 @@ +#!/usr/bin/env python + +################################################################################ +### COPYRIGHT ################################################################## + +# New York Genome Center + +# SOFTWARE COPYRIGHT NOTICE AGREEMENT +# This software and its documentation are copyright (2014) by the New York +# Genome Center. All rights are reserved. This software is supplied without +# any warranty or guaranteed support whatsoever. The New York Genome Center +# cannot be responsible for its use, misuse, or functionality. + +# Version: 0.2 +# Author: Andre Corvelo + +################################################################# /COPYRIGHT ### +################################################################################ + + + +################################################################################ +### MODULES #################################################################### + +from optparse import OptionParser +import sys +from random import uniform +from itertools import islice + + +################################################################### /MODULES ### +################################################################################ + + + +################################################################################ +### FUNCTIONS ################################################################## + +################################################################# /FUNCTIONS ### +################################################################################ + + + +################################################################################ +### ARGUMENTS,OPTIONS ########################################################## + +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") + +parser.add_option( + "-i", + metavar = "FILE", + type = "string", + dest = "input_file", + default = 'stdin', + help = "Input FASTQ file (default = 'stdin')" + ) + +parser.add_option( + "-s", + metavar = "FLOAT", + type = "float", + dest = "prob", + default = 1, + help = "Sampling probability (default = 1)" + ) + +parser.add_option( + "-p", + metavar = "FLAG", + dest = "paired", + action = 'store_true', + default = False, + help = "Paired sampling (only for interleaved input; default = False)" + ) + +(opt, args) = parser.parse_args() + +######################################################### /ARGUMENTS,OPTIONS ### +################################################################################ + + + +################################################################################ +### CONSTANTS ################################################################## + +################################################################# /CONSTANTS ### +################################################################################ + + + 
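+# Sampling logic: read the FASTQ stream in units of 4 lines (one record), or 8
+# lines when -p is set (one interleaved pair), and keep each unit with
+# probability -s, so that mates are always kept or dropped together.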
+################################################################################ +### MAIN ####################################################################### + +if __name__ == "__main__": + nl = 4 + if opt.paired: + nl = 8 + + if opt.input_file != 'stdin': + fq = open(opt.input_file, 'r') + else: + fq = sys.stdin + while True: + u = [line for line in islice(fq, nl)] + if not u: + break + if uniform(0,1) < opt.prob: + sys.stdout.write(''.join(u)) + + if opt.input_file != 'stdin': + fq.close() + +###################################################################### /MAIN ### +################################################################################ diff --git a/bin/txM_gem2fq b/bin/txM_gem2fq index 6c1ee4d..f15d418 100755 --- a/bin/txM_gem2fq +++ b/bin/txM_gem2fq @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage = "\n%prog [options]", version = "%prog 0.1") +parser = OptionParser(usage = "\n%prog [options]", version = "%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_gitax b/bin/txM_gitax index 34060e6..7c94f23 100755 --- a/bin/txM_gitax +++ b/bin/txM_gitax @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys, uuid, os ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog [options]", version="%prog v0.1") +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_lca b/bin/txM_lca index 08a2f7b..5f562ce 100755 --- a/bin/txM_lca +++ b/bin/txM_lca @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -107,7 +107,7 @@ def p_pair(pl1, pl2, tax_d): ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog -t tax file", version="%prog 0.1") +parser = OptionParser(usage="\n%prog -t tax file", version="%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_mapout b/bin/txM_mapout index 8e6abaa..4f79427 100755 --- a/bin/txM_mapout +++ b/bin/txM_mapout @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. 
-# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog [options]", version="%prog 0.1") +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") parser.add_option( "-i", diff --git a/bin/txM_mergintlv b/bin/txM_mergintlv index 584358f..2a9929e 100755 --- a/bin/txM_mergintlv +++ b/bin/txM_mergintlv @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -42,7 +42,7 @@ import Queue ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage = "\n%prog [options] [map files...]", version = "%prog 0.1") +parser = OptionParser(usage = "\n%prog [options] [map files...]", version = "%prog v0.2") (opt, args) = parser.parse_args() diff --git a/bin/txM_report b/bin/txM_report index e22be7d..4171463 100755 --- a/bin/txM_report +++ b/bin/txM_report @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -42,7 +42,7 @@ import math, os ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog -t tax file", version="%prog 0.1") +parser = OptionParser(usage="\n%prog -t tax file", version="%prog v0.2") parser.add_option( "-i", @@ -67,8 +67,8 @@ parser.add_option( metavar = "FLOAT", type = "float", dest = "cutoff", - default = 0.00001, - help = "Reporting cutoff (if < 1: minimum fraction from the total number reads or pairs ; if >= 1: minimum number of reads or pairs; default = 0.00001)" + default = 1, + help = "Reporting cutoff (if < 1: minimum fraction from the total number reads or pairs ; if >= 1: minimum number of reads or pairs; default = 1)" ) parser.add_option( @@ -89,27 +89,9 @@ parser.add_option( help = "Output prefix (Mandatory)" ) -parser.add_option( - "-l", - metavar = "STR", - type = "string", - dest = "max_level", - default = None, - help = "Lowest taxonomic level e.g. 
'species' (default = None)" - ) - -parser.add_option( - "-u", - metavar = "STR", - type = "string", - dest = "under", - default = None, - help = "Only report taxa under this taxid (default = None)" - ) - (opt, args) = parser.parse_args() -if opt.tax_file == None or opt.cutoff == 0 or opt.d_mode not in ['s', 'p', 'P'] or opt.out_prefix == None: +if opt.tax_file == None or opt.d_mode not in ['s', 'p', 'P'] or opt.out_prefix == None: parser.print_help() exit(-1) @@ -121,19 +103,6 @@ if opt.tax_file == None or opt.cutoff == 0 or opt.d_mode not in ['s', 'p', 'P'] ################################################################################ ### CONSTANTS ################################################################## -levels = ['subspecies', - 'species', - 'genus', - 'family', - 'order', - 'class', - 'phylum', - 'kingdom', - 'superkingdom' - ] - -specials = ['0', '1', '-'] - ################################################################# /CONSTANTS ### ################################################################################ @@ -149,11 +118,13 @@ if __name__ == '__main__': for l in tax_file: la = l.strip().split('\t') la[2] = la[2].replace(':','-') - tax_d[la[0]] = la + [0, None, 0, [], False,0] - tax_d['0'] = ['0', 'no rank', 'unmapped', '0', 0, None, 0, [], False,0] - tax_d['1'] = ['1', 'no rank', 'mapped', '1', 0, None, 0, [], False,0] - tax_d['-'] = ['-', 'no rank', 'filtered out', '0:-', 0, None, 0, [], False,0] + la[3] = la[3].split(':') + tax_d[la[0]] = la + [0, 0, 0] tax_file.close() + tax_d['0'] = ['0', 'no rank', 'Unclassified', ['0'], 0, 0, 0] + tax_d['1'] = ['1', 'no rank', 'Classified', ['1'], 0, 0, 0] + tax_d['-'] = ['-', 'no rank', 'Filtered out', ['0','-'], 0, 0, 0] + # read rep file if opt.rep_file != 'stdin': @@ -163,61 +134,47 @@ if __name__ == '__main__': for l in rep_file: la = l.strip().split('\t') tax_id = la[0] - path = tax_d[tax_id][3].split(':') - if not opt.under or opt.under in path: - if opt.d_mode == 's': - counts = int(la[4]) - elif opt.d_mode == 'p': - counts = int(la[7]) + int(la[8]) - elif opt.d_mode == 'P': - counts = int(la[7]) - tax_d[tax_id][9] = counts - for node in path: - if not opt.under or opt.under in tax_d[node][3].split(':'): - tax_d[node][4] += counts + path = tax_d[tax_id][3] + if opt.d_mode == 's': + counts = int(la[4]) + elif opt.d_mode == 'p': + counts = int(la[7]) + int(la[8]) + elif opt.d_mode == 'P': + counts = int(la[7]) + tax_d[tax_id][4] = counts + for node in path: + tax_d[node][5] += counts + tax_d[node][6] += counts if opt.rep_file != 'stdin': rep_file.close() - if opt.under: - reads_n = tax_d[opt.under][4] - else: - reads_n = tax_d['1'][4] + tax_d['0'][4] + reads_n = tax_d['1'][5] + tax_d['0'][5] if opt.cutoff < 1: min_c = max([1, int(opt.cutoff*reads_n)]) - dec = max([0, int(math.ceil(-math.log10(opt.cutoff)))]) else: min_c = opt.cutoff - dec = max([0,int(math.ceil(-math.log10(min_c/reads_n)))]) - + for tax_id in tax_d: - if tax_d[tax_id][4] >= min_c: - tax_d[tax_id][5] = (tax_id in specials or tax_d[tax_id][1] in levels) - path = tax_d[tax_id][3].split(':') - for node in path: - tax_d[node][6] += 1 - if node in specials or tax_d[node][1] in levels or node == tax_id: - tax_d[tax_id][7].append(node) - if tax_d[node][1] == opt.max_level and node != tax_id: - tax_d[tax_id][8] = True + tax_d[tax_id][6] -= tax_d[tax_id][4] + path = tax_d[tax_id][3] + if tax_d[tax_id][5] > min_c and len(path) > 1 : + tax_d[path[-2]][6] -= tax_d[tax_id][5] + # output out_tbl = open(opt.out_prefix + '.tbl', 'w') out_krona = open(opt.out_prefix 
+ '.krona.tree', 'w') - for tax_id in sorted(tax_d.keys(), key = lambda k : map(lambda x : int(x.replace('-','0')), tax_d[k][3].split(':'))): - if tax_d[tax_id][9] or (tax_d[tax_id][4] >= min_c and (tax_d[tax_id][5] or tax_d[tax_id][6] == 1) and not tax_d[tax_id][8]): - - from_total = round(float(tax_d[tax_id][4])*100 / reads_n,dec - 2) - - out_tbl.write('\t'.join(tax_d[tax_id][:2] + [str(len(tax_d[tax_id][7])), ':'.join(tax_d[tax_id][7]), tax_d[tax_id][2], str(tax_d[tax_id][4]), str(from_total)]) + '\n') + for tax_id in sorted(tax_d.keys(), key = lambda x : [int(y.replace('-', '0')) for y in tax_d[x][3]]): + if tax_d[tax_id][5] >= min_c: + perc = float(tax_d[tax_id][5]) * 100 / reads_n + out_tbl.write('\t'.join(tax_d[tax_id][:2] + [str(len(tax_d[tax_id][3])), ':'.join(tax_d[tax_id][3]), tax_d[tax_id][2], str(tax_d[tax_id][5]), str(perc)]) + '\n') - if opt.under: - if tax_d[tax_id][9]: - out_krona.write('\t'.join([str(tax_d[tax_id][9])] + map(lambda x : tax_d[x][2], tax_d[tax_id][7][tax_d[tax_id][7].index(opt.under):])) + '\n') - else: - if tax_d[tax_id][9]: - out_krona.write('\t'.join([str(tax_d[tax_id][9])] + map(lambda x : tax_d[x][2], tax_d[tax_id][7])) + '\n') + if tax_d[tax_id][4]: + out_krona.write('\t'.join([str(tax_d[tax_id][4])] + [tax_d[x][2] for x in tax_d[tax_id][3]]) + '\n') + if tax_d[tax_id][6]: + out_krona.write('\t'.join([str(tax_d[tax_id][6])] + [tax_d[x][2] for x in tax_d[tax_id][3]]+ ['Other']) + '\n') out_tbl.close() out_krona.close() diff --git a/bin/txM_rescore b/bin/txM_rescore new file mode 100755 index 0000000..b4e5c23 --- /dev/null +++ b/bin/txM_rescore @@ -0,0 +1,105 @@ +#!/usr/bin/env python + +################################################################################ +### COPYRIGHT ################################################################## + +# New York Genome Center + +# SOFTWARE COPYRIGHT NOTICE AGREEMENT +# This software and its documentation are copyright (2014) by the New York +# Genome Center. All rights are reserved. This software is supplied without +# any warranty or guaranteed support whatsoever. The New York Genome Center +# cannot be responsible for its use, misuse, or functionality. 
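The rewritten counting logic in txM_report above keeps three tallies per taxon: reads assigned directly to it, reads assigned anywhere in its subtree, and a remainder that becomes the Krona 'Other' row once above-cutoff children are reported on their own rows. A minimal sketch of that propagation, using a made-up three-node lineage, invented counts and an illustrative cutoff:

    # Toy lineage: taxid -> (path from root, reads assigned directly). All values invented.
    taxa = {
        '1':   (['1'], 0),                # 'Classified' root
        '2':   (['1', '2'], 10),
        '562': (['1', '2', '562'], 90),
        '9':   (['1', '2', '9'], 1),      # low-abundance sibling
    }

    # per taxon: [path, direct, cumulative, other] -- mirrors fields 3..6 of tax_d above
    tab = dict((t, [p, d, 0, 0]) for t, (p, d) in taxa.items())
    for t in taxa:
        path, direct = taxa[t]
        for node in path:                 # propagate counts up the lineage
            tab[node][2] += direct
            tab[node][3] += direct

    min_c = 5                             # reporting cutoff (-z)
    for t in tab:
        tab[t][3] -= tab[t][1]            # a taxon's own reads are never 'Other'
        path = tab[t][0]
        if tab[t][2] > min_c and len(path) > 1:
            tab[path[-2]][3] -= tab[t][2] # above-cutoff children get their own rows

    for t in sorted(tab):
        print('%s\tdirect=%d\tcumulative=%d\tother=%d' % tuple([t] + tab[t][1:]))
    # 1     direct=0   cumulative=101  other=0
    # 2     direct=10  cumulative=101  other=1   <- the read under below-cutoff taxon 9
    # 562   direct=90  cumulative=90   other=0
    # 9     direct=1   cumulative=1    other=0

Taxon 2 ends with other=1 because its child 9 falls below the cutoff, so that read is emitted under taxon 2 with an 'Other' label rather than as a separate Krona row.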
+ +# Version: 0.2 +# Author: Andre Corvelo + +################################################################# /COPYRIGHT ### +################################################################################ + + + +################################################################################ +### MODULES #################################################################### + +from optparse import OptionParser +from sys import stdin, stdout, stderr, exit +from re import findall + +################################################################### /MODULES ### +################################################################################ + + + +################################################################################ +### FUNCTIONS ################################################################## + +def getdist(aln_str): + return sum(int(s) for s in findall(r'(\d+)[\-|\+]', aln_str)) + len(findall(r'[A-z]', aln_str)) + + +def rescore(match_str): + match_list = match_str.split(',') + match_sorted = sorted((getdist(m.split(':')[3]),m) for m in match_list) + new_strata = [0] * (match_sorted[-1][0] + 1) + for m in match_sorted: + new_strata[m[0]] += 1 + new_match = ','.join(m[1] for m in match_sorted) + return ':'.join(str(s) for s in new_strata), new_match + +################################################################# /FUNCTIONS ### +################################################################################ + + + +################################################################################ +### ARGUMENTS,OPTIONS ########################################################## + +parser = OptionParser(usage="\n%prog -t tax file", version="%prog v0.2") + +parser.add_option( + "-i", + metavar = "FILE", + type = "string", + dest = "map_file", + default = 'stdin', + help="Input GEM alignment file (default = 'stdin')" + ) + +(opt, args) = parser.parse_args() + +######################################################### /ARGUMENTS,OPTIONS ### +################################################################################ + + + +################################################################################ +### CONSTANTS ################################################################## + +################################################################# /CONSTANTS ### +################################################################################ + + + +################################################################################ +### MAIN ####################################################################### + +if __name__ == '__main__': + # read map file + if opt.map_file != 'stdin': + map_file = open(opt.map_file, 'r') + else: + map_file = stdin + + for l in map_file: + rid, seq, qual, strata, match_str = l.strip().split('\t') + if match_str != '-' and match_str != '.': + new_strata, new_match = rescore(match_str) + stdout.write('\t'.join([rid, seq, qual, new_strata, new_match]) + '\n') + else: + stdout.write(l) + if opt.map_file != 'stdin': + map_file.close() + +###################################################################### /MAIN ### +################################################################################ diff --git a/bin/txM_sge b/bin/txM_sge index 9cd70e7..f24a314 100755 --- a/bin/txM_sge +++ b/bin/txM_sge @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. 
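txM_rescore, added above, re-derives each match's edit distance from its mismatch string and rebuilds the strata field so that hits are ordered and counted by that distance. A small self-contained check of the two helpers; the record layout (name:strand:position:mismatch-string, letters for substituted bases, '<n>-'/'<n>+' for indels) is inferred from the regexes and the m.split(':')[3] indexing above, and the sequence names and coordinates are made up:

    from re import findall

    def getdist(aln_str):
        # indel lengths (digits followed by '-' or '+') plus one per substituted base
        return sum(int(s) for s in findall(r'(\d+)[\-|\+]', aln_str)) + len(findall(r'[A-z]', aln_str))

    def rescore(match_str):
        match_list = match_str.split(',')
        match_sorted = sorted((getdist(m.split(':')[3]), m) for m in match_list)
        new_strata = [0] * (match_sorted[-1][0] + 1)
        for m in match_sorted:
            new_strata[m[0]] += 1
        new_match = ','.join(m[1] for m in match_sorted)
        return ':'.join(str(s) for s in new_strata), new_match

    print(getdist('76'))           # 0 : perfect match
    print(getdist('30A45'))        # 1 : one substitution
    print(getdist('20T10C2-43'))   # 4 : two substitutions plus a 2 bp indel
    print(rescore('chr2:+:100:30A45,chr1:+:500:76'))
    # ('1:1', 'chr1:+:500:76,chr2:+:100:30A45')  -- zero-distance hit listed first

As a side note, the character class [A-z] also matches the six ASCII punctuation characters between 'Z' and 'a'; r'[A-Za-z]' would be the stricter equivalent if those characters can never occur in a mismatch string.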
-# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -55,7 +55,7 @@ def submitt(pipe_name, job, submd, swd, logdir): dep_l = job['depen'].split(':') for dep in dep_l: if dep not in submd: - print '\nUpstream jobs not submitted. Revise your .pipe file.' + print '\nUpstream jobs not submitted. Revise your .sge file.' if submd: print 'Killing previously submitted jobs:' os.system('qdel ' + ','.join(submd.values())) @@ -67,12 +67,16 @@ def submitt(pipe_name, job, submd, swd, logdir): else: deps = '' + # compute memory + job_mem = str(int(job['cpus'])*12) + # create bash script job_script = open(job_filename,'w') job_script.write('#!/bin/sh\n\n') job_script.write('#$ -cwd' + '\n') job_script.write('#$ -q ' + job['queue'] + '\n') job_script.write('#$ -pe make ' + job['cpus'] + '\n') + job_script.write('#$ -l h_vmem=' + job_mem+ 'G\n') job_script.write('#$ -o ' + job_filename + '.$JOB_ID.o\n') job_script.write('#$ -e ' + job_filename + '.$JOB_ID.e\n') if deps: @@ -109,7 +113,7 @@ def submitt(pipe_name, job, submd, swd, logdir): ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog -p pipefile", version="%prog 0.1") +parser = OptionParser(usage="\n%prog -p pipefile", version="%prog v0.2") parser.add_option( "-s", diff --git a/bin/txM_summary b/bin/txM_summary index 5334bd7..e2ac4cf 100755 --- a/bin/txM_summary +++ b/bin/txM_summary @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -43,7 +43,7 @@ def get_best_n(strata_s, n_strata): return (None, 0) def get_reflen(mstring): - return sum([int(s) for s in findall(r'\d+(?!\-)', mstring)]) + len(findall(r'[A-z]+', mstring)) + return sum([int(s) for s in findall(r'\d+(?!\-)', mstring)]) + len(findall(r'[A-z]', mstring)) def p_rec(l, tax_d, rep_d, max_edit, array_d, len_d, excl_taxa, comp_cov): la = l.strip().split('\t') @@ -104,7 +104,7 @@ def p_rec(l, tax_d, rep_d, max_edit, array_d, len_d, excl_taxa, comp_cov): ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog [options]", version="%prog 0.1") +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") parser.add_option( "-i", diff --git a/taxMaps b/taxMaps index be821f9..e86d00d 100755 --- a/taxMaps +++ b/taxMaps @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. 
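The one-character change in txM_summary's get_reflen (r'[A-z]+' to r'[A-z]') matters whenever substituted bases are adjacent: a run of letters used to add a single reference position instead of one per base. A quick before/after comparison on a hypothetical mismatch string, assuming digits encode match-run lengths and letters substituted bases:

    from re import findall

    def reflen(mstring, per_base=True):
        letters = r'[A-z]' if per_base else r'[A-z]+'
        return sum(int(s) for s in findall(r'\d+(?!\-)', mstring)) + len(findall(letters, mstring))

    m = '20AC43'                       # 20 matches, 2 adjacent substitutions, 43 matches
    print(reflen(m, per_base=False))   # 64 : old behaviour, one reference base short
    print(reflen(m, per_base=True))    # 65 : new behaviour, 20 + 2 + 43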
-# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -23,7 +23,7 @@ ### MODULES #################################################################### from optparse import OptionParser, OptionGroup -import sys, os, errno, commands +import sys, os, errno, commands, uuid ################################################################### /MODULES ### ################################################################################ @@ -47,6 +47,9 @@ def rp(path,start): def bn(path): return os.path.basename(path) +def missing_files(csv_file_list): + return sum(not os.path.isfile(f) for f in csv_file_list.split(',')) + ################################################################# /FUNCTIONS ### ################################################################################ @@ -55,7 +58,7 @@ def bn(path): ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage = "\n%prog options", version = "%prog v0.1") +parser = OptionParser(usage = "\n%prog options", version = "%prog v0.2") # input parser_input = OptionGroup(parser, "Input (MANDATORY one of the following: -i, -f, -b or (-1 and -2)") @@ -105,6 +108,15 @@ parser_input.add_option( help = "Input .fq, .fastq, .fq.gz, .fastq.gz read 2 file. In sync with, and of the same sort as, -1 input file for paired modes (MANDATORY)" ) +parser_input.add_option( + "-D", + metavar = "FLOAT", + type = "string", + dest = "i_sprob", + default = "1", + help = "Downsampling probability (default = 1 = No sampling)" + ) + parser.add_option_group(parser_input) # preprocessing @@ -201,8 +213,8 @@ parser_mapping.add_option( metavar = "INT/FLOAT_LIST", type = "string", dest = "m_edits", - default = None, - help = "Edit distances. CSV (INT or FLOAT<1; default = 0.1)" + default = '0.2', + help = "Edit distances. CSV (INT or FLOAT<1; default = 0.2)" ) parser_mapping.add_option( @@ -210,19 +222,10 @@ parser_mapping.add_option( metavar = "INT_LIST", type = "string", dest = "m_cpus", - default = None, + default = '1', help = "Number of CPUs. CSV (default = 1)" ) -parser_mapping.add_option( - "-n", - metavar = "STR_LIST", - type = "string", - dest = "m_names", - default = None, - help = "Index names. CSV (default = index basename(s) without extension)" - ) - parser.add_option_group(parser_mapping) # taxonomy @@ -268,31 +271,13 @@ parser_rep.add_option( help = "Excluded taxids. CSV (only if --cov flag ON)" ) -parser_rep.add_option( - "-r", - metavar = "STR", - type = "string", - dest = "r_level", - default = None, - help = "Lowest taxonomic level e.g. 'species' (default = None)" - ) - parser_rep.add_option( "-z", metavar = "FLOAT", type = "string", dest = "r_cutoff", - default = '0.00001', - help = "Reporting cutoff (INT or FLOAT<1; default = 0.00001)" - ) - -parser_rep.add_option( - "-u", - metavar = "STR_LIST", - type = "string", - dest = "r_detail", - default = None, - help = "Create additional reports for these taxids. 
CSV (default = None)" + default = '1', + help = "Reporting cutoff (INT or FLOAT<1; default = 1)" ) parser.add_option_group(parser_rep) @@ -305,8 +290,8 @@ parser_misc.add_option( metavar = "STR", type = "string", dest = "m_prefix", - default = "sample", - help = "Sample prefix (default = sample)" + default = str(uuid.uuid4())[:8], + help = "Sample prefix (default = random 8-char UUID)" ) parser_misc.add_option( @@ -347,107 +332,88 @@ parser_misc.add_option( parser.add_option_group(parser_misc) - (opt, args) = parser.parse_args() -ecounter = 0 +### check options +error_list = [] +warning_list = [] + # input +input_list = [] if not opt.i_comm and not opt.i_fq and not opt.i_bam and (not opt.i_fq_1 or not opt.i_fq_2): - sys.stdout.write('\nERROR: Missing input [-i, -f, -b, -1, -2]') - ecounter += 1 -else: - icounter = 0 - inputs = [] - if opt.i_comm: - icounter += 2 - inputs.append('-i ' + opt.i_comm) - if opt.i_fq: - icounter += 2 - inputs.append('-f ' + opt.i_fq) - if opt.i_bam: - icounter += 2 - inputs.append('-b ' + opt.i_bam) - if opt.i_fq_1: - icounter += 1 - inputs.append('-1 ' + opt.i_fq_1) - if opt.i_fq_2: - icounter += 1 - inputs.append('-2 ' + opt.i_fq_2) - if icounter > 2: - ecounter += 1 - sys.stdout.write('\nERROR: Too many inputs [' + ' '.join(inputs) + ']') + error_list.append('ERROR: Missing valid input [-i | -b | -f | (-1 & -2)]') +if opt.i_comm: + input_list.append('-i') +if opt.i_fq: + if missing_files(opt.i_fq): + error_list.append('ERROR: File(s) not found in [-f ' + opt.i_fq + ']') else: - if opt.i_fq: - if not os.path.isfile(opt.i_fq): - sys.stdout.write('\nERROR: Specified input file \'' + opt.i_fq + '\' does not exist [-f ' + opt.i_fq + ']') - ecounter += 1 - if opt.i_bam: - if not os.path.isfile(opt.i_bam): - sys.stdout.write('\nERROR: Specified input file \'' + opt.i_bam + '\' does not exist [-b ' + opt.i_bam + ']') - ecounter += 1 - if not os.path.isfile(opt.i_bam + '.bai'): - sys.stdout.write('\nERROR: Accessory index file \'' + opt.i_bam + '.bai\' does not exist [-b ' + opt.i_bam + ']') - ecounter += 1 - - if opt.i_fq_1: - if not os.path.isfile(opt.i_fq_1): - sys.stdout.write('\nERROR: Specified input file \'' + opt.i_fq_1 + '\' does not exist [-1 ' + opt.i_fq_1 + ']') - ecounter += 1 - if opt.i_fq_2: - if not os.path.isfile(opt.i_fq_2): - sys.stdout.write('\nERROR: Specified input file \'' + opt.i_fq_2 + '\' does not exist [-2 ' + opt.i_fq_2 + ']') - ecounter += 1 + input_list.append('-f') +if opt.i_bam: + if missing_files(opt.i_bam): + error_list.append('ERROR: File(s) not found in [-b ' + opt.i_bam + ']') + else: + input_list.append('-b') +if opt.i_fq_1 and opt.i_fq_2: + if len(opt.i_fq_1.split(',')) != len(opt.i_fq_2.split(',')): + error_list.append('ERROR: Different number of R1 an R2 files [-1 ' + opt.i_fq_1 + ' -2 ' + opt.i_fq_2 + ']') + else: + if missing_files(opt.i_fq_1) or missing_files(opt.i_fq_2): + error_list.append('ERROR: File(s) not found in [-1 ' + opt.i_fq_1 + ' -2 ' + opt.i_fq_2 + ']') + else: + input_list.append('(-1 & -2)') +if len(input_list) > 1: + error_list.append('ERROR: Too many inputs [' + ' & '.join(input_list) + ']') + # mapping -if not opt.m_indices: - sys.stdout.write('\nERROR: Missing index(ices) [-d]') - ecounter += 1 -else: - for i in opt.m_indices.split(','): - if not os.path.isfile(i): - sys.stdout.write('\nERROR: Specified index \'' + i + '\' file does not exist [-d ' + opt.m_indices + ']') - ecounter += 1 - -if opt.m_edits: - if len(opt.m_edits.split(',')) != len(opt.m_indices.split(',')): - sys.stdout.write('\nERROR: 
Number of specified edit distances does not match number of specified indices [-e ' + opt.m_edits + ' given -d ' + opt.m_indices + ']') - ecounter += 1 - -if opt.m_cpus: - if len(opt.m_cpus.split(',')) != len(opt.m_indices.split(',')): - sys.stdout.write('\nERROR: Number of specified CPUs does not match number of specified indices [-c ' + opt.m_cpus + ' given -d ' + opt.m_indices + ']') - ecounter += 1 - -if opt.m_names: - if len(opt.m_names.split(',')) != len(opt.m_indices.split(',')): - sys.stdout.write('\nERROR: Number of specified names does not match number of specified indices [-n ' + opt.m_names + ' given -d ' + opt.m_indices + ']') - ecounter += 1 +n_indexes = 0 +if opt.m_indices: + if missing_files(opt.m_indices): + error_list.append('ERROR: File(s) not found in [-d ' + opt.m_indices + ']') + else: + n_indexes = len(opt.m_indices.split(',')) +if not n_indexes: + error_list.append('ERROR: Missing valid index(es) [-d]') + +n_dist = len(opt.m_edits.split(',')) +if n_dist != n_indexes and n_indexes: + if n_dist == 1 : + warning_list.append('WARNING: Setting the maximum edit distance for all indexes [-e ' + ','.join([opt.m_edits]*n_indexes) + ']') + else: + error_list.append('ERROR: ' + str(n_dist) + ' distances specified [-e ' + opt.m_edits + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') + +n_cpus = len(opt.m_cpus.split(',')) +if n_cpus != n_indexes and n_indexes: + if n_cpus == 1 : + warning_list.append('WARNING: Setting the number of CPUs for all indexes [-c ' + ','.join([opt.m_cpus]*n_indexes) + ']') + else: + error_list.append('ERROR: ' + str(n_cpus) + ' CPU specifications [-c ' + opt.m_cpus + '], incompatible with ' + str(n_indexes) + ' indexes [-d ' + opt.m_indices + ']') # taxonomy -if not opt.t_table: - sys.stdout.write('\nERROR: Missing taxonomic table [-t]') - ecounter += 1 +if opt.t_table: + if missing_files(opt.t_table): + error_list.append('ERROR: File not found [-t ' + opt.t_table + ']') else: - if not os.path.isfile(opt.t_table): - sys.stdout.write('\nERROR: Specified taxonomic table file \'' + opt.t_table + '\' does not exist [-t ' + opt.t_table + ']') - ecounter += 1 - + error_list.append('ERROR: Missing taxonomic table [-t]') + + if opt.t_mode not in ['s', 'p', 'P']: - sys.stdout.write('\nERROR: Invalid mode [-m ' + opt.t_mode + ']') - ecounter += 1 + error_list.append('ERROR: Invalid mode [-m ' + opt.t_mode + ']') + # misc if opt.m_queue: if opt.m_queue not in commands.getoutput('qstat -g c | egrep "CLUSTER|\--" -v | awk \'{print $1}\'').split('\n'): - sys.stdout.write('\nERROR: Specified queue does not exist [-q ' + opt.m_queue + ']') + error_list.append('ERROR: Specified queue does not exist [-q ' + opt.m_queue + ']') ecounter += 1 - -if ecounter: - sys.stdout.write('\n\n') +sys.stderr.write('\n'.join(warning_list) + '\n') +if error_list: + sys.stderr.write('\n'.join(error_list) + '\n\n') parser.print_help() exit(-1) @@ -471,31 +437,47 @@ if __name__ == '__main__': # base dirs module_dir = os.path.abspath(os.path.dirname(__file__)) start_dir = os.getcwd() + + prefix = opt.m_prefix + run_dir = os.path.abspath(opt.m_outdir) mkdir_p(run_dir) + res_dir = run_dir + '/txM.' + prefix + '.base' + mkdir_p(res_dir) + + log_dir = run_dir + '/txM.' + prefix + '.log' + mkdir_p(log_dir) + + map_dir = run_dir + '/txM.' + prefix + '.map' + mkdir_p(map_dir) + + out_dir = run_dir + '/txM.' 
+ prefix + '.out' + mkdir_p(out_dir) + + # input - data_dir = run_dir + '/read_data' - mkdir_p(data_dir) if opt.i_fq: - fq_file = os.path.abspath(opt.i_fq) - fq_lnk = data_dir + '/' + bn(fq_file) - os.system(' '.join(['ln', '-s', fq_file, fq_lnk])) + fq_files = [os.path.abspath(fq_file) for fq_file in opt.i_fq.split(',')] + fq_lnks = [res_dir + '/' + bn(fq_file) for fq_file in fq_files] + for x in xrange(len(fq_files)): + os.system(' '.join(['ln', '-s', fq_files[x], fq_lnks[x]])) elif opt.i_bam: bam_file = os.path.abspath(opt.i_bam) - bam_lnk = data_dir + '/' + bn(bam_file) + bam_lnk = res_dir + '/' + bn(bam_file) os.system(' '.join(['ln', '-s', bam_file, bam_lnk])) - os.system(' '.join(['ln', '-s', bam_file + '.bai', bam_lnk + '.bai'])) elif opt.i_fq_1 and opt.i_fq_2: - fq1_file = os.path.abspath(opt.i_fq_1) - fq1_lnk = data_dir + '/' + bn(fq1_file) - os.system(' '.join(['ln', '-s', fq1_file, fq1_lnk])) - fq2_file = os.path.abspath(opt.i_fq_2) - fq2_lnk = data_dir + '/' + bn(fq2_file) - os.system(' '.join(['ln', '-s', fq2_file, fq2_lnk])) + fq1_files = [os.path.abspath(fq1_file) for fq1_file in opt.i_fq_1.split(',')] + fq1_lnks = [res_dir + '/' + bn(fq1_file) for fq1_file in fq1_files] + for x in xrange(len(fq1_files)): + os.system(' '.join(['ln', '-s', fq1_files[x], fq1_lnks[x]])) + fq2_files = [os.path.abspath(fq2_file) for fq2_file in opt.i_fq_2.split(',')] + fq2_lnks = [res_dir + '/' + bn(fq2_file) for fq2_file in fq2_files] + for x in xrange(len(fq2_files)): + os.system(' '.join(['ln', '-s', fq2_files[x], fq2_lnks[x]])) elif opt.i_comm: comm_str = opt.i_comm - comm_file = open(data_dir + '/input_stream.sh', 'w') + comm_file = open(res_dir + '/input_stream.sh', 'w') comm_file.write('#!/bin/sh\n' + comm_str + ' > input_stream.fq\n') comm_file.close() @@ -508,42 +490,31 @@ if __name__ == '__main__': q_offset = opt.p_qoff # mapping - ind_dir = run_dir + '/indexes' - mkdir_p(ind_dir) - - index_files = opt.m_indices.split(',') - - len_files = map(lambda x: '.'.join(x.split('.')[:-1]) + '.len', index_files) - - if opt.m_names: - index_names = opt.m_names.split(',') - else: - index_names = map(lambda x : '.'.join(os.path.basename(x).split('.')[:-1]), index_files.split(',')) - - if opt.m_edits: - index_edits = opt.m_edits.split(',') - else: - index_edits = ['0.1']*len(index_names) - max_edit = str(max(map(lambda x: float(x),index_edits))) + index_files = opt.m_indices.split(',') + len_files = ['.'.join(f.split('.')[:-1]) + '.len' for f in index_files] - if opt.m_cpus: - index_cpus = opt.m_cpus.split(',') - else: - index_cpus = ['1']*len(index_names) + index_names = ['.'.join(os.path.basename(f).split('.')[:-1]) for f in index_files] + index_edits = opt.m_edits.split(',') + if len(index_edits) == 1: + index_edits = [index_edits[0]]*len(index_names) + max_edit = str(max([float(x) for x in index_edits])) + + index_cpus = opt.m_cpus.split(',') + if len(index_cpus) == 1: + index_cpus = [index_cpus[0]]*len(index_names) + index_lnks = [] len_lnks = [] for i in xrange(len(index_names)): - index_lnks.append(ind_dir + '/' + bn(index_files[i])) - len_lnks.append(ind_dir + '/' + bn(len_files[i])) + index_lnks.append(res_dir + '/' + bn(index_files[i])) + len_lnks.append(res_dir + '/' + bn(len_files[i])) os.system(' '.join(['ln', '-s', os.path.abspath(index_files[i]), index_lnks[i]])) os.system(' '.join(['ln', '-s', os.path.abspath(len_files[i]), len_lnks[i]])) # taxonomy - tax_dir = run_dir + '/taxonomy' - mkdir_p(tax_dir) tax_file = os.path.abspath(opt.t_table) - tax_lnk = tax_dir + '/' + bn(tax_file) + 
tax_lnk = res_dir + '/' + bn(tax_file) os.system(' '.join(['ln', '-s', tax_file, tax_lnk])) det_mode = opt.t_mode @@ -552,24 +523,13 @@ if __name__ == '__main__': exc_taxa = opt.r_exc # report - low_level = opt.r_level rep_cutoff = opt.r_cutoff - if opt.r_detail: - tax_detail = opt.r_detail.split(',') # miscellaneous - prefix = opt.m_prefix sge_queue = opt.m_queue sge_slots = opt.m_slots dry_run = opt.m_dry - # out dirs - map_dir = run_dir + '/map' - mkdir_p(map_dir) - out_dir = run_dir + '/out' - mkdir_p(out_dir) - log_dir = run_dir + '/log' - mkdir_p(log_dir) #sh script and pipe sh_filename = run_dir + '/txM.' + prefix + '.sh' @@ -588,20 +548,27 @@ if __name__ == '__main__': input_command = comm_str elif opt.i_fq: if opt.i_fq[-3:] == '.gz': - input_command = 'zcat ' + rp(fq_lnk, run_dir) + input_command = 'zcat ' + ' '.join([rp(fq_lnk, run_dir) for fq_lnk in fq_lnks]) else: - input_command = 'cat ' + rp(fq_lnk, run_dir) + input_command = 'cat ' + ' '.join([rp(fq_lnk, run_dir) for fq_lnk in fq_lnks]) elif opt.i_bam: stview_log = rp(log_dir + '/' + prefix + '.st_view.err', run_dir) stbam2fq_log = rp(log_dir + '/' + prefix + '.st_bam2fq.err', run_dir) bam = rp(bam_lnk, run_dir) if opt.t_mode == 's': - input_command = 'samtools view -f4 -F256 ' + bam + ' -b 2> ' + stview_log + ' | samtools bam2fq - 2> ' + stbam2fq_log + input_command = 'samtools view -f4 -F256 ' + bam + ' -b 2> ' + stview_log + ' \\\n\t| samtools bam2fq - 2> ' + stbam2fq_log else: - input_command = 'samtools view -f12 -F256 ' + bam + ' -b 2> ' + stview_log + ' | samtools bam2fq - 2> ' + stbam2fq_log + input_command = 'samtools view -f12 -F256 ' + bam + ' -b 2> ' + stview_log + ' \\\n\t| samtools bam2fq - 2> ' + stbam2fq_log + input_command += ' \\\n\t| txM_fqmate' elif opt.i_fq_1 and opt.i_fq_2: - input_command = 'txM_fqintlv -1 ' + rp(fq1_lnk, run_dir) + ' -2 ' + rp(fq2_lnk, run_dir) - input_command += ' | ' + input_command = 'txM_fqintlv -1 ' + ','.join([rp(fq1_lnk, run_dir) for fq1_lnk in fq1_lnks]) + ' -2 ' + ','.join([rp(fq2_lnk, run_dir) for fq2_lnk in fq2_lnks]) + input_command += ' \\\n\t| ' + + if opt.i_sprob != '1': + if opt.t_mode == 's': + input_command += 'txM_fqsample -s ' + opt.i_sprob + ' \\\n\t| ' + else: + input_command += 'txM_fqsample -p -s ' + opt.i_sprob + ' \\\n\t| ' # preprocessing filt_out = rp(map_dir + '/' + prefix + '.filtout.map', run_dir) @@ -616,13 +583,13 @@ if __name__ == '__main__': prep_command += '-q ' + cutadapt_qual + ' ' if q_offset: prep_command += '--quality-base=64 ' - prep_command += '2> ' + cutadapt_log + ' | ' + prep_command += '2> ' + cutadapt_log + ' \\\n\t| ' if leftt_len: - prep_command += 'txM_fqltrim -l ' + leftt_len + ' | ' + prep_command += 'txM_fqltrim -l ' + leftt_len + ' \\\n\t| ' if max_rlen: - prep_command += 'txM_fqhtrim -l ' + max_rlen + ' | ' + prep_command += 'txM_fqhtrim -l ' + max_rlen + ' \\\n\t| ' if min_rlen: - prep_command += 'txM_fqminlen -m ' + min_rlen + ' 2> ' + filt_out + ' | ' + prep_command += 'txM_fqminlen -m ' + min_rlen + ' 2> ' + filt_out + ' \\\n\t| ' #low complexity and Ns if opt.p_lowc or opt.p_nrem != 100: prinseq_log = rp(log_dir + '/' + prefix + '.prins.err', run_dir) @@ -637,7 +604,7 @@ if __name__ == '__main__': prep_command += '-ns_max_p ' + str(opt.p_nrem) + ' ' if q_offset: prep_command += '-phred64 ' - prep_command += '2> ' + prinseq_log + ' | ' + prep_command += '2> ' + prinseq_log + ' \\\n\t| ' # mapping gem_command = 'gem-mapper -q ignore --fast-mapping' @@ -649,10 +616,10 @@ if __name__ == '__main__': map_cpus = index_cpus[i] 
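The commands assembled here (and the input and preprocessing commands above) now join pipeline stages with ' \\\n\t| ' instead of ' | ', so each stage lands on its own backslash-continued line in the generated txM.<prefix>.sh. A short illustration of how a map_str renders; the index and file names below are placeholders:

    stages = [
        'gem-mapper -q ignore --fast-mapping -I idx.gem -m 0.2 -e 0.2 -T 4 2> sample.idx.map.err',
        'txM_rescore',
        'txM_mapout 2> sample.idx.map',
    ]
    print(' \\\n\t| '.join(stages))
    # gem-mapper -q ignore --fast-mapping -I idx.gem -m 0.2 -e 0.2 -T 4 2> sample.idx.map.err \
    #     | txM_rescore \
    #     | txM_mapout 2> sample.idx.map

The same idea drives map_lnker (' \\\n\t| ' + 'txM_gem2fq \\\n\t| '), which appears to string successive indexes together so that reads left over by one index are converted back to FASTQ and fed to the next.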
map_out = rp(map_dir + '/' + prefix + '.' + index_names[i] + '.map', run_dir) out_files.append(map_out) - map_str = gem_command + ' '.join([' -I', map_index, '-m', map_dist, '-e', map_dist, '-T', map_cpus, '2>', map_log, '| ']) + map_str = gem_command + ' '.join([' -I', map_index, '-m', map_dist, '-e', map_dist, '-T', map_cpus, '2>', map_log, ' \\\n\t| txM_rescore \\\n\t| ']) map_str += 'txM_mapout 2> ' + map_out map_commands.append(map_str) - map_lnker = ' | ' + 'txM_gem2fq | ' + map_lnker = ' \\\n\t| ' + 'txM_gem2fq \\\n\t| ' map_command = map_lnker.join(map_commands) map_out = rp(map_dir + '/' + prefix + '.unmapped.map', run_dir) out_files.append(map_out) @@ -660,7 +627,7 @@ if __name__ == '__main__': #parse low complexity and Ns if opt.p_lowc or opt.p_nrem != 100: - lc_command = 'cat ' + prinseq_out + '.fastq | txM_fq2gem > ' + prinseq_out2 + ' ; ' + lc_command = 'cat ' + prinseq_out + '.fastq \\\n\t| txM_fq2gem > ' + prinseq_out2 + ' \n\n' lc_command += 'rm ' + prinseq_out + '.fastq' # taxonomy @@ -669,32 +636,29 @@ if __name__ == '__main__': else: tax_command = 'txM_mergintlv ' tax_command += ' '.join(out_files) - tax_command += ' 2> /dev/null | ' + tax_command += ' 2> /dev/null \\\n\t| ' tax_command += 'txM_lca ' tax_command += '-t ' + rp(tax_lnk, run_dir) + ' ' if det_mode == 'p' or det_mode == 'P': tax_command += '-p ' tax_command += '2> ' + rp(map_dir + '/' + prefix + '.merged.map.lca', run_dir) - tax_command += ' | ' + tax_command += ' \\\n\t| ' tax_command += 'txM_summary ' tax_command += '-t ' + rp(tax_lnk, run_dir) + ' ' tax_command += '-e ' + max_edit + ' ' if comp_coverage: tax_command += '-c ' -# tax_command += '-l ' + rp(len_lnk, run_dir) + ' ' - tax_command += '-l ' + ','.join(map(lambda x : rp(x, run_dir), len_lnks)) + ' ' + tax_command += '-l ' + ','.join([rp(x, run_dir) for x in len_lnks]) + ' ' if exc_taxa: tax_command += '-x ' + exc_taxa + ' ' tax_command += '2> /dev/null > ' + rp(out_dir + '/' + prefix + '.merged.map.lca.summary', run_dir) # reporting - rep_command = 'cat ' + rp(out_dir + '/' + prefix + '.merged.map.lca.summary', out_dir) + ' | ' + rep_command = 'cat ' + rp(out_dir + '/' + prefix + '.merged.map.lca.summary', out_dir) + ' \\\n\t| ' rep_command += 'txM_report ' rep_command += '-t ' + rp(tax_lnk, out_dir) + ' ' rep_command += '-m ' + det_mode + ' ' rep_command += '-c ' + rep_cutoff + ' ' - if low_level: - rep_command += '-l ' + low_level + ' ' rep_command += '-o ' + prefix + ' ' sh_file.write(input_command + prep_command + map_command + '\n\n') @@ -702,22 +666,7 @@ if __name__ == '__main__': sh_file.write(lc_command + '\n\n') sh_file.write(tax_command + '\n\n') sh_file.write('cd ' + rp(out_dir, run_dir) +'\n') - sh_file.write(rep_command + '\n') - - if opt.r_detail: - for tax in tax_detail: - detail_command = 'cat ' + rp(out_dir + '/' + prefix + '.merged.map.lca.summary', out_dir) + ' | ' - detail_command += 'txM_report ' - detail_command += '-t ' + rp(tax_lnk, out_dir) + ' ' - detail_command += '-m ' + det_mode + ' ' - detail_command += '-c ' + rep_cutoff + ' ' - if low_level: - detail_command += '-l ' + low_level + ' ' - detail_command += '-o ' + prefix + '.' + tax + ' ' - detail_command += '-u ' + tax - - sh_file.write(detail_command + '\n') - + sh_file.write(rep_command + '\n') sh_file.close() if sge_queue: diff --git a/taxMaps-index b/taxMaps-index index ab631e6..d7b6460 100755 --- a/taxMaps-index +++ b/taxMaps-index @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. 
The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -41,7 +41,7 @@ import sys, uuid, os ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage="\n%prog [options]", version="%prog v0.1") +parser = OptionParser(usage="\n%prog [options]", version="%prog v0.2") parser.add_option( "-i", diff --git a/taxMaps-taxtbl b/taxMaps-taxtbl index 5d8e1fd..684a2af 100755 --- a/taxMaps-taxtbl +++ b/taxMaps-taxtbl @@ -11,7 +11,7 @@ # any warranty or guaranteed support whatsoever. The New York Genome Center # cannot be responsible for its use, misuse, or functionality. -# Version: 0.1 +# Version: 0.2 # Author: Andre Corvelo ################################################################# /COPYRIGHT ### @@ -47,7 +47,7 @@ def find_path(node, node_dict): ################################################################################ ### ARGUMENTS,OPTIONS ########################################################## -parser = OptionParser(usage = "\n%prog -n names.dmp -t nodes.dmp", version = "%prog 0.1") +parser = OptionParser(usage = "\n%prog -n names.dmp -t nodes.dmp", version = "%prog v0.2") parser.add_option( "-n",