diff --git a/src/read2tags.c b/src/read2tags.c
index 513c8074..a7124e4d 100644
--- a/src/read2tags.c
+++ b/src/read2tags.c
@@ -45,7 +45,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
* position record
*/
typedef struct {
- int record;
+ int record_from;
+ int record_to;
int from;
int to;
} pos_t;
@@ -129,30 +130,61 @@ static int bam_aux_cmp(const uint8_t *s, const uint8_t *d)
/*
* Parse a comma separated list of positions
- * Format is r:s:e,r:s:e,r:s:e,...
+ * Format is rf:rt:s:e,...
+ * or is r:s:e,... if rf and rt are the same
+ * or is s:e,... if rf and rt are both zero
*
- * where r is record number (0, 1 or 2 and is optional)
+ * where rf is record number to read (0, 1 or 2 and is optional)
+ * rt is record number to write (0, 1 or 2 and is optional)
* s is the start position in the read string
* e is the end position in the read string
* start and end positions are 1 (not zero) based
*/
-static void parse_positions(va_t *poslist, char *args)
+static void parse_positions(va_t *poslist, char *arg_string)
{
- char *argstr = strdup(args);
+ char *argstr = strdup(arg_string);
char *save_s;
char *s = strtok_r(argstr,",",&save_s);
while (s) {
pos_t *pos = calloc(1, sizeof(pos_t));
char *save_p;
- char *p = strtok_r(s,":",&save_p); if (p) pos->record = atoi(p);
- p = strtok_r(NULL,":",&save_p); if (p) pos->from = atoi(p);
- p = strtok_r(NULL,":",&save_p); if (p) pos->to = atoi(p);
- if (!p) {
- // looks like s:e format
- pos->to = pos->from; pos->from = pos->record; pos->record = 0;
+ int n = 0; // number of position arguments
+ int args[4];
+
+ char *p = strtok_r(s,":",&save_p); if (p) args[n++] = atoi(p);
+ p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p);
+ p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p);
+ p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p);
+
+ switch (n) {
+ case 2: // s:e
+ pos->record_from = 0;
+ pos->record_to = 0;
+ pos->from = args[0];
+ pos->to = args[1];
+ break;
+ case 3: // r:s:e
+ pos->record_from = args[0];
+ pos->record_to = args[0];
+ pos->from = args[1];
+ pos->to = args[2];
+ break;
+ case 4: // rf:rt:s:e
+ pos->record_from = args[0];
+ pos->record_to = args[1];
+ pos->from = args[2];
+ pos->to = args[3];
+ break;
+ default:
+ fprintf(stderr, "Invalid pos format: %s\n", arg_string);
+ exit(1);
}
- if (pos->record < 0 || pos->record > 2 || pos->from == 0 || pos->to == 0 || pos->from > pos->to) {
- fprintf(stderr,"Invalid pos argument: %s\n", args);
+
+ if (pos->record_from < 0 || pos->record_from > 2 ||
+ pos->record_to < 0 || pos->record_to > 2 ||
+ pos->from == 0 || pos->to == 0 ||
+ pos->from > pos->to) {
+ fprintf(stderr,"Invalid pos argument: %s\n", arg_string);
exit(1);
}
va_push(poslist,pos);
@@ -169,6 +201,8 @@ static void usage(FILE *write_to)
fprintf(write_to,
"Usage: bambi read2tags [options]\n"
"\n"
+"Convert portions of a read into tags\n"
+"\n"
"Options:\n"
" -i --input BAM file to read [default: stdin]\n"
" -o --output BAM file to output [default: stdout]\n"
@@ -178,13 +212,25 @@ static void usage(FILE *write_to)
" [default: " DEFAULT_KEEP_TAGS "]\n"
" -d --discard-tags comma separated list of tags to discard when merging records\n"
" [default: " DEFAULT_DISCARD_TAGS "]\n"
-" -p --positions comma separated list of positions\n"
+" -p --positions comma separated list of positions (see below)\n"
" -m --merge merge duplicate tags\n"
" -r --replace replace duplicate tags\n"
" -v --verbose verbose output\n"
" --input-fmt [sam/bam/cram] [default: bam]\n"
" --output-fmt [sam/bam/cram] [default: bam]\n"
" --compression-level [0..9]\n"
+"\n"
+" comma separated list of positions, where each position has the format:\n"
+" Format is rf:rt:s:e,...\n"
+" or is r:s:e,... if rf and rt are the same\n"
+" or is s:e,... if rf and rt are both zero\n"
+"\n"
+" where rf is record number to read (0, 1 or 2 and is optional)\n"
+" rt is record number to write (0, 1 or 2 and is optional)\n"
+" s is the start position in the read string\n"
+" e is the end position in the read string\n"
+" start and end positions are 1 (not zero) based\n"
+"\n"
);
}
@@ -447,17 +493,25 @@ static void shuffle(char *s)
/*
* add a new tag to our taglist, or append to existing tag
*/
-static void add_or_update(va_t *va, char *tag, char *data)
+static void add_or_update(va_t *va, char *tag, char *data, int r)
{
int n;
+ char recno = r + '0';
+ char key[4];
+
+ key[0] = recno;
+ key[1] = tag[0];
+ key[2] = tag[1];
+ key[3] = 0;
+
for (n=0; n < va->end; n++) {
- if (strncmp(tag,va->entries[n],2) == 0) break;
+ if (strncmp(key,va->entries[n],3) == 0) break;
}
if (n == va->end) {
// add new tag
- char *e = calloc(1, strlen(tag) + 1 + strlen(data) + 1);
- strcpy(e, tag);
+ char *e = calloc(1, strlen(key) + 1 + strlen(data) + 1);
+ strcpy(e, key);
strcat(e, ":");
strcat(e, data);
va_push(va,e);
@@ -507,31 +561,43 @@ static void add_tag(bam1_t *rec, char *tag, char *data, opts_t *opts)
}
/*
- * Process one record
+ * Process records
*/
-static bam1_t *process_record(bam1_t *rec, opts_t *opts)
+static void process_records(bam1_t *rec1, bam1_t *rec2, bam1_t **newrec, bam1_t **newrec2, opts_t *opts)
{
pos_t *pos;
- int recno = -1;
- char *tag_data = calloc(1, rec->core.l_qseq+1);
- char *qtag_data = calloc(1, rec->core.l_qseq+1);
+ int readno1 = -1;
+ int readno2 = -1;
+ char *tag_data = calloc(1, rec1->core.l_qseq+1);
+ char *qtag_data = calloc(1, rec1->core.l_qseq+1);
va_t *new_tags = va_init(10,free);
va_t *new_qtags = va_init(10,free);
- if (!(rec->core.flag & BAM_FPAIRED)) recno = 0;
- if (rec->core.flag & BAM_FREAD1) recno = 1;
- if (rec->core.flag & BAM_FREAD2) recno = 2;
+ if (!(rec1->core.flag & BAM_FPAIRED)) readno1 = 0;
+ if (rec1->core.flag & BAM_FREAD1) readno1 = 1;
+ if (rec1->core.flag & BAM_FREAD2) readno1 = 2;
- char *seq = get_read(rec);
- char *quality = get_quality(rec);
+ if (rec2) {
+ if (!(rec2->core.flag & BAM_FPAIRED)) readno2 = 0;
+ if (rec2->core.flag & BAM_FREAD1) readno2 = 1;
+ if (rec2->core.flag & BAM_FREAD2) readno2 = 2;
+ }
/*
* first pass - copy sections of read into tags
*/
for (int n=0; n < opts->poslist->end; n++) {
pos = opts->poslist->entries[n];
- if (pos->record == recno) {
+ if ( (pos->record_from == readno1) || (pos->record_from == readno2) ) {
+ bam1_t *rec = NULL;
+ if (pos->record_from == readno1) rec = rec1;
+ if (pos->record_from == readno2) rec = rec2;
+
if (pos->from <= rec->core.l_qseq) {
+
+ char *seq = get_read(rec);
+ char *quality = get_quality(rec);
+
int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from;
int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to;
int len = to - from + 1;
@@ -539,52 +605,82 @@ static bam1_t *process_record(bam1_t *rec, opts_t *opts)
// copy data from read
memset(tag_data,0,rec->core.l_qseq+1);
memcpy(tag_data, seq + from - 1, len);
- add_or_update(new_tags, opts->taglist->entries[n], tag_data);
+ add_or_update(new_tags, opts->taglist->entries[n], tag_data, pos->record_to);
// copy data from quality
memset(qtag_data,0,rec->core.l_qseq+1);
memcpy(qtag_data, quality + from - 1, len);
- add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data);
+ add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data, pos->record_to);
+ free(seq); free(quality);
}
}
}
// add new tags
for (int n=0; n < new_tags->end; n++) {
- char *tag = new_tags->entries[n]; tag[2] = 0;
+ bam1_t *rec = NULL;
+ int readno = (*(char*)(new_tags->entries[n]) - '0');
+ char *tag = new_tags->entries[n]+1; tag[2] = 0;
char *data = tag+3;
- add_tag(rec, tag, data, opts);
+ if (readno == readno1) rec = rec1;
+ else rec = rec2;
+ if (rec) add_tag(rec, tag, data, opts);
}
// add new quality tags
for (int n=0; n < new_qtags->end; n++) {
- char *tag = new_qtags->entries[n]; tag[2] = 0;
+ bam1_t *rec = NULL;
+ int readno = (*(char*)(new_qtags->entries[n]) - '0');
+ char *tag = new_qtags->entries[n]+1; tag[2] = 0;
char *data = tag+3;
- add_tag(rec, tag, data, opts);
+ if (readno == readno1) rec = rec1;
+ else rec = rec2;
+ if (rec) add_tag(rec, tag, data, opts);
}
/*
* second pass - mark sections of read as deleted
*/
+ char *seq = NULL;
+ char *quality = NULL;
+ char *seq1 = get_read(rec1);
+ char *quality1 = get_quality(rec1);
+ char *seq2 = rec2 ? get_read(rec2) : NULL;
+ char *quality2 = rec2 ? get_quality(rec2) : NULL;
+
for (int n=0; n < opts->poslist->end; n++) {
+ bam1_t *rec = NULL;
pos = opts->poslist->entries[n];
- if (pos->record == recno) {
- if (pos->from <= rec->core.l_qseq) {
- int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from;
- int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to;
- int len = to - from + 1;
- memset(seq + from - 1, 1, len); // mark as deleted
- memset(quality + from - 1, 1, len); // mark as deleted
- }
+ if (pos->record_from == readno1) { rec = rec1; seq = seq1; quality = quality1; }
+ if (pos->record_from == readno2) { rec = rec2; seq = seq2; quality = quality2; }
+
+ if (rec && (pos->from <= rec->core.l_qseq)) {
+ int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from;
+ int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to;
+ int len = to - from + 1;
+ memset(seq + from - 1, 1, len); // mark as deleted
+ memset(quality + from - 1, 1, len); // mark as deleted
}
}
- shuffle(seq); shuffle(quality); // physically remove 'marked as deleted' bytes
- bam1_t *newrec = make_new_rec(rec, seq, quality);
- free(tag_data); free(qtag_data); free(quality); free(seq);
+ shuffle(seq1); shuffle(quality1); // physically remove 'marked as deleted' bytes
+ bam1_t *nr = make_new_rec(rec1, seq1, quality1);
+ *newrec = nr;
+ free(seq1); free(quality1);
+
+ if (rec2) {
+ shuffle(seq2); shuffle(quality2); // physically remove 'marked as deleted' bytes
+ nr = make_new_rec(rec2, seq2, quality2);
+ *newrec2 = nr;
+ free(seq2); free(quality2);
+ } else {
+ *newrec2 = NULL;
+ }
+
+ free(tag_data); free(qtag_data);
va_free(new_tags); va_free(new_qtags);
- return newrec;
+ return;
}
/*
@@ -712,10 +808,12 @@ static bam1_t *merge_records(bam1_t *r1, bam1_t *r2, opts_t *opts)
*/
static int write_record(BAMit_t *bam, bam1_t *rec)
{
- int r = sam_write1(bam->f, bam->h, rec);
- if (r < 0) {
- fprintf(stderr,"sam_write1() failed\n");
- return -1;
+ if (rec) {
+ int r = sam_write1(bam->f, bam->h, rec);
+ if (r < 0) {
+ fprintf(stderr,"sam_write1() failed\n");
+ return -1;
+ }
}
return 0;
}
@@ -731,6 +829,10 @@ int process(opts_t* opts)
int retcode = 0;
int nrec = 0;
int r;
+ bam1_t *newrec = NULL;
+ bam1_t *newrec2= NULL;
+ bam1_t *rec = NULL;
+ bam1_t *rec2 = NULL;
BAMit_t *bam_in = BAMit_open(opts->in_file, 'r', opts->input_fmt, 0, NULL);
BAMit_t *bam_out = BAMit_open(opts->out_file, 'w', opts->output_fmt, opts->compression_level, NULL);
@@ -745,28 +847,38 @@ int process(opts_t* opts)
}
while (BAMit_hasnext(bam_in)) {
- bam1_t *rec = BAMit_next(bam_in);
- if (invalid_record(rec,++nrec)) return -1;
- bam1_t *newrec = process_record(rec,opts);
-
- bam1_t *rec2 = BAMit_peek(bam_in);
- if (rec2 && strcmp(bam_get_qname(rec), bam_get_qname(rec2)) == 0) {
- rec2 = BAMit_next(bam_in);
- if (invalid_record(rec2,++nrec)) return -1;
- bam1_t *newrec2 = process_record(rec2,opts);
- if ((newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) {
- bam1_t *merged_rec = merge_records(newrec, newrec2, opts);
- if (write_record(bam_out, merged_rec)) return -1;
- bam_destroy1(merged_rec);
- } else {
- if (write_record(bam_out, newrec)) return -1;
- if (write_record(bam_out, newrec2)) return -1;
- }
- bam_destroy1(newrec2);
+ bam1_t *r = BAMit_next(bam_in);
+ if (invalid_record(r,++nrec)) return -1;
+ rec = bam_dup1(r);
+
+ r = BAMit_peek(bam_in);
+ if (r && strcmp(bam_get_qname(rec), bam_get_qname(r)) == 0) {
+ r = BAMit_next(bam_in);
+ if (invalid_record(r,++nrec)) return -1;
+ rec2 = bam_dup1(r);
+ } else {
+ rec2 = NULL;
+ }
+
+ process_records(rec, rec2, &newrec, &newrec2, opts);
+
+ //newrec = process_record(rec,opts);
+ //newrec2 = NULL;
+ //if (rec2) newrec2 = process_record(rec2,opts);
+
+ if (newrec2 && ( (newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) ) {
+ bam1_t *merged_rec = merge_records(newrec, newrec2, opts);
+ if (write_record(bam_out, merged_rec)) return -1;
+ bam_destroy1(merged_rec);
} else {
- if (write_record(bam_out,newrec)) return -1;
+ if (write_record(bam_out, newrec)) return -1;
+ if (write_record(bam_out, newrec2)) return -1;
}
+
+ bam_destroy1(rec);
+ bam_destroy1(rec2);
bam_destroy1(newrec);
+ bam_destroy1(newrec2);
}
// tidy up after us
diff --git a/test/data/out/read2tags_1.bam b/test/data/out/read2tags_1.bam
deleted file mode 100644
index d60ace37..00000000
Binary files a/test/data/out/read2tags_1.bam and /dev/null differ
diff --git a/test/data/out/read2tags_1.sam b/test/data/out/read2tags_1.sam
new file mode 100644
index 00000000..3d5ef4da
--- /dev/null
+++ b/test/data/out/read2tags_1.sam
@@ -0,0 +1,22 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_1.sam read2tags_1.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:A Qa:Z:H
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:C Qa:Z:D
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:C Qa:Z:F
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:G Qa:Z:D
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573
diff --git a/test/data/out/read2tags_10.bam b/test/data/out/read2tags_10.bam
deleted file mode 100644
index fe35cd63..00000000
Binary files a/test/data/out/read2tags_10.bam and /dev/null differ
diff --git a/test/data/out/read2tags_10.sam b/test/data/out/read2tags_10.sam
new file mode 100644
index 00000000..aea1dca3
--- /dev/null
+++ b/test/data/out/read2tags_10.sam
@@ -0,0 +1,19 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.DqGa7o/read2tags_10.bam -t BC -q QT -p 2:1:999 -d ci,RG -k BC,QT --replace DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_10.sam read2tags_10.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:TAGCTGTAGCAAAATTACAG QT:Z:EECDDDDDDDDDDDDDDDDD
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 * * RG:Z:1#0 ci:i:513 BC:Z:GGACTAGGAATGCCAGTAAG QT:Z:EECDDDDDCDDDDDDCCDC@
+HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:TTCAAAGCTTTTTAGACAAC QT:Z:ECEEEDDDDDDDDDDDDDDD
+HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:CAGATGGAGTCAGAGGACAT QT:Z:DDDDDDDDDDDDDDDDDDDD
diff --git a/test/data/out/read2tags_11.sam b/test/data/out/read2tags_11.sam
new file mode 100644
index 00000000..410b4218
--- /dev/null
+++ b/test/data/out/read2tags_11.sam
@@ -0,0 +1,24 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h read2tags_1.bam
+@PG ID:samtools.1 PN:samtools PP:samtools VN:1.18 CL:/usr/local/bin/samtools view -O bam -o read2tags_11.bam read2tags_11.sam
+@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_11.sam read2tags_11.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 Ba:Z:A Qa:Z:H
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 Ba:Z:C Qa:Z:F
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 Ba:Z:G Qa:Z:D
diff --git a/test/data/out/read2tags_12.sam b/test/data/out/read2tags_12.sam
new file mode 100644
index 00000000..2c57702b
--- /dev/null
+++ b/test/data/out/read2tags_12.sam
@@ -0,0 +1,24 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h read2tags_1.bam
+@PG ID:samtools.1 PN:samtools PP:samtools VN:1.18 CL:/usr/local/bin/samtools view -O bam -o read2tags_12.bam read2tags_12.sam
+@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_12.sam read2tags_12.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:T Qa:Z:E
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 AGCTGTAGCAAAATTACAG ECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GACTAGGAATGCCAGTAAG ECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:T Qa:Z:E
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TCAAAGCTTTTTAGACAAC CEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:C Qa:Z:D
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 AGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573
diff --git a/test/data/out/read2tags_2.bam b/test/data/out/read2tags_2.bam
deleted file mode 100644
index 88bc12b5..00000000
Binary files a/test/data/out/read2tags_2.bam and /dev/null differ
diff --git a/test/data/out/read2tags_2.sam b/test/data/out/read2tags_2.sam
new file mode 100644
index 00000000..d4f57f77
--- /dev/null
+++ b/test/data/out/read2tags_2.sam
@@ -0,0 +1,22 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.OQBy3B/read2tags_2.bam -t Ba,Bb -q Qa,Qb -p 1:2:4,1:3:5 DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_2.sam read2tags_2.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 AAAAAATTTGGTATTG HHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:CTG Bb:Z:TGT Qa:Z:HHH Qb:Z:HHH
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CGCAGTCTGTCAATGC DDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:AGG Bb:Z:GGC Qa:Z:DDD Qb:Z:DDD
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGAGAATCCCATTGAC FDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:CGC Bb:Z:GCT Qa:Z:FFD Qb:Z:FDD
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GGCCAGAGTCCTTGTC DDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:AGA Bb:Z:GAA Qa:Z:DDD Qb:Z:DDD
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573
diff --git a/test/data/out/read2tags_3.bam b/test/data/out/read2tags_3.bam
deleted file mode 100644
index c377d47e..00000000
Binary files a/test/data/out/read2tags_3.bam and /dev/null differ
diff --git a/test/data/out/read2tags_3.sam b/test/data/out/read2tags_3.sam
new file mode 100644
index 00000000..fef1a019
--- /dev/null
+++ b/test/data/out/read2tags_3.sam
@@ -0,0 +1,19 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Xm6T25/read2tags_3.bam -t Ba -q Qa -p 1:1:999 -d ci DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_3.sam read2tags_3.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 BC:Z:TTGGCATC QT:Z:CCCFFFFE Ba:Z:ACTGTAAAAATTTGGTATTG Qa:Z:HHHHHHHFFFFFFEEBEEED
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 * * BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:CAGGCGCAGTCTGTCAATGC Qa:Z:DDDDDDDDDBDDDDDDEEDD
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:AAGTGATC QT:Z:BCCDFFFD Ba:Z:CCGCTGAGAATCCCATTGAC Qa:Z:FFFDDDDDDDDDDDDDDDDD
+HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 BC:Z:TCACGATC QT:Z:CCCFFFDD Ba:Z:GAGAAGCCAGAGTCCTTGTC Qa:Z:DDDDDDDDDDDDACDDDDDD
diff --git a/test/data/out/read2tags_4.bam b/test/data/out/read2tags_4.bam
deleted file mode 100644
index 79b8aa84..00000000
Binary files a/test/data/out/read2tags_4.bam and /dev/null differ
diff --git a/test/data/out/read2tags_4.sam b/test/data/out/read2tags_4.sam
new file mode 100644
index 00000000..949251ec
--- /dev/null
+++ b/test/data/out/read2tags_4.sam
@@ -0,0 +1,19 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Xm6T25/read2tags_4.bam -t Ba -q Qa -p 2:1:999 -k ci,RG DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_4.sam read2tags_4.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:TAGCTGTAGCAAAATTACAG Qa:Z:EECDDDDDDDDDDDDDDDDD
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 * * RG:Z:1#0 ci:i:513 Ba:Z:GGACTAGGAATGCCAGTAAG Qa:Z:EECDDDDDCDDDDDDCCDC@
+HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:TTCAAAGCTTTTTAGACAAC Qa:Z:ECEEEDDDDDDDDDDDDDDD
+HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:CAGATGGAGTCAGAGGACAT Qa:Z:DDDDDDDDDDDDDDDDDDDD
diff --git a/test/data/out/read2tags_5.bam b/test/data/out/read2tags_5.bam
deleted file mode 100644
index ebada0bc..00000000
Binary files a/test/data/out/read2tags_5.bam and /dev/null differ
diff --git a/test/data/out/read2tags_5.sam b/test/data/out/read2tags_5.sam
new file mode 100644
index 00000000..0dd40b55
--- /dev/null
+++ b/test/data/out/read2tags_5.sam
@@ -0,0 +1,23 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags_5.sam -o /tmp/bambi.u6VHzC/read2tags_5.bam -t Ba -q Qa -p 1:10 DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_5.sam read2tags_5.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573
+HS31_12693:1:1101:12330:2220#0 4 * 0 0 * * 0 0 CAGAGGACAT DDDDDDDDDD RG:Z:1#0 ci:i:573 Ba:Z:CAGATGGAGT Qa:Z:DDDDDDDDDD
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573
diff --git a/test/data/out/read2tags_6.bam b/test/data/out/read2tags_6.bam
deleted file mode 100644
index 9ad4cb7f..00000000
Binary files a/test/data/out/read2tags_6.bam and /dev/null differ
diff --git a/test/data/out/read2tags_6.sam b/test/data/out/read2tags_6.sam
new file mode 100644
index 00000000..ea6119f3
--- /dev/null
+++ b/test/data/out/read2tags_6.sam
@@ -0,0 +1,22 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.k5dqBd/read2tags_6.bam -t Ba,Ba -q Qa,Qb -p 1:2:2,1:1:1 DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_6.sam read2tags_6.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 TGTAAAAATTTGGTATTG HHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:CA Qa:Z:H Qb:Z:H
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 GGCGCAGTCTGTCAATGC DDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:AC Qa:Z:D Qb:Z:D
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 GCTGAGAATCCCATTGAC FDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:CC Qa:Z:F Qb:Z:F
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAAGCCAGAGTCCTTGTC DDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:AG Qa:Z:D Qb:Z:D
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573
diff --git a/test/data/out/read2tags_7.bam b/test/data/out/read2tags_7.bam
deleted file mode 100644
index d77ddd73..00000000
Binary files a/test/data/out/read2tags_7.bam and /dev/null differ
diff --git a/test/data/out/read2tags_7.sam b/test/data/out/read2tags_7.sam
new file mode 100644
index 00000000..6ca747a8
--- /dev/null
+++ b/test/data/out/read2tags_7.sam
@@ -0,0 +1,22 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Prkj5q/read2tags_7.bam -t BC -q QT -p 1:1:1 --replace DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_7.sam read2tags_7.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:A QT:Z:H
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD RG:Z:1#0 ci:i:472 BC:Z:C QT:Z:D
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:C QT:Z:F
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:G QT:Z:D
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573
diff --git a/test/data/out/read2tags_8.bam b/test/data/out/read2tags_8.bam
deleted file mode 100644
index 76ffd59b..00000000
Binary files a/test/data/out/read2tags_8.bam and /dev/null differ
diff --git a/test/data/out/read2tags_8.sam b/test/data/out/read2tags_8.sam
new file mode 100644
index 00000000..f40091ad
--- /dev/null
+++ b/test/data/out/read2tags_8.sam
@@ -0,0 +1,22 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Prkj5q/read2tags_8.bam -t BC -q QT -p 1:1:1 --merge DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_8.sam read2tags_8.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:TTGGCATCA QT:Z:CCCFFFFEH
+HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD RG:Z:1#0 ci:i:472 BC:Z:TTTTATTTC QT:Z:-71(()))D
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513
+HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:AAGTGATCC QT:Z:BCCDFFFDF
+HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538
+HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:TCACGATCG QT:Z:CCCFFFDDD
+HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573
diff --git a/test/data/out/read2tags_9.bam b/test/data/out/read2tags_9.bam
deleted file mode 100644
index 7acdf65e..00000000
Binary files a/test/data/out/read2tags_9.bam and /dev/null differ
diff --git a/test/data/out/read2tags_9.sam b/test/data/out/read2tags_9.sam
new file mode 100644
index 00000000..9aa1d2e8
--- /dev/null
+++ b/test/data/out/read2tags_9.sam
@@ -0,0 +1,19 @@
+@HD VN:1.4 SO:unsorted
+@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0
+@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3
+@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false
+@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0
+@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter -
+@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx
+@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false
+@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.nyvJS9/read2tags_9.bam -t BC -q QT -p 2:1:999 -d ci,RG -k BC,QT --merge DS:convert reads to tags
+@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_9.sam read2tags_9.bam
+@SQ SN:phix-illumina.fa LN:5386
+@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC
+HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:TTGGCATCTAGCTGTAGCAAAATTACAG QT:Z:CCCFFFFEEECDDDDDDDDDDDDDDDDD
+HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472
+HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 * * RG:Z:1#0 ci:i:513 BC:Z:GGACTAGGAATGCCAGTAAG QT:Z:EECDDDDDCDDDDDDCCDC@
+HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:AAGTGATCTTCAAAGCTTTTTAGACAAC QT:Z:BCCDFFFDECEEEDDDDDDDDDDDDDDD
+HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:TCACGATCCAGATGGAGTCAGAGGACAT QT:Z:CCCFFFDDDDDDDDDDDDDDDDDDDDDD
diff --git a/test/t_read2tags.c b/test/t_read2tags.c
index 73b22bc3..372c6123 100644
--- a/test/t_read2tags.c
+++ b/test/t_read2tags.c
@@ -1,6 +1,6 @@
-/* test/t_select.c -- select test cases.
+/* test/t_read2tags.c -- read2tags test cases.
- Copyright (C) 2016 Genome Research Ltd.
+ Copyright (C) 2024 Genome Research Ltd.
Author: Jennifer Liddle
@@ -51,11 +51,13 @@ void setup_test_1(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("Ba");
(*argv)[(*argc)++] = strdup("-q");
@@ -69,11 +71,13 @@ void setup_test_2(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("Ba,Bb");
(*argv)[(*argc)++] = strdup("-q");
@@ -87,11 +91,13 @@ void setup_test_3(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("Ba");
(*argv)[(*argc)++] = strdup("-q");
@@ -107,11 +113,13 @@ void setup_test_4(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("Ba");
(*argv)[(*argc)++] = strdup("-q");
@@ -127,11 +135,13 @@ void setup_test_5(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags_5.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("Ba");
(*argv)[(*argc)++] = strdup("-q");
@@ -145,11 +155,13 @@ void setup_test_6(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("Ba,Ba");
(*argv)[(*argc)++] = strdup("-q");
@@ -163,11 +175,13 @@ void setup_test_7(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("BC");
(*argv)[(*argc)++] = strdup("-q");
@@ -182,11 +196,13 @@ void setup_test_8(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("BC");
(*argv)[(*argc)++] = strdup("-q");
@@ -201,11 +217,13 @@ void setup_test_9(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("BC");
(*argv)[(*argc)++] = strdup("-q");
@@ -224,11 +242,13 @@ void setup_test_10(int* argc, char*** argv, char *outputfile)
*argc = 0;
*argv = (char**)calloc(sizeof(char*), 100);
(*argv)[(*argc)++] = strdup("bambi");
- (*argv)[(*argc)++] = strdup("select");
+ (*argv)[(*argc)++] = strdup("read2tags");
(*argv)[(*argc)++] = strdup("-i");
(*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
(*argv)[(*argc)++] = strdup("-o");
(*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam");
(*argv)[(*argc)++] = strdup("-t");
(*argv)[(*argc)++] = strdup("BC");
(*argv)[(*argc)++] = strdup("-q");
@@ -242,35 +262,117 @@ void setup_test_10(int* argc, char*** argv, char *outputfile)
(*argv)[(*argc)++] = strdup("--replace");
}
+void setup_test_11(int* argc, char*** argv, char *outputfile) // build the command line for test 11 (tags written to read 2 from read 1)
+{
+ *argc = 0;
+ *argv = (char**)calloc(sizeof(char*), 100); // zero-filled vector with room for 100 argument pointers
+ (*argv)[(*argc)++] = strdup("bambi"); // every argument is strdup'd, so the vector owns its strings
+ (*argv)[(*argc)++] = strdup("read2tags"); // sub-command under test
+ (*argv)[(*argc)++] = strdup("-i");
+ (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
+ (*argv)[(*argc)++] = strdup("-o");
+ (*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam"); // SAM output: checkFiles() compares the records as text lines
+ (*argv)[(*argc)++] = strdup("-t");
+ (*argv)[(*argc)++] = strdup("Ba"); // NOTE(review): presumably the tag that receives the bases - confirm against read2tags usage
+ (*argv)[(*argc)++] = strdup("-q");
+ (*argv)[(*argc)++] = strdup("Qa"); // NOTE(review): presumably the tag that receives the qualities - confirm against read2tags usage
+ (*argv)[(*argc)++] = strdup("-p");
+ (*argv)[(*argc)++] = strdup("1:2:1:1"); // rf:rt:s:e - read position 1..1 of record 1, write the tags to record 2
+}
+
+void setup_test_12(int* argc, char*** argv, char *outputfile) // build the command line for test 12 (tags written to read 1 from read 2)
+{
+ *argc = 0;
+ *argv = (char**)calloc(sizeof(char*), 100); // zero-filled vector with room for 100 argument pointers
+ (*argv)[(*argc)++] = strdup("bambi"); // every argument is strdup'd, so the vector owns its strings
+ (*argv)[(*argc)++] = strdup("read2tags"); // sub-command under test
+ (*argv)[(*argc)++] = strdup("-i");
+ (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam"));
+ (*argv)[(*argc)++] = strdup("-o");
+ (*argv)[(*argc)++] = strdup(outputfile);
+ (*argv)[(*argc)++] = strdup("--output-fmt");
+ (*argv)[(*argc)++] = strdup("sam"); // SAM output: checkFiles() compares the records as text lines
+ (*argv)[(*argc)++] = strdup("-t");
+ (*argv)[(*argc)++] = strdup("Ba"); // NOTE(review): presumably the tag that receives the bases - confirm against read2tags usage
+ (*argv)[(*argc)++] = strdup("-q");
+ (*argv)[(*argc)++] = strdup("Qa"); // NOTE(review): presumably the tag that receives the qualities - confirm against read2tags usage
+ (*argv)[(*argc)++] = strdup("-p");
+ (*argv)[(*argc)++] = strdup("2:1:1:1"); // rf:rt:s:e - read position 1..1 of record 2, write the tags to record 1
+}
+
+/*
+ * Compare the file produced by a test with the expected file.
+ *
+ * gotfile    - file written by the test run
+ * expectfile - reference file to compare against
+ * verbose    - when non-zero, report @RG mismatches and a pass/fail line on stderr
+ *
+ * The @RG header lines are compared through the SAM header API. The files are
+ * then re-read as text: header lines (starting with '@') are skipped and the
+ * read records are compared line by line. Every mismatch, including one file
+ * holding more records than the other, increments the 'failure' counter.
+ * The program exits if either file cannot be opened as text.
+ */
void checkFiles(char *gotfile, char *expectfile, int verbose)
{
BAMit_t *bgot = BAMit_open(gotfile, 'r', NULL, 0, NULL);
BAMit_t *bexp = BAMit_open(expectfile, 'r', NULL, 0, NULL);
- bam1_t *got_rec, *exp_rec;
- int c = sam_hdr_count_lines(bgot->h, "RG");
- if (c != sam_hdr_count_lines(bexp->h, "RG")) { failure++; return; }
+    int f = failure;    // failure count on entry, used for the pass/fail summary
+
+    int c1 = sam_hdr_count_lines(bgot->h, "RG");
+    int c2 = sam_hdr_count_lines(bexp->h, "RG");
+    if (c1 != c2) {
+        failure++;
+        if (verbose) fprintf(stderr, "RG lines: expected %d, got %d\n", c2, c1);
+    }
- for (int n=0; n < c; n++) {
+    // only compare the RG lines that exist in both headers
+    for (int n=0; n < c1 && n < c2; n++) {
kstring_t ks_got; ks_initialize(&ks_got);
kstring_t ks_exp; ks_initialize(&ks_exp);
sam_hdr_find_line_pos(bgot->h, "RG", n, &ks_got);
sam_hdr_find_line_pos(bexp->h, "RG", n, &ks_exp);
- if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) { failure++; return; }
+        if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) {
+            if (verbose) fprintf(stderr, "RG %d: expected %s, got %s\n", n, ks_str(&ks_exp), ks_str(&ks_got));
+            failure++;
+            ks_free(&ks_got); ks_free(&ks_exp);    // release before leaving the loop
+            break;
+        }
ks_free(&ks_got); ks_free(&ks_exp);
}
- while ((exp_rec = BAMit_next(bexp)) != NULL) {
- got_rec = BAMit_next(bgot);
- if (!got_rec) { fprintf(stderr, "%s ended too soon\n", gotfile); failure++; return; }
- if (memcmp(got_rec->data, exp_rec->data, got_rec->l_data)) {
+    BAMit_free(bexp);
+    BAMit_free(bgot);
+
+    FILE *gotfp = fopen(gotfile, "r");
+    FILE *expfp = fopen(expectfile, "r");
+    char gotline[2048];
+    char expline[2048];
+    char *more_got = NULL;    // non-NULL while gotline holds an unprocessed record
+    char *more_exp = NULL;    // non-NULL while expline holds an unprocessed record
+
+    if (!gotfp) {
+        fprintf(stderr, "Can't open file %s\n", gotfile);
+        exit(1);
+    }
+
+    if (!expfp) {
+        fprintf(stderr, "Can't open file %s\n", expectfile);
+        exit(1);
+    }
+
+    // skip header; stops with the first record in the buffer, or NULL at end of file
+    while ((more_got = fgets(gotline, sizeof gotline, gotfp)) != NULL) {
+        if (gotline[0] != '@') break;
+    }
+    while ((more_exp = fgets(expline, sizeof expline, expfp)) != NULL) {
+        if (expline[0] != '@') break;
+    }
+
+    // compare read records
+    while (more_got && more_exp) {
+        if (strcmp(gotline,expline) != 0) {
+            fprintf(stderr, "Expected: %sFound : %s\n", expline, gotline);
failure++;
- break;
}
+        more_got = fgets(gotline, sizeof gotline, gotfp);
+        more_exp = fgets(expline, sizeof expline, expfp);
+    }
+
+    // one file still has records after the other ran out
+    if (more_got || more_exp) {
+        fprintf(stderr, "%s has %s records than %s\n", gotfile, more_got ? "more" : "fewer", expectfile);
+        failure++;
+    }
+
+    fclose(gotfp); fclose(expfp);
+
+    if (verbose) {
+        if (f == failure) fprintf(stderr, " :\tpass\n");
+        else fprintf(stderr, " :\t*** FAIL ***\n");
}
- BAMit_free(bexp);
- BAMit_free(bgot);
return;
}
@@ -308,75 +410,100 @@ int main(int argc, char**argv)
char outputfile[1024];
// minimal options
- sprintf(outputfile,"%s/read2tags_1.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 1: minimal options\n");
+ sprintf(outputfile,"%s/read2tags_1.sam", TMPDIR);
setup_test_1(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_1.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_1.sam"),verbose);
free_args(argv_1);
// overlapping reads
- sprintf(outputfile,"%s/read2tags_2.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 2: Overlapping reads\n");
+ sprintf(outputfile,"%s/read2tags_2.sam", TMPDIR);
setup_test_2(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_2.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_2.sam"),verbose);
free_args(argv_1);
// remove first record
- sprintf(outputfile,"%s/read2tags_3.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 3: remove first record\n");
+ sprintf(outputfile,"%s/read2tags_3.sam", TMPDIR);
setup_test_3(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_3.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_3.sam"),verbose);
free_args(argv_1);
// remove second record
- sprintf(outputfile,"%s/read2tags_4.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 4: remove second record\n");
+ sprintf(outputfile,"%s/read2tags_4.sam", TMPDIR);
setup_test_4(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_4.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_4.sam"),verbose);
free_args(argv_1);
// handle single reads
- sprintf(outputfile,"%s/read2tags_5.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 5: handle single reads\n");
+ sprintf(outputfile,"%s/read2tags_5.sam", TMPDIR);
setup_test_5(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_5.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_5.sam"),verbose);
free_args(argv_1);
// specify duplicate tags
- sprintf(outputfile,"%s/read2tags_6.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 6: specify duplicate tags\n");
+ sprintf(outputfile,"%s/read2tags_6.sam", TMPDIR);
setup_test_6(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_6.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_6.sam"),verbose);
free_args(argv_1);
// use --replace option
- sprintf(outputfile,"%s/read2tags_7.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 7: use --replace option\n");
+ sprintf(outputfile,"%s/read2tags_7.sam", TMPDIR);
setup_test_7(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_7.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_7.sam"),verbose);
free_args(argv_1);
// use --merge option
- sprintf(outputfile,"%s/read2tags_8.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 8: use --merge option\n");
+ sprintf(outputfile,"%s/read2tags_8.sam", TMPDIR);
setup_test_8(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_8.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_8.sam"),verbose);
free_args(argv_1);
// use --merge option with duplicate tags
- sprintf(outputfile,"%s/read2tags_9.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 9: use --merge option with duplicate tags\n");
+ sprintf(outputfile,"%s/read2tags_9.sam", TMPDIR);
setup_test_9(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_9.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_9.sam"),verbose);
free_args(argv_1);
// use --replace option with duplicate tags
- sprintf(outputfile,"%s/read2tags_10.bam", TMPDIR);
+ if (verbose) fprintf(stderr,"Test 10: use --replace option with duplicate tags\n");
+ sprintf(outputfile,"%s/read2tags_10.sam", TMPDIR);
setup_test_10(&argc_1, &argv_1, outputfile);
main_read2tags(argc_1-1, argv_1+1);
- checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.bam"),verbose);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.sam"),verbose);
free_args(argv_1);
+ // write tags to read 2 from read 1
+ if (verbose) fprintf(stderr,"Test 11: write tags to read 2 from read 1\n");
+ sprintf(outputfile,"%s/read2tags_11.sam", TMPDIR);
+ setup_test_11(&argc_1, &argv_1, outputfile);
+ main_read2tags(argc_1-1, argv_1+1);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_11.sam"),verbose);
+ free_args(argv_1);
+
+ // write tags to read 1 from read 2
+ if (verbose) fprintf(stderr,"Test 12: write tags to read 1 from read 2\n");
+ sprintf(outputfile,"%s/read2tags_12.sam", TMPDIR);
+ setup_test_12(&argc_1, &argv_1, outputfile);
+ main_read2tags(argc_1-1, argv_1+1);
+ checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_12.sam"),verbose);
+ free_args(argv_1);
printf("read2tags tests: %s\n", failure ? "FAILED" : "Passed");
return failure ? EXIT_FAILURE : EXIT_SUCCESS;