diff --git a/src/read2tags.c b/src/read2tags.c index 513c8074..a7124e4d 100644 --- a/src/read2tags.c +++ b/src/read2tags.c @@ -45,7 +45,8 @@ along with this program. If not, see . * position record */ typedef struct { - int record; + int record_from; + int record_to; int from; int to; } pos_t; @@ -129,30 +130,61 @@ static int bam_aux_cmp(const uint8_t *s, const uint8_t *d) /* * Parse a comma separated list of positions - * Format is r:s:e,r:s:e,r:s:e,... + * Format is rf:rt:s:e,... + * or is r:s:e,... if rf and rt are the same + * or is s:e,... if rf and rt are both zero * - * where r is record number (0, 1 or 2 and is optional) + * where rf is record number to read (0, 1 or 2 and is optional) + * rt is record number to write (0, 1 or 2 and is optional) * s is the start position in the read string * e is the end position in the read string * start and end positions are 1 (not zero) based */ -static void parse_positions(va_t *poslist, char *args) +static void parse_positions(va_t *poslist, char *arg_string) { - char *argstr = strdup(args); + char *argstr = strdup(arg_string); char *save_s; char *s = strtok_r(argstr,",",&save_s); while (s) { pos_t *pos = calloc(1, sizeof(pos_t)); char *save_p; - char *p = strtok_r(s,":",&save_p); if (p) pos->record = atoi(p); - p = strtok_r(NULL,":",&save_p); if (p) pos->from = atoi(p); - p = strtok_r(NULL,":",&save_p); if (p) pos->to = atoi(p); - if (!p) { - // looks like s:e format - pos->to = pos->from; pos->from = pos->record; pos->record = 0; + int n = 0; // number of position arguments + int args[4]; + + char *p = strtok_r(s,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + + switch (n) { + case 2: // s:e + pos->record_from = 0; + pos->record_to = 0; + pos->from = args[0]; + pos->to = args[1]; + break; + case 3: // r:s:e + pos->record_from = args[0]; + pos->record_to = args[0]; + pos->from = args[1]; + pos->to = args[2]; + break; + case 4: // rf:rt:s:e + pos->record_from = args[0]; + pos->record_to = args[1]; + pos->from = args[2]; + pos->to = args[3]; + break; + default: + fprintf(stderr, "Invalid pos format: %s\n", arg_string); + exit(1); } - if (pos->record < 0 || pos->record > 2 || pos->from == 0 || pos->to == 0 || pos->from > pos->to) { - fprintf(stderr,"Invalid pos argument: %s\n", args); + + if (pos->record_from < 0 || pos->record_from > 2 || + pos->record_to < 0 || pos->record_to > 2 || + pos->from == 0 || pos->to == 0 || + pos->from > pos->to) { + fprintf(stderr,"Invalid pos argument: %s\n", arg_string); exit(1); } va_push(poslist,pos); @@ -169,6 +201,8 @@ static void usage(FILE *write_to) fprintf(write_to, "Usage: bambi read2tags [options]\n" "\n" +"Convert portions of a read into tags\n" +"\n" "Options:\n" " -i --input BAM file to read [default: stdin]\n" " -o --output BAM file to output [default: stdout]\n" @@ -178,13 +212,25 @@ static void usage(FILE *write_to) " [default: " DEFAULT_KEEP_TAGS "]\n" " -d --discard-tags comma separated list of tags to discard when merging records\n" " [default: " DEFAULT_DISCARD_TAGS "]\n" -" -p --positions comma separated list of positions\n" +" -p --positions comma separated list of positions (see below)\n" " -m --merge merge duplicate tags\n" " -r --replace replace duplicate tags\n" " -v --verbose verbose output\n" " --input-fmt [sam/bam/cram] [default: bam]\n" " --output-fmt [sam/bam/cram] [default: bam]\n" " --compression-level [0..9]\n" +"\n" +" comma separated list of positions, where each position has the format:\n" +" Format is rf:rt:s:e,...\n" +" or is r:s:e,... if rf and rt are the same\n" +" or is s:e,... if rf and rt are both zero\n" +"\n" +" where rf is record number to read (0, 1 or 2 and is optional)\n" +" rt is record number to write (0, 1 or 2 and is optional)\n" +" s is the start position in the read string\n" +" e is the end position in the read string\n" +" start and end positions are 1 (not zero) based\n" +"\n" ); } @@ -447,17 +493,25 @@ static void shuffle(char *s) /* * add a new tag to our taglist, or append to existing tag */ -static void add_or_update(va_t *va, char *tag, char *data) +static void add_or_update(va_t *va, char *tag, char *data, int r) { int n; + char recno = r + '0'; + char key[4]; + + key[0] = recno; + key[1] = tag[0]; + key[2] = tag[1]; + key[3] = 0; + for (n=0; n < va->end; n++) { - if (strncmp(tag,va->entries[n],2) == 0) break; + if (strncmp(key,va->entries[n],3) == 0) break; } if (n == va->end) { // add new tag - char *e = calloc(1, strlen(tag) + 1 + strlen(data) + 1); - strcpy(e, tag); + char *e = calloc(1, strlen(key) + 1 + strlen(data) + 1); + strcpy(e, key); strcat(e, ":"); strcat(e, data); va_push(va,e); @@ -507,31 +561,43 @@ static void add_tag(bam1_t *rec, char *tag, char *data, opts_t *opts) } /* - * Process one record + * Process records */ -static bam1_t *process_record(bam1_t *rec, opts_t *opts) +static void process_records(bam1_t *rec1, bam1_t *rec2, bam1_t **newrec, bam1_t **newrec2, opts_t *opts) { pos_t *pos; - int recno = -1; - char *tag_data = calloc(1, rec->core.l_qseq+1); - char *qtag_data = calloc(1, rec->core.l_qseq+1); + int readno1 = -1; + int readno2 = -1; + char *tag_data = calloc(1, rec1->core.l_qseq+1); + char *qtag_data = calloc(1, rec1->core.l_qseq+1); va_t *new_tags = va_init(10,free); va_t *new_qtags = va_init(10,free); - if (!(rec->core.flag & BAM_FPAIRED)) recno = 0; - if (rec->core.flag & BAM_FREAD1) recno = 1; - if (rec->core.flag & BAM_FREAD2) recno = 2; + if (!(rec1->core.flag & BAM_FPAIRED)) readno1 = 0; + if (rec1->core.flag & BAM_FREAD1) readno1 = 1; + if (rec1->core.flag & BAM_FREAD2) readno1 = 2; - char *seq = get_read(rec); - char *quality = get_quality(rec); + if (rec2) { + if (!(rec2->core.flag & BAM_FPAIRED)) readno2 = 0; + if (rec2->core.flag & BAM_FREAD1) readno2 = 1; + if (rec2->core.flag & BAM_FREAD2) readno2 = 2; + } /* * first pass - copy sections of read into tags */ for (int n=0; n < opts->poslist->end; n++) { pos = opts->poslist->entries[n]; - if (pos->record == recno) { + if ( (pos->record_from == readno1) || (pos->record_from == readno2) ) { + bam1_t *rec = NULL; + if (pos->record_from == readno1) rec = rec1; + if (pos->record_from == readno2) rec = rec2; + if (pos->from <= rec->core.l_qseq) { + + char *seq = get_read(rec); + char *quality = get_quality(rec); + int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; int len = to - from + 1; @@ -539,52 +605,82 @@ static bam1_t *process_record(bam1_t *rec, opts_t *opts) // copy data from read memset(tag_data,0,rec->core.l_qseq+1); memcpy(tag_data, seq + from - 1, len); - add_or_update(new_tags, opts->taglist->entries[n], tag_data); + add_or_update(new_tags, opts->taglist->entries[n], tag_data, pos->record_to); // copy data from quality memset(qtag_data,0,rec->core.l_qseq+1); memcpy(qtag_data, quality + from - 1, len); - add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data); + add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data, pos->record_to); + free(seq); free(quality); } } } // add new tags for (int n=0; n < new_tags->end; n++) { - char *tag = new_tags->entries[n]; tag[2] = 0; + bam1_t *rec = NULL; + int readno = (*(char*)(new_tags->entries[n]) - '0'); + char *tag = new_tags->entries[n]+1; tag[2] = 0; char *data = tag+3; - add_tag(rec, tag, data, opts); + if (readno == readno1) rec = rec1; + else rec = rec2; + if (rec) add_tag(rec, tag, data, opts); } // add new quality tags for (int n=0; n < new_qtags->end; n++) { - char *tag = new_qtags->entries[n]; tag[2] = 0; + bam1_t *rec = NULL; + int readno = (*(char*)(new_qtags->entries[n]) - '0'); + char *tag = new_qtags->entries[n]+1; tag[2] = 0; char *data = tag+3; - add_tag(rec, tag, data, opts); + if (readno == readno1) rec = rec1; + else rec = rec2; + if (rec) add_tag(rec, tag, data, opts); } /* * second pass - mark sections of read as deleted */ + char *seq = NULL; + char *quality = NULL; + char *seq1 = get_read(rec1); + char *quality1 = get_quality(rec1); + char *seq2 = rec2 ? get_read(rec2) : NULL; + char *quality2 = rec2 ? get_quality(rec2) : NULL; + for (int n=0; n < opts->poslist->end; n++) { + bam1_t *rec = NULL; pos = opts->poslist->entries[n]; - if (pos->record == recno) { - if (pos->from <= rec->core.l_qseq) { - int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; - int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; - int len = to - from + 1; - memset(seq + from - 1, 1, len); // mark as deleted - memset(quality + from - 1, 1, len); // mark as deleted - } + if (pos->record_from == readno1) { rec = rec1; seq = seq1; quality = quality1; } + if (pos->record_from == readno2) { rec = rec2; seq = seq2; quality = quality2; } + + if (rec && (pos->from <= rec->core.l_qseq)) { + int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; + int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; + int len = to - from + 1; + memset(seq + from - 1, 1, len); // mark as deleted + memset(quality + from - 1, 1, len); // mark as deleted } } - shuffle(seq); shuffle(quality); // physically remove 'marked as deleted' bytes - bam1_t *newrec = make_new_rec(rec, seq, quality); - free(tag_data); free(qtag_data); free(quality); free(seq); + shuffle(seq1); shuffle(quality1); // physically remove 'marked as deleted' bytes + bam1_t *nr = make_new_rec(rec1, seq1, quality1); + *newrec = nr; + free(seq1); free(quality1); + + if (rec2) { + shuffle(seq2); shuffle(quality2); // physically remove 'marked as deleted' bytes + nr = make_new_rec(rec2, seq2, quality2); + *newrec2 = nr; + free(seq2); free(quality2); + } else { + *newrec2 = NULL; + } + + free(tag_data); free(qtag_data); va_free(new_tags); va_free(new_qtags); - return newrec; + return; } /* @@ -712,10 +808,12 @@ static bam1_t *merge_records(bam1_t *r1, bam1_t *r2, opts_t *opts) */ static int write_record(BAMit_t *bam, bam1_t *rec) { - int r = sam_write1(bam->f, bam->h, rec); - if (r < 0) { - fprintf(stderr,"sam_write1() failed\n"); - return -1; + if (rec) { + int r = sam_write1(bam->f, bam->h, rec); + if (r < 0) { + fprintf(stderr,"sam_write1() failed\n"); + return -1; + } } return 0; } @@ -731,6 +829,10 @@ int process(opts_t* opts) int retcode = 0; int nrec = 0; int r; + bam1_t *newrec = NULL; + bam1_t *newrec2= NULL; + bam1_t *rec = NULL; + bam1_t *rec2 = NULL; BAMit_t *bam_in = BAMit_open(opts->in_file, 'r', opts->input_fmt, 0, NULL); BAMit_t *bam_out = BAMit_open(opts->out_file, 'w', opts->output_fmt, opts->compression_level, NULL); @@ -745,28 +847,38 @@ int process(opts_t* opts) } while (BAMit_hasnext(bam_in)) { - bam1_t *rec = BAMit_next(bam_in); - if (invalid_record(rec,++nrec)) return -1; - bam1_t *newrec = process_record(rec,opts); - - bam1_t *rec2 = BAMit_peek(bam_in); - if (rec2 && strcmp(bam_get_qname(rec), bam_get_qname(rec2)) == 0) { - rec2 = BAMit_next(bam_in); - if (invalid_record(rec2,++nrec)) return -1; - bam1_t *newrec2 = process_record(rec2,opts); - if ((newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) { - bam1_t *merged_rec = merge_records(newrec, newrec2, opts); - if (write_record(bam_out, merged_rec)) return -1; - bam_destroy1(merged_rec); - } else { - if (write_record(bam_out, newrec)) return -1; - if (write_record(bam_out, newrec2)) return -1; - } - bam_destroy1(newrec2); + bam1_t *r = BAMit_next(bam_in); + if (invalid_record(r,++nrec)) return -1; + rec = bam_dup1(r); + + r = BAMit_peek(bam_in); + if (r && strcmp(bam_get_qname(rec), bam_get_qname(r)) == 0) { + r = BAMit_next(bam_in); + if (invalid_record(r,++nrec)) return -1; + rec2 = bam_dup1(r); + } else { + rec2 = NULL; + } + + process_records(rec, rec2, &newrec, &newrec2, opts); + + //newrec = process_record(rec,opts); + //newrec2 = NULL; + //if (rec2) newrec2 = process_record(rec2,opts); + + if (newrec2 && ( (newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) ) { + bam1_t *merged_rec = merge_records(newrec, newrec2, opts); + if (write_record(bam_out, merged_rec)) return -1; + bam_destroy1(merged_rec); } else { - if (write_record(bam_out,newrec)) return -1; + if (write_record(bam_out, newrec)) return -1; + if (write_record(bam_out, newrec2)) return -1; } + + bam_destroy1(rec); + bam_destroy1(rec2); bam_destroy1(newrec); + bam_destroy1(newrec2); } // tidy up after us diff --git a/test/data/out/read2tags_1.bam b/test/data/out/read2tags_1.bam deleted file mode 100644 index d60ace37..00000000 Binary files a/test/data/out/read2tags_1.bam and /dev/null differ diff --git a/test/data/out/read2tags_1.sam b/test/data/out/read2tags_1.sam new file mode 100644 index 00000000..3d5ef4da --- /dev/null +++ b/test/data/out/read2tags_1.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_1.sam read2tags_1.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:A Qa:Z:H +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:C Qa:Z:D +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:C Qa:Z:F +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:G Qa:Z:D +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_10.bam b/test/data/out/read2tags_10.bam deleted file mode 100644 index fe35cd63..00000000 Binary files a/test/data/out/read2tags_10.bam and /dev/null differ diff --git a/test/data/out/read2tags_10.sam b/test/data/out/read2tags_10.sam new file mode 100644 index 00000000..aea1dca3 --- /dev/null +++ b/test/data/out/read2tags_10.sam @@ -0,0 +1,19 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.DqGa7o/read2tags_10.bam -t BC -q QT -p 2:1:999 -d ci,RG -k BC,QT --replace DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_10.sam read2tags_10.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:TAGCTGTAGCAAAATTACAG QT:Z:EECDDDDDDDDDDDDDDDDD +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 * * RG:Z:1#0 ci:i:513 BC:Z:GGACTAGGAATGCCAGTAAG QT:Z:EECDDDDDCDDDDDDCCDC@ +HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:TTCAAAGCTTTTTAGACAAC QT:Z:ECEEEDDDDDDDDDDDDDDD +HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:CAGATGGAGTCAGAGGACAT QT:Z:DDDDDDDDDDDDDDDDDDDD diff --git a/test/data/out/read2tags_11.sam b/test/data/out/read2tags_11.sam new file mode 100644 index 00000000..410b4218 --- /dev/null +++ b/test/data/out/read2tags_11.sam @@ -0,0 +1,24 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h read2tags_1.bam +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.18 CL:/usr/local/bin/samtools view -O bam -o read2tags_11.bam read2tags_11.sam +@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_11.sam read2tags_11.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 Ba:Z:A Qa:Z:H +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 Ba:Z:C Qa:Z:F +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 Ba:Z:G Qa:Z:D diff --git a/test/data/out/read2tags_12.sam b/test/data/out/read2tags_12.sam new file mode 100644 index 00000000..2c57702b --- /dev/null +++ b/test/data/out/read2tags_12.sam @@ -0,0 +1,24 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h read2tags_1.bam +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.18 CL:/usr/local/bin/samtools view -O bam -o read2tags_12.bam read2tags_12.sam +@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_12.sam read2tags_12.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:T Qa:Z:E +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 AGCTGTAGCAAAATTACAG ECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GACTAGGAATGCCAGTAAG ECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:T Qa:Z:E +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TCAAAGCTTTTTAGACAAC CEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:C Qa:Z:D +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 AGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_2.bam b/test/data/out/read2tags_2.bam deleted file mode 100644 index 88bc12b5..00000000 Binary files a/test/data/out/read2tags_2.bam and /dev/null differ diff --git a/test/data/out/read2tags_2.sam b/test/data/out/read2tags_2.sam new file mode 100644 index 00000000..d4f57f77 --- /dev/null +++ b/test/data/out/read2tags_2.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.OQBy3B/read2tags_2.bam -t Ba,Bb -q Qa,Qb -p 1:2:4,1:3:5 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_2.sam read2tags_2.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 AAAAAATTTGGTATTG HHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:CTG Bb:Z:TGT Qa:Z:HHH Qb:Z:HHH +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CGCAGTCTGTCAATGC DDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:AGG Bb:Z:GGC Qa:Z:DDD Qb:Z:DDD +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGAGAATCCCATTGAC FDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:CGC Bb:Z:GCT Qa:Z:FFD Qb:Z:FDD +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GGCCAGAGTCCTTGTC DDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:AGA Bb:Z:GAA Qa:Z:DDD Qb:Z:DDD +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_3.bam b/test/data/out/read2tags_3.bam deleted file mode 100644 index c377d47e..00000000 Binary files a/test/data/out/read2tags_3.bam and /dev/null differ diff --git a/test/data/out/read2tags_3.sam b/test/data/out/read2tags_3.sam new file mode 100644 index 00000000..fef1a019 --- /dev/null +++ b/test/data/out/read2tags_3.sam @@ -0,0 +1,19 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Xm6T25/read2tags_3.bam -t Ba -q Qa -p 1:1:999 -d ci DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_3.sam read2tags_3.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 BC:Z:TTGGCATC QT:Z:CCCFFFFE Ba:Z:ACTGTAAAAATTTGGTATTG Qa:Z:HHHHHHHFFFFFFEEBEEED +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 * * BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:CAGGCGCAGTCTGTCAATGC Qa:Z:DDDDDDDDDBDDDDDDEEDD +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:AAGTGATC QT:Z:BCCDFFFD Ba:Z:CCGCTGAGAATCCCATTGAC Qa:Z:FFFDDDDDDDDDDDDDDDDD +HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 BC:Z:TCACGATC QT:Z:CCCFFFDD Ba:Z:GAGAAGCCAGAGTCCTTGTC Qa:Z:DDDDDDDDDDDDACDDDDDD diff --git a/test/data/out/read2tags_4.bam b/test/data/out/read2tags_4.bam deleted file mode 100644 index 79b8aa84..00000000 Binary files a/test/data/out/read2tags_4.bam and /dev/null differ diff --git a/test/data/out/read2tags_4.sam b/test/data/out/read2tags_4.sam new file mode 100644 index 00000000..949251ec --- /dev/null +++ b/test/data/out/read2tags_4.sam @@ -0,0 +1,19 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Xm6T25/read2tags_4.bam -t Ba -q Qa -p 2:1:999 -k ci,RG DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_4.sam read2tags_4.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:TAGCTGTAGCAAAATTACAG Qa:Z:EECDDDDDDDDDDDDDDDDD +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 * * RG:Z:1#0 ci:i:513 Ba:Z:GGACTAGGAATGCCAGTAAG Qa:Z:EECDDDDDCDDDDDDCCDC@ +HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:TTCAAAGCTTTTTAGACAAC Qa:Z:ECEEEDDDDDDDDDDDDDDD +HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:CAGATGGAGTCAGAGGACAT Qa:Z:DDDDDDDDDDDDDDDDDDDD diff --git a/test/data/out/read2tags_5.bam b/test/data/out/read2tags_5.bam deleted file mode 100644 index ebada0bc..00000000 Binary files a/test/data/out/read2tags_5.bam and /dev/null differ diff --git a/test/data/out/read2tags_5.sam b/test/data/out/read2tags_5.sam new file mode 100644 index 00000000..0dd40b55 --- /dev/null +++ b/test/data/out/read2tags_5.sam @@ -0,0 +1,23 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags_5.sam -o /tmp/bambi.u6VHzC/read2tags_5.bam -t Ba -q Qa -p 1:10 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_5.sam read2tags_5.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 +HS31_12693:1:1101:12330:2220#0 4 * 0 0 * * 0 0 CAGAGGACAT DDDDDDDDDD RG:Z:1#0 ci:i:573 Ba:Z:CAGATGGAGT Qa:Z:DDDDDDDDDD +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_6.bam b/test/data/out/read2tags_6.bam deleted file mode 100644 index 9ad4cb7f..00000000 Binary files a/test/data/out/read2tags_6.bam and /dev/null differ diff --git a/test/data/out/read2tags_6.sam b/test/data/out/read2tags_6.sam new file mode 100644 index 00000000..ea6119f3 --- /dev/null +++ b/test/data/out/read2tags_6.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.k5dqBd/read2tags_6.bam -t Ba,Ba -q Qa,Qb -p 1:2:2,1:1:1 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_6.sam read2tags_6.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 TGTAAAAATTTGGTATTG HHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:CA Qa:Z:H Qb:Z:H +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 GGCGCAGTCTGTCAATGC DDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:AC Qa:Z:D Qb:Z:D +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 GCTGAGAATCCCATTGAC FDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:CC Qa:Z:F Qb:Z:F +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAAGCCAGAGTCCTTGTC DDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:AG Qa:Z:D Qb:Z:D +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_7.bam b/test/data/out/read2tags_7.bam deleted file mode 100644 index d77ddd73..00000000 Binary files a/test/data/out/read2tags_7.bam and /dev/null differ diff --git a/test/data/out/read2tags_7.sam b/test/data/out/read2tags_7.sam new file mode 100644 index 00000000..6ca747a8 --- /dev/null +++ b/test/data/out/read2tags_7.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Prkj5q/read2tags_7.bam -t BC -q QT -p 1:1:1 --replace DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_7.sam read2tags_7.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:A QT:Z:H +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD RG:Z:1#0 ci:i:472 BC:Z:C QT:Z:D +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:C QT:Z:F +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:G QT:Z:D +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_8.bam b/test/data/out/read2tags_8.bam deleted file mode 100644 index 76ffd59b..00000000 Binary files a/test/data/out/read2tags_8.bam and /dev/null differ diff --git a/test/data/out/read2tags_8.sam b/test/data/out/read2tags_8.sam new file mode 100644 index 00000000..f40091ad --- /dev/null +++ b/test/data/out/read2tags_8.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Prkj5q/read2tags_8.bam -t BC -q QT -p 1:1:1 --merge DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_8.sam read2tags_8.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:TTGGCATCA QT:Z:CCCFFFFEH +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD RG:Z:1#0 ci:i:472 BC:Z:TTTTATTTC QT:Z:-71(()))D +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:AAGTGATCC QT:Z:BCCDFFFDF +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:TCACGATCG QT:Z:CCCFFFDDD +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_9.bam b/test/data/out/read2tags_9.bam deleted file mode 100644 index 7acdf65e..00000000 Binary files a/test/data/out/read2tags_9.bam and /dev/null differ diff --git a/test/data/out/read2tags_9.sam b/test/data/out/read2tags_9.sam new file mode 100644 index 00000000..9aa1d2e8 --- /dev/null +++ b/test/data/out/read2tags_9.sam @@ -0,0 +1,19 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.nyvJS9/read2tags_9.bam -t BC -q QT -p 2:1:999 -d ci,RG -k BC,QT --merge DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_9.sam read2tags_9.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:TTGGCATCTAGCTGTAGCAAAATTACAG QT:Z:CCCFFFFEEECDDDDDDDDDDDDDDDDD +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 * * RG:Z:1#0 ci:i:513 BC:Z:GGACTAGGAATGCCAGTAAG QT:Z:EECDDDDDCDDDDDDCCDC@ +HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:AAGTGATCTTCAAAGCTTTTTAGACAAC QT:Z:BCCDFFFDECEEEDDDDDDDDDDDDDDD +HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:TCACGATCCAGATGGAGTCAGAGGACAT QT:Z:CCCFFFDDDDDDDDDDDDDDDDDDDDDD diff --git a/test/t_read2tags.c b/test/t_read2tags.c index 73b22bc3..372c6123 100644 --- a/test/t_read2tags.c +++ b/test/t_read2tags.c @@ -1,6 +1,6 @@ -/* test/t_select.c -- select test cases. +/* test/t_read2tags.c -- select test cases. - Copyright (C) 2016 Genome Research Ltd. + Copyright (C) 2024 Genome Research Ltd. Author: Jennifer Liddle @@ -51,11 +51,13 @@ void setup_test_1(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("Ba"); (*argv)[(*argc)++] = strdup("-q"); @@ -69,11 +71,13 @@ void setup_test_2(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("Ba,Bb"); (*argv)[(*argc)++] = strdup("-q"); @@ -87,11 +91,13 @@ void setup_test_3(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("Ba"); (*argv)[(*argc)++] = strdup("-q"); @@ -107,11 +113,13 @@ void setup_test_4(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("Ba"); (*argv)[(*argc)++] = strdup("-q"); @@ -127,11 +135,13 @@ void setup_test_5(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags_5.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("Ba"); (*argv)[(*argc)++] = strdup("-q"); @@ -145,11 +155,13 @@ void setup_test_6(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("Ba,Ba"); (*argv)[(*argc)++] = strdup("-q"); @@ -163,11 +175,13 @@ void setup_test_7(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("BC"); (*argv)[(*argc)++] = strdup("-q"); @@ -182,11 +196,13 @@ void setup_test_8(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("BC"); (*argv)[(*argc)++] = strdup("-q"); @@ -201,11 +217,13 @@ void setup_test_9(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("BC"); (*argv)[(*argc)++] = strdup("-q"); @@ -224,11 +242,13 @@ void setup_test_10(int* argc, char*** argv, char *outputfile) *argc = 0; *argv = (char**)calloc(sizeof(char*), 100); (*argv)[(*argc)++] = strdup("bambi"); - (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("read2tags"); (*argv)[(*argc)++] = strdup("-i"); (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); (*argv)[(*argc)++] = strdup("-o"); (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); (*argv)[(*argc)++] = strdup("-t"); (*argv)[(*argc)++] = strdup("BC"); (*argv)[(*argc)++] = strdup("-q"); @@ -242,35 +262,117 @@ void setup_test_10(int* argc, char*** argv, char *outputfile) (*argv)[(*argc)++] = strdup("--replace"); } +void setup_test_11(int* argc, char*** argv, char *outputfile) +{ + *argc = 0; + *argv = (char**)calloc(sizeof(char*), 100); + (*argv)[(*argc)++] = strdup("bambi"); + (*argv)[(*argc)++] = strdup("read2tags"); + (*argv)[(*argc)++] = strdup("-i"); + (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); + (*argv)[(*argc)++] = strdup("-o"); + (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); + (*argv)[(*argc)++] = strdup("-t"); + (*argv)[(*argc)++] = strdup("Ba"); + (*argv)[(*argc)++] = strdup("-q"); + (*argv)[(*argc)++] = strdup("Qa"); + (*argv)[(*argc)++] = strdup("-p"); + (*argv)[(*argc)++] = strdup("1:2:1:1"); +} + +void setup_test_12(int* argc, char*** argv, char *outputfile) +{ + *argc = 0; + *argv = (char**)calloc(sizeof(char*), 100); + (*argv)[(*argc)++] = strdup("bambi"); + (*argv)[(*argc)++] = strdup("read2tags"); + (*argv)[(*argc)++] = strdup("-i"); + (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); + (*argv)[(*argc)++] = strdup("-o"); + (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("--output-fmt"); + (*argv)[(*argc)++] = strdup("sam"); + (*argv)[(*argc)++] = strdup("-t"); + (*argv)[(*argc)++] = strdup("Ba"); + (*argv)[(*argc)++] = strdup("-q"); + (*argv)[(*argc)++] = strdup("Qa"); + (*argv)[(*argc)++] = strdup("-p"); + (*argv)[(*argc)++] = strdup("2:1:1:1"); +} + void checkFiles(char *gotfile, char *expectfile, int verbose) { BAMit_t *bgot = BAMit_open(gotfile, 'r', NULL, 0, NULL); BAMit_t *bexp = BAMit_open(expectfile, 'r', NULL, 0, NULL); - bam1_t *got_rec, *exp_rec; + // bam1_t *got_rec, *exp_rec; - int c = sam_hdr_count_lines(bgot->h, "RG"); - if (c != sam_hdr_count_lines(bexp->h, "RG")) { failure++; return; } + int f = failure; + + int c1 = sam_hdr_count_lines(bgot->h, "RG"); + int c2 = sam_hdr_count_lines(bexp->h, "RG"); + if (c1 != c2) { + failure++; + if (verbose) fprintf(stderr, "RG lines: expected %d, got %d\n", c2, c1); + } - for (int n=0; n < c; n++) { + for (int n=0; n < c1; n++) { kstring_t ks_got; ks_initialize(&ks_got); kstring_t ks_exp; ks_initialize(&ks_exp); sam_hdr_find_line_pos(bgot->h, "RG", n, &ks_got); sam_hdr_find_line_pos(bexp->h, "RG", n, &ks_exp); - if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) { failure++; return; } + if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) { + if (verbose) fprintf(stderr, "RG %d: expected %s, got %s\n", n, ks_str(&ks_exp), ks_str(&ks_got)); + failure++; + break; + } ks_free(&ks_got); ks_free(&ks_exp); } - while ((exp_rec = BAMit_next(bexp)) != NULL) { - got_rec = BAMit_next(bgot); - if (!got_rec) { fprintf(stderr, "%s ended too soon\n", gotfile); failure++; return; } - if (memcmp(got_rec->data, exp_rec->data, got_rec->l_data)) { + BAMit_free(bexp); + BAMit_free(bgot); + + FILE *getfp = fopen(gotfile, "r"); + FILE *expfp = fopen(expectfile, "r"); + char getline[2048]; + char expline[2048]; + + if (!getfp) { + fprintf(stderr, "Can't open file %s\n", gotfile); + exit(1); + } + + if (!expfp) { + fprintf(stderr, "Can't open file %s\n", expectfile); + exit(1); + } + + // skip header + while (fgets(getline, 2047, getfp) > 0) { + if (getline[0] != '@') break; + } + while (fgets(expline, 2047, expfp) > 0) { + if (expline[0] != '@') break; + } + + // compare read records + while (true) { + if (strcmp(getline,expline) != 0) { + fprintf(stderr, "Expected: %sFound : %s\n", expline, getline); failure++; - break; } + if (fgets(getline, 2047, getfp) == 0) break; + if (fgets(expline, 2047, expfp) == 0) break; + } + + fclose(getfp); fclose(expfp); + + if (verbose) { + if (f == failure) fprintf(stderr, " :\tpass\n"); + else fprintf(stderr, " :\t*** FAIL ***\n"); } - BAMit_free(bexp); - BAMit_free(bgot); return; } @@ -308,75 +410,100 @@ int main(int argc, char**argv) char outputfile[1024]; // minimal options - sprintf(outputfile,"%s/read2tags_1.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 1: minimal options\n"); + sprintf(outputfile,"%s/read2tags_1.sam", TMPDIR); setup_test_1(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_1.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_1.sam"),verbose); free_args(argv_1); // overlapping reads - sprintf(outputfile,"%s/read2tags_2.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 2: Overlapping reads\n"); + sprintf(outputfile,"%s/read2tags_2.sam", TMPDIR); setup_test_2(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_2.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_2.sam"),verbose); free_args(argv_1); // remove first record - sprintf(outputfile,"%s/read2tags_3.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 3: remove first record\n"); + sprintf(outputfile,"%s/read2tags_3.sam", TMPDIR); setup_test_3(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_3.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_3.sam"),verbose); free_args(argv_1); // remove second record - sprintf(outputfile,"%s/read2tags_4.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 4: remove second record\n"); + sprintf(outputfile,"%s/read2tags_4.sam", TMPDIR); setup_test_4(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_4.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_4.sam"),verbose); free_args(argv_1); // handle single reads - sprintf(outputfile,"%s/read2tags_5.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 5: handle single reads\n"); + sprintf(outputfile,"%s/read2tags_5.sam", TMPDIR); setup_test_5(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_5.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_5.sam"),verbose); free_args(argv_1); // specify duplicate tags - sprintf(outputfile,"%s/read2tags_6.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 6: specify duplicate tags\n"); + sprintf(outputfile,"%s/read2tags_6.sam", TMPDIR); setup_test_6(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_6.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_6.sam"),verbose); free_args(argv_1); // use --replace option - sprintf(outputfile,"%s/read2tags_7.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 7: use --replace option\n"); + sprintf(outputfile,"%s/read2tags_7.sam", TMPDIR); setup_test_7(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_7.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_7.sam"),verbose); free_args(argv_1); // use --merge option - sprintf(outputfile,"%s/read2tags_8.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 8: use --merge option\n"); + sprintf(outputfile,"%s/read2tags_8.sam", TMPDIR); setup_test_8(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_8.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_8.sam"),verbose); free_args(argv_1); // use --merge option with duplicate tags - sprintf(outputfile,"%s/read2tags_9.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 9: use --merge option with duplicate tags\n"); + sprintf(outputfile,"%s/read2tags_9.sam", TMPDIR); setup_test_9(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_9.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_9.sam"),verbose); free_args(argv_1); // use --replace option with duplicate tags - sprintf(outputfile,"%s/read2tags_10.bam", TMPDIR); + if (verbose) fprintf(stderr,"Test 10: use --replace option with duplicate tags\n"); + sprintf(outputfile,"%s/read2tags_10.sam", TMPDIR); setup_test_10(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.sam"),verbose); free_args(argv_1); + // write tags to read 2 from read 1 + if (verbose) fprintf(stderr,"Test 11: write tags to read 2 from read 1\n"); + sprintf(outputfile,"%s/read2tags_11.sam", TMPDIR); + setup_test_11(&argc_1, &argv_1, outputfile); + main_read2tags(argc_1-1, argv_1+1); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_11.sam"),verbose); + free_args(argv_1); + + // write tags to read 1 from read 2 + if (verbose) fprintf(stderr,"Test 12: write tags to read 1 from read 2\n"); + sprintf(outputfile,"%s/read2tags_12.sam", TMPDIR); + setup_test_12(&argc_1, &argv_1, outputfile); + main_read2tags(argc_1-1, argv_1+1); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_12.sam"),verbose); + free_args(argv_1); printf("read2tags tests: %s\n", failure ? "FAILED" : "Passed"); return failure ? EXIT_FAILURE : EXIT_SUCCESS;