From df384d50463ca11a6a9943671010db971242a07a Mon Sep 17 00:00:00 2001 From: jenniferliddle Date: Tue, 21 May 2024 10:19:05 +0100 Subject: [PATCH 1/2] Updated to handle NanoSeq --- src/read2tags.c | 252 ++++++++++++++++++++++++--------- test/data/out/read2tags_11.bam | Bin 0 -> 2399 bytes test/data/out/read2tags_12.bam | Bin 0 -> 2396 bytes test/t_read2tags.c | 115 ++++++++++++++- 4 files changed, 290 insertions(+), 77 deletions(-) create mode 100644 test/data/out/read2tags_11.bam create mode 100644 test/data/out/read2tags_12.bam diff --git a/src/read2tags.c b/src/read2tags.c index 513c8074..a7124e4d 100644 --- a/src/read2tags.c +++ b/src/read2tags.c @@ -45,7 +45,8 @@ along with this program. If not, see . * position record */ typedef struct { - int record; + int record_from; + int record_to; int from; int to; } pos_t; @@ -129,30 +130,61 @@ static int bam_aux_cmp(const uint8_t *s, const uint8_t *d) /* * Parse a comma separated list of positions - * Format is r:s:e,r:s:e,r:s:e,... + * Format is rf:rt:s:e,... + * or is r:s:e,... if rf and rt are the same + * or is s:e,... if rf and rt are both zero * - * where r is record number (0, 1 or 2 and is optional) + * where rf is record number to read (0, 1 or 2 and is optional) + * rt is record number to write (0, 1 or 2 and is optional) * s is the start position in the read string * e is the end position in the read string * start and end positions are 1 (not zero) based */ -static void parse_positions(va_t *poslist, char *args) +static void parse_positions(va_t *poslist, char *arg_string) { - char *argstr = strdup(args); + char *argstr = strdup(arg_string); char *save_s; char *s = strtok_r(argstr,",",&save_s); while (s) { pos_t *pos = calloc(1, sizeof(pos_t)); char *save_p; - char *p = strtok_r(s,":",&save_p); if (p) pos->record = atoi(p); - p = strtok_r(NULL,":",&save_p); if (p) pos->from = atoi(p); - p = strtok_r(NULL,":",&save_p); if (p) pos->to = atoi(p); - if (!p) { - // looks like s:e format - pos->to = pos->from; pos->from = pos->record; pos->record = 0; + int n = 0; // number of position arguments + int args[4]; + + char *p = strtok_r(s,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + + switch (n) { + case 2: // s:e + pos->record_from = 0; + pos->record_to = 0; + pos->from = args[0]; + pos->to = args[1]; + break; + case 3: // r:s:e + pos->record_from = args[0]; + pos->record_to = args[0]; + pos->from = args[1]; + pos->to = args[2]; + break; + case 4: // rf:rt:s:e + pos->record_from = args[0]; + pos->record_to = args[1]; + pos->from = args[2]; + pos->to = args[3]; + break; + default: + fprintf(stderr, "Invalid pos format: %s\n", arg_string); + exit(1); } - if (pos->record < 0 || pos->record > 2 || pos->from == 0 || pos->to == 0 || pos->from > pos->to) { - fprintf(stderr,"Invalid pos argument: %s\n", args); + + if (pos->record_from < 0 || pos->record_from > 2 || + pos->record_to < 0 || pos->record_to > 2 || + pos->from == 0 || pos->to == 0 || + pos->from > pos->to) { + fprintf(stderr,"Invalid pos argument: %s\n", arg_string); exit(1); } va_push(poslist,pos); @@ -169,6 +201,8 @@ static void usage(FILE *write_to) fprintf(write_to, "Usage: bambi read2tags [options]\n" "\n" +"Convert portions of a read into tags\n" +"\n" "Options:\n" " -i --input BAM file to read [default: stdin]\n" " -o --output BAM file to output [default: stdout]\n" @@ -178,13 +212,25 @@ static void usage(FILE *write_to) " [default: " DEFAULT_KEEP_TAGS "]\n" " -d --discard-tags comma separated list of tags to discard when merging records\n" " [default: " DEFAULT_DISCARD_TAGS "]\n" -" -p --positions comma separated list of positions\n" +" -p --positions comma separated list of positions (see below)\n" " -m --merge merge duplicate tags\n" " -r --replace replace duplicate tags\n" " -v --verbose verbose output\n" " --input-fmt [sam/bam/cram] [default: bam]\n" " --output-fmt [sam/bam/cram] [default: bam]\n" " --compression-level [0..9]\n" +"\n" +" comma separated list of positions, where each position has the format:\n" +" Format is rf:rt:s:e,...\n" +" or is r:s:e,... if rf and rt are the same\n" +" or is s:e,... if rf and rt are both zero\n" +"\n" +" where rf is record number to read (0, 1 or 2 and is optional)\n" +" rt is record number to write (0, 1 or 2 and is optional)\n" +" s is the start position in the read string\n" +" e is the end position in the read string\n" +" start and end positions are 1 (not zero) based\n" +"\n" ); } @@ -447,17 +493,25 @@ static void shuffle(char *s) /* * add a new tag to our taglist, or append to existing tag */ -static void add_or_update(va_t *va, char *tag, char *data) +static void add_or_update(va_t *va, char *tag, char *data, int r) { int n; + char recno = r + '0'; + char key[4]; + + key[0] = recno; + key[1] = tag[0]; + key[2] = tag[1]; + key[3] = 0; + for (n=0; n < va->end; n++) { - if (strncmp(tag,va->entries[n],2) == 0) break; + if (strncmp(key,va->entries[n],3) == 0) break; } if (n == va->end) { // add new tag - char *e = calloc(1, strlen(tag) + 1 + strlen(data) + 1); - strcpy(e, tag); + char *e = calloc(1, strlen(key) + 1 + strlen(data) + 1); + strcpy(e, key); strcat(e, ":"); strcat(e, data); va_push(va,e); @@ -507,31 +561,43 @@ static void add_tag(bam1_t *rec, char *tag, char *data, opts_t *opts) } /* - * Process one record + * Process records */ -static bam1_t *process_record(bam1_t *rec, opts_t *opts) +static void process_records(bam1_t *rec1, bam1_t *rec2, bam1_t **newrec, bam1_t **newrec2, opts_t *opts) { pos_t *pos; - int recno = -1; - char *tag_data = calloc(1, rec->core.l_qseq+1); - char *qtag_data = calloc(1, rec->core.l_qseq+1); + int readno1 = -1; + int readno2 = -1; + char *tag_data = calloc(1, rec1->core.l_qseq+1); + char *qtag_data = calloc(1, rec1->core.l_qseq+1); va_t *new_tags = va_init(10,free); va_t *new_qtags = va_init(10,free); - if (!(rec->core.flag & BAM_FPAIRED)) recno = 0; - if (rec->core.flag & BAM_FREAD1) recno = 1; - if (rec->core.flag & BAM_FREAD2) recno = 2; + if (!(rec1->core.flag & BAM_FPAIRED)) readno1 = 0; + if (rec1->core.flag & BAM_FREAD1) readno1 = 1; + if (rec1->core.flag & BAM_FREAD2) readno1 = 2; - char *seq = get_read(rec); - char *quality = get_quality(rec); + if (rec2) { + if (!(rec2->core.flag & BAM_FPAIRED)) readno2 = 0; + if (rec2->core.flag & BAM_FREAD1) readno2 = 1; + if (rec2->core.flag & BAM_FREAD2) readno2 = 2; + } /* * first pass - copy sections of read into tags */ for (int n=0; n < opts->poslist->end; n++) { pos = opts->poslist->entries[n]; - if (pos->record == recno) { + if ( (pos->record_from == readno1) || (pos->record_from == readno2) ) { + bam1_t *rec = NULL; + if (pos->record_from == readno1) rec = rec1; + if (pos->record_from == readno2) rec = rec2; + if (pos->from <= rec->core.l_qseq) { + + char *seq = get_read(rec); + char *quality = get_quality(rec); + int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; int len = to - from + 1; @@ -539,52 +605,82 @@ static bam1_t *process_record(bam1_t *rec, opts_t *opts) // copy data from read memset(tag_data,0,rec->core.l_qseq+1); memcpy(tag_data, seq + from - 1, len); - add_or_update(new_tags, opts->taglist->entries[n], tag_data); + add_or_update(new_tags, opts->taglist->entries[n], tag_data, pos->record_to); // copy data from quality memset(qtag_data,0,rec->core.l_qseq+1); memcpy(qtag_data, quality + from - 1, len); - add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data); + add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data, pos->record_to); + free(seq); free(quality); } } } // add new tags for (int n=0; n < new_tags->end; n++) { - char *tag = new_tags->entries[n]; tag[2] = 0; + bam1_t *rec = NULL; + int readno = (*(char*)(new_tags->entries[n]) - '0'); + char *tag = new_tags->entries[n]+1; tag[2] = 0; char *data = tag+3; - add_tag(rec, tag, data, opts); + if (readno == readno1) rec = rec1; + else rec = rec2; + if (rec) add_tag(rec, tag, data, opts); } // add new quality tags for (int n=0; n < new_qtags->end; n++) { - char *tag = new_qtags->entries[n]; tag[2] = 0; + bam1_t *rec = NULL; + int readno = (*(char*)(new_qtags->entries[n]) - '0'); + char *tag = new_qtags->entries[n]+1; tag[2] = 0; char *data = tag+3; - add_tag(rec, tag, data, opts); + if (readno == readno1) rec = rec1; + else rec = rec2; + if (rec) add_tag(rec, tag, data, opts); } /* * second pass - mark sections of read as deleted */ + char *seq = NULL; + char *quality = NULL; + char *seq1 = get_read(rec1); + char *quality1 = get_quality(rec1); + char *seq2 = rec2 ? get_read(rec2) : NULL; + char *quality2 = rec2 ? get_quality(rec2) : NULL; + for (int n=0; n < opts->poslist->end; n++) { + bam1_t *rec = NULL; pos = opts->poslist->entries[n]; - if (pos->record == recno) { - if (pos->from <= rec->core.l_qseq) { - int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; - int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; - int len = to - from + 1; - memset(seq + from - 1, 1, len); // mark as deleted - memset(quality + from - 1, 1, len); // mark as deleted - } + if (pos->record_from == readno1) { rec = rec1; seq = seq1; quality = quality1; } + if (pos->record_from == readno2) { rec = rec2; seq = seq2; quality = quality2; } + + if (rec && (pos->from <= rec->core.l_qseq)) { + int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; + int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; + int len = to - from + 1; + memset(seq + from - 1, 1, len); // mark as deleted + memset(quality + from - 1, 1, len); // mark as deleted } } - shuffle(seq); shuffle(quality); // physically remove 'marked as deleted' bytes - bam1_t *newrec = make_new_rec(rec, seq, quality); - free(tag_data); free(qtag_data); free(quality); free(seq); + shuffle(seq1); shuffle(quality1); // physically remove 'marked as deleted' bytes + bam1_t *nr = make_new_rec(rec1, seq1, quality1); + *newrec = nr; + free(seq1); free(quality1); + + if (rec2) { + shuffle(seq2); shuffle(quality2); // physically remove 'marked as deleted' bytes + nr = make_new_rec(rec2, seq2, quality2); + *newrec2 = nr; + free(seq2); free(quality2); + } else { + *newrec2 = NULL; + } + + free(tag_data); free(qtag_data); va_free(new_tags); va_free(new_qtags); - return newrec; + return; } /* @@ -712,10 +808,12 @@ static bam1_t *merge_records(bam1_t *r1, bam1_t *r2, opts_t *opts) */ static int write_record(BAMit_t *bam, bam1_t *rec) { - int r = sam_write1(bam->f, bam->h, rec); - if (r < 0) { - fprintf(stderr,"sam_write1() failed\n"); - return -1; + if (rec) { + int r = sam_write1(bam->f, bam->h, rec); + if (r < 0) { + fprintf(stderr,"sam_write1() failed\n"); + return -1; + } } return 0; } @@ -731,6 +829,10 @@ int process(opts_t* opts) int retcode = 0; int nrec = 0; int r; + bam1_t *newrec = NULL; + bam1_t *newrec2= NULL; + bam1_t *rec = NULL; + bam1_t *rec2 = NULL; BAMit_t *bam_in = BAMit_open(opts->in_file, 'r', opts->input_fmt, 0, NULL); BAMit_t *bam_out = BAMit_open(opts->out_file, 'w', opts->output_fmt, opts->compression_level, NULL); @@ -745,28 +847,38 @@ int process(opts_t* opts) } while (BAMit_hasnext(bam_in)) { - bam1_t *rec = BAMit_next(bam_in); - if (invalid_record(rec,++nrec)) return -1; - bam1_t *newrec = process_record(rec,opts); - - bam1_t *rec2 = BAMit_peek(bam_in); - if (rec2 && strcmp(bam_get_qname(rec), bam_get_qname(rec2)) == 0) { - rec2 = BAMit_next(bam_in); - if (invalid_record(rec2,++nrec)) return -1; - bam1_t *newrec2 = process_record(rec2,opts); - if ((newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) { - bam1_t *merged_rec = merge_records(newrec, newrec2, opts); - if (write_record(bam_out, merged_rec)) return -1; - bam_destroy1(merged_rec); - } else { - if (write_record(bam_out, newrec)) return -1; - if (write_record(bam_out, newrec2)) return -1; - } - bam_destroy1(newrec2); + bam1_t *r = BAMit_next(bam_in); + if (invalid_record(r,++nrec)) return -1; + rec = bam_dup1(r); + + r = BAMit_peek(bam_in); + if (r && strcmp(bam_get_qname(rec), bam_get_qname(r)) == 0) { + r = BAMit_next(bam_in); + if (invalid_record(r,++nrec)) return -1; + rec2 = bam_dup1(r); + } else { + rec2 = NULL; + } + + process_records(rec, rec2, &newrec, &newrec2, opts); + + //newrec = process_record(rec,opts); + //newrec2 = NULL; + //if (rec2) newrec2 = process_record(rec2,opts); + + if (newrec2 && ( (newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) ) { + bam1_t *merged_rec = merge_records(newrec, newrec2, opts); + if (write_record(bam_out, merged_rec)) return -1; + bam_destroy1(merged_rec); } else { - if (write_record(bam_out,newrec)) return -1; + if (write_record(bam_out, newrec)) return -1; + if (write_record(bam_out, newrec2)) return -1; } + + bam_destroy1(rec); + bam_destroy1(rec2); bam_destroy1(newrec); + bam_destroy1(newrec2); } // tidy up after us diff --git a/test/data/out/read2tags_11.bam b/test/data/out/read2tags_11.bam new file mode 100644 index 0000000000000000000000000000000000000000..e5b271569453a56688c521d19ff5b32d0bf8af2f GIT binary patch literal 2399 zcmV-l383~LiwFb&00000{{{d;LjnN32klr%bK5u;b|0179Qux`x%AXTBB{l3G^MIg zA}w*6q-c_IoD&KXAqj16AxJO%L;Cm514PoY;&f-yOXe`LEIom51Ms~MkDThxcfWjp zcJ^*zbgtd8)*E($<+w_DR+Kb)_0F4jEMpwZf{y2o7c8J3(JV{LB8yRwZOeO7(2fy| z>#>-nJH+d8l%Bz?s4 zCrL^C$Qf6(p*rlVco7UVto6^|4zP|F7f#>Sjhh?AASF?(w4^C#C8HcN49uBs+rgjW zUJ>g(gLUG_)?IU=q2(&{R^dd6=$*pLD62}bI-YG}-L~{#V*0_b|7JLR^SY5PTIp>g zjnr1sTB+YkgH{^0(x{crTd9pPx_F)M*$v)i1?D-sr4e3N+bu0P3c{7~0lT_mPTI|_ zammLh@T?zA1J9Oj=r3(>=c(#wIMBvu!FKy@xi7M6x6fgczN;fnKUOr|2<&BBRPHHC z2uo2GZ^@?2ibq6q2BR^n5}}a6>J(l2kY}7mC}A7gLnz!cjv`oQ1m8IU%z-9Bc|p5* zwT{__lnhFwpfQD|@1aY8A4_3k$>9RLMc};1a$2B@Q(=EkJ8{hjpI$0 zP!tvwhdq<9MHW;dE31;CA6UAHt4Pc(q9tLl<07&25gj{DvVy=&8TrR+l*7*R zFpVXsCtDIj4DS+OT6Spq6W|Ssd{Z~@++VIdY#D$_+nV~qPVRD*-lo}oiURX$Wx6xV zo#UD53fo|YJX;Shm%cNxY!geVD3m1lV%1uRD{GDqd&? z^MZA-T*JJnJ=H^wF%k=49!8&tG9eMkOImDM8VQy=9r2#?9LK zzfGz+K(B~;C$WK5k68$cu;HaYoB(wPH-`nseN~!kU2V`mANG}l1Fa|K(tt&7)bGtB zCMgBKCE3Lc%GEIic!EF1_t(!+qBK z96g*Z%m7LrYPTK0hG~bo7z8bMSu;l;2*-sY4Y9p^|KxPpXw(%ZY)%wvyNNdXwIctN z&PM-LZ8wF+^Q|Z@o&&>@7(+#rhuS(VH(6Z3`?~_b1|>!EJbr|uqqan~B~;U$21Py; z<)k!i7YI;wM9FV=O1{Rh(`;QxPFD`v%9^Fh^CVDrgHZPpfj+43QhbbrMx{i5F&xuO zWUr87vuAfy`M6O^Adx%va|2Y=?G|(gGsB$yOi>?Jza2>!hjVnZ$8fS!zR%(a0V8VX zM=H>qLMJGs&+9~RI=`&)(psT^q2IX=1ool&?Y(FZhI=AU1M%fafJx>b^)$6x<_|{{ z@6ck`l6&*Rla`<7@sjJu}nww^0G~V57};PS;57KXq^PfqkHI#=t%TSu(^kHfCRBOZ+ezLpXq`;W+kla^wFmznnP=y3*@~x z>>U9TkSKKk5-k{5Du7sb6YVfZf;S@3B)7hLXaSNV6#$YVHIA@DU8itSUkz1tEZ_fBHC62d&iH@q$yLXje{u^jgVS>M>btYEv%kUrDQ?eR zeSdcLAK~!^&ASx<03VA81ONa4009360763o0E7XZlDkgBFc^j1zJWuAih;(CE7Ace zPI^%yRl*mVEDWWdvY?8oBVtFS%HW0PgSVk}TdI_RRB|LMmKFWyd_H~!0DK*E1#$;~ zdvH-ZGh2`^WBN#=NfM_tr8K1Jn8tA$MWfIUL6Ae()3VgJ*E`~ zz(ZD0^Z!B>LWsH19e7%ckLN~$ci{djLCLF5bXcd!PZ*wRl&ewRpj8UX3#$y zn}(xtXk2Mz2D+TPn`#3q@7oFOKL&fxtrRn>#(guTK`Glv9>^)S-R8w?t^Y=9ZdRCZc47ebn% zrOj~JeQ60?%b*eDT9*tt1l#xZN7cs6bS-Sx^R`__acsMe65BQ8h~-%4Ud_$JM|35a z$bNg{a)&-^i(b!wSjBS0q|<9${t5cbqGuLef-Q;NRw>>C001A02m}BC000301^_}s R0stET0{{R300000004}UhtdE5 literal 0 HcmV?d00001 diff --git a/test/data/out/read2tags_12.bam b/test/data/out/read2tags_12.bam new file mode 100644 index 0000000000000000000000000000000000000000..feb112b7b8320bd1161bc1124d0ea583ec033a20 GIT binary patch literal 2396 zcmV-i38VHOiwFb&00000{{{d;LjnN42klr%bK5u;b|0179Qux`x%AXTBB{l3G^MIg zA}w*6q-c_IoD&KXAqj16AxJO%L;Cm514PoY;&f-yOXe`LEIom51Ms~MkDThxcfWjp zcJ^*zbgtd8)*E($<+w_DR+Kb)_0F4jEMpwZf{y2o7c8J3(JV{LB8yRwZOeO7(2fy| z>#>-nJH+d8l%Bz?s4 zCrL^C$Qf6(p*rlVco7UVto6^|4zP|F7f#>Sjhh?AASF?(w4^C#C8HcN49uBs+rgjW zUJ>g(gLUG_)?IU=q2(&{R^dd6=$*pLD62}bI-YG}-L~{#V*0_b|7JLR^SY5PTIp>g zjnr1sTB+YkgH{^0(x{crTd9pPx_F)M*$v)i1?D-sr4e3N+bu0P3c{7~0lT_mPTI|_ zammLh@T?zA1J9Oj=r3(>=c(#wIMBvu!FKy@xi7M6x6fgczN;fnKUOr|2<&BBRPHHC z2uo2GZ^@?2ibq6q2BR^n5}}a6>J(l2kY}7mC}A7gLnz!cjv`oQ1m8IU%z-9Bc|p5* zwT{__lnhFwpfQD|@1aY8A4_3k$>9RLMc};1a$2B@Q(=EkJ8{hjpI$0 zP!tvwhdq<9MHW;dE31;CA6UAHt4Pc(q9tLl<07&25gj{DvVy=&8TrR+l*7*R zFpVXsCtDIj4DS+OT6Spq6W|Ssd{Z~@++VIdY#D$_+nV~qPVRD*-lo}oiURX$Wx6xV zo#UD53fo|YJX;Shm%cNxY!geVD3m1lV%1uRD{GDqd&? z^MZA-T*JJnJ=H^wF%k=49!8&tG9eMkOImDM8VQy=9r2#?9LK zzfGz+K(B~;C$WK5k68$cu;HaYoB(wPH-`nseN~!kU2V`mANG}l1Fa|K(tt&7)bGtB zCMgBKCE3Lc%GEIic!EF1_t(!+qBK z96g*Z%m7LrYPTK0hG~bo7z8bMSu;l;2*-sY4Y9p^|KxPpXw(%ZY)%wvyNNdXwIctN z&PM-LZ8wF+^Q|Z@o&&>@7(+#rhuS(VH(6Z3`?~_b1|>!EJbr|uqqan~B~;U$21Py; z<)k!i7YI;wM9FV=O1{Rh(`;QxPFD`v%9^Fh^CVDrgHZPpfj+43QhbbrMx{i5F&xuO zWUr87vuAfy`M6O^Adx%va|2Y=?G|(gGsB$yOi>?Jza2>!hjVnZ$8fS!zR%(a0V8VX zM=H>qLMJGs&+9~RI=`&)(psT^q2IX=1ool&?Y(FZhI=AU1M%fafJx>b^)$6x<_|{{ z@6ck`l6&*Rla`<7@sjJu}nww^0G~V57};PS;57KXq^PfqkHI#=t%TSu(^kHfCRBOZ+ezLpXq`;W+kla^wFmznnP=y3*@~x z>>U9TkSKKk5-k{5Du7sb6YVfZf;S@3B)7hLXaSNV6#$YVHIA@DU`u_un zZ}|YCV}xVDJY99z?W>`xj^+EGs-~))z#0FKJ-O<5^G|L8W^h{0UVV3VcJ?>;KgI3Y ztMAXw{sXQRmfyP-001A02m}BC000301^_}s0sw#kos!>5!!Q`dv%NuUE{Yd!Nh5X_ zLcex@XklQQ&i2NOxm@)|h9LSDuJmFeKA*mge%a=%R$=x)XbAK>=a-WwLdg40CnUEC zxgxZ7hFg*^Lb!oo9ECB&06xSc2*WrChQ8|)yINV+y2_pEY^&3agkVAlH&KPwS*D~` zF1vo7y`V z8~owOM=J<$3`+&ey_`A8-hgdCh`r_3TH>m4pHIN^27>`+SiE)a8zBzC1%TlMLxg6g z^r&up>v0!x|c?0eCPQYjXl znh?j`ucq6g*W#2G_10@@TYYXG+8 @@ -242,35 +242,111 @@ void setup_test_10(int* argc, char*** argv, char *outputfile) (*argv)[(*argc)++] = strdup("--replace"); } +void setup_test_11(int* argc, char*** argv, char *outputfile) +{ + *argc = 0; + *argv = (char**)calloc(sizeof(char*), 100); + (*argv)[(*argc)++] = strdup("bambi"); + (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("-i"); + (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); + (*argv)[(*argc)++] = strdup("-o"); + (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("-t"); + (*argv)[(*argc)++] = strdup("Ba"); + (*argv)[(*argc)++] = strdup("-q"); + (*argv)[(*argc)++] = strdup("Qa"); + (*argv)[(*argc)++] = strdup("-p"); + (*argv)[(*argc)++] = strdup("1:2:1:1"); +} + +void setup_test_12(int* argc, char*** argv, char *outputfile) +{ + *argc = 0; + *argv = (char**)calloc(sizeof(char*), 100); + (*argv)[(*argc)++] = strdup("bambi"); + (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("-i"); + (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); + (*argv)[(*argc)++] = strdup("-o"); + (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("-t"); + (*argv)[(*argc)++] = strdup("Ba"); + (*argv)[(*argc)++] = strdup("-q"); + (*argv)[(*argc)++] = strdup("Qa"); + (*argv)[(*argc)++] = strdup("-p"); + (*argv)[(*argc)++] = strdup("2:1:1:1"); +} + void checkFiles(char *gotfile, char *expectfile, int verbose) { BAMit_t *bgot = BAMit_open(gotfile, 'r', NULL, 0, NULL); BAMit_t *bexp = BAMit_open(expectfile, 'r', NULL, 0, NULL); - bam1_t *got_rec, *exp_rec; + // bam1_t *got_rec, *exp_rec; + + int f = failure; - int c = sam_hdr_count_lines(bgot->h, "RG"); - if (c != sam_hdr_count_lines(bexp->h, "RG")) { failure++; return; } + int c1 = sam_hdr_count_lines(bgot->h, "RG"); + int c2 = sam_hdr_count_lines(bexp->h, "RG"); + if (c1 != c2) { + failure++; + if (verbose) fprintf(stderr, "RG lines: expected %d, got %d\n", c2, c1); + } - for (int n=0; n < c; n++) { + for (int n=0; n < c1; n++) { kstring_t ks_got; ks_initialize(&ks_got); kstring_t ks_exp; ks_initialize(&ks_exp); sam_hdr_find_line_pos(bgot->h, "RG", n, &ks_got); sam_hdr_find_line_pos(bexp->h, "RG", n, &ks_exp); - if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) { failure++; return; } + if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) { + if (verbose) fprintf(stderr, "RG %d: expected %s, got %s\n", n, ks_str(&ks_exp), ks_str(&ks_got)); + failure++; + break; + } ks_free(&ks_got); ks_free(&ks_exp); } +#if 0 while ((exp_rec = BAMit_next(bexp)) != NULL) { got_rec = BAMit_next(bgot); - if (!got_rec) { fprintf(stderr, "%s ended too soon\n", gotfile); failure++; return; } + if (!got_rec) { fprintf(stderr, "%s ended too soon\n", gotfile); failure++; break; } if (memcmp(got_rec->data, exp_rec->data, got_rec->l_data)) { + if (verbose) fprintf(stderr, "Record different\n"); failure++; break; } } +#endif BAMit_free(bexp); BAMit_free(bgot); + + char cmd[1024]; + sprintf(cmd, "samtools view -o /tmp/got.sam %s ", gotfile); + system(cmd); + sprintf(cmd, "samtools view -o /tmp/exp.sam %s ", expectfile); + system(cmd); + + FILE *getfp = fopen("/tmp/got.sam", "r"); + FILE *expfp = fopen("/tmp/exp.sam", "r"); + char getline[1024]; + char expline[1024]; + + while (fgets(getline, 1023, getfp) > 0) { + fgets(expline, 1023, expfp); + if (strcmp(getline,expline) != 0) { + fprintf(stderr, "Expected: %sFound : %s\n", expline, getline); + failure++; + } + } + + fclose(getfp); fclose(expfp); + + if (verbose) { + if (f == failure) fprintf(stderr, " :\tpass\n"); + else fprintf(stderr, " :\t*** FAIL ***\n"); + } + return; } @@ -308,6 +384,7 @@ int main(int argc, char**argv) char outputfile[1024]; // minimal options + if (verbose) fprintf(stderr,"Test 1: minimal options\n"); sprintf(outputfile,"%s/read2tags_1.bam", TMPDIR); setup_test_1(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -315,6 +392,7 @@ int main(int argc, char**argv) free_args(argv_1); // overlapping reads + if (verbose) fprintf(stderr,"Test 2: Overlapping reads\n"); sprintf(outputfile,"%s/read2tags_2.bam", TMPDIR); setup_test_2(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -322,6 +400,7 @@ int main(int argc, char**argv) free_args(argv_1); // remove first record + if (verbose) fprintf(stderr,"Test 3: remove first record\n"); sprintf(outputfile,"%s/read2tags_3.bam", TMPDIR); setup_test_3(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -329,6 +408,7 @@ int main(int argc, char**argv) free_args(argv_1); // remove second record + if (verbose) fprintf(stderr,"Test 4: remove second record\n"); sprintf(outputfile,"%s/read2tags_4.bam", TMPDIR); setup_test_4(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -336,6 +416,7 @@ int main(int argc, char**argv) free_args(argv_1); // handle single reads + if (verbose) fprintf(stderr,"Test 5: handle single reads\n"); sprintf(outputfile,"%s/read2tags_5.bam", TMPDIR); setup_test_5(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -343,6 +424,7 @@ int main(int argc, char**argv) free_args(argv_1); // specify duplicate tags + if (verbose) fprintf(stderr,"Test 6: specify duplicate tags\n"); sprintf(outputfile,"%s/read2tags_6.bam", TMPDIR); setup_test_6(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -350,6 +432,7 @@ int main(int argc, char**argv) free_args(argv_1); // use --replace option + if (verbose) fprintf(stderr,"Test 7: use --replace option\n"); sprintf(outputfile,"%s/read2tags_7.bam", TMPDIR); setup_test_7(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -357,6 +440,7 @@ int main(int argc, char**argv) free_args(argv_1); // use --merge option + if (verbose) fprintf(stderr,"Test 8: use --merge option\n"); sprintf(outputfile,"%s/read2tags_8.bam", TMPDIR); setup_test_8(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -364,6 +448,7 @@ int main(int argc, char**argv) free_args(argv_1); // use --merge option with duplicate tags + if (verbose) fprintf(stderr,"Test 9: use --merge option with duplicate tags\n"); sprintf(outputfile,"%s/read2tags_9.bam", TMPDIR); setup_test_9(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -371,12 +456,28 @@ int main(int argc, char**argv) free_args(argv_1); // use --replace option with duplicate tags + if (verbose) fprintf(stderr,"Test 10: use --replace option with duplicate tags\n"); sprintf(outputfile,"%s/read2tags_10.bam", TMPDIR); setup_test_10(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.bam"),verbose); free_args(argv_1); + // write tags to read 2 from read 1 + if (verbose) fprintf(stderr,"Test 11: write tags to read 2 from read 1\n"); + sprintf(outputfile,"%s/read2tags_11.bam", TMPDIR); + setup_test_11(&argc_1, &argv_1, outputfile); + main_read2tags(argc_1-1, argv_1+1); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_11.bam"),verbose); + free_args(argv_1); + + // write tags to read 1 from read 2 + if (verbose) fprintf(stderr,"Test 12: write tags to read 1 from read 2\n"); + sprintf(outputfile,"%s/read2tags_12.bam", TMPDIR); + setup_test_12(&argc_1, &argv_1, outputfile); + main_read2tags(argc_1-1, argv_1+1); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_12.bam"),verbose); + free_args(argv_1); printf("read2tags tests: %s\n", failure ? "FAILED" : "Passed"); return failure ? EXIT_FAILURE : EXIT_SUCCESS; From d06da2ecac346235486ec0b7e2f732a10c8cfc60 Mon Sep 17 00:00:00 2001 From: jenniferliddle Date: Thu, 6 Jun 2024 16:08:18 +0100 Subject: [PATCH 2/2] Removed the dependency on samtools from the unit tests --- test/data/out/read2tags_1.bam | Bin 2336 -> 0 bytes test/data/out/read2tags_1.sam | 22 +++++ test/data/out/read2tags_10.bam | Bin 2323 -> 0 bytes test/data/out/read2tags_10.sam | 19 +++++ test/data/out/read2tags_11.bam | Bin 2399 -> 0 bytes test/data/out/read2tags_11.sam | 24 ++++++ test/data/out/read2tags_12.bam | Bin 2396 -> 0 bytes test/data/out/read2tags_12.sam | 24 ++++++ test/data/out/read2tags_2.bam | Bin 2381 -> 0 bytes test/data/out/read2tags_2.sam | 22 +++++ test/data/out/read2tags_3.bam | Bin 2339 -> 0 bytes test/data/out/read2tags_3.sam | 19 +++++ test/data/out/read2tags_4.bam | Bin 2343 -> 0 bytes test/data/out/read2tags_4.sam | 19 +++++ test/data/out/read2tags_5.bam | Bin 2347 -> 0 bytes test/data/out/read2tags_5.sam | 23 ++++++ test/data/out/read2tags_6.bam | Bin 2357 -> 0 bytes test/data/out/read2tags_6.sam | 22 +++++ test/data/out/read2tags_7.bam | Bin 2281 -> 0 bytes test/data/out/read2tags_7.sam | 22 +++++ test/data/out/read2tags_8.bam | Bin 2325 -> 0 bytes test/data/out/read2tags_8.sam | 22 +++++ test/data/out/read2tags_9.bam | Bin 2345 -> 0 bytes test/data/out/read2tags_9.sam | 19 +++++ test/t_read2tags.c | 146 +++++++++++++++++++-------------- 25 files changed, 343 insertions(+), 60 deletions(-) delete mode 100644 test/data/out/read2tags_1.bam create mode 100644 test/data/out/read2tags_1.sam delete mode 100644 test/data/out/read2tags_10.bam create mode 100644 test/data/out/read2tags_10.sam delete mode 100644 test/data/out/read2tags_11.bam create mode 100644 test/data/out/read2tags_11.sam delete mode 100644 test/data/out/read2tags_12.bam create mode 100644 test/data/out/read2tags_12.sam delete mode 100644 test/data/out/read2tags_2.bam create mode 100644 test/data/out/read2tags_2.sam delete mode 100644 test/data/out/read2tags_3.bam create mode 100644 test/data/out/read2tags_3.sam delete mode 100644 test/data/out/read2tags_4.bam create mode 100644 test/data/out/read2tags_4.sam delete mode 100644 test/data/out/read2tags_5.bam create mode 100644 test/data/out/read2tags_5.sam delete mode 100644 test/data/out/read2tags_6.bam create mode 100644 test/data/out/read2tags_6.sam delete mode 100644 test/data/out/read2tags_7.bam create mode 100644 test/data/out/read2tags_7.sam delete mode 100644 test/data/out/read2tags_8.bam create mode 100644 test/data/out/read2tags_8.sam delete mode 100644 test/data/out/read2tags_9.bam create mode 100644 test/data/out/read2tags_9.sam diff --git a/test/data/out/read2tags_1.bam b/test/data/out/read2tags_1.bam deleted file mode 100644 index d60ace37d4d00133be10cf6bda14b1b8f39078be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2336 zcmV+*3E%b~iwFb&00000{{{d;LjnMR2klr}bK5o+b|2-po$0<~W?y!vkx1&|INH%n zD3O-9B`KPu9OnrGiI9Yvw-BT+{bl`mJ%C7BcAPewUebpujs$_{92@}O_u)WO-TCIH zZ%4AB)8MTMyC$4&i1ZTS{ZQ|Co*WE9P9Gm4k8Ezp zjvg}9UibQ1Z_os>;Ai9WTzEzO5VA*NQ<(qF8B3Q_f08Ib;}^Gu^g> zKP0>&)_Vr)#F4GL=0rowRp_n4i4xIUg_luQm11=~+r+wU>A}SGgJJ*maQOOF(>iZk zZ<^LfZChH~>bI>y+Zwj5QQJCeTkRO5^H=$vUE^(5V4kxZ8sT-d-O_@iAY2*mv8y}g zq}|*amwb!@&-&gp@NDUZ{?dkUo~n+918s~JY`5>0`y#7$`y3|eyE@|ZLq*e#z+SdR z<({I1uoPwSmTbzbctA8~&>OQV5e6BoPSK_JdB$mk61Jf|gu*@JD1v21@R<|999R;R z7qpvK>zHjw$$%mSjVUaB4_yHKSPBzM4iDf@44fBPP773VD*T^&%lLc`bc@jXK{~vn zalFYAio&AeuxAps$O0y^vMMS1H%m8h6^Xe;v?L66TqKr0pyR+vRuGseBma4Ya+x74 z3Pt;}%*TpycX!wD0hm$UC?XG$21HfRZ0UHu83fkS#kP56+7lJQYi_!xuZJcE94}o% z_upfIX)HlK*^(H>@GkI$WrwCe0o|a;H+2Kg{pHHTmI0Wwt*I~kFB@1NBm+*uD|^_0$-} z^p?hxR)wb|vC@d%8+4OJv=Grh>hhis2ui9WKv_c4s0-RkK||q803nT}4@#=IWB{Cd z0Zb6bUl-LJpjSk_XJdn^9;*y0lH-`nseO0<^U2V`m8}^k$0IetH(tt&7 z)NjorCMkuuCE3Lc=<1jPJi+f1`^)Dj^1+9#>?O1;*oJ=!?2xD%euQ61UwAcZ^|@pd zq;agk(Zkuo41n^$-L?Z8rXA{{6Rg~2tsH$292ZC$Vte`SG3c_fs2L`Fo*C4B6K(WM zB>z~>M*kPL8=~=iE6BxjP*@UUU_^P~)^oZ^a)Ive3IH2WisX6x07pmdiE2+^)2#+1 z9|}4tP1ywvU>#BNo1K!cG3+#37gE!egSE0|sq!=l)ZHM|y+B|O=DQRhBcU-V(Vq;* zG!xk?q}c4)Emb~jloCYbmi^cO6?MA>-NDQ-XFpQZht+R;62{>i-Rv=(?3C}bI6|O^ z+S!o_6sJ%Liq@xXA~>C2HhF2S(0|cy+y?^tQ2qK|v`Qk~>PiuK1>_lm-h`Ly&BWihBrofvlH@2+cqDPcYLgdjMR7Ud2q4r6k zv_Msw0EpoznAY|6D}nE*k`)#KYgRlygRi7Sd%}-Kl(8T`=!#`<5=BfbCeT4X+ZI_O z9~=+$xd+Z+JrL-^+=U_$JSQ)1@2Ae9W1H_y)5GR<=<9gpI=bgUKvBj7O+^Eh$xPPJ z^;khGga?Z+l35wf0SoN;%Q?uAsFz@K59xpiVsGBmEPuSx1$oR$Sl8);Pf0X~(mEH& zd$HR)0wf?&+5jY4FtAhrv92cCZjOX*M50J;WA)Gin&K)TZl44ooj)|?J5;hemL9Yd zz*o>DyA{}b>(4p5+f#6`434B^JR&%Qx8iXCVe(m!dSRjscD{;)9^>->S%8UoVO@Wv zd_>KowJKg`1mJT3cm5nNMAZ++Goasr!~Tm1@h6}hR&&kjVq7%57XdDQv*`B*!@3Y( zGq^{^bW@@(I~=HRvn#l0;UAPAxTY!qBt=>(CFP~OS}6s@>qAY(0c{EDW=JWM{9x64 zceeT8;{4I1-e@?mrv!CBpi6kqkv4{Zf?=Eb7Z733g~|(`)3?E8CveAbxxVkVKW}VF z$9Bh~!TH%K5Jk*c`~V9c>UgX1F9HM$2YWiM?`o3aWfeW3zpvfyYN)DX`S+cwscI*1#(%Njb~@htqat|*=gI8!o0F51f589exIH=j=gG;>`598e z-4p--ABzYC000000RIL6LPG)of&rzHPfNov0L8O?gVY{|2iK$-b_bzpwn+;E^T)Lp z4|6&7BEy61V|4To1V5jC8%_3)&MM6LptO|o-uv~%002IBx&nmSTOoT zlS#@lno%0lY)l!;l4KNzF^CEXdq!2}?s}uaMuZ?jNGN2fb)HMEC3wtBYQLZBQc5|q zx(ClI`7V|v2lHiFfR|R6H{k!NOC{<`KxjOl5FCEju7C@DZP@lt#-{OT9NR3KSQ}pm ze_gL(9qj$l_GYoS+*)&cHMnnXX%r2ILtA0-(Xrstfh(|TwK)ZxQaZY|ZJ2XcUZr4- z{pvuz)WNrfF)!;)(Sk5$8RO3=al_)?c>hoaZ_}o?J+&?BvRIxa{uH zXJ?}~+86|dF%?H1!S2lcGP;dD6Fp(ft)8TAbtn)ouqlE@Mt~3KT5_5HblTMhed^Hj zf34nZgLusg#Ff`;&2WT1?S26R5`Y}&0{{RYiwFb&00000{{{d;LjnLB00RI300000 G0002xx_S`+ diff --git a/test/data/out/read2tags_1.sam b/test/data/out/read2tags_1.sam new file mode 100644 index 00000000..3d5ef4da --- /dev/null +++ b/test/data/out/read2tags_1.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_1.sam read2tags_1.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:A Qa:Z:H +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:C Qa:Z:D +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:C Qa:Z:F +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:G Qa:Z:D +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_10.bam b/test/data/out/read2tags_10.bam deleted file mode 100644 index fe35cd63c57e55ac48b64e555e805344216dfc75..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2323 zcmV+u3GDVCiwFb&00000{{{d;LjnMr2klr(bK|%X_8#S1s**dZ_L5XAk<`O8R!UVv ziL}I8lA=k<>p7ty5t6Xx%LM7i{?YuMG$4|eJ)Ygk>|=6>%9cQ&(P%Wj{<^`b?tJ&} zzn+|YSQwpacdYe>onSexQl1qhjZQy!^NwYVgIUn=-0^}1^bXCkv@Eh11=+TIAO-Ch z!MN^=S-L~K?nhaQSjx+yN@!XN?fstG)8MO7yC$4&h@2(V`=Q?PJh>RCoIVdCk8Ezq zjvg%3-u3!gZ_p^Q;`4yagy$6(M$7PkJXcV$BQA$4NW(&!jIUn{E3ooxx%-DzGB%~u?R@| zgykbiN&LhaSG1uz?5lVY3^c6u&o2g8$BXw)-`0(r8^s_cQLMD2DQ6|494rjXnQq&` zAEI6n{+@xKII?xuoM>pd3cXc0Q6hS;@G{D(Qh3L+O|09N9!yL>81~-{hwsjs*88?~ z(X>Wt+tS)rzikcL*061j+SYm7YS$ROKg;**25++h^PJt%2(PQ{mKGcZ;mY`kUEMJ! z?cvtA|`A3L8rf1Mnvn&WkLk1*$j|^Iy-F`uQHhEkf%jIpIed z$D1snC@d-tXC`5bEQmx_RwYG$XXz%cBC)rKmW08H3&YYUbgVeZ3IaQ2-X*@Y?9lWlz#A0#rf%T5zg&6PG60jdHTA_jdCFCKn`RFw3e2mO z>CP;7j%TJT<_0t5*?M@n^qq-i+bhR{Go9-GY-xzqrMQVheST0howb8kD*!h{a65Jt zqHEKiE=62VEcbGWu2z;A*5+P=w4sM$qe19{1)%ujL^Nk1a`YSQ!&ZGGz?O@B-IoL2(%yoomY zr6B*5&PM-LY&U_%^Q{Ojo&&>@7(+yqhuC@@ZZf!l_jd(=4MK|KdHe*aqn?TCnGj9) z8U*=Jgp<;gUC@B2BT9a|Q}Q*2lV#-m}LG@PgA>P z{&+<34lQ;qxi=4)IR+PHHKIn?2auP*MI1|D_j%|x9 zkpss|eQClu_yYki%v~rFVdmt`t*9FV}CznlXdiFyeR_h1f?Aok`>&GN@HU69AD1iwz7d`hA@l-9XG z-kaUt5g-AHQU)N=f`O$1h;=p5c5@_nBN9b&tE-0=&=gkzar-0y>HMiF-=UJ-vGl;5 z0KS4I*}cHt2Y=4d!=Azf%iu^-#xsI5_$rS%lh(7`4;LSCwi*eEL-UPUKXVLEshIJvn zW^j*+>83qAY(0c{EDW=JWM{2Bj19$B_YOf>)y+|Vtc@=&E>Jf@8+PW;QTG`musuQ?lxO+c#+qWNE(y`s~Xz>306yiv@MEhRtc=outb9KDc_!sem z#RPjguCH>E0Lm(QLjS+Pf6E(u#|XzFbi3-X+gC$X9m~Ies+y{H0%!a`_UfnO%|EH3 zXOM1Yr{A5Nocsm;U*q=V^lv98{{fEjxlH^N001A02m}BC000301^_}s0svb9osvIK zgfJM!-@Et)APmGo<&O~t6JE;Q1(HiFg_OlX>BQpT%5omJHGFGx3WPxbpIgUa?vO*g_3&YB&E3E5^hvemZF#nb|Z|_9_{)7G>no}L~Iz7 ztyB0;6;zI$dtY+)6QOw;h(sI;22NtR++YxdVMxsN`{Hp1@vMFSf#>-nJH+d8l%Bz?s4 zCrL^C$Qf6(p*rlVco7UVto6^|4zP|F7f#>Sjhh?AASF?(w4^C#C8HcN49uBs+rgjW zUJ>g(gLUG_)?IU=q2(&{R^dd6=$*pLD62}bI-YG}-L~{#V*0_b|7JLR^SY5PTIp>g zjnr1sTB+YkgH{^0(x{crTd9pPx_F)M*$v)i1?D-sr4e3N+bu0P3c{7~0lT_mPTI|_ zammLh@T?zA1J9Oj=r3(>=c(#wIMBvu!FKy@xi7M6x6fgczN;fnKUOr|2<&BBRPHHC z2uo2GZ^@?2ibq6q2BR^n5}}a6>J(l2kY}7mC}A7gLnz!cjv`oQ1m8IU%z-9Bc|p5* zwT{__lnhFwpfQD|@1aY8A4_3k$>9RLMc};1a$2B@Q(=EkJ8{hjpI$0 zP!tvwhdq<9MHW;dE31;CA6UAHt4Pc(q9tLl<07&25gj{DvVy=&8TrR+l*7*R zFpVXsCtDIj4DS+OT6Spq6W|Ssd{Z~@++VIdY#D$_+nV~qPVRD*-lo}oiURX$Wx6xV zo#UD53fo|YJX;Shm%cNxY!geVD3m1lV%1uRD{GDqd&? z^MZA-T*JJnJ=H^wF%k=49!8&tG9eMkOImDM8VQy=9r2#?9LK zzfGz+K(B~;C$WK5k68$cu;HaYoB(wPH-`nseN~!kU2V`mANG}l1Fa|K(tt&7)bGtB zCMgBKCE3Lc%GEIic!EF1_t(!+qBK z96g*Z%m7LrYPTK0hG~bo7z8bMSu;l;2*-sY4Y9p^|KxPpXw(%ZY)%wvyNNdXwIctN z&PM-LZ8wF+^Q|Z@o&&>@7(+#rhuS(VH(6Z3`?~_b1|>!EJbr|uqqan~B~;U$21Py; z<)k!i7YI;wM9FV=O1{Rh(`;QxPFD`v%9^Fh^CVDrgHZPpfj+43QhbbrMx{i5F&xuO zWUr87vuAfy`M6O^Adx%va|2Y=?G|(gGsB$yOi>?Jza2>!hjVnZ$8fS!zR%(a0V8VX zM=H>qLMJGs&+9~RI=`&)(psT^q2IX=1ool&?Y(FZhI=AU1M%fafJx>b^)$6x<_|{{ z@6ck`l6&*Rla`<7@sjJu}nww^0G~V57};PS;57KXq^PfqkHI#=t%TSu(^kHfCRBOZ+ezLpXq`;W+kla^wFmznnP=y3*@~x z>>U9TkSKKk5-k{5Du7sb6YVfZf;S@3B)7hLXaSNV6#$YVHIA@DU8itSUkz1tEZ_fBHC62d&iH@q$yLXje{u^jgVS>M>btYEv%kUrDQ?eR zeSdcLAK~!^&ASx<03VA81ONa4009360763o0E7XZlDkgBFc^j1zJWuAih;(CE7Ace zPI^%yRl*mVEDWWdvY?8oBVtFS%HW0PgSVk}TdI_RRB|LMmKFWyd_H~!0DK*E1#$;~ zdvH-ZGh2`^WBN#=NfM_tr8K1Jn8tA$MWfIUL6Ae()3VgJ*E`~ zz(ZD0^Z!B>LWsH19e7%ckLN~$ci{djLCLF5bXcd!PZ*wRl&ewRpj8UX3#$y zn}(xtXk2Mz2D+TPn`#3q@7oFOKL&fxtrRn>#(guTK`Glv9>^)S-R8w?t^Y=9ZdRCZc47ebn% zrOj~JeQ60?%b*eDT9*tt1l#xZN7cs6bS-Sx^R`__acsMe65BQ8h~-%4Ud_$JM|35a z$bNg{a)&-^i(b!wSjBS0q|<9${t5cbqGuLef-Q;NRw>>C001A02m}BC000301^_}s R0stET0{{R300000004}UhtdE5 diff --git a/test/data/out/read2tags_11.sam b/test/data/out/read2tags_11.sam new file mode 100644 index 00000000..410b4218 --- /dev/null +++ b/test/data/out/read2tags_11.sam @@ -0,0 +1,24 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.W6cJH8/read2tags_1.bam -t Ba -q Qa -p 1:1:1 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h read2tags_1.bam +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.18 CL:/usr/local/bin/samtools view -O bam -o read2tags_11.bam read2tags_11.sam +@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_11.sam read2tags_11.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 Ba:Z:A Qa:Z:H +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 Ba:Z:C Qa:Z:F +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 Ba:Z:G Qa:Z:D diff --git a/test/data/out/read2tags_12.bam b/test/data/out/read2tags_12.bam deleted file mode 100644 index feb112b7b8320bd1161bc1124d0ea583ec033a20..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2396 zcmV-i38VHOiwFb&00000{{{d;LjnN42klr%bK5u;b|0179Qux`x%AXTBB{l3G^MIg zA}w*6q-c_IoD&KXAqj16AxJO%L;Cm514PoY;&f-yOXe`LEIom51Ms~MkDThxcfWjp zcJ^*zbgtd8)*E($<+w_DR+Kb)_0F4jEMpwZf{y2o7c8J3(JV{LB8yRwZOeO7(2fy| z>#>-nJH+d8l%Bz?s4 zCrL^C$Qf6(p*rlVco7UVto6^|4zP|F7f#>Sjhh?AASF?(w4^C#C8HcN49uBs+rgjW zUJ>g(gLUG_)?IU=q2(&{R^dd6=$*pLD62}bI-YG}-L~{#V*0_b|7JLR^SY5PTIp>g zjnr1sTB+YkgH{^0(x{crTd9pPx_F)M*$v)i1?D-sr4e3N+bu0P3c{7~0lT_mPTI|_ zammLh@T?zA1J9Oj=r3(>=c(#wIMBvu!FKy@xi7M6x6fgczN;fnKUOr|2<&BBRPHHC z2uo2GZ^@?2ibq6q2BR^n5}}a6>J(l2kY}7mC}A7gLnz!cjv`oQ1m8IU%z-9Bc|p5* zwT{__lnhFwpfQD|@1aY8A4_3k$>9RLMc};1a$2B@Q(=EkJ8{hjpI$0 zP!tvwhdq<9MHW;dE31;CA6UAHt4Pc(q9tLl<07&25gj{DvVy=&8TrR+l*7*R zFpVXsCtDIj4DS+OT6Spq6W|Ssd{Z~@++VIdY#D$_+nV~qPVRD*-lo}oiURX$Wx6xV zo#UD53fo|YJX;Shm%cNxY!geVD3m1lV%1uRD{GDqd&? z^MZA-T*JJnJ=H^wF%k=49!8&tG9eMkOImDM8VQy=9r2#?9LK zzfGz+K(B~;C$WK5k68$cu;HaYoB(wPH-`nseN~!kU2V`mANG}l1Fa|K(tt&7)bGtB zCMgBKCE3Lc%GEIic!EF1_t(!+qBK z96g*Z%m7LrYPTK0hG~bo7z8bMSu;l;2*-sY4Y9p^|KxPpXw(%ZY)%wvyNNdXwIctN z&PM-LZ8wF+^Q|Z@o&&>@7(+#rhuS(VH(6Z3`?~_b1|>!EJbr|uqqan~B~;U$21Py; z<)k!i7YI;wM9FV=O1{Rh(`;QxPFD`v%9^Fh^CVDrgHZPpfj+43QhbbrMx{i5F&xuO zWUr87vuAfy`M6O^Adx%va|2Y=?G|(gGsB$yOi>?Jza2>!hjVnZ$8fS!zR%(a0V8VX zM=H>qLMJGs&+9~RI=`&)(psT^q2IX=1ool&?Y(FZhI=AU1M%fafJx>b^)$6x<_|{{ z@6ck`l6&*Rla`<7@sjJu}nww^0G~V57};PS;57KXq^PfqkHI#=t%TSu(^kHfCRBOZ+ezLpXq`;W+kla^wFmznnP=y3*@~x z>>U9TkSKKk5-k{5Du7sb6YVfZf;S@3B)7hLXaSNV6#$YVHIA@DU`u_un zZ}|YCV}xVDJY99z?W>`xj^+EGs-~))z#0FKJ-O<5^G|L8W^h{0UVV3VcJ?>;KgI3Y ztMAXw{sXQRmfyP-001A02m}BC000301^_}s0sw#kos!>5!!Q`dv%NuUE{Yd!Nh5X_ zLcex@XklQQ&i2NOxm@)|h9LSDuJmFeKA*mge%a=%R$=x)XbAK>=a-WwLdg40CnUEC zxgxZ7hFg*^Lb!oo9ECB&06xSc2*WrChQ8|)yINV+y2_pEY^&3agkVAlH&KPwS*D~` zF1vo7y`V z8~owOM=J<$3`+&ey_`A8-hgdCh`r_3TH>m4pHIN^27>`+SiE)a8zBzC1%TlMLxg6g z^r&up>v0!x|c?0eCPQYjXl znh?j`ucq6g*W#2G_10@@TYYXG+8~^+fd+4F*cIV)O{A9yA4Epa_vK&^a`yBP`DA@Z zw)Bvp_GU2D2BS8Jog{V2GBR9|R783H-G4U7A=7(WR;V>Yv$=z+9L)?DdhU!z8jGBfRk)X%RyFb2dr@(91n(UFS5MQjy_=14yH=B;R@f3hKlXvZpL4Sf059Xf`2d#jkFB8r_=G~=vdltYH0H8&h5 z{6oSkV!h|EP8>OgXH7MGdRZZ#J~#D2%Sn57;wY zYuasYgG;WX(6@iEOgvwDQLuC%oUf{r@krCrCEM=$)vnCz?XG}H2A+X9{aDj%EwGnw zP_?5dB`iaEvLWj#FZYNR40;n*r@|nI)hYY*vB)`%QOeeIfKaq!9L2EA2tEq}m;+0K zijww=dX=y>sTfeCqzQ$kAD|0>AIo53>EQwViGhnUFKCHsPKEzVZyBHOfNn8b?WMzy zG)dNZN>NnS9QI7Y7TLf=R@D_ne`nb`sbev>m{x?rj*G;yJvt7Y<|TodGV+fzRLBfr zQ7GC~RiP`&!^1<%2Vh2hr-(d68W2@M^QG$tRv6k#4?EV4m|m@Ngbr?z*oL|50g6*cMJfVF8vVxnObKn4)_!&Efq5ps=N9KciqGsL!s1H+w8 zRPjP{m=~;r?U~kX6RGKP&52k5>(KiolqrcxQPFb4vRJ6x%O3x^?tyx#Qf%J|{d#VU z5_(UQX{W+7lG<5JA5FT>V_J&npLF>z2LvTG5TGn2S=C`o0bz9q7$sVWuqKJ5F8Ik8ewPo<5SROYtb-F_`EP^{HEIE zmq`ApoK5~KZns3^#YT{e7oe~t!N7>>z^#{bljH*3-78B?opKZ!K zl@E@G`qBgEupS6>Vc|iM2%ghdxA${r(Xq`BmgQsXHVO=U?YV~ULqJi+gl$CwmC0Pz z(9Kw3Cxi!!uaa3A&IJqX2g?P>k*Jqoa}Vi&2x5QH)+~R#(uGCBDp=RqKAgPzQ(&At> zSYFNcquG-Q9?Fe|1A9i!W-HYHh_1-l6*LN@=|g>drs*SnBG|TWehm@6TqwTqKYbrw z^+HdFi}qu``>A6?dXA@0MsLqgfizpREt?D5G?~*T1OlY*lV6^`AcQ`) z>k26cp*wWZcn6P2%@g`S<6)YN>6p@pjt4YJ#&O(_f(Y5RU6sl!yWMW5UWE9BkRao^ z)>3k&Il7lQg- zIL9Rf2Zw7L$;5bVY}-EBfJXg61hZ%i+Yn!c%J;H*YkxBV3^oW_Gq@TEHzOK`y_IP$DvyaIkDqDiU?nVRnz7Gw}b@#h} z|MleL{iWHv_NLlk+zXe}I^%g+(fIVezv$WKG&I7V?@cdRNIxPY&#E#{P?&G4ds5P# z8BUwNgk?L#n|_pMh-JJg>y&1dh&~*s0}bBVuxrBUhDa|FK8W<5@5{j;yEL#B7MtWax)=7xi+9L;qXKV?oz8#RwfRk)T%RyG`2CQ)&91n(UFS5MQjy_=14rWTW-h4WJ9KpO=0-6|MHD-$XvSH^D2EI~%g`Ms z{6oSkV!aJmCypH5vt}Avt|EUG&6JqlDZGmFx)Q77I~LX*TMuVe5RQj$$K$te+Sa?S zb>6lnYS+@b*05`hy4JXBO}f@u*XqWYyn9pZ*$v+2CFTXYr7>RD+bu0Q3Zs?z0eiY@ z&AQEPaLK1A^z9!k6B|n}3YHFp^Hp^+9%)l_$#(mGwJ-B}w=ZClfu|!*Kh`wc2<+us zRP8BB3CmEPY{{m|%SS{D2E7TZQ(=(9>Xd!@P~@D(C}kTuKq%TXj$&A51fK-~%z-6A zMM?Wby-wJMR17Fm(uBg&56}g`k7Y2i^zZ=w#K1+F7qmn*r^5fGw~WvCK(`pJAEm>O zG)XpjN>NnS9QI7Y7TLf=R@D_ne`nbysbev>m{x?rj*G;yM|2!G%}W9^W#k`kP$4sf zMWJY4RmD_M?(gqgJ^(Z7TSep{(txN6GM26%SYc={J?vQ5mNQckycU*c1$ty*!12;E z_22^*n8p&+vn@$r4F3XO*iK{xGtdo+0!ugXB3Q0`Y@2{d$DRkmPwsM^-DdfHhC=IV zWqF3}EwEvE!Z$RK@95FRGH_?MUkYB&Z0}-;u2!}cHR)c1wW&v9qG1$31`zneOf-!Mx%v$bV5)%`V%x)k?#?Ev zcp(Gk1?ymYrghUqYPwu=A{M|p^gan?N@7w}wA`{R7Ap6$$A7MSpkAsJ+jF5`&y7(+ z?`SgXRCrEOJB#UqNjG^+OA-B(F8}3#prkqil%*t#`=G59G!)JR5YkxsptMdZ2Ecg` zzyxvpbx|z)UrIJX z8rKe8Ju)t>5GW7a?K-e&Igu_p!OB}U$}t4N@qnZec9uUr1zoll4a0=b3xmdQrcHi{ zH< zV?igSExVurtRqTsyHkoahMi{XQfj(#uvXSAQ=TV*`WuA$7YOXZe3#*4B(x?K`jg?9 z}QIGu=-t3$~c^(n>~h;o$`I2#0V5o zJ3BIg;uI=D(fYhi45#z!CaR;cB?qIkl@-z@1z66+5{>ea7`&IF9 zB=IgScb&MmUwrBLc`Z+cohXeJQ5VZ}L@h7N6nK;6#+H>_^oY_)h&;N7%1Ayr)IJ%M z7O2ir05Kc|^QOLjBk vce)@&B~`|@Rd|(Px#S@G8W_qU5N}%;+ToW1UkrP+cHn( zgX5vT^uRf+2LfGKcu*vQ=j_$({oGk}Z1aO<`PjON0v)eBSNDAgD9V_yt!SV!F=P$h zj1_i5c(C{?nU&#Ou)uz>T!0*jdI>i7kPe6-_7`o<^2aM(SR|~1b)7#3l*9`ttqXy? zSG)ZqKmrn_4M3s;1Iq*un`)x#=1Ay9EQ;hV)&N~XQ(Oha?UMkci^sNnhe~$GvV(R4 z_)41QcLICwg9S(TdkPM=$&r+dCj{s4Rz3|NOuh(GuS~SX&Nq?JV|*SU3ox-R?3-_t zkEnUHPQ{yy0DLasE?D48QT4;|4Cr^^u>UGT{0S(B)m*ct7#9uyRe+1%EQW*8xGBWf z4DL}0-BhU04hJgS>`E?L_$TEjuBi$DNs*RHMR_H!R!Rx+hES7nKwFCXIZ~>$I9LsC z(z9qdc{0HxxzTW7&k5>(L|5=$AZ@Bm&(F_MKSmoSn6_zt2@!r=sJ-w#eIH)+LT?I} z?1z5$Gsl+n9B(=qy*oPv!ic$R->W^}nYNd!p1+#@B2ch!aOTtI&L){&)$t?x|5g24 zUe$YMG!-1}tKHpYoJUxH$NwI9exni7tcXD#_7x;gP+mqA3 zot*p!qR?9g)*U(NxGUJ#XkkQDv<$ zBrU<+haVWOhWB){$+jW){50@SbRvWsE=k!F$gL~_sr~D-FYO3@@6fy$ znWL{wx9HYum}}g!WZkw$vvMSL=x@VO{zD-H001A02m}BC000301^_}s0stET0{{R3 J00000001KhX08AL diff --git a/test/data/out/read2tags_3.sam b/test/data/out/read2tags_3.sam new file mode 100644 index 00000000..fef1a019 --- /dev/null +++ b/test/data/out/read2tags_3.sam @@ -0,0 +1,19 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Xm6T25/read2tags_3.bam -t Ba -q Qa -p 1:1:999 -d ci DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_3.sam read2tags_3.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 4 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 BC:Z:TTGGCATC QT:Z:CCCFFFFE Ba:Z:ACTGTAAAAATTTGGTATTG Qa:Z:HHHHHHHFFFFFFEEBEEED +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 * * BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:CAGGCGCAGTCTGTCAATGC Qa:Z:DDDDDDDDDBDDDDDDEEDD +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 4 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:AAGTGATC QT:Z:BCCDFFFD Ba:Z:CCGCTGAGAATCCCATTGAC Qa:Z:FFFDDDDDDDDDDDDDDDDD +HS31_12693:1:1101:12330:2229#0 4 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 BC:Z:TCACGATC QT:Z:CCCFFFDD Ba:Z:GAGAAGCCAGAGTCCTTGTC Qa:Z:DDDDDDDDDDDDACDDDDDD diff --git a/test/data/out/read2tags_4.bam b/test/data/out/read2tags_4.bam deleted file mode 100644 index 79b8aa84e060f83e0a6d0253eea25a37ca067b11..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2343 zcmV+?3E1`@iwFb&00000{{{d;LjnMc2klr(bK|%X_8#S1s**dZ_L5XaBB_UGtdy#T zB59emBt?^y*K_IP$DvyaIkDqDiU?nVRnz7Gw}b@#h} z|MleL{iWHv_NLlk+zXe}I^%g+(fIVezv$WKG&I7V?@cdRNIxPY&#E#{P?&G4ds5P# z8BUwNgk?L#n|_pMh-JJg>y&1dh&~*s0}bBVuxrBUhDa|FK8W<5@5{j;yEL#B7MtWax)=7xi+9L;qXKV?oz8#RwfRk)T%RyG`2CQ)&91n(UFS5MQjy_=14rWTW-h4WJ9KpO=0-6|MHD-$XvSH^D2EI~%g`Ms z{6oSkV!aJmCypH5vt}Avt|EUG&6JqlDZGmFx)Q77I~LX*TMuVe5RQj$$K$te+Sa?S zb>6lnYS+@b*05`hy4JXBO}f@u*XqWYyn9pZ*$v+2CFTXYr7>RD+bu0Q3Zs?z0eiY@ z&AQEPaLK1A^z9!k6B|n}3YHFp^Hp^+9%)l_$#(mGwJ-B}w=ZClfu|!*Kh`wc2<+us zRP8BB3CmEPY{{m|%SS{D2E7TZQ(=(9>Xd!@P~@D(C}kTuKq%TXj$&A51fK-~%z-6A zMM?Wby-wJMR17Fm(uBg&56}g`k7Y2i^zZ=w#K1+F7qmn*r^5fGw~WvCK(`pJAEm>O zG)XpjN>NnS9QI7Y7TLf=R@D_ne`nbysbev>m{x?rj*G;yM|2!G%}W9^W#k`kP$4sf zMWJY4RmD_M?(gqgJ^(Z7TSep{(txN6GM26%SYc={J?vQ5mNQckycU*c1$ty*!12;E z_22^*n8p&+vn@$r4F3XO*iK{xGtdo+0!ugXB3Q0`Y@2{d$DRkmPwsM^-DdfHhC=IV zWqF3}EwEvE!Z$RK@95FRGH_?MUkYB&Z0}-;u2!}cHR)c1wW&v9qG1$31`zneOf-!Mx%v$bV5)%`V%x)k?#?Ev zcp(Gk1?ymYrghUqYPwu=A{M|p^gan?N@7w}wA`{R7Ap6$$A7MSpkAsJ+jF5`&y7(+ z?`SgXRCrEOJB#UqNjG^+OA-B(F8}3#prkqil%*t#`=G59G!)JR5YkxsptMdZ2Ecg` zzyxvpbx|z)UrIJX z8rKe8Ju)t>5GW7a?K-e&Igu_p!OB}U$}t4N@qnZec9uUr1zoll4a0=b3xmdQrcHi{ zH< zV?igSExVurtRqTsyHkoahMi{XQfj(#uvXSAQ=TV*`WuA$7YOXZe3#*4B(x?K`jg?9 z}QIGu=-t3$~c^(n>~h;o$`I2#0V5o zJ3BIg;uI=D(fYhi45#z!CaR;cB?qIkl@-z@1z66+5{>ea7`&IF9 zB=IgScb&MmUwrBLc`Z+cohXeJQ5VZ}L@h7N6nK;6#+H>_^oY_)h&;N7%1Ayr)IJ%M z7O2ir05Kc|^QOLjBk vce)@&B~`|@Rd|(Px#S@G8W_qU5N}%;+ToW1UkrP+cHn( zgX5vT^uRf+2LfGKcu*vQ=j_$({oGk}Z1aO<`PjON0v)eBSNDAgD9V_yt!SV!F=P$h zj1_i5c(C{?nU&#Ou)uz>T!0*jdI>i7kPe6-_7`o<^2aM(SR|~1b)7#3l*9`ttqXy? zSG)ZqKmrn_4M3s;1Iq*un`)x#=1Ay9EQ;hV)&N~XQ(Oha?UMkci^sNnhe~$GvV(R4 z_)41QcLICwg9S(TdkPM=$&r+dCj{s4Rz3|NOuh(GuS~SX&Nq?JV|*SU3ox-R?3-_t zkEnUHPQ{yy0DLasE?D48QT4;|4Cr^^u>UGT{0S(B)m*ct7#9uyRe+1%EQW*8xGBWf z4DL}0-BhU04hJgS>`E?L_$TEjuBi$DNs*RHMR_H!R!Rx+hES7nKwFCXIZ~>$I9LsC z(z9qdc{0IcxzTW7&k5>(L|5=$py5=To}Zti{w>MF@tYG2b%%{z@O_IH;<45%W z+xoY>t@q4mDrnkQ$Nixisp?ez{ZrLcwHLb6|FNHWdj8^*df9-Z#W?-$anK^@6aWApiwFb&00000{{{d;LjnMP0iBY~O2a@Dg{R^Rv}93SsQD4o zg^-)+4+#{U+{DSoMeVGzk>bi{aMhB98=pmd8hsl*lU7Mv!Qz2rhCt?g_ndhZLVRv? zD$1S^$HHlS!&a1YN%s_w6PZ#<6j3@NDbqL}Mt&rE^|IT2uFHOXxzMCS^9@vphev+T7L`$&jka$E8KYQfFKC#fo*v3#^6Bs5?iW_Jyiek68e?rGn+T!8_3`#kkPqvnMd}pDqDiU?nVRnz7Gw}b@#jf z{O#oA-O}t`dQ)vM?uDyqo$I;B}9q7Mh^K!dk7>^0$ZL!_4oA4Gc3_vK&^a{l-b`DAlL zcJz>;_IfbX2BS8Jog{V2GBUg-sfhCQyZ>a6L#DU1tWax)=7xi+9L;qXKV?oz8R3sfRk)T%RyG`2CQ)&91n(UFS5MQj^1O_4rWTW8J15yj3bnsHV!$|1whGIYlY z|CI2GSZ@Q?i6ck%teJ*Z>&Rb6GbN_C3a{e4uEgs2j)ir{*29?mCwq#S~@yFLF*}l(G#SAQbHxM=>ljg3p2g=D?Dm zqNM$zzE0SNR17Fm(uBg&570Tlk7Y2i^zZ=w#K1+F7qmn*r^5faw~WvCK(`oOKS+n4 zXp(I5l%lAtIqaE)EwX`$tg0)D{=u?MQpaL$F|7!L9T$mZ59l~>nwJD-%E&)op+aT| zi$c-9s*0(i+}+)^d;n(DH;TwZqybSCWUO32u)@$@dDyWoEoY`8cr7f?3iQarfa8^C z>cM*~FpVXsXIqlM82&jvx1GoeW}q7s1(t5&MX*}?*fs%^jy(^ApWNj-yUFsq429Ok z+VTwBTVTWTgl}jd-_fJ;O^N) zgf6XMz7o8i+1~jIU94>@YSO&~Yg3QJM8hb63?T6PnP?gja`h`5z*GY>#I}b6-JMNT z@j?d73)aE*OzWzN)O5M#L@a=H=zSE*l*FW{Xt`xsEL84!kN;HnK)qBcwr_=gJvBxN zy`{;lQ{g#D?JTDECf(#QEk*QCy8Pz@f|BY8P?nM`?t`{c&`>xNKuBZhgVH*w7y##9 z029RV*G07e=#}x{+1Q|}$0`IQY<1}m7eL*`?P0<3P?hdlUmFe2#zW;0KpTj;v|y1N z^?UP3NJb%UMRo}Ty1JGCPx!~g{`xtJeDGl_2Pv&ew&9-xJ0$8wpWs*07hdgJeJR-l zX5|$#3FzOEg|=1-W zV?igSExVurtRqTsvr~#|3_H!POR4F~!CJXynesFV)ZZY~KSy8>=DQ3ZBcU~^&|eJ4 zEEm};q}=S;Emb~jlnO-TmVIo2iu(PM?qFt^vyT)FVfDM7lyNvmH+u{xJLUU4i4iEG zc6MX}#VJ&RqV;*37*6MxO5m z2ggHw>49@t4+Off@SsQp&)JLH`?<5|*ycye^09Rl1v*}PuI~E~P?RxYThTyeV#pf0 z87u6B@L=&(GAqNmV1fN$wE#I1^%89EAsrAw>@V7y<pFi3D2W$PS{DL& zFLwJ!fCMB;8-PRy29^mRHq}Je&5_WJSQN=!tN~g=Q(Oha?UMkci-)#+he~$GvV(R4 z_)41Qw*q_bf(1u+dkPM=$&r+dM+E2aRz40OOuh(GFHE$>&Nq?JV|*SU3ox1*V8eYd@$x-ob|D*yhaYO2}`-Rb|>k2^ho@ky0zz=2|%es^+m@;&@N$L-1K-%n2d3(y%a zP2Lm$03VA81ONa4009360763o0E_{hlD$sDFcgK|h>0yxhKPY;$B1-5ij%e|kvjMX zoQ0v(Qx;S)puPYrO64I7&j)V71< zlv2tE&^>xt$FYB2?Aad<@&mD=I(a?>001A02m}BC000301^_}s R0stET0{{R300000006b!bxQyM diff --git a/test/data/out/read2tags_5.sam b/test/data/out/read2tags_5.sam new file mode 100644 index 00000000..0dd40b55 --- /dev/null +++ b/test/data/out/read2tags_5.sam @@ -0,0 +1,23 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags_5.sam -o /tmp/bambi.u6VHzC/read2tags_5.bam -t Ba -q Qa -p 1:10 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_5.sam read2tags_5.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 ACTGTAAAAATTTGGTATTG HHHHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 CAGGCGCAGTCTGTCAATGC DDDDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CCGCTGAGAATCCCATTGAC FFFDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAGAAGCCAGAGTCCTTGTC DDDDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 +HS31_12693:1:1101:12330:2220#0 4 * 0 0 * * 0 0 CAGAGGACAT DDDDDDDDDD RG:Z:1#0 ci:i:573 Ba:Z:CAGATGGAGT Qa:Z:DDDDDDDDDD +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_6.bam b/test/data/out/read2tags_6.bam deleted file mode 100644 index 9ad4cb7f69c0d4d94d0083c6a1d4659f8eca5fef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2357 zcmV-53Ci{#iwFb&00000{{{d;LjnMd2klr(bK5u)_8jHgR_)$VHJ9C5OCMj11Q#6;Yml_n!@N$n=hu6>81UZ0?{cM>E4kd5J>!*s??p z^#R5j_2!OV-weoRz)7~FU@ci8f8~F0AJ9G^5`dTqbMHD-$XvSH^D2EI~Yi>AB z_=kj7#Cp$Roj7s~&zfpzxr+Q%G*x1Hr|>Gy>q@MS?^xJyY$KdnK{y`18IRwbwXL^Z z>s{NLs9j6zTEnh2>RRKjHR)RCU8@^o^7gFQvunJ~OUw&)Lu0(Iw_93r6huJkoS@$#(mGwJ-B}w=ZClfoC92Z)=)u1orYR zs`eD6gk>mCwq#S~?{C~{6?l(G#SAQbHxM=>ljg3p2g=D?Dm zqNM$zUMFlrDh3oOX+mM?2j~Le$1<2$dUybTV&J083tFO@Q{n&8TgK;mpj(XAkJ8~s znk1V%r6?+E4tpkHi)>&btLloPzq4$U)UlXbOe?}*$36RoghJS%CY$vjUDd+}8fn}I@5iD0ewoSmKW6uKNCwIBdZnFG7L!ouG zvb?$NE%4m(gl{-UzGFle%fOx5jl1jhrCM%Y>Y_!M;6S~Lt3J}(R!zo|C) zC6a$CXOsVm+bz*}u@&Uv1t=^@FfgJzaO)-AB)LHMcO`%gC`F1Qd4!{*@kEU$u<1?% zl8*(Ql(y`G2C$AO#m!DB));o0txKut%E4M$vrKuO1nO@P>R%wR2lHKqkCD)tROnBJ zW0s5T6;f{Y?2an88>Ipfxnn=KKt=t2Np~l&6Cp+c)Jc$t~ zqIP~{0>vp*f}-_#n;1^#*G*nqEA%h)8~1_0KGeUy7u~^dPvmJJK70u7`^YdDs3Oi96E21uz>4;ihmMQQi%Z)87x#$t4lMs1y50#O8a;SYW zC@oN(r2t|$3T91xeJ1c7*RsMQV9m;>XYiF&Xixajh%y%B2VIE_PU4t}#RNLYXWKGQ z<%8p)zVyI3tOo*JSa?t*g6H(r?fu+YbZqm3W%<~;jsgR(JlF7j2q?;!u&rpIGMUR7 zx*03%gz#YTRWd8XxnP0)V7UM}67>>n?jaozLF_Nun&ppIy0A!C1?xI{3@C{gP+AuP zd9QZ+M}Pz*N*jPg2L_f2AU4%R*Ugd8jaU@PU917Rgr>L(h}$OtNEeT7`3{xrj%5e! z1n`wK&F=*E-Ukbg?)MZNY?C7?8BYk#;jMfcK$v_Hq+Xe5i=A&Gp~v_LPh3+K0Fojtm5TC8Uagc8;tioDd_DX>qU` z+)U!zS^Q*z&*etLfjuK-~nm*KrXPOTG1lu;vuOY&h3&j`yr|-k7 zUg+s?(SGQ6KXq(L&++uh=dQl=V_(GF~q100rR2WbnBbA2?!0W-=!0$FHDFLaJUvd&z(K+Yy@iRi`qnUG5 zxCq@Mx3veKs8}ZKp2gEN$yml%#IgxXk}QtLQ4k?hIZkZ~-(1=mZiiA#DGh`ywa#8sWE~S)nm>cxCmTzKJN{$w*vOtRmcszCTyddvSc`9Kl0g+sfQWAWhCWf_s zZOIOfMrYA@5`h@7;>Ss1+mBPw;U4k4 zO=vb>4i{vPuO*lHPd8s}@vkjD&;Q?kI}zrF7nm#8?-X)?f8GBCn*cTc_yYg{ABzYC b000000RIL6LPG)o8vp|U0000000000ZyI-g diff --git a/test/data/out/read2tags_6.sam b/test/data/out/read2tags_6.sam new file mode 100644 index 00000000..ea6119f3 --- /dev/null +++ b/test/data/out/read2tags_6.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.k5dqBd/read2tags_6.bam -t Ba,Ba -q Qa,Qb -p 1:2:2,1:1:1 DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_6.sam read2tags_6.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 TGTAAAAATTTGGTATTG HHHHHFFFFFFEEBEEED BC:Z:TTGGCATC RG:Z:1#0 QT:Z:CCCFFFFE ci:i:215 Ba:Z:CA Qa:Z:H Qb:Z:H +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 GGCGCAGTCTGTCAATGC DDDDDDDBDDDDDDEEDD BC:Z:TTTTATTT RG:Z:1#0 QT:Z:-71(())) ci:i:472 Ba:Z:AC Qa:Z:D Qb:Z:D +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 GCTGAGAATCCCATTGAC FDDDDDDDDDDDDDDDDD BC:Z:AAGTGATC RG:Z:1#0 QT:Z:BCCDFFFD ci:i:538 Ba:Z:CC Qa:Z:F Qb:Z:F +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 GAAGCCAGAGTCCTTGTC DDDDDDDDDDACDDDDDD BC:Z:TCACGATC RG:Z:1#0 QT:Z:CCCFFFDD ci:i:573 Ba:Z:AG Qa:Z:D Qb:Z:D +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_7.bam b/test/data/out/read2tags_7.bam deleted file mode 100644 index d77ddd73c26450f27657109fe17ed63e6bb36d55..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2281 zcmVPqvnMd}pvMfPBccTG({dI$L-Tm%A ze>*vOcVYJ4dsA&N?uE-~o$I;B}9tPcn3K!cCg?3!@8AyP}24Mj11Q#6;_`9_Mc30@bs3J6>7!M+;C8pqq**)yhI^mCwq#S~z7m1uap{snCC}E$#C?gjLuNC2ka09F=$XL35V1=Q*^sr;Sx15=Z;I*(kE6^hg1CE!T zsRtjhz%-Vio^442bNJ`@+;$=>m;rB46j-{67r}DnW7`BwI`%vedh(R(>^jTuG89^u zE6X!%Z-EWV6S|>+d`FMYmw`L89cSg*aHexTFqWp6U5Z;c(iaCqbE6x)TmiTtg1fQH z2)(z0`BKF7%=XTg=yGLSQRD7=kT&&5EHsP)@Bj>dn2DwlAy>b`0W38zLu`9E(B0WY z6)$AKx?mq{&$O-@OHG$+PQ(UShu%k`Oi4_Nik4fJ#e(IY_xMkF57 z0+>LKzfP(JK(CAk&*lbJJ!T;Y!j_l*a01j_+$IZ-hpJT9`r2rCHXbSm1KL2Wr3H&T zsNd^HLNW?=E3!)%gsW=_@PvPi?XO>>@CP4`a*)!hWE=iDu!Eys^a*}t_`<8*t1k^U z5j3tHx_V?>SRsTw#BSGtP0NXN(Fs!CvSE%P5RL~y8ewPo)1%R4E73%l(0L}%=*_gr zF9rFhbT;`-vE2q5FSa7McmWJc5)2Vh9b)TwxXIuG-rtn~HV7$F6v+dmjz$wTnh;I5 z8U*=Rgp<;iUC@B2BT8|-Q;Ic)lVM?Kz#WUV3PSK15NE$ z#r+Y*yR_VOMaMEfT9%Kkt0>U%%5!z!2ZN%F3EPSWDicH2 z(9K+7M}!B7ucBFL&IJkV2g?P}k*Js8a1ZVP31WZI)+~QM(}hLCD%jWgLqJKqfYQ1U z$a}HdKLR8mQOW=$Ixw(I0I{hix^9jHZ^WWV?raUv1vJG~K-@kFK)QHn%Xg?`cPu+_ zCxEY{X?`oP_bym)bhoFVV4ECC%6LR@4j<*?1VZGCAoapRTkL!j3O&Z>0kQxS>)gKj zM*fJJN9$O;@d&`@0`7tZz7SPEq-Q|C3(5Yg0^%<~Iqc?|HO07S_%8xnJhK=MM&qUs zUo*HzC3I7vK072-xY?ClwD3peBiB>~fTT!CrJ}r&S1Y9idqb$nIG`;>{TwM(S{$SX zetG?Ia`PyHU(17rgblS;{{}%-)i01Xg+H0tiiB+BQ=3imEJSE>!S_P@^j&z_3%w~^ zv+w)egN`lfIo@DJ7rqbv2AtB;C{d*n?qbWb?$Pn{D*$}$ask=_xP!CK zncade1wLWCOhtwnVuaZOg~;MKiGm2`nh?(vwYj<6CftT|$~g}nRu$Si&+Gg{<&}i{ z$4bM?fcXyYpP033YDI`~T+Uh0PY3$eshypyhf%VKEDK{><0|EDnhk0EeLwwKtlj}t zeom}l-U=459#BsaM8xFEQbpTXICVi9=GMQ4J5R;}HalP^(~*jprm2NROWVWeQhJOU z>4)L**DTMcd&S$)iXEzW^{>TuTnmE+ivim?#w6yvm=B2iRoX0+d7_^i*mZWUMm znqrqqWg|6}h2ssLwTJx%aov95hywrsABzYC000000RIL6LPG)o8vp|U0000000000 DzwS)A diff --git a/test/data/out/read2tags_7.sam b/test/data/out/read2tags_7.sam new file mode 100644 index 00000000..6ca747a8 --- /dev/null +++ b/test/data/out/read2tags_7.sam @@ -0,0 +1,22 @@ +@HD VN:1.4 SO:unsorted +@PG ID:SCS PN:HiSeq Control Software DS:Controlling software on instrument VN:2.0.12.0 +@PG ID:basecalling PN:RTA PP:SCS DS:Basecalling Package VN:1.17.21.3 +@PG ID:Illumina2bam PN:Illumina2bam PP:basecalling DS:Convert Illumina BCL to BAM or SAM file VN:V1.13 CL:uk.ac.sanger.npg.illumina.Illumina2bam INTENSITY_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities BASECALLS_DIR=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BaseCalls LANE=1 OUTPUT=/dev/stdout SAMPLE_ALIAS=ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers STUDY_NAME=Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ COMPRESSION_LEVEL=0 GENERATE_SECONDARY_BASE_CALLS=false PF_FILTER=true READ_GROUP_ID=1 LIBRARY_NAME=unknown SEQUENCING_CENTER=SC PLATFORM=ILLUMINA BARCODE_SEQUENCE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT VERBOSITY=INFO QUIET=false VALIDATION_STRINGENCY=STRICT MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false +@PG ID:bamadapterfind PN:bamadapterfind PP:Illumina2bam VN:0.0.129 CL:bamadapterfind level=0 +@PG ID:BamIndexDecoder PN:BamIndexDecoder PP:bamadapterfind DS:A command-line tool to decode multiplexed bam file VN:V1.13 CL:uk.ac.sanger.npg.picard.BamIndexDecoder INPUT=/dev/stdin OUTPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam BARCODE_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/lane_1.taglist METRICS_FILE=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/12693_1.bam.tag_decode.metrics VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true BARCODE_TAG_NAME=BC BARCODE_QUALITY_TAG_NAME=QT MAX_MISMATCHES=1 MIN_MISMATCH_DELTA=1 MAX_NO_CALLS=2 CONVERT_LOW_QUALITY_TO_NO_CALL=false MAX_LOW_QUALITY_TO_CONVERT=15 VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:spf PN:spatial_filter PP:BamIndexDecoder DS:A program to apply a spatial filter VN:v10.14 CL:/software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -c -F pb_align_12693_1.bam.filter -t /nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/archive/qc/tileviz/12693_1 --region_size 200 --region_mismatch_threshold 0.0160 --region_insertion_threshold 0.0160 --region_deletion_threshold 0.0160 pb_align_12693_1.bam ; /software/solexa/pkg/pb_calibration/v10.14/bin/spatial_filter -a -u -F pb_align_12693_1.bam.filter - +@PG ID:bwa PN:bwa PP:spf VN:0.5.10-tpx +@PG ID:BamMerger PN:BamMerger PP:bwa DS:A command-line tool to merge BAM/SAM alignment info in the first input file with the data in an unmapped BAM file, producing a third BAM file that has alignment data and all the additional data from the unmapped BAM VN:V1.13 CL:uk.ac.sanger.npg.picard.BamMerger ALIGNED_BAM=pb_align_12693_1.bam INPUT=/dev/stdin OUTPUT=12693_1.bam KEEP_EXTRA_UNMAPPED_READS=true REPLACE_ALIGNED_BASE_QUALITY=true VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true ALIGNMENT_PROGRAM_ID=bwa KEEP_ALL_PG=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:SplitBamByReadGroup PN:SplitBamByReadGroup PP:BamMerger DS:Split a BAM file into multiple BAM files based on ReadGroup. Headers are a copy of the original file, removing @RGs where IDs match with the other ReadGroup IDs VN:V1.13 CL:uk.ac.sanger.npg.picard.SplitBamByReadGroup INPUT=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/12693_1.bam OUTPUT_PREFIX=/nfs/sf32/ILorHSany_sf32/analysis/140420_HS31_12693_A_H8M2LADXX/Data/Intensities/BAM_basecalls_20140421-132642/no_cal/lane1/12693_1 OUTPUT_COMMON_RG_HEAD_TO_TRIM=1 VALIDATION_STRINGENCY=SILENT CREATE_MD5_FILE=true VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false +@PG ID:bambi PN:bambi PP:SplitBamByReadGroup VN:12.34 CL:bambi select -i /nfs/users/nfs_j/js10/npg/bambi/test/data/read2tags.sam -o /tmp/bambi.Prkj5q/read2tags_7.bam -t BC -q QT -p 1:1:1 --replace DS:convert reads to tags +@PG ID:samtools PN:samtools PP:bambi VN:1.18 CL:/usr/local/bin/samtools view -h -o read2tags_7.sam read2tags_7.bam +@SQ SN:phix-illumina.fa LN:5386 +@RG ID:1#0 PL:ILLUMINA PU:140420_HS31_12693_A_H8M2LADXX_1#0 LB:unknown DS:Study Illumina Controls: SPIKED_CONTROL,ERP005431: High-throughput RNA sequencing of the main olfactory epithelium of odour-exposed mice. This data is part of a pre-publication release. For information on the proper use of pre-publication data shared by the Wellcome Trust Sanger Institute (including details of any publication moratoria), please see http://www.sanger.ac.uk/datasharing/ DT:2014-04-20T00:00:00+0100 SM:ERS427447,ERS427448,ERS427449,ERS427450,ERS427451,ERS427452,ERS427453,ERS427454,ERS427455,ERS427456,ERS427457,ERS427458,phiX_for_spiked_buffers PG:BamIndexDecoder CN:SC +HS31_12693:1:1101:5133:2240#0 77 * 0 0 * * 0 0 CTGTAAAAATTTGGTATTG HHHHHHFFFFFFEEBEEED RG:Z:1#0 ci:i:215 BC:Z:A QT:Z:H +HS31_12693:1:1101:5133:2240#0 141 * 0 0 * * 0 0 TAGCTGTAGCAAAATTACAG EECDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:215 +HS31_12693:1:1101:10450:2212#0 77 * 0 0 * * 0 0 AGGCGCAGTCTGTCAATGC DDDDDDDDBDDDDDDEEDD RG:Z:1#0 ci:i:472 BC:Z:C QT:Z:D +HS31_12693:1:1101:11147:2231#0 141 * 0 0 * * 0 0 GGACTAGGAATGCCAGTAAG EECDDDDDCDDDDDDCCDC@ RG:Z:1#0 ci:i:513 +HS31_12693:1:1101:11999:2206#0 77 * 0 0 * * 0 0 CGCTGAGAATCCCATTGAC FFDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 BC:Z:C QT:Z:F +HS31_12693:1:1101:11999:2206#0 141 * 0 0 * * 0 0 TTCAAAGCTTTTTAGACAAC ECEEEDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:538 +HS31_12693:1:1101:12330:2229#0 77 * 0 0 * * 0 0 AGAAGCCAGAGTCCTTGTC DDDDDDDDDDDACDDDDDD RG:Z:1#0 ci:i:573 BC:Z:G QT:Z:D +HS31_12693:1:1101:12330:2229#0 141 * 0 0 * * 0 0 CAGATGGAGTCAGAGGACAT DDDDDDDDDDDDDDDDDDDD RG:Z:1#0 ci:i:573 diff --git a/test/data/out/read2tags_8.bam b/test/data/out/read2tags_8.bam deleted file mode 100644 index 76ffd59be96eefd8d0a117a30298ba5402fd2f60..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2325 zcmV+w3F`JAiwFb&00000{{{d;LjnMX2klr(bK5u)_8jHgRPEkTHJ9DmNF?=e9Brv8 zlt@dgAt{=qJkALPiI9YtFB7C6^XvAfZ37}{*>PqvnMd}piX%Z_ccTG(--m{#y7S$C z{&sTmeqnU3+_Bahc7o-&N_ke4G&+6n%{!Jc4rW2ebH@u7&|5Ui(z3{66lB}-o)olW z1mn6dX6X*`x*ugJVks|+Dxqm9qW61hPlLB6?3!@8A<|2P_d~tod2%oaIemJFJhHhV zJ9@}ad)w=4y+ISiisLF_De13CBBDJ1?mZjikm(&QO4OL4>C8rDhNilMvH}J0v1PFw z>I%jhbY}Lry6KTkkCSvqi(Z=VdaQQt9S?>qH#FVA3O``O@+V5Vp`if;|#Udc- zBbHxDO5#V(xS|c!VPD0IV4z{GfA(&Gb-Xxt`nGOdUn>SFiDIQCO*tzW<&a@u&UD)j z{*>^FSnnCE6Gyi0niCBzSE08GCrU)`6kbMIRf^T|Y!mCYr3Vw!4~G4>!{OUEP3ydE zy=z({wQXr_tKYT;ZEM)JMs4e?ZM9>J&fnyFc8#}LfqBkuXoT0*c1sJ6f^cPgz^?9? zlXi1!T=FprJnKi(z_Xt z$~{F1VJXVuE!mV=@rY>7pf_e!A`CKEouW%0@{H36C2T``2!(sbQ3T73;4>$HIj|%s zFK9Qf)-l_Vk^w~u8dF&M9=ZVdu@okj93H@*7&tGooEE6!RQSL2mht%>=oX>%qjdO* z#_=XgC<=>;!=6dlA`6(v%BrO3A1vL(RV3yX(ULIOagkX1h>im%SwUc?jQsNr%4LSI zC=~6>G9N3-{r!Ez2Vh2Zqli328W2@Mv!&ztW)N6Q7u)8QX-`xHues@(z8;zwaJ+O4 z-T#0Erm+O|WJ_Wg!@Iy2mK~b@1ayNU-_#8}_m?XVTLxg#wx+)Dle=7{H)(dCqQJad zneNPT=Xhqi!Z(;9&(_0>rSD8E+g>>q?CDhZXG=qjF3n9G>hnXO>8yQtxdLzlgWG49 zA-XdC=~D1|V!0Pfbh)z3uuk_1tPMRB6AeNiGJwDzCZahDk)vN@AExRX0k&N1>&|4P ziWi!}ykH$H*D$Z^NOhNEjKl(%hu$ZlOh`oXk``N*MndIY_IS^A57bMQV*5_$*K=bO z(>oeZS{0s>#7ZOjV9-q#(LzN3q|19bASkJh0A&eDqb_JG1r3EW0faP?J}9Z;k^yiY z1TaAye_d2_fL;;xUW^T@daOb~!bX?=Z~@dE+#D7h_f_exb+tkNY}i*00koc&O9K|U zQNK5jn4}cqmSh(*psQmF@C1KM?604r$Oj*`vX{`ZU>p89utTD5_z8X`ec{!t)t8b@ zkjAkBM-OKUGXTm1ciRqZn0Ba(POx&9wQ}@9a9kj1i0$Q1PeGTBMa?kb^TMF^n`on7 zBKfCsHu_E6ZivS7tsocAL19UZff40_TQBJ*$pyN$BOD#IC#pSxO?MiQ zd?@IoG-VewfOSO4Z+1$)#<0_DT}Vw=4%W(=rONXpPMUN<*gvg_NsEp*3L+z77 zX@ROV0T9DcFsLB`Yif)~tAX246{u_JkjeC}Tl>&=t$zB#M|=OrV2&wk@(m zJ~$rgOAnmGdLYn+xeG-icurp3-p`#y$2LEjriabz(AV+Gb#%{zfTD~Enu-Q0lbNic z>#>4X2oDxtC9^V|0~XlxmvfLKQ7^&f9?}63#NNEAS^j*b3-Xwiu&&cbpOR<}rFAZl z_iDFy1V})lv;j!8U|^{LVqHzN-5d$sh(wXx#_FL3G{sdw+&&3FI)7}+cc^4{EInu^ zfUlrQb|83;|0!XK57TvHVQk|HgYlJZhst&{@d^`R!?fVKp6Go+MBez59! z#m&dj?UM;Umm3WSHq=_(TLe{AH%HnS{=vZ}HC-@mQ~weo{J2ni;d}Z%xarE03$37ql&*l#=?Z~jTKJcDy(cKY4P$;tQd|0QluPXB&# z@?ZKFx;Et$001A02m}BC000301^_}s0swIVoszvy!!Q(u-M)cChKhm4jw8|mDL3hl z5(x?a(0E~hdg{W!fch4s%HW0PgSP=E6dJXvRC1&!RutWHKA-!B5c=Bb3TY3a2XxW- z=8j0OGI)k`o@WIV00|T`$g(0$r%9NgSYtf0sjM4B=oevF*O5vLN)gSSW zo6?Yu_b~>;p2(2rxid@VZVrn%A5mg?5JjhNcXc^*$efp^bRDiwFb&00000{{{d;LjnMo2kltPa^tua^(^IGs**jbvPmkYNb2EkE2XNY zL|SHsq-c`zxK}7xgd~jlYJ#+_`ObV$E+CRtyWKOLo=0a9l`Vn5#l;11&cOv|hWp*W z|9o=t?#k@lcoV%p>V?Zmo$gU@pfKN5kEEnM zGn_Pa3Cp&KH}xpb5X*Q~)+x;@;eF88`Z|2IZtn=EYa)9I_d#Uzd|wU*E@#h;$S3Q2 zvZaRz^*8;2-XFG3>?EmEmXX07Nrjh>pZ$*pIYjzE%L=t-Xf}6Hm7|&AqP#>Q{A^ev zhq{5WhP}Bnsqgz_-RC6R(z2fw+dgYL_m2ldwij7mXh-j{X$Mm^+i;a{h6B}h^71Mq z*&fSBl96Q38CUg@HX3O7Dje!qADmwdv4OANx&y~BZ*NtTR7AD2ie{WujBRjl_M)`=s>@T{qhmaE8LMN>7V4=S(XyspIR_>P4Q$2P*L6@;U~o6+daSzCJB zl`h)SSnEo9R~mGsVOJV;rEym}?@HYmsdi;vZ+8VuGVlz<>4%zTYk|Fd zgQ^`xDPbAPlMPu{dAUclU{IT|Iu!;vtWK%Wry}PxMk!meek_BDrH2Oi69X4zUeFTNoQnQ0drSX(2Xu?k-ClP1 zfhNg1PbrGZn!}z+*diO4$f~-c=x;1rCv`057SoC_*l`h9wnxW~)4U`wQ%3%Nh6))W zEDA-tswyU``uOV`H5e9fw(R}Ipffa`K(!-8*V>wd|!DnH4R$xRH1{^Ov z(+J*UfoUv3J>8H5#_%ukrR_vkFa_P9D6k9@FM{RD$F>QWbnICm`pI3cv->Q6%ur}u zuPkqFdkZ|bJkd9tBi}Ki%VpqBZO2);Htgxl280-Bjg&lIDn}JW{7PM2ZlQx zYvP0EFfUjK+cT}(##2+}nq#p5)}i)UC{q%XqN3%7WwB7Xmo@&!x(Di|NwK{U`t`9f zO6UVkrkx7UNNQ&>eKP4fk7+5qe^TYY91xV$K!CE8WU&I;N=7t)Z%`t7IzqSn%$LB)768uddD*L;~Vc|iN2tB8-Ztv&LqGOxyEz8H&Z4?-I<++CMgF}(Vgl$FxnaNz{ z(9Kw3Cxi!!uaa5m&IJqX2g?P>k;s=|a}VKw2x5QH<}AOz(uGCBDp=RqKA3y1wz5#kR(IjrU#Ym#wM@LvVEcxN%_4@XTR ze#hV*mC$vC6m~dJ;bvEIQNll}KXP4D0Z6K}R4d9Wd9_kY@Hc>*j04(Iq~u7g(&Av% z&-M@h3NM~b@P*uHIItntQa&Kas+0l^Ci>*!;sPl#TC=lYfs}h_KNF3L)Oo?ab@O&a z_~POO5dKczh1b2%o502UN$FmFY)H@XCgb7T^HU&^SP%WLTF-YT?S-r7uO>eW9xOUI zvq^K0lkBhRc#r;negBr%_nsL|1aB2>qztr3(6aWApiwFb&00000{{{d;LjnMF0iDu4PQySD z1>gTA>eX=gT9@Pc zvYm@&g1`-ez<->CMr&4vjnZtiy()|`ij@}krefw>Oi>h^_eZQcth0uC zcL0^5=`4bkVldo1aLcfC$CmixW7May-XbhxNLP zJooz8Ym0@EEfygnOAWd`*?g7Rydata, exp_rec->data, got_rec->l_data)) { - if (verbose) fprintf(stderr, "Record different\n"); - failure++; - break; - } - } -#endif - BAMit_free(bexp); BAMit_free(bgot); - char cmd[1024]; - sprintf(cmd, "samtools view -o /tmp/got.sam %s ", gotfile); - system(cmd); - sprintf(cmd, "samtools view -o /tmp/exp.sam %s ", expectfile); - system(cmd); + FILE *getfp = fopen(gotfile, "r"); + FILE *expfp = fopen(expectfile, "r"); + char getline[2048]; + char expline[2048]; + + if (!getfp) { + fprintf(stderr, "Can't open file %s\n", gotfile); + exit(1); + } + + if (!expfp) { + fprintf(stderr, "Can't open file %s\n", expectfile); + exit(1); + } - FILE *getfp = fopen("/tmp/got.sam", "r"); - FILE *expfp = fopen("/tmp/exp.sam", "r"); - char getline[1024]; - char expline[1024]; + // skip header + while (fgets(getline, 2047, getfp) > 0) { + if (getline[0] != '@') break; + } + while (fgets(expline, 2047, expfp) > 0) { + if (expline[0] != '@') break; + } - while (fgets(getline, 1023, getfp) > 0) { - fgets(expline, 1023, expfp); + // compare read records + while (true) { if (strcmp(getline,expline) != 0) { fprintf(stderr, "Expected: %sFound : %s\n", expline, getline); failure++; } + if (fgets(getline, 2047, getfp) == 0) break; + if (fgets(expline, 2047, expfp) == 0) break; } fclose(getfp); fclose(expfp); @@ -385,98 +411,98 @@ int main(int argc, char**argv) // minimal options if (verbose) fprintf(stderr,"Test 1: minimal options\n"); - sprintf(outputfile,"%s/read2tags_1.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_1.sam", TMPDIR); setup_test_1(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_1.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_1.sam"),verbose); free_args(argv_1); // overlapping reads if (verbose) fprintf(stderr,"Test 2: Overlapping reads\n"); - sprintf(outputfile,"%s/read2tags_2.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_2.sam", TMPDIR); setup_test_2(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_2.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_2.sam"),verbose); free_args(argv_1); // remove first record if (verbose) fprintf(stderr,"Test 3: remove first record\n"); - sprintf(outputfile,"%s/read2tags_3.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_3.sam", TMPDIR); setup_test_3(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_3.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_3.sam"),verbose); free_args(argv_1); // remove second record if (verbose) fprintf(stderr,"Test 4: remove second record\n"); - sprintf(outputfile,"%s/read2tags_4.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_4.sam", TMPDIR); setup_test_4(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_4.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_4.sam"),verbose); free_args(argv_1); // handle single reads if (verbose) fprintf(stderr,"Test 5: handle single reads\n"); - sprintf(outputfile,"%s/read2tags_5.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_5.sam", TMPDIR); setup_test_5(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_5.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_5.sam"),verbose); free_args(argv_1); // specify duplicate tags if (verbose) fprintf(stderr,"Test 6: specify duplicate tags\n"); - sprintf(outputfile,"%s/read2tags_6.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_6.sam", TMPDIR); setup_test_6(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_6.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_6.sam"),verbose); free_args(argv_1); // use --replace option if (verbose) fprintf(stderr,"Test 7: use --replace option\n"); - sprintf(outputfile,"%s/read2tags_7.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_7.sam", TMPDIR); setup_test_7(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_7.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_7.sam"),verbose); free_args(argv_1); // use --merge option if (verbose) fprintf(stderr,"Test 8: use --merge option\n"); - sprintf(outputfile,"%s/read2tags_8.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_8.sam", TMPDIR); setup_test_8(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_8.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_8.sam"),verbose); free_args(argv_1); // use --merge option with duplicate tags if (verbose) fprintf(stderr,"Test 9: use --merge option with duplicate tags\n"); - sprintf(outputfile,"%s/read2tags_9.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_9.sam", TMPDIR); setup_test_9(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_9.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_9.sam"),verbose); free_args(argv_1); // use --replace option with duplicate tags if (verbose) fprintf(stderr,"Test 10: use --replace option with duplicate tags\n"); - sprintf(outputfile,"%s/read2tags_10.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_10.sam", TMPDIR); setup_test_10(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.sam"),verbose); free_args(argv_1); // write tags to read 2 from read 1 if (verbose) fprintf(stderr,"Test 11: write tags to read 2 from read 1\n"); - sprintf(outputfile,"%s/read2tags_11.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_11.sam", TMPDIR); setup_test_11(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_11.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_11.sam"),verbose); free_args(argv_1); // write tags to read 1 from read 2 if (verbose) fprintf(stderr,"Test 12: write tags to read 1 from read 2\n"); - sprintf(outputfile,"%s/read2tags_12.bam", TMPDIR); + sprintf(outputfile,"%s/read2tags_12.sam", TMPDIR); setup_test_12(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); - checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_12.bam"),verbose); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_12.sam"),verbose); free_args(argv_1); printf("read2tags tests: %s\n", failure ? "FAILED" : "Passed");