From df384d50463ca11a6a9943671010db971242a07a Mon Sep 17 00:00:00 2001 From: jenniferliddle Date: Tue, 21 May 2024 10:19:05 +0100 Subject: [PATCH] Updated to handle NanoSeq --- src/read2tags.c | 252 ++++++++++++++++++++++++--------- test/data/out/read2tags_11.bam | Bin 0 -> 2399 bytes test/data/out/read2tags_12.bam | Bin 0 -> 2396 bytes test/t_read2tags.c | 115 ++++++++++++++- 4 files changed, 290 insertions(+), 77 deletions(-) create mode 100644 test/data/out/read2tags_11.bam create mode 100644 test/data/out/read2tags_12.bam diff --git a/src/read2tags.c b/src/read2tags.c index 513c8074..a7124e4d 100644 --- a/src/read2tags.c +++ b/src/read2tags.c @@ -45,7 +45,8 @@ along with this program. If not, see . * position record */ typedef struct { - int record; + int record_from; + int record_to; int from; int to; } pos_t; @@ -129,30 +130,61 @@ static int bam_aux_cmp(const uint8_t *s, const uint8_t *d) /* * Parse a comma separated list of positions - * Format is r:s:e,r:s:e,r:s:e,... + * Format is rf:rt:s:e,... + * or is r:s:e,... if rf and rt are the same + * or is s:e,... if rf and rt are both zero * - * where r is record number (0, 1 or 2 and is optional) + * where rf is record number to read (0, 1 or 2 and is optional) + * rt is record number to write (0, 1 or 2 and is optional) * s is the start position in the read string * e is the end position in the read string * start and end positions are 1 (not zero) based */ -static void parse_positions(va_t *poslist, char *args) +static void parse_positions(va_t *poslist, char *arg_string) { - char *argstr = strdup(args); + char *argstr = strdup(arg_string); char *save_s; char *s = strtok_r(argstr,",",&save_s); while (s) { pos_t *pos = calloc(1, sizeof(pos_t)); char *save_p; - char *p = strtok_r(s,":",&save_p); if (p) pos->record = atoi(p); - p = strtok_r(NULL,":",&save_p); if (p) pos->from = atoi(p); - p = strtok_r(NULL,":",&save_p); if (p) pos->to = atoi(p); - if (!p) { - // looks like s:e format - pos->to = pos->from; pos->from = pos->record; pos->record = 0; + int n = 0; // number of position arguments + int args[4]; + + char *p = strtok_r(s,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + p = strtok_r(NULL,":",&save_p); if (p) args[n++] = atoi(p); + + switch (n) { + case 2: // s:e + pos->record_from = 0; + pos->record_to = 0; + pos->from = args[0]; + pos->to = args[1]; + break; + case 3: // r:s:e + pos->record_from = args[0]; + pos->record_to = args[0]; + pos->from = args[1]; + pos->to = args[2]; + break; + case 4: // rf:rt:s:e + pos->record_from = args[0]; + pos->record_to = args[1]; + pos->from = args[2]; + pos->to = args[3]; + break; + default: + fprintf(stderr, "Invalid pos format: %s\n", arg_string); + exit(1); } - if (pos->record < 0 || pos->record > 2 || pos->from == 0 || pos->to == 0 || pos->from > pos->to) { - fprintf(stderr,"Invalid pos argument: %s\n", args); + + if (pos->record_from < 0 || pos->record_from > 2 || + pos->record_to < 0 || pos->record_to > 2 || + pos->from == 0 || pos->to == 0 || + pos->from > pos->to) { + fprintf(stderr,"Invalid pos argument: %s\n", arg_string); exit(1); } va_push(poslist,pos); @@ -169,6 +201,8 @@ static void usage(FILE *write_to) fprintf(write_to, "Usage: bambi read2tags [options]\n" "\n" +"Convert portions of a read into tags\n" +"\n" "Options:\n" " -i --input BAM file to read [default: stdin]\n" " -o --output BAM file to output [default: stdout]\n" @@ -178,13 +212,25 @@ static void usage(FILE *write_to) " [default: " DEFAULT_KEEP_TAGS "]\n" " -d --discard-tags comma separated list of tags to discard when merging records\n" " [default: " DEFAULT_DISCARD_TAGS "]\n" -" -p --positions comma separated list of positions\n" +" -p --positions comma separated list of positions (see below)\n" " -m --merge merge duplicate tags\n" " -r --replace replace duplicate tags\n" " -v --verbose verbose output\n" " --input-fmt [sam/bam/cram] [default: bam]\n" " --output-fmt [sam/bam/cram] [default: bam]\n" " --compression-level [0..9]\n" +"\n" +" comma separated list of positions, where each position has the format:\n" +" Format is rf:rt:s:e,...\n" +" or is r:s:e,... if rf and rt are the same\n" +" or is s:e,... if rf and rt are both zero\n" +"\n" +" where rf is record number to read (0, 1 or 2 and is optional)\n" +" rt is record number to write (0, 1 or 2 and is optional)\n" +" s is the start position in the read string\n" +" e is the end position in the read string\n" +" start and end positions are 1 (not zero) based\n" +"\n" ); } @@ -447,17 +493,25 @@ static void shuffle(char *s) /* * add a new tag to our taglist, or append to existing tag */ -static void add_or_update(va_t *va, char *tag, char *data) +static void add_or_update(va_t *va, char *tag, char *data, int r) { int n; + char recno = r + '0'; + char key[4]; + + key[0] = recno; + key[1] = tag[0]; + key[2] = tag[1]; + key[3] = 0; + for (n=0; n < va->end; n++) { - if (strncmp(tag,va->entries[n],2) == 0) break; + if (strncmp(key,va->entries[n],3) == 0) break; } if (n == va->end) { // add new tag - char *e = calloc(1, strlen(tag) + 1 + strlen(data) + 1); - strcpy(e, tag); + char *e = calloc(1, strlen(key) + 1 + strlen(data) + 1); + strcpy(e, key); strcat(e, ":"); strcat(e, data); va_push(va,e); @@ -507,31 +561,43 @@ static void add_tag(bam1_t *rec, char *tag, char *data, opts_t *opts) } /* - * Process one record + * Process records */ -static bam1_t *process_record(bam1_t *rec, opts_t *opts) +static void process_records(bam1_t *rec1, bam1_t *rec2, bam1_t **newrec, bam1_t **newrec2, opts_t *opts) { pos_t *pos; - int recno = -1; - char *tag_data = calloc(1, rec->core.l_qseq+1); - char *qtag_data = calloc(1, rec->core.l_qseq+1); + int readno1 = -1; + int readno2 = -1; + char *tag_data = calloc(1, rec1->core.l_qseq+1); + char *qtag_data = calloc(1, rec1->core.l_qseq+1); va_t *new_tags = va_init(10,free); va_t *new_qtags = va_init(10,free); - if (!(rec->core.flag & BAM_FPAIRED)) recno = 0; - if (rec->core.flag & BAM_FREAD1) recno = 1; - if (rec->core.flag & BAM_FREAD2) recno = 2; + if (!(rec1->core.flag & BAM_FPAIRED)) readno1 = 0; + if (rec1->core.flag & BAM_FREAD1) readno1 = 1; + if (rec1->core.flag & BAM_FREAD2) readno1 = 2; - char *seq = get_read(rec); - char *quality = get_quality(rec); + if (rec2) { + if (!(rec2->core.flag & BAM_FPAIRED)) readno2 = 0; + if (rec2->core.flag & BAM_FREAD1) readno2 = 1; + if (rec2->core.flag & BAM_FREAD2) readno2 = 2; + } /* * first pass - copy sections of read into tags */ for (int n=0; n < opts->poslist->end; n++) { pos = opts->poslist->entries[n]; - if (pos->record == recno) { + if ( (pos->record_from == readno1) || (pos->record_from == readno2) ) { + bam1_t *rec = NULL; + if (pos->record_from == readno1) rec = rec1; + if (pos->record_from == readno2) rec = rec2; + if (pos->from <= rec->core.l_qseq) { + + char *seq = get_read(rec); + char *quality = get_quality(rec); + int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; int len = to - from + 1; @@ -539,52 +605,82 @@ static bam1_t *process_record(bam1_t *rec, opts_t *opts) // copy data from read memset(tag_data,0,rec->core.l_qseq+1); memcpy(tag_data, seq + from - 1, len); - add_or_update(new_tags, opts->taglist->entries[n], tag_data); + add_or_update(new_tags, opts->taglist->entries[n], tag_data, pos->record_to); // copy data from quality memset(qtag_data,0,rec->core.l_qseq+1); memcpy(qtag_data, quality + from - 1, len); - add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data); + add_or_update(new_qtags, opts->qtaglist->entries[n], qtag_data, pos->record_to); + free(seq); free(quality); } } } // add new tags for (int n=0; n < new_tags->end; n++) { - char *tag = new_tags->entries[n]; tag[2] = 0; + bam1_t *rec = NULL; + int readno = (*(char*)(new_tags->entries[n]) - '0'); + char *tag = new_tags->entries[n]+1; tag[2] = 0; char *data = tag+3; - add_tag(rec, tag, data, opts); + if (readno == readno1) rec = rec1; + else rec = rec2; + if (rec) add_tag(rec, tag, data, opts); } // add new quality tags for (int n=0; n < new_qtags->end; n++) { - char *tag = new_qtags->entries[n]; tag[2] = 0; + bam1_t *rec = NULL; + int readno = (*(char*)(new_qtags->entries[n]) - '0'); + char *tag = new_qtags->entries[n]+1; tag[2] = 0; char *data = tag+3; - add_tag(rec, tag, data, opts); + if (readno == readno1) rec = rec1; + else rec = rec2; + if (rec) add_tag(rec, tag, data, opts); } /* * second pass - mark sections of read as deleted */ + char *seq = NULL; + char *quality = NULL; + char *seq1 = get_read(rec1); + char *quality1 = get_quality(rec1); + char *seq2 = rec2 ? get_read(rec2) : NULL; + char *quality2 = rec2 ? get_quality(rec2) : NULL; + for (int n=0; n < opts->poslist->end; n++) { + bam1_t *rec = NULL; pos = opts->poslist->entries[n]; - if (pos->record == recno) { - if (pos->from <= rec->core.l_qseq) { - int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; - int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; - int len = to - from + 1; - memset(seq + from - 1, 1, len); // mark as deleted - memset(quality + from - 1, 1, len); // mark as deleted - } + if (pos->record_from == readno1) { rec = rec1; seq = seq1; quality = quality1; } + if (pos->record_from == readno2) { rec = rec2; seq = seq2; quality = quality2; } + + if (rec && (pos->from <= rec->core.l_qseq)) { + int from = (pos->from > rec->core.l_qseq) ? rec->core.l_qseq : pos->from; + int to = (pos->to > rec->core.l_qseq) ? rec->core.l_qseq : pos->to; + int len = to - from + 1; + memset(seq + from - 1, 1, len); // mark as deleted + memset(quality + from - 1, 1, len); // mark as deleted } } - shuffle(seq); shuffle(quality); // physically remove 'marked as deleted' bytes - bam1_t *newrec = make_new_rec(rec, seq, quality); - free(tag_data); free(qtag_data); free(quality); free(seq); + shuffle(seq1); shuffle(quality1); // physically remove 'marked as deleted' bytes + bam1_t *nr = make_new_rec(rec1, seq1, quality1); + *newrec = nr; + free(seq1); free(quality1); + + if (rec2) { + shuffle(seq2); shuffle(quality2); // physically remove 'marked as deleted' bytes + nr = make_new_rec(rec2, seq2, quality2); + *newrec2 = nr; + free(seq2); free(quality2); + } else { + *newrec2 = NULL; + } + + free(tag_data); free(qtag_data); va_free(new_tags); va_free(new_qtags); - return newrec; + return; } /* @@ -712,10 +808,12 @@ static bam1_t *merge_records(bam1_t *r1, bam1_t *r2, opts_t *opts) */ static int write_record(BAMit_t *bam, bam1_t *rec) { - int r = sam_write1(bam->f, bam->h, rec); - if (r < 0) { - fprintf(stderr,"sam_write1() failed\n"); - return -1; + if (rec) { + int r = sam_write1(bam->f, bam->h, rec); + if (r < 0) { + fprintf(stderr,"sam_write1() failed\n"); + return -1; + } } return 0; } @@ -731,6 +829,10 @@ int process(opts_t* opts) int retcode = 0; int nrec = 0; int r; + bam1_t *newrec = NULL; + bam1_t *newrec2= NULL; + bam1_t *rec = NULL; + bam1_t *rec2 = NULL; BAMit_t *bam_in = BAMit_open(opts->in_file, 'r', opts->input_fmt, 0, NULL); BAMit_t *bam_out = BAMit_open(opts->out_file, 'w', opts->output_fmt, opts->compression_level, NULL); @@ -745,28 +847,38 @@ int process(opts_t* opts) } while (BAMit_hasnext(bam_in)) { - bam1_t *rec = BAMit_next(bam_in); - if (invalid_record(rec,++nrec)) return -1; - bam1_t *newrec = process_record(rec,opts); - - bam1_t *rec2 = BAMit_peek(bam_in); - if (rec2 && strcmp(bam_get_qname(rec), bam_get_qname(rec2)) == 0) { - rec2 = BAMit_next(bam_in); - if (invalid_record(rec2,++nrec)) return -1; - bam1_t *newrec2 = process_record(rec2,opts); - if ((newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) { - bam1_t *merged_rec = merge_records(newrec, newrec2, opts); - if (write_record(bam_out, merged_rec)) return -1; - bam_destroy1(merged_rec); - } else { - if (write_record(bam_out, newrec)) return -1; - if (write_record(bam_out, newrec2)) return -1; - } - bam_destroy1(newrec2); + bam1_t *r = BAMit_next(bam_in); + if (invalid_record(r,++nrec)) return -1; + rec = bam_dup1(r); + + r = BAMit_peek(bam_in); + if (r && strcmp(bam_get_qname(rec), bam_get_qname(r)) == 0) { + r = BAMit_next(bam_in); + if (invalid_record(r,++nrec)) return -1; + rec2 = bam_dup1(r); + } else { + rec2 = NULL; + } + + process_records(rec, rec2, &newrec, &newrec2, opts); + + //newrec = process_record(rec,opts); + //newrec2 = NULL; + //if (rec2) newrec2 = process_record(rec2,opts); + + if (newrec2 && ( (newrec->core.l_qseq == 0) || (newrec2->core.l_qseq == 0)) ) { + bam1_t *merged_rec = merge_records(newrec, newrec2, opts); + if (write_record(bam_out, merged_rec)) return -1; + bam_destroy1(merged_rec); } else { - if (write_record(bam_out,newrec)) return -1; + if (write_record(bam_out, newrec)) return -1; + if (write_record(bam_out, newrec2)) return -1; } + + bam_destroy1(rec); + bam_destroy1(rec2); bam_destroy1(newrec); + bam_destroy1(newrec2); } // tidy up after us diff --git a/test/data/out/read2tags_11.bam b/test/data/out/read2tags_11.bam new file mode 100644 index 0000000000000000000000000000000000000000..e5b271569453a56688c521d19ff5b32d0bf8af2f GIT binary patch literal 2399 zcmV-l383~LiwFb&00000{{{d;LjnN32klr%bK5u;b|0179Qux`x%AXTBB{l3G^MIg zA}w*6q-c_IoD&KXAqj16AxJO%L;Cm514PoY;&f-yOXe`LEIom51Ms~MkDThxcfWjp zcJ^*zbgtd8)*E($<+w_DR+Kb)_0F4jEMpwZf{y2o7c8J3(JV{LB8yRwZOeO7(2fy| z>#>-nJH+d8l%Bz?s4 zCrL^C$Qf6(p*rlVco7UVto6^|4zP|F7f#>Sjhh?AASF?(w4^C#C8HcN49uBs+rgjW zUJ>g(gLUG_)?IU=q2(&{R^dd6=$*pLD62}bI-YG}-L~{#V*0_b|7JLR^SY5PTIp>g zjnr1sTB+YkgH{^0(x{crTd9pPx_F)M*$v)i1?D-sr4e3N+bu0P3c{7~0lT_mPTI|_ zammLh@T?zA1J9Oj=r3(>=c(#wIMBvu!FKy@xi7M6x6fgczN;fnKUOr|2<&BBRPHHC z2uo2GZ^@?2ibq6q2BR^n5}}a6>J(l2kY}7mC}A7gLnz!cjv`oQ1m8IU%z-9Bc|p5* zwT{__lnhFwpfQD|@1aY8A4_3k$>9RLMc};1a$2B@Q(=EkJ8{hjpI$0 zP!tvwhdq<9MHW;dE31;CA6UAHt4Pc(q9tLl<07&25gj{DvVy=&8TrR+l*7*R zFpVXsCtDIj4DS+OT6Spq6W|Ssd{Z~@++VIdY#D$_+nV~qPVRD*-lo}oiURX$Wx6xV zo#UD53fo|YJX;Shm%cNxY!geVD3m1lV%1uRD{GDqd&? z^MZA-T*JJnJ=H^wF%k=49!8&tG9eMkOImDM8VQy=9r2#?9LK zzfGz+K(B~;C$WK5k68$cu;HaYoB(wPH-`nseN~!kU2V`mANG}l1Fa|K(tt&7)bGtB zCMgBKCE3Lc%GEIic!EF1_t(!+qBK z96g*Z%m7LrYPTK0hG~bo7z8bMSu;l;2*-sY4Y9p^|KxPpXw(%ZY)%wvyNNdXwIctN z&PM-LZ8wF+^Q|Z@o&&>@7(+#rhuS(VH(6Z3`?~_b1|>!EJbr|uqqan~B~;U$21Py; z<)k!i7YI;wM9FV=O1{Rh(`;QxPFD`v%9^Fh^CVDrgHZPpfj+43QhbbrMx{i5F&xuO zWUr87vuAfy`M6O^Adx%va|2Y=?G|(gGsB$yOi>?Jza2>!hjVnZ$8fS!zR%(a0V8VX zM=H>qLMJGs&+9~RI=`&)(psT^q2IX=1ool&?Y(FZhI=AU1M%fafJx>b^)$6x<_|{{ z@6ck`l6&*Rla`<7@sjJu}nww^0G~V57};PS;57KXq^PfqkHI#=t%TSu(^kHfCRBOZ+ezLpXq`;W+kla^wFmznnP=y3*@~x z>>U9TkSKKk5-k{5Du7sb6YVfZf;S@3B)7hLXaSNV6#$YVHIA@DU8itSUkz1tEZ_fBHC62d&iH@q$yLXje{u^jgVS>M>btYEv%kUrDQ?eR zeSdcLAK~!^&ASx<03VA81ONa4009360763o0E7XZlDkgBFc^j1zJWuAih;(CE7Ace zPI^%yRl*mVEDWWdvY?8oBVtFS%HW0PgSVk}TdI_RRB|LMmKFWyd_H~!0DK*E1#$;~ zdvH-ZGh2`^WBN#=NfM_tr8K1Jn8tA$MWfIUL6Ae()3VgJ*E`~ zz(ZD0^Z!B>LWsH19e7%ckLN~$ci{djLCLF5bXcd!PZ*wRl&ewRpj8UX3#$y zn}(xtXk2Mz2D+TPn`#3q@7oFOKL&fxtrRn>#(guTK`Glv9>^)S-R8w?t^Y=9ZdRCZc47ebn% zrOj~JeQ60?%b*eDT9*tt1l#xZN7cs6bS-Sx^R`__acsMe65BQ8h~-%4Ud_$JM|35a z$bNg{a)&-^i(b!wSjBS0q|<9${t5cbqGuLef-Q;NRw>>C001A02m}BC000301^_}s R0stET0{{R300000004}UhtdE5 literal 0 HcmV?d00001 diff --git a/test/data/out/read2tags_12.bam b/test/data/out/read2tags_12.bam new file mode 100644 index 0000000000000000000000000000000000000000..feb112b7b8320bd1161bc1124d0ea583ec033a20 GIT binary patch literal 2396 zcmV-i38VHOiwFb&00000{{{d;LjnN42klr%bK5u;b|0179Qux`x%AXTBB{l3G^MIg zA}w*6q-c_IoD&KXAqj16AxJO%L;Cm514PoY;&f-yOXe`LEIom51Ms~MkDThxcfWjp zcJ^*zbgtd8)*E($<+w_DR+Kb)_0F4jEMpwZf{y2o7c8J3(JV{LB8yRwZOeO7(2fy| z>#>-nJH+d8l%Bz?s4 zCrL^C$Qf6(p*rlVco7UVto6^|4zP|F7f#>Sjhh?AASF?(w4^C#C8HcN49uBs+rgjW zUJ>g(gLUG_)?IU=q2(&{R^dd6=$*pLD62}bI-YG}-L~{#V*0_b|7JLR^SY5PTIp>g zjnr1sTB+YkgH{^0(x{crTd9pPx_F)M*$v)i1?D-sr4e3N+bu0P3c{7~0lT_mPTI|_ zammLh@T?zA1J9Oj=r3(>=c(#wIMBvu!FKy@xi7M6x6fgczN;fnKUOr|2<&BBRPHHC z2uo2GZ^@?2ibq6q2BR^n5}}a6>J(l2kY}7mC}A7gLnz!cjv`oQ1m8IU%z-9Bc|p5* zwT{__lnhFwpfQD|@1aY8A4_3k$>9RLMc};1a$2B@Q(=EkJ8{hjpI$0 zP!tvwhdq<9MHW;dE31;CA6UAHt4Pc(q9tLl<07&25gj{DvVy=&8TrR+l*7*R zFpVXsCtDIj4DS+OT6Spq6W|Ssd{Z~@++VIdY#D$_+nV~qPVRD*-lo}oiURX$Wx6xV zo#UD53fo|YJX;Shm%cNxY!geVD3m1lV%1uRD{GDqd&? z^MZA-T*JJnJ=H^wF%k=49!8&tG9eMkOImDM8VQy=9r2#?9LK zzfGz+K(B~;C$WK5k68$cu;HaYoB(wPH-`nseN~!kU2V`mANG}l1Fa|K(tt&7)bGtB zCMgBKCE3Lc%GEIic!EF1_t(!+qBK z96g*Z%m7LrYPTK0hG~bo7z8bMSu;l;2*-sY4Y9p^|KxPpXw(%ZY)%wvyNNdXwIctN z&PM-LZ8wF+^Q|Z@o&&>@7(+#rhuS(VH(6Z3`?~_b1|>!EJbr|uqqan~B~;U$21Py; z<)k!i7YI;wM9FV=O1{Rh(`;QxPFD`v%9^Fh^CVDrgHZPpfj+43QhbbrMx{i5F&xuO zWUr87vuAfy`M6O^Adx%va|2Y=?G|(gGsB$yOi>?Jza2>!hjVnZ$8fS!zR%(a0V8VX zM=H>qLMJGs&+9~RI=`&)(psT^q2IX=1ool&?Y(FZhI=AU1M%fafJx>b^)$6x<_|{{ z@6ck`l6&*Rla`<7@sjJu}nww^0G~V57};PS;57KXq^PfqkHI#=t%TSu(^kHfCRBOZ+ezLpXq`;W+kla^wFmznnP=y3*@~x z>>U9TkSKKk5-k{5Du7sb6YVfZf;S@3B)7hLXaSNV6#$YVHIA@DU`u_un zZ}|YCV}xVDJY99z?W>`xj^+EGs-~))z#0FKJ-O<5^G|L8W^h{0UVV3VcJ?>;KgI3Y ztMAXw{sXQRmfyP-001A02m}BC000301^_}s0sw#kos!>5!!Q`dv%NuUE{Yd!Nh5X_ zLcex@XklQQ&i2NOxm@)|h9LSDuJmFeKA*mge%a=%R$=x)XbAK>=a-WwLdg40CnUEC zxgxZ7hFg*^Lb!oo9ECB&06xSc2*WrChQ8|)yINV+y2_pEY^&3agkVAlH&KPwS*D~` zF1vo7y`V z8~owOM=J<$3`+&ey_`A8-hgdCh`r_3TH>m4pHIN^27>`+SiE)a8zBzC1%TlMLxg6g z^r&up>v0!x|c?0eCPQYjXl znh?j`ucq6g*W#2G_10@@TYYXG+8 @@ -242,35 +242,111 @@ void setup_test_10(int* argc, char*** argv, char *outputfile) (*argv)[(*argc)++] = strdup("--replace"); } +void setup_test_11(int* argc, char*** argv, char *outputfile) +{ + *argc = 0; + *argv = (char**)calloc(sizeof(char*), 100); + (*argv)[(*argc)++] = strdup("bambi"); + (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("-i"); + (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); + (*argv)[(*argc)++] = strdup("-o"); + (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("-t"); + (*argv)[(*argc)++] = strdup("Ba"); + (*argv)[(*argc)++] = strdup("-q"); + (*argv)[(*argc)++] = strdup("Qa"); + (*argv)[(*argc)++] = strdup("-p"); + (*argv)[(*argc)++] = strdup("1:2:1:1"); +} + +void setup_test_12(int* argc, char*** argv, char *outputfile) +{ + *argc = 0; + *argv = (char**)calloc(sizeof(char*), 100); + (*argv)[(*argc)++] = strdup("bambi"); + (*argv)[(*argc)++] = strdup("select"); + (*argv)[(*argc)++] = strdup("-i"); + (*argv)[(*argc)++] = strdup(MKNAME(DATA_DIR,"/read2tags.sam")); + (*argv)[(*argc)++] = strdup("-o"); + (*argv)[(*argc)++] = strdup(outputfile); + (*argv)[(*argc)++] = strdup("-t"); + (*argv)[(*argc)++] = strdup("Ba"); + (*argv)[(*argc)++] = strdup("-q"); + (*argv)[(*argc)++] = strdup("Qa"); + (*argv)[(*argc)++] = strdup("-p"); + (*argv)[(*argc)++] = strdup("2:1:1:1"); +} + void checkFiles(char *gotfile, char *expectfile, int verbose) { BAMit_t *bgot = BAMit_open(gotfile, 'r', NULL, 0, NULL); BAMit_t *bexp = BAMit_open(expectfile, 'r', NULL, 0, NULL); - bam1_t *got_rec, *exp_rec; + // bam1_t *got_rec, *exp_rec; + + int f = failure; - int c = sam_hdr_count_lines(bgot->h, "RG"); - if (c != sam_hdr_count_lines(bexp->h, "RG")) { failure++; return; } + int c1 = sam_hdr_count_lines(bgot->h, "RG"); + int c2 = sam_hdr_count_lines(bexp->h, "RG"); + if (c1 != c2) { + failure++; + if (verbose) fprintf(stderr, "RG lines: expected %d, got %d\n", c2, c1); + } - for (int n=0; n < c; n++) { + for (int n=0; n < c1; n++) { kstring_t ks_got; ks_initialize(&ks_got); kstring_t ks_exp; ks_initialize(&ks_exp); sam_hdr_find_line_pos(bgot->h, "RG", n, &ks_got); sam_hdr_find_line_pos(bexp->h, "RG", n, &ks_exp); - if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) { failure++; return; } + if (strcmp(ks_str(&ks_got), ks_str(&ks_exp))) { + if (verbose) fprintf(stderr, "RG %d: expected %s, got %s\n", n, ks_str(&ks_exp), ks_str(&ks_got)); + failure++; + break; + } ks_free(&ks_got); ks_free(&ks_exp); } +#if 0 while ((exp_rec = BAMit_next(bexp)) != NULL) { got_rec = BAMit_next(bgot); - if (!got_rec) { fprintf(stderr, "%s ended too soon\n", gotfile); failure++; return; } + if (!got_rec) { fprintf(stderr, "%s ended too soon\n", gotfile); failure++; break; } if (memcmp(got_rec->data, exp_rec->data, got_rec->l_data)) { + if (verbose) fprintf(stderr, "Record different\n"); failure++; break; } } +#endif BAMit_free(bexp); BAMit_free(bgot); + + char cmd[1024]; + sprintf(cmd, "samtools view -o /tmp/got.sam %s ", gotfile); + system(cmd); + sprintf(cmd, "samtools view -o /tmp/exp.sam %s ", expectfile); + system(cmd); + + FILE *getfp = fopen("/tmp/got.sam", "r"); + FILE *expfp = fopen("/tmp/exp.sam", "r"); + char getline[1024]; + char expline[1024]; + + while (fgets(getline, 1023, getfp) > 0) { + fgets(expline, 1023, expfp); + if (strcmp(getline,expline) != 0) { + fprintf(stderr, "Expected: %sFound : %s\n", expline, getline); + failure++; + } + } + + fclose(getfp); fclose(expfp); + + if (verbose) { + if (f == failure) fprintf(stderr, " :\tpass\n"); + else fprintf(stderr, " :\t*** FAIL ***\n"); + } + return; } @@ -308,6 +384,7 @@ int main(int argc, char**argv) char outputfile[1024]; // minimal options + if (verbose) fprintf(stderr,"Test 1: minimal options\n"); sprintf(outputfile,"%s/read2tags_1.bam", TMPDIR); setup_test_1(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -315,6 +392,7 @@ int main(int argc, char**argv) free_args(argv_1); // overlapping reads + if (verbose) fprintf(stderr,"Test 2: Overlapping reads\n"); sprintf(outputfile,"%s/read2tags_2.bam", TMPDIR); setup_test_2(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -322,6 +400,7 @@ int main(int argc, char**argv) free_args(argv_1); // remove first record + if (verbose) fprintf(stderr,"Test 3: remove first record\n"); sprintf(outputfile,"%s/read2tags_3.bam", TMPDIR); setup_test_3(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -329,6 +408,7 @@ int main(int argc, char**argv) free_args(argv_1); // remove second record + if (verbose) fprintf(stderr,"Test 4: remove second record\n"); sprintf(outputfile,"%s/read2tags_4.bam", TMPDIR); setup_test_4(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -336,6 +416,7 @@ int main(int argc, char**argv) free_args(argv_1); // handle single reads + if (verbose) fprintf(stderr,"Test 5: handle single reads\n"); sprintf(outputfile,"%s/read2tags_5.bam", TMPDIR); setup_test_5(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -343,6 +424,7 @@ int main(int argc, char**argv) free_args(argv_1); // specify duplicate tags + if (verbose) fprintf(stderr,"Test 6: specify duplicate tags\n"); sprintf(outputfile,"%s/read2tags_6.bam", TMPDIR); setup_test_6(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -350,6 +432,7 @@ int main(int argc, char**argv) free_args(argv_1); // use --replace option + if (verbose) fprintf(stderr,"Test 7: use --replace option\n"); sprintf(outputfile,"%s/read2tags_7.bam", TMPDIR); setup_test_7(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -357,6 +440,7 @@ int main(int argc, char**argv) free_args(argv_1); // use --merge option + if (verbose) fprintf(stderr,"Test 8: use --merge option\n"); sprintf(outputfile,"%s/read2tags_8.bam", TMPDIR); setup_test_8(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -364,6 +448,7 @@ int main(int argc, char**argv) free_args(argv_1); // use --merge option with duplicate tags + if (verbose) fprintf(stderr,"Test 9: use --merge option with duplicate tags\n"); sprintf(outputfile,"%s/read2tags_9.bam", TMPDIR); setup_test_9(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); @@ -371,12 +456,28 @@ int main(int argc, char**argv) free_args(argv_1); // use --replace option with duplicate tags + if (verbose) fprintf(stderr,"Test 10: use --replace option with duplicate tags\n"); sprintf(outputfile,"%s/read2tags_10.bam", TMPDIR); setup_test_10(&argc_1, &argv_1, outputfile); main_read2tags(argc_1-1, argv_1+1); checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_10.bam"),verbose); free_args(argv_1); + // write tags to read 2 from read 1 + if (verbose) fprintf(stderr,"Test 11: write tags to read 2 from read 1\n"); + sprintf(outputfile,"%s/read2tags_11.bam", TMPDIR); + setup_test_11(&argc_1, &argv_1, outputfile); + main_read2tags(argc_1-1, argv_1+1); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_11.bam"),verbose); + free_args(argv_1); + + // write tags to read 1 from read 2 + if (verbose) fprintf(stderr,"Test 12: write tags to read 1 from read 2\n"); + sprintf(outputfile,"%s/read2tags_12.bam", TMPDIR); + setup_test_12(&argc_1, &argv_1, outputfile); + main_read2tags(argc_1-1, argv_1+1); + checkFiles(outputfile,MKNAME(DATA_DIR,"/out/read2tags_12.bam"),verbose); + free_args(argv_1); printf("read2tags tests: %s\n", failure ? "FAILED" : "Passed"); return failure ? EXIT_FAILURE : EXIT_SUCCESS;