Skip to content

Commit

Permalink
fix bug in parseMD() that caused segmentation fault error in same cas…
Browse files Browse the repository at this point in the history
…es when processing reads with different lenghts; adjust start coordinates of input genomic regions to be always >=1; version number moved to 1.0.5;
  • Loading branch information
gnarzisi committed Mar 5, 2018
1 parent b54678f commit 5aaefb7
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 16 deletions.
4 changes: 3 additions & 1 deletion src/Lancet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ int loadRefs(const string reference, const string region, vector< map<string, Re
int SP = stoi(START) - PADDING;
int EP = stoi(END) + PADDING;

if(SP<0) {SP=0;} // start position cannnot be negative
if(SP<1) {SP=1;} // start position cannnot be less than 1
// check chromosome size
std::vector<RefData>::iterator it;
for (it = bamrefs.begin() ; it != bamrefs.end(); ++it) {
Expand Down Expand Up @@ -333,6 +333,8 @@ void loadBed(const string bedfile, vector< map<string, Ref_t *> > &reftable, Ref

int SP = stoi(tokens[1]) - PADDING;
int EP = stoi(tokens[2]) + PADDING;

if(SP<1) {SP=1;} // start position cannnot be less than 1

//region = tokens[0] + ":" + tokens[1] + "-" + tokens[2];
region = tokens[0] + ":" + itos(SP) + "-" + itos(EP);
Expand Down
2 changes: 1 addition & 1 deletion src/Lancet.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

#include "Microassembler.hh"

string VERSION = "1.0.4 (beta), February 9 2018";
string VERSION = "1.0.5 (beta), March 1 2018";

/**** configuration parameters ****/
int NUM_THREADS = 1;
Expand Down
10 changes: 7 additions & 3 deletions src/Microassembler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ bool Microassembler::isActiveRegion(BamReader &reader, Ref_t *refinfo, BamRegion
BamAlignment al;
int totalreadbp = 0;
bool ans = false;
bool flag = false;
int MQ = MIN_MAP_QUAL;
string CIGAR = "";
string rg = "";
Expand Down Expand Up @@ -288,18 +289,21 @@ bool Microassembler::isActiveRegion(BamReader &reader, Ref_t *refinfo, BamRegion
al.GetTag("RG", rg); // get the read group information for the read
if(rg.empty()) { rg = "null"; }

if( (al.QueryBases).length() != (al.Qualities).length() ) {
cerr << "WARNING: inconsistent length between read sequence (L=" << (al.QueryBases).length() << ") and base qualities (L=" << (al.Qualities).length() << ")" << endl;
}

if ( (readgroups.find("null") != readgroups.end()) || (readgroups.find(rg) != readgroups.end()) ) { // select reads in the read group RG

// parse MD string
// String for mismatching positions. Regex : [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*10
al.GetTag("MD", md); // get string of mismatching positions
flag = al.GetTag("MD", md); // get string of mismatching positions
//cerr << "MD: " << md << " alstart: " << alstart << " Q: " << al.Qualities << " MinQ " << MIN_QUAL_CALL << endl;
parseMD(md, mapX, alstart, al.Qualities, MIN_QUAL_CALL);
if(flag==true) { parseMD(md, mapX, alstart, al.Qualities, MIN_QUAL_CALL); }

// add SNV to database
//Variant_t(string chr_, int pos_, string ref_, string alt_, int ref_cov_normal_, int ref_cov_tumor_, int alt_cov_normal_fwd_, int alt_cov_normal_rev_, int alt_cov_tumor_fwd_, int alt_cov_tumor_rev_, char prev_bp_ref_, char prev_bp_alt_, Filters &fs)


// example: 31M1I17M1D37M
CIGAR = "";
int pos = alstart; // initialize position to start of alignment
Expand Down
29 changes: 18 additions & 11 deletions src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -375,24 +375,29 @@ void parseMD(string & md, map<int,int> & M, int start, string & qual, int min_qv
size_t p2;
string del;
string num;
int pos = start;
int step;
int pos = start; // traks position in the reference
size_t rpos = 0; // traks relative position in the read
while(p!=string::npos) {
num = md.substr(p_old+1,p-(p_old+1));
pos += atoi(num.c_str());
//cerr << num << "|";
if(md[p]=='^') {
step = atoi(num.c_str());
pos += step; rpos += step;
if(md[p]=='^') { // the reference contains bases that are missing in the read
p2 = md.find_first_not_of("ACGT^",p+1); // find next number
del = md.substr(p,p2-p);
pos += del.size();
del = md.substr(p+1,p2-(p+1)); // sequence in ref missing from the read
pos += del.size(); // increase only reference pointer -- read postion does not change
//cerr << del << "|";
p = md.find_first_of("ACGT^",p2);
p_old = p2-1;
}
else {
++pos;
assert( (pos-start) >= 0 );
if(qual[pos-start] >= min_qv) {
//cerr << qual[pos-start] << ">=?" << min_qv << endl;
++pos; ++rpos;
assert( rpos >= 0 );
assert( rpos <= qual.length() );

//if(qual[pos-start] >= min_qv) {
if(qual[rpos] >= min_qv) {
mit = M.find(pos);
if (mit != M.end()) { ++((*mit).second); }
else { M.insert(std::pair<int,int>(pos,1)); }
Expand All @@ -402,8 +407,10 @@ void parseMD(string & md, map<int,int> & M, int start, string & qual, int min_qv
p = md.find_first_of("ACGT^",p_old+1);
}
}
num = md.substr(p_old+1);
//cerr << num << endl;
num = md.substr(p_old+1);
step = atoi(num.c_str());
pos += step; rpos += step;
//cerr << num << "\t" << md << "\t" << rpos << "?" << qual.length() << endl;
}

// Fasta_Read
Expand Down

0 comments on commit 5aaefb7

Please sign in to comment.