Skip to content

Commit

Permalink
warning message if MD tag missing from alignments; command line in VC…
Browse files Browse the repository at this point in the history
…F header; fixed bug in FET score camputation, caused by a log(0) calculation in rare cases; updated version number to v1.0.6
  • Loading branch information
gnarzisi committed Apr 15, 2018
1 parent fc7d2a5 commit 80905c6
Show file tree
Hide file tree
Showing 8 changed files with 90 additions and 18 deletions.
51 changes: 43 additions & 8 deletions src/Lancet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -409,13 +409,29 @@ int rLancet(string tumor_bam, string normal_bam, string ref_fasta, string reg, s

if(verbose) { printConfiguration(cerr, filters); }

BamReader reader;
BamReader readerT;
// attempt to open the BamReader
if ( !reader.Open(TUMOR) ) {
cerr << "Could not open BAM file." << endl;
if ( !readerT.Open(TUMOR) ) {
cerr << "Could not open tumor BAM file." << endl;
return -1;
}
RefVector references = reader.GetReferenceData(); // Extract all reference sequence entries.

BamReader readerN;
// attempt to open the BamReader
if ( !readerN.Open(NORMAL) ) {
cerr << "Could not open normal BAM file." << endl;
return -1;
}

bool found = (checkPresenceOfMDtag(readerT) || checkPresenceOfMDtag(readerN));
if(!found && ACTIVE_REGIONS) {
cerr << endl << "--------WARNING--------" << endl;
cerr << "MD tag required to select active regions, but is missing from alignments." << endl;
cerr << "RECOMMENDED ACTION: turn off the active region module (--active-region-off)" << endl;
cerr << "-----------------------" << endl << endl;
}

RefVector references = readerT.GetReferenceData(); // Extract all reference sequence entries.

// run the assembler on each region
try {
Expand Down Expand Up @@ -585,6 +601,8 @@ int main(int argc, char** argv)
exit(0);
}

COMMAND_LINE = buildCommandLine(argc,argv);

cerr.setf(ios::fixed,ios::floatfield);
cerr.precision(1);

Expand Down Expand Up @@ -762,13 +780,29 @@ int main(int argc, char** argv)

if(verbose) { printConfiguration(cerr, filters); }

BamReader reader;
BamReader readerT;
// attempt to open the BamReader
if ( !reader.Open(TUMOR) ) {
cerr << "Could not open BAM file." << endl;
if ( !readerT.Open(TUMOR) ) {
cerr << "Could not open tumor BAM file." << endl;
return -1;
}
RefVector references = reader.GetReferenceData(); // Extract all reference sequence entries.

BamReader readerN;
// attempt to open the BamReader
if ( !readerN.Open(NORMAL) ) {
cerr << "Could not open normal BAM file." << endl;
return -1;
}

bool found = (checkPresenceOfMDtag(readerT) || checkPresenceOfMDtag(readerN));
if(!found && ACTIVE_REGIONS) {
cerr << endl << "--------WARNING--------" << endl;
cerr << "MD tag required to select active regions, but is missing from alignments." << endl;
cerr << "RECOMMENDED ACTION: turn off the active region module (--active-region-off)" << endl;
cerr << "-----------------------" << endl << endl;
}

RefVector references = readerT.GetReferenceData(); // Extract all reference sequence entries.

// run the assembler on each region
try {
Expand Down Expand Up @@ -871,6 +905,7 @@ int main(int argc, char** argv)
//merge variant from all threads
cerr << "Merge variants" << endl;
VariantDB_t variantDB; // variants DB
variantDB.setCommandLine(COMMAND_LINE);
for( i=0; i < NUM_THREADS; ++i ) {

tot_skip += assemblers[i]->num_skip;
Expand Down
3 changes: 2 additions & 1 deletion src/Lancet.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@

#include "Microassembler.hh"

string VERSION = "1.0.5 (beta), March 1 2018";
string VERSION = "1.0.6 (beta), April 15 2018";
string COMMAND_LINE;

/**** configuration parameters ****/
int NUM_THREADS = 1;
Expand Down
17 changes: 8 additions & 9 deletions src/Variant.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,21 +199,20 @@ double Variant_t::compute_FET_score() {

// fisher exaxt test (FET) score for tumor/normal coverages
prob = fet.kt_fisher_exact((ref_cov_normal_fwd+ref_cov_normal_rev), (ref_cov_tumor_fwd+ref_cov_tumor_rev), (alt_cov_normal_fwd+alt_cov_normal_rev), (alt_cov_tumor_fwd+alt_cov_tumor_rev), &left, &right, &twotail);
if(prob == 1) { fet_score = 0.0; }
if(prob == 1.0) { fet_score = 0.0; }
else if(prob == 0.0) { fet_score = -10.0*log10(1/std::numeric_limits<double>::max()); }
else { fet_score = -10.0*log10(prob); }

/*
cerr << endl;
cerr << "FET score: " << fet_score << " (p = " << prob << ")" << endl;
cerr << "FET score left: " << fet_score_left << " (p = " << left << ")" << endl;
cerr << "FET score right: " << fet_score_right << " (p = " << right << ")" << endl;
cerr << "FET score twotail: " << fet_score_twotail << " (p = " << twotail << ")" << endl;
*/
//cerr << endl;
//cerr.precision(100);
//cerr << "FET score left: " << fet_score_left << " (p = " << left << ")" << endl;
//cerr << "FET score right: " << fet_score_right << " (p = " << right << ")" << endl;
//cerr << "FET score twotail: " << fet_score_twotail << " (p = " << twotail << ")" << endl;

return fet_score;
}

// compute fisher exaxt test score for strand bias (SB) in tumor
// compute fisher exact test score for strand bias (SB) in tumor
double Variant_t::compute_SB_score() {

double prob = 0.0;
Expand Down
2 changes: 2 additions & 0 deletions src/Variant.hh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

#include <string>
#include <iostream>
#include <limits>
#include <cfloat>
#include "util.hh"
#include "FET.hh"

Expand Down
1 change: 1 addition & 0 deletions src/VariantDB.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ void VariantDB_t::printHeader(const string version, const string reference, char
cout << "##fileformat=VCFv4.2\n"
"##fileDate=" << date << ""
"##source=lancet " << version << "\n"
"##cmdline=" << command_line << "\n"
"##reference=" << reference << "\n"
"##INFO=<ID=FETS,Number=1,Type=Float,Description=\"phred-scaled p-value of the Fisher's exact test for tumor-normal allele counts (right-sided)\">\n"
"##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=\"Somatic mutation\">\n"
Expand Down
2 changes: 2 additions & 0 deletions src/VariantDB.hh
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,11 @@ public:

map<string,Variant_t> DB; // databbase of variants
unordered_map<string,int> nCNT; // counts of variants per postion in the normal
string command_line; // command line used to run the tool

VariantDB_t() {}

void setCommandLine(string cl) { command_line = cl; }
void addVar(Variant_t v);
void printHeader(const string version, const string reference, char * date, Filters &fs, string &sample_name_N, string &sample_name_T);
void printToVCF(const string version, const string reference, char * date, Filters &fs, string &sample_name_N, string &sample_name_T);
Expand Down
26 changes: 26 additions & 0 deletions src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@
using namespace std;


// build the command line used to run the program
string buildCommandLine(int argc, char** argv) {
string command;

for(int i=0;i<argc;i++) {
command += string(argv[i]).append(" ");
}

return command;
}

// return file name without extension
StringType GetBaseFilename(const char *filename)
{
Expand Down Expand Up @@ -363,6 +374,21 @@ bool seqAboveQual(string qv, int Q)
return true;
}


// check for presence of MD tag in alignments, returns false if missing.
bool checkPresenceOfMDtag(BamReader &reader) {
BamAlignment al;
bool ans = true;

if(reader.GetNextAlignment(al)) {
//cerr << "Name: " << al.Name << endl;
ans = al.HasTag("MD"); // get string of mismatching positions
}
//else { cerr << "No valid alignment found!!" << endl; }

return ans;
}

// parse MD string
// extract SNVs locations and add them to map M
//////////////////////////////////////////////////////////////////////////
Expand Down
6 changes: 6 additions & 0 deletions src/util.hh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
#include <assert.h>
#include <map>

#include "api/BamReader.h"

using namespace BamTools;

#ifdef UNICODE //Test to see if we're using wchar_ts or not.
typedef std::wstring StringType;
#else
Expand Down Expand Up @@ -60,6 +64,7 @@
#define HASHMAP std
#endif

std::string buildCommandLine(int argc, char** argv);
StringType GetBaseFilename(const char *filename);
FILE * xfopen(const std::string & filename, const std::string & mode);
void xfclose(FILE * fp);
Expand All @@ -77,6 +82,7 @@ bool isRepeat(const std::string & seq, int K);
bool isAlmostRepeat(const std::string & seq, int K, int max);
bool kMismatch(size_t s, size_t e, const std::string & t, size_t start, int max);
bool seqAboveQual(std::string qv, int Q);
bool checkPresenceOfMDtag(BamReader &reader);
void parseMD(std::string & md, std::map<int,int> & map, int start, std::string & qual, int min_qv);
bool findTandems(const std::string & seq, const std::string & tag, int max_unit_len, int min_report_units, int min_report_len, int dist_from_str, int pos, int & len, std::string & motif);

Expand Down

0 comments on commit 80905c6

Please sign in to comment.