Skip to content

Commit

Permalink
Merge pull request #277 from waveygang/map-chunk-query
Browse files Browse the repository at this point in the history
Map chunk query
  • Loading branch information
ekg authored Oct 13, 2024
2 parents 7eb3c4b + abda8f3 commit 1460a98
Show file tree
Hide file tree
Showing 11 changed files with 1,135 additions and 544 deletions.
7 changes: 5 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,11 @@ if (${CMAKE_BUILD_TYPE} MATCHES Generic)
endif ()

if (${CMAKE_BUILD_TYPE} MATCHES Debug)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O -g -fsanitize=address")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O -g -fsanitize=address")
# Enable debug symbols and ASan with no optimizations
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0 -g -fsanitize=address -fno-omit-frame-pointer")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fsanitize=address -fno-omit-frame-pointer")
# Ensure that ASan is linked explicitly
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
else()
set (CMAKE_C_FLAGS "${OpenMP_C_FLAGS} ${PIC_FLAG} ${EXTRA_FLAGS}")
set (CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS} ${PIC_FLAG} ${EXTRA_FLAGS}")
Expand Down
2 changes: 1 addition & 1 deletion src/common/progress.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class ProgressMeter {
//std::cerr << input_seconds << " seconds is " << days << " days, " << hours << " hours, " << minutes << " minutes, and " << seconds << " seconds." << std::endl;
}
void increment(const uint64_t& incr) {
completed += incr;
completed.fetch_add(incr, std::memory_order_relaxed);
}
};

Expand Down
36 changes: 22 additions & 14 deletions src/common/seqiter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ void for_each_seq_in_file(
}
}

void for_each_seq_in_file(
void for_each_seq_in_faidx_t(
faidx_t* fai,
const std::vector<std::string>& seq_names,
const std::function<void(const std::string&, const std::string&)>& func) {
Expand All @@ -119,6 +119,15 @@ void for_each_seq_in_file(
}
}
}

/**
 * @brief Run a callback over the named sequences of an indexed FASTA file.
 *
 * Loads the faidx index for `filename` with fai_load, forwards the index
 * together with `seq_names` and `func` to for_each_seq_in_faidx_t, and
 * releases the index with fai_destroy afterwards.
 *
 * If the index cannot be loaded, an error is written to std::cerr and the
 * function returns without invoking `func`.
 *
 * @param filename   path of the FASTA file whose index is loaded
 * @param seq_names  sequence names forwarded to for_each_seq_in_faidx_t
 * @param func       callback forwarded to for_each_seq_in_faidx_t
 *                   (presumably called as func(name, sequence) -- confirm
 *                   against for_each_seq_in_faidx_t)
 */
void for_each_seq_in_file(
    const std::string& filename,
    const std::vector<std::string>& seq_names,
    const std::function<void(const std::string&, const std::string&)>& func) {
    faidx_t* fai = fai_load(filename.c_str());
    if (fai == nullptr) {
        // fai_load signals failure with a null pointer; passing it on would
        // dereference null inside the faidx fetch calls.
        std::cerr << "[wfmash] ERROR, failed to load FASTA index for file "
                  << filename << std::endl;
        return;
    }
    try {
        for_each_seq_in_faidx_t(fai, seq_names, func);
    } catch (...) {
        // Do not leak the index when the callback throws; re-raise unchanged.
        fai_destroy(fai);
        throw;
    }
    fai_destroy(fai);
}

void for_each_seq_in_file_filtered(
const std::string& filename,
Expand All @@ -135,23 +144,22 @@ void for_each_seq_in_file_filtered(
int num_seqs = faidx_nseq(fai);
for (int i = 0; i < num_seqs; i++) {
const char* seq_name = faidx_iseq(fai, i);
bool prefix_skip = true;
for (const auto& prefix : query_prefix) {
if (strncmp(seq_name, prefix.c_str(), prefix.size()) == 0) {
prefix_skip = false;
break;
}
}
if (!query_prefix.empty() && prefix_skip) {
continue;
bool keep = false;
for (const auto& prefix : query_prefix) {
if (strncmp(seq_name, prefix.c_str(), prefix.size()) == 0) {
keep = true;
break;
}
}
if (query_list.empty() || query_list.count(seq_name)) {
keep = true;
}
if (!query_list.empty() && query_list.count(seq_name) == 0) {
continue;
if (keep) {
query_seq_names.push_back(seq_name);
}
query_seq_names.push_back(seq_name);
}

for_each_seq_in_file(
for_each_seq_in_faidx_t(
fai,
query_seq_names,
func);
Expand Down
14 changes: 1 addition & 13 deletions src/interface/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,10 @@ int main(int argc, char** argv) {
std::cerr << "[wfmash::map] Spaced seed sensitivity " << sps.sensitivity << std::endl;
}

//Build the sketch for reference
skch::Sketch referSketch(map_parameters);

std::chrono::duration<double> timeRefSketch = skch::Time::now() - t0;
std::cerr << "[wfmash::map] time spent computing the reference index: " << timeRefSketch.count() << " sec" << std::endl;

if (referSketch.minmerIndex.size() == 0)
{
std::cerr << "[wfmash::map] ERROR, reference sketch is empty. Reference sequences shorter than the segment length are not indexed" << std::endl;
return 1;
}

//Map the sequences in query file
t0 = skch::Time::now();

skch::Map mapper = skch::Map(map_parameters, referSketch);
skch::Map mapper = skch::Map(map_parameters);

std::chrono::duration<double> timeMapQuery = skch::Time::now() - t0;
std::cerr << "[wfmash::map] time spent mapping the query: " << timeMapQuery.count() << " sec" << std::endl;
Expand Down
12 changes: 12 additions & 0 deletions src/interface/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ void parse_args(int argc,
args::ValueFlag<std::string> mashmap_index(mapping_opts, "FILE", "Use MashMap index in FILE, create if it doesn't exist", {"mm-index"});
args::Flag create_mashmap_index_only(mapping_opts, "create-index-only", "Create only the index file without performing mapping", {"create-index-only"});
args::Flag overwrite_mashmap_index(mapping_opts, "overwrite-mm-index", "Overwrite MashMap index if it exists", {"overwrite-mm-index"});
args::ValueFlag<std::string> index_by(mapping_opts, "SIZE", "Set the target total size of sequences for each index subset", {"index-by"});

args::Group alignment_opts(parser, "[ Alignment Options ]");
args::ValueFlag<std::string> align_input_paf(alignment_opts, "FILE", "derive precise alignments for this input PAF", {'i', "input-paf"});
Expand Down Expand Up @@ -649,6 +650,17 @@ void parse_args(int argc,
map_parameters.overwrite_index = overwrite_mashmap_index;
map_parameters.create_index_only = create_mashmap_index_only;

if (index_by) {
const int64_t index_size = wfmash::handy_parameter(args::get(index_by));
if (index_size <= 0) {
std::cerr << "[wfmash] ERROR, skch::parseandSave, index-by size must be a positive integer." << std::endl;
exit(1);
}
map_parameters.index_by_size = index_size;
} else {
map_parameters.index_by_size = std::numeric_limits<size_t>::max(); // Default to indexing all sequences
}

if (approx_mapping) {
map_parameters.outFileName = "/dev/stdout";
yeet_parameters.approx_mapping = true;
Expand Down
20 changes: 12 additions & 8 deletions src/map/include/base_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ namespace skch
{
std::string name; //Name of the sequence
offset_t len; //Length of the sequence
int groupId; //Group ID for the sequence
};

//Label tags for strand information
Expand Down Expand Up @@ -208,13 +209,16 @@ namespace skch

typedef std::vector<MappingResult> MappingResultsVector_t;

//Vector type for storing MinmerInfo
typedef std::vector<MinmerInfo> MinVec_Type;

//Container to save copy of kseq object
struct InputSeqContainer
{
seqno_t seqCounter; //sequence counter
seqno_t seqId; //sequence id
offset_t len; //sequence length
std::string seq; //sequence string
std::string seqName; //sequence id
std::string name; //sequence name


/*
Expand All @@ -223,11 +227,11 @@ namespace skch
* @param[in] kseq_id sequence id name
* @param[in] len length of sequence
*/
InputSeqContainer(const std::string& s, const std::string& id, seqno_t seqcount)
: seqCounter(seqcount)
InputSeqContainer(const std::string& s, const std::string& name, seqno_t id)
: seqId(id)
, len(s.length())
, seq(s)
, seqName(id) { }
, name(name) { }
};

struct InputSeqProgContainer : InputSeqContainer
Expand All @@ -242,8 +246,8 @@ namespace skch
* @param[in] kseq_id sequence id name
* @param[in] len length of sequence
*/
InputSeqProgContainer(const std::string& s, const std::string& id, seqno_t seqcount, progress_meter::ProgressMeter& pm)
: InputSeqContainer(s, id, seqcount)
InputSeqProgContainer(const std::string& s, const std::string& name, seqno_t id, progress_meter::ProgressMeter& pm)
: InputSeqContainer(s, name, id)
, progress(pm) { }
};

Expand All @@ -267,7 +271,7 @@ namespace skch
struct QueryMetaData
{
char *seq; //query sequence pointer
seqno_t seqCounter; //query sequence counter
seqno_t seqId; //query sequence id
offset_t len; //length of this query sequence
offset_t fullLen; //length of the full sequence it derives from
int sketchSize; //sketch size
Expand Down
Loading

0 comments on commit 1460a98

Please sign in to comment.