Skip to content

Commit

Permalink
create all the indexes
Browse files: browse the repository at this point in the history
  • Loading branch information
ekg committed Oct 12, 2024
1 parent d602127 commit 6fe2be0
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 46 deletions.
6 changes: 6 additions & 0 deletions src/map/include/computeMap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,12 @@ namespace skch
// For each subset of target sequences
uint64_t subset_count = 0;
for (const auto& target_subset : target_subsets) {
std::cerr << "processing subset " << subset_count << " of " << target_subsets.size() << std::endl;
std::cerr << "entries: ";
for (const auto& seqName : target_subset) {
std::cerr << seqName << " ";
}
std::cerr << std::endl;
if (target_subset.empty()) {
continue; // Skip empty subsets
}
Expand Down
62 changes: 16 additions & 46 deletions src/map/include/winSketch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,64 +146,32 @@ namespace skch
std::cerr << "[mashmap::skch::Sketch] Initializing Sketch..." << std::endl;

// Calculate total sequence length
/*
for (const auto& fileName : param.refSequences) {
std::cerr << "targets are " << targets.size() << " ";
for (const auto& target : targets) {
std::cerr << target << " ";
}
std::cerr << std::endl;
seqiter::for_each_seq_in_file(
fileName,
targets,
[&](const std::string& seq_name, const std::string& seq) {
total_seq_length += seq.length();
});
}
*/

if (param.indexFilename.empty()
|| !stdfs::exists(param.indexFilename)
|| param.overwrite_index)
this->build(true, targets);
this->computeFreqHist();
this->computeFreqSeedSet();
this->dropFreqSeedSet();
this->hashFreq.clear();
if (!param.indexFilename.empty())
{
/*
std::atomic<bool> reader_done(false);
std::atomic<bool> workers_done(false);
progress_meter::ProgressMeter progress(total_seq_length, "[mashmap::skch::Sketch::initialize] indexed");
std::thread reader([&]() {
reader_thread(targets, reader_done);
});
std::vector<std::thread> workers; workers.reserve(param.threads);
for (int i = 0; i < param.threads; ++i) {
workers.emplace_back([&]() {
worker_thread(reader_done, progress);
});
}
std::thread writer([&]() {
writer_thread(workers_done, progress);
});
reader.join();
for (auto& worker : workers) {
worker.join();
}
workers_done.store(true);
writer.join();
*/

this->build(true, targets);
this->computeFreqHist();
this->computeFreqSeedSet();
this->dropFreqSeedSet();
this->hashFreq.clear();
if (!param.indexFilename.empty())
{
this->writeIndex();
}
if (param.create_index_only)
{
std::cerr << "[mashmap::skch::Sketch] Index created successfully. Exiting." << std::endl;
exit(0);
}
} else {
this->readIndex();
}

std::cerr << "[mashmap::skch::Sketch] Unique minmer hashes after pruning = " << (minmerPosLookupIndex.size() - this->frequentSeeds.size()) << std::endl;
std::cerr << "[mashmap::skch::Sketch] Total minmer windows after pruning = " << minmerIndex.size() << std::endl;
std::cerr << "[mashmap::skch::Sketch] Number of sequences = " << idManager.size() << std::endl;
Expand Down Expand Up @@ -302,6 +270,7 @@ namespace skch
std::chrono::time_point<std::chrono::system_clock> t0 = skch::Time::now();

if (compute_seeds) {
std::cerr << "creating seeds" << std::endl;

//Create the thread pool
ThreadPool<InputSeqContainer, MI_Type> threadPool([this](InputSeqContainer* e) { return buildHelper(e); }, param.threads);
Expand All @@ -316,6 +285,7 @@ namespace skch
fileName,
target_names,
[&](const std::string& seq_name, const std::string& seq) {
std::cerr << "on sequence " << seq_name << std::endl;
if (seq.length() >= param.segLength) {
seqno_t seqId = idManager.getSequenceId(seq_name);
threadPool.runWhenThreadAvailable(new InputSeqContainer(seq, seq_name, seqId));
Expand Down

0 comments on commit 6fe2be0

Please sign in to comment.