From 35c0ef206a078ed3146baacebf608cb240498865 Mon Sep 17 00:00:00 2001
From: "Erik Garrison (aider)"
Date: Wed, 20 Nov 2024 11:47:53 -0600
Subject: [PATCH] feat: Add logging for target subset statistics during mapping

---
 src/map/include/computeMap.hpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/map/include/computeMap.hpp b/src/map/include/computeMap.hpp
index b7f69cd4..727de1ba 100644
--- a/src/map/include/computeMap.hpp
+++ b/src/map/include/computeMap.hpp
@@ -533,6 +533,19 @@ namespace skch
 
           std::vector> target_subsets = createTargetSubsets(targetSequenceNames);
 
+          // Calculate and log subset statistics
+          uint64_t total_subset_size = 0;
+          for (const auto& subset : target_subsets) {
+              for (const auto& seqName : subset) {
+                  seqno_t seqId = idManager->getSequenceId(seqName);
+                  total_subset_size += idManager->getSequenceLength(seqId);
+              }
+          }
+          double avg_subset_size = target_subsets.size() ? (double)total_subset_size / target_subsets.size() : 0;
+          std::cerr << "[wfmash::mashmap] Target subsets: " << target_subsets.size()
+                    << ", target size: " << param.index_by_size << "bp"
+                    << ", average size: " << std::fixed << std::setprecision(0) << avg_subset_size << "bp" << std::endl;
+
           std::unordered_map combinedMappings;
 
           // Build index for the current subset