Skip to content

Commit

Permalink
预估的线程大小
Browse files Browse the repository at this point in the history
  • Loading branch information
eric committed Jan 23, 2024
1 parent c8c5473 commit e96befa
Showing 1 changed file with 19 additions and 15 deletions.
34 changes: 19 additions & 15 deletions kr2r/src/bin/estimate_capacity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,12 @@ use clap::{error::ErrorKind, Error, Parser};
use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
use kr2r::mmscanner::{MinimizerScanner, BITS_PER_CHAR, DEFAULT_SPACED_SEED_MASK};
use kr2r::utils::{expand_spaced_seed_mask, find_library_fna_files};
use kr2r::KBuildHasher;
use kr2r::{murmur_hash3, KBuildHasher};
use seq_io::fasta::{Reader, Record};
use seq_io::parallel::read_parallel;
use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::{
atomic::{AtomicUsize, Ordering},
Arc, Mutex,
};
use std::sync::atomic::{AtomicUsize, Ordering};

#[derive(Parser, Debug)]
#[clap(
Expand Down Expand Up @@ -56,6 +53,9 @@ fn parse_binary(src: &str) -> Result<u64, std::num::ParseIntError> {
u64::from_str_radix(src, 2)
}

const RANGE_SECTIONS: u64 = 1024;
const RANGE_MASK: u64 = RANGE_SECTIONS - 1;

fn main() {
let mut args = Args::parse();
if args.k_mer < args.l_mer as u64 {
Expand All @@ -67,11 +67,12 @@ fn main() {
expand_spaced_seed_mask(args.spaced_seed_mask, BITS_PER_CHAR as u64);
}
let fna_files = find_library_fna_files(args.database);
let hllp: HyperLogLogPlus<u64, _> = HyperLogLogPlus::new(16, KBuildHasher::default()).unwrap();
let mut hllp: HyperLogLogPlus<u64, _> =
HyperLogLogPlus::new(16, KBuildHasher::default()).unwrap();

let hllp = Arc::new(Mutex::new(hllp));
let counter = Arc::new(AtomicUsize::new(0)); // 初始化原子计数器
let counter = AtomicUsize::new(0); // 初始化原子计数器

// let sets: HashSet<u64> = HashSet::new();
for fna_file in fna_files {
println!("fna_file {:?}", fna_file);
let reader = Reader::from_path(fna_file).unwrap();
Expand All @@ -81,7 +82,7 @@ fn main() {
read_parallel(
reader,
args.threads as u32,
args.threads / 2 as usize,
args.threads - 2 as usize,
|record_set| {
let mut count = 0;
let mut scanner = MinimizerScanner::default(k_mer, l_mer);
Expand All @@ -95,18 +96,21 @@ fn main() {
scanner.set_seq_end(seq);
while let Some(minimizer) = scanner.next_minimizer(seq) {
count += 1;
minimizer_set.insert(minimizer);
let hash_v = murmur_hash3(minimizer);
if hash_v & RANGE_MASK < args.n as u64 {
minimizer_set.insert(minimizer);
}
}
scanner.reset();
}
(minimizer_set, count)
},
|record_sets| {
while let Some(Ok((_, (m_set, count)))) = record_sets.next() {
let mut hllp_clone = hllp.lock().unwrap();
for minimizer in m_set {
hllp_clone.insert(&minimizer);
hllp.insert(&minimizer);
}
// sets.extend(m_set);

counter.fetch_add(count, Ordering::SeqCst);
}
Expand All @@ -116,8 +120,8 @@ fn main() {

let final_count = counter.load(Ordering::SeqCst); // 读取计数器的最终值

let mut hllp_clone = hllp.lock().unwrap();
let hllp_count = hllp_clone.count();
let hllp_count = hllp.count();
// println!("sets {:?}", sets.len() * 1024 / args.n);
println!("Final count: {:?}", final_count);
println!("HLLP count: {:?}", hllp_count);
println!("HLLP count: {:?}", hllp_count * 1024f64 / args.n as f64);
}

0 comments on commit e96befa

Please sign in to comment.