From 36286d8cb1251ffac4b838a4a460fe5a0c4f70c0 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 01/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } --- minimap2-sys/Cargo.toml | 2 +- minimap2-sys/src/bindings.rs | 2 +- minimap2-sys/src/lib.rs | 7 ++++++ src/lib.rs | 43 +++++++++++++++++++++++++++++++----- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/minimap2-sys/Cargo.toml b/minimap2-sys/Cargo.toml index 17cf0eb..463a49e 100644 --- a/minimap2-sys/Cargo.toml +++ b/minimap2-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "minimap2-sys" -version = "0.1.19+minimap2.2.28" +version = "0.1.20+minimap2.2.28" edition = "2021" links = "libminimap2" authors = ["Joseph Guhlin "] diff --git a/minimap2-sys/src/bindings.rs b/minimap2-sys/src/bindings.rs index ed56e1f..a722564 100644 --- a/minimap2-sys/src/bindings.rs +++ b/minimap2-sys/src/bindings.rs @@ -4686,7 +4686,7 @@ fn bindgen_test_layout_mm_idx_seq_t() { ); } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] pub struct mm_idx_t { pub b: i32, pub w: i32, diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index e5640ec..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -12,6 +12,13 @@ unsafe impl Send for mm_idx_t {} unsafe impl Send for mm_idx_reader_t {} unsafe impl Send for mm_mapopt_t {} +impl Drop for mm_idx_t { + fn drop(&mut self) { + unsafe { mm_idx_destroy(self) }; + } +} + + use std::mem::MaybeUninit; impl Default for mm_mapopt_t { diff --git a/src/lib.rs b/src/lib.rs index cc0729f..93ea145 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -266,7 +266,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone)] +#[derive(Clone, Copy)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, @@ -278,10 +278,10 @@ pub struct Aligner { pub threads: usize, /// Index created by minimap2 - pub idx: Option<*mut mm_idx_t>, + pub idx: Option>, /// Index reader created by minimap2 - pub idx_reader: Option, + pub idx_reader: Option>, /// Whether to add soft clipping to CIGAR result pub cigar_clipping: bool, @@ -871,7 +871,7 @@ impl Aligner { mm_reg = MaybeUninit::new(unsafe { mm_map( - self.idx.unwrap() as *const mm_idx_t, + self.idx.unwrap().as_ref() as *const mm_idx_t, seq.len() as i32, seq.as_ptr() as *const ::std::os::raw::c_char, &mut n_regs, @@ -990,7 +990,7 @@ impl Aligner { km, &mut cs_string, &mut m_cs_string, - self.idx.unwrap() as *const mm_idx_t, + self.idx.unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, true.into(), @@ -1009,7 +1009,7 @@ impl Aligner { km, &mut cs_string, &mut m_cs_string, - self.idx.unwrap() as *const mm_idx_t, + self.idx.unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, ); @@ -1935,4 +1935,35 @@ mod tests { ..sr }; } + + #[test] + fn double_free_test_index_load_before_threads() { + // Create a new aligner + let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); + + // Perform a test mapping to ensure the index is loaded and all + let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert(mappings.len() > 0); + + // Spawn two threads using thread scoped aligners, clone aligner + std::thread::scope(|s| { + let aligner = aligner.clone(); + let jh0 = s.spawn(|| { + let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + + let aligner = aligner.clone(); + let jh1 = s.spawn(|| { + std::thread::sleep(std::time::Duration::from_millis(200)); + let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + + jh0.join().unwrap(); + jh1.join().unwrap(); + }); + } } From 24b8217f59e271cdf3e9fb271d751c32b0810218 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 02/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + src/lib.rs | 163 ++++++++++++++++++++++++++++++---------- 2 files changed, 126 insertions(+), 38 deletions(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } diff --git a/src/lib.rs b/src/lib.rs index 93ea145..a8a93ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -266,7 +266,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone, Copy)] +#[derive(Clone)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, @@ -278,7 +278,7 @@ pub struct Aligner { pub threads: usize, /// Index created by minimap2 - pub idx: Option>, + pub idx: Option>, /// Index reader created by minimap2 pub idx_reader: Option>, @@ -679,7 +679,8 @@ impl Aligner { mm_idx_index_name(idx.assume_init()); } - self.idx = Some(unsafe { idx.assume_init() }); + let mm_idx = unsafe { idx.assume_init() }; + self.idx = Some(Arc::new(mm_idx)); Ok(()) } @@ -786,7 +787,8 @@ impl Aligner { ) }); - self.idx = Some(unsafe { idx.assume_init() }); + let mm_idx = unsafe { idx.assume_init() }; + self.idx = Some(Arc::new(mm_idx)); self.mapopt.mid_occ = 1000; Ok(self) @@ -871,7 +873,7 @@ impl Aligner { mm_reg = MaybeUninit::new(unsafe { mm_map( - self.idx.unwrap().as_ref() as *const mm_idx_t, + &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, seq.len() as i32, seq.as_ptr() as *const ::std::os::raw::c_char, &mut n_regs, @@ -880,6 +882,7 @@ impl Aligner { qname, ) }); + let mut mappings = Vec::with_capacity(n_regs as usize); for i in 0..n_regs { @@ -888,8 +891,10 @@ impl Aligner { let const_ptr = reg_ptr as *const mm_reg1_t; let reg: mm_reg1_t = *reg_ptr; - let contig: *mut ::std::os::raw::c_char = - (*((*(self.idx.unwrap())).seq.offset(reg.rid as isize))).name; + let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); + let contig = std::ffi::CStr::from_ptr( + (*(**idx).seq.offset(reg.rid as isize)).name, + ); let is_primary = reg.parent == reg.id && (reg.sam_pri() > 0); let is_supplementary = (reg.parent == reg.id) && (reg.sam_pri() == 0); @@ -990,7 +995,7 @@ impl Aligner { km, &mut cs_string, &mut m_cs_string, - self.idx.unwrap().as_ref() as *const mm_idx_t, + &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, true.into(), @@ -1009,7 +1014,7 @@ impl Aligner { km, &mut cs_string, &mut m_cs_string, - self.idx.unwrap().as_ref() as *const mm_idx_t, + &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, ); @@ -1040,16 +1045,17 @@ impl Aligner { }; let target_name_arc = Arc::new( - std::ffi::CStr::from_ptr(contig) + std::ffi::CStr::from_ptr(contig.as_ptr()) .to_str() .unwrap() .to_string(), ); + let target_len = (*(**idx).seq.offset(reg.rid as isize)).len as i32; + mappings.push(Mapping { target_name: Some(Arc::clone(&target_name_arc)), - target_len: (*((*(self.idx.unwrap())).seq.offset(reg.rid as isize))).len - as i32, + target_len, target_start: reg.rs, target_end: reg.re, query_name: query_name_arc.clone(), @@ -1153,20 +1159,6 @@ mod send { unsafe impl Send for Aligner {} } -/* TODO: This stopped working when we switched to not storing raw pointers but the structs themselves -/ Since Rust is now handling the structs, I think memory gets freed that way, maybe this is no longer -/ necessary? -/ TODO: Test for memory leaks -*/ -impl Drop for Aligner { - fn drop(&mut self) { - if self.idx.is_some() { - let idx = self.idx.take().unwrap(); - unsafe { mm_idx_destroy(idx) }; - } - } -} - #[derive(PartialEq, Eq)] pub enum FileFormat { FASTA, @@ -1827,20 +1819,20 @@ mod tests { Err("File does not exist") ); - if let Err("File is empty") = Aligner::builder().with_index("test_data/empty.fa", None) + if let Err("Index File is empty") = Aligner::builder().with_index("test_data/empty.fa", None) { println!("File is empty - Success"); } else { panic!("File is empty error not thrown"); } - if let Err("Invalid Path") = Aligner::builder().with_index("\0invalid_\0path\0", None) { + if let Err("Invalid Path for Index") = Aligner::builder().with_index("\0invalid_\0path\0", None) { println!("Invalid Path - Success"); } else { panic!("Invalid Path error not thrown"); } - if let Err("Invalid Output") = + if let Err("Invalid Output for Index") = Aligner::builder().with_index("test_data/MT-human.fa", Some("test\0test")) { println!("Invalid output - Success"); @@ -1937,33 +1929,128 @@ mod tests { } #[test] - fn double_free_test_index_load_before_threads() { + fn double_free_index_test() { + // Create a new aligner + println!("Creating aligner"); + let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); + println!("Aligner created"); + + // Perform a test mapping to ensure the index is loaded and all + let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + + println!("Going into threads"); + + // Spawn two threads using thread scoped aligners, clone aligner + std::thread::scope(|s| { + let aligner_ = aligner.clone(); + + // Confirm that aligner_ idx points to the same memory as aligner idx arc + assert_eq!(Arc::as_ptr(aligner.idx.as_ref().unwrap()), Arc::as_ptr(aligner_.idx.as_ref().unwrap())); + + // Confirm we have a strong count of 2 + assert_eq!(Arc::strong_count(&aligner.idx.as_ref().unwrap()), 2); + + let jh0 = s.spawn(move || { + let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + + let aligner_ = aligner.clone(); + let jh1 = s.spawn(move || { + std::thread::sleep(std::time::Duration::from_millis(200)); + let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + + jh0.join().unwrap(); + jh1.join().unwrap(); + }); + + println!("Past the first one"); + // Create a new aligner let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); // Perform a test mapping to ensure the index is loaded and all let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); - assert(mappings.len() > 0); + assert!(mappings.len() > 0); // Spawn two threads using thread scoped aligners, clone aligner std::thread::scope(|s| { - let aligner = aligner.clone(); - let jh0 = s.spawn(|| { - let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + let aligner0 = aligner.clone(); + let aligner1 = aligner.clone(); + + // Force drop logic + drop(aligner); + + let jh0 = s.spawn(move || { + println!("First thread"); + let mappings = aligner0.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); // Sleep 100ms std::thread::sleep(std::time::Duration::from_millis(100)); + println!("First thread done"); }); - let aligner = aligner.clone(); - let jh1 = s.spawn(|| { + // Join, to force drop logic from external thread + jh0.join().unwrap(); + + let jh1 = s.spawn(move || { + println!("Second thread"); std::thread::sleep(std::time::Duration::from_millis(200)); - let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + let mappings = aligner1.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); // Sleep 100ms std::thread::sleep(std::time::Duration::from_millis(100)); + assert!(Arc::strong_count(&aligner1.idx.as_ref().unwrap()) == 1); + println!("Second thread done"); }); + jh1.join().unwrap(); + }); + + println!("Moving to the third test"); + + // Finally with no test mapping + // Create a new aligner + let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); + + // Spawn two threads using thread scoped aligners, clone aligner + std::thread::scope(|s| { + let aligner0 = aligner.clone(); + let aligner1 = aligner.clone(); + + // Force drop logic + drop(aligner); + + let jh0 = s.spawn(move || { + println!("First thread - No mapping"); + let mappings = aligner0.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + + // Join, to force drop logic from external thread jh0.join().unwrap(); + + let jh1 = s.spawn(move || { + println!("Second thread - No mapping"); + std::thread::sleep(std::time::Duration::from_millis(200)); + let mappings = aligner1.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + jh1.join().unwrap(); - }); + }); + + } } From b843935084526e888e9d447d0286958c3bffaf31 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:52:22 +1300 Subject: [PATCH 03/36] htslib working with new arc, } impl MMIndex { pub fn n_seq(&self) -> u32 { - self.inner.n_seq + unsafe { (**self.inner).n_seq } } pub fn seqs(&self) -> Vec { let mut seqs: Vec = Vec::with_capacity(self.n_seq() as usize); for i in 0..self.n_seq() { - let _seq = unsafe { *(self.inner.seq).offset(i as isize) }; - let c_str = unsafe { ffi::CStr::from_ptr(_seq.name) }; + let _seq = unsafe { *(**self.inner).seq.offset(i as isize) }; + // let c_str = unsafe { ffi::CStr::from_ptr(_seq.name) }; + let c_str = unsafe { CStr::from_ptr(_seq.name) }; let rust_str = c_str.to_str().unwrap().to_string(); seqs.push(SeqMetaData { name: rust_str, @@ -344,7 +346,7 @@ impl From<&Aligner> for MMIndex { fn from(aligner: &Aligner) -> Self { MMIndex { // inner: aligner.idx.unwrap(), - inner: unsafe { **aligner.idx.as_ref().unwrap() }, + inner: std::sync::Arc::clone(&aligner.idx.as_ref().unwrap()) } } } From a92b02b13e0f0f9edd6b3f4fc4b639eebfe3fdac Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 04/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 5fb0d72ab1555b913acf591a460720c834201e2b Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 05/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index a8a93ff..71a9e80 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -266,7 +266,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone)] +#[derive(Clone, Copy)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, From e6280e247c8ca47796b2599835bf51a41166c549 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 06/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 958acdca17b2ea3bc8219e598f781548f678e9fa Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 07/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From fe349ec21551f9a257c3314ce50f81e7d7871120 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 08/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } From 8df880010ce88c9bbebf4054be7ff70f743b60e9 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 09/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From becf8f669fe6e8a33d9bb7dd65d59f15df4257f8 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 10/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 264725c4734570196c08cf5ea3907faf4b3ff41c Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 11/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } From 3cf26047a019c80ac9fb4f7d9d95aa21f8d3ee4b Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 12/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From d29cf8fe23f00cc17fa44d1f7c6bb7ed1c80a556 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 13/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 82f1d38f9a571da44af63492cf54ead8a8a975cf Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 14/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } From 2e9dda0f572af6f16946976e5361350f49775858 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 15/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 036532d6935a7e7650e526377dabdd4233b2f371 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 16/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 7eaf442dc0cb4d07cf1021c42fa11fc665b13705 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 17/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } From fe85c6e625286ff1ca7e7f10538e6b9e1899240c Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 18/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From a60a985a345b9f7c206b8ca374138a5c57957908 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 19/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 36ab013cbdd20a7da5d6775fb6b00a6567e884ad Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 14:40:12 +1300 Subject: [PATCH 20/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } --- .gitignore | 1 + Cargo.toml | 5 +++-- README.md | 45 ++++++++++++++++++++++++--------------------- src/lib.rs | 34 +++++++++++++++++++++++++++++++--- 4 files changed, 59 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index 4cde53c..2194cfd 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ test_data/*.bam test_data/*.mmi .ipynb_checkpoints .vscode/ +.quarto \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 1feaa74..2e84093 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,13 +50,14 @@ rayon = "1.10" [features] default = ["map-file"] -sse2only = ["minimap2-sys/sse2only"] +map-file = ["needletail"] #, "simdutf8"] htslib = ['rust-htslib'] simde = ["minimap2-sys/simde"] -map-file = ["needletail"] #, "simdutf8"] zlib-ng = ["minimap2-sys/zlib-ng"] curl = ["rust-htslib/curl"] static = ["minimap2-sys/static", "rust-htslib/static"] +sse2only = ["minimap2-sys/sse2only"] + [package.metadata.docs.rs] features = ["map-file", "htslib"] diff --git a/README.md b/README.md index 46f20db..0ff61f8 100644 --- a/README.md +++ b/README.md @@ -6,20 +6,21 @@ A rust FFI library for [minimap2](https://github.com/lh3/minimap2/). In developm [![codecov](https://codecov.io/gh/jguhlin/minimap2-rs/branch/main/graph/badge.svg?token=huw27ZC6Qy)](https://codecov.io/gh/jguhlin/minimap2-rs) # Structure -minimap2-sys is the library of the raw FFI bindings to minimap2. minimap2 is the more rusty version. +minimap2-sys is the raw FFI bindings to minimap2. minimap2 is the more opinionated, rusty version. # How to use ## Requirements ```toml -minimap2 = "0.1.20+minimap2.2.28" +minimap2 = "0.1.21+minimap2.2.28" ``` Also see [Features](#features) -Tested with rustc 1.64.0 and nightly. So probably a good idea to upgrade before running. But let me know if you run into pain points with older versions and will try to fix! +Tested with rustc 1.82.0 and nightly. So probably a good idea to upgrade before running. But let me know if you run into pain points with older versions and will try to fix. ## Minimap2 Version Table | minimap2-rs | minimap2 | |-------------|----------| +| 0.1.21 | 2.28 | | 0.1.20 | 2.28 | | 0.1.19 | 2.28 | | 0.1.18 | 2.28 | @@ -117,11 +118,13 @@ let results = sequences.par_iter().map(|seq| { ## Features The following crate features are available: -* `mm2-fast` - Replace minimap2 with [mm2-fast](https://github.com/bwa-mem2/mm2-fast). This is likely not portable. -* `htslib` - Support output of bam/sam files using htslib. -* `simde` - Compile minimap2 / mm2-fast with [simd-everywhere](https://github.com/simd-everywhere/simde) support. -* `map-file` - *Default* - Convenience function for mapping an entire file. Caution, this is single-threaded. -* `sse2only` - Compiles for SSE2 support only (Default is to try to compile for SSE4.1, SSE2 only is default on aarch64) +* map-file - Enables the ability to map a file directly to a reference. Enabled by deafult +* htslib - Provides an interface to minimap2 that returns rust_htslib::Records +* simde - Enables SIMD Everywhere library in minimap2 +* zlib-ng - Enables the use of zlib-ng for faster compression +* curl - Enables curl for htslib +* static - Builds minimap2 as a static library +* sse2only - Builds minimap2 with only SSE2 support Map-file is a *default* feature and enabled unless otherwise specified. @@ -129,7 +132,7 @@ Map-file is a *default* feature and enabled unless otherwise specified. * setting mismatch penalty for base transitions [minimap 2.27 release notes](https://github.com/lh3/minimap2/releases/tag/v2.27) * Generate ds tags to indicate uncertainty in indels -Potentially more, but I'm using this to keep track. I'd expect those would get implemented over time, but if you have urgent need open a pull request or an issue! Thanks +Potentially others. Please create an issue! ## Building for MUSL Follow these [instructions](https://github.com/rust-cross/rust-musl-cross#prebuilt-images). @@ -141,10 +144,9 @@ alias rust-musl-builder='docker run --rm -it -v "$(pwd)":/home/rust/src messense rust-musl-builder cargo build --release ``` -Please note minimap2 is only tested for x86_64. Other platforms may work, please open an issue if minimap2 compiles but minimap2-rs does not. +Minimap2 is tested on x86_64 and aarch64 (arm64). Other platforms may work, please open an issue if minimap2 compiles but minimap2-rs does not. ### Features tested with MUSL -* `mm2-fast` - **Fail** * `htslib` - **Success** * `simde` - **Success** @@ -153,23 +155,19 @@ Please note minimap2 is only tested for x86_64. Other platforms may work, please - [mappy-rs](https://github.com/Adoni5/mappy-rs) - Drop-in multi-threaded replacement for python's mappy - [HiFiHLA](https://github.com/PacificBiosciences/hifihla) - HLA star-calling tool for PacBio HiFi data - [STRdust](https://github.com/wdecoster/STRdust) - Tandem repeat genotyper for long reads - -# Want feedback -* Many fields are i32 / i8 to mimic the C environment, but would it make more sense to convert to u32 / u8 / usize? -* Let me know pain points! +- [oarfish](https://github.com/COMBINE-lab/oarfish) - transcript quantification from long-read RNA-seq data # Next things todo -* Print other tags so we can have an entire PAF format -* -sys Compile with SSE2 / SSE4.1 (auto-detect, but also make with features) * Multi-thread guide (tokio async threads or use crossbeam queue and traditional threads?) * Iterator interface for map_file -* MORE TESTS -* -sys Get SSE working with "sse" feature (compiles and tests work in -sys crate, but not main crate) * -sys Possible to decouple from pthread? -* -sys Enable Lisa-hash for mm2-fast? But must handle build arguments from the command-line. # Citation -You should cite the minimap2 papers if you use this in your work. +Please cite the appropriate minimap2 papers if you use this in your work, as well as this library. + +## DOI for this library + +## Minimap2 Papers > Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. > *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi] @@ -180,6 +178,11 @@ and/or: > *Bioinformatics*, **37**:4572-4574. [doi:10.1093/bioinformatics/btab705][doi2] # Changelog +### 0.1.21 minimap2 2.28 +#### Breaking Changes ++ Map now returns Arc String's to reduce memory allocation for large and/or repetitive jobs ++ map now takes an additional argument, query_name: Option> + ### 0.1.20 minimap2 2.28 + Fix htslib errors. No update to -sys crate needed. diff --git a/src/lib.rs b/src/lib.rs index 71a9e80..ad33f20 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -//! API providing a rusty interface to minimap2 or mm2-fast libraries. +//! API providing a rusty interface to minimap2 //! //! This library supports statically linking and compiling minimap2 directly, no separate install is required. //! @@ -12,10 +12,26 @@ //! # Crate Features //! This crate has multiple create features available. //! * map-file - Enables the ability to map a file directly to a reference. Enabled by deafult -//! * mm2-fast - Uses the mm2-fast library instead of standard minimap2 //! * htslib - Provides an interface to minimap2 that returns rust_htslib::Records //! * simde - Enables SIMD Everywhere library in minimap2 -//! * sse - Enables the use of SSE instructions +//! * zlib-ng - Enables the use of zlib-ng for faster compression +//! * curl - Enables curl for htslib +//! * static - Builds minimap2 as a static library +//! * sse2only - Builds minimap2 with only SSE2 support +//! +//! ## Previously Supported Features +//! * mm2-fast - Uses the mm2-fast library instead of standard minimap2 +//! +//! If needed, this can be re-enabled. +//! +//! # Compile-time options +//! I recommend the following: +//! ```toml +//! [profile.release] +//! opt-level = 3 +//! lto = "fat" +//! codegen-units = 1 +//! ``` //! //! # Examples //! ## Mapping a file to a reference @@ -2050,7 +2066,19 @@ mod tests { jh1.join().unwrap(); }); + } + // Test aligner cloning for flag permanence + #[test] + fn aligner_cloning_flags() { + let aligner = Aligner::builder().map_ont().with_cigar().with_index("yeast_ref.mmi", None).unwrap(); + // Confirm with_cigar is set + // self.mapopt.flag |= MM_F_CIGAR as i64; + assert_eq!(aligner.mapopt.flag & MM_F_CIGAR as i64, MM_F_CIGAR as i64); + // Clone aligner + let aligner_clone = aligner.clone(); + assert_eq!(aligner_clone.mapopt.flag & MM_F_CIGAR as i64, MM_F_CIGAR as i64); } + } From 47aacbf56020116a54c78957d1bc9447fb7a286f Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 14:54:03 +1300 Subject: [PATCH 21/36] Update docs README and doc updates --- README.md | 28 ++++++++++++++++++++++++---- minimap2-sys/README.md | 13 ++++++++++--- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0ff61f8..93aaa68 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Align a sequence: ```rust let seq: Vec = b"ACTGACTCACATCGACTACGACTACTAGACACTAGACTATCGACTACTGACATCGA"; let alignment = aligner - .map(&seq, false, false, None, None) + .map(&seq, false, false, None, None, Some(b"My Sequence Name")) .expect("Unable to align"); ``` @@ -54,6 +54,9 @@ let aligner = map_ont(); let aligner = asm20(); ``` +**Note** Each preset overwrites different arguments. Using multiple at a time is not technically supported, but will work. Results unknown. So be careful! +It's equivalent to running minimap2 -x map_ont -x short ... + ### Customization [MapOpts](https://docs.rs/minimap2-sys/0.1.5/minimap2_sys/struct.mm_mapopt_t.html) and [IdxOpts](https://docs.rs/minimap2-sys/0.1.5/minimap2_sys/struct.mm_idxopt_t.html) can be customized with Rust's struct pattern, as well as applying mapping settings. Inspired by [bevy](https://bevyengine.org/). ```rust @@ -84,7 +87,7 @@ let mut fasta = Fasta::from_buffer(&mut reader) for seq in reader { let seq = seq.unwrap(); let alignment: Vec = aligner - .map(&seq.sequence.unwrap(), false, false, None, None) + .map(&seq.sequence.unwrap(), false, false, None, None, None) .expect("Unable to align"); println!("{:?}", alignment); } @@ -112,7 +115,7 @@ This _appears_ to work. use rayon::prelude::*; let results = sequences.par_iter().map(|seq| { - aligner.map(seq.as_bytes(), false, false, None, None).unwrap() + aligner.map(seq.as_bytes(), false, false, None, None, None).unwrap() }).collect::>(); ``` @@ -179,9 +182,26 @@ and/or: # Changelog ### 0.1.21 minimap2 2.28 +Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB #### Breaking Changes + Map now returns Arc String's to reduce memory allocation for large and/or repetitive jobs -+ map now takes an additional argument, query_name: Option> ++ map now takes an additional argument, query_name: Option>, possibly solves [#75](https://github.com/jguhlin/minimap2-rs/issues/75) (@rob-p @mbhall88 @jguhlin) ++ Arc the Index, to prevent double-frees, solves [#71](https://github.com/jguhlin/minimap2-rs/issues/71) ++ Map file now passes in query name, which should help with [#75](https://github.com/jguhlin/minimap2-rs/issues/75) ++ Supplementary flag now better detected (@rob-p) ++ FIX: Cigar string missing softclip operation (@Sam-Sims) + +### Other Changes ++ Experimental Android support, solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66) ++ Better docs on applying presets, solves [#84](https://github.com/jguhlin/minimap2-rs/issues/84) ++ Better detection of target arch c_char's and ptr's, solves [#82](https://github.com/jguhlin/minimap2-rs/issues/82) ++ Support for M1 Mac compilation and addition of github workflows to test it, solving [#81](https://github.com/jguhlin/minimap2-rs/issues/81) ++ Rayon test, so some support, closes [#5](https://github.com/jguhlin/minimap2-rs/issues/5) ++ Static str's and now static CStr's ++ FIX: memory leak due to sequences allocated by minimap2 not being freed @charlesgregory ++ Add Send + Sync to Aligner, along with unit test @PB-DB ++ Only use rust-htslib/curl when curl feature is enabled @PB-DB ++ Mark bam::Record objects as supplementary @PB-DB ### 0.1.20 minimap2 2.28 + Fix htslib errors. No update to -sys crate needed. diff --git a/minimap2-sys/README.md b/minimap2-sys/README.md index cea5ea7..c066a37 100644 --- a/minimap2-sys/README.md +++ b/minimap2-sys/README.md @@ -1,8 +1,8 @@ # System Bindings for libminimap2 -Use this if you need lower-level bindings for minimap2. Also works with mm2-fast. +Use this if you need lower-level bindings for minimap2. # Minimap2 Version -Sync'd to minimap2 2.27 +Minimap2 2.28 ## Breaking Changes ### 0.0.18 @@ -10,14 +10,21 @@ mm2-fast and minimap2 have diverged. At this point mm2-fast is no longer support ## Features * vendored - Regenerate the bindings from the vendored minimap2 source. Requires llvm installed. Useful to update the bindings to a different version of minimap2. -* mm2-fast - Use [mm2-fast](https://github.com/bwa-mem2/mm2-fast) as the backend rather than minimap2 itself. * simde - Enable simde support (SIMD-everywhere) * sse - Enable some sse bindings +* zlib-ng - Use zlib-ng +* static - Static compilation ## TODO * Can we decouple from pthread? This would allow Windows and (possibly) WASM compilation. ## Changelog +### 0.1.20 minimap2.2.28 +* Move Drop impl to -sys crate + +### 0.1.19 minimap2.2.28 +* Update to minimap2 2.28 + ### 0.1.18 minimap2.2.27 * Regenerated bindings * mm2-fast and minimap2 have diverged From 41e4b3ff203ce02249fca41c0bb6458c658d5105 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 22/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } From 362f1da3fc247c6e6b572942a80e814d568ffa4c Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 23/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + src/lib.rs | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } diff --git a/src/lib.rs b/src/lib.rs index ad33f20..c9cb798 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -282,7 +282,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone, Copy)] +#[derive(Clone)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, @@ -2050,6 +2050,8 @@ mod tests { assert!(mappings.len() > 0); // Sleep 100ms std::thread::sleep(std::time::Duration::from_millis(100)); + assert!(Arc::strong_count(&aligner1.idx.as_ref().unwrap()) == 1); + println!("Second thread done"); }); // Join, to force drop logic from external thread From 18c726759ed0328ba0dd8cd13d0196d87ea6fea0 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 24/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From d59b47f65a8fbbbef0a31d788dfd783e4f8d879c Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 25/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c9cb798..5987eb4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -282,7 +282,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone)] +#[derive(Clone, Copy)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, From 750414c24109b4466736d6d76ae3b8719d71807f Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 26/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + src/lib.rs | 12 +----------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } diff --git a/src/lib.rs b/src/lib.rs index 5987eb4..8ebf91f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -282,7 +282,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone, Copy)] +#[derive(Clone)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, @@ -2055,16 +2055,6 @@ mod tests { }); // Join, to force drop logic from external thread - jh0.join().unwrap(); - - let jh1 = s.spawn(move || { - println!("Second thread - No mapping"); - std::thread::sleep(std::time::Duration::from_millis(200)); - let mappings = aligner1.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); - assert!(mappings.len() > 0); - // Sleep 100ms - std::thread::sleep(std::time::Duration::from_millis(100)); - }); jh1.join().unwrap(); }); From 1ad371bdd0fe439c27932bf884ec3e9537c4fe54 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 27/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From 2ea9f8d0b18af2ae93e30a88ceca86f1520bb0b2 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 12:29:15 +1300 Subject: [PATCH 28/36] #71 move drop to minimap2-sys impl Drop for mm_idx_t { fn drop(&mut self) { unsafe { mm_idx_destroy(self) }; } } --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 8ebf91f..238dfbe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -282,7 +282,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone)] +#[derive(Clone, Copy)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, From d2f9fb73dd030d8b4518b45d3f754d35c5720686 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:49:30 +1300 Subject: [PATCH 29/36] Fixes double free --- minimap2-sys/src/lib.rs | 1 + src/lib.rs | 74 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 2540154..23b3ed2 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,6 +14,7 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { + println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } diff --git a/src/lib.rs b/src/lib.rs index 238dfbe..5cb3776 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -282,7 +282,7 @@ impl Default for ThreadLocalBuffer { /// Aligner::builder(); /// ``` -#[derive(Clone, Copy)] +#[derive(Clone)] pub struct Aligner { /// Index options passed to minimap2 (mm_idxopt_t) pub idxopt: IdxOpt, @@ -295,6 +295,7 @@ pub struct Aligner { /// Index created by minimap2 pub idx: Option>, + pub idx: Option>, /// Index reader created by minimap2 pub idx_reader: Option>, @@ -695,6 +696,8 @@ impl Aligner { mm_idx_index_name(idx.assume_init()); } + let mm_idx = unsafe { idx.assume_init() }; + self.idx = Some(Arc::new(mm_idx)); let mm_idx = unsafe { idx.assume_init() }; self.idx = Some(Arc::new(mm_idx)); @@ -803,6 +806,8 @@ impl Aligner { ) }); + let mm_idx = unsafe { idx.assume_init() }; + self.idx = Some(Arc::new(mm_idx)); let mm_idx = unsafe { idx.assume_init() }; self.idx = Some(Arc::new(mm_idx)); self.mapopt.mid_occ = 1000; @@ -889,6 +894,7 @@ impl Aligner { mm_reg = MaybeUninit::new(unsafe { mm_map( + &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, seq.len() as i32, seq.as_ptr() as *const ::std::os::raw::c_char, @@ -899,6 +905,7 @@ impl Aligner { ) }); + let mut mappings = Vec::with_capacity(n_regs as usize); for i in 0..n_regs { @@ -907,6 +914,10 @@ impl Aligner { let const_ptr = reg_ptr as *const mm_reg1_t; let reg: mm_reg1_t = *reg_ptr; + let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); + let contig = std::ffi::CStr::from_ptr( + (*(**idx).seq.offset(reg.rid as isize)).name, + ); let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); let contig = std::ffi::CStr::from_ptr( (*(**idx).seq.offset(reg.rid as isize)).name, @@ -1012,6 +1023,7 @@ impl Aligner { &mut cs_string, &mut m_cs_string, &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, + &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, true.into(), @@ -1031,6 +1043,7 @@ impl Aligner { &mut cs_string, &mut m_cs_string, &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, + &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, ); @@ -1061,6 +1074,7 @@ impl Aligner { }; let target_name_arc = Arc::new( + std::ffi::CStr::from_ptr(contig.as_ptr()) std::ffi::CStr::from_ptr(contig.as_ptr()) .to_str() .unwrap() @@ -1069,9 +1083,12 @@ impl Aligner { let target_len = (*(**idx).seq.offset(reg.rid as isize)).len as i32; + let target_len = (*(**idx).seq.offset(reg.rid as isize)).len as i32; + mappings.push(Mapping { target_name: Some(Arc::clone(&target_name_arc)), target_len, + target_len, target_start: reg.rs, target_end: reg.re, query_name: query_name_arc.clone(), @@ -1835,6 +1852,7 @@ mod tests { Err("File does not exist") ); + if let Err("Index File is empty") = Aligner::builder().with_index("test_data/empty.fa", None) if let Err("Index File is empty") = Aligner::builder().with_index("test_data/empty.fa", None) { println!("File is empty - Success"); @@ -1842,12 +1860,14 @@ mod tests { panic!("File is empty error not thrown"); } + if let Err("Invalid Path for Index") = Aligner::builder().with_index("\0invalid_\0path\0", None) { if let Err("Invalid Path for Index") = Aligner::builder().with_index("\0invalid_\0path\0", None) { println!("Invalid Path - Success"); } else { panic!("Invalid Path error not thrown"); } + if let Err("Invalid Output for Index") = if let Err("Invalid Output for Index") = Aligner::builder().with_index("test_data/MT-human.fa", Some("test\0test")) { @@ -1945,16 +1965,22 @@ mod tests { } #[test] + fn double_free_index_test() { fn double_free_index_test() { // Create a new aligner println!("Creating aligner"); + println!("Creating aligner"); let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); println!("Aligner created"); + println!("Aligner created"); // Perform a test mapping to ensure the index is loaded and all let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); assert!(mappings.len() > 0); + println!("Going into threads"); + assert!(mappings.len() > 0); + println!("Going into threads"); // Spawn two threads using thread scoped aligners, clone aligner @@ -1967,6 +1993,17 @@ mod tests { // Confirm we have a strong count of 2 assert_eq!(Arc::strong_count(&aligner.idx.as_ref().unwrap()), 2); + let jh0 = s.spawn(move || { + let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + let aligner_ = aligner.clone(); + + // Confirm that aligner_ idx points to the same memory as aligner idx arc + assert_eq!(Arc::as_ptr(aligner.idx.as_ref().unwrap()), Arc::as_ptr(aligner_.idx.as_ref().unwrap())); + + // Confirm we have a strong count of 2 + assert_eq!(Arc::strong_count(&aligner.idx.as_ref().unwrap()), 2); + let jh0 = s.spawn(move || { let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); assert!(mappings.len() > 0); @@ -1974,6 +2011,8 @@ mod tests { std::thread::sleep(std::time::Duration::from_millis(100)); }); + let aligner_ = aligner.clone(); + let jh1 = s.spawn(move || { let aligner_ = aligner.clone(); let jh1 = s.spawn(move || { std::thread::sleep(std::time::Duration::from_millis(200)); @@ -1996,6 +2035,37 @@ mod tests { let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); assert!(mappings.len() > 0); + // Spawn two threads using thread scoped aligners, clone aligner + std::thread::scope(|s| { + let aligner0 = aligner.clone(); + let aligner1 = aligner.clone(); + + // Force drop logic + drop(aligner); + + let jh0 = s.spawn(move || { + println!("First thread"); + let mappings = aligner0.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); + + jh0.join().unwrap(); + jh1.join().unwrap(); + }); + + println!("Past the first one"); + + // Create a new aligner + let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); + + // Perform a test mapping to ensure the index is loaded and all + let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + // Spawn two threads using thread scoped aligners, clone aligner std::thread::scope(|s| { let aligner0 = aligner.clone(); @@ -2011,8 +2081,10 @@ mod tests { // Sleep 100ms std::thread::sleep(std::time::Duration::from_millis(100)); println!("First thread done"); + println!("First thread done"); }); + // Join, to force drop logic from external thread // Join, to force drop logic from external thread jh0.join().unwrap(); From fffe86919ced6ff06333a130ffe123b137a95de4 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 13:54:41 +1300 Subject: [PATCH 30/36] Remove debugging message --- minimap2-sys/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/minimap2-sys/src/lib.rs b/minimap2-sys/src/lib.rs index 23b3ed2..2540154 100644 --- a/minimap2-sys/src/lib.rs +++ b/minimap2-sys/src/lib.rs @@ -14,7 +14,6 @@ unsafe impl Send for mm_mapopt_t {} impl Drop for mm_idx_t { fn drop(&mut self) { - println!("----- Destroying index!"); unsafe { mm_idx_destroy(self) }; } } From fbdcb7dec49ae0f429482b2f49e6ccf8f7bd6a94 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 15:21:40 +1300 Subject: [PATCH 31/36] Fix merge errors --- src/lib.rs | 74 ++++++++---------------------------------------------- 1 file changed, 10 insertions(+), 64 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5cb3776..e6aec37 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -295,7 +295,6 @@ pub struct Aligner { /// Index created by minimap2 pub idx: Option>, - pub idx: Option>, /// Index reader created by minimap2 pub idx_reader: Option>, @@ -894,7 +893,6 @@ impl Aligner { mm_reg = MaybeUninit::new(unsafe { mm_map( - &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, seq.len() as i32, seq.as_ptr() as *const ::std::os::raw::c_char, @@ -1023,7 +1021,6 @@ impl Aligner { &mut cs_string, &mut m_cs_string, &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, - &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, true.into(), @@ -1043,7 +1040,6 @@ impl Aligner { &mut cs_string, &mut m_cs_string, &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, - &**self.idx.as_ref().unwrap().as_ref() as *const mm_idx_t, const_ptr, seq.as_ptr() as *const libc::c_char, ); @@ -1074,7 +1070,6 @@ impl Aligner { }; let target_name_arc = Arc::new( - std::ffi::CStr::from_ptr(contig.as_ptr()) std::ffi::CStr::from_ptr(contig.as_ptr()) .to_str() .unwrap() @@ -1088,7 +1083,6 @@ impl Aligner { mappings.push(Mapping { target_name: Some(Arc::clone(&target_name_arc)), target_len, - target_len, target_start: reg.rs, target_end: reg.re, query_name: query_name_arc.clone(), @@ -1852,7 +1846,6 @@ mod tests { Err("File does not exist") ); - if let Err("Index File is empty") = Aligner::builder().with_index("test_data/empty.fa", None) if let Err("Index File is empty") = Aligner::builder().with_index("test_data/empty.fa", None) { println!("File is empty - Success"); @@ -1860,14 +1853,12 @@ mod tests { panic!("File is empty error not thrown"); } - if let Err("Invalid Path for Index") = Aligner::builder().with_index("\0invalid_\0path\0", None) { if let Err("Invalid Path for Index") = Aligner::builder().with_index("\0invalid_\0path\0", None) { println!("Invalid Path - Success"); } else { panic!("Invalid Path error not thrown"); } - if let Err("Invalid Output for Index") = if let Err("Invalid Output for Index") = Aligner::builder().with_index("test_data/MT-human.fa", Some("test\0test")) { @@ -1965,22 +1956,16 @@ mod tests { } #[test] - fn double_free_index_test() { fn double_free_index_test() { // Create a new aligner println!("Creating aligner"); - println!("Creating aligner"); let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); println!("Aligner created"); - println!("Aligner created"); // Perform a test mapping to ensure the index is loaded and all let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); assert!(mappings.len() > 0); - println!("Going into threads"); - assert!(mappings.len() > 0); - println!("Going into threads"); // Spawn two threads using thread scoped aligners, clone aligner @@ -1993,17 +1978,6 @@ mod tests { // Confirm we have a strong count of 2 assert_eq!(Arc::strong_count(&aligner.idx.as_ref().unwrap()), 2); - let jh0 = s.spawn(move || { - let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); - assert!(mappings.len() > 0); - let aligner_ = aligner.clone(); - - // Confirm that aligner_ idx points to the same memory as aligner idx arc - assert_eq!(Arc::as_ptr(aligner.idx.as_ref().unwrap()), Arc::as_ptr(aligner_.idx.as_ref().unwrap())); - - // Confirm we have a strong count of 2 - assert_eq!(Arc::strong_count(&aligner.idx.as_ref().unwrap()), 2); - let jh0 = s.spawn(move || { let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); assert!(mappings.len() > 0); @@ -2011,8 +1985,6 @@ mod tests { std::thread::sleep(std::time::Duration::from_millis(100)); }); - let aligner_ = aligner.clone(); - let jh1 = s.spawn(move || { let aligner_ = aligner.clone(); let jh1 = s.spawn(move || { std::thread::sleep(std::time::Duration::from_millis(200)); @@ -2047,44 +2019,11 @@ mod tests { println!("First thread"); let mappings = aligner0.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); assert!(mappings.len() > 0); - let mappings = aligner_.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); - assert!(mappings.len() > 0); // Sleep 100ms std::thread::sleep(std::time::Duration::from_millis(100)); - }); - - jh0.join().unwrap(); - jh1.join().unwrap(); - }); - - println!("Past the first one"); - - // Create a new aligner - let aligner = Aligner::builder().map_ont().with_index("yeast_ref.mmi", None).unwrap(); - - // Perform a test mapping to ensure the index is loaded and all - let mappings = aligner.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); - assert!(mappings.len() > 0); - - // Spawn two threads using thread scoped aligners, clone aligner - std::thread::scope(|s| { - let aligner0 = aligner.clone(); - let aligner1 = aligner.clone(); - - // Force drop logic - drop(aligner); - - let jh0 = s.spawn(move || { - println!("First thread"); - let mappings = aligner0.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); - assert!(mappings.len() > 0); - // Sleep 100ms - std::thread::sleep(std::time::Duration::from_millis(100)); - println!("First thread done"); println!("First thread done"); }); - // Join, to force drop logic from external thread // Join, to force drop logic from external thread jh0.join().unwrap(); @@ -2122,11 +2061,19 @@ mod tests { assert!(mappings.len() > 0); // Sleep 100ms std::thread::sleep(std::time::Duration::from_millis(100)); - assert!(Arc::strong_count(&aligner1.idx.as_ref().unwrap()) == 1); - println!("Second thread done"); }); // Join, to force drop logic from external thread + jh0.join().unwrap(); + + let jh1 = s.spawn(move || { + println!("Second thread - No mapping"); + std::thread::sleep(std::time::Duration::from_millis(200)); + let mappings = aligner1.map("ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA".as_bytes(), false, false, None, None, Some("Sample Query")).unwrap(); + assert!(mappings.len() > 0); + // Sleep 100ms + std::thread::sleep(std::time::Duration::from_millis(100)); + }); jh1.join().unwrap(); }); @@ -2144,5 +2091,4 @@ mod tests { let aligner_clone = aligner.clone(); assert_eq!(aligner_clone.mapopt.flag & MM_F_CIGAR as i64, MM_F_CIGAR as i64); } - } From d832250f597cfba26bb9ba33bc106ec4e53c8e4b Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 15:30:44 +1300 Subject: [PATCH 32/36] Update build-test-rust-minimap2-sys.yml --- .github/workflows/build-test-rust-minimap2-sys.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build-test-rust-minimap2-sys.yml b/.github/workflows/build-test-rust-minimap2-sys.yml index c65727f..24cd37a 100644 --- a/.github/workflows/build-test-rust-minimap2-sys.yml +++ b/.github/workflows/build-test-rust-minimap2-sys.yml @@ -49,10 +49,6 @@ jobs: - run: cd minimap2-sys - name: Run minimap2-sys tests on Android aarch64 run: cross test --target aarch64-linux-android - - name: Run minimap2-sys tests on Android armv7 - run: cross test --target armv7-linux-androideabi - - name: Run minimap2-sys tests on Android i686 - run: cross test --target i686-linux-android - name: Run minimap2-sys tests on Android x86_64 run: cross test --target x86_64-linux-android From 84bd02f366a9b2dfd088762ed3e6f9c6dc3d9c8b Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 16:14:58 +1300 Subject: [PATCH 33/36] Publicly export minimap_sys as ffi, so minimap2::ffi::mm_idx_t ... etc --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index e6aec37..49302b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,6 +73,8 @@ use std::os::unix::ffi::OsStrExt; use libc::c_void; use minimap2_sys::*; +pub use minimap2_sys as ffi; + #[cfg(feature = "map-file")] use needletail::parse_fastx_file; From 4d3be0b0bdc10a41749f405be5a2d5b874c3e414 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 16:36:44 +1300 Subject: [PATCH 34/36] Some ergonomic functions for getting n_seqs and get_seq --- src/lib.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 49302b5..6cac12c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -337,6 +337,31 @@ impl Aligner { aligner } + + /// Get the number of sequences in the index + pub fn n_seq(&self) -> u32 { + unsafe { + let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); + let idx: *const mm_idx_t = *idx; + (*idx).n_seq as u32 + } + } + + /// Get sequences direct from the index + pub fn get_seq<'aln>(&'aln self, i: usize) -> Option<&'aln mm_idx_seq_t> { + unsafe { + let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); + let idx: *const mm_idx_t = *idx; + // todo, should this be > or >= + if i > self.n_seq() as usize { + return None; + } + let seq = (*idx).seq; + let seq = seq.offset(i as isize); + let seq = &*seq; + Some(seq) + } + } } impl Aligner { From 579b322858f0dbab642d122cf71268db624f6230 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 16:53:18 +1300 Subject: [PATCH 35/36] Fix macos-13 ci, add additional android tests --- .github/workflows/build-test-rust-minimap2-sys.yml | 4 +++- .github/workflows/build-test-rust-minimap2.yaml | 14 ++++++++++++++ README.md | 3 ++- src/lib.rs | 5 ++++- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-test-rust-minimap2-sys.yml b/.github/workflows/build-test-rust-minimap2-sys.yml index 24cd37a..24f7b3d 100644 --- a/.github/workflows/build-test-rust-minimap2-sys.yml +++ b/.github/workflows/build-test-rust-minimap2-sys.yml @@ -55,7 +55,9 @@ jobs: test-macos-13: runs-on: macos-13 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 + with: + submodules: 'recursive' - run: cd minimap2-sys - name: Run minimap2-sys tests run: cargo test diff --git a/.github/workflows/build-test-rust-minimap2.yaml b/.github/workflows/build-test-rust-minimap2.yaml index 2b606a9..84bc618 100644 --- a/.github/workflows/build-test-rust-minimap2.yaml +++ b/.github/workflows/build-test-rust-minimap2.yaml @@ -55,3 +55,17 @@ jobs: run: cargo test --features htslib - name: Run tests simde run: cargo test --features simde + + test-android: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: 'recursive' + - name: Install Rust Cross + run: rustup target add aarch64-linux-android armv7-linux-androideabi i686-linux-android x86_64-linux-android + - run: cargo install cross --git https://github.com/cross-rs/cross + - name: Run minimap2 tests on Android aarch64 + run: cross test --target aarch64-linux-android + - name: Run minimap2 tests on Android x86_64 + run: cross test --target x86_64-linux-android \ No newline at end of file diff --git a/README.md b/README.md index 93aaa68..4657bca 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,7 @@ Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB + FIX: Cigar string missing softclip operation (@Sam-Sims) ### Other Changes -+ Experimental Android support, solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66) ++ Add ergonomic functions n_seq and get_seq. + Better docs on applying presets, solves [#84](https://github.com/jguhlin/minimap2-rs/issues/84) + Better detection of target arch c_char's and ptr's, solves [#82](https://github.com/jguhlin/minimap2-rs/issues/82) + Support for M1 Mac compilation and addition of github workflows to test it, solving [#81](https://github.com/jguhlin/minimap2-rs/issues/81) @@ -202,6 +202,7 @@ Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB + Add Send + Sync to Aligner, along with unit test @PB-DB + Only use rust-htslib/curl when curl feature is enabled @PB-DB + Mark bam::Record objects as supplementary @PB-DB ++ Experimental Android support, solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66) ### 0.1.20 minimap2 2.28 + Fix htslib errors. No update to -sys crate needed. diff --git a/src/lib.rs b/src/lib.rs index 6cac12c..cb6b637 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -338,7 +338,7 @@ impl Aligner { aligner } - /// Get the number of sequences in the index + /// Returns the number of sequences in the index pub fn n_seq(&self) -> u32 { unsafe { let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); @@ -348,6 +348,9 @@ impl Aligner { } /// Get sequences direct from the index + /// + /// Returns a reference to the sequence at the given index + /// Remainds valid as long as the aligner is valid pub fn get_seq<'aln>(&'aln self, i: usize) -> Option<&'aln mm_idx_seq_t> { unsafe { let idx = Arc::as_ptr(self.idx.as_ref().unwrap()); From 2366de0c447b680c771f3ebcfb4bcdab52210716 Mon Sep 17 00:00:00 2001 From: Joseph Guhlin Date: Mon, 25 Nov 2024 16:53:52 +1300 Subject: [PATCH 36/36] Add arch's for android --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4657bca..9ab9112 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB + Add Send + Sync to Aligner, along with unit test @PB-DB + Only use rust-htslib/curl when curl feature is enabled @PB-DB + Mark bam::Record objects as supplementary @PB-DB -+ Experimental Android support, solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66) ++ Experimental Android support (tested on aarch64 and x86_64), solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66) ### 0.1.20 minimap2 2.28 + Fix htslib errors. No update to -sys crate needed.