Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aligner clone logistics #93

Merged
merged 36 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
36286d8
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
24b8217
Fixes double free
jguhlin Nov 25, 2024
b843935
htslib working with new arc<idx
jguhlin Nov 25, 2024
a92b02b
Remove debugging message
jguhlin Nov 25, 2024
5fb0d72
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
e6280e2
Fixes double free
jguhlin Nov 25, 2024
958acdc
Remove debugging message
jguhlin Nov 25, 2024
fe349ec
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
8df8800
Fixes double free
jguhlin Nov 25, 2024
becf8f6
Remove debugging message
jguhlin Nov 25, 2024
264725c
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
3cf2604
Fixes double free
jguhlin Nov 25, 2024
d29cf8f
Remove debugging message
jguhlin Nov 25, 2024
82f1d38
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
2e9dda0
Fixes double free
jguhlin Nov 25, 2024
036532d
Remove debugging message
jguhlin Nov 25, 2024
7eaf442
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
fe85c6e
Fixes double free
jguhlin Nov 25, 2024
a60a985
Remove debugging message
jguhlin Nov 25, 2024
36ab013
#71 move drop to minimap2-sys
jguhlin Nov 25, 2024
47aacbf
Update docs
jguhlin Nov 25, 2024
41e4b3f
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
362f1da
Fixes double free
jguhlin Nov 25, 2024
18c7267
Remove debugging message
jguhlin Nov 25, 2024
d59b47f
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
750414c
Fixes double free
jguhlin Nov 25, 2024
1ad371b
Remove debugging message
jguhlin Nov 25, 2024
2ea9f8d
#71 move drop to minimap2-sys
jguhlin Nov 24, 2024
d2f9fb7
Fixes double free
jguhlin Nov 25, 2024
fffe869
Remove debugging message
jguhlin Nov 25, 2024
fbdcb7d
Fix merge errors
jguhlin Nov 25, 2024
d832250
Update build-test-rust-minimap2-sys.yml
jguhlin Nov 25, 2024
84bd02f
Publicly export minimap_sys as ffi, so minimap2::ffi::mm_idx_t ... etc
jguhlin Nov 25, 2024
4d3be0b
Some ergonomic functions for getting n_seqs and get_seq
jguhlin Nov 25, 2024
579b322
Fix macos-13 ci, add additional android tests
jguhlin Nov 25, 2024
2366de0
Add arch's for android
jguhlin Nov 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions .github/workflows/build-test-rust-minimap2-sys.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,15 @@ jobs:
- run: cd minimap2-sys
- name: Run minimap2-sys tests on Android aarch64
run: cross test --target aarch64-linux-android
- name: Run minimap2-sys tests on Android armv7
run: cross test --target armv7-linux-androideabi
- name: Run minimap2-sys tests on Android i686
run: cross test --target i686-linux-android
- name: Run minimap2-sys tests on Android x86_64
run: cross test --target x86_64-linux-android

test-macos-13:
runs-on: macos-13
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
submodules: 'recursive'
- run: cd minimap2-sys
- name: Run minimap2-sys tests
run: cargo test
Expand Down
14 changes: 14 additions & 0 deletions .github/workflows/build-test-rust-minimap2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,17 @@ jobs:
run: cargo test --features htslib
- name: Run tests simde
run: cargo test --features simde

test-android:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: 'recursive'
- name: Install Rust Cross
run: rustup target add aarch64-linux-android armv7-linux-androideabi i686-linux-android x86_64-linux-android
- run: cargo install cross --git https://github.com/cross-rs/cross
- name: Run minimap2 tests on Android aarch64
run: cross test --target aarch64-linux-android
- name: Run minimap2 tests on Android x86_64
run: cross test --target x86_64-linux-android
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ test_data/*.bam
test_data/*.mmi
.ipynb_checkpoints
.vscode/
.quarto
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,14 @@ rayon = "1.10"

[features]
default = ["map-file"]
sse2only = ["minimap2-sys/sse2only"]
map-file = ["needletail"] #, "simdutf8"]
htslib = ['rust-htslib']
simde = ["minimap2-sys/simde"]
map-file = ["needletail"] #, "simdutf8"]
zlib-ng = ["minimap2-sys/zlib-ng"]
curl = ["rust-htslib/curl"]
static = ["minimap2-sys/static", "rust-htslib/static"]
sse2only = ["minimap2-sys/sse2only"]


[package.metadata.docs.rs]
features = ["map-file", "htslib"]
72 changes: 48 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,21 @@ A rust FFI library for [minimap2](https://github.com/lh3/minimap2/). In developm
[![codecov](https://codecov.io/gh/jguhlin/minimap2-rs/branch/main/graph/badge.svg?token=huw27ZC6Qy)](https://codecov.io/gh/jguhlin/minimap2-rs)

# Structure
minimap2-sys is the library of the raw FFI bindings to minimap2. minimap2 is the more rusty version.
minimap2-sys is the raw FFI bindings to minimap2. minimap2 is the more opinionated, rusty version.

# How to use
## Requirements
```toml
minimap2 = "0.1.20+minimap2.2.28"
minimap2 = "0.1.21+minimap2.2.28"
```
Also see [Features](#features)

Tested with rustc 1.64.0 and nightly. So probably a good idea to upgrade before running. But let me know if you run into pain points with older versions and will try to fix!
Tested with rustc 1.82.0 and nightly. So probably a good idea to upgrade before running. But let me know if you run into pain points with older versions and will try to fix.

## Minimap2 Version Table
| minimap2-rs | minimap2 |
|-------------|----------|
| 0.1.21 | 2.28 |
| 0.1.20 | 2.28 |
| 0.1.19 | 2.28 |
| 0.1.18 | 2.28 |
Expand All @@ -42,7 +43,7 @@ Align a sequence:
```rust
let seq: Vec<u8> = b"ACTGACTCACATCGACTACGACTACTAGACACTAGACTATCGACTACTGACATCGA";
let alignment = aligner
.map(&seq, false, false, None, None)
.map(&seq, false, false, None, None, Some(b"My Sequence Name"))
.expect("Unable to align");
```

Expand All @@ -53,6 +54,9 @@ let aligner = map_ont();
let aligner = asm20();
```

**Note** Each preset overwrites different arguments. Using multiple at a time is not technically supported, but will work. Results unknown. So be careful!
It's equivalent to running minimap2 -x map_ont -x short ...

### Customization
[MapOpts](https://docs.rs/minimap2-sys/0.1.5/minimap2_sys/struct.mm_mapopt_t.html) and [IdxOpts](https://docs.rs/minimap2-sys/0.1.5/minimap2_sys/struct.mm_idxopt_t.html) can be customized with Rust's struct pattern, as well as applying mapping settings. Inspired by [bevy](https://bevyengine.org/).
```rust
Expand Down Expand Up @@ -83,7 +87,7 @@ let mut fasta = Fasta::from_buffer(&mut reader)
for seq in reader {
let seq = seq.unwrap();
let alignment: Vec<Mapping> = aligner
.map(&seq.sequence.unwrap(), false, false, None, None)
.map(&seq.sequence.unwrap(), false, false, None, None, None)
.expect("Unable to align");
println!("{:?}", alignment);
}
Expand Down Expand Up @@ -111,25 +115,27 @@ This _appears_ to work.
use rayon::prelude::*;

let results = sequences.par_iter().map(|seq| {
aligner.map(seq.as_bytes(), false, false, None, None).unwrap()
aligner.map(seq.as_bytes(), false, false, None, None, None).unwrap()
}).collect::<Vec<_>>();
```

## Features
The following crate features are available:
* `mm2-fast` - Replace minimap2 with [mm2-fast](https://github.com/bwa-mem2/mm2-fast). This is likely not portable.
* `htslib` - Support output of bam/sam files using htslib.
* `simde` - Compile minimap2 / mm2-fast with [simd-everywhere](https://github.com/simd-everywhere/simde) support.
* `map-file` - *Default* - Convenience function for mapping an entire file. Caution, this is single-threaded.
* `sse2only` - Compiles for SSE2 support only (Default is to try to compile for SSE4.1, SSE2 only is default on aarch64)
* map-file - Enables the ability to map a file directly to a reference. Enabled by deafult
* htslib - Provides an interface to minimap2 that returns rust_htslib::Records
* simde - Enables SIMD Everywhere library in minimap2
* zlib-ng - Enables the use of zlib-ng for faster compression
* curl - Enables curl for htslib
* static - Builds minimap2 as a static library
* sse2only - Builds minimap2 with only SSE2 support

Map-file is a *default* feature and enabled unless otherwise specified.

## Missing Features
* setting mismatch penalty for base transitions [minimap 2.27 release notes](https://github.com/lh3/minimap2/releases/tag/v2.27)
* Generate ds tags to indicate uncertainty in indels

Potentially more, but I'm using this to keep track. I'd expect those would get implemented over time, but if you have urgent need open a pull request or an issue! Thanks
Potentially others. Please create an issue!

## Building for MUSL
Follow these [instructions](https://github.com/rust-cross/rust-musl-cross#prebuilt-images).
Expand All @@ -141,10 +147,9 @@ alias rust-musl-builder='docker run --rm -it -v "$(pwd)":/home/rust/src messense
rust-musl-builder cargo build --release
```

Please note minimap2 is only tested for x86_64. Other platforms may work, please open an issue if minimap2 compiles but minimap2-rs does not.
Minimap2 is tested on x86_64 and aarch64 (arm64). Other platforms may work, please open an issue if minimap2 compiles but minimap2-rs does not.

### Features tested with MUSL
* `mm2-fast` - **Fail**
* `htslib` - **Success**
* `simde` - **Success**

Expand All @@ -153,23 +158,19 @@ Please note minimap2 is only tested for x86_64. Other platforms may work, please
- [mappy-rs](https://github.com/Adoni5/mappy-rs) - Drop-in multi-threaded replacement for python's mappy
- [HiFiHLA](https://github.com/PacificBiosciences/hifihla) - HLA star-calling tool for PacBio HiFi data
- [STRdust](https://github.com/wdecoster/STRdust) - Tandem repeat genotyper for long reads

# Want feedback
* Many fields are i32 / i8 to mimic the C environment, but would it make more sense to convert to u32 / u8 / usize?
* Let me know pain points!
- [oarfish](https://github.com/COMBINE-lab/oarfish) - transcript quantification from long-read RNA-seq data

# Next things todo
* Print other tags so we can have an entire PAF format
* -sys Compile with SSE2 / SSE4.1 (auto-detect, but also make with features)
* Multi-thread guide (tokio async threads or use crossbeam queue and traditional threads?)
* Iterator interface for map_file
* MORE TESTS
* -sys Get SSE working with "sse" feature (compiles and tests work in -sys crate, but not main crate)
* -sys Possible to decouple from pthread?
* -sys Enable Lisa-hash for mm2-fast? But must handle build arguments from the command-line.

# Citation
You should cite the minimap2 papers if you use this in your work.
Please cite the appropriate minimap2 papers if you use this in your work, as well as this library.

## DOI for this library

## Minimap2 Papers

> Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences.
> *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi]
Expand All @@ -180,6 +181,29 @@ and/or:
> *Bioinformatics*, **37**:4572-4574. [doi:10.1093/bioinformatics/btab705][doi2]

# Changelog
### 0.1.21 minimap2 2.28
Contributors to this release: @mbhall88 @rob-p @Sam-Sims @charlesgregory @PB-DB
#### Breaking Changes
+ Map now returns Arc String's to reduce memory allocation for large and/or repetitive jobs
+ map now takes an additional argument, query_name: Option<impl AsRef<[u8]>>, possibly solves [#75](https://github.com/jguhlin/minimap2-rs/issues/75) (@rob-p @mbhall88 @jguhlin)
+ Arc the Index, to prevent double-frees, solves [#71](https://github.com/jguhlin/minimap2-rs/issues/71)
+ Map file now passes in query name, which should help with [#75](https://github.com/jguhlin/minimap2-rs/issues/75)
+ Supplementary flag now better detected (@rob-p)
+ FIX: Cigar string missing softclip operation (@Sam-Sims)

### Other Changes
+ Add ergonomic functions n_seq and get_seq.
+ Better docs on applying presets, solves [#84](https://github.com/jguhlin/minimap2-rs/issues/84)
+ Better detection of target arch c_char's and ptr's, solves [#82](https://github.com/jguhlin/minimap2-rs/issues/82)
+ Support for M1 Mac compilation and addition of github workflows to test it, solving [#81](https://github.com/jguhlin/minimap2-rs/issues/81)
+ Rayon test, so some support, closes [#5](https://github.com/jguhlin/minimap2-rs/issues/5)
+ Static str's and now static CStr's
+ FIX: memory leak due to sequences allocated by minimap2 not being freed @charlesgregory
+ Add Send + Sync to Aligner, along with unit test @PB-DB
+ Only use rust-htslib/curl when curl feature is enabled @PB-DB
+ Mark bam::Record objects as supplementary @PB-DB
+ Experimental Android support (tested on aarch64 and x86_64), solves [#66](https://github.com/jguhlin/minimap2-rs/issues/66)

### 0.1.20 minimap2 2.28
+ Fix htslib errors. No update to -sys crate needed.

Expand Down
2 changes: 1 addition & 1 deletion minimap2-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "minimap2-sys"
version = "0.1.19+minimap2.2.28"
version = "0.1.20+minimap2.2.28"
edition = "2021"
links = "libminimap2"
authors = ["Joseph Guhlin <[email protected]>"]
Expand Down
13 changes: 10 additions & 3 deletions minimap2-sys/README.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
# System Bindings for libminimap2
Use this if you need lower-level bindings for minimap2. Also works with mm2-fast.
Use this if you need lower-level bindings for minimap2.

# Minimap2 Version
Sync'd to minimap2 2.27
Minimap2 2.28

## Breaking Changes
### 0.0.18
mm2-fast and minimap2 have diverged. At this point mm2-fast is no longer supported. Please use a previous crate version.

## Features
* vendored - Regenerate the bindings from the vendored minimap2 source. Requires llvm installed. Useful to update the bindings to a different version of minimap2.
* mm2-fast - Use [mm2-fast](https://github.com/bwa-mem2/mm2-fast) as the backend rather than minimap2 itself.
* simde - Enable simde support (SIMD-everywhere)
* sse - Enable some sse bindings
* zlib-ng - Use zlib-ng
* static - Static compilation

## TODO
* Can we decouple from pthread? This would allow Windows and (possibly) WASM compilation.

## Changelog
### 0.1.20 minimap2.2.28
* Move Drop impl to -sys crate

### 0.1.19 minimap2.2.28
* Update to minimap2 2.28

### 0.1.18 minimap2.2.27
* Regenerated bindings
* mm2-fast and minimap2 have diverged
Expand Down
2 changes: 1 addition & 1 deletion minimap2-sys/src/bindings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4686,7 +4686,7 @@ fn bindgen_test_layout_mm_idx_seq_t() {
);
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
#[derive(Debug, Clone)]
pub struct mm_idx_t {
pub b: i32,
pub w: i32,
Expand Down
7 changes: 7 additions & 0 deletions minimap2-sys/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ unsafe impl Send for mm_idx_t {}
unsafe impl Send for mm_idx_reader_t {}
unsafe impl Send for mm_mapopt_t {}

impl Drop for mm_idx_t {
fn drop(&mut self) {
unsafe { mm_idx_destroy(self) };
}
}


use std::mem::MaybeUninit;

impl Default for mm_mapopt_t {
Expand Down
16 changes: 9 additions & 7 deletions src/htslib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ use rust_htslib::bam::{Header, HeaderView, Record};
use std::ffi::{CStr, CString};
use std::mem::MaybeUninit;
use std::ptr;
use std::sync::Arc;

/// A wrapper around mm_bseq1_t
#[derive(Debug)]
Expand Down Expand Up @@ -164,7 +165,7 @@ impl Aligner {

let mm_reg = MaybeUninit::new(unsafe {
mm_ffi::mm_map(
self.idx.unwrap() as *const mm_ffi::mm_idx_t,
&**self.idx.as_ref().unwrap().as_ref() as *const mm_ffi::mm_idx_t,
query.inner.l_seq,
query.inner.seq as *const libc::c_char,
&mut n_regs,
Expand All @@ -191,7 +192,7 @@ impl Aligner {
// TODO: use mm_write_sam3 t do the writing so that we can pass the map_opt flags
mm_ffi::mm_write_sam(
result.as_mut_ptr(),
self.idx.unwrap() as *const mm_ffi::mm_idx_t,
&**self.idx.as_ref().unwrap().as_ref() as *const mm_ffi::mm_idx_t,
&query.inner as *const mm_ffi::mm_bseq1_t,
const_ptr,
n_regs,
Expand Down Expand Up @@ -304,19 +305,20 @@ pub struct SeqMetaData {

#[derive(Debug)]
pub struct MMIndex {
pub inner: mm_ffi::mm_idx_t,
pub inner: Arc<*mut mm_ffi::mm_idx_t>,
}

impl MMIndex {
pub fn n_seq(&self) -> u32 {
self.inner.n_seq
unsafe { (**self.inner).n_seq }
}

pub fn seqs(&self) -> Vec<SeqMetaData> {
let mut seqs: Vec<SeqMetaData> = Vec::with_capacity(self.n_seq() as usize);
for i in 0..self.n_seq() {
let _seq = unsafe { *(self.inner.seq).offset(i as isize) };
let c_str = unsafe { ffi::CStr::from_ptr(_seq.name) };
let _seq = unsafe { *(**self.inner).seq.offset(i as isize) };
// let c_str = unsafe { ffi::CStr::from_ptr(_seq.name) };
let c_str = unsafe { CStr::from_ptr(_seq.name) };
let rust_str = c_str.to_str().unwrap().to_string();
seqs.push(SeqMetaData {
name: rust_str,
Expand Down Expand Up @@ -344,7 +346,7 @@ impl From<&Aligner> for MMIndex {
fn from(aligner: &Aligner) -> Self {
MMIndex {
// inner: aligner.idx.unwrap(),
inner: unsafe { **aligner.idx.as_ref().unwrap() },
inner: std::sync::Arc::clone(&aligner.idx.as_ref().unwrap())
}
}
}
Expand Down
Loading