From e32cb291afbf7f735dd63fb7b86659c531b5444e Mon Sep 17 00:00:00 2001 From: Anuradha Wickramarachchi Date: Sun, 25 Aug 2024 16:55:24 +0930 Subject: [PATCH 1/5] DEV: python bindings feature - interim commit --- .github/workflows/pypi.yml | 162 +++++++++++++++++++++++++++++++++++++ Cargo.lock | 106 ++++++++++++++++++++++++ Cargo.toml | 3 +- composition/Cargo.toml | 1 + counter/Cargo.toml | 1 + coverage/Cargo.toml | 1 + kmer/Cargo.toml | 1 + kmertools/Cargo.toml | 1 + ktio/Cargo.toml | 1 + misc/Cargo.toml | 1 + pykt/.gitignore | 72 +++++++++++++++++ pykt/Cargo.toml | 17 ++++ pykt/pyproject.toml | 16 ++++ pykt/src/cgr.rs | 89 ++++++++++++++++++++ pykt/src/lib.rs | 13 +++ pykt/src/oligo.rs | 77 ++++++++++++++++++ 16 files changed, 561 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/pypi.yml create mode 100644 pykt/.gitignore create mode 100644 pykt/Cargo.toml create mode 100644 pykt/pyproject.toml create mode 100644 pykt/src/cgr.rs create mode 100644 pykt/src/lib.rs create mode 100644 pykt/src/oligo.rs diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 0000000..f696c2d --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,162 @@ +# This file is autogenerated by maturin v1.7.1 +# To update, run +# +# maturin generate-ci -m ./pykt/Cargo.toml github +# +name: Upload to PyPI + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + linux: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64 + - runner: ubuntu-latest + target: x86 + - runner: ubuntu-latest + target: aarch64 + - runner: ubuntu-latest + target: armv7 + - runner: ubuntu-latest + target: s390x + - runner: ubuntu-latest + target: ppc64le + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + sccache: 'true' + manylinux: auto + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-linux-${{ matrix.platform.target }} + path: dist + + musllinux: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: ubuntu-latest + target: x86_64 + - runner: ubuntu-latest + target: x86 + - runner: ubuntu-latest + target: aarch64 + - runner: ubuntu-latest + target: armv7 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + sccache: 'true' + manylinux: musllinux_1_2 + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-musllinux-${{ matrix.platform.target }} + path: dist + + windows: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: windows-latest + target: x64 + - runner: windows-latest + target: x86 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + architecture: ${{ matrix.platform.target }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-windows-${{ matrix.platform.target }} + path: dist + + macos: + runs-on: ${{ matrix.platform.runner }} + strategy: + matrix: + platform: + - runner: macos-12 + target: x86_64 + - runner: macos-14 + target: aarch64 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.platform.target }} + args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-macos-${{ matrix.platform.target }} + path: dist + + sdist: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out dist --manifest-path ./pykt/Cargo.toml + - name: Upload sdist + uses: actions/upload-artifact@v4 + with: + name: wheels-sdist + path: dist + + release: + name: Release + runs-on: ubuntu-latest + needs: [linux, musllinux, windows, macos, sdist] + steps: + - uses: actions/download-artifact@v4 + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --non-interactive --skip-existing wheels-*/* diff --git a/Cargo.lock b/Cargo.lock index bc1d043..ffea1d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,6 +477,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + [[package]] name = "instant" version = "0.1.12" @@ -581,6 +587,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.7.2" @@ -706,6 +721,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + [[package]] name = "ordered-float" version = "3.9.2" @@ -752,6 +773,79 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pykt" +version = "0.1.0" +dependencies = [ + "composition", + "kmer", + "pyo3", + "rayon", +] + +[[package]] +name = "pyo3" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.60", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn 2.0.60", +] + [[package]] name = "quote" version = "1.0.36" @@ -1001,6 +1095,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "thiserror" version = "1.0.58" @@ -1045,6 +1145,12 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + [[package]] name = "utf8parse" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index cb2c517..9181e0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ package.version = "0.1.0" package.edition = "2021" package.authors = ["Anuradha Wickramarachchi anuradhawick@gmail.com", "Vijini Mallawaarachchi viji.mallawaarachchi@gmail.com"] +package.description = "kmertools is a k-mer based feature extraction tool designed to support metagenomics and other bioinformatics analytics." -members = ["composition", "coverage", "kmertools", "kmer", "ktio", "counter", "misc"] +members = ["composition", "coverage", "kmertools", "kmer", "ktio", "counter", "misc", "pykt"] resolver = "2" diff --git a/composition/Cargo.toml b/composition/Cargo.toml index 88ef689..3695521 100644 --- a/composition/Cargo.toml +++ b/composition/Cargo.toml @@ -3,6 +3,7 @@ name = "composition" version.workspace = true edition.workspace = true authors.workspace = true +description.workspace = true [dependencies] kmer = { path = "../kmer" } diff --git a/counter/Cargo.toml b/counter/Cargo.toml index 4330b08..91df802 100644 --- a/counter/Cargo.toml +++ b/counter/Cargo.toml @@ -3,6 +3,7 @@ name = "counter" version.workspace = true edition.workspace = true authors.workspace = true +description.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/coverage/Cargo.toml b/coverage/Cargo.toml index a4852b8..c45b6c2 100644 --- a/coverage/Cargo.toml +++ b/coverage/Cargo.toml @@ -3,6 +3,7 @@ name = "coverage" version.workspace = true edition.workspace = true authors.workspace = true +description.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/kmer/Cargo.toml b/kmer/Cargo.toml index 0b8c957..618d808 100644 --- a/kmer/Cargo.toml +++ b/kmer/Cargo.toml @@ -3,6 +3,7 @@ name = "kmer" version.workspace = true edition.workspace = true authors.workspace = true +description.workspace = true [dependencies] indicatif = "0.17.8" diff --git a/kmertools/Cargo.toml b/kmertools/Cargo.toml index 8425e2d..42e63d7 100644 --- a/kmertools/Cargo.toml +++ b/kmertools/Cargo.toml @@ -3,6 +3,7 @@ name = "kmertools" version.workspace = true edition.workspace = true authors.workspace = true +description.workspace = true [dependencies] clap = { version = "4.5.4", features = ["derive"] } diff --git a/ktio/Cargo.toml b/ktio/Cargo.toml index 1909adc..c1a6451 100644 --- a/ktio/Cargo.toml +++ b/ktio/Cargo.toml @@ -3,6 +3,7 @@ name = "ktio" version.workspace = true edition.workspace = true authors.workspace = true +description.workspace = true [dependencies] bio = "1.6.0" diff --git a/misc/Cargo.toml b/misc/Cargo.toml index ddf2404..381527a 100644 --- a/misc/Cargo.toml +++ b/misc/Cargo.toml @@ -3,6 +3,7 @@ name = "misc" version.workspace = true edition.workspace = true authors.workspace = true +description.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/pykt/.gitignore b/pykt/.gitignore new file mode 100644 index 0000000..c8f0442 --- /dev/null +++ b/pykt/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version diff --git a/pykt/Cargo.toml b/pykt/Cargo.toml new file mode 100644 index 0000000..f8d172c --- /dev/null +++ b/pykt/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "pykt" +version.workspace = true +edition.workspace = true +authors.workspace = true +description.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "kmertools" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = "0.22.0" +rayon = "1.10.0" +composition = { path = "../composition" } +kmer = { path = "../kmer" } diff --git a/pykt/pyproject.toml b/pykt/pyproject.toml new file mode 100644 index 0000000..444eba0 --- /dev/null +++ b/pykt/pyproject.toml @@ -0,0 +1,16 @@ +[build-system] +requires = ["maturin>=1.7,<2.0"] +build-backend = "maturin" + +[project] +name = "kmertools" +requires-python = ">=3.9" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dynamic = ["version", "description"] + +[tool.maturin] +features = ["pyo3/extension-module"] diff --git a/pykt/src/cgr.rs b/pykt/src/cgr.rs new file mode 100644 index 0000000..ab60171 --- /dev/null +++ b/pykt/src/cgr.rs @@ -0,0 +1,89 @@ +use pyo3::{exceptions::PyValueError, prelude::*}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use std::collections::HashMap; + +type Point = (f64, f64); + +fn cgr_maps(vecsize: f64) -> (Point, HashMap) { + let cgr_a: Point = (0.0, 0.0); + let cgr_t: Point = (vecsize, 0.0); + let cgr_g: Point = (vecsize, vecsize); + let cgr_c: Point = (0.0, vecsize); + let cgr_center: Point = (vecsize / 2.0, vecsize / 2.0); + + let cgr_dict: HashMap = [ + (b'A', cgr_a), // Adenine + (b'T', cgr_t), // Thymine + (b'G', cgr_g), // Guanine + (b'C', cgr_c), // Cytosine + (b'U', cgr_t), // Uracil (demethylated form of thymine) + (b'a', cgr_a), // Adenine + (b't', cgr_t), // Thymine + (b'g', cgr_g), // Guanine + (b'c', cgr_c), // Cytosine + (b'u', cgr_t), // Uracil/Thymine + ] + .iter() + .cloned() + .collect(); + + (cgr_center, cgr_dict) +} + +/// Computer for generating chaos game representation (cgr) +#[pyclass] +pub struct CgrComputer { + cgr_center: Point, + cgr_map: HashMap, +} + +#[pymethods] +impl CgrComputer { + /// Initialise the cgr counter + /// Attributes: + /// ksize (int): size of the k-mers to count + #[new] + #[pyo3(signature = (vecsize))] + fn new(vecsize: usize) -> Self { + let (cgr_center, cgr_map) = cgr_maps(vecsize as f64); + + Self { + cgr_center, + cgr_map, + } + } + + /// Generate the cgr + /// Attributes: + /// seq (str): sequence as a string + #[pyo3(signature = (seq))] + fn vectorise_one(&self, seq: String) -> PyResult> { + let mut cgr = Vec::with_capacity(seq.len()); + let mut cgr_marker = self.cgr_center; + + for s in seq.as_bytes().iter() { + if let Some(&cgr_corner) = self.cgr_map.get(s) { + cgr_marker = ( + (cgr_corner.0 + cgr_marker.0) / 2.0, + (cgr_corner.1 + cgr_marker.1) / 2.0, + ); + cgr.push(cgr_marker); + } else { + return Err(PyValueError::new_err("Bad nucleotide, unable to proceed")); + } + } + + Ok(cgr) + } + + /// Generate the cgrs + /// Attributes: + /// seq (list[str]): list of sequences + /// norm (bool): enable normalisation by counts + #[pyo3(signature = (seqs))] + fn vectorise_batch(&self, seqs: Vec) -> PyResult>> { + seqs.into_par_iter() + .map(|seq| self.vectorise_one(seq)) + .collect() + } +} diff --git a/pykt/src/lib.rs b/pykt/src/lib.rs new file mode 100644 index 0000000..1fad443 --- /dev/null +++ b/pykt/src/lib.rs @@ -0,0 +1,13 @@ +mod cgr; +mod oligo; +use cgr::CgrComputer; +use oligo::OligoComputer; +use pyo3::prelude::*; + +/// A Python module implemented in Rust. +#[pymodule] +fn kmertools(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + Ok(()) +} diff --git a/pykt/src/oligo.rs b/pykt/src/oligo.rs new file mode 100644 index 0000000..4d9be46 --- /dev/null +++ b/pykt/src/oligo.rs @@ -0,0 +1,77 @@ +use kmer::{kmer::KmerGenerator, numeric_to_kmer}; +use pyo3::prelude::*; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use std::collections::HashMap; + +/// Computer for generating oligonucleotide frequency vectors +#[pyclass] +pub struct OligoComputer { + ksize: usize, + kcount: usize, + pos_map: Vec, + pos_kmer: HashMap, +} + +#[pymethods] +impl OligoComputer { + /// Initialise the kmer counter + /// Attributes: + /// ksize (int): size of the k-mers to count + #[new] + #[pyo3(signature = (ksize))] + fn new(ksize: usize) -> Self { + let (min_mer_pos_map, pos_min_mer_map, kcount) = KmerGenerator::kmer_pos_maps(ksize); + + Self { + ksize, + kcount, + pos_map: min_mer_pos_map, + pos_kmer: pos_min_mer_map, + } + } + + /// Generate the oligo nucletide vector + /// Attributes: + /// seq (str): sequence as a string + /// norm (bool): enable normalisation by counts + #[pyo3(signature = (seq, norm=true))] + fn vectorise_one(&self, seq: String, norm: bool) -> Vec { + let mut vec = vec![0_f64; self.kcount]; + let mut total = 0_f64; + + for (fmer, rmer) in KmerGenerator::new(seq.as_bytes(), self.ksize) { + let min_mer = u64::min(fmer, rmer); + unsafe { + // we already know the size of the vector and + // min_mer is absolutely smaller than that + let &min_mer_pos = self.pos_map.get_unchecked(min_mer as usize); + *vec.get_unchecked_mut(min_mer_pos) += 1_f64; + total += 1_f64; + } + } + if norm { + vec.iter_mut().for_each(|el| *el /= f64::max(1_f64, total)); + } + vec + } + + /// Generate the oligo nucletide vector + /// Attributes: + /// seq (list[str]): list of sequences + /// norm (bool): enable normalisation by counts + #[pyo3(signature = (seqs, norm=true))] + fn vectorise_batch(&self, seqs: Vec, norm: bool) -> Vec> { + seqs.into_par_iter() + .map(|seq| self.vectorise_one(seq, norm)) + .collect() + } + + /// Generate the header for oligo nucletide vector + fn get_header(&self) -> Vec { + let mut kmers = vec![String::new(); self.kcount]; + for (&pos, &kmer) in self.pos_kmer.iter() { + kmers[pos] = numeric_to_kmer(kmer, self.ksize); + } + kmers + } +} From 38db931cb13f5fc49d3dbc835daac9b99a4dbe70 Mon Sep 17 00:00:00 2001 From: Anuradha Wickramarachchi Date: Sun, 25 Aug 2024 16:57:39 +0930 Subject: [PATCH 2/5] GIT: Update workflows --- .github/workflows/clippy_check.yml | 7 ++++++- .github/workflows/coverage.yml | 2 +- .github/workflows/pypi.yml | 5 ----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/clippy_check.yml b/.github/workflows/clippy_check.yml index e79c7a2..8162f89 100644 --- a/.github/workflows/clippy_check.yml +++ b/.github/workflows/clippy_check.yml @@ -1,6 +1,11 @@ -on: push name: Clippy check +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + # Make sure CI fails on all warnings, including Clippy lints env: RUSTFLAGS: "-Dwarnings" diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index c3ced2a..861b360 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -1,4 +1,4 @@ -name: coverage +name: Code coverage on: push: diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index f696c2d..bd250b7 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -1,8 +1,3 @@ -# This file is autogenerated by maturin v1.7.1 -# To update, run -# -# maturin generate-ci -m ./pykt/Cargo.toml github -# name: Upload to PyPI on: From 12fb0443483a354264072bd5eca2ad7ef957ed80 Mon Sep 17 00:00:00 2001 From: Anuradha Wickramarachchi Date: Sun, 25 Aug 2024 18:04:46 +0930 Subject: [PATCH 3/5] DEV: rename python module to pykmertools --- .github/workflows/pypi.yml | 10 +++++----- Cargo.lock | 2 +- Cargo.toml | 6 ++++-- {pykt => pykmertools}/.gitignore | 0 {pykt => pykmertools}/Cargo.toml | 6 ++++-- {pykt => pykmertools}/pyproject.toml | 11 +++++++++-- {pykt => pykmertools}/src/cgr.rs | 0 {pykt => pykmertools}/src/lib.rs | 2 +- {pykt => pykmertools}/src/oligo.rs | 0 9 files changed, 24 insertions(+), 13 deletions(-) rename {pykt => pykmertools}/.gitignore (100%) rename {pykt => pykmertools}/Cargo.toml (78%) rename {pykt => pykmertools}/pyproject.toml (51%) rename {pykt => pykmertools}/src/cgr.rs (100%) rename {pykt => pykmertools}/src/lib.rs (79%) rename {pykt => pykmertools}/src/oligo.rs (100%) diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index bd250b7..0bdcb78 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -34,7 +34,7 @@ jobs: uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + args: --release --out dist --find-interpreter --manifest-path ./pykmertools/Cargo.toml sccache: 'true' manylinux: auto - name: Upload wheels @@ -65,7 +65,7 @@ jobs: uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + args: --release --out dist --find-interpreter --manifest-path ./pykmertools/Cargo.toml sccache: 'true' manylinux: musllinux_1_2 - name: Upload wheels @@ -93,7 +93,7 @@ jobs: uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + args: --release --out dist --find-interpreter --manifest-path ./pykmertools/Cargo.toml sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 @@ -119,7 +119,7 @@ jobs: uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} - args: --release --out dist --find-interpreter --manifest-path ./pykt/Cargo.toml + args: --release --out dist --find-interpreter --manifest-path ./pykmertools/Cargo.toml sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 @@ -135,7 +135,7 @@ jobs: uses: PyO3/maturin-action@v1 with: command: sdist - args: --out dist --manifest-path ./pykt/Cargo.toml + args: --out dist --manifest-path ./pykmertools/Cargo.toml - name: Upload sdist uses: actions/upload-artifact@v4 with: diff --git a/Cargo.lock b/Cargo.lock index ffea1d6..d023ac8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -774,7 +774,7 @@ dependencies = [ ] [[package]] -name = "pykt" +name = "pykmertools" version = "0.1.0" dependencies = [ "composition", diff --git a/Cargo.toml b/Cargo.toml index 9181e0d..11d15bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,10 @@ [workspace] package.version = "0.1.0" package.edition = "2021" -package.authors = ["Anuradha Wickramarachchi anuradhawick@gmail.com", "Vijini Mallawaarachchi viji.mallawaarachchi@gmail.com"] +package.authors = ["Anuradha Wickramarachchi ", "Vijini Mallawaarachchi "] package.description = "kmertools is a k-mer based feature extraction tool designed to support metagenomics and other bioinformatics analytics." +package.readme = "README.md" +package.license-file = "LICENSE" -members = ["composition", "coverage", "kmertools", "kmer", "ktio", "counter", "misc", "pykt"] +members = ["composition", "coverage", "kmertools", "kmer", "ktio", "counter", "misc", "pykmertools"] resolver = "2" diff --git a/pykt/.gitignore b/pykmertools/.gitignore similarity index 100% rename from pykt/.gitignore rename to pykmertools/.gitignore diff --git a/pykt/Cargo.toml b/pykmertools/Cargo.toml similarity index 78% rename from pykt/Cargo.toml rename to pykmertools/Cargo.toml index f8d172c..426947c 100644 --- a/pykt/Cargo.toml +++ b/pykmertools/Cargo.toml @@ -1,13 +1,15 @@ [package] -name = "pykt" +name = "pykmertools" version.workspace = true edition.workspace = true authors.workspace = true description.workspace = true +readme.workspace = true +license-file.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] -name = "kmertools" +name = "pykmertools" crate-type = ["cdylib"] [dependencies] diff --git a/pykt/pyproject.toml b/pykmertools/pyproject.toml similarity index 51% rename from pykt/pyproject.toml rename to pykmertools/pyproject.toml index 444eba0..7e98503 100644 --- a/pykt/pyproject.toml +++ b/pykmertools/pyproject.toml @@ -3,14 +3,21 @@ requires = ["maturin>=1.7,<2.0"] build-backend = "maturin" [project] -name = "kmertools" +name = "pykmertools" requires-python = ">=3.9" classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dynamic = ["version", "description"] +dynamic = ["version", "description", "license", "readme"] +keywords = ["genomics", "bioinformatics"] + + +[project.urls] +Documentation = "https://github.com/anuradhawick/kmertools/wiki" +"Bug Tracker" = "https://github.com/anuradhawick/kmertools/issues" +"Source Code" = "https://github.com/anuradhawick/kmertools/" [tool.maturin] features = ["pyo3/extension-module"] diff --git a/pykt/src/cgr.rs b/pykmertools/src/cgr.rs similarity index 100% rename from pykt/src/cgr.rs rename to pykmertools/src/cgr.rs diff --git a/pykt/src/lib.rs b/pykmertools/src/lib.rs similarity index 79% rename from pykt/src/lib.rs rename to pykmertools/src/lib.rs index 1fad443..70edf93 100644 --- a/pykt/src/lib.rs +++ b/pykmertools/src/lib.rs @@ -6,7 +6,7 @@ use pyo3::prelude::*; /// A Python module implemented in Rust. #[pymodule] -fn kmertools(m: &Bound<'_, PyModule>) -> PyResult<()> { +fn pykmertools(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; Ok(()) diff --git a/pykt/src/oligo.rs b/pykmertools/src/oligo.rs similarity index 100% rename from pykt/src/oligo.rs rename to pykmertools/src/oligo.rs From a3d94cfa4270f084ca01b271e1fd29061db23bfa Mon Sep 17 00:00:00 2001 From: Anuradha Wickramarachchi Date: Sun, 25 Aug 2024 18:59:11 +0930 Subject: [PATCH 4/5] FIX: patch tarpaulin_include https://github.com/RustScan/RustScan/pull/647/files --- composition/Cargo.toml | 3 +++ kmertools/Cargo.toml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/composition/Cargo.toml b/composition/Cargo.toml index 3695521..d894d59 100644 --- a/composition/Cargo.toml +++ b/composition/Cargo.toml @@ -13,3 +13,6 @@ rayon = "1.10.0" [lib] doctest = false + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin_include)"] } diff --git a/kmertools/Cargo.toml b/kmertools/Cargo.toml index 42e63d7..29f5c7f 100644 --- a/kmertools/Cargo.toml +++ b/kmertools/Cargo.toml @@ -12,3 +12,6 @@ coverage = { path = "../coverage" } counter = { path = "../counter" } misc = { path = "../misc" } ktio = { path = "../ktio" } + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin_include)"] } From 01e0f8fbfc585fde2a6aa2c273c9915d2658a56a Mon Sep 17 00:00:00 2001 From: Anuradha Wickramarachchi Date: Mon, 26 Aug 2024 18:01:43 +0930 Subject: [PATCH 5/5] DEV: tarpaulin config, updated docs, cov(WIP) --- pykmertools/src/cgr.rs | 1 - pykmertools/src/cov.rs | 13 +++++++++++++ pykmertools/src/lib.rs | 8 +++++++- tarpaulin.toml | 2 ++ 4 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 pykmertools/src/cov.rs create mode 100644 tarpaulin.toml diff --git a/pykmertools/src/cgr.rs b/pykmertools/src/cgr.rs index ab60171..cc1d8d4 100644 --- a/pykmertools/src/cgr.rs +++ b/pykmertools/src/cgr.rs @@ -79,7 +79,6 @@ impl CgrComputer { /// Generate the cgrs /// Attributes: /// seq (list[str]): list of sequences - /// norm (bool): enable normalisation by counts #[pyo3(signature = (seqs))] fn vectorise_batch(&self, seqs: Vec) -> PyResult>> { seqs.into_par_iter() diff --git a/pykmertools/src/cov.rs b/pykmertools/src/cov.rs new file mode 100644 index 0000000..666bbf0 --- /dev/null +++ b/pykmertools/src/cov.rs @@ -0,0 +1,13 @@ +use pyo3::prelude::*; + +#[pyclass] +pub struct CovComputer {} + +#[pymethods] +impl CovComputer { + #[new] + #[pyo3(signature = ())] + fn new() -> Self { + Self {} + } +} diff --git a/pykmertools/src/lib.rs b/pykmertools/src/lib.rs index 70edf93..5e718e9 100644 --- a/pykmertools/src/lib.rs +++ b/pykmertools/src/lib.rs @@ -1,10 +1,16 @@ mod cgr; +mod cov; mod oligo; use cgr::CgrComputer; use oligo::OligoComputer; use pyo3::prelude::*; -/// A Python module implemented in Rust. +/// Pykmertools: kmertools python wrapper +/// Modules: +/// OligoComputer - computing oligonucleotide frequency vectors +/// from DNA sequences +/// CgrComputer - computing chaos game representations +/// for DNA sequences #[pymodule] fn pykmertools(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; diff --git a/tarpaulin.toml b/tarpaulin.toml new file mode 100644 index 0000000..5b16f57 --- /dev/null +++ b/tarpaulin.toml @@ -0,0 +1,2 @@ +[tarpaulin] +exclude = ["pykmertools"]