Skip to content

Commit

Permalink
Merge pull request #7 from eric9n/main
Browse files Browse the repository at this point in the history
fna
  • Loading branch information
eric9n authored Jan 11, 2024
2 parents b3e4340 + 68f1ebb commit e1ccdb8
Show file tree
Hide file tree
Showing 8 changed files with 387 additions and 112 deletions.
166 changes: 119 additions & 47 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,72 +5,144 @@ on:
branches:
- release

# This is the example from the readme.
# On each push to the `release` branch it will create or update a GitHub release, build your app, and upload the artifacts to the release.
env:
CARGO_TERM_COLOR: always
BINARY_PREFIX: ncbi

jobs:
build-and-release:
permissions:
contents: write
build-cross:
runs-on: ubuntu-latest
env:
RUST_BACKTRACE: full
strategy:
fail-fast: false
matrix:
platform: [macos-latest, ubuntu-20.04, windows-latest]
target:
- i686-unknown-linux-musl
- x86_64-pc-windows-gnu
- x86_64-unknown-linux-gnu
- x86_64-unknown-linux-musl
- armv7-unknown-linux-musleabihf
- armv7-unknown-linux-gnueabihf
- arm-unknown-linux-gnueabi
- arm-unknown-linux-gnueabihf
- arm-unknown-linux-musleabi
- arm-unknown-linux-musleabihf
- aarch64-unknown-linux-gnu
- aarch64-unknown-linux-musl
- mips-unknown-linux-musl
- mipsel-unknown-linux-musl

runs-on: ${{ matrix.platform }}
steps:
- uses: actions/checkout@v4
- name: Build
run: cargo build --release
- uses: actions/checkout@v3

# Set up the GitHub CLI
- name: Install GitHub CLI
- name: Install Rust
run: |
brew install gh
if: matrix.platform == 'macos-latest'
rustup set profile minimal
rustup toolchain install stable
rustup default stable
rustup override set stable
rustup target add --toolchain stable ${{ matrix.target }}
- name: Install GitHub CLI
run: |
sudo apt install -y gh
if: matrix.platform == 'ubuntu-20.04'
- name: Install cross
run: cargo install cross

- name: Install GitHub CLI
- name: Build ${{ matrix.target }}
timeout-minutes: 120
run: |
choco install gh
if: matrix.platform == 'windows-latest'
compile_target=${{ matrix.target }}
if [[ "$compile_target" == *"-linux-"* || "$compile_target" == *"-apple-"* ]]; then
compile_features="-f local-redir -f local-tun"
fi
if [[ "$compile_target" == "mips-"* || "$compile_target" == "mipsel-"* || "$compile_target" == "mips64-"* || "$compile_target" == "mips64el-"* ]]; then
sudo apt-get update -y && sudo apt-get install -y upx;
if [[ "$?" == "0" ]]; then
compile_compress="-u"
fi
fi
cd build
./build-release -t ${{ matrix.target }} $compile_features $compile_compress
- name: Upload Github Assets
uses: softprops/action-gh-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
files: build/release/*
prerelease: ${{ contains(github.ref_name, '-') }}
tag_name: ${{ inputs.tag || github.ref_name }}

build-unix:
runs-on: ${{ matrix.os }}
env:
BUILD_EXTRA_FEATURES: "local-redir local-tun"
RUST_BACKTRACE: full
strategy:
fail-fast: false
matrix:
# os: [ubuntu-latest, macos-latest]
os: [macos-latest]
target:
- x86_64-apple-darwin
- aarch64-apple-darwin
steps:
- uses: actions/checkout@v3

# Log in to the GitHub CLI
- name: Login to GitHub CLI
run: echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
- name: Install GNU tar
if: runner.os == 'macOS'
run: |
brew install gnu-tar
# echo "::add-path::/usr/local/opt/gnu-tar/libexec/gnubin"
echo "/usr/local/opt/gnu-tar/libexec/gnubin" >> $GITHUB_PATH
# Create a release
- name: Create Release
id: create_release
- name: Install Rust
run: |
gh release create ${{ github.ref_name }} \
--title "Release ${{ github.ref_name }}" \
--notes "Release notes for ${{ github.ref_name }}" \
--draft
rustup set profile minimal
rustup toolchain install stable
rustup default stable
rustup override set stable
rustup target add --toolchain stable ${{ matrix.target }}
- name: Build release
shell: bash
run: |
./build/build-host-release -t ${{ matrix.target }}
- name: Upload Github Assets
uses: softprops/action-gh-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
files: build/release/*
prerelease: ${{ contains(github.ref_name, '-') }}
tag_name: ${{ inputs.tag || github.ref_name }}

- name: Rename and prepare binaries for upload
build-windows:
runs-on: windows-latest
env:
RUSTFLAGS: "-C target-feature=+crt-static"
RUST_BACKTRACE: full
steps:
- uses: actions/checkout@v3

- name: Install Rust
run: |
EXT=""
if [ "${{ runner.os }}" = "Windows" ]; then
EXT=".exe"
fi
for file in "${BINARY_PREFIX}"*; do
mv "$file" "${file}-${RUNNER_OS}-${RUNNER_ARCH}${EXT}"
done
echo "Renamed binaries for upload"
shell: bash
rustup set profile minimal
rustup toolchain install stable
rustup default stable
rustup override set stable
- name: Build release
run: |
pwsh ./build/build-host-release.ps1
- name: Upload Artifacts
uses: actions/upload-artifact@v3
- name: Upload Github Assets
uses: softprops/action-gh-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
name: ${{ env.BINARY_PREFIX }}-${{ runner.os }}-${{ runner.arch }}
path: |
${BINARY_PREFIX}*-${RUNNER_OS}-${RUNNER_ARCH}${EXT}
files: build/release/*
prerelease: ${{ contains(github.ref_name, '-') }}
tag_name: ${{ inputs.tag || github.ref_name }}
1 change: 1 addition & 0 deletions ncbi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ lazy_static = "1.4"
log = "0.4"
env_logger = "0.10.1"
md-5 = "0.10.6"
async-compression = "0.4.5"
96 changes: 96 additions & 0 deletions ncbi/src/fna.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
use async_compression::tokio::bufread::GzipDecoder;
use regex::Regex;
use std::collections::HashMap;
use tokio::fs::OpenOptions;
use tokio::{
fs::File,
io::{AsyncBufReadExt, AsyncWriteExt, BufReader, BufWriter},
};

use anyhow::Result;
use std::path::PathBuf;

/// Builds a map from genome archive file name to taxid for one assembly summary.
///
/// Reads `assembly_summary_{site}.txt` inside `data_dir` as tab-separated text.
/// Lines starting with `#` and rows with 19 or fewer columns are ignored. For the
/// remaining rows, column 5 is taken as the taxid, column 11 as the assembly level
/// and column 19 as the FTP path. Only rows whose assembly level is
/// `Complete Genome` or `Chromosome` and whose FTP path is not `na` are kept.
///
/// The key of each entry is the last `/`-separated component of the FTP path with
/// `_genomic.fna.gz` appended; the value is the taxid. A later row with the same
/// key replaces an earlier one.
///
/// # Errors
///
/// Returns an error if the summary file cannot be opened or read.
pub async fn parse_assembly_fna(site: &str, data_dir: &PathBuf) -> Result<HashMap<String, String>> {
    let summary_path = data_dir.join(format!("assembly_summary_{}.txt", site));
    let mut rows = BufReader::new(File::open(&summary_path).await?).lines();
    let mut archives: HashMap<String, String> = HashMap::new();

    while let Some(row) = rows.next_line().await? {
        // Header / comment rows carry no assembly data.
        if row.starts_with('#') {
            continue;
        }

        let columns: Vec<&str> = row.split('\t').collect();
        // The FTP path lives at index 19, so shorter rows cannot be used.
        if columns.len() <= 19 {
            continue;
        }

        let taxid = columns[5];
        let asm_level = columns[11];
        let ftp_path = columns[19];

        let level_wanted = matches!(asm_level, "Complete Genome" | "Chromosome");
        if !level_wanted || ftp_path == "na" {
            continue;
        }

        // The final path component names the assembly directory and, by
        // extension, the archive stored inside it.
        let assembly_name = ftp_path.rsplit('/').next().unwrap_or_default();
        archives.insert(format!("{}_genomic.fna.gz", assembly_name), taxid.into());
    }

    Ok(archives)
}

pub async fn write_to_fna(site: &str, data_dir: &PathBuf) -> Result<()> {
log::info!("write to fna...");

let gz_files = parse_assembly_fna(site, data_dir).await?;
let library_fna_path = data_dir.join(format!("library_{}.fna", &site));
let prelim_map_path = data_dir.join(format!("prelim_map_{}.txt", &site));

let mut fna_writer = BufWriter::new(
OpenOptions::new()
.create(true)
.write(true)
.open(&library_fna_path)
.await?,
);
let mut map_writer = BufWriter::new(
OpenOptions::new()
.create(true)
.write(true)
.open(&prelim_map_path)
.await?,
);

let re: Regex = Regex::new(r"^>(\S+)").unwrap();

for (gz_path, taxid) in gz_files {
let gz_file = data_dir.join(&site).join(gz_path);
let file = File::open(gz_file).await?;
let decompressor = GzipDecoder::new(BufReader::new(file));
let mut reader = BufReader::new(decompressor);

let mut line = String::new();
while reader.read_line(&mut line).await? != 0 {
if let Some(caps) = re.captures(&line) {
let seqid = &caps[1];
map_writer
.write_all(format!("{}\t{}\n", seqid, taxid).as_bytes())
.await?;
fna_writer
.write_all(format!(">kraken:taxid|{}|{}", taxid, &line[1..]).as_bytes())
.await?;
} else {
fna_writer.write_all(line.as_bytes()).await?;
}
line.clear();
}
}

fna_writer.flush().await?;
map_writer.flush().await?;

log::info!("write to fna finished");
Ok(())
}
1 change: 1 addition & 0 deletions ncbi/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod client;
pub mod down;
pub mod fna;
pub mod load;
pub mod md5sum;
pub mod meta;
Expand Down
Loading

0 comments on commit e1ccdb8

Please sign in to comment.