Skip to content

Commit

Permalink
bump noodles version and break stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
brentp committed Oct 9, 2024
1 parent 562ae6e commit ffddd2b
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 21 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ smartstring = { version = "1.0.1", optional = true }
smol_str = { version = "0.2.0", optional = true }
compact_str = { version = "0.7.0", optional = true }
kstring = { version = "2.0.0", optional = true }
noodles = { version = "0.52.0" }
noodles = { version = "0.83.0" }
flate2 = "1.0.26"
clap = { version = "4.2.7", features = ['derive'] }
env_logger = "0.10.0"
Expand Down Expand Up @@ -47,7 +47,7 @@ dyn_positioned = []
[dev-dependencies]
criterion = { version = "0.4", features = ["html_reports"] }
clap = { version = "4.3.0", features = ["derive"] }
mlua = {version = "0.9.1", features=["luau", "send", "parking_lot"]}
mlua = {version = "0.9.9", features=["luau", "send", "parking_lot"]}

[[bench]]
name = "random_intervals"
Expand Down
1 change: 0 additions & 1 deletion src/sniff.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use flate2::read::GzDecoder;
use noodles::bam::reader;
use std::io::{BufRead, Read, Seek};
use std::path::Path;

Expand Down
116 changes: 98 additions & 18 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ use crate::position::Position;
use crate::report::{Report, ReportFragment};
use crate::sniff::Compression;
use crate::sniff::FileFormat;
use noodles::bam;
use noodles::bcf;
use noodles::bgzf;
use noodles::sam;
use noodles::vcf::{self, Header, Record};
use std::io::Write;
use std::result::Result;
Expand Down Expand Up @@ -52,15 +56,21 @@ pub trait ColumnReporter {
/// Errors reported when an output writer cannot be set up for a requested
/// format.
#[derive(Debug)]
pub enum FormatConversionError {
/// Two formats that cannot be combined, plus a free-form message.
// NOTE(review): presumably (input format, output format, reason) — confirm
// the field order against the construction sites.
IncompatibleFormats(FileFormat, FileFormat, String),
/// The given format has no writer; returned by the `Writer` constructor's
/// catch-all arm when the output format is not one it handles.
UnsupportedFormat(FileFormat),
}

/// Header handed to the `Writer` constructor for the input being processed.
pub enum InputHeader {
/// A VCF header; the constructor stores it as the writer's header.
Vcf(vcf::Header),
/// A SAM header; the constructor currently drops it (stores no header),
/// since SAM-to-VCF header conversion is not implemented there.
Sam(sam::Header),
/// No header is available; the constructor stores no header.
None,
}

/// Output writer state: the input and output file formats, the compression
/// setting, the underlying writer, and an optional VCF header.
// NOTE(review): `writer` and `header` are each declared twice below, and
// `vcf_writer` sits next to the newer `GenomicWriter` field. This looks like
// removed and added lines shown together — confirm which declarations are
// current; a struct cannot declare the same field name twice.
pub struct Writer {
in_fmt: FileFormat,
out_fmt: FileFormat,
compression: Compression,
writer: Box<dyn Write>,
vcf_writer: Option<vcf::Writer<Box<dyn Write>>>,
header: Option<vcf::Header>,
writer: GenomicWriter,
header: Option<Header>,
}

impl Writer {
Expand All @@ -69,6 +79,7 @@ impl Writer {
out_fmt: Option<FileFormat>,
compression: Compression,
writer: Box<dyn Write>,
input_header: InputHeader,
) -> Result<Self, FormatConversionError> {
let out_fmt = match out_fmt {
Some(f) => f,
Expand All @@ -79,30 +90,46 @@ impl Writer {
},
};

let header = match in_fmt {
FileFormat::VCF => Some(vcf::Header::from_reader(reader)?),
_ => None,
let header = match input_header {
InputHeader::Vcf(h) => Some(h),
InputHeader::Sam(_) => None, // We'll need to convert SAM header to VCF header if needed
InputHeader::None => None,
};

let vcf_writer = if out_fmt == FileFormat::VCF {
Some(vcf::Writer::new(writer.clone()))
} else {
None
let genomic_writer = match out_fmt {
FileFormat::VCF => GenomicWriter::Vcf(vcf::Writer::new(writer)),
FileFormat::BCF => GenomicWriter::Bcf(bcf::Writer::new(writer)),
FileFormat::BAM => GenomicWriter::Bam(bam::Writer::new(writer)),
FileFormat::BED => GenomicWriter::Bed(writer),
// Handle other formats
_ => return Err(FormatConversionError::UnsupportedFormat(out_fmt)),
};

Ok(Self {
in_fmt: in_fmt.clone(),
out_fmt,
compression,
writer,
vcf_writer,
writer: genomic_writer,
header,
})
}

pub fn write_vcf_header(&mut self, header: &Header) -> Result<(), std::io::Error> {
if let Some(vcf_writer) = &mut self.vcf_writer {
vcf_writer.write_header(header)?;
match &mut self.writer {
GenomicWriter::Vcf(vcf_writer) => {
vcf_writer.write_header(header)?;
}
GenomicWriter::Bcf(bcf_writer) => {
bcf_writer.write_header(header)?;
}
_ => {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Cannot write VCF header for non-VCF/BCF formats",
));
}
}
self.header = Some(header.clone());
Ok(())
}

Expand All @@ -113,10 +140,15 @@ impl Writer {
) -> Result<(), std::io::Error> {
match self.out_fmt {
FileFormat::VCF => {
let vcf_writer = self
.vcf_writer
.as_mut()
.expect("VCF writer not initialized");
let vcf_writer = match &mut self.writer {
GenomicWriter::Vcf(writer) => writer,
_ => {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Expected VCF writer, but found a different format",
))
}
};

for fragment in report {
// Extract the VCF record from the position using matches!
Expand Down Expand Up @@ -235,3 +267,51 @@ impl Writer {
}
}
}

/// A format-specific output writer, one variant per supported output format.
///
/// Every variant ultimately writes to a `Box<dyn Write>` sink; the BCF and BAM
/// variants wrap that sink in a `bgzf::Writer`.
pub enum GenomicWriter {
/// VCF writer over the raw sink.
Vcf(vcf::Writer<Box<dyn Write>>),
/// BCF writer over a BGZF-wrapped sink.
Bcf(bcf::Writer<bgzf::Writer<Box<dyn Write>>>),
/// BAM writer over a BGZF-wrapped sink.
Bam(bam::Writer<bgzf::Writer<Box<dyn Write>>>),
/// BED output: the raw sink, with lines formatted by hand in `write_record`.
Bed(Box<dyn Write>),
/// GFF writer over the raw sink.
// NOTE(review): no `use noodles::gff;` is visible among this file's
// imports, and the `Writer` constructor's match never builds this variant
// (GFF falls through to `UnsupportedFormat`) — confirm the import and the
// intended construction path.
Gff(gff::Writer<Box<dyn Write>>),
// Add other formats as needed
}

// Dispatches the shared writer operations to whichever format-specific writer
// this enum currently holds.
// NOTE(review): both methods take VCF types (`Header` and `Record` are imported
// from `noodles::vcf`) and pass them unchanged to the BCF, BAM and GFF writers.
// Confirm that each of those writers accepts a VCF header/record, and that the
// method names and arities match the pinned noodles version.
impl GenomicWriterTrait for GenomicWriter {
/// Writes `header` through the wrapped writer.
///
/// The BED variant writes nothing and returns `Ok(())`.
fn write_header(&mut self, header: &Header) -> Result<(), std::io::Error> {
match self {
GenomicWriter::Vcf(writer) => writer.write_header(header),
GenomicWriter::Bcf(writer) => writer.write_header(header),
GenomicWriter::Bam(writer) => writer.write_header(header),
GenomicWriter::Bed(_) => Ok(()), // BED doesn't have a header
GenomicWriter::Gff(writer) => writer.write_header(header),
// Handle other formats
}
}

/// Writes `record` through the wrapped writer.
///
/// The BED variant formats the record itself as one line of three
/// tab-separated columns: `chrom`, `start`, `end`.
fn write_record(&mut self, record: &Record) -> Result<(), std::io::Error> {
match self {
GenomicWriter::Vcf(writer) => writer.write_record(record),
GenomicWriter::Bcf(writer) => writer.write_record(record),
GenomicWriter::Bam(writer) => writer.write_record(record),
GenomicWriter::Bed(writer) => {
// BED has no dedicated writer type here, so the line is emitted
// directly on the sink.
// NOTE(review): confirm `chrom()`, `start()` and `end()` exist on
// the VCF `Record` type and implement `Display`.
writeln!(
writer,
"{}\t{}\t{}",
record.chrom(),
record.start(),
record.end()
)
}
GenomicWriter::Gff(writer) => writer.write_record(record),
// Handle other formats
}
}
}

/// Operations shared by every output writer: emitting a header and emitting
/// a single record, each reporting failure as `std::io::Error`.
pub trait GenomicWriterTrait {
/// Writes the given VCF header to the output.
fn write_header(&mut self, header: &Header) -> Result<(), std::io::Error>;
/// Writes one VCF record to the output.
fn write_record(&mut self, record: &Record) -> Result<(), std::io::Error>;
// Add other common methods as needed
}

0 comments on commit ffddd2b

Please sign in to comment.