Skip to content

Commit

Permalink
fasta/fai: Replace csv with a simple line parser
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Apr 29, 2020
1 parent 9e3689d commit 3b964c4
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 82 deletions.
1 change: 0 additions & 1 deletion noodles-fasta/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,3 @@ license = "MIT"
edition = "2018"

[dependencies]
csv = "1.0.1"
63 changes: 53 additions & 10 deletions noodles-fasta/src/fai/reader.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,66 @@
use std::io::Read;
use std::io::{self, BufRead};

use super::Record;

pub struct Reader<R> {
reader: csv::Reader<R>,
inner: R,
line_buf: String,
}

impl<R> Reader<R>
where
R: Read,
R: BufRead,
{
pub fn new(inner: R) -> Self {
let reader = csv::ReaderBuilder::new()
.delimiter(b'\t')
.has_headers(false)
.from_reader(inner);
Self {
inner,
line_buf: String::new(),
}
}

pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
self.line_buf.clear();

Self { reader }
match self.inner.read_line(&mut self.line_buf) {
Ok(0) => Ok(0),
Ok(n) => {
self.line_buf.pop();

*record = self
.line_buf
.parse()
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

Ok(n)
}
Err(e) => Err(e),
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Reads two newline-terminated records and then hits end of input,
    /// checking both the reported byte counts and the parsed fields.
    #[test]
    fn test_read_record() -> io::Result<()> {
        let data = b"sq0\t10946\t4\t80\t81\nsq1\t17711\t10954\t80\t81\n";

        let mut reader = Reader::new(&data[..]);
        let mut record = Record::default();

        // Byte counts include the trailing newline of each line.
        let bytes_read = reader.read_record(&mut record)?;
        assert_eq!(bytes_read, 18);
        assert_eq!(record.name(), "sq0");
        assert_eq!(record.len(), 10946);
        assert_eq!(record.offset(), 4);
        assert_eq!(record.line_bases(), 80);
        assert_eq!(record.line_width(), 81);

        let bytes_read = reader.read_record(&mut record)?;
        assert_eq!(bytes_read, 22);
        assert_eq!(record.name(), "sq1");
        assert_eq!(record.len(), 17711);
        assert_eq!(record.offset(), 10954);
        assert_eq!(record.line_bases(), 80);
        assert_eq!(record.line_width(), 81);

        // End of input: nothing is consumed and the last record is kept.
        let bytes_read = reader.read_record(&mut record)?;
        assert_eq!(bytes_read, 0);
        assert_eq!(record.name(), "sq1");

        Ok(())
    }
}
165 changes: 94 additions & 71 deletions noodles-fasta/src/fai/record.rs
Original file line number Diff line number Diff line change
@@ -1,103 +1,126 @@
use csv::StringRecord;

/// Identifies a column of a FAI record.
///
/// Declaration order matters: a variant is cast with `as usize` to index into
/// the underlying string record.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Header {
Name,
Length,
Offset,
LineBases,
LineWidth,
}
mod field;

/// Errors produced when reading a field out of a record.
#[derive(Debug, Eq, PartialEq)]
pub enum Error {
// The record has no value at the column's index.
Missing(Header),
// The column's value failed to parse; carries the formatted parse error.
Parse(Header, String),
}
use std::{error, fmt, str::FromStr};

pub type Result<T> = std::result::Result<T, Error>;
use self::field::Field;

pub struct Record(StringRecord);
// Separator between the columns of a record line.
const FIELD_DELIMITER: char = '\t';
// Upper bound on the number of pieces a record line is split into.
const MAX_FIELDS: usize = 5;

/// A single FASTA index (FAI) record: one tab-delimited line of the index.
///
/// Fields are declared in the order their columns appear on the line.
///
/// NOTE(review): column meanings presumably follow the samtools faidx format
/// (NAME, LENGTH, OFFSET, LINEBASES, LINEWIDTH) — confirm against the spec.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Record {
    /// Column 1 (`Field::Name`).
    name: String,
    /// Column 2 (`Field::Length`).
    len: u64,
    /// Column 3 (`Field::Offset`).
    offset: u64,
    /// Column 4 (`Field::LineBases`).
    line_bases: u64,
    /// Column 5 (`Field::LineWidth`).
    line_width: u64,
}

impl Record {
pub fn new(inner: StringRecord) -> Self {
Self(inner)
pub fn new(name: String, len: u64, offset: u64, line_bases: u64, line_width: u64) -> Self {
Self {
name,
len,
offset,
line_bases,
line_width,
}
}

pub fn name(&self) -> self::Result<&str> {
self.parse(Header::Name)
pub fn name(&self) -> &str {
&self.name
}

pub fn length(&self) -> self::Result<u64> {
self.parse_u64(Header::Length)
pub fn len(&self) -> u64 {
self.len
}

pub fn offset(&self) -> self::Result<u64> {
self.parse_u64(Header::Offset)
pub fn offset(&self) -> u64 {
self.offset
}

pub fn line_bases(&self) -> self::Result<u64> {
self.parse_u64(Header::LineBases)
pub fn line_bases(&self) -> u64 {
self.line_bases
}

pub fn line_width(&self) -> self::Result<u64> {
self.parse_u64(Header::LineWidth)
pub fn line_width(&self) -> u64 {
self.line_width
}
}

/// An error returned when a record line fails to parse.
#[derive(Debug)]
pub enum ParseError {
// The line ran out of columns before this field was reached.
Missing(Field),
// The field was present but its text did not parse as an integer.
Invalid(Field, std::num::ParseIntError),
}

fn parse(&self, header: Header) -> self::Result<&str> {
self.0
.get(header as usize)
.ok_or_else(|| Error::Missing(header))
// Marker impl so `ParseError` can be boxed as a generic error.
impl error::Error for ParseError {}

impl fmt::Display for ParseError {
    /// Writes a one-line description naming the offending field and, for
    /// invalid values, the underlying integer parse error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Missing(which) => f.write_fmt(format_args!("missing field: {:?}", which)),
            Self::Invalid(which, cause) => {
                f.write_fmt(format_args!("invalid {:?} field: {}", which, cause))
            }
        }
    }
}

impl FromStr for Record {
type Err = ParseError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut fields = s.splitn(MAX_FIELDS, FIELD_DELIMITER);

fn parse_u64(&self, header: Header) -> self::Result<u64> {
self.parse(header).and_then(|s| {
s.parse()
.map_err(|e| Error::Parse(header, format!("{}", e)))
let name = parse_string(&mut fields, Field::Name)?;
let len = parse_u64(&mut fields, Field::Length)?;
let offset = parse_u64(&mut fields, Field::Offset)?;
let line_bases = parse_u64(&mut fields, Field::LineBases)?;
let line_width = parse_u64(&mut fields, Field::LineWidth)?;

Ok(Record {
name,
len,
offset,
line_bases,
line_width,
})
}
}

/// Takes the next column from `fields` as an owned string.
///
/// Returns `ParseError::Missing(field)` when the iterator is exhausted.
fn parse_string<'a, I>(fields: &mut I, field: Field) -> Result<String, ParseError>
where
    I: Iterator<Item = &'a str>,
{
    match fields.next() {
        Some(text) => Ok(String::from(text)),
        None => Err(ParseError::Missing(field)),
    }
}

/// Takes the next column from `fields` and parses it as a `u64`.
///
/// Returns `ParseError::Missing(field)` when the iterator is exhausted and
/// `ParseError::Invalid(field, _)` when the text is not a valid `u64`.
fn parse_u64<'a, I>(fields: &mut I, field: Field) -> Result<u64, ParseError>
where
    I: Iterator<Item = &'a str>,
{
    match fields.next() {
        None => Err(ParseError::Missing(field)),
        Some(raw) => raw
            .parse::<u64>()
            .map_err(|cause| ParseError::Invalid(field, cause)),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed line yields every column through its accessor.
    #[test]
    fn test_from_str() -> Result<(), ParseError> {
        let record: Record = "sq0\t10946\t4\t80\t81".parse()?;

        assert_eq!(record.name(), "sq0");
        assert_eq!(record.len(), 10946);
        assert_eq!(record.offset(), 4);
        assert_eq!(record.line_bases(), 80);
        assert_eq!(record.line_width(), 81);

        Ok(())
    }

    /// A line that ends early reports the first column that is absent.
    #[test]
    fn test_from_str_with_missing_field() {
        let result = "sq0\t10946".parse::<Record>();
        assert!(matches!(result, Err(ParseError::Missing(Field::Offset))));
    }

    /// A non-numeric value in a numeric column reports that column.
    #[test]
    fn test_from_str_with_invalid_field() {
        let result = "sq0\tn\t4\t80\t81".parse::<Record>();
        assert!(matches!(
            result,
            Err(ParseError::Invalid(Field::Length, _))
        ));
    }
}
8 changes: 8 additions & 0 deletions noodles-fasta/src/fai/record/field.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/// Identifies a column of a FAI record, in the order the columns are parsed.
///
/// Used to say which column a parse error refers to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Field {
    /// First column.
    Name,
    /// Second column.
    Length,
    /// Third column.
    Offset,
    /// Fourth column.
    LineBases,
    /// Fifth column.
    LineWidth,
}

0 comments on commit 3b964c4

Please sign in to comment.