-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fasta/fai: Replace csv with a simple line parser
- Loading branch information
Showing
4 changed files
with
155 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,4 +6,3 @@ license = "MIT" | |
edition = "2018" | ||
|
||
[dependencies] | ||
csv = "1.0.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,66 @@ | ||
use std::io::Read; | ||
use std::io::{self, BufRead}; | ||
|
||
use super::Record; | ||
|
||
pub struct Reader<R> { | ||
reader: csv::Reader<R>, | ||
inner: R, | ||
line_buf: String, | ||
} | ||
|
||
impl<R> Reader<R> | ||
where | ||
R: Read, | ||
R: BufRead, | ||
{ | ||
pub fn new(inner: R) -> Self { | ||
let reader = csv::ReaderBuilder::new() | ||
.delimiter(b'\t') | ||
.has_headers(false) | ||
.from_reader(inner); | ||
Self { | ||
inner, | ||
line_buf: String::new(), | ||
} | ||
} | ||
|
||
/// Reads one line from the inner reader and parses it into `record`.
///
/// Returns `Ok(0)` when `read_line` reports end of input, otherwise the
/// number of bytes `read_line` consumed for this line. `record` is only
/// overwritten after the line has parsed successfully.
///
/// # Errors
///
/// I/O errors from `read_line` are returned unchanged. A line that fails to
/// parse is reported as `io::ErrorKind::InvalidData` wrapping the parse error.
pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> { | ||
// The line buffer is reused across calls, so discard the previous line first.
self.line_buf.clear(); | ||
|
||
// NOTE(review): the next row looks like a removed line of the previous `new`
// body that the diff view interleaves here; it does not appear to belong to
// this function — confirm.
Self { reader } | ||
match self.inner.read_line(&mut self.line_buf) { | ||
Ok(0) => Ok(0), | ||
Ok(n) => { | ||
// Drop the last character, expected to be the trailing '\n'.
// NOTE(review): `read_line` only keeps the newline when one was found, so a
// final line without a trailing newline would lose its last character here,
// and a "\r\n" ending would leave the '\r' in the buffer — confirm whether
// such input has to be supported.
self.line_buf.pop(); | ||
|
||
// `str::parse` goes through `FromStr`; the target type is taken from `*record`.
*record = self | ||
.line_buf | ||
.parse() | ||
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; | ||
|
||
// Report the raw byte count from `read_line`, which includes the newline.
Ok(n) | ||
} | ||
Err(e) => Err(e), | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
||
#[test] | ||
fn test_read_record() -> io::Result<()> { | ||
let data = b"\ | ||
sq0\t10946\t4\t80\t81 | ||
sq1\t17711\t10954\t80\t81 | ||
"; | ||
|
||
let mut reader = Reader::new(&data[..]); | ||
let mut record = Record::default(); | ||
|
||
let bytes_read = reader.read_record(&mut record)?; | ||
assert_eq!(bytes_read, 18); | ||
|
||
let bytes_read = reader.read_record(&mut record)?; | ||
assert_eq!(bytes_read, 22); | ||
|
||
let bytes_read = reader.read_record(&mut record)?; | ||
assert_eq!(bytes_read, 0); | ||
|
||
pub fn records(&mut self) -> csv::StringRecordsIter<R> { | ||
self.reader.records() | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,103 +1,126 @@ | ||
use csv::StringRecord; | ||
|
||
#[derive(Clone, Copy, Debug, Eq, PartialEq)] | ||
pub enum Header { | ||
Name, | ||
Length, | ||
Offset, | ||
LineBases, | ||
LineWidth, | ||
} | ||
mod field; | ||
|
||
#[derive(Debug, Eq, PartialEq)] | ||
pub enum Error { | ||
Missing(Header), | ||
Parse(Header, String), | ||
} | ||
use std::{error, fmt, str::FromStr}; | ||
|
||
pub type Result<T> = std::result::Result<T, Error>; | ||
use self::field::Field; | ||
|
||
pub struct Record(StringRecord); | ||
const FIELD_DELIMITER: char = '\t'; | ||
const MAX_FIELDS: usize = 5; | ||
|
||
/// A single index record: a name followed by four unsigned integer columns.
///
/// The `FromStr` implementation in this file fills the fields from one
/// tab-delimited line, in the order they are declared here. `Default` yields
/// an empty name and zeroed numbers, which lets a reader reuse one value.
#[derive(Debug, Default)] | ||
pub struct Record { | ||
// First column of the line.
name: String, | ||
// Second column. Presumably the sequence length in bases — confirm against the faidx format.
len: u64, | ||
// Third column. Presumably the byte offset of the first base in the FASTA file — confirm.
offset: u64, | ||
// Fourth column. Presumably the number of bases per sequence line — confirm.
line_bases: u64, | ||
// Fifth column. Presumably the number of bytes per sequence line, terminator included — confirm.
line_width: u64, | ||
} | ||
|
||
impl Record { | ||
pub fn new(inner: StringRecord) -> Self { | ||
Self(inner) | ||
pub fn new(name: String, len: u64, offset: u64, line_bases: u64, line_width: u64) -> Self { | ||
Self { | ||
name, | ||
len, | ||
offset, | ||
line_bases, | ||
line_width, | ||
} | ||
} | ||
|
||
pub fn name(&self) -> self::Result<&str> { | ||
self.parse(Header::Name) | ||
pub fn name(&self) -> &str { | ||
&self.name | ||
} | ||
|
||
pub fn length(&self) -> self::Result<u64> { | ||
self.parse_u64(Header::Length) | ||
pub fn len(&self) -> u64 { | ||
self.len | ||
} | ||
|
||
pub fn offset(&self) -> self::Result<u64> { | ||
self.parse_u64(Header::Offset) | ||
pub fn offset(&self) -> u64 { | ||
self.offset | ||
} | ||
|
||
pub fn line_bases(&self) -> self::Result<u64> { | ||
self.parse_u64(Header::LineBases) | ||
pub fn line_bases(&self) -> u64 { | ||
self.line_bases | ||
} | ||
|
||
pub fn line_width(&self) -> self::Result<u64> { | ||
self.parse_u64(Header::LineWidth) | ||
pub fn line_width(&self) -> u64 { | ||
self.line_width | ||
} | ||
} | ||
|
||
/// Error produced while parsing a `Record` from a line of text.
#[derive(Debug)] | ||
pub enum ParseError { | ||
// The line ended before this field was reached (the field iterator returned `None`).
Missing(Field), | ||
// The field was present but its text could not be parsed as a `u64`.
Invalid(Field, std::num::ParseIntError), | ||
} | ||
|
||
fn parse(&self, header: Header) -> self::Result<&str> { | ||
self.0 | ||
.get(header as usize) | ||
.ok_or_else(|| Error::Missing(header)) | ||
impl error::Error for ParseError {} | ||
|
||
/// Human-readable rendering of a `ParseError`.
///
/// The field is printed with its `Debug` form; for `Invalid`, the underlying
/// integer parse error is appended using its `Display` form.
impl fmt::Display for ParseError { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
match self { | ||
Self::Missing(field) => write!(f, "missing field: {:?}", field), | ||
// `message` is the wrapped `ParseIntError`, not a preformatted string.
Self::Invalid(field, message) => write!(f, "invalid {:?} field: {}", field, message), | ||
} | ||
} | ||
} | ||
|
||
impl FromStr for Record { | ||
type Err = ParseError; | ||
|
||
fn from_str(s: &str) -> Result<Self, Self::Err> { | ||
let mut fields = s.splitn(MAX_FIELDS, FIELD_DELIMITER); | ||
|
||
fn parse_u64(&self, header: Header) -> self::Result<u64> { | ||
self.parse(header).and_then(|s| { | ||
s.parse() | ||
.map_err(|e| Error::Parse(header, format!("{}", e))) | ||
let name = parse_string(&mut fields, Field::Name)?; | ||
let len = parse_u64(&mut fields, Field::Length)?; | ||
let offset = parse_u64(&mut fields, Field::Offset)?; | ||
let line_bases = parse_u64(&mut fields, Field::LineBases)?; | ||
let line_width = parse_u64(&mut fields, Field::LineWidth)?; | ||
|
||
Ok(Record { | ||
name, | ||
len, | ||
offset, | ||
line_bases, | ||
line_width, | ||
}) | ||
} | ||
} | ||
|
||
/// Takes the next item from `fields` and returns it as an owned `String`.
///
/// `field` is only used to label the error. The text itself is not validated,
/// so an empty item is returned as an empty string.
///
/// # Errors
///
/// Returns `ParseError::Missing(field)` when `fields` is exhausted.
fn parse_string<'a, I>(fields: &mut I, field: Field) -> Result<String, ParseError> | ||
where | ||
I: Iterator<Item = &'a str>, | ||
{ | ||
fields | ||
.next() | ||
.ok_or_else(|| ParseError::Missing(field)) | ||
.map(|s| s.into()) | ||
} | ||
|
||
/// Takes the next item from `fields` and parses it as a `u64`.
///
/// `field` is only used to label the error.
///
/// # Errors
///
/// Returns `ParseError::Missing(field)` when `fields` is exhausted, and
/// `ParseError::Invalid(field, _)` carrying the `ParseIntError` when the text
/// is not a valid `u64`.
fn parse_u64<'a, I>(fields: &mut I, field: Field) -> Result<u64, ParseError> | ||
where | ||
I: Iterator<Item = &'a str>, | ||
{ | ||
fields | ||
.next() | ||
.ok_or_else(|| ParseError::Missing(field)) | ||
.and_then(|s| s.parse().map_err(|e| ParseError::Invalid(field, e))) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
||
fn build_string_record() -> StringRecord { | ||
StringRecord::from(vec!["chr1", "248956422", "112", "70", "71"]) | ||
} | ||
|
||
#[test] | ||
fn test_name() { | ||
let r = build_string_record(); | ||
let record = Record::new(r); | ||
assert_eq!(record.name(), Ok("chr1")); | ||
} | ||
|
||
#[test] | ||
fn test_length() { | ||
let r = build_string_record(); | ||
let record = Record::new(r); | ||
assert_eq!(record.length(), Ok(248956422)); | ||
} | ||
fn test_from_str() -> Result<(), ParseError> { | ||
let record: Record = "sq0\t10946\t4\t80\t81".parse()?; | ||
|
||
#[test] | ||
fn test_offset() { | ||
let r = build_string_record(); | ||
let record = Record::new(r); | ||
assert_eq!(record.offset(), Ok(112)); | ||
} | ||
assert_eq!(record.name(), "sq0"); | ||
assert_eq!(record.len(), 10946); | ||
assert_eq!(record.offset(), 4); | ||
assert_eq!(record.line_bases(), 80); | ||
assert_eq!(record.line_width(), 81); | ||
|
||
#[test] | ||
fn test_line_bases() { | ||
let r = build_string_record(); | ||
let record = Record::new(r); | ||
assert_eq!(record.line_bases(), Ok(70)); | ||
} | ||
|
||
#[test] | ||
fn test_line_width() { | ||
let r = build_string_record(); | ||
let record = Record::new(r); | ||
assert_eq!(record.line_width(), Ok(71)); | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
/// Identifies one column of an index record.
///
/// It is `Copy` so it can be passed by value and embedded in errors.
// NOTE(review): the variant order appears to mirror the column order of a record line — confirm against the parser.
#[derive(Clone, Copy, Debug)] | ||
pub enum Field { | ||
Name, | ||
Length, | ||
Offset, | ||
LineBases, | ||
LineWidth, | ||
} |