Skip to content

Commit

Permalink
Merge pull request #1234 from nextstrain/fix/fasta-without-trailing-n…
Browse files Browse the repository at this point in the history
…ewline
  • Loading branch information
ivan-aksamentov authored Aug 9, 2023
2 parents 7432f82 + 97f67b8 commit a8e2a49
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 20 deletions.
70 changes: 52 additions & 18 deletions packages_rs/nextclade/src/io/concat.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::cmp::min;
/// concat.rs
///
/// Taken with modifications from
Expand All @@ -10,21 +11,32 @@
/// concatenation of the items' contents.
use std::io::{Read, Result};

pub fn concat<I>(iter: I) -> Concat<I>
pub struct Concat<I>
where
I: Iterator,
<I as Iterator>::Item: Read,
{
Concat::<I>::from(iter)
iter: I,
curr: Option<<I as Iterator>::Item>,
delimiter: Option<Vec<u8>>,
}

pub struct Concat<I>
impl<I> Concat<I>
where
I: Iterator,
<I as Iterator>::Item: Read,
{
iter: I,
curr: Option<<I as Iterator>::Item>,
/// Concatenate readers into a single reader
pub fn from(iter: I) -> Concat<I> {
Self::with_delimiter(iter, None)
}

/// Chains the readers yielded by `iter` into one continuous reader, emitting
/// `delimiter` (when it is `Some`) each time a reader runs out of data.
///
/// As the test in this module shows, the delimiter is written after the last
/// reader as well, not only between neighbouring readers.
///
/// The first reader is taken from `iter` immediately, at construction time.
pub fn with_delimiter(mut iter: I, delimiter: Option<Vec<u8>>) -> Concat<I> {
    // NOTE(review): the visible part of `read` copies the delimiter with
    // `buf[..min(delimiter.len(), buf.len())].copy_from_slice(delimiter)`;
    // `copy_from_slice` panics when the two lengths differ, so a delimiter
    // longer than the caller's buffer looks unsafe. Confirm against the full
    // `Read` impl before passing multi-byte delimiters.
    Concat {
        // Evaluated before `iter` is moved into the struct on the next line.
        curr: iter.next(),
        iter,
        delimiter,
    }
}
}

impl<I> Concat<I>
Expand All @@ -42,18 +54,6 @@ where
}
}

/// Lets an iterator of readers be converted straight into a [`Concat`].
impl<I> From<I> for Concat<I>
where
    I: Iterator,
    I::Item: Read,
{
    /// Wraps `iter`, taking its first reader right away so that it becomes the
    /// current reader.
    fn from(mut iter: I) -> Concat<I> {
        Concat {
            // Evaluated before `iter` is moved into the struct on the next line.
            curr: iter.next(),
            iter,
        }
    }
}

impl<I> Read for Concat<I>
where
I: Iterator,
Expand All @@ -70,7 +70,41 @@ where
} else {
// The current reader reached the end so we have to advance the iterator and try again.
self.curr = self.iter.next();
self.read(buf)

// Before moving to the next reader, insert delimiter, if requested
let n_bytes_inserted = if let Some(delimiter) = &self.delimiter {
let n_bytes_inserted = min(delimiter.len(), buf.len());
buf[..n_bytes_inserted].copy_from_slice(delimiter);
n_bytes_inserted
} else {
0
};

let n_bytes_read = self.read(&mut buf[n_bytes_inserted..])?;
Ok(n_bytes_read + n_bytes_inserted)
}
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use rstest::rstest;

    /// Inputs such as files may lack a final newline, which would glue the last
    /// line of one reader onto the first line of the next and break line-based
    /// parsing at the boundary. Supplying a newline delimiter is expected to
    /// compensate for that.
    #[rstest]
    fn concatenates_readers_with_delimiter() {
        let first: &[u8] = b"No\ntrailing\nnewline";
        let second: &[u8] = b"And\nneither\nhere";
        let newline = Some(b"\n".to_vec());

        let mut actual = String::new();
        Concat::with_delimiter([first, second].into_iter(), newline)
            .read_to_string(&mut actual)
            .unwrap();

        // The delimiter shows up after each reader, the last one included.
        assert_eq!(actual, "No\ntrailing\nnewline\nAnd\nneither\nhere\n");
    }
}
4 changes: 2 additions & 2 deletions packages_rs/nextclade/src/io/fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::alphabet::aa::from_aa_seq;
use crate::constants::REVERSE_COMPLEMENT_SUFFIX;
use crate::gene::gene_map::GeneMap;
use crate::io::compression::Decompressor;
use crate::io::concat::concat;
use crate::io::concat::Concat;
use crate::io::file::{create_file_or_stdout, open_file_or_stdin, open_stdin};
use crate::translate::translate_genes::CdsTranslation;
use crate::{make_error, make_internal_error};
Expand Down Expand Up @@ -86,7 +86,7 @@ impl<'a> FastaReader<'a> {
.map(|filepath| -> Result<Box<dyn BufRead + 'a>, Report> { open_file_or_stdin(&Some(filepath)) })
.collect::<Result<Vec<Box<dyn BufRead + 'a>>, Report>>()?;

let concat = concat(readers.into_iter());
let concat = Concat::with_delimiter(readers.into_iter(), Some(b"\n".to_vec()));
let concat_buf = BufReader::new(concat);

Ok(Self::new(Box::new(concat_buf)))
Expand Down

0 comments on commit a8e2a49

Please sign in to comment.