Skip to content

Commit

Permalink
vcf/lazy/record/genotypes: Remove checking for empty values
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Sep 13, 2023
1 parent c91dcf2 commit 652f596
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 57 deletions.
16 changes: 15 additions & 1 deletion noodles-vcf/src/lazy/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,22 @@ impl Record {

/// Returns the genotypes.
///
/// The result wraps the genotypes slice of this record's buffer. In the
/// post-commit body, an empty string is wrapped instead when that slice is
/// empty or when its first `FIELD_DELIMITER`-separated field equals `MISSING`.
pub fn genotypes(&self) -> Genotypes<'_> {
use crate::record::FIELD_DELIMITER;

let buf = &self.buf[self.bounds.genotypes_range()];
// Diff view: the next line is the single line this commit deletes from this
// file; every statement after it was added by the commit.
Genotypes::new(buf)

// Deferred check of whether the first delimited field is the missing marker;
// it only runs if `buf` is non-empty (see the short-circuit below).
// NOTE(review): `MISSING` is not imported in the visible hunk — confirm it is
// in scope in record.rs.
let is_missing = || {
buf.split(FIELD_DELIMITER)
.next()
.map(|s| s == MISSING)
.unwrap_or_default()
};

// Normalize "no genotypes" to an empty string so that `Genotypes` itself only
// needs to test for emptiness.
if buf.is_empty() || is_missing() {
Genotypes::new("")
} else {
Genotypes::new(buf)
}
}
}

Expand Down
125 changes: 69 additions & 56 deletions noodles-vcf/src/lazy/record/genotypes.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
mod sample;

use std::{io, iter};
use std::iter;

pub use self::sample::Sample;
use crate::record::{FIELD_DELIMITER, MISSING_FIELD};
use crate::record::FIELD_DELIMITER;

/// Raw VCF record genotypes.
#[derive(Debug, Eq, PartialEq)]
Expand All @@ -16,49 +16,33 @@ impl<'a> Genotypes<'a> {

/// Returns whether there may be any genotypes.
///
/// After this commit the check is only whether the wrapped string is empty.
pub fn is_empty(&self) -> bool {
// Diff view, removed body: additionally treated the value as empty when its
// first `FIELD_DELIMITER`-separated field equaled `MISSING_FIELD`.
let is_missing = self
.0
.split(FIELD_DELIMITER)
.next()
.map(|s| s == MISSING_FIELD)
.unwrap_or_default();

self.0.is_empty() || is_missing
// Diff view, added body: a plain emptiness test.
self.0.is_empty()
}

/// Returns an iterator over keys.
///
/// Keys are read from the text before the first `FIELD_DELIMITER`. In the
/// post-commit body, a value with no `FIELD_DELIMITER` yields no keys instead
/// of an error.
// Diff view, removed signature and body: returned a boxed iterator inside an
// `io::Result`, produced an empty iterator when `is_empty()` held, and failed
// with `InvalidData` ("missing field separator") when no delimiter was found.
pub fn keys(&self) -> io::Result<Box<dyn Iterator<Item = &str> + '_>> {
const DELIMITER: char = ':';

if self.is_empty() {
return Ok(Box::new(iter::empty()));
}

let (raw_format, _) = self
.0
.split_once(FIELD_DELIMITER)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing field separator"))?;

Ok(Box::new(raw_format.split(DELIMITER)))
// Diff view, added signature and body: infallible and unboxed.
pub fn keys(&self) -> impl Iterator<Item = &str> + '_ {
// `unwrap_or_default` turns a missing delimiter into a pair of empty strings,
// so the iterator below ends immediately.
let (mut src, _) = self.0.split_once(FIELD_DELIMITER).unwrap_or_default();

// Each call consumes one key from the front of `src` until nothing remains.
iter::from_fn(move || {
if src.is_empty() {
None
} else {
Some(parse_key(&mut src))
}
})
}

/// Returns an iterator over samples.
///
/// Samples are read from the text after the first `FIELD_DELIMITER`. Each item
/// is `None` when the raw sample is `"."` and `Some(Sample)` otherwise. In the
/// post-commit body, a value with no `FIELD_DELIMITER` yields no samples
/// instead of an error.
// Diff view, removed signature and body: returned a boxed iterator inside an
// `io::Result`, produced an empty iterator when `is_empty()` held, and failed
// with `InvalidData` ("missing field separator") when no delimiter was found.
pub fn samples(&self) -> io::Result<Box<dyn Iterator<Item = Option<Sample<'_>>> + '_>> {
if self.is_empty() {
return Ok(Box::new(iter::empty()));
}

let (_, raw_samples) = self
.0
.split_once(FIELD_DELIMITER)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "missing field separator"))?;

Ok(Box::new(raw_samples.split(FIELD_DELIMITER).map(
|s| match s {
"." => None,
_ => Some(Sample::new(s)),
},
)))
// Diff view, added signature and body: infallible and unboxed.
pub fn samples(&self) -> impl Iterator<Item = Option<Sample<'_>>> + '_ {
// `unwrap_or_default` turns a missing delimiter into a pair of empty strings,
// so the iterator below ends immediately.
let (_, mut src) = self.0.split_once(FIELD_DELIMITER).unwrap_or_default();

// Each call consumes one sample from the front of `src` until nothing remains.
iter::from_fn(move || {
if src.is_empty() {
None
} else {
Some(parse_sample(&mut src))
}
})
}
}

Expand All @@ -68,46 +52,75 @@ impl<'a> AsRef<str> for Genotypes<'a> {
}
}

/// Takes the next colon-delimited key off the front of `src`.
///
/// Returns the text before the first `:` and advances `src` to just past that
/// delimiter. When `src` contains no `:`, all of the remaining input is
/// returned and `src` is left empty.
fn parse_key<'a>(src: &mut &'a str) -> &'a str {
    const DELIMITER: char = ':';

    let input: &'a str = *src;

    let (key, remainder) = match input.split_once(DELIMITER) {
        Some(parts) => parts,
        // No delimiter: the key is everything left and the remainder is the
        // empty tail of the input.
        None => input.split_at(input.len()),
    };

    *src = remainder;
    key
}

/// Takes the next tab-delimited sample off the front of `src`.
///
/// The text before the first tab is consumed and `src` is advanced to just
/// past that tab. When `src` contains no tab, all of the remaining input is
/// consumed and `src` is left empty.
///
/// Returns `None` when the consumed text is exactly `"."`; otherwise returns
/// the text wrapped in a [`Sample`].
fn parse_sample<'a>(src: &mut &'a str) -> Option<Sample<'a>> {
    const DELIMITER: char = '\t';
    const MISSING: &str = ".";

    let input: &'a str = *src;

    // With no delimiter left, the sample is everything that remains and the
    // remainder is the empty tail of the input.
    let (raw_sample, remainder) = input
        .split_once(DELIMITER)
        .unwrap_or_else(|| input.split_at(input.len()));

    *src = remainder;

    if raw_sample == MISSING {
        None
    } else {
        Some(Sample::new(raw_sample))
    }
}

// Unit tests for `Genotypes`. This is a diff view: lines using `io::Result`,
// the `?` operator, `Ok(())`, or the ".\t." input are the pre-commit versions
// that this commit removes; the lines directly after them are the replacements.
#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_is_empty() {
assert!(Genotypes::new("").is_empty());
// Removed by this commit: `is_empty` no longer inspects the first field for
// the missing marker.
assert!(Genotypes::new(".\t.").is_empty());
assert!(!Genotypes::new("GT:GQ\t0|0:13").is_empty());
}

#[test]
// Removed signature (fallible) followed by the added signature (infallible).
fn test_keys() -> io::Result<()> {
fn test_keys() {
let genotypes = Genotypes::new("");
assert!(genotypes.keys()?.next().is_none());

let genotypes = Genotypes::new(".\t.");
assert!(genotypes.keys()?.next().is_none());
// Added: an empty value yields no keys.
assert!(genotypes.keys().next().is_none());

let genotypes = Genotypes::new("GT:GQ\t0|0:13");
let actual: Vec<_> = genotypes.keys()?.collect();
// Added: keys are collected directly, without `?`.
let actual: Vec<_> = genotypes.keys().collect();
let expected = ["GT", "GQ"];
assert_eq!(actual, expected);

Ok(())
}

#[test]
// Removed signature (fallible) followed by the added signature (infallible).
fn test_samples() -> io::Result<()> {
fn test_samples() {
let genotypes = Genotypes::new("");
assert!(genotypes.samples()?.next().is_none());

let genotypes = Genotypes::new(".\t.");
assert!(genotypes.samples()?.next().is_none());
// Added: an empty value yields no samples.
assert!(genotypes.samples().next().is_none());

let genotypes = Genotypes::new("GT:GQ\t0|0:13\t.");
let actual: Vec<_> = genotypes.samples()?.collect();
// Added: samples are collected directly; a trailing "." sample maps to `None`.
let actual: Vec<_> = genotypes.samples().collect();
let expected = [Some(Sample::new("0|0:13")), None];
assert_eq!(actual, expected);

Ok(())
}
}

0 comments on commit 652f596

Please sign in to comment.