Skip to content

Commit

Permalink
vcf/variant/record_buf/samples/keys: Simplify keys to a set of strings
Browse files: browse the repository at this point in the history
  • Loading branch information
zaeleus committed Mar 25, 2024
1 parent 998297d commit 72cb2e6
Show file tree
Hide file tree
Showing 16 changed files with 123 additions and 260 deletions.
23 changes: 10 additions & 13 deletions noodles-bcf/src/async/io/reader/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,12 @@ mod tests {
AlternateBases, Filters, Info, Samples,
},
record_buf::{
samples::{
sample::{
value::{
genotype::{allele::Phasing, Allele},
Genotype,
},
Value as GenotypeFieldValue,
samples::sample::{
value::{
genotype::{allele::Phasing, Allele},
Genotype,
},
Keys,
Value as GenotypeFieldValue,
},
Samples as VcfGenotypes,
},
Expand Down Expand Up @@ -128,12 +125,10 @@ mod tests {

let samples = record.samples()?;

let column_names: Vec<_> = samples
let keys = samples
.column_names(&header)
.map(|result| result.map(String::from))
.collect::<io::Result<_>>()?;
let keys = Keys::try_from(column_names)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

let values = samples
.iter()
Expand All @@ -150,13 +145,15 @@ mod tests {
let actual = VcfGenotypes::new(keys, values);

let expected = VcfGenotypes::new(
Keys::try_from(vec![
[
String::from(samples::keys::key::GENOTYPE),
String::from(samples::keys::key::CONDITIONAL_GENOTYPE_QUALITY),
String::from(samples::keys::key::READ_DEPTH),
String::from(samples::keys::key::READ_DEPTHS),
String::from(samples::keys::key::ROUNDED_GENOTYPE_LIKELIHOODS),
])?,
]
.into_iter()
.collect(),
vec![
vec![
Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![
Expand Down
23 changes: 10 additions & 13 deletions noodles-bcf/src/io/reader/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,12 @@ pub(crate) mod tests {
},
record_buf::{
info::field::Value as InfoFieldValue,
samples::{
sample::{
value::{
genotype::{allele::Phasing, Allele},
Array, Genotype,
},
Value as GenotypeFieldValue,
samples::sample::{
value::{
genotype::{allele::Phasing, Allele},
Array, Genotype,
},
Keys,
Value as GenotypeFieldValue,
},
Samples as VcfGenotypes,
},
Expand Down Expand Up @@ -198,12 +195,10 @@ pub(crate) mod tests {

let samples = record.samples()?;

let column_names: Vec<_> = samples
let keys = samples
.column_names(&header)
.map(|result| result.map(String::from))
.collect::<io::Result<_>>()?;
let keys = Keys::try_from(column_names)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

let values = samples
.iter()
Expand All @@ -220,13 +215,15 @@ pub(crate) mod tests {
let actual = VcfGenotypes::new(keys, values);

let expected = VcfGenotypes::new(
Keys::try_from(vec![
[
String::from(samples::keys::key::GENOTYPE),
String::from(samples::keys::key::CONDITIONAL_GENOTYPE_QUALITY),
String::from(samples::keys::key::READ_DEPTH),
String::from(samples::keys::key::READ_DEPTHS),
String::from(samples::keys::key::ROUNDED_GENOTYPE_LIKELIHOODS),
])?,
]
.into_iter()
.collect(),
vec![
vec![
Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![
Expand Down
12 changes: 2 additions & 10 deletions noodles-bcf/src/record/codec/decoder/samples.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@ mod values;

use std::{error, fmt};

use noodles_vcf::{
self as vcf,
variant::record_buf::{samples::Keys, Samples},
};
use noodles_vcf::{self as vcf, variant::record_buf::Samples};

use self::{
key::read_key,
Expand Down Expand Up @@ -40,25 +37,21 @@ pub fn read_samples(
}
}

let keys = Keys::try_from(keys).map_err(DecodeError::InvalidKeys)?;

Ok(Samples::new(keys, samples))
Ok(Samples::new(keys.into_iter().collect(), samples))
}

#[allow(clippy::enum_variant_names)]
#[derive(Debug, Eq, PartialEq)]
pub enum DecodeError {
InvalidKey(key::DecodeError),
InvalidValues(values::DecodeError),
InvalidKeys(vcf::variant::record_buf::samples::keys::TryFromKeyVectorError),
}

impl error::Error for DecodeError {
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match self {
Self::InvalidKey(e) => Some(e),
Self::InvalidValues(e) => Some(e),
Self::InvalidKeys(e) => Some(e),
}
}
}
Expand All @@ -68,7 +61,6 @@ impl fmt::Display for DecodeError {
match self {
Self::InvalidKey(_) => write!(f, "invalid key"),
Self::InvalidValues(_) => write!(f, "invalid values"),
Self::InvalidKeys(_) => write!(f, "invalid keys"),
}
}
}
6 changes: 4 additions & 2 deletions noodles-bcf/src/record/codec/encoder/samples.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@ mod tests {
let string_maps = StringMaps::try_from(&header)?;

let genotypes = vcf::variant::record_buf::Samples::new(
vcf::variant::record_buf::samples::Keys::try_from(vec![
[
String::from(key::CONDITIONAL_GENOTYPE_QUALITY),
String::from(key::READ_DEPTH),
])?,
]
.into_iter()
.collect(),
vec![
vec![Some(Value::from(13)), Some(Value::from(5))],
vec![Some(Value::from(8))],
Expand Down
5 changes: 3 additions & 2 deletions noodles-vcf/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@

* IDs => set of strings,
* reference bases => string,
* alternate bases => list of strings, and
* filters => set of strings.
* alternate bases => list of strings,
* filters => set of strings, and
* samples keys => set of strings.

* vcf/variant/record_buf/builder: Remove validation on build
(`Builder::build`).
Expand Down
21 changes: 10 additions & 11 deletions noodles-vcf/src/io/reader/record_buf/samples.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub(super) fn parse_samples(
mut s: &str,
genotypes: &mut Samples,
) -> Result<(), ParseError> {
genotypes.keys.clear();
genotypes.keys.as_mut().clear();

let sample_count = header.sample_names().len();

Expand Down Expand Up @@ -84,15 +84,12 @@ mod tests {
fn test_parse_samples() -> Result<(), Box<dyn std::error::Error>> {
use crate::variant::{
record::samples::keys::key,
record_buf::samples::{
sample::{
value::{
genotype::{allele::Phasing, Allele},
Genotype,
},
Value,
record_buf::samples::sample::{
value::{
genotype::{allele::Phasing, Allele},
Genotype,
},
Keys,
Value,
},
};

Expand All @@ -105,7 +102,7 @@ mod tests {
let header = Header::builder().add_sample_name("sample0").build();
parse_samples(&header, "GT\t0|0", &mut genotypes)?;
let expected = Samples::new(
Keys::try_from(vec![String::from(key::GENOTYPE)])?,
[String::from(key::GENOTYPE)].into_iter().collect(),
vec![vec![Some(Value::Genotype(Genotype::try_from(vec![
Allele::new(Some(0), Phasing::Phased),
Allele::new(Some(0), Phasing::Phased),
Expand All @@ -119,7 +116,9 @@ mod tests {
.build();
parse_samples(&header, "GQ\t8\t13", &mut genotypes)?;
let expected = Samples::new(
Keys::try_from(vec![String::from(key::CONDITIONAL_GENOTYPE_QUALITY)])?,
[String::from(key::CONDITIONAL_GENOTYPE_QUALITY)]
.into_iter()
.collect(),
vec![vec![Some(Value::from(8))], vec![Some(Value::from(13))]],
);
assert_eq!(genotypes, expected);
Expand Down
28 changes: 16 additions & 12 deletions noodles-vcf/src/io/reader/record_buf/samples/keys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub(super) fn parse_keys(header: &Header, s: &str, keys: &mut Keys) -> Result<()
gt_position = Some(i);
}

if let Some(key) = keys.replace(key) {
if let Some(key) = keys.as_mut().replace(key) {
return Err(ParseError::DuplicateKey(key));
}
}
Expand All @@ -75,38 +75,42 @@ mod tests {
let header = Header::default();
let mut keys = Keys::default();

keys.clear();
keys.as_mut().clear();
parse_keys(&header, ".", &mut keys)?;
assert_eq!(keys, Keys::default());

keys.clear();
keys.as_mut().clear();
parse_keys(&header, "GT", &mut keys)?;
let expected = Keys::try_from(vec![String::from(key::GENOTYPE)])?;
let expected = [String::from(key::GENOTYPE)].into_iter().collect();
assert_eq!(keys, expected);

keys.clear();
keys.as_mut().clear();
parse_keys(&header, "GQ", &mut keys)?;
let expected = Keys::try_from(vec![String::from(key::CONDITIONAL_GENOTYPE_QUALITY)])?;
let expected = [String::from(key::CONDITIONAL_GENOTYPE_QUALITY)]
.into_iter()
.collect();
assert_eq!(keys, expected);

keys.clear();
keys.as_mut().clear();
parse_keys(&header, "GT:GQ", &mut keys)?;
let expected = Keys::try_from(vec![
let expected = [
String::from(key::GENOTYPE),
String::from(key::CONDITIONAL_GENOTYPE_QUALITY),
])?;
]
.into_iter()
.collect();
assert_eq!(keys, expected);

keys.clear();
keys.as_mut().clear();
assert_eq!(parse_keys(&header, "", &mut keys), Err(ParseError::Empty));

keys.clear();
keys.as_mut().clear();
assert_eq!(
parse_keys(&header, "GQ:GT", &mut keys),
Err(ParseError::InvalidGenotypeKeyPosition)
);

keys.clear();
keys.as_mut().clear();
assert_eq!(
parse_keys(&header, "GT:GT", &mut keys),
Err(ParseError::DuplicateKey(String::from(key::GENOTYPE)))
Expand Down
28 changes: 17 additions & 11 deletions noodles-vcf/src/io/reader/record_buf/samples/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ pub(super) fn parse_values(

let mut raw_values = s.split(DELIMITER);

for (key, raw_value) in keys.iter().zip(&mut raw_values) {
for (key, raw_value) in keys.as_ref().iter().zip(&mut raw_values) {
let value = match raw_value {
MISSING => None,
_ => {
Expand Down Expand Up @@ -112,12 +112,12 @@ mod tests {
let header = Header::default();
let mut values = Vec::new();

let keys = Keys::try_from(vec![String::from(key::GENOTYPE)])?;
let keys = [String::from(key::GENOTYPE)].into_iter().collect();
values.clear();
parse_values(&header, &keys, ".", &mut values)?;
assert!(values.is_empty());

let keys = Keys::try_from(vec![String::from(key::GENOTYPE)])?;
let keys = [String::from(key::GENOTYPE)].into_iter().collect();
values.clear();
parse_values(&header, &keys, "0|0", &mut values)?;
assert_eq!(
Expand All @@ -128,10 +128,12 @@ mod tests {
])?))]
);

let keys = Keys::try_from(vec![
let keys = [
String::from(key::GENOTYPE),
String::from(key::CONDITIONAL_GENOTYPE_QUALITY),
])?;
]
.into_iter()
.collect();
values.clear();
parse_values(&header, &keys, "0|0:13", &mut values)?;
assert_eq!(
Expand All @@ -145,10 +147,12 @@ mod tests {
]
);

let keys = Keys::try_from(vec![
let keys = [
String::from(key::GENOTYPE),
String::from(key::CONDITIONAL_GENOTYPE_QUALITY),
])?;
]
.into_iter()
.collect();
values.clear();
parse_values(&header, &keys, "0|0:.", &mut values)?;
assert_eq!(
Expand All @@ -162,10 +166,12 @@ mod tests {
]
);

let keys = Keys::try_from(vec![
let keys = [
String::from(key::GENOTYPE),
String::from(key::CONDITIONAL_GENOTYPE_QUALITY),
])?;
]
.into_iter()
.collect();
values.clear();
parse_values(&header, &keys, "0|0", &mut values)?;
assert_eq!(
Expand All @@ -176,14 +182,14 @@ mod tests {
])?))]
);

let keys = Keys::try_from(vec![String::from(key::GENOTYPE)])?;
let keys = [String::from(key::GENOTYPE)].into_iter().collect();
values.clear();
assert_eq!(
parse_values(&header, &keys, "", &mut values),
Err(ParseError::Empty)
);

let keys = Keys::try_from(vec![String::from(key::GENOTYPE)])?;
let keys = [String::from(key::GENOTYPE)].into_iter().collect();
values.clear();
assert_eq!(
parse_values(&header, &keys, "0|0:13", &mut values),
Expand Down
Loading

0 comments on commit 72cb2e6

Please sign in to comment.