Skip to content

Commit

Permalink
Test generated header
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite committed Sep 30, 2024
1 parent e5ae887 commit d590ebd
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 2 deletions.
44 changes: 44 additions & 0 deletions tests/test_vcf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import numpy as np
import pytest
import zarr
from bio2zarr import vcf2zarr
from cyvcf2 import VCF
from numpy.testing import assert_array_equal

Expand Down Expand Up @@ -269,6 +271,48 @@ def test_write_vcf__header_flags(tmp_path):
assert_vcfs_close(original, output)


def test_write_vcf__generate_header(tmp_path):
    """Check the header ``write_vcf`` emits when the store has no ``vcf_header``.

    The sample VCF is converted into a fresh store under ``tmp_path``, the
    ``vcf_header`` attribute is deleted from that store, and ``write_vcf`` is
    then asked for the header only (``header_only=True``, ``no_version=True``).
    The output must equal the expected header text below, with the
    ``##source`` value taken from the store's own ``source`` attribute.
    """
    original = pathlib.Path("tests/data/vcf") / "sample.vcf.gz"
    # don't use cache here since we mutate the vcz
    vcz = tmp_path.joinpath("intermediate.vcz")
    vcf2zarr.convert([original], vcz, worker_processes=0, local_alleles=False)

    # remove vcf_header
    root = zarr.open(vcz, mode="r+")
    del root.attrs["vcf_header"]

    output_header = StringIO()
    write_vcf(vcz, output_header, header_only=True, no_version=True)

    # The column header line is tab-delimited in VCF. The separators are
    # written as explicit ``\t`` escapes so they stay visible and cannot be
    # silently converted to spaces by an editor or formatter.
    expected_vcf_header = """##fileformat=VCFv4.3
##source={}
##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
##INFO=<ID=AC,Number=2,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=2,Type=Float,Description="Allele Frequency">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
##FILTER=<ID=PASS,Description="">
##FILTER=<ID=s50,Description="">
##FILTER=<ID=q10,Description="">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
##contig=<ID=19>
##contig=<ID=20>
##contig=<ID=X>
#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003
"""  # noqa: E501

    # substitute value of source
    expected_vcf_header = expected_vcf_header.format(root.attrs["source"])

    assert output_header.getvalue() == expected_vcf_header


def test_compute_info_fields():
gt = np.array([
[[0, 0], [0, 1], [1, 1]],
Expand Down
2 changes: 0 additions & 2 deletions vcztools/vcf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,8 +445,6 @@ def _generate_header(ds, original_header, sample_ids, *, no_version: bool = Fals
# [1.4.1 File format]
print("##fileformat=VCFv4.3", file=output)

print('##FILTER=<ID=PASS,Description="All filters passed">', file=output)

if "source" in ds.attrs:
print(f'##source={ds.attrs["source"]}', file=output)

Expand Down

0 comments on commit d590ebd

Please sign in to comment.