Skip to content

Commit

Permalink
Add provenance header
Browse files Browse the repository at this point in the history
  • Loading branch information
Will-Tyler committed Aug 30, 2024
1 parent 6baaf2f commit 83985b3
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 7 deletions.
13 changes: 13 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pathlib
import re

from tests.test_bcftools_validation import run_vcztools
from tests.utils import vcz_path_cache


def test_version_header():
    """Check that ``view`` output includes the vcztools provenance header.

    Runs the ``view`` subcommand on the VCZ path obtained for the sample VCF
    and asserts that the output contains text of the form
    ``##vcztools_viewCommand=<...>; Date=<...>``.
    """
    # NOTE(review): presumably vcz_path_cache converts/caches the VCF as VCZ
    # and returns its path -- confirm against tests/utils.
    vcz_path = vcz_path_cache(pathlib.Path("tests/data/vcf/sample.vcf.gz"))

    view_output = run_vcztools(f"view {vcz_path}")

    # Only the presence of the line is checked; the command text and the
    # date are not pinned to specific values.
    provenance_match = re.search(
        "##vcztools_viewCommand=.+; Date=.+", view_output
    )
    assert provenance_match
2 changes: 1 addition & 1 deletion tests/test_vcf_roundtrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ def test_vcf_to_zarr_to_vcf__real_files(tmp_path, vcf_file):
original = pathlib.Path("tests/data/vcf") / vcf_file
vcz = vcz_path_cache(original)
generated = tmp_path.joinpath("output.vcf")
write_vcf(vcz, generated)
write_vcf(vcz, generated, no_version=True)
assert_vcfs_close(original, generated)
8 changes: 4 additions & 4 deletions tests/test_vcf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def test_write_vcf(tmp_path, output_is_path):
output = tmp_path.joinpath("output.vcf")

if output_is_path:
write_vcf(vcz, output)
write_vcf(vcz, output, no_version=True)
else:
output_str = StringIO()
write_vcf(vcz, output_str)
write_vcf(vcz, output_str, no_version=True)
with open(output, "w") as f:
f.write(output_str.getvalue())

Expand Down Expand Up @@ -251,10 +251,10 @@ def test_write_vcf__header_flags(tmp_path):
output = tmp_path.joinpath("output.vcf")

output_header = StringIO()
write_vcf(vcz, output_header, header_only=True)
write_vcf(vcz, output_header, header_only=True, no_version=True)

output_no_header = StringIO()
write_vcf(vcz, output_no_header, no_header=True)
write_vcf(vcz, output_no_header, no_header=True, no_version=True)
assert not output_no_header.getvalue().startswith("#")

# combine outputs and check VCFs match
Expand Down
1 change: 1 addition & 0 deletions vcztools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def view(
output,
header_only=header_only,
no_header=no_header,
no_version=no_version,
variant_regions=regions,
variant_targets=targets,
samples=samples,
Expand Down
15 changes: 13 additions & 2 deletions vcztools/vcf_writer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import functools
import io
import re
import sys
from datetime import datetime
from typing import Optional

import numpy as np
Expand Down Expand Up @@ -82,6 +84,7 @@ def write_vcf(
vcf_header: Optional[str] = None,
header_only: bool = False,
no_header: bool = False,
no_version: bool = False,
variant_regions=None,
variant_targets=None,
samples=None,
Expand Down Expand Up @@ -157,7 +160,9 @@ def write_vcf(
original_header = root.attrs["vcf_header"]
else:
original_header = None
vcf_header = _generate_header(root, original_header, sample_ids)
vcf_header = _generate_header(
root, original_header, sample_ids, no_version=no_version
)

if not no_header:
print(vcf_header, end="", file=output)
Expand Down Expand Up @@ -382,7 +387,7 @@ def c_chunk_to_vcf(
print(line, file=output)


def _generate_header(ds, original_header, sample_ids):
def _generate_header(ds, original_header, sample_ids, *, no_version: bool = False):
output = io.StringIO()

contigs = list(ds["contig_id"][:])
Expand Down Expand Up @@ -509,6 +514,12 @@ def _generate_header(ds, original_header, sample_ids):
else:
print(f"##contig=<ID={contig},length={contig_lengths[i]}>", file=output)

if not no_version:
print(
f"##vcztools_viewCommand={' '.join(sys.argv[1:])}; Date={datetime.now()}",
file=output,
)

# [1.5 Header line syntax]
print(
"#CHROM",
Expand Down

0 comments on commit 83985b3

Please sign in to comment.