From 83985b3a5beb9060332b70796757ea62a0c59993 Mon Sep 17 00:00:00 2001 From: willtyler Date: Fri, 30 Aug 2024 19:53:03 +0000 Subject: [PATCH] Add provenance header --- tests/test_cli.py | 13 +++++++++++++ tests/test_vcf_roundtrip.py | 2 +- tests/test_vcf_writer.py | 8 ++++---- vcztools/cli.py | 1 + vcztools/vcf_writer.py | 15 +++++++++++++-- 5 files changed, 32 insertions(+), 7 deletions(-) create mode 100644 tests/test_cli.py diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..c5387a2 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,13 @@ +import pathlib +import re + +from tests.test_bcftools_validation import run_vcztools +from tests.utils import vcz_path_cache + + +def test_version_header(): + vcf_path = pathlib.Path("tests/data/vcf/sample.vcf.gz") + vcz_path = vcz_path_cache(vcf_path) + + output = run_vcztools(f"view {vcz_path}") + assert re.search("##vcztools_viewCommand=.+; Date=.+", output) diff --git a/tests/test_vcf_roundtrip.py b/tests/test_vcf_roundtrip.py index 329c161..b11593a 100644 --- a/tests/test_vcf_roundtrip.py +++ b/tests/test_vcf_roundtrip.py @@ -21,5 +21,5 @@ def test_vcf_to_zarr_to_vcf__real_files(tmp_path, vcf_file): original = pathlib.Path("tests/data/vcf") / vcf_file vcz = vcz_path_cache(original) generated = tmp_path.joinpath("output.vcf") - write_vcf(vcz, generated) + write_vcf(vcz, generated, no_version=True) assert_vcfs_close(original, generated) diff --git a/tests/test_vcf_writer.py b/tests/test_vcf_writer.py index d1f79ed..f493db3 100644 --- a/tests/test_vcf_writer.py +++ b/tests/test_vcf_writer.py @@ -18,10 +18,10 @@ def test_write_vcf(tmp_path, output_is_path): output = tmp_path.joinpath("output.vcf") if output_is_path: - write_vcf(vcz, output) + write_vcf(vcz, output, no_version=True) else: output_str = StringIO() - write_vcf(vcz, output_str) + write_vcf(vcz, output_str, no_version=True) with open(output, "w") as f: f.write(output_str.getvalue()) @@ -251,10 +251,10 @@ def test_write_vcf__header_flags(tmp_path): output = tmp_path.joinpath("output.vcf") output_header = StringIO() - write_vcf(vcz, output_header, header_only=True) + write_vcf(vcz, output_header, header_only=True, no_version=True) output_no_header = StringIO() - write_vcf(vcz, output_no_header, no_header=True) + write_vcf(vcz, output_no_header, no_header=True, no_version=True) assert not output_no_header.getvalue().startswith("#") # combine outputs and check VCFs match diff --git a/vcztools/cli.py b/vcztools/cli.py index 72de451..10f9c89 100644 --- a/vcztools/cli.py +++ b/vcztools/cli.py @@ -130,6 +130,7 @@ def view( output, header_only=header_only, no_header=no_header, + no_version=no_version, variant_regions=regions, variant_targets=targets, samples=samples, diff --git a/vcztools/vcf_writer.py b/vcztools/vcf_writer.py index 871b536..51e7d8a 100644 --- a/vcztools/vcf_writer.py +++ b/vcztools/vcf_writer.py @@ -1,6 +1,8 @@ import functools import io import re +import sys +from datetime import datetime from typing import Optional import numpy as np @@ -82,6 +84,7 @@ def write_vcf( vcf_header: Optional[str] = None, header_only: bool = False, no_header: bool = False, + no_version: bool = False, variant_regions=None, variant_targets=None, samples=None, @@ -157,7 +160,9 @@ def write_vcf( original_header = root.attrs["vcf_header"] else: original_header = None - vcf_header = _generate_header(root, original_header, sample_ids) + vcf_header = _generate_header( + root, original_header, sample_ids, no_version=no_version + ) if not no_header: print(vcf_header, end="", file=output) @@ -382,7 +387,7 @@ def c_chunk_to_vcf( print(line, file=output) -def _generate_header(ds, original_header, sample_ids): +def _generate_header(ds, original_header, sample_ids, *, no_version: bool = False): output = io.StringIO() contigs = list(ds["contig_id"][:]) @@ -509,6 +514,12 @@ def _generate_header(ds, original_header, sample_ids): else: print(f"##contig=", file=output) + if not no_version: + print( + f"##vcztools_viewCommand={' '.join(sys.argv[1:])}; Date={datetime.now()}", + file=output, + ) + # [1.5 Header line syntax] print( "#CHROM",