From 846b7591a4ec7646271d7410745c33e93f048f2b Mon Sep 17 00:00:00 2001
From: Tom White
Date: Mon, 5 Aug 2024 11:00:30 +0100
Subject: [PATCH] Add support for index -n/--nrecords

---
 tests/test_stats.py | 16 ++++++++++++++++
 vcztools/cli.py     | 15 ++++++++++++---
 vcztools/stats.py   | 15 +++++++++++++++
 3 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_stats.py
 create mode 100644 vcztools/stats.py

diff --git a/tests/test_stats.py b/tests/test_stats.py
new file mode 100644
index 0000000..9ec0900
--- /dev/null
+++ b/tests/test_stats.py
@@ -0,0 +1,16 @@
+import pathlib
+from io import StringIO
+
+from vcztools.stats import nrecords
+
+from .utils import vcz_path_cache
+
+
+def test_nrecords():
+    original = pathlib.Path("tests/data/vcf") / "sample.vcf.gz"
+    vcz = vcz_path_cache(original)
+
+    output_str = StringIO()
+    nrecords(vcz, output_str)
+
+    assert output_str.getvalue() == "9\n"
diff --git a/vcztools/cli.py b/vcztools/cli.py
index 0248e49..1175094 100644
--- a/vcztools/cli.py
+++ b/vcztools/cli.py
@@ -2,7 +2,7 @@
 
 import click
 
-from . import regions, vcf_writer
+from . import regions, stats, vcf_writer
 
 
 class NaturalOrderGroup(click.Group):
@@ -16,8 +16,17 @@ def list_commands(self, ctx):
 
 @click.command
 @click.argument("path", type=click.Path())
-def index(path):
-    regions.create_index(path)
+@click.option(
+    "-n",
+    "--nrecords",
+    is_flag=True,
+    help="Print the number of records (variants).",
+)
+def index(path, nrecords):
+    if nrecords:
+        stats.nrecords(path, sys.stdout)
+    else:
+        regions.create_index(path)
 
 
 @click.command
diff --git a/vcztools/stats.py b/vcztools/stats.py
new file mode 100644
index 0000000..e819326
--- /dev/null
+++ b/vcztools/stats.py
@@ -0,0 +1,15 @@
+from contextlib import ExitStack
+from pathlib import Path
+
+import zarr
+
+
+def nrecords(vcz, output):
+    root = zarr.open(vcz, mode="r")
+
+    with ExitStack() as stack:
+        if isinstance(output, str) or isinstance(output, Path):
+            output = stack.enter_context(open(output, mode="w"))
+
+        num_variants = root["variant_position"].shape[0]
+        print(num_variants, file=output)