Add `index -n/--nrecords` to print the number of records (variants).

Summary of this patch:
  * vcztools/stats.py (new): `nrecords(vcz, output)` opens `vcz` with
    `zarr.open(..., mode="r")` and prints the length of the first dimension
    of the `variant_position` array to `output` (wrapped by
    `open_file_like`).
  * vcztools/cli.py: the `index` command gains an `-n/--nrecords` flag.
    With the flag it calls `stats.nrecords(path, sys.stdout)`; without it,
    it still calls `regions.create_index(path)` as before.
  * tests/test_stats.py (new): checks that `nrecords` writes "9\n" for the
    VCZ conversion of tests/data/vcf/sample.vcf.gz.
  * tests/test_bcftools_validation.py: adds `index -n` and
    `index --nrecords` to the parametrized cases.

NOTE(review): the new `index` body uses `sys.stdout`; the cli.py hunks
below start at line 3 and do not show an `import sys` -- confirm it is
already imported at the top of vcztools/cli.py.

diff --git a/tests/test_bcftools_validation.py b/tests/test_bcftools_validation.py
index 40cbe11..3a5cb88 100644
--- a/tests/test_bcftools_validation.py
+++ b/tests/test_bcftools_validation.py
@@ -72,6 +72,8 @@ def test_vcf_output(tmp_path, args, vcf_file):
 @pytest.mark.parametrize(
     ("args", "vcf_name"),
     [
+        ("index -n", "sample.vcf.gz"),
+        ("index --nrecords", "1kg_2020_chrM.vcf.gz"),
         ("query -l", "sample.vcf.gz"),
         ("query --list-samples", "1kg_2020_chrM.vcf.gz"),
     ],
diff --git a/tests/test_stats.py b/tests/test_stats.py
new file mode 100644
index 0000000..e4787bb
--- /dev/null
+++ b/tests/test_stats.py
@@ -0,0 +1,15 @@
+import pathlib
+from io import StringIO
+
+from vcztools.stats import nrecords
+
+from .utils import vcz_path_cache
+
+
+def test_nrecords():
+    original = pathlib.Path("tests/data/vcf") / "sample.vcf.gz"
+    vcz = vcz_path_cache(original)
+
+    output_str = StringIO()
+    nrecords(vcz, output_str)
+    assert output_str.getvalue() == "9\n"
diff --git a/vcztools/cli.py b/vcztools/cli.py
index b320573..873880a 100644
--- a/vcztools/cli.py
+++ b/vcztools/cli.py
@@ -3,7 +3,7 @@
 import click
 
 from . import query as query_module
-from . import regions, vcf_writer
+from . import regions, stats, vcf_writer
 
 
 class NaturalOrderGroup(click.Group):
@@ -17,8 +17,17 @@ def list_commands(self, ctx):
 
 @click.command
 @click.argument("path", type=click.Path())
-def index(path):
-    regions.create_index(path)
+@click.option(
+    "-n",
+    "--nrecords",
+    is_flag=True,
+    help="Print the number of records (variants).",
+)
+def index(path, nrecords):
+    if nrecords:
+        stats.nrecords(path, sys.stdout)
+    else:
+        regions.create_index(path)
 
 
 @click.command
diff --git a/vcztools/stats.py b/vcztools/stats.py
new file mode 100644
index 0000000..460802e
--- /dev/null
+++ b/vcztools/stats.py
@@ -0,0 +1,11 @@
+import zarr
+
+from vcztools.utils import open_file_like
+
+
+def nrecords(vcz, output):
+    root = zarr.open(vcz, mode="r")
+
+    with open_file_like(output) as output:
+        num_variants = root["variant_position"].shape[0]
+        print(num_variants, file=output)