Skip to content

Commit

Permalink
Add query list samples command
Browse files Browse the repository at this point in the history
  • Loading branch information
Will-Tyler committed Aug 19, 2024
1 parent 3e1c238 commit ac397a9
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
19 changes: 18 additions & 1 deletion tests/test_bcftools_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def run_vcztools(args: str) -> str:
]
)
# fmt: on
def test(tmp_path, args, vcf_file):
def test_vcf_output(tmp_path, args, vcf_file):
original = pathlib.Path("tests/data/vcf") / vcf_file
vcz = vcz_path_cache(original)

Expand All @@ -67,3 +67,20 @@ def test(tmp_path, args, vcf_file):
f.write(vcztools_out)

assert_vcfs_close(bcftools_out_file, vcztools_out_file)


@pytest.mark.parametrize(
    ("args", "vcf_name"),
    [
        ("query -l", "sample.vcf.gz"),
        ("query --list-samples", "1kg_2020_chrM.vcf.gz"),
    ],
)
def test_output(tmp_path, args, vcf_name):
    """Check that vcztools and bcftools print identical text for ``args``.

    bcftools is run against the VCF under ``tests/data/vcf``; vcztools is
    run against the path that ``vcz_path_cache`` returns for that VCF.
    """
    source_vcf = pathlib.Path("tests/data/vcf", vcf_name)
    converted_vcz = vcz_path_cache(source_vcf)

    expected = run_bcftools(f"{args} {source_vcf}")
    actual = run_vcztools(f"{args} {converted_vcz}")

    assert actual == expected
20 changes: 20 additions & 0 deletions tests/test_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pathlib
from io import StringIO

from tests.utils import vcz_path_cache
from vcztools.query import list_samples


def test_list_samples(tmp_path):
    """Exercise ``list_samples`` with a text stream and with a file path."""
    vcz_path = vcz_path_cache(pathlib.Path("tests/data/vcf", "sample.vcf.gz"))
    expected_output = "NA00001\nNA00002\nNA00003\n"

    # Case 1: an already-open in-memory stream is written to directly.
    buffer = StringIO()
    with buffer:
        list_samples(vcz_path, buffer)
        # getvalue() must be called before the buffer is closed.
        assert buffer.getvalue() == expected_output

    # Case 2: a filesystem path is written to by list_samples itself.
    sample_ids_file = tmp_path / "sample_ids.txt"
    list_samples(vcz_path, output=sample_ids_file)

    assert sample_ids_file.read_text() == expected_output
16 changes: 16 additions & 0 deletions vcztools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import click

from . import query as query_module
from . import regions, vcf_writer


Expand All @@ -20,6 +21,20 @@ def index(path):
regions.create_index(path)


@click.command
@click.argument("path", type=click.Path())
@click.option(
    "-l",
    "--list-samples",
    help="List the sample IDs and exit.",
    is_flag=True,
)
def query(path, list_samples):
    # NOTE: deliberately a comment rather than a docstring -- click would
    # show a docstring as the command's help text.
    #
    # Listing samples is the only action handled here; without the flag the
    # command returns without producing any output.
    if not list_samples:
        return

    query_module.list_samples(path)


@click.command
@click.argument("path", type=click.Path())
@click.option(
Expand Down Expand Up @@ -96,4 +111,5 @@ def vcztools_main():


vcztools_main.add_command(index)
vcztools_main.add_command(query)
vcztools_main.add_command(view)
15 changes: 15 additions & 0 deletions vcztools/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pathlib
from contextlib import ExitStack

import zarr


def list_samples(vcz_path, output=None):
    """Write the entries of the store's ``sample_id`` array, one per line.

    Args:
        vcz_path: Location of the store; passed unchanged to ``zarr.open``
            in read-only mode.
        output: Destination for the listing. A ``str`` or ``pathlib.Path``
            is opened for writing (truncating any existing file) and closed
            before returning. Any other value is passed to ``print`` as its
            ``file`` argument, so ``None`` writes to ``sys.stdout`` and an
            already-open text stream is written to and left open.
    """
    root = zarr.open(vcz_path, mode="r")
    # Read the IDs before touching the destination so that a failed read
    # does not leave behind a freshly truncated output file.
    sample_ids = root["sample_id"][:]

    with ExitStack() as stack:
        if isinstance(output, (str, pathlib.Path)):
            output = stack.enter_context(open(output, mode="w"))

        # Joining an empty sequence yields "", and printing that would emit
        # a lone blank line; with no samples nothing should be written.
        if len(sample_ids) > 0:
            print("\n".join(sample_ids), file=output)

0 comments on commit ac397a9

Please sign in to comment.