diff --git a/tests/data/txt/samples.txt b/tests/data/txt/samples.txt
new file mode 100644
index 0000000..7e4784c
--- /dev/null
+++ b/tests/data/txt/samples.txt
@@ -0,0 +1,2 @@
+NA00001
+NA00003
diff --git a/tests/test_bcftools_validation.py b/tests/test_bcftools_validation.py
index 9d79763..757d40f 100644
--- a/tests/test_bcftools_validation.py
+++ b/tests/test_bcftools_validation.py
@@ -53,6 +53,11 @@ def run_vcztools(args: str) -> str:
             "view --no-version -G",
             "sample.vcf.gz"
         ),
+        (
+            "view --no-update --no-version --samples-file "
+            "tests/data/txt/samples.txt",
+            "sample.vcf.gz"),
+        ("view -I --no-version -S tests/data/txt/samples.txt", "sample.vcf.gz"),
     ]
 )
 # fmt: on
diff --git a/vcztools/cli.py b/vcztools/cli.py
index fe82bf1..3f61dcb 100644
--- a/vcztools/cli.py
+++ b/vcztools/cli.py
@@ -85,6 +85,12 @@ def query(path, list_samples, format):
     default=None,
     help="Regions to include.",
 )
+@click.option(
+    "-I",
+    "--no-update",
+    is_flag=True,
+    help="Do not recalculate INFO fields for the sample subset.",
+)
 @click.option(
     "-s",
     "--samples",
@@ -92,6 +98,13 @@ def query(path, list_samples, format):
     default=None,
     help="Samples to include.",
 )
+@click.option(
+    "-S",
+    "--samples-file",
+    type=str,
+    default=None,
+    help="File of sample names to include.",
+)
 @click.option(
     "-G",
     "--drop-genotypes",
@@ -120,7 +133,9 @@ def view(
     no_version,
     regions,
     targets,
+    no_update,
     samples,
+    samples_file,
     drop_genotypes,
     include,
     exclude,
@@ -129,6 +144,15 @@ def view(
         split = output.split(".")
         raise ValueError(f"Output file extension must be .vcf, got: .{split[-1]}")
 
+    if samples_file:
+        # Skip blank lines so they cannot become empty sample names.
+        with open(samples_file) as file:
+            file_samples = [name for name in map(str.strip, file) if name]
+        # Keep any -s/--samples value and separate it from the file's names.
+        samples = ",".join(([samples] if samples else []) + file_samples)
+
+    # TODO: use no_update when fixing https://github.com/sgkit-dev/vcztools/issues/75
+
     vcf_writer.write_vcf(
         path,
         output,