Skip to content

Commit

Permalink
rebase
Browse files: browse the repository at this point in the history
  • Loading branch information
JakeHagen committed Aug 21, 2024
1 parent c238c99 commit df74734
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
9 changes: 9 additions & 0 deletions vcztools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ def query(path, list_samples):
default=None,
help="Samples to include.",
)
@click.option(
"-G",
"--drop_genotypes",
type=bool,
is_flag=True,
help="Drop genotypes.",
)
@click.option(
"-t",
"--targets",
Expand All @@ -98,6 +105,7 @@ def view(
regions,
targets,
samples,
drop_genotypes,
include,
exclude,
):
Expand All @@ -109,6 +117,7 @@ def view(
variant_regions=regions,
variant_targets=targets,
samples=samples,
drop_genotypes=drop_genotypes,
include=include,
exclude=exclude,
)
Expand Down
27 changes: 18 additions & 9 deletions vcztools/vcf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def write_vcf(
variant_regions=None,
variant_targets=None,
samples=None,
drop_genotypes: bool = False,
include: Optional[str] = None,
exclude: Optional[str] = None,
) -> None:
Expand Down Expand Up @@ -139,7 +140,14 @@ def write_vcf(
root = zarr.open(vcz, mode="r")

with open_file_like(output) as output:
if samples is None:
if samples and drop_genotypes:
raise ValueError(
"Cannot select samples and drop genotypes."
)
elif drop_genotypes:
sample_ids = []
samples_selection = np.array([])
elif samples is None:
sample_ids = root["sample_id"][:]
samples_selection = None
else:
Expand Down Expand Up @@ -302,19 +310,20 @@ def c_chunk_to_vcf(
num_samples = len(samples_selection) if samples_selection is not None else None
for name, array in root.items():
if name.startswith("call_") and not name.startswith("call_genotype"):
vcf_name = name[len("call_") :]
format_fields[vcf_name] = get_vchunk_array(
array, v_chunk, v_mask_chunk, samples_selection
)
if num_samples is None:
num_samples = array.shape[1]
if num_samples != 0:
vcf_name = name[len("call_") :]
format_fields[vcf_name] = get_vchunk_array(
array, v_chunk, v_mask_chunk, samples_selection
)
if num_samples is None:
num_samples = array.shape[1]
elif name.startswith("variant_") and name not in RESERVED_VARIABLE_NAMES:
vcf_name = name[len("variant_") :]
info_fields[vcf_name] = get_vchunk_array(array, v_chunk, v_mask_chunk)

gt = None
gt_phased = None
if "call_genotype" in root:
if "call_genotype" in root and num_samples != 0:
array = root["call_genotype"]
gt = get_vchunk_array(array, v_chunk, v_mask_chunk, samples_selection)
if "call_genotype_phased" in root:
Expand Down Expand Up @@ -352,7 +361,7 @@ def c_chunk_to_vcf(
encoder.add_info_field(name, array)

for name, array in format_fields.items():
assert num_samples > 0
#assert num_samples > 0
if array.dtype.kind in ("O", "U"):
array = array.astype("S")
if len(array.shape) == 2:
Expand Down

0 comments on commit df74734

Please sign in to comment.