Skip to content

Commit

Permalink
Support call fields in queries
Browse files (browse the repository at this point in the history)
  • Loading branch information
Will-Tyler committed Oct 3, 2024
1 parent 9929543 commit cc31740
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 23 deletions.
3 changes: 3 additions & 0 deletions tests/test_bcftools_validation.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -131,6 +131,9 @@ def test_vcf_output_with_output_option(tmp_path, args, vcf_file):
(r"query -f '[%CHROM\t]\n'", "sample.vcf.gz"),
(r"query -f '[%CHROM\t]\n' -i 'POS=112'", "sample.vcf.gz"),
(r"query -f '%CHROM\t%POS\t%REF\t%ALT[\t%GT]\n'", "sample.vcf.gz"),
(r"query -f 'GQ:[ %GQ] \t GT:[ %GT]\n'", "sample.vcf.gz"),
(r"query -f '[%CHROM:%POS %GT\n]'", "sample.vcf.gz"),
(r"query -f '[%GT %DP\n]'", "sample.vcf.gz"),
],
)
def test_output(tmp_path, args, vcf_name):
Expand Down
37 changes: 14 additions & 23 deletions vcztools/query.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -115,22 +115,8 @@ def stringify(gt_and_phase: tuple):
gt_row = gt_row.tolist()
yield map(stringify, zip(gt_row, phase))
else:
for v_chunk_index in range(gt_zarray.cdata_shape[0]):
start = v_chunk_index * v_chunk_size
end = start + v_chunk_size

for gt_row in gt_zarray[start:end]:

def stringify(gt: list[int]):
gt = [
str(allele) if allele != constants.INT_MISSING else "."
for allele in gt
if allele != constants.INT_FILL
]
return "/".join(gt)

gt_row = gt_row.tolist()
yield map(stringify, gt_row)
# TODO: Support datasets without the phasing data
raise NotImplementedError

return generate

Expand Down Expand Up @@ -164,8 +150,7 @@ def generate(root):
if tag == "REF":
row = row[0]
if tag == "ALT":
row = [allele for allele in row[1:] if allele]
row = row or "."
row = [allele for allele in row[1:] if allele] or "."
if tag == "FILTER":
assert filter_ids is not None

Expand All @@ -185,17 +170,23 @@ def generate(root):
):
row = ",".join(map(str, row))

result = row if not is_missing else "."

if sample_loop:
sample_count = root["sample_id"].shape[0]

if isinstance(row, np.ndarray) or isinstance(row, list):
if isinstance(row, np.ndarray):
row = row.tolist()
row = [
str(element)
if element != constants.INT_MISSING
else "."
for element in row
if element != constants.INT_FILL
]
yield row
else:
yield itertools.repeat(row, sample_count)
yield itertools.repeat(str(row), sample_count)
else:
yield result
yield row if not is_missing else "."

return generate

Expand Down

0 comments on commit cc31740

Please sign in to comment.