diff --git a/tests/test_bcftools_validation.py b/tests/test_bcftools_validation.py index f17f089..830669c 100644 --- a/tests/test_bcftools_validation.py +++ b/tests/test_bcftools_validation.py @@ -131,6 +131,9 @@ def test_vcf_output_with_output_option(tmp_path, args, vcf_file): (r"query -f '[%CHROM\t]\n'", "sample.vcf.gz"), (r"query -f '[%CHROM\t]\n' -i 'POS=112'", "sample.vcf.gz"), (r"query -f '%CHROM\t%POS\t%REF\t%ALT[\t%GT]\n'", "sample.vcf.gz"), + (r"query -f 'GQ:[ %GQ] \t GT:[ %GT]\n'", "sample.vcf.gz"), + (r"query -f '[%CHROM:%POS %GT\n]'", "sample.vcf.gz"), + (r"query -f '[%GT %DP\n]'", "sample.vcf.gz"), ], ) def test_output(tmp_path, args, vcf_name): diff --git a/vcztools/query.py b/vcztools/query.py index 0200a96..4df2b43 100644 --- a/vcztools/query.py +++ b/vcztools/query.py @@ -115,22 +115,8 @@ def stringify(gt_and_phase: tuple): gt_row = gt_row.tolist() yield map(stringify, zip(gt_row, phase)) else: - for v_chunk_index in range(gt_zarray.cdata_shape[0]): - start = v_chunk_index * v_chunk_size - end = start + v_chunk_size - - for gt_row in gt_zarray[start:end]: - - def stringify(gt: list[int]): - gt = [ - str(allele) if allele != constants.INT_MISSING else "." - for allele in gt - if allele != constants.INT_FILL - ] - return "/".join(gt) - - gt_row = gt_row.tolist() - yield map(stringify, gt_row) + # TODO: Support datasets without the phasing data + raise NotImplementedError return generate @@ -164,8 +150,7 @@ def generate(root): if tag == "REF": row = row[0] if tag == "ALT": - row = [allele for allele in row[1:] if allele] - row = row or "." + row = [allele for allele in row[1:] if allele] or "." if tag == "FILTER": assert filter_ids is not None @@ -185,17 +170,23 @@ def generate(root): ): row = ",".join(map(str, row)) - result = row if not is_missing else "." - if sample_loop: sample_count = root["sample_id"].shape[0] - if isinstance(row, np.ndarray) or isinstance(row, list): + if isinstance(row, np.ndarray): + row = row.tolist() + row = [ + str(element) + if element != constants.INT_MISSING + else "." + for element in row + if element != constants.INT_FILL + ] yield row else: - yield itertools.repeat(row, sample_count) + yield itertools.repeat(str(row), sample_count) else: - yield result + yield row if not is_missing else "." return generate