diff --git a/dev.py b/dev.py index 2a25fc2..2dcd02c 100644 --- a/dev.py +++ b/dev.py @@ -56,7 +56,7 @@ def main(root): # we avoid retrieving stuff we don't need. format_fields = {} info_fields = {} - for name, array in root.items(): + for name, array in root.arrays(): if name.startswith("call_") and not name.startswith("call_genotype"): vcf_name = name[len("call_") :] format_fields[vcf_name] = array.blocks[v_chunk] diff --git a/vcztools/filter.py b/vcztools/filter.py index 6ff04e3..3929e13 100644 --- a/vcztools/filter.py +++ b/vcztools/filter.py @@ -113,7 +113,7 @@ def _compose_comparison_evaluator(self, parse_results: pp.ParseResults) -> Calla def evaluator(root, variant_chunk_index: int) -> np.ndarray: vcf_name = parse_results[0] - vcz_names = set(name for name, _array in root.items()) + vcz_names = set(root.keys()) vcz_name = vcf_name_to_vcz_name(vcz_names, vcf_name) zarray = root[vcz_name] variant_chunk_len = zarray.chunks[0] diff --git a/vcztools/query.py b/vcztools/query.py index fd33c4a..689af41 100644 --- a/vcztools/query.py +++ b/vcztools/query.py @@ -141,7 +141,7 @@ def _compose_tag_generator( return self._compose_sample_ids_generator() def generate(root): - vcz_names = set(name for name, _zarray in root.items()) + vcz_names = set(root.keys()) vcz_name = vcf_name_to_vcz_name(vcz_names, tag) zarray = root[vcz_name] contig_ids = root["contig_id"][:] if tag == "CHROM" else None diff --git a/vcztools/vcf_writer.py b/vcztools/vcf_writer.py index 0240939..63af9df 100644 --- a/vcztools/vcf_writer.py +++ b/vcztools/vcf_writer.py @@ -309,15 +309,15 @@ def c_chunk_to_vcf( no_update, preceding_future: concurrent.futures.Future | None = None, ): - chrom = contigs[get_vchunk_array(root.variant_contig, v_chunk, v_mask_chunk)] + chrom = contigs[get_vchunk_array(root["variant_contig"], v_chunk, v_mask_chunk)] # TODO check we don't truncate silently by doing this - pos = get_vchunk_array(root.variant_position, v_chunk, v_mask_chunk).astype( + pos = get_vchunk_array(root["variant_position"], v_chunk, v_mask_chunk).astype( np.int32 ) - id = get_vchunk_array(root.variant_id, v_chunk, v_mask_chunk).astype("S") - alleles = get_vchunk_array(root.variant_allele, v_chunk, v_mask_chunk) - qual = get_vchunk_array(root.variant_quality, v_chunk, v_mask_chunk) - filter_ = get_vchunk_array(root.variant_filter, v_chunk, v_mask_chunk) + id = get_vchunk_array(root["variant_id"], v_chunk, v_mask_chunk).astype("S") + alleles = get_vchunk_array(root["variant_allele"], v_chunk, v_mask_chunk) + qual = get_vchunk_array(root["variant_quality"], v_chunk, v_mask_chunk) + filter_ = get_vchunk_array(root["variant_filter"], v_chunk, v_mask_chunk) format_fields = {} info_fields = {} num_samples = len(samples_selection) if samples_selection is not None else None @@ -346,7 +346,7 @@ def c_chunk_to_vcf( else: gt_phased = np.zeros_like(gt, dtype=bool) - for name, zarray in root.items(): + for name, zarray in root.arrays(): if ( name.startswith("call_") and not name.startswith("call_genotype") @@ -442,7 +442,8 @@ def _generate_header(ds, original_header, sample_ids, *, no_version: bool = Fals # GT must be the first field if present, per the spec (section 1.6.2) format_fields.append("GT") - for var, arr in ds.items(): + for var in sorted(ds.keys()): + arr = ds[var] if ( var.startswith("variant_") and not var.endswith("_fill")