Skip to content

Commit

Permalink
Use sgkit.distarray for Hardy-Weinberg Equilibrium
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite committed Sep 19, 2024
1 parent c552a84 commit c359718
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cubed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ jobs:
- name: Test with pytest
run: |
- pytest -v sgkit/tests/test_aggregation.py -k 'test_count_call_alleles or test_sample_stats or (test_count_variant_alleles and not test_count_variant_alleles__chunked[call_genotype]) or (test_variant_stats and not test_variant_stats__chunks[chunks2-False])' --use-cubed
+ pytest -v sgkit/tests/test_{aggregation,hwe}.py -k 'test_count_call_alleles or test_hwep or test_sample_stats or (test_count_variant_alleles and not test_count_variant_alleles__chunked[call_genotype]) or (test_variant_stats and not test_variant_stats__chunks[chunks2-False])' --use-cubed
5 changes: 2 additions & 3 deletions sgkit/stats/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,8 @@ def genotype_coords(
G = da.map_blocks(_index_as_genotype, X, K, new_axis=1, chunks=chunks)
# allow enough room for all alleles and separators
dtype = "|S{}".format(max_chars * ploidy + ploidy - 1)
- S = da.map_blocks(
- genotype_as_bytes, G, False, max_chars, drop_axis=1, dtype=dtype
- ).astype("U")
+ S = da.map_blocks(genotype_as_bytes, G, False, max_chars, drop_axis=1, dtype=dtype)
+ S = da.astype(S, "U{}".format(max_chars * ploidy + ploidy - 1))
new_ds = create_dataset({variables.genotype_id: ("genotypes", S)})
ds = conditional_merge_datasets(ds, new_ds, merge)
if assign_coords:
Expand Down
2 changes: 1 addition & 1 deletion sgkit/stats/hwe.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Hashable, Optional

- import dask.array as da
import numpy as np
from xarray import Dataset

+ import sgkit.distarray as da
from sgkit import variables
from sgkit.accelerate import numba_jit
from sgkit.stats.aggregation import count_variant_genotypes
Expand Down

0 comments on commit c359718

Please sign in to comment.