From 309ce94d1229ad8ed963c9da7162af937e0ccc22 Mon Sep 17 00:00:00 2001 From: Jere Koskela Date: Tue, 6 Aug 2024 19:14:29 +0100 Subject: [PATCH] Multiple merger polyploid scaling --- CHANGELOG.md | 6 ++ lib/msprime.c | 168 ++++++++++++++++----------------------- msprime/_msprimemodule.c | 4 +- tests/test_ancestry.py | 2 +- tests/test_lowlevel.py | 4 +- verification.py | 26 +++--- 6 files changed, 96 insertions(+), 114 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed5bd0ee3..b4030913f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## [1.3.3] - 2024-XX-XX + +**Bug fixes**: + +- Correct the Dirac coalescent time scaling with polyploidy and population growth. + ## [1.3.2] - 2024-07-08 - Add `record_provenance` argument to `sim_mutations` ({issue}`2272`, {pr}`2273`, {user}`peterlharp`) diff --git a/lib/msprime.c b/lib/msprime.c index b7b2b30ea..2316fb00b 100644 --- a/lib/msprime.c +++ b/lib/msprime.c @@ -7949,33 +7949,18 @@ msp_dirac_get_common_ancestor_waiting_time_from_rate( msp_t *self, population_t *pop, double lambda) { double ret = DBL_MAX; - double alpha = pop->growth_rate; + double alpha = 2.0 * pop->growth_rate; double t = self->time; double u, dt, z; + double pop_size = pop->initial_size; if (lambda > 0.0) { u = gsl_ran_exponential(self->rng, 1.0 / lambda); if (alpha == 0.0) { - if (self->ploidy == 1) { - ret = pop->initial_size * pop->initial_size * u; - } else { - /* For ploidy > 1 we assume N/2 two-parent families, so that the rate - * with which 2 lineages belong to a common family is (2/N)^2 */ - ret = pop->initial_size * pop->initial_size * u / 4.0; - } + ret = pop_size * pop_size * u; } else { dt = t - pop->start_time; - if (self->ploidy == 1) { - z = 1 - + alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt) - * u; - } else { - /* For ploidy > 1 we assume N/2 two-parent families, so that the rate - * with which 2 lineages belong to a common family is (2/N)^2 */ - z = 1 - + alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt) - * u / 4.0; - } + z = 1 + alpha * pop_size * pop_size * exp(-alpha * dt) * u; /* if z is <= 0 no coancestry can occur */ if (z > 0) { ret = log(z) / alpha; @@ -7994,13 +7979,7 @@ msp_dirac_get_common_ancestor_waiting_time( { population_t *pop = &self->populations[pop_id]; unsigned int n = (unsigned int) avl_count(&pop->ancestors[label]); - double c = self->model.params.dirac_coalescent.c; - double lambda = n * (n - 1.0) / 2.0; - if (self->ploidy == 1) { - lambda += c; - } else { - lambda += c / (2.0 * self->ploidy); - } + double lambda = gsl_sf_choose(n, 2) + self->model.params.dirac_coalescent.c; return msp_dirac_get_common_ancestor_waiting_time_from_rate(self, pop, lambda); } @@ -8018,71 +7997,64 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t double nC2, p; double psi = self->model.params.dirac_coalescent.psi; - /* We assume haploid reproduction is single-parent, while all other ploidies - * are two-parent */ - if (self->ploidy == 1) { - num_parental_copies = 1; - } else { - num_parental_copies = 2 * self->ploidy; - } - Q = tsk_malloc(num_parental_copies * sizeof(*Q)); - if (Q == NULL) { - ret = MSP_ERR_NO_MEMORY; - goto out; - } - ancestors = &self->populations[pop_id].ancestors[label]; n = avl_count(ancestors); nC2 = gsl_sf_choose(n, 2); - if (self->ploidy == 1) { - p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c)); - } else { - p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c / (2.0 * self->ploidy))); - } + p = nC2 / (nC2 + self->model.params.dirac_coalescent.c); + if (gsl_rng_uniform(self->rng) < p) { - /* When 2 * ploidy parental chromosomes are available, Mendelian segregation - * results in a merger only 1 / (2 * ploidy) of the time. */ - if (self->ploidy == 1 - || gsl_rng_uniform(self->rng) < 1.0 / (2.0 * self->ploidy)) { - /* Choose x and y */ - n = avl_count(ancestors); - j = (uint32_t) gsl_rng_uniform_int(self->rng, n); - x_node = avl_at(ancestors, j); - tsk_bug_assert(x_node != NULL); - x_lin = (lineage_t *) x_node->item; - x = x_lin->head; - avl_unlink_node(ancestors, x_node); - j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1); - y_node = avl_at(ancestors, j); - tsk_bug_assert(y_node != NULL); - y_lin = (lineage_t *) y_node->item; - y = y_lin->head; - avl_unlink_node(ancestors, y_node); - self->num_ca_events++; - msp_free_avl_node(self, x_node); - msp_free_lineage(self, x_lin); - msp_free_avl_node(self, y_node); - msp_free_lineage(self, y_lin); - ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL); - } + /* Choose x and y */ + n = avl_count(ancestors); + j = (uint32_t) gsl_rng_uniform_int(self->rng, n); + x_node = avl_at(ancestors, j); + tsk_bug_assert(x_node != NULL); + x_lin = (lineage_t *) x_node->item; + x = x_lin->head; + avl_unlink_node(ancestors, x_node); + j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1); + y_node = avl_at(ancestors, j); + tsk_bug_assert(y_node != NULL); + y_lin = (lineage_t *) y_node->item; + y = y_lin->head; + avl_unlink_node(ancestors, y_node); + self->num_ca_events++; + msp_free_avl_node(self, x_node); + msp_free_lineage(self, x_lin); + msp_free_avl_node(self, y_node); + msp_free_lineage(self, y_lin); + ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL); } else { - for (j = 0; j < num_parental_copies; j++) { - avl_init_tree(&Q[j], cmp_segment_queue, NULL); - } num_participants = gsl_ran_binomial(self->rng, psi, n); - ret = msp_multi_merger_common_ancestor_event( - self, ancestors, Q, num_participants, num_parental_copies); - if (ret < 0) { - goto out; - } - /* All the lineages that have been assigned to the particular pots can now be - * merged. - */ - for (j = 0; j < num_parental_copies; j++) { - ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL); + if (num_participants > 1) { + /* We assume haploid reproduction is single-parent, while all other ploidies + * are two-parent */ + if (self->ploidy == 1) { + num_parental_copies = 1; + } else { + num_parental_copies = 2 * self->ploidy; + } + Q = tsk_malloc(num_parental_copies * sizeof(*Q)); + if (Q == NULL) { + ret = MSP_ERR_NO_MEMORY; + goto out; + } + for (j = 0; j < num_parental_copies; j++) { + avl_init_tree(&Q[j], cmp_segment_queue, NULL); + } + ret = msp_multi_merger_common_ancestor_event( + self, ancestors, Q, num_participants, num_parental_copies); if (ret < 0) { goto out; } + /* All the lineages that have been assigned to the particular pots can now be + * merged. + */ + for (j = 0; j < num_parental_copies; j++) { + ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL); + if (ret < 0) { + goto out; + } + } } } out: @@ -8242,22 +8214,6 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l double truncation_point = beta_compute_truncation(self); double beta_x, u, increment; - /* We assume haploid reproduction is single-parent, while all other ploidies - * are two-parent */ - if (self->ploidy == 1) { - num_parental_copies = 1; - } else { - num_parental_copies = 2 * self->ploidy; - } - Q = tsk_malloc(num_parental_copies * sizeof(*Q)); - if (Q == NULL) { - ret = MSP_ERR_NO_MEMORY; - goto out; - } - - for (j = 0; j < num_parental_copies; j++) { - avl_init_tree(&Q[j], cmp_segment_queue, NULL); - } ancestors = &self->populations[pop_id].ancestors[label]; n = avl_count(ancestors); beta_x = ran_inc_beta(self->rng, 2.0 - alpha, alpha, truncation_point); @@ -8295,6 +8251,22 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l num_participants = 2 + gsl_ran_binomial(self->rng, beta_x, n - 2); } while (gsl_rng_uniform(self->rng) > 1 / gsl_sf_choose(num_participants, 2)); + /* We assume haploid reproduction is single-parent, while all other ploidies + * are two-parent */ + if (self->ploidy == 1) { + num_parental_copies = 1; + } else { + num_parental_copies = 2 * self->ploidy; + } + Q = tsk_malloc(num_parental_copies * sizeof(*Q)); + if (Q == NULL) { + ret = MSP_ERR_NO_MEMORY; + goto out; + } + + for (j = 0; j < num_parental_copies; j++) { + avl_init_tree(&Q[j], cmp_segment_queue, NULL); + } ret = msp_multi_merger_common_ancestor_event( self, ancestors, Q, num_participants, num_parental_copies); if (ret < 0) { diff --git a/msprime/_msprimemodule.c b/msprime/_msprimemodule.c index 6b3954f02..b3de9398e 100644 --- a/msprime/_msprimemodule.c +++ b/msprime/_msprimemodule.c @@ -1199,8 +1199,8 @@ Simulator_parse_simulation_model(Simulator *self, PyObject *py_model) goto out; } c = PyFloat_AsDouble(value); - if (psi <= 0 || psi >= 1.0) { - PyErr_SetString(PyExc_ValueError, "Must have 0 < psi < 1"); + if (psi <= 0 || psi > 1.0) { + PyErr_SetString(PyExc_ValueError, "Must have 0 < psi <= 1"); goto out; } if (c < 0) { diff --git a/tests/test_ancestry.py b/tests/test_ancestry.py index 08f895e76..14eacb403 100644 --- a/tests/test_ancestry.py +++ b/tests/test_ancestry.py @@ -206,7 +206,7 @@ def test_multimerger_dirac(self): recombination_rate=0.1, record_full_arg=True, sequence_length=10, - model=msprime.DiracCoalescent(psi=0.5, c=5), + model=msprime.DiracCoalescent(psi=0.5, c=100), ) self.verify(sim, multiple_mergers=True) diff --git a/tests/test_lowlevel.py b/tests/test_lowlevel.py index 8b33e4a52..73d2812e5 100644 --- a/tests/test_lowlevel.py +++ b/tests/test_lowlevel.py @@ -1353,7 +1353,7 @@ def test_dirac_simulation_model(self): make_sim(model=model) with pytest.raises(ValueError): make_sim(model=get_simulation_model("dirac")) - for bad_psi in [-1, 0, -1e-6, 1, 1e6]: + for bad_psi in [-1, 0, -1e-6, 1 + 1e-6, 1e6]: with pytest.raises(ValueError): make_sim( model=get_simulation_model("dirac", c=1, psi=bad_psi), @@ -1363,7 +1363,7 @@ def test_dirac_simulation_model(self): make_sim( model=get_simulation_model("dirac", psi=0.5, c=bad_c), ) - for psi in [0.99, 0.2, 1e-4]: + for psi in [1, 0.2, 1e-4]: for c in [5.0, 1e2, 1e-4]: model = get_simulation_model("dirac", psi=psi, c=c) sim = make_sim(model=model) diff --git a/verification.py b/verification.py index dd7ca5b13..ea3a6ca40 100644 --- a/verification.py +++ b/verification.py @@ -3431,12 +3431,14 @@ def _run(self, pop_size, alpha, growth_rate, num_replicates=10000): logging.debug(f"running Beta growth for {pop_size} {alpha} {growth_rate}") b = growth_rate * (alpha - 1) model = (msprime.BetaCoalescent(alpha=alpha),) - ploidy = 2 - a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy)) - name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}" - self.compare_tmrca( - pop_size, growth_rate, model, num_replicates, a, b, ploidy, name - ) + for ploidy in range(2, 7): + a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy)) + name = ( + f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}" + ) + self.compare_tmrca( + pop_size, growth_rate, model, num_replicates, a, b, ploidy, name + ) ploidy = 1 a = 1 / self.compute_beta_timescale(pop_size, alpha, ploidy) name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}" @@ -3474,12 +3476,14 @@ def test_100000_11_001(self): class DiracGrowth(XiGrowth): def _run(self, pop_size, c, psi, growth_rate, num_replicates=10000): logging.debug(f"running Dirac growth for {pop_size} {c} {psi} {growth_rate}") - b = growth_rate + b = 2 * growth_rate model = (msprime.DiracCoalescent(psi=psi, c=c),) - p = 2 - a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size) - name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}" - self.compare_tmrca(pop_size, growth_rate, model, num_replicates, a, b, p, name) + for p in range(2, 7): + a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size) + name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}" + self.compare_tmrca( + pop_size, growth_rate, model, num_replicates, a, b, p, name + ) p = 1 a = (1 + c * psi * psi) / (pop_size * pop_size) name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"