Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple merger polyploid scaling #2310

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## [1.3.3] - 2024-XX-XX

**Bug fixes**:

- Correct the Dirac coalescent time scaling with polyploidy and population growth.

## [1.3.2] - 2024-07-08

- Add `record_provenance` argument to `sim_mutations` ({issue}`2272`, {pr}`2273`, {user}`peterlharp`)
Expand Down
168 changes: 70 additions & 98 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -7949,33 +7949,18 @@ msp_dirac_get_common_ancestor_waiting_time_from_rate(
msp_t *self, population_t *pop, double lambda)
{
double ret = DBL_MAX;
double alpha = pop->growth_rate;
double alpha = 2.0 * pop->growth_rate;
double t = self->time;
double u, dt, z;
double pop_size = pop->initial_size;

if (lambda > 0.0) {
u = gsl_ran_exponential(self->rng, 1.0 / lambda);
if (alpha == 0.0) {
if (self->ploidy == 1) {
ret = pop->initial_size * pop->initial_size * u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
ret = pop->initial_size * pop->initial_size * u / 4.0;
}
ret = pop_size * pop_size * u;
} else {
dt = t - pop->start_time;
if (self->ploidy == 1) {
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u / 4.0;
}
z = 1 + alpha * pop_size * pop_size * exp(-alpha * dt) * u;
/* if z is <= 0 no coancestry can occur */
if (z > 0) {
ret = log(z) / alpha;
Expand All @@ -7994,13 +7979,7 @@ msp_dirac_get_common_ancestor_waiting_time(
{
population_t *pop = &self->populations[pop_id];
unsigned int n = (unsigned int) avl_count(&pop->ancestors[label]);
double c = self->model.params.dirac_coalescent.c;
double lambda = n * (n - 1.0) / 2.0;
if (self->ploidy == 1) {
lambda += c;
} else {
lambda += c / (2.0 * self->ploidy);
}
double lambda = gsl_sf_choose(n, 2) + self->model.params.dirac_coalescent.c;

return msp_dirac_get_common_ancestor_waiting_time_from_rate(self, pop, lambda);
}
Expand All @@ -8018,71 +7997,64 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t
double nC2, p;
double psi = self->model.params.dirac_coalescent.psi;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
nC2 = gsl_sf_choose(n, 2);
if (self->ploidy == 1) {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c));
} else {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c / (2.0 * self->ploidy)));
}
p = nC2 / (nC2 + self->model.params.dirac_coalescent.c);

if (gsl_rng_uniform(self->rng) < p) {
/* When 2 * ploidy parental chromosomes are available, Mendelian segregation
* results in a merger only 1 / (2 * ploidy) of the time. */
if (self->ploidy == 1
|| gsl_rng_uniform(self->rng) < 1.0 / (2.0 * self->ploidy)) {
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x_lin = (lineage_t *) x_node->item;
x = x_lin->head;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y_lin = (lineage_t *) y_node->item;
y = y_lin->head;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_lineage(self, x_lin);
msp_free_avl_node(self, y_node);
msp_free_lineage(self, y_lin);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
}
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x_lin = (lineage_t *) x_node->item;
x = x_lin->head;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y_lin = (lineage_t *) y_node->item;
y = y_lin->head;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_lineage(self, x_lin);
msp_free_avl_node(self, y_node);
msp_free_lineage(self, y_lin);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
} else {
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
num_participants = gsl_ran_binomial(self->rng, psi, n);
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (num_participants > 1) {
/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (ret < 0) {
goto out;
}
}
}
}
out:
Expand Down Expand Up @@ -8242,22 +8214,6 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
double truncation_point = beta_compute_truncation(self);
double beta_x, u, increment;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
beta_x = ran_inc_beta(self->rng, 2.0 - alpha, alpha, truncation_point);
Expand Down Expand Up @@ -8295,6 +8251,22 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
num_participants = 2 + gsl_ran_binomial(self->rng, beta_x, n - 2);
} while (gsl_rng_uniform(self->rng) > 1 / gsl_sf_choose(num_participants, 2));

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
Expand Down
4 changes: 2 additions & 2 deletions msprime/_msprimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1199,8 +1199,8 @@ Simulator_parse_simulation_model(Simulator *self, PyObject *py_model)
goto out;
}
c = PyFloat_AsDouble(value);
if (psi <= 0 || psi >= 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi < 1");
if (psi <= 0 || psi > 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi <= 1");
goto out;
}
if (c < 0) {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ancestry.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def test_multimerger_dirac(self):
recombination_rate=0.1,
record_full_arg=True,
sequence_length=10,
model=msprime.DiracCoalescent(psi=0.5, c=5),
model=msprime.DiracCoalescent(psi=0.5, c=100),
)
self.verify(sim, multiple_mergers=True)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ def test_dirac_simulation_model(self):
make_sim(model=model)
with pytest.raises(ValueError):
make_sim(model=get_simulation_model("dirac"))
for bad_psi in [-1, 0, -1e-6, 1, 1e6]:
for bad_psi in [-1, 0, -1e-6, 1 + 1e-6, 1e6]:
with pytest.raises(ValueError):
make_sim(
model=get_simulation_model("dirac", c=1, psi=bad_psi),
Expand All @@ -1363,7 +1363,7 @@ def test_dirac_simulation_model(self):
make_sim(
model=get_simulation_model("dirac", psi=0.5, c=bad_c),
)
for psi in [0.99, 0.2, 1e-4]:
for psi in [1, 0.2, 1e-4]:
for c in [5.0, 1e2, 1e-4]:
model = get_simulation_model("dirac", psi=psi, c=c)
sim = make_sim(model=model)
Expand Down
26 changes: 15 additions & 11 deletions verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -3431,12 +3431,14 @@ def _run(self, pop_size, alpha, growth_rate, num_replicates=10000):
logging.debug(f"running Beta growth for {pop_size} {alpha} {growth_rate}")
b = growth_rate * (alpha - 1)
model = (msprime.BetaCoalescent(alpha=alpha),)
ploidy = 2
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
for ploidy in range(2, 7):
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = (
f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
)
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
ploidy = 1
a = 1 / self.compute_beta_timescale(pop_size, alpha, ploidy)
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
Expand Down Expand Up @@ -3474,12 +3476,14 @@ def test_100000_11_001(self):
class DiracGrowth(XiGrowth):
def _run(self, pop_size, c, psi, growth_rate, num_replicates=10000):
logging.debug(f"running Dirac growth for {pop_size} {c} {psi} {growth_rate}")
b = growth_rate
b = 2 * growth_rate
model = (msprime.DiracCoalescent(psi=psi, c=c),)
p = 2
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(pop_size, growth_rate, model, num_replicates, a, b, p, name)
for p in range(2, 7):
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, p, name
)
p = 1
a = (1 + c * psi * psi) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
Expand Down
Loading