Skip to content

Commit

Permalink
Merge pull request #2321 from jeromekelleher/internal-pedigree-sample…
Browse files Browse the repository at this point in the history
…-first-pass

Initial steps for internal pedigree samples
  • Loading branch information
jeromekelleher authored Sep 7, 2024
2 parents b3b1be8 + be1bcc8 commit e2932a2
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 119 deletions.
34 changes: 19 additions & 15 deletions algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,12 @@ def __init__(
# implemented using `ParametricAncestryModel()`
self.hull_offset = hull_offset

self.initialise(tables.tree_sequence())
if model == "fixed_pedigree":
self.t = 0
self.S[0] = 0
self.S[self.L] = -1
else:
self.initialise(tables.tree_sequence())

self.num_ca_events = 0
self.num_re_events = 0
Expand Down Expand Up @@ -981,6 +986,7 @@ def initialise(self, ts):
root_segments_tail = [None for _ in range(ts.num_nodes)]
root_lineages = [None for _ in range(ts.num_nodes)]
last_S = -1
start_time = np.inf
for tree in ts.trees():
left, right = tree.interval
S = 0 if tree.num_roots == 1 else tree.num_roots
Expand All @@ -991,6 +997,7 @@ def initialise(self, ts):
# any ancestral segments to the state.
if tree.num_roots > 1:
for root in tree.roots:
start_time = min(start_time, tree.time(root))
population = ts.node(root).population
if root_segments_head[root] is None:
seg = self.alloc_segment(left, right, root)
Expand All @@ -1014,7 +1021,7 @@ def initialise(self, ts):
# Insert the segment chains into the algorithm state.
for node in range(ts.num_nodes):
lineage = root_lineages[node]
if lineage is not None:
if lineage is not None and ts.nodes_time[node] == start_time:
seg = lineage.head
left_end = seg.left
while seg is not None:
Expand Down Expand Up @@ -1616,6 +1623,16 @@ def process_pedigree_common_ancestors(self, ind, ploid):
individual, then recombine and distribute the remaining ancestral
material among its parent ploids.
"""
node = ind.nodes[ploid]
is_sample = (self.tables.nodes.flags[node] & tskit.NODE_IS_SAMPLE) > 0
if is_sample:
segment = self.alloc_segment(0, self.L, node)
lineage = self.alloc_lineage(segment, population=0)
self.add_lineage(lineage)
ind.add_common_ancestor(lineage.head, ploid=ploid)
for k in list(self.S.keys())[:-1]:
self.S[k] += 1

common_ancestors = ind.common_ancestors[ploid]
if len(common_ancestors) == 0:
# No ancestral material inherited on this ploid of this individual
Expand All @@ -1632,7 +1649,6 @@ def process_pedigree_common_ancestors(self, ind, ploid):
# monoploid genome for this ploid of this individual.
# If any coalescences occur, they use the corresponding node ID.
# FIXME update the population/label here
node = ind.nodes[ploid]
genome = self.merge_ancestors(common_ancestors, 0, 0, node)
if ind.parents[ploid] == tskit.NULL:
# If this individual is a founder we need to make sure that all
Expand Down Expand Up @@ -1676,18 +1692,6 @@ def dtwf_climb_pedigree(self):
Simulates transmission of ancestral material through a pre-specified
pedigree
"""
assert self.num_populations == 1 # Single pop/pedigree for now
pop = self.P[0]

# Go through the extant lineages and gather the ancestral material
# into the corresponding pedigree individuals.
for lineage in pop.iter_ancestors():
u = lineage.head.node
node = self.tables.nodes[u]
assert node.individual != tskit.NULL
ind = self.pedigree.individuals[node.individual]
ind.add_common_ancestor(lineage.head, ploid=ind.nodes.index(u))

# Visit pedigree individuals in time order.
visit_order = sorted(self.pedigree.individuals, key=lambda x: (x.time, x.id))
for ind in visit_order:
Expand Down
104 changes: 51 additions & 53 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -1859,11 +1859,13 @@ msp_verify_overlaps(msp_t *self)
int ok = overlap_counter_alloc(&counter, self->sequence_length, 0);
tsk_bug_assert(ok == 0);

/* add in the overlaps for ancient samples */
for (j = self->next_sampling_event; j < self->num_sampling_events; j++) {
se = self->sampling_events[j];
for (u = self->root_segments[se.sample]; u != NULL; u = u->next) {
overlap_counter_increment_interval(&counter, u->left, u->right);
if (self->model.type != MSP_MODEL_WF_PED) {
/* add in the overlaps for ancient samples */
for (j = self->next_sampling_event; j < self->num_sampling_events; j++) {
se = self->sampling_events[j];
for (u = self->root_segments[se.sample]; u != NULL; u = u->next) {
overlap_counter_increment_interval(&counter, u->left, u->right);
}
}
}

Expand Down Expand Up @@ -2495,12 +2497,10 @@ msp_store_coalescence_edge(
int ret = 0;

if (self->model.type == MSP_MODEL_DTWF || self->model.type == MSP_MODEL_WF_PED) {
if (self->additional_nodes & MSP_NODE_IS_RE_EVENT) {
// parent and child can be equal
// don't store edges
if (parent == child) {
return ret;
}
// parent and child can be equal if we're storing RE events, or if we've
// got internal samples. Don't store edges
if (parent == child) {
return ret;
}
}

Expand Down Expand Up @@ -2914,13 +2914,30 @@ msp_pedigree_add_individual_common_ancestor(
}

static int MSP_WARN_UNUSED
msp_pedigree_add_sample_ancestry(msp_t *self, segment_t *segment)
msp_pedigree_add_sample_ancestry(msp_t *self, tsk_id_t node_id)
{
int ret = 0;
tsk_size_t ploid;
tsk_id_t node_id = segment->value;
tsk_id_t individual_id;
individual_t *ind;
avl_node_t *a;
node_mapping_t *counter;
segment_t *seg
= msp_alloc_segment(self, 0, self->sequence_length, node_id, 0, 0, NULL, NULL);
lineage_t *lin = msp_alloc_lineage(self, seg, seg, 0, 0);

if (seg == NULL || lin == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
ret = msp_insert_individual(self, lin);
if (ret != 0) {
goto out;
}
for (a = self->overlap_counts.head; a != self->overlap_counts.tail; a = a->next) {
counter = (node_mapping_t *) a->item;
counter->value += 1;
}

tsk_bug_assert(node_id < (tsk_id_t) self->tables->nodes.num_rows);
individual_id = self->tables->nodes.individual[node_id];
Expand All @@ -2934,14 +2951,7 @@ msp_pedigree_add_sample_ancestry(msp_t *self, segment_t *segment)
}
}
tsk_bug_assert(ploid < ind->ploidy);
if (avl_count(&ind->common_ancestors[ploid]) > 0) {
/* This is where we'd deal with the internal samples. What we'll probably
* need to do is to go through the ancestry after we've processed the
* individual and then pad out any gaps in the ancestry segments. */
ret = MSP_ERR_PEDIGREE_INTERNAL_SAMPLE;
goto out;
}
ret = msp_pedigree_add_individual_common_ancestor(self, ind->id, segment, ploid);
ret = msp_pedigree_add_individual_common_ancestor(self, ind->id, seg, ploid);
if (ret != 0) {
goto out;
}
Expand All @@ -2955,9 +2965,11 @@ msp_pedigree_initialise(msp_t *self)
int ret = 0;
population_t *pop;
lineage_t *lin;
segment_t *seg;
avl_node_t *a;
label_id_t label = 0;
tsk_size_t j;
node_mapping_t *counter;

if (self->next_demographic_event != NULL) {
ret = MSP_ERR_UNSUPPORTED_OPERATION;
Expand All @@ -2981,14 +2993,21 @@ msp_pedigree_initialise(msp_t *self)

for (j = 0; j < self->num_populations; j++) {
pop = &self->populations[j];
/* Rather than messing about with how we initialise from trees, it's
* easier to just remove the lineages here, before we add them
* back later when dealing with samples in the pedigree. */
for (a = pop->ancestors[label].head; a != NULL; a = a->next) {
lin = (lineage_t *) a->item;
ret = msp_pedigree_add_sample_ancestry(self, lin->head);
if (ret != 0) {
goto out;
for (seg = lin->head; seg != NULL; seg = seg->next) {
msp_free_segment(self, seg);
}
msp_remove_individual(self, lin);
}
}
tsk_bug_assert(avl_count(&self->overlap_counts) == 2);
counter = (node_mapping_t *) self->overlap_counts.head->item;
counter->value = 0;

self->pedigree.next_individual = 0;
out:
return ret;
Expand Down Expand Up @@ -4589,31 +4608,6 @@ msp_apply_sampling_events(msp_t *self, double time)
return ret;
}

static int MSP_WARN_UNUSED
msp_pedigree_insert_ancient_samples(msp_t *self)
{
int ret = 0;
sampling_event_t *se;
segment_t *root_seg, *new_head;

while (self->next_sampling_event < self->num_sampling_events
&& self->sampling_events[self->next_sampling_event].time <= self->time) {
se = self->sampling_events + self->next_sampling_event;
root_seg = self->root_segments[se->sample];
ret = msp_insert_root_segments(self, root_seg, &new_head);
if (ret != 0) {
goto out;
}
ret = msp_pedigree_add_sample_ancestry(self, new_head);
if (ret != 0) {
goto out;
}
self->next_sampling_event++;
}
out:
return ret;
}

static double
msp_get_next_fixed_event_time(msp_t *self)
{
Expand Down Expand Up @@ -5483,13 +5477,21 @@ msp_pedigree_process_common_ancestors(msp_t *self, individual_t *ind, tsk_size_t
{
int ret = 0;
tsk_id_t node = ind->nodes[ploid];
bool is_sample = (self->tables->nodes.flags[node] & TSK_NODE_IS_SAMPLE) != 0;
tsk_id_t parent = ind->parents[ploid];
avl_tree_t *common_ancestors = &ind->common_ancestors[ploid];
segment_t *genome, *parent_ancestry[MSP_MAX_PED_PLOIDY], *seg;
const tsk_size_t ploidy = self->ploidy;
tsk_size_t j;
tsk_id_t *parent_nodes;

if (is_sample) {
ret = msp_pedigree_add_sample_ancestry(self, node);
if (ret != 0) {
goto out;
}
}

/* FIXME - assuming 1 label here */
ret = msp_merge_n_ancestors(
self, common_ancestors, ind->population, 0, node, &genome);
Expand Down Expand Up @@ -5607,10 +5609,6 @@ msp_run_pedigree(msp_t *self, double max_time, unsigned long max_events)
}
tsk_bug_assert(ind->time >= self->time);
self->time = ind->time;
ret = msp_pedigree_insert_ancient_samples(self);
if (ret != 0) {
goto out;
}
for (ploid = 0; ploid < ind->ploidy; ploid++) {
ret = msp_pedigree_process_common_ancestors(self, ind, ploid);
if (ret != 0) {
Expand Down
18 changes: 3 additions & 15 deletions lib/tests/test_pedigrees.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,8 @@ test_replicates_ancient_samples(void)
CU_ASSERT_EQUAL_FATAL(ret, 0);

for (j = 0; j < 20; j++) {
msp_verify(&msp, 0);
ret = msp_run(&msp, DBL_MAX, UINT32_MAX);

CU_ASSERT_EQUAL_FATAL(ret, MSP_EXIT_MODEL_COMPLETE);
msp_verify(&msp, 0);
ret = msp_finalise_tables(&msp);
Expand Down Expand Up @@ -844,24 +844,12 @@ test_errors(void)
static void
test_internal_samples(void)
{
int ret;
tsk_id_t parents[] = { -1, -1, -1, -1, 0, 1 };
double time[] = { 1.0, 1.0, 0.0 };
tsk_flags_t is_sample[] = { true, true, true };
msp_t msp;
tsk_table_collection_t tables;
gsl_rng *rng = safe_rng_alloc();

ret = build_pedigree_sim(
&msp, &tables, rng, 100, 2, 3, parents, time, is_sample, NULL);
ret = msp_initialise(&msp);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = msp_run(&msp, DBL_MAX, UINT32_MAX);
CU_ASSERT_EQUAL_FATAL(ret, MSP_ERR_PEDIGREE_INTERNAL_SAMPLE);
tsk_table_collection_free(&tables);
msp_free(&msp);

gsl_rng_free(rng);
verify_pedigree(0, 1, 3, parents, time, is_sample, NULL, 0);
verify_pedigree(0.1, 1, 3, parents, time, is_sample, NULL, 0);
}

static void
Expand Down
6 changes: 0 additions & 6 deletions lib/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,6 @@ msp_strerror_internal(int err)
ret = "All individuals in the input pedigree must be associated with "
"exactly two parents (can be TSK_NULL, if not known)";
break;
case MSP_ERR_PEDIGREE_INTERNAL_SAMPLE:
ret = "Samples that are internal nodes in the pedigree are not "
"currently supported. Please comment on this GitHub issue if you "
"would like to see this feature implemented: "
"https://github.com/tskit-dev/msprime/issues/1855 ";
break;

case MSP_ERR_BAD_PROPORTION:
ret = "Proportion values must have 0 <= x <= 1";
Expand Down
1 change: 0 additions & 1 deletion lib/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@
#define MSP_ERR_PEDIGREE_TIME_TRAVEL -88
#define MSP_ERR_PEDIGREE_IND_NOT_DIPLOID -89
#define MSP_ERR_PEDIGREE_IND_NOT_TWO_PARENTS -90
#define MSP_ERR_PEDIGREE_INTERNAL_SAMPLE -91

/* clang-format on */
/* This bit is 0 for any errors originating from tskit */
Expand Down
15 changes: 15 additions & 0 deletions tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,21 @@ def test_pedigree_trio(self, r):
input_tables.nodes.assert_equals(output_tables.nodes[: len(input_tables.nodes)])
assert len(output_tables.edges) >= 2

@pytest.mark.parametrize("r", [0, 0.1, 1])
def test_pedigree_internal_sample(self, r):
input_tables = simulate_pedigree(num_founders=4, num_generations=4)
flags = input_tables.nodes.flags
flags[8:12] = tskit.NODE_IS_SAMPLE # Make inds 4 and 5 samples
input_tables.nodes.flags = flags
with tempfile.TemporaryDirectory() as tmpdir:
ts_path = pathlib.Path(tmpdir) / "pedigree.trees"
input_tables.dump(ts_path)
ts = self.run_script(f"0 --from-ts {ts_path} -r {r} --model=fixed_pedigree")
output_tables = ts.dump_tables()
input_tables.individuals.assert_equals(output_tables.individuals)
input_tables.nodes.assert_equals(output_tables.nodes[: len(input_tables.nodes)])
assert len(output_tables.edges) >= 2

@pytest.mark.parametrize("num_founders", [1, 2, 20])
def test_one_gen_pedigree(self, num_founders):
tables = simulate_pedigree(num_founders=num_founders, num_generations=1)
Expand Down
Loading

0 comments on commit e2932a2

Please sign in to comment.