Skip to content

Commit

Permalink
Fixup coord remapping problems
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Nov 6, 2024
1 parent 6c146a0 commit bb72e7d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 10 deletions.
24 changes: 14 additions & 10 deletions sc2ts/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,7 @@ def make_tsb(ts, mirror_coordinates=False):
)

position_map = np.hstack([ts.sites_position, [ts.sequence_length]])
# Note - bracketing by 0 on the left here.
# bracketing by 0 on the left here while we're translating edge locations.
position_map[0] = 0
# Get the indexes into the position array.
left = np.searchsorted(position_map, ts.edges_left)
Expand All @@ -1172,6 +1172,7 @@ def make_tsb(ts, mirror_coordinates=False):
if np.any(position_map[right] != ts.edges_right):
raise ValueError("Invalid right coordinates")

position_map[0] = ts.sites_position[0]
# Need to sort by child ID here and left so that we can efficiently
# insert the child paths.
index = np.lexsort((left, ts.edges_child))
Expand Down Expand Up @@ -1245,11 +1246,11 @@ def match_worker(strain, h, likelihood_threshold):

path_len = len(match_path[0])
num_muts = np.sum(m != h)
likelihood = rho**path_len * mu**num_muts
likelihood = rho ** (path_len - 1) * mu**num_muts
hmm_match = HmmMatch(path, mutations, likelihood=likelihood)

logger.debug(
f"Found path len={path_len} and muts={num_muts} P={likelihood:.2g} "
f"Found path len={path_len} and muts={num_muts} Lik={likelihood:.2g} "
f"for {strain} in {duration:.3f}s "
f"mean_tb_size={matcher.mean_traceback_size:.1f} "
f"match_mem={humanize.naturalsize(matcher.total_memory, binary=True)}"
Expand Down Expand Up @@ -1279,7 +1280,7 @@ def match_worker(strain, h, likelihood_threshold):
raw_hmm_match = future.result()
sample = future_to_sample[future]
hmm_matches[sample.strain] = raw_hmm_match.translate_coordinates(
coord_map, mirror_coordinates
coord_map, mirror_coordinates, ts.sites_position
)
bar.update()
bar.close()
Expand Down Expand Up @@ -1366,12 +1367,15 @@ def path_summary(self):
def mutation_summary(self):
return "[" + ", ".join(str(mutation) for mutation in self.mutations) + "]"

def translate_coordinates(self, coord_map, mirror_coordinates):
def translate_coordinates(self, coord_map, mirror_coordinates, sites_position):
"""
Return a copy of this HmmMatch with coordinates translated from raw site-based
values to their final positions.
"""
L = coord_map[-1]
first_pos = coord_map[0]
# Set first pos to 0 while we're translating edge coords
coord_map[0] = 0
path = []
for seg in self.path:
if mirror_coordinates:
Expand All @@ -1387,22 +1391,22 @@ def translate_coordinates(self, coord_map, mirror_coordinates):
# if matching forwards
path = path[::-1]

coord_map[0] = first_pos
mutations = []
# for site_id in np.where(h != m)[0]:
for mut in self.mutations:
site_id = mut.site_id
if mirror_coordinates:
site_id = mirror(mut.site_id, L)
assert site_id != 0 # This breaks because coord_map[0] == 0
site_pos = coord_map[site_id]
site_id = mirror(mut.site_id, len(coord_map) - 2)
mutations.append(
MatchMutation(
site_id=int(site_id),
site_position=int(site_pos),
site_position=int(sites_position[site_id]),
derived_state=mut.derived_state,
inherited_state=mut.inherited_state,
)
)
if mirror_coordinates:
mutations = mutations[::-1]
return HmmMatch(path, mutations, self.likelihood)


Expand Down
2 changes: 2 additions & 0 deletions tests/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def match_tsinfer(self, samples, ts, mirror_coordinates=False, **kwargs):
ts=ts,
mu=0.125,
rho=0,
likelihood_threshold=1e-200,
mirror_coordinates=mirror_coordinates,
**kwargs,
)
Expand Down Expand Up @@ -1100,6 +1101,7 @@ def test_match_recombinant(self, fx_ts_map):
ts=ts,
mu=mu,
rho=rho,
likelihood_threshold=1e-200,
num_threads=0,
)
interval_right = 11083
Expand Down

0 comments on commit bb72e7d

Please sign in to comment.