Skip to content

Commit

Permalink
implemented log likelihood method for pairwise interactions
Browse files Browse the repository at this point in the history
  • Loading branch information
ashuaibi7 committed Nov 23, 2024
1 parent d7cf319 commit c450795
Showing 1 changed file with 73 additions and 3 deletions.
76 changes: 73 additions & 3 deletions src/dialect/models/interaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,79 @@ def __init__(self, gene_a, gene_b):
# Likelihood & Metric Evaluation #
# ---------------------------------------------------------------------------- #

# Placeholder for methods to compute interaction-specific metrics (e.g., likelihood ratio)
# def compute_log_likelihood(self):
# pass
def compute_log_likelihood(self, tau):
"""
Compute the complete data log-likelihood for the interaction given the parameters tau.
The log-likelihood function is given by:
\sum_{i=1}^{N} \log ()
\mathbb{P}(P_i = c_i)\mathbb{P}(P_i' = c_i') \tau_{00} +
\mathbb{P}(P_i = c_i)\mathbb{P}(P_i' = c_i' - 1) \tau_{01} +
\mathbb{P}(P_i = c_i - 1)\mathbb{P}(P_i' = c_i') \tau_{10} +
\mathbb{P}(P_i = c_i - 1)\mathbb{P}(P_i' = c_i' - 1) \tau_{11}
)
where:
- `N` is the number of samples.
- `P_i` and `P_i'` represent the RVs for passenger mutations for the two genes.
- `c_i` and `c_i'` are the observed counts of somatic mutations for the two genes.
- `tau = (tau_00, tau_01, tau_10, tau_11)` are the interaction parameters.
:param tau (tuple): A tuple (tau_00, tau_01, tau_10, tau_11) representing the interaction parameters.
:return (float): The log-likelihood value.
:raises ValueError: If `bmr_pmf` or `counts` are not defined for either gene, or if `tau` is invalid.
"""
if not self.gene1.bmr_pmf or not self.gene2.bmr_pmf:
raise ValueError("BMR PMFs are not defined for one or both genes.")
if not self.gene1.counts or not self.gene2.counts:
raise ValueError("Counts are not defined for one or both genes.")
if not all(
0 <= t <= 1 for t in [self.tau_00, self.tau_01, self.tau_10, self.tau_11]
) or not np.isclose(
sum([self.tau_00, self.tau_01, self.tau_10, self.tau_11]), 1
):
logging.info(
f"Invalid tau parameters: tau_00={self.tau_00}, tau_01={self.tau_01}, tau_10={self.tau_10}, tau_11={self.tau_11}"
)
raise ValueError(
"Invalid tau parameters. Ensure 0 <= tau_i <= 1 and sum(tau) == 1."
)

gene_a_counts = self.gene_a.counts
gene_b_counts = self.gene_b.counts

logging.info(f"Computing log likelihood for interaction {self.name}.")
logging.info(
f"tau_00: {self.tau_00}, tau_01: {self.tau_01}, tau_10: {self.tau_10}, tau_11: {self.tau_11}"
)

log_likelihood = sum(
np.log(
( # \mathbb{P}(P_i = c_i)\mathbb{P}(P_i' = c_i') \tau_{00}
self.gene_a.bmr_pmf.get(c_a, 0)
* self.gene_b.bmr_pmf.get(c_b, 0)
* self.tau_00
)
+ ( # \mathbb{P}(P_i = c_i)\mathbb{P}(P_i' = c_i' - 1) \tau_{01}
self.gene_a.bmr_pmf.get(c_a, 0)
* self.gene_b.bmr_pmf.get(c_b - 1, 0)
* self.tau_01
)
+ ( # \mathbb{P}(P_i = c_i - 1)\mathbb{P}(P_i' = c_i') \tau_{10}
self.gene_a.bmr_pmf.get(c_a - 1, 0)
* self.gene_b.bmr_pmf.get(c_b, 0)
* self.tau_10
)
+ ( # \mathbb{P}(P_i = c_i - 1)\mathbb{P}(P_i' = c_i' - 1) \tau_{11}
self.gene_a.bmr_pmf.get(c_a - 1, 0)
* self.gene_b.bmr_pmf.get(c_b - 1, 0)
* self.tau_11
)
)
for c_a, c_b in zip(gene_a_counts, gene_b_counts)
)
return log_likelihood

#
# def compute_likelihood_ratio(self):
# pass
Expand Down

0 comments on commit c450795

Please sign in to comment.