Skip to content

Commit

Permalink
Refactor global_comparison for parallelization
Browse files (browse the repository at this point in the history)
Related to #165.
  • Loading branch information
dweindl committed Feb 17, 2025
1 parent af6f7f2 commit 2e28019
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 132 deletions.
261 changes: 130 additions & 131 deletions src/ccompass/MOA.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def stats_proteome(
return results


def global_comparison(
def global_comparisons(
results: dict[str, ResultsModel],
) -> dict[tuple[str, str], ComparisonModel]:
"""Compute global changes."""
Expand All @@ -445,155 +445,154 @@ def global_comparison(
result1, result2 = results[cond1], results[cond2]

logger.info(f"Processing {comb}...")
classnames = list(set(result1.classnames) & set(result2.classnames))
comparison = comparisons[comb] = ComparisonModel()
comparisons[comb] = global_comparison(result1, result2)

metrics_own = result1.metrics
metrics_other = result2.metrics
logger.info("Global changes calculated.")

## prepare data:
comparison.intersection_data = pd.merge(
metrics_own,
metrics_other,
left_index=True,
right_index=True,
how="inner",
)
comparison.metrics = pd.DataFrame(
index=comparison.intersection_data.index
)
return comparisons

logger.info("performing t-tests...")

for classname in classnames:
comparison.metrics["RL_" + classname] = (
metrics_other["CC_" + classname]
- metrics_own["CC_" + classname]
)
def global_comparison(
result1: ResultsModel, result2: ResultsModel
) -> ComparisonModel:
"""Perform a single global comparison."""
classnames = list(set(result1.classnames) & set(result2.classnames))
comparison = ComparisonModel()

metrics_own = result1.metrics
metrics_other = result2.metrics

## prepare data:
comparison.intersection_data = pd.merge(
metrics_own,
metrics_other,
left_index=True,
right_index=True,
how="inner",
)
comparison.metrics = pd.DataFrame(index=comparison.intersection_data.index)

rl_cols = [
col for col in comparison.metrics.columns if col.startswith("RL_")
]
comparison.metrics["RLS"] = (
comparison.metrics[rl_cols].abs().sum(axis=1)
)
logger.info("performing t-tests...")

for classname in classnames:
comparison.metrics["fRL_" + classname] = (
metrics_other["fCC_" + classname]
- metrics_own["fCC_" + classname]
)
frl_cols = [
col for col in comparison.metrics.columns if col.startswith("fRL_")
]
comparison.metrics["fRLS"] = (
comparison.metrics[frl_cols].abs().sum(axis=1)
for classname in classnames:
comparison.metrics["RL_" + classname] = (
metrics_other["CC_" + classname] - metrics_own["CC_" + classname]
)

test_df = perform_mann_whitney_t_tests_per_cell(
metrics_own, metrics_other, "CClist_"
)
common_indices = test_df.index
for classname in classnames:
test_df.rename(
columns={
"CClist_" + classname + "_U": "U_" + classname,
"CClist_" + classname + "_T": "T_" + classname,
"CClist_" + classname + "_D": "D_" + classname,
"CClist_" + classname + "_P(U)": "P(U)_" + classname,
"CClist_" + classname + "_P(T)": "P(T)_" + classname,
},
inplace=True,
)
# calculate DS:
d_columns = [col for col in test_df.columns if col.startswith("D_")]
test_df["DS"] = test_df[d_columns].abs().sum(axis=1)
rl_cols = [
col for col in comparison.metrics.columns if col.startswith("RL_")
]
comparison.metrics["RLS"] = comparison.metrics[rl_cols].abs().sum(axis=1)

# add statistics to metrics:
comparison.metrics = pd.merge(
comparison.metrics,
test_df,
left_index=True,
right_index=True,
how="left",
for classname in classnames:
comparison.metrics["fRL_" + classname] = (
metrics_other["fCC_" + classname] - metrics_own["fCC_" + classname]
)
frl_cols = [
col for col in comparison.metrics.columns if col.startswith("fRL_")
]
comparison.metrics["fRLS"] = comparison.metrics[frl_cols].abs().sum(axis=1)

logger.info("calculate RLS lists...")
RLS_results = {}
RLS_null = {}
for ID in common_indices:
cclists_own = [
metrics_own.loc[ID, "CClist_" + classname]
for classname in classnames
]
cclists_other = [
metrics_other.loc[ID, "CClist_" + classname]
for classname in classnames
]

cclists_own_transposed = [
list(values) for values in zip(*cclists_own)
]
cclists_other_transposed = [
list(values) for values in zip(*cclists_other)
]
test_df = perform_mann_whitney_t_tests_per_cell(
metrics_own, metrics_other, "CClist_"
)
common_indices = test_df.index
for classname in classnames:
test_df.rename(
columns={
"CClist_" + classname + "_U": "U_" + classname,
"CClist_" + classname + "_T": "T_" + classname,
"CClist_" + classname + "_D": "D_" + classname,
"CClist_" + classname + "_P(U)": "P(U)_" + classname,
"CClist_" + classname + "_P(T)": "P(T)_" + classname,
},
inplace=True,
)
# calculate DS:
d_columns = [col for col in test_df.columns if col.startswith("D_")]
test_df["DS"] = test_df[d_columns].abs().sum(axis=1)

# add statistics to metrics:
comparison.metrics = pd.merge(
comparison.metrics,
test_df,
left_index=True,
right_index=True,
how="left",
)

RLS_results[ID] = []
RLS_null[ID] = []
logger.info("calculate RLS lists...")
RLS_results = {}
RLS_null = {}
for ID in common_indices:
cclists_own = [
metrics_own.loc[ID, "CClist_" + classname]
for classname in classnames
]
cclists_other = [
metrics_other.loc[ID, "CClist_" + classname]
for classname in classnames
]

for i in range(len(cclists_own_transposed)):
for j in range(i + 1, len(cclists_own_transposed)):
null_result = compare_lists(
cclists_own_transposed[i], cclists_own_transposed[j]
)
RLS_null[ID].append(null_result)
for i in range(len(cclists_other_transposed)):
for j in range(i + 1, len(cclists_other_transposed)):
null_result = compare_lists(
cclists_other_transposed[i],
cclists_other_transposed[j],
)
RLS_null[ID].append(null_result)
cclists_own_transposed = [list(values) for values in zip(*cclists_own)]
cclists_other_transposed = [
list(values) for values in zip(*cclists_other)
]

for own_list in cclists_own_transposed:
for other_list in cclists_other_transposed:
comparison_result = compare_lists(own_list, other_list)
RLS_results[ID].append(comparison_result)
comparison.RLS_results = pd.Series(RLS_results)
comparison.RLS_null = pd.Series(RLS_null)
RLS_results[ID] = []
RLS_null[ID] = []

comparison.metrics["P(t)_RLS"] = np.nan
comparison.metrics["P(u)_RLS"] = np.nan
for index in comparison.metrics.index:
if index in common_indices:
# Perform the t-test
stat, p_value = ttest_ind(
for i in range(len(cclists_own_transposed)):
for j in range(i + 1, len(cclists_own_transposed)):
null_result = compare_lists(
cclists_own_transposed[i], cclists_own_transposed[j]
)
RLS_null[ID].append(null_result)
for i in range(len(cclists_other_transposed)):
for j in range(i + 1, len(cclists_other_transposed)):
null_result = compare_lists(
cclists_other_transposed[i],
cclists_other_transposed[j],
)
RLS_null[ID].append(null_result)

for own_list in cclists_own_transposed:
for other_list in cclists_other_transposed:
comparison_result = compare_lists(own_list, other_list)
RLS_results[ID].append(comparison_result)
comparison.RLS_results = pd.Series(RLS_results)
comparison.RLS_null = pd.Series(RLS_null)

comparison.metrics["P(t)_RLS"] = np.nan
comparison.metrics["P(u)_RLS"] = np.nan
for index in comparison.metrics.index:
if index in common_indices:
# Perform the t-test
stat, p_value = ttest_ind(
comparison.RLS_results.loc[index],
comparison.RLS_null.loc[index],
nan_policy="omit",
)
comparison.metrics.loc[index, "P(t)_RLS"] = p_value
if (
is_all_nan(comparison.RLS_results.loc[index])
or is_all_nan(comparison.RLS_null.loc[index])
or len(set(comparison.RLS_results.loc[index])) == 1
or len(set(comparison.RLS_null.loc[index])) == 1
):
comparison.metrics.loc[index, "P(u)_RLS"] = pd.NA
else:
stat_u, p_value_u = stats.mannwhitneyu(
comparison.RLS_results.loc[index],
comparison.RLS_null.loc[index],
nan_policy="omit",
alternative="two-sided",
)
comparison.metrics.loc[index, "P(t)_RLS"] = p_value
if (
is_all_nan(comparison.RLS_results.loc[index])
or is_all_nan(comparison.RLS_null.loc[index])
or len(set(comparison.RLS_results.loc[index])) == 1
or len(set(comparison.RLS_null.loc[index])) == 1
):
comparison.metrics.loc[index, "P(u)_RLS"] = pd.NA
else:
stat_u, p_value_u = stats.mannwhitneyu(
comparison.RLS_results.loc[index],
comparison.RLS_null.loc[index],
alternative="two-sided",
)
comparison.metrics.loc[index, "P(u)_RLS"] = p_value_u
else:
comparison.metrics.loc[index, "P(t)_RLS"] = pd.NA
comparison.metrics.loc[index, "P(u)_RLS"] = pd.NA
comparison.metrics.loc[index, "P(u)_RLS"] = p_value_u
else:
comparison.metrics.loc[index, "P(t)_RLS"] = pd.NA
comparison.metrics.loc[index, "P(u)_RLS"] = pd.NA

logger.info("Global changes calculated.")

return comparisons
return comparison


def class_comparison(
Expand Down
2 changes: 1 addition & 1 deletion src/ccompass/main_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,7 +1287,7 @@ def run(self):

elif event == "-global_run-":
with wait_cursor(self.main_window):
self.model.comparison = MOA.global_comparison(
self.model.comparison = MOA.global_comparisons(
self.model.results
)
self.model.status.comparison_global = True
Expand Down

0 comments on commit 2e28019

Please sign in to comment.