Skip to content

Commit

Permalink
Merge pull request #12 from JuBiotech/fix-noise-issues
Browse files Browse the repository at this point in the history
Investigate and fix noise issues
  • Loading branch information
Y0dler authored May 11, 2024
2 parents 1f47e9e + 1180993 commit c1e5aa1
Show file tree
Hide file tree
Showing 7 changed files with 3,607 additions and 1,751 deletions.
3,472 changes: 1,735 additions & 1,737 deletions notebooks/Investigation_doublepeak_separation.ipynb

Large diffs are not rendered by default.

1,833 changes: 1,833 additions & 0 deletions notebooks/Investigation_noise_sigma.ipynb

Large diffs are not rendered by default.

33 changes: 23 additions & 10 deletions peak_performance/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ class ModelType(str, Enum):
DoubleSkewNormal = "double_skew_normal"


def guess_noise(intensity):
n = len(intensity)
ifrom = int(np.ceil(0.15 * n))
ito = int(np.floor(0.85 * n))
start_ints = intensity[:ifrom]
end_ints = intensity[ito:]
return np.std([*(start_ints - np.mean(start_ints)), *(end_ints - np.mean(end_ints))])


def initial_guesses(time: np.ndarray, intensity: np.ndarray):
"""
Provide initial guesses for priors.
Expand Down Expand Up @@ -79,12 +88,16 @@ def initial_guesses(time: np.ndarray, intensity: np.ndarray):
# use the indeces in noise_index to get the time and intensity of all noise data points
noise_time = [time[n] for n in noise_index]
noise_intensity = [intensity[n] for n in noise_index]
# calculate the width of the noise
noise_width_guess = max(noise_intensity) - min(noise_intensity)

# use scipy to fit a linear regression through the noise as a prior for the eventual baseline
baseline_fit = st.linregress(noise_time, noise_intensity)

# calculate the width of the noise
noise_width_guess = guess_noise(intensity)

# clip the noise to at least 10
noise_width_guess = np.clip(noise_width_guess, 10, np.inf)

return baseline_fit.slope, baseline_fit.intercept, noise_width_guess


Expand Down Expand Up @@ -166,15 +179,15 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
# add guesses to the pmodel as ConstantData
pm.ConstantData("intercept_guess", intercept_guess)
pm.ConstantData("slope_guess", slope_guess)
pm.ConstantData("noise_width_guess", noise_width_guess)
noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

# priors plus error handling in case of mathematically impermissible values
baseline_intercept = pm.Normal(
"baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
)
baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
noise = pm.LogNormal("noise", pt.log(noise_guess))
# define priors for parameters of a normally distributed posterior
mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
std = pm.HalfNormal("std", np.ptp(time) / 3)
Expand Down Expand Up @@ -325,15 +338,15 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
# add guesses to the pmodel as ConstantData
pm.ConstantData("intercept_guess", intercept_guess)
pm.ConstantData("slope_guess", slope_guess)
pm.ConstantData("noise_width_guess", noise_width_guess)
noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

# priors
baseline_intercept = pm.Normal(
"baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
)
baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
noise = pm.LogNormal("noise", pt.log(noise_guess))
# NOTE: We expect dobule-peaks to be narrower w.r.t. the time frame, compare to single peaks.
std = pm.HalfNormal("std", sigma=[np.ptp(time) / 6, np.ptp(time) / 6], dims=("subpeak",))
height = pm.HalfNormal(
Expand Down Expand Up @@ -534,15 +547,15 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
# add guesses to the pmodel as ConstantData
pm.ConstantData("intercept_guess", intercept_guess)
pm.ConstantData("slope_guess", slope_guess)
pm.ConstantData("noise_width_guess", noise_width_guess)
noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

# priors plus error handling in case of mathematically impermissible values
baseline_intercept = pm.Normal(
"baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
)
baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
noise = pm.LogNormal("noise", pt.log(noise_guess))
mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
std = pm.HalfNormal("std", np.ptp(time) / 3)
alpha = pm.Normal("alpha", 0, 3.5)
Expand Down Expand Up @@ -650,15 +663,15 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
# add guesses to the pmodel as ConstantData
pm.ConstantData("intercept_guess", intercept_guess)
pm.ConstantData("slope_guess", slope_guess)
pm.ConstantData("noise_width_guess", noise_width_guess)
noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

# priors plus error handling in case of mathematically impermissible values
baseline_intercept = pm.Normal(
"baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
)
baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
noise = pm.LogNormal("noise", pt.log(noise_guess))
# use univariate ordered normal distribution for the mean values
# use a zero sum normal distribution to describe the distance of the mean values
# from the mean of the mean values ("meanmean")
Expand Down
2 changes: 1 addition & 1 deletion peak_performance/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ def posterior_predictive_sampling(pmodel, idata):
Inference data object updated with the posterior predictive samples.
"""
with pmodel:
idata.extend(pm.sample_posterior_predictive(idata, var_names=["y"]))
idata.extend(pm.sample_posterior_predictive(idata))
return idata


Expand Down
2 changes: 1 addition & 1 deletion peak_performance/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def plot_posterior_predictive(
plot_density(
ax=ax,
x=time,
samples=idata.posterior_predictive.y.stack(sample=("chain", "draw")).T.values,
samples=idata.posterior_predictive["L"].stack(sample=("chain", "draw")).T.values,
percentiles=(2.5, 97.5),
)
# plot the raw data points
Expand Down
16 changes: 14 additions & 2 deletions peak_performance/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,31 @@
}


def test_noise_guessing():
expected = 0.7
intensities = [
*np.random.normal(10, expected, size=200),
*np.random.normal(0, 6, size=600),
*np.random.normal(40, expected, size=200),
]
actual = models.guess_noise(intensities)
assert 0.6 < actual < 0.8
pass


def test_initial_guesses():
# define time and intensity for example with known result
time = 2 + 0.1 * np.arange(17)
intensity = [1, 5, 3] + 11 * [1000] + [7, 9, 11]
# define expected results
expected_noise_width = np.ptp([1, 5, 3, 7, 9, 11])
expected_baseline_fit = st.linregress([2, 2.1, 2.2, 3.4, 3.5, 3.6], [1, 5, 3, 7, 9, 11])
# get the values from the initial guesses function
slope, intercept, noise_width = models.initial_guesses(time, intensity)
# compare the outcome with the expected values
assert expected_baseline_fit.slope == slope
assert expected_baseline_fit.intercept == intercept
assert expected_noise_width == noise_width
# With this example the noise is clipped to at least 10
assert noise_width == 10
pass


Expand Down
Binary file modified test_data/test_plot_posterior_predictive/idata.nc
Binary file not shown.

0 comments on commit c1e5aa1

Please sign in to comment.