Merge pull request #12 from JuBiotech/fix-noise-issues

Investigate and fix noise issues
JuBiotech · May 11, 2024 · c1e5aa1 · c1e5aa1
2 parents 1f47e9e + 1180993
commit c1e5aa1
Show file tree

Hide file tree

Showing 7 changed files with 3,607 additions and 1,751 deletions.
diff --git a/notebooks/Investigation_doublepeak_separation.ipynb b/notebooks/Investigation_doublepeak_separation.ipynb
diff --git a/notebooks/Investigation_noise_sigma.ipynb b/notebooks/Investigation_noise_sigma.ipynb
diff --git a/peak_performance/models.py b/peak_performance/models.py
@@ -36,6 +36,15 @@ class ModelType(str, Enum):
     DoubleSkewNormal = "double_skew_normal"
 
 
+def guess_noise(intensity):
+    n = len(intensity)
+    ifrom = int(np.ceil(0.15 * n))
+    ito = int(np.floor(0.85 * n))
+    start_ints = intensity[:ifrom]
+    end_ints = intensity[ito:]
+    return np.std([*(start_ints - np.mean(start_ints)), *(end_ints - np.mean(end_ints))])
+
+
 def initial_guesses(time: np.ndarray, intensity: np.ndarray):
     """
     Provide initial guesses for priors.
@@ -79,12 +88,16 @@ def initial_guesses(time: np.ndarray, intensity: np.ndarray):
     # use the indeces in noise_index to get the time and intensity of all noise data points
     noise_time = [time[n] for n in noise_index]
     noise_intensity = [intensity[n] for n in noise_index]
-    # calculate the width of the noise
-    noise_width_guess = max(noise_intensity) - min(noise_intensity)
 
     # use scipy to fit a linear regression through the noise as a prior for the eventual baseline
     baseline_fit = st.linregress(noise_time, noise_intensity)
 
+    # calculate the width of the noise
+    noise_width_guess = guess_noise(intensity)
+
+    # clip the noise to at least 10
+    noise_width_guess = np.clip(noise_width_guess, 10, np.inf)
+
     return baseline_fit.slope, baseline_fit.intercept, noise_width_guess
 
 
@@ -166,15 +179,15 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
         # add guesses to the pmodel as ConstantData
         pm.ConstantData("intercept_guess", intercept_guess)
         pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
         # priors plus error handling in case of mathematically impermissible values
         baseline_intercept = pm.Normal(
             "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
         # define priors for parameters of a normally distributed posterior
         mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
         std = pm.HalfNormal("std", np.ptp(time) / 3)
@@ -325,15 +338,15 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
         # add guesses to the pmodel as ConstantData
         pm.ConstantData("intercept_guess", intercept_guess)
         pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
         # priors
         baseline_intercept = pm.Normal(
             "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
         # NOTE: We expect dobule-peaks to be narrower w.r.t. the time frame, compare to single peaks.
         std = pm.HalfNormal("std", sigma=[np.ptp(time) / 6, np.ptp(time) / 6], dims=("subpeak",))
         height = pm.HalfNormal(
@@ -534,15 +547,15 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
         # add guesses to the pmodel as ConstantData
         pm.ConstantData("intercept_guess", intercept_guess)
         pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
         # priors plus error handling in case of mathematically impermissible values
         baseline_intercept = pm.Normal(
             "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
         mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
         std = pm.HalfNormal("std", np.ptp(time) / 3)
         alpha = pm.Normal("alpha", 0, 3.5)
@@ -650,15 +663,15 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
         # add guesses to the pmodel as ConstantData
         pm.ConstantData("intercept_guess", intercept_guess)
         pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
         # priors plus error handling in case of mathematically impermissible values
         baseline_intercept = pm.Normal(
             "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
         # use univariate ordered normal distribution for the mean values
         # use a zero sum normal distribution to describe the distance of the mean values
         # from the mean of the mean values ("meanmean")

diff --git a/peak_performance/pipeline.py b/peak_performance/pipeline.py
@@ -648,7 +648,7 @@ def posterior_predictive_sampling(pmodel, idata):
         Inference data object updated with the posterior predictive samples.
     """
     with pmodel:
-        idata.extend(pm.sample_posterior_predictive(idata, var_names=["y"]))
+        idata.extend(pm.sample_posterior_predictive(idata))
     return idata
 
 

diff --git a/peak_performance/plots.py b/peak_performance/plots.py
@@ -168,7 +168,7 @@ def plot_posterior_predictive(
     plot_density(
         ax=ax,
         x=time,
-        samples=idata.posterior_predictive.y.stack(sample=("chain", "draw")).T.values,
+        samples=idata.posterior_predictive["L"].stack(sample=("chain", "draw")).T.values,
         percentiles=(2.5, 97.5),
     )
     # plot the raw data points

diff --git a/peak_performance/test_models.py b/peak_performance/test_models.py
@@ -30,19 +30,31 @@
 }
 
 
+def test_noise_guessing():
+    expected = 0.7
+    intensities = [
+        *np.random.normal(10, expected, size=200),
+        *np.random.normal(0, 6, size=600),
+        *np.random.normal(40, expected, size=200),
+    ]
+    actual = models.guess_noise(intensities)
+    assert 0.6 < actual < 0.8
+    pass
+
+
 def test_initial_guesses():
     # define time and intensity for example with known result
     time = 2 + 0.1 * np.arange(17)
     intensity = [1, 5, 3] + 11 * [1000] + [7, 9, 11]
     # define expected results
-    expected_noise_width = np.ptp([1, 5, 3, 7, 9, 11])
     expected_baseline_fit = st.linregress([2, 2.1, 2.2, 3.4, 3.5, 3.6], [1, 5, 3, 7, 9, 11])
     # get the values from the initial guesses function
     slope, intercept, noise_width = models.initial_guesses(time, intensity)
     # compare the outcome with the expected values
     assert expected_baseline_fit.slope == slope
     assert expected_baseline_fit.intercept == intercept
-    assert expected_noise_width == noise_width
+    # With this example the noise is clipped to at least 10
+    assert noise_width == 10
     pass
 
 

diff --git a/test_data/test_plot_posterior_predictive/idata.nc b/test_data/test_plot_posterior_predictive/idata.nc