Implement methods to compute Rfactor directly in Python (#45)

empa-scientific-it · Oct 23, 2024 · 7d65406 · 7d65406
1 parent f288bc0
commit 7d65406
Show file tree

Hide file tree

Showing 7 changed files with 359 additions and 11 deletions.
diff --git a/.gitignore b/.gitignore
@@ -36,7 +36,7 @@ MANIFEST
 pip-log.txt
 pip-delete-this-directory.txt
 
-# Unit test / coverage reports
+# Unit test / coverage reports / Unit test outputs
 htmlcov/
 .tox/
 .nox/
@@ -50,6 +50,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
+*.png
 
 # Translations
 *.mo

diff --git a/cleedpy/preprocessing.py b/cleedpy/preprocessing.py
@@ -0,0 +1,106 @@
+import numpy as np
+from scipy.interpolate import CubicHermiteSpline
+
+
+def lorentzian_smoothing(curve, vi):
+    """
+    Draws a Lorentzian curve in the same grid as the given curve.
+    Performs the convolution of the two curves.
+    The given curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+    Inputs:
+        - curve: numpy array of [e,i] arrays
+        - vi: predefined constant representing the width
+
+    Output:
+        The result of the convolution: a new array of [e,i] arrays
+    """
+
+    energies = curve[:, 0]
+    intensities = curve[:, 1]
+    result = []
+
+    for j in range(energies.size):
+        c = 0
+        l_value = 0
+        for i in range(energies.size):
+            c += vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2)
+            l_value += (
+                intensities[i] * vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2)
+            )
+        result.append(l_value / c)
+
+    return np.array(list(zip(energies, result)))
+
+
+def preprocessing_loop(theoretical_curves, experimental_curves, shift, r_factor, vi):
+    """
+    Performs all the preprocessing steps and R-factor calculations, required before the Search.
+    A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+    Inputs:
+        - theoretical_curves: numpy array of arrays of [e,i] arrays
+        - experimental_curves: numpy array of arrays of [e,i] arrays
+            where:
+                - number of theoretical curves = number of experimental curves
+        - shift: provided by user, used to edit the energy value per step
+        - r_factor: provided by user, name of the preferred R-factor method
+
+    Design:
+        1. Lorentzian smoothing of all curves
+        2. For each energy point:
+            a. Shift theoretical curve
+            b. Find boundaries of overlap
+            c. Filter both curves to keep only the elements in the overlap
+            d. For each experimental curve:
+                i. Spline = interpolate it to the theoretical grid
+                ii. Calculate R-factor
+            e. Calculate average R-factor of all curves in this shift
+        3. Find the min of the average values
+
+    Output:
+        The optimal R-factor value per curve.
+    """
+
+    for i in range(len(theoretical_curves)):
+        theoretical_curves[i] = lorentzian_smoothing(theoretical_curves[i], vi)
+        experimental_curves[i] = lorentzian_smoothing(experimental_curves[i], vi)
+
+    energy_shift = [shift, 0]
+    shifted_theoretical_curves = theoretical_curves + energy_shift
+    print(shifted_theoretical_curves)
+
+    # Find the boundaries of the overlap after the shift
+    min_bound = np.maximum(shifted_theoretical_curves[0, 0], experimental_curves[0, 0])[
+        0
+    ]
+    max_bound = np.minimum(
+        shifted_theoretical_curves[0, -1], experimental_curves[0, -1]
+    )[0]
+    print(min_bound, max_bound)
+
+    # Filter both curves based on the boundaries
+    filtered_theoretical_curves = shifted_theoretical_curves[
+        shifted_theoretical_curves[:, 0] >= min_bound
+    ]
+    filtered_theoretical_curves = filtered_theoretical_curves[
+        filtered_theoretical_curves[:, 0] <= max_bound
+    ]
+
+    filtered_experimental_curves = experimental_curves[
+        experimental_curves[:, 0] >= min_bound
+    ]
+    filtered_experimental_curves = filtered_experimental_curves[
+        filtered_experimental_curves[:, 0] <= max_bound
+    ]
+    print(filtered_theoretical_curves, filtered_experimental_curves)
+
+    # Spline
+    for i in range(len(filtered_experimental_curves)):
+        e_values = filtered_experimental_curves[i][:, 0]
+        i_values = filtered_experimental_curves[i][:, 1]
+        theoretical_e_grid = filtered_theoretical_curves[i][:, 0]
+        CubicHermiteSpline(e_values, i_values, theoretical_e_grid)
+
+    print(globals().get(r_factor))
+    return
diff --git a/cleedpy/rfactor.py b/cleedpy/rfactor.py
@@ -1,6 +1,120 @@
 import numpy as np
 
 
-def mean_square_error(y_true, y_pred):
-    """Mean Square Error"""
-    return np.mean(np.square(y_true - y_pred))
+def r2_factor(theoretical_curve, experimental_curve):
+    """
+    Calculates R2-factor of the curve of a specific spot (x,y) of the experiment.
+    A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+    Inputs:
+        - theoretical curve: numpy array of [e,i] arrays
+        - experimental curve: numpy array of [e,i] arrays
+
+    Design:
+        R2 = sqrt{ S(it - c*ie)^2 / S(it - it_avg)^2 }
+
+        where:
+            c = sqrt( S|it|^2 / S|ie|^ 2)
+            it_avg = (S it)/ dE
+    """
+
+    # [TODO] (not sure) Use the length of the shortest curve
+    min_length = min(len(theoretical_curve), len(experimental_curve))
+    theoretical_curve = theoretical_curve[:min_length]
+    experimental_curve = experimental_curve[:min_length]
+
+    # Extract the it, ie values for theoretical and experimental intensity
+    it = theoretical_curve[:, 1]
+    ie = experimental_curve[:, 1]
+
+    # Calculate normalization factor c and it_avg
+    c = np.sqrt(np.sum(it**2) / np.sum(ie**2))
+    it_avg = np.sum(it) / it.size  # dE = number of energy steps
+
+    # Calculate the numerator and denominator of R2
+    numerator = np.sum((it - c * ie) ** 2)
+    denominator = np.sum((it - it_avg) ** 2)
+
+    # Calculate R2
+    r2 = np.sqrt(numerator / denominator)
+
+    # [TODO] error handling: may return NaN
+    return r2
+
+
+def rp_factor(theoretical_curve, experimental_curve):
+    """
+    Calculates Pendry's R-factor of the curve of a specific spot (x,y) of the experiment.
+    A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
+
+    Inputs:
+        - theoretical curve: numpy array of [e,i] arrays
+        - experimental curve: numpy array of [e,i] arrays
+
+    Design:
+        Rp = S(ye - yt)^2 / S(ye^2 + yt^2)
+    """
+
+    # Extract the it, ie values for theoretical and experimental intensity
+    it = theoretical_curve[:, 1]
+    ie = experimental_curve[:, 1]
+
+    # Extract the et, ee values for theoretical and experimental energy
+    et = theoretical_curve[:, 0]
+    ee = experimental_curve[:, 0]
+
+    # Calculate theoretical and experimental energy steps
+    step_et = energy_step(et)
+    step_ee = energy_step(ee)
+
+    # Calculate Y for theoretical and experimental intensity
+    yt = y_function(it, step_et)
+    ye = y_function(ie, step_ee)
+
+    # Calculate the numerator and denominator of Rp
+    numerator = np.sum((yt - ye) ** 2 * step_et)
+    denominator = np.sum(ye**2 * step_ee) + np.sum(yt**2 * step_et)
+
+    # Calculate Rp
+    rp = numerator / denominator
+
+    # [TODO] error handling: may return NaN
+    return rp
+
+
+def y_function(intensities, energy_step):
+    """
+    Calculates y for a given curve.
+
+    Inputs:
+        - I: array of intensity values of the curve
+        - E: array of energy values of the curve
+
+    Design:
+        Y = L / (1 + L^2 * vi^2)
+
+        where:
+            L = (I[i] - I[i-1]) / (energy_step * 0.5 * (I[i] + I[i-1]))
+    """
+
+    # [TODO] constants should be defined elsewhere
+    vi = 4
+
+    y_values = (intensities[1:] - intensities[:-1]) / (
+        energy_step * 0.5 * (intensities[1:] + intensities[:-1])
+    )
+    return y_values / (1 + y_values**2 * vi**2)
+
+
+def energy_step(energies):
+    """
+    Calculates the pairwise energy step. Returns an array.
+
+    Inputs:
+        - E: array of energy values of the curve
+
+    Design:
+        step = E[i] - E[i-1]
+    """
+
+    return energies[1:] - energies[:-1]
diff --git a/pyproject.toml b/pyproject.toml
@@ -23,6 +23,7 @@ classifiers = [
 requires-python = ">=3.10"
 dependencies = [
     "click",
+    "matplotlib",
     "numpy",
     "pyyaml",
     "scipy",

diff --git a/tests/curves_helper.py b/tests/curves_helper.py
@@ -0,0 +1,78 @@
+def curve_a():
+    return [
+        [70, 0.0037557780],
+        [74, 0.0013476560],
+        [78, 0.0010809600],
+        [82, 0.0009844219],
+        [86, 0.0009538341],
+        [90, 0.0011735780],
+        [94, 0.0020866750],
+        [98, 0.0040076710],
+        [102, 0.0049233240],
+        [106, 0.0051959950],
+        [110, 0.0044225620],
+        [114, 0.0020602790],
+        [118, 0.0005740963],
+        [122, 0.0005428890],
+        [126, 0.0002391057],
+    ]
+
+
+def curve_b():
+    return [
+        [70, 0.0101207100],
+        [74, 0.0100794400],
+        [78, 0.0056215730],
+        [82, 0.0028791560],
+        [86, 0.0013405740],
+        [90, 0.0005220333],
+        [94, 0.0004865836],
+        [98, 0.0002609056],
+        [102, 0.0000723265],
+        [106, 0.0002626837],
+        [110, 0.0016541830],
+        [114, 0.0041199060],
+        [118, 0.0062692520],
+        [122, 0.0079176060],
+        [126, 0.0138149000],
+    ]
+
+
+def curve_c():
+    return [
+        [70, 0.0101222800],
+        [74, 0.0100787500],
+        [78, 0.0056227330],
+        [82, 0.0028790560],
+        [86, 0.0013404120],
+        [90, 0.0005221121],
+        [94, 0.0004865952],
+        [98, 0.0002609447],
+        [102, 0.0000722322],
+        [106, 0.0002627033],
+        [110, 0.0016542040],
+        [114, 0.0041198820],
+        [118, 0.0062694210],
+        [122, 0.0079189570],
+        [126, 0.0138144800],
+    ]
+
+
+def curve_a_smoothed():
+    return [
+        [70, 0.00258495],
+        [74, 0.00182937],
+        [78, 0.00145542],
+        [82, 0.00135855],
+        [86, 0.00144012],
+        [90, 0.00176726],
+        [94, 0.00244699],
+        [98, 0.00335730],
+        [102, 0.00395957],
+        [106, 0.00404653],
+        [110, 0.00348618],
+        [114, 0.00238778],
+        [118, 0.00145559],
+        [122, 0.00101030],
+        [126, 0.00079836],
+    ]
diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py
@@ -0,0 +1,35 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pytest
+
+from cleedpy.preprocessing import lorentzian_smoothing
+from tests.curves_helper import curve_a, curve_a_smoothed
+
+
+@pytest.mark.parametrize(
+    "curve, vi, expected",
+    [
+        (np.array(curve_a()), 4, np.array(curve_a_smoothed())),
+    ],
+)
+def test_lorentzian_smoothing(curve, vi, expected):
+    l_curve = lorentzian_smoothing(curve, vi)
+
+    x_values = curve[:, 0]
+    y_values = curve[:, 1]
+    plt.plot(x_values, y_values, marker="o", color="g", label="Input: Initial curve")
+
+    x_values = l_curve[:, 0]
+    y_values = l_curve[:, 1]
+    plt.plot(x_values, y_values, marker="x", color="r", label="Output: Smoothed curve")
+
+    y_min = 0
+    y_max = 0.0110
+    y_step = 0.0005
+    plt.ylim(y_min, y_max)
+    plt.yticks(np.arange(y_min, y_max + y_step, y_step))
+    plt.grid()
+    plt.legend()
+    plt.savefig("test_lorentzian_smoothing_output.png")
+
+    assert np.allclose(expected, l_curve)