diff --git a/.gitignore b/.gitignore index 6136306..6507b73 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,7 @@ MANIFEST pip-log.txt pip-delete-this-directory.txt -# Unit test / coverage reports +# Unit test / coverage reports / Unit test outputs htmlcov/ .tox/ .nox/ @@ -50,6 +50,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +*.png # Translations *.mo diff --git a/cleedpy/preprocessing.py b/cleedpy/preprocessing.py new file mode 100644 index 0000000..0501df6 --- /dev/null +++ b/cleedpy/preprocessing.py @@ -0,0 +1,106 @@ +import numpy as np +from scipy.interpolate import CubicHermiteSpline + + +def lorentzian_smoothing(curve, vi): + """ + Draws a Lorentzian curve in the same grid as the given curve. + Performs the convolution of the two curves. + The given curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - curve: numpy array of [e,i] arrays + - vi: predefined constant representing the width + + Output: + The result of the convolution: a new array of [e,i] arrays + """ + + energies = curve[:, 0] + intensities = curve[:, 1] + result = [] + + for j in range(energies.size): + c = 0 + l_value = 0 + for i in range(energies.size): + c += vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2) + l_value += ( + intensities[i] * vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2) + ) + result.append(l_value / c) + + return np.array(list(zip(energies, result))) + + +def preprocessing_loop(theoretical_curves, experimental_curves, shift, r_factor, vi): + """ + Performs all the preprocessing steps and R-factor calculations, required before the Search. + A curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - theoretical_curves: numpy array of arrays of [e,i] arrays + - experimental_curves: numpy array of arrays of [e,i] arrays + where: + - number of theoretical curves = number of experimental curves + - shift: provided by user, used to edit the energy value per step + - r_factor: provided by user, name of the preferred R-factor method + + Design: + 1. Lorentzian smoothing of all curves + 2. For each energy point: + a. Shift theoretical curve + b. Find boundaries of overlap + c. Filter both curves to keep only the elements in the overlap + d. For each experimental curve: + i. Spline = interpolate it to the theoretical grid + ii. Calculate R-factor + e. Calculate average R-factor of all curves in this shift + 3. Find the min of the average values + + Output: + The optimal R-factor value per curve. + """ + + for i in range(len(theoretical_curves)): + theoretical_curves[i] = lorentzian_smoothing(theoretical_curves[i], vi) + experimental_curves[i] = lorentzian_smoothing(experimental_curves[i], vi) + + energy_shift = [shift, 0] + shifted_theoretical_curves = theoretical_curves + energy_shift + print(shifted_theoretical_curves) + + # Find the boundaries of the overlap after the shift + min_bound = np.maximum(shifted_theoretical_curves[0, 0], experimental_curves[0, 0])[ + 0 + ] + max_bound = np.minimum( + shifted_theoretical_curves[0, -1], experimental_curves[0, -1] + )[0] + print(min_bound, max_bound) + + # Filter both curves based on the boundaries + filtered_theoretical_curves = shifted_theoretical_curves[ + shifted_theoretical_curves[:, 0] >= min_bound + ] + filtered_theoretical_curves = filtered_theoretical_curves[ + filtered_theoretical_curves[:, 0] <= max_bound + ] + + filtered_experimental_curves = experimental_curves[ + experimental_curves[:, 0] >= min_bound + ] + filtered_experimental_curves = filtered_experimental_curves[ + filtered_experimental_curves[:, 0] <= max_bound + ] + print(filtered_theoretical_curves, filtered_experimental_curves) + + # Spline + for i in range(len(filtered_experimental_curves)): + e_values = filtered_experimental_curves[i][:, 0] + i_values = filtered_experimental_curves[i][:, 1] + theoretical_e_grid = filtered_theoretical_curves[i][:, 0] + CubicHermiteSpline(e_values, i_values, theoretical_e_grid) + + print(globals().get(r_factor)) + return diff --git a/cleedpy/rfactor.py b/cleedpy/rfactor.py index 36f9854..e201b1b 100644 --- a/cleedpy/rfactor.py +++ b/cleedpy/rfactor.py @@ -1,6 +1,120 @@ import numpy as np -def mean_square_error(y_true, y_pred): - """Mean Square Error""" - return np.mean(np.square(y_true - y_pred)) +def r2_factor(theoretical_curve, experimental_curve): + """ + Calculates R2-factor of the curve of a specific spot (x,y) of the experiment. + A curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - theoretical curve: numpy array of [e,i] arrays + - experimental curve: numpy array of [e,i] arrays + + Design: + R2 = sqrt{ S(it - c*ie)^2 / S(it - it_avg)^2 } + + where: + c = sqrt( S|it|^2 / S|ie|^ 2) + it_avg = (S it)/ dE + """ + + # [TODO] (not sure) Use the length of the shortest curve + min_length = min(len(theoretical_curve), len(experimental_curve)) + theoretical_curve = theoretical_curve[:min_length] + experimental_curve = experimental_curve[:min_length] + + # Extract the it, ie values for theoretical and experimental intensity + it = theoretical_curve[:, 1] + ie = experimental_curve[:, 1] + + # Calculate normalization factor c and it_avg + c = np.sqrt(np.sum(it**2) / np.sum(ie**2)) + it_avg = np.sum(it) / it.size # dE = number of energy steps + + # Calculate the numerator and denominator of R2 + numerator = np.sum((it - c * ie) ** 2) + denominator = np.sum((it - it_avg) ** 2) + + # Calculate R2 + r2 = np.sqrt(numerator / denominator) + + # [TODO] error handling: may return NaN + return r2 + + +def rp_factor(theoretical_curve, experimental_curve): + """ + Calculates Pendry's R-factor of the curve of a specific spot (x,y) of the experiment. + A curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - theoretical curve: numpy array of [e,i] arrays + - experimental curve: numpy array of [e,i] arrays + + Design: + Rp = S(ye - yt)^2 / S(ye^2 + yt^2) + """ + + # Extract the it, ie values for theoretical and experimental intensity + it = theoretical_curve[:, 1] + ie = experimental_curve[:, 1] + + # Extract the et, ee values for theoretical and experimental energy + et = theoretical_curve[:, 0] + ee = experimental_curve[:, 0] + + # Calculate theoretical and experimental energy steps + step_et = energy_step(et) + step_ee = energy_step(ee) + + # Calculate Y for theoretical and experimental intensity + yt = y_function(it, step_et) + ye = y_function(ie, step_ee) + + # Calculate the numerator and denominator of Rp + numerator = np.sum((yt - ye) ** 2 * step_et) + denominator = np.sum(ye**2 * step_ee) + np.sum(yt**2 * step_et) + + # Calculate Rp + rp = numerator / denominator + + # [TODO] error handling: may return NaN + return rp + + +def y_function(intensities, energy_step): + """ + Calculates y for a given curve. + + Inputs: + - I: array of intensity values of the curve + - E: array of energy values of the curve + + Design: + Y = L / (1 + L^2 * vi^2) + + where: + L = (I[i] - I[i-1]) / (energy_step * 0.5 * (I[i] + I[i-1])) + """ + + # [TODO] constants should be defined elsewhere + vi = 4 + + y_values = (intensities[1:] - intensities[:-1]) / ( + energy_step * 0.5 * (intensities[1:] + intensities[:-1]) + ) + return y_values / (1 + y_values**2 * vi**2) + + +def energy_step(energies): + """ + Calculates the pairwise energy step. Returns an array. + + Inputs: + - E: array of energy values of the curve + + Design: + step = E[i] - E[i-1] + """ + + return energies[1:] - energies[:-1] diff --git a/pyproject.toml b/pyproject.toml index d04a84e..52c964a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ classifiers = [ requires-python = ">=3.10" dependencies = [ "click", + "matplotlib", "numpy", "pyyaml", "scipy", diff --git a/tests/curves_helper.py b/tests/curves_helper.py new file mode 100644 index 0000000..f957dff --- /dev/null +++ b/tests/curves_helper.py @@ -0,0 +1,78 @@ +def curve_a(): + return [ + [70, 0.0037557780], + [74, 0.0013476560], + [78, 0.0010809600], + [82, 0.0009844219], + [86, 0.0009538341], + [90, 0.0011735780], + [94, 0.0020866750], + [98, 0.0040076710], + [102, 0.0049233240], + [106, 0.0051959950], + [110, 0.0044225620], + [114, 0.0020602790], + [118, 0.0005740963], + [122, 0.0005428890], + [126, 0.0002391057], + ] + + +def curve_b(): + return [ + [70, 0.0101207100], + [74, 0.0100794400], + [78, 0.0056215730], + [82, 0.0028791560], + [86, 0.0013405740], + [90, 0.0005220333], + [94, 0.0004865836], + [98, 0.0002609056], + [102, 0.0000723265], + [106, 0.0002626837], + [110, 0.0016541830], + [114, 0.0041199060], + [118, 0.0062692520], + [122, 0.0079176060], + [126, 0.0138149000], + ] + + +def curve_c(): + return [ + [70, 0.0101222800], + [74, 0.0100787500], + [78, 0.0056227330], + [82, 0.0028790560], + [86, 0.0013404120], + [90, 0.0005221121], + [94, 0.0004865952], + [98, 0.0002609447], + [102, 0.0000722322], + [106, 0.0002627033], + [110, 0.0016542040], + [114, 0.0041198820], + [118, 0.0062694210], + [122, 0.0079189570], + [126, 0.0138144800], + ] + + +def curve_a_smoothed(): + return [ + [70, 0.00258495], + [74, 0.00182937], + [78, 0.00145542], + [82, 0.00135855], + [86, 0.00144012], + [90, 0.00176726], + [94, 0.00244699], + [98, 0.00335730], + [102, 0.00395957], + [106, 0.00404653], + [110, 0.00348618], + [114, 0.00238778], + [118, 0.00145559], + [122, 0.00101030], + [126, 0.00079836], + ] diff --git a/tests/test_preprocessing.py b/tests/test_preprocessing.py new file mode 100644 index 0000000..9189515 --- /dev/null +++ b/tests/test_preprocessing.py @@ -0,0 +1,35 @@ +import matplotlib.pyplot as plt +import numpy as np +import pytest + +from cleedpy.preprocessing import lorentzian_smoothing +from tests.curves_helper import curve_a, curve_a_smoothed + + +@pytest.mark.parametrize( + "curve, vi, expected", + [ + (np.array(curve_a()), 4, np.array(curve_a_smoothed())), + ], +) +def test_lorentzian_smoothing(curve, vi, expected): + l_curve = lorentzian_smoothing(curve, vi) + + x_values = curve[:, 0] + y_values = curve[:, 1] + plt.plot(x_values, y_values, marker="o", color="g", label="Input: Initial curve") + + x_values = l_curve[:, 0] + y_values = l_curve[:, 1] + plt.plot(x_values, y_values, marker="x", color="r", label="Output: Smoothed curve") + + y_min = 0 + y_max = 0.0110 + y_step = 0.0005 + plt.ylim(y_min, y_max) + plt.yticks(np.arange(y_min, y_max + y_step, y_step)) + plt.grid() + plt.legend() + plt.savefig("test_lorentzian_smoothing_output.png") + + assert np.allclose(expected, l_curve) diff --git a/tests/test_rfactor.py b/tests/test_rfactor.py index 8f5cff3..5b2d220 100644 --- a/tests/test_rfactor.py +++ b/tests/test_rfactor.py @@ -3,18 +3,31 @@ import numpy as np import pytest -from cleedpy import rfactor +from cleedpy.rfactor import r2_factor, rp_factor +from tests.curves_helper import curve_a, curve_b, curve_c @pytest.mark.parametrize( - "y_true, y_pred, expected", + "the_curve, exp_curve, expected_r", [ - ([1, 2, 3], [1, 2, 3], 0), - ([1, 2, 3], [1.1, 2.2, 3.3], 0.04666666666666666), - ([1.0, 2.0, 3.0], [1.0, 2.0, 5.0], 1.3333333333333333), + (curve_a(), curve_b(), 1.8688830931), + (curve_b(), curve_c(), 0.0001429368), ], ) -def test_mean_squer_error(y_true, y_pred, expected): +def test_r2_factor(the_curve, exp_curve, expected_r): assert math.isclose( - expected, rfactor.mean_square_error(np.array(y_true), np.array(y_pred)) + expected_r, r2_factor(np.array(the_curve), np.array(exp_curve)), abs_tol=5 + ) + + +@pytest.mark.parametrize( + "the_curve, exp_curve, expected_r", + [ + (curve_a(), curve_b(), 1.4898282448), + (curve_b(), curve_c(), 0), + ], +) +def test_rp_factor(the_curve, exp_curve, expected_r): + assert math.isclose( + expected_r, rp_factor(np.array(the_curve), np.array(exp_curve)), abs_tol=5 )