diff --git a/cleedpy/preprocessing.py b/cleedpy/preprocessing.py index b80660b..0501df6 100644 --- a/cleedpy/preprocessing.py +++ b/cleedpy/preprocessing.py @@ -1,106 +1,106 @@ -import numpy as np -from scipy.interpolate import CubicHermiteSpline - - -def lorentzian_smoothing(curve, vi): - """ - Draws a Lorentzian curve in the same grid as the given curve. - Performs the convolution of the two curves. - The given curve is represented by an array of (e,i) points, where e = energy and i = intensity. - - Inputs: - - curve: numpy array of [e,i] arrays - - vi: predefined constant representing the width - - Output: - The result of the convolution: a new array of [e,i] arrays - """ - - energies = curve[:, 0] - intensities = curve[:, 1] - result = [] - - for j in range(energies.size): - c = 0 - l_value = 0 - for i in range(energies.size): - c += vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2) - l_value += ( - intensities[i] * vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2) - ) - result.append(l_value / c) - - return np.array(list(zip(energies, result))) - - -def preprocessing_loop(theoretical_curves, experimental_curves, shift, r_factor, vi): - """ - Performs all the preprocessing steps and R-factor calculations, required before the Search. - A curve is represented by an array of (e,i) points, where e = energy and i = intensity. - - Inputs: - - theoretical_curves: numpy array of arrays of [e,i] arrays - - experimental_curves: numpy array of arrays of [e,i] arrays - where: - - number of theoretical curves = number of experimental curves - - shift: provided by user, used to edit the energy value per step - - r_factor: provided by user, name of the preferred R-factor method - - Design: - 1. Lorentzian smoothing of all curves - 2. For each energy point: - a. Shift theoretical curve - b. Find boundaries of overlap - c. Filter both curves to keep only the elements in the overlap - d. For each experimental curve: - i. Spline = interpolate it to the theoretical grid - ii. Calculate R-factor - e. Calculate average R-factor of all curves in this shift - 3. Find the min of the average values - - Output: - The optimal R-factor value per curve. - """ - - for i in range(len(theoretical_curves)): - theoretical_curves[i] = lorentzian_smoothing(theoretical_curves[i], vi) - experimental_curves[i] = lorentzian_smoothing(experimental_curves[i], vi) - - energy_shift = [shift, 0] - shifted_theoretical_curves = theoretical_curves + energy_shift - print(shifted_theoretical_curves) - - # Find the boundaries of the overlap after the shift - min_bound = np.maximum(shifted_theoretical_curves[0, 0], experimental_curves[0, 0])[ - 0 - ] - max_bound = np.minimum( - shifted_theoretical_curves[0, -1], experimental_curves[0, -1] - )[0] - print(min_bound, max_bound) - - # Filter both curves based on the boundaries - filtered_theoretical_curves = shifted_theoretical_curves[ - shifted_theoretical_curves[:, 0] >= min_bound - ] - filtered_theoretical_curves = filtered_theoretical_curves[ - filtered_theoretical_curves[:, 0] <= max_bound - ] - - filtered_experimental_curves = experimental_curves[ - experimental_curves[:, 0] >= min_bound - ] - filtered_experimental_curves = filtered_experimental_curves[ - filtered_experimental_curves[:, 0] <= max_bound - ] - print(filtered_theoretical_curves, filtered_experimental_curves) - - # Spline - for i in range(len(filtered_experimental_curves)): - e_values = filtered_experimental_curves[i][:, 0] - i_values = filtered_experimental_curves[i][:, 1] - theoretical_e_grid = filtered_theoretical_curves[i][:, 0] - CubicHermiteSpline(e_values, i_values, theoretical_e_grid) - - print(globals().get(r_factor)) - return +import numpy as np +from scipy.interpolate import CubicHermiteSpline + + +def lorentzian_smoothing(curve, vi): + """ + Draws a Lorentzian curve in the same grid as the given curve. + Performs the convolution of the two curves. + The given curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - curve: numpy array of [e,i] arrays + - vi: predefined constant representing the width + + Output: + The result of the convolution: a new array of [e,i] arrays + """ + + energies = curve[:, 0] + intensities = curve[:, 1] + result = [] + + for j in range(energies.size): + c = 0 + l_value = 0 + for i in range(energies.size): + c += vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2) + l_value += ( + intensities[i] * vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2) + ) + result.append(l_value / c) + + return np.array(list(zip(energies, result))) + + +def preprocessing_loop(theoretical_curves, experimental_curves, shift, r_factor, vi): + """ + Performs all the preprocessing steps and R-factor calculations, required before the Search. + A curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - theoretical_curves: numpy array of arrays of [e,i] arrays + - experimental_curves: numpy array of arrays of [e,i] arrays + where: + - number of theoretical curves = number of experimental curves + - shift: provided by user, used to edit the energy value per step + - r_factor: provided by user, name of the preferred R-factor method + + Design: + 1. Lorentzian smoothing of all curves + 2. For each energy point: + a. Shift theoretical curve + b. Find boundaries of overlap + c. Filter both curves to keep only the elements in the overlap + d. For each experimental curve: + i. Spline = interpolate it to the theoretical grid + ii. Calculate R-factor + e. Calculate average R-factor of all curves in this shift + 3. Find the min of the average values + + Output: + The optimal R-factor value per curve. + """ + + for i in range(len(theoretical_curves)): + theoretical_curves[i] = lorentzian_smoothing(theoretical_curves[i], vi) + experimental_curves[i] = lorentzian_smoothing(experimental_curves[i], vi) + + energy_shift = [shift, 0] + shifted_theoretical_curves = theoretical_curves + energy_shift + print(shifted_theoretical_curves) + + # Find the boundaries of the overlap after the shift + min_bound = np.maximum(shifted_theoretical_curves[0, 0], experimental_curves[0, 0])[ + 0 + ] + max_bound = np.minimum( + shifted_theoretical_curves[0, -1], experimental_curves[0, -1] + )[0] + print(min_bound, max_bound) + + # Filter both curves based on the boundaries + filtered_theoretical_curves = shifted_theoretical_curves[ + shifted_theoretical_curves[:, 0] >= min_bound + ] + filtered_theoretical_curves = filtered_theoretical_curves[ + filtered_theoretical_curves[:, 0] <= max_bound + ] + + filtered_experimental_curves = experimental_curves[ + experimental_curves[:, 0] >= min_bound + ] + filtered_experimental_curves = filtered_experimental_curves[ + filtered_experimental_curves[:, 0] <= max_bound + ] + print(filtered_theoretical_curves, filtered_experimental_curves) + + # Spline + for i in range(len(filtered_experimental_curves)): + e_values = filtered_experimental_curves[i][:, 0] + i_values = filtered_experimental_curves[i][:, 1] + theoretical_e_grid = filtered_theoretical_curves[i][:, 0] + CubicHermiteSpline(e_values, i_values, theoretical_e_grid) + + print(globals().get(r_factor)) + return diff --git a/cleedpy/rfactor.py b/cleedpy/rfactor.py index 8517602..e201b1b 100644 --- a/cleedpy/rfactor.py +++ b/cleedpy/rfactor.py @@ -1,120 +1,120 @@ -import numpy as np - - -def r2_factor(theoretical_curve, experimental_curve): - """ - Calculates R2-factor of the curve of a specific spot (x,y) of the experiment. - A curve is represented by an array of (e,i) points, where e = energy and i = intensity. - - Inputs: - - theoretical curve: numpy array of [e,i] arrays - - experimental curve: numpy array of [e,i] arrays - - Design: - R2 = sqrt{ S(it - c*ie)^2 / S(it - it_avg)^2 } - - where: - c = sqrt( S|it|^2 / S|ie|^ 2) - it_avg = (S it)/ dE - """ - - # [TODO] (not sure) Use the length of the shortest curve - min_length = min(len(theoretical_curve), len(experimental_curve)) - theoretical_curve = theoretical_curve[:min_length] - experimental_curve = experimental_curve[:min_length] - - # Extract the it, ie values for theoretical and experimental intensity - it = theoretical_curve[:, 1] - ie = experimental_curve[:, 1] - - # Calculate normalization factor c and it_avg - c = np.sqrt(np.sum(it**2) / np.sum(ie**2)) - it_avg = np.sum(it) / it.size # dE = number of energy steps - - # Calculate the numerator and denominator of R2 - numerator = np.sum((it - c * ie) ** 2) - denominator = np.sum((it - it_avg) ** 2) - - # Calculate R2 - r2 = np.sqrt(numerator / denominator) - - # [TODO] error handling: may return NaN - return r2 - - -def rp_factor(theoretical_curve, experimental_curve): - """ - Calculates Pendry's R-factor of the curve of a specific spot (x,y) of the experiment. - A curve is represented by an array of (e,i) points, where e = energy and i = intensity. - - Inputs: - - theoretical curve: numpy array of [e,i] arrays - - experimental curve: numpy array of [e,i] arrays - - Design: - Rp = S(ye - yt)^2 / S(ye^2 + yt^2) - """ - - # Extract the it, ie values for theoretical and experimental intensity - it = theoretical_curve[:, 1] - ie = experimental_curve[:, 1] - - # Extract the et, ee values for theoretical and experimental energy - et = theoretical_curve[:, 0] - ee = experimental_curve[:, 0] - - # Calculate theoretical and experimental energy steps - step_et = energy_step(et) - step_ee = energy_step(ee) - - # Calculate Y for theoretical and experimental intensity - yt = y_function(it, step_et) - ye = y_function(ie, step_ee) - - # Calculate the numerator and denominator of Rp - numerator = np.sum((yt - ye) ** 2 * step_et) - denominator = np.sum(ye**2 * step_ee) + np.sum(yt**2 * step_et) - - # Calculate Rp - rp = numerator / denominator - - # [TODO] error handling: may return NaN - return rp - - -def y_function(intensities, energy_step): - """ - Calculates y for a given curve. - - Inputs: - - I: array of intensity values of the curve - - E: array of energy values of the curve - - Design: - Y = L / (1 + L^2 * vi^2) - - where: - L = (I[i] - I[i-1]) / (energy_step * 0.5 * (I[i] + I[i-1])) - """ - - # [TODO] constants should be defined elsewhere - vi = 4 - - y_values = (intensities[1:] - intensities[:-1]) / ( - energy_step * 0.5 * (intensities[1:] + intensities[:-1]) - ) - return y_values / (1 + y_values**2 * vi**2) - - -def energy_step(energies): - """ - Calculates the pairwise energy step. Returns an array. - - Inputs: - - E: array of energy values of the curve - - Design: - step = E[i] - E[i-1] - """ - - return energies[1:] - energies[:-1] +import numpy as np + + +def r2_factor(theoretical_curve, experimental_curve): + """ + Calculates R2-factor of the curve of a specific spot (x,y) of the experiment. + A curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - theoretical curve: numpy array of [e,i] arrays + - experimental curve: numpy array of [e,i] arrays + + Design: + R2 = sqrt{ S(it - c*ie)^2 / S(it - it_avg)^2 } + + where: + c = sqrt( S|it|^2 / S|ie|^ 2) + it_avg = (S it)/ dE + """ + + # [TODO] (not sure) Use the length of the shortest curve + min_length = min(len(theoretical_curve), len(experimental_curve)) + theoretical_curve = theoretical_curve[:min_length] + experimental_curve = experimental_curve[:min_length] + + # Extract the it, ie values for theoretical and experimental intensity + it = theoretical_curve[:, 1] + ie = experimental_curve[:, 1] + + # Calculate normalization factor c and it_avg + c = np.sqrt(np.sum(it**2) / np.sum(ie**2)) + it_avg = np.sum(it) / it.size # dE = number of energy steps + + # Calculate the numerator and denominator of R2 + numerator = np.sum((it - c * ie) ** 2) + denominator = np.sum((it - it_avg) ** 2) + + # Calculate R2 + r2 = np.sqrt(numerator / denominator) + + # [TODO] error handling: may return NaN + return r2 + + +def rp_factor(theoretical_curve, experimental_curve): + """ + Calculates Pendry's R-factor of the curve of a specific spot (x,y) of the experiment. + A curve is represented by an array of (e,i) points, where e = energy and i = intensity. + + Inputs: + - theoretical curve: numpy array of [e,i] arrays + - experimental curve: numpy array of [e,i] arrays + + Design: + Rp = S(ye - yt)^2 / S(ye^2 + yt^2) + """ + + # Extract the it, ie values for theoretical and experimental intensity + it = theoretical_curve[:, 1] + ie = experimental_curve[:, 1] + + # Extract the et, ee values for theoretical and experimental energy + et = theoretical_curve[:, 0] + ee = experimental_curve[:, 0] + + # Calculate theoretical and experimental energy steps + step_et = energy_step(et) + step_ee = energy_step(ee) + + # Calculate Y for theoretical and experimental intensity + yt = y_function(it, step_et) + ye = y_function(ie, step_ee) + + # Calculate the numerator and denominator of Rp + numerator = np.sum((yt - ye) ** 2 * step_et) + denominator = np.sum(ye**2 * step_ee) + np.sum(yt**2 * step_et) + + # Calculate Rp + rp = numerator / denominator + + # [TODO] error handling: may return NaN + return rp + + +def y_function(intensities, energy_step): + """ + Calculates y for a given curve. + + Inputs: + - I: array of intensity values of the curve + - E: array of energy values of the curve + + Design: + Y = L / (1 + L^2 * vi^2) + + where: + L = (I[i] - I[i-1]) / (energy_step * 0.5 * (I[i] + I[i-1])) + """ + + # [TODO] constants should be defined elsewhere + vi = 4 + + y_values = (intensities[1:] - intensities[:-1]) / ( + energy_step * 0.5 * (intensities[1:] + intensities[:-1]) + ) + return y_values / (1 + y_values**2 * vi**2) + + +def energy_step(energies): + """ + Calculates the pairwise energy step. Returns an array. + + Inputs: + - E: array of energy values of the curve + + Design: + step = E[i] - E[i-1] + """ + + return energies[1:] - energies[:-1] diff --git a/tests/curves_helper.py b/tests/curves_helper.py index 6ccf2ca..f957dff 100644 --- a/tests/curves_helper.py +++ b/tests/curves_helper.py @@ -1,78 +1,78 @@ -def curve_a(): - return [ - [70, 0.0037557780], - [74, 0.0013476560], - [78, 0.0010809600], - [82, 0.0009844219], - [86, 0.0009538341], - [90, 0.0011735780], - [94, 0.0020866750], - [98, 0.0040076710], - [102, 0.0049233240], - [106, 0.0051959950], - [110, 0.0044225620], - [114, 0.0020602790], - [118, 0.0005740963], - [122, 0.0005428890], - [126, 0.0002391057], - ] - - -def curve_b(): - return [ - [70, 0.0101207100], - [74, 0.0100794400], - [78, 0.0056215730], - [82, 0.0028791560], - [86, 0.0013405740], - [90, 0.0005220333], - [94, 0.0004865836], - [98, 0.0002609056], - [102, 0.0000723265], - [106, 0.0002626837], - [110, 0.0016541830], - [114, 0.0041199060], - [118, 0.0062692520], - [122, 0.0079176060], - [126, 0.0138149000], - ] - - -def curve_c(): - return [ - [70, 0.0101222800], - [74, 0.0100787500], - [78, 0.0056227330], - [82, 0.0028790560], - [86, 0.0013404120], - [90, 0.0005221121], - [94, 0.0004865952], - [98, 0.0002609447], - [102, 0.0000722322], - [106, 0.0002627033], - [110, 0.0016542040], - [114, 0.0041198820], - [118, 0.0062694210], - [122, 0.0079189570], - [126, 0.0138144800], - ] - - -def curve_a_smoothed(): - return [ - [70, 0.00258495], - [74, 0.00182937], - [78, 0.00145542], - [82, 0.00135855], - [86, 0.00144012], - [90, 0.00176726], - [94, 0.00244699], - [98, 0.00335730], - [102, 0.00395957], - [106, 0.00404653], - [110, 0.00348618], - [114, 0.00238778], - [118, 0.00145559], - [122, 0.00101030], - [126, 0.00079836], - ] +def curve_a(): + return [ + [70, 0.0037557780], + [74, 0.0013476560], + [78, 0.0010809600], + [82, 0.0009844219], + [86, 0.0009538341], + [90, 0.0011735780], + [94, 0.0020866750], + [98, 0.0040076710], + [102, 0.0049233240], + [106, 0.0051959950], + [110, 0.0044225620], + [114, 0.0020602790], + [118, 0.0005740963], + [122, 0.0005428890], + [126, 0.0002391057], + ] + + +def curve_b(): + return [ + [70, 0.0101207100], + [74, 0.0100794400], + [78, 0.0056215730], + [82, 0.0028791560], + [86, 0.0013405740], + [90, 0.0005220333], + [94, 0.0004865836], + [98, 0.0002609056], + [102, 0.0000723265], + [106, 0.0002626837], + [110, 0.0016541830], + [114, 0.0041199060], + [118, 0.0062692520], + [122, 0.0079176060], + [126, 0.0138149000], + ] + + +def curve_c(): + return [ + [70, 0.0101222800], + [74, 0.0100787500], + [78, 0.0056227330], + [82, 0.0028790560], + [86, 0.0013404120], + [90, 0.0005221121], + [94, 0.0004865952], + [98, 0.0002609447], + [102, 0.0000722322], + [106, 0.0002627033], + [110, 0.0016542040], + [114, 0.0041198820], + [118, 0.0062694210], + [122, 0.0079189570], + [126, 0.0138144800], + ] + + +def curve_a_smoothed(): + return [ + [70, 0.00258495], + [74, 0.00182937], + [78, 0.00145542], + [82, 0.00135855], + [86, 0.00144012], + [90, 0.00176726], + [94, 0.00244699], + [98, 0.00335730], + [102, 0.00395957], + [106, 0.00404653], + [110, 0.00348618], + [114, 0.00238778], + [118, 0.00145559], + [122, 0.00101030], + [126, 0.00079836], + ] diff --git a/tests/test_rfactor.py b/tests/test_rfactor.py index e23295d..5b2d220 100644 --- a/tests/test_rfactor.py +++ b/tests/test_rfactor.py @@ -1,33 +1,33 @@ -import math - -import numpy as np -import pytest - -from cleedpy.rfactor import r2_factor, rp_factor -from tests.curves_helper import curve_a, curve_b, curve_c - - -@pytest.mark.parametrize( - "the_curve, exp_curve, expected_r", - [ - (curve_a(), curve_b(), 1.8688830931), - (curve_b(), curve_c(), 0.0001429368), - ], -) -def test_r2_factor(the_curve, exp_curve, expected_r): - assert math.isclose( - expected_r, r2_factor(np.array(the_curve), np.array(exp_curve)), abs_tol=5 - ) - - -@pytest.mark.parametrize( - "the_curve, exp_curve, expected_r", - [ - (curve_a(), curve_b(), 1.4898282448), - (curve_b(), curve_c(), 0), - ], -) -def test_rp_factor(the_curve, exp_curve, expected_r): - assert math.isclose( - expected_r, rp_factor(np.array(the_curve), np.array(exp_curve)), abs_tol=5 - ) +import math + +import numpy as np +import pytest + +from cleedpy.rfactor import r2_factor, rp_factor +from tests.curves_helper import curve_a, curve_b, curve_c + + +@pytest.mark.parametrize( + "the_curve, exp_curve, expected_r", + [ + (curve_a(), curve_b(), 1.8688830931), + (curve_b(), curve_c(), 0.0001429368), + ], +) +def test_r2_factor(the_curve, exp_curve, expected_r): + assert math.isclose( + expected_r, r2_factor(np.array(the_curve), np.array(exp_curve)), abs_tol=5 + ) + + +@pytest.mark.parametrize( + "the_curve, exp_curve, expected_r", + [ + (curve_a(), curve_b(), 1.4898282448), + (curve_b(), curve_c(), 0), + ], +) +def test_rp_factor(the_curve, exp_curve, expected_r): + assert math.isclose( + expected_r, rp_factor(np.array(the_curve), np.array(exp_curve)), abs_tol=5 + )