Skip to content

Commit

Permalink
Implement methods to compute Rfactor directly in Python (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
despadam authored Oct 23, 2024
1 parent f288bc0 commit 7d65406
Show file tree
Hide file tree
Showing 7 changed files with 359 additions and 11 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ MANIFEST
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
# Unit test / coverage reports / Unit test outputs
htmlcov/
.tox/
.nox/
Expand All @@ -50,6 +50,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
cover/
*.png

# Translations
*.mo
Expand Down
106 changes: 106 additions & 0 deletions cleedpy/preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import numpy as np
from scipy.interpolate import CubicHermiteSpline


def lorentzian_smoothing(curve, vi):
"""
Draws a Lorentzian curve in the same grid as the given curve.
Performs the convolution of the two curves.
The given curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- curve: numpy array of [e,i] arrays
- vi: predefined constant representing the width
Output:
The result of the convolution: a new array of [e,i] arrays
"""

energies = curve[:, 0]
intensities = curve[:, 1]
result = []

for j in range(energies.size):
c = 0
l_value = 0
for i in range(energies.size):
c += vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2)
l_value += (
intensities[i] * vi**2 / ((energies[i] - energies[j]) ** 2 + vi**2)
)
result.append(l_value / c)

return np.array(list(zip(energies, result)))


def preprocessing_loop(theoretical_curves, experimental_curves, shift, r_factor, vi):
"""
Performs all the preprocessing steps and R-factor calculations, required before the Search.
A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- theoretical_curves: numpy array of arrays of [e,i] arrays
- experimental_curves: numpy array of arrays of [e,i] arrays
where:
- number of theoretical curves = number of experimental curves
- shift: provided by user, used to edit the energy value per step
- r_factor: provided by user, name of the preferred R-factor method
Design:
1. Lorentzian smoothing of all curves
2. For each energy point:
a. Shift theoretical curve
b. Find boundaries of overlap
c. Filter both curves to keep only the elements in the overlap
d. For each experimental curve:
i. Spline = interpolate it to the theoretical grid
ii. Calculate R-factor
e. Calculate average R-factor of all curves in this shift
3. Find the min of the average values
Output:
The optimal R-factor value per curve.
"""

for i in range(len(theoretical_curves)):
theoretical_curves[i] = lorentzian_smoothing(theoretical_curves[i], vi)
experimental_curves[i] = lorentzian_smoothing(experimental_curves[i], vi)

energy_shift = [shift, 0]
shifted_theoretical_curves = theoretical_curves + energy_shift
print(shifted_theoretical_curves)

# Find the boundaries of the overlap after the shift
min_bound = np.maximum(shifted_theoretical_curves[0, 0], experimental_curves[0, 0])[
0
]
max_bound = np.minimum(
shifted_theoretical_curves[0, -1], experimental_curves[0, -1]
)[0]
print(min_bound, max_bound)

# Filter both curves based on the boundaries
filtered_theoretical_curves = shifted_theoretical_curves[
shifted_theoretical_curves[:, 0] >= min_bound
]
filtered_theoretical_curves = filtered_theoretical_curves[
filtered_theoretical_curves[:, 0] <= max_bound
]

filtered_experimental_curves = experimental_curves[
experimental_curves[:, 0] >= min_bound
]
filtered_experimental_curves = filtered_experimental_curves[
filtered_experimental_curves[:, 0] <= max_bound
]
print(filtered_theoretical_curves, filtered_experimental_curves)

# Spline
for i in range(len(filtered_experimental_curves)):
e_values = filtered_experimental_curves[i][:, 0]
i_values = filtered_experimental_curves[i][:, 1]
theoretical_e_grid = filtered_theoretical_curves[i][:, 0]
CubicHermiteSpline(e_values, i_values, theoretical_e_grid)

print(globals().get(r_factor))
return
120 changes: 117 additions & 3 deletions cleedpy/rfactor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,120 @@
import numpy as np


def mean_square_error(y_true, y_pred):
"""Mean Square Error"""
return np.mean(np.square(y_true - y_pred))
def r2_factor(theoretical_curve, experimental_curve):
"""
Calculates R2-factor of the curve of a specific spot (x,y) of the experiment.
A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- theoretical curve: numpy array of [e,i] arrays
- experimental curve: numpy array of [e,i] arrays
Design:
R2 = sqrt{ S(it - c*ie)^2 / S(it - it_avg)^2 }
where:
c = sqrt( S|it|^2 / S|ie|^ 2)
it_avg = (S it)/ dE
"""

# [TODO] (not sure) Use the length of the shortest curve
min_length = min(len(theoretical_curve), len(experimental_curve))
theoretical_curve = theoretical_curve[:min_length]
experimental_curve = experimental_curve[:min_length]

# Extract the it, ie values for theoretical and experimental intensity
it = theoretical_curve[:, 1]
ie = experimental_curve[:, 1]

# Calculate normalization factor c and it_avg
c = np.sqrt(np.sum(it**2) / np.sum(ie**2))
it_avg = np.sum(it) / it.size # dE = number of energy steps

# Calculate the numerator and denominator of R2
numerator = np.sum((it - c * ie) ** 2)
denominator = np.sum((it - it_avg) ** 2)

# Calculate R2
r2 = np.sqrt(numerator / denominator)

# [TODO] error handling: may return NaN
return r2


def rp_factor(theoretical_curve, experimental_curve):
"""
Calculates Pendry's R-factor of the curve of a specific spot (x,y) of the experiment.
A curve is represented by an array of (e,i) points, where e = energy and i = intensity.
Inputs:
- theoretical curve: numpy array of [e,i] arrays
- experimental curve: numpy array of [e,i] arrays
Design:
Rp = S(ye - yt)^2 / S(ye^2 + yt^2)
"""

# Extract the it, ie values for theoretical and experimental intensity
it = theoretical_curve[:, 1]
ie = experimental_curve[:, 1]

# Extract the et, ee values for theoretical and experimental energy
et = theoretical_curve[:, 0]
ee = experimental_curve[:, 0]

# Calculate theoretical and experimental energy steps
step_et = energy_step(et)
step_ee = energy_step(ee)

# Calculate Y for theoretical and experimental intensity
yt = y_function(it, step_et)
ye = y_function(ie, step_ee)

# Calculate the numerator and denominator of Rp
numerator = np.sum((yt - ye) ** 2 * step_et)
denominator = np.sum(ye**2 * step_ee) + np.sum(yt**2 * step_et)

# Calculate Rp
rp = numerator / denominator

# [TODO] error handling: may return NaN
return rp


def y_function(intensities, energy_step):
"""
Calculates y for a given curve.
Inputs:
- I: array of intensity values of the curve
- E: array of energy values of the curve
Design:
Y = L / (1 + L^2 * vi^2)
where:
L = (I[i] - I[i-1]) / (energy_step * 0.5 * (I[i] + I[i-1]))
"""

# [TODO] constants should be defined elsewhere
vi = 4

y_values = (intensities[1:] - intensities[:-1]) / (
energy_step * 0.5 * (intensities[1:] + intensities[:-1])
)
return y_values / (1 + y_values**2 * vi**2)


def energy_step(energies):
"""
Calculates the pairwise energy step. Returns an array.
Inputs:
- E: array of energy values of the curve
Design:
step = E[i] - E[i-1]
"""

return energies[1:] - energies[:-1]
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ classifiers = [
requires-python = ">=3.10"
dependencies = [
"click",
"matplotlib",
"numpy",
"pyyaml",
"scipy",
Expand Down
78 changes: 78 additions & 0 deletions tests/curves_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
def curve_a():
return [
[70, 0.0037557780],
[74, 0.0013476560],
[78, 0.0010809600],
[82, 0.0009844219],
[86, 0.0009538341],
[90, 0.0011735780],
[94, 0.0020866750],
[98, 0.0040076710],
[102, 0.0049233240],
[106, 0.0051959950],
[110, 0.0044225620],
[114, 0.0020602790],
[118, 0.0005740963],
[122, 0.0005428890],
[126, 0.0002391057],
]


def curve_b():
return [
[70, 0.0101207100],
[74, 0.0100794400],
[78, 0.0056215730],
[82, 0.0028791560],
[86, 0.0013405740],
[90, 0.0005220333],
[94, 0.0004865836],
[98, 0.0002609056],
[102, 0.0000723265],
[106, 0.0002626837],
[110, 0.0016541830],
[114, 0.0041199060],
[118, 0.0062692520],
[122, 0.0079176060],
[126, 0.0138149000],
]


def curve_c():
return [
[70, 0.0101222800],
[74, 0.0100787500],
[78, 0.0056227330],
[82, 0.0028790560],
[86, 0.0013404120],
[90, 0.0005221121],
[94, 0.0004865952],
[98, 0.0002609447],
[102, 0.0000722322],
[106, 0.0002627033],
[110, 0.0016542040],
[114, 0.0041198820],
[118, 0.0062694210],
[122, 0.0079189570],
[126, 0.0138144800],
]


def curve_a_smoothed():
return [
[70, 0.00258495],
[74, 0.00182937],
[78, 0.00145542],
[82, 0.00135855],
[86, 0.00144012],
[90, 0.00176726],
[94, 0.00244699],
[98, 0.00335730],
[102, 0.00395957],
[106, 0.00404653],
[110, 0.00348618],
[114, 0.00238778],
[118, 0.00145559],
[122, 0.00101030],
[126, 0.00079836],
]
35 changes: 35 additions & 0 deletions tests/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import matplotlib.pyplot as plt
import numpy as np
import pytest

from cleedpy.preprocessing import lorentzian_smoothing
from tests.curves_helper import curve_a, curve_a_smoothed


@pytest.mark.parametrize(
"curve, vi, expected",
[
(np.array(curve_a()), 4, np.array(curve_a_smoothed())),
],
)
def test_lorentzian_smoothing(curve, vi, expected):
l_curve = lorentzian_smoothing(curve, vi)

x_values = curve[:, 0]
y_values = curve[:, 1]
plt.plot(x_values, y_values, marker="o", color="g", label="Input: Initial curve")

x_values = l_curve[:, 0]
y_values = l_curve[:, 1]
plt.plot(x_values, y_values, marker="x", color="r", label="Output: Smoothed curve")

y_min = 0
y_max = 0.0110
y_step = 0.0005
plt.ylim(y_min, y_max)
plt.yticks(np.arange(y_min, y_max + y_step, y_step))
plt.grid()
plt.legend()
plt.savefig("test_lorentzian_smoothing_output.png")

assert np.allclose(expected, l_curve)
Loading

0 comments on commit 7d65406

Please sign in to comment.