Skip to content

Commit

Permalink
Added parsing strings to composition utility function
Browse files Browse the repository at this point in the history
  • Loading branch information
CPrescher committed Jul 24, 2024
1 parent f4eda33 commit 78bead5
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 2 deletions.
70 changes: 68 additions & 2 deletions glassure/utility.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
import re
from typing import Optional, Union, Dict
from collections import defaultdict
from copy import copy

import numpy as np
Expand Down Expand Up @@ -35,6 +36,71 @@
Composition = Dict[str, Union[int, float]]


def parse_str_to_composition(formula: str) -> Composition:
    """
    Parses a chemical formula string into a dictionary with elements as keys and abundances as relative numbers.
    Typical examples are 'SiO2'-> {'Si': 1, 'O': 2} or 'Na2Si2O5' -> {'Na': 2, 'Si': 2, 'O': 5}

    Groups can be enclosed in (), [] or {} and may be nested in any order; an optional number directly after the
    closing bracket multiplies the whole group, e.g. '(Mg)(SiO2)2.5' -> {'Mg': 1, 'Si': 2.5, 'O': 5} or
    '([Al2]3)2' -> {'Al': 12}. Whitespace is ignored. Counts and multipliers may be integers or decimals; a missing
    count means 1. All abundances are returned as floats.

    The parser is lenient: characters that are not part of an element symbol, a number attached to an element or
    closing bracket, or a bracket are skipped. A closing bracket which does not match the most recently opened
    bracket is ignored together with its multiplier, and groups that are never closed are counted once.

    :param formula: chemical formula string
    :return: Composition with elements as keys and abundances as relative numbers
    """
    # Remove all whitespace from the formula
    formula = re.sub(r"\s+", "", formula)

    # One token is either an element with optional count, an opening bracket, or a closing bracket with optional
    # multiplier. finditer silently skips everything that is none of these.
    token_pattern = re.compile(
        r"(?P<element>[A-Z][a-z]*)(?P<count>\d*\.\d+|\d*)"
        r"|(?P<open>[(\[{])"
        r"|(?P<close>[)\]}])(?P<multiplier>\d*\.\d+|\d*)"
    )
    opening_for = {")": "(", "]": "[", "}": "{"}

    # group_stack[0] collects the top level of the formula; every opening bracket pushes a fresh accumulator which
    # is scaled and merged into its parent when the matching closing bracket is reached. Working on numbers
    # directly (instead of rewriting the formula string) keeps nested groups of different bracket types correct
    # and avoids losing values whose string form would use scientific notation.
    group_stack = [defaultdict(float)]
    open_brackets = []

    for token in token_pattern.finditer(formula):
        element = token.group("element")
        if element:
            count = token.group("count")
            group_stack[-1][element] += float(count) if count else 1.0
            continue

        opener = token.group("open")
        if opener:
            open_brackets.append(opener)
            group_stack.append(defaultdict(float))
            continue

        closer = token.group("close")
        if not open_brackets or open_brackets[-1] != opening_for[closer]:
            # stray or mismatched closing bracket: ignore it and its multiplier
            continue
        open_brackets.pop()
        multiplier_text = token.group("multiplier")
        multiplier = float(multiplier_text) if multiplier_text else 1.0
        finished_group = group_stack.pop()
        for group_element, amount in finished_group.items():
            group_stack[-1][group_element] += amount * multiplier

    # Groups that were opened but never closed contribute with a multiplier of one
    while len(group_stack) > 1:
        finished_group = group_stack.pop()
        for group_element, amount in finished_group.items():
            group_stack[-1][group_element] += amount

    return dict(group_stack[0])


def calculate_f_mean_squared(
composition: Composition, q: np.ndarray, sf_source="hajdu"
) -> np.ndarray:
Expand Down Expand Up @@ -271,8 +337,8 @@ def extrapolate_to_zero_spline(
will be set to zero
:param pattern: input pattern
:param x_max: defines the maximum x value within the spline will be fitted to the input pattern.
this parameter should be larger than the minimum of the pattern x
:param x_max: defines the maximum x value within the spline will be fitted to the input pattern.
this parameter should be larger than the minimum of the pattern x
:param smooth_factor: defines the smoothing of the spline extrapolation; please see the
scipy.interpolate.UnivariateSpline manual for explanations
:param replace: boolean flag whether to replace the data values in the fitted region (default = False)
Expand Down
25 changes: 25 additions & 0 deletions tests/test_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
convert_two_theta_to_q_space_raw,
calculate_s0,
calculate_kn_correction,
parse_str_to_composition,
)
from glassure import Pattern

Expand Down Expand Up @@ -227,3 +228,27 @@ def test_convert_two_theta_to_q_space(self):
self.assertAlmostEqual(
np.max(pattern_q.x), 4 * np.pi * np.sin(25.0 / 360 * np.pi) / wavelength
)


def test_parse_string_to_composition(self):
    """Check parse_str_to_composition against a table of formula strings and their expected compositions."""
    # Each entry pairs a formula string with the composition it is expected to produce.
    cases = [
        ("Si O 0.5", {"Si": 1, "O": 0.5}),
        ("(Mg)(Si O2)2.5", {"Mg": 1, "Si": 2.5, "O": 5}),
        ("[Al2O3]3", {"Al": 6, "O": 9}),
        ("\\{Fe3O4\\}2.5", {"Fe": 7.5, "O": 10}),
        (
            "(H2O)2[NaCl]\\{KOH\\}0.5",
            {"H": 4.5, "O": 2.5, "Na": 1, "Cl": 1, "K": 0.5},
        ),
        (
            "\\{[Co(NH3)4(OH)2]3[Co(CN)6]\\}2",
            {"Co": 8, "N": 36, "H": 84, "O": 12, "C": 12},
        ),
    ]

    for formula, expected in cases:
        self.assertEqual(parse_str_to_composition(formula), expected)

0 comments on commit 78bead5

Please sign in to comment.