Skip to content

Commit

Permalink
Merge pull request #112 from h2020charisma/nexusformat
Browse files Browse the repository at this point in the history
NeXus file format
  • Loading branch information
kerberizer authored Apr 23, 2024
2 parents 56ea4bb + fe4911f commit d81c194
Show file tree
Hide file tree
Showing 13 changed files with 866 additions and 40 deletions.
215 changes: 215 additions & 0 deletions examples/nexusformat.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import ramanchada2 as rc2\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SpeMetadataModel(__root__={'Original file': SpeMetadataFieldModel(__root__='PST10_iR785_OP03_8000msx2.txt')})"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"kwargs = {\"sample\":['PST'], \"provider\" : ['FNMT'], \"OP\":['03'], \"laser_wl\":['785']}\n",
"spe = rc2.spectrum.from_test_spe(**kwargs)\n",
"spe.meta"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{ 'assay_uuid': None,\n",
" 'citation': Citation(year='2022', title='Round Robin 1', owner='FNMT'),\n",
" 'effects': [ EffectArray(endpoint='Raman spectrum', endpointtype=None, result=None, conditions=None, idresult=None, endpointGroup=None, endpointSynonyms=None, sampleID=None, signal=ValueArray(unit=None, values=array([1354.36, 1355.04, 1349.8 , ..., 1031.89, 1031.96, 1031.44]), errQualifier=None, errorValue=None), axes={'x': ValueArray(unit='cm-1', values=array([ 120.111, 120.505, 120.899, ..., 2499.82 , 2499.94 , 2499.98 ]), errQualifier=None, errorValue=None)})],\n",
" 'interpretationCriteria': None,\n",
" 'interpretationResult': None,\n",
" 'investigation_uuid': None,\n",
" 'owner': SampleLink(substance=Sample(uuid='PST'), company=Company(uuid=None, name='Default company')),\n",
" 'parameters': {'E.method': 'Raman spectrometry', 'wavelength': 785},\n",
" 'protocol': Protocol(topcategory='P-CHEM', category=EndpointCategory(code='ANALYTICAL_METHODS_SECTION', term=None, title=None), endpoint=None, guideline=None),\n",
" 'updated': None,\n",
" 'uuid': 'cbf04397-9352-4382-89e1-81be6d99f473'}\n"
]
},
{
"data": {
"text/plain": [
"NXroot('spectrum')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pynanomapper.datamodel.ambit as mx\n",
"import numpy as np\n",
"from typing import Dict, Optional, Union, List\n",
"from pynanomapper.datamodel.nexus_utils import to_nexus\n",
"import json \n",
"import nexusformat.nexus.tree as nx\n",
"import pprint\n",
"import uuid\n",
"pp = pprint.PrettyPrinter(indent=4)\n",
"\n",
"\n",
"data_dict: Dict[str, mx.ValueArray] = {\n",
" 'x': mx.ValueArray(values = spe.x, unit=\"cm-1\")\n",
"}\n",
"ea = mx.EffectArray(endpoint=\"Raman spectrum\", \n",
" signal = mx.ValueArray(values = spe.y),\n",
" axes = data_dict)\n",
"#ea.to_json()\n",
"effect_list: List[Union[mx.EffectRecord,mx.EffectArray]] = []\n",
"effect_list.append(ea)\n",
"papp = mx.ProtocolApplication(protocol=mx.Protocol(topcategory=\"P-CHEM\",category=mx.EndpointCategory(code=\"ANALYTICAL_METHODS_SECTION\")),\n",
" effects=effect_list)\n",
"papp.citation = mx.Citation(owner=\"FNMT\",title=\"Round Robin 1\",year=2022)\n",
"papp.parameters = {\"E.method\" : \"Raman spectrometry\" , \"wavelength\" : 785}\n",
"\n",
"papp.uuid = str(uuid.uuid4())\n",
"company=mx.Company(name = \"FNMT\")\n",
"substance = mx.Sample(uuid = \"PST\")\n",
"papp.owner = mx.SampleLink(substance = substance)\n",
"#papp.to_json()\n",
"study = mx.Study(study=[papp])\n",
"\n",
"pp.pprint(papp.__dict__)\n",
"#print(papp.to_json())\n",
"#print(study.to_json())\n",
"nxroot = nx.NXroot()\n",
"study.to_nexus(nxroot)\n",
"nxroot.save(\"spectrum.nxs\",mode=\"w\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def spe2pap(spe:rc2.spectrum.Spectrum):\n",
" effect_list: List[EffectRecord] = []\n",
" effect_list.append(EffectRecord(endpoint=\"Endpoint 1\", unit=\"Unit 1\", loValue=5.0))\n",
" protocol = Protocol(topcategory=\"P-CHEM\",category=EndpointCategory(code=\"ANALYTICAL_METHODS_SECTION\"))\n",
" papp = ProtocolApplication(protocol=protocol,effects=effect_list)\n",
" return papp\n",
"\n",
"\n",
"spe2pap(spe)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spe.plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spe.write_nexus(\"nexus_test.cha\",\"entry\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"from pynanomapper.datamodel.ambit import EffectRecord, Protocol, EndpointCategory, ProtocolApplication\n",
"\n",
"effect_list: List[EffectRecord] = []\n",
"\n",
"effect_list.append(EffectRecord(endpoint=\"Endpoint 1\", unit=\"Unit 1\", loValue=5.0))\n",
"effect_list.append(EffectRecord(endpoint=\"Endpoint 2\", unit=\"Unit 2\", loValue=10.0))\n",
"\n",
"papp = ProtocolApplication(protocol=Protocol(topcategory=\"P-CHEM\",category=EndpointCategory(code=\"XYZ\")),effects=effect_list)\n",
"papp"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"papp = ProtocolApplication(effects=effect_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"papp.protocol,papp.effects"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pynanomapper.datamodel.ambit import SubstanceRecord,Sample,SampleLink\n",
"SubstanceRecord(name=\"xky\")\n",
"substance=Sample(uuid=\"xxx\")\n",
"SampleLink(substance=substance)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ramanchada2",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
61 changes: 59 additions & 2 deletions src/ramanchada2/io/HSDS.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,63 @@
logger = logging.getLogger()


# https://manual.nexusformat.org/examples/napi/python.html
# https://manual.nexusformat.org/examples/python/simple_example_basic/index.html
@pydantic.validate_arguments(config=dict(arbitrary_types_allowed=True))
def write_nexus(filename: str,
                dataset: str,
                x: npt.NDArray, y: npt.NDArray, meta: Dict, h5module=None):
    """
    Write a spectrum into an HDF5 file using a NeXus-style layout.

    The file is opened in append mode. Root-level groups ``sample`` and
    ``instrument`` are ensured, the root attribute ``default`` is set to
    `dataset`, and the spectrum is stored as::

        /<dataset>              NX_class=NXentry, default='data'
        /<dataset>/data         NX_class=NXdata, signal='spectrum',
                                axes='raman_shift'
        /<dataset>/data/raman_shift   <- x, units 'cm-1'
        /<dataset>/data/spectrum      <- y, units 'au'

    Args:
        filename: Path of the HDF5 file to create or append to.
        dataset: Name of the root-level entry group that receives the data.
        x: Values written to the ``raman_shift`` dataset.
        y: Values written to the ``spectrum`` dataset.
        meta: Metadata of the spectrum. It is only logged at debug level;
            it is not stored in the file.
        h5module: Module providing ``File`` (h5py-compatible). Defaults to
            ``h5py``.

    A dataset that already exists in ``<dataset>/data`` is left untouched and
    a warning is logged. ``TypeError`` and ``ValueError`` raised while
    writing are logged as warnings and not propagated.
    """
    _h5 = h5module or h5py
    try:
        with _h5.File(filename, 'a') as f:
            f.attrs['default'] = dataset

            f.require_group('sample')
            f.require_group('instrument')
            # Metadata is not persisted yet; expose it for debugging only.
            for key in meta:
                logger.debug('meta %s: %r', key, meta[key])

            nxentry = f.require_group(dataset)
            nxentry.attrs["NX_class"] = 'NXentry'
            nxentry.attrs['default'] = 'data'

            nxdata = nxentry.require_group('data')
            nxdata.attrs["NX_class"] = 'NXdata'
            nxdata.attrs['signal'] = 'spectrum'
            nxdata.attrs['axes'] = 'raman_shift'
            nxdata.attrs['raman_shift_indices'] = [0,]

            # The axis and the signal are datasets, not groups:
            # `require_group` does not accept `data`, so both are written
            # with `create_dataset`.
            fields = (
                ('raman_shift', x, 'cm-1', 'Raman shift (cm-1)'),
                ('spectrum', y, 'au', 'spectrum'),
            )
            for name, values, units, long_name in fields:
                if name in nxdata:
                    # Appending to an existing entry: keep what is there.
                    logger.warning(f'dataset `{dataset}/data/{name}` already exists '
                                   f'in file `{filename}`')
                    continue
                field = nxdata.create_dataset(name, data=values)
                field.attrs['units'] = units
                field.attrs['long_name'] = long_name
    except (TypeError, ValueError) as e:
        # TypeError: a conflicting object of another kind occupies a name;
        # ValueError: invalid name or data reported by the HDF5 backend.
        logger.warning(repr(e))


class DatasetExistsError(Exception):
    """Raised when a dataset to be written is already present in the target file."""


@pydantic.validate_arguments(config=dict(arbitrary_types_allowed=True))
def write_cha(filename: str,
dataset: str,
Expand All @@ -25,9 +82,9 @@ def write_cha(filename: str,
ds = h5.create_dataset(dataset, data=data)
ds.attrs.update(meta)
else:
logger.warning(f'dataset `{dataset}` already exists in file `{filename}`')
raise DatasetExistsError(f'dataset `{dataset}` already exists in file `{filename}`')
except ValueError as e:
logger.warning(repr(e))
raise e


def read_cha(filename: str,
Expand Down
1 change: 1 addition & 0 deletions src/ramanchada2/misc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@
find_closest_pairs,
find_closest_pairs_idx,
align, align_shift,
match_peaks
)
69 changes: 68 additions & 1 deletion src/ramanchada2/misc/utils/argmin2d.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import pydantic
import numpy as np
import numpy.typing as npt
import pandas as pd
import pydantic
from scipy import linalg
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import euclidean_distances
from typing import List, Union


Expand Down Expand Up @@ -77,3 +80,67 @@ def align_shift(x, y,
if loss > loss_bak:
return p_bak
return p


def _min_max_normalize(mapping):
    # Rescale the values of `mapping` to the range [0, 1].
    # A mapping with a single entry is returned unchanged.
    if len(mapping) <= 1:
        return mapping
    lowest = min(mapping.values())
    span = max(mapping.values()) - lowest
    if span == 0:
        # All values are identical; the min-max formula would divide by zero.
        return {key: 0.0 for key in mapping}
    return {key: (value - lowest) / span for key, value in mapping.items()}


def match_peaks(spe_pos_dict, ref):
    """
    Match peaks of a spectrum to reference peaks using K-means clustering.

    Both inputs are mappings whose keys are used as peak positions (column
    ``Wavelength``) and whose values are used as peak intensities (column
    ``Intensity``). Intensities are min-max normalized separately for each
    input. All peaks are clustered together with ``KMeans`` using as many
    clusters as the larger input has peaks. Inside every cluster that
    contains peaks from both inputs, each spectrum peak is paired with the
    reference peak closest to it in position.

    NOTE: ``KMeans`` is used without a fixed ``random_state``, so the
    clustering (and therefore the matches) may differ between runs.

    Args:
        spe_pos_dict: Peaks of the spectrum, ``{position: intensity}``.
        ref: Reference peaks, ``{position: intensity}``.

    Returns:
        Tuple ``(x_spe, x_reference, x_distance, df)``. The first three are
        arrays sorted by ``x_spe`` and hold matched spectrum positions, the
        paired reference positions and the distance between them. ``df`` is
        the DataFrame of all peaks with columns ``Wavelength``,
        ``Intensity``, ``Source`` and ``Cluster``.

    Raises:
        ValueError: If `spe_pos_dict` or `ref` is empty.
    """
    if len(spe_pos_dict) == 0 or len(ref) == 0:
        raise ValueError('match_peaks requires non-empty `spe_pos_dict` and `ref`')

    normalized_ref = _min_max_normalize(ref)
    normalized_spe = _min_max_normalize(spe_pos_dict)
    data_list = [
        {'Wavelength': key, 'Intensity': value, 'Source': 'spe'} for key, value in normalized_spe.items()
    ] + [
        {'Wavelength': key, 'Intensity': value, 'Source': 'reference'} for key, value in normalized_ref.items()
    ]

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_list)
    feature_matrix = df[['Wavelength', 'Intensity']].to_numpy()

    kmeans = KMeans(n_clusters=max(len(ref), len(spe_pos_dict)))
    kmeans.fit(feature_matrix)
    df['Cluster'] = kmeans.labels_

    x_spe = []
    x_reference = []
    x_distance = []
    for _, group in df.groupby('Cluster'):
        spe_positions = group.loc[group["Source"] == "spe"]["Wavelength"].values
        ref_positions = group.loc[group["Source"] == "reference"]["Wavelength"].values
        # Only clusters holding peaks from both sources can produce a match.
        if len(spe_positions) == 0 or len(ref_positions) == 0:
            continue
        for w_spe in spe_positions:
            closest = None
            e_min = None
            for w_ref in ref_positions:
                e = euclidean_distances(w_spe.reshape(-1, 1), w_ref.reshape(-1, 1))[0][0]
                # Strict comparison keeps the first reference peak on ties.
                if (e_min is None) or (e < e_min):
                    closest = w_ref
                    e_min = e
            x_spe.append(w_spe)
            x_reference.append(closest)
            x_distance.append(e_min)

    # Float arrays (also when nothing matched), as produced by np.append.
    x_spe = np.array(x_spe, dtype=float)
    x_reference = np.array(x_reference, dtype=float)
    x_distance = np.array(x_distance, dtype=float)
    sort_indices = np.argsort(x_spe)
    return (x_spe[sort_indices], x_reference[sort_indices], x_distance[sort_indices], df)
Loading

0 comments on commit d81c194

Please sign in to comment.