Skip to content

Commit

Permalink
Added general PPMS plugin (#133)
Browse files Browse the repository at this point in the history
* Added general PPMS plugin in its old structure

* Adapted PPMS structure

* Quick fix on the entry points

* Fix in schema bound_logger import

* Fixed formatting

* Fixed some typos

* Small Fix: ACT -> ETO

* changed names and made test folder

* added entrydata_definition for parser specialization

* lint

* Changed repeats to not is_scalar in merge_sections (#136)

Updated `merge_sections` util function to work with the latest nomad-lab version

* 137 merge sections break for non scalar quantities (#138)

* Added breaking test for boolean array

* Added more breaking test cases

* Added fix for comparing non numpy arrays

* Ruff

* Enhance test_merge_sections to capture output and validate float_array values

* Refine warning message for merging sections with differing quantity values

* Refactor merge_sections to improve warning logic for differing quantity values

* 139 merge sections still breaking for pint quantity arrays (#140)

* Added breaking test for float array with units

* Added fix for comparison of pint quantity arrays

* Added test for multi dimensional array

* Ruff

* 114 looks for errors in the structlogger in tests (#115)

* Add fixture to capture error from logs

* Add nomad-lab infrastructure deps required by caplog fixture

* Ruff

* Refactoring fixtures; parameterize caplog to allow capturing different log levels

* Removing the formatter from nomad.utils

* testing: add logger.error in normalize

* testing: adding the formatter from nomad.utils

* Add logstash dep; cleaning

* Cleaning

* Specify caplog as an arg (from review)

* moved nomad search import

* lint

* change create_archive inheritance

* lint

* fix test files path

* Added ETO test file to tests/ppms

* Added ETO sequence file to tests/ppms

* Changed regex in parsers mainfile_name_re

* Removed entry_type from PPMSSequenceParser

* lint

* Fixed PPMS parsers
Added call of set_entrydata_definition to parse

* Added test for PPMS data file

* Update __init__.py

* Fixed failing ppms test

* Fixed ppms test again

* PPMS data parser now matches two mime types

---------

Co-authored-by: Andrea Albino <[email protected]>
Co-authored-by: Hampus Näsström <[email protected]>
Co-authored-by: Sarthak Kapoor <[email protected]>
  • Loading branch information
4 people authored Dec 19, 2024
1 parent baa56e5 commit 81e3c1d
Show file tree
Hide file tree
Showing 19 changed files with 20,587 additions and 2 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ parsing of following vendor-specific file formats:
- `.xrdml` (Malvern Panalytical)
- `.brml` (Bruker)

The `nomad_measurements.ppms` module supports
parsing of the following file format:
- `.dat` (in the structure of the QuantumDesign PPMS)

## Getting started
`nomad-measurements` can be installed from PyPI using `pip`.
Currently we require features in `nomad-lab` which are not published to PyPI.
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -134,5 +134,8 @@ where = ["src"]
general_schema = "nomad_measurements:schema"
xrd_schema = "nomad_measurements.xrd:schema"
xrd_parser = "nomad_measurements.xrd:parser"
ppms_schema = "nomad_measurements.ppms:ppms_schema"
ppms_data_parser = "nomad_measurements.ppms:ppms_data_parser"
ppms_sequence_parser = "nomad_measurements.ppms:ppms_sequence_parser"

[tool.setuptools_scm]
51 changes: 51 additions & 0 deletions src/nomad_measurements/ppms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from nomad.config.models.plugins import (
ParserEntryPoint,
SchemaPackageEntryPoint,
)
from pydantic import Field


class DataParserEntryPoint(ParserEntryPoint):
    # Parser entry point that builds a `PPMSParser` on demand.

    def load(self):
        """Create the `PPMSParser`, configured with this entry point's fields."""
        # Imported here rather than at module level, so the parser module is
        # only imported once the entry point is actually loaded.
        from nomad_measurements.ppms.parser import PPMSParser

        parser_kwargs = self.dict()
        return PPMSParser(**parser_kwargs)


# Entry point instance for PPMS data files; referenced from `pyproject.toml`
# as `nomad_measurements.ppms:ppms_data_parser`.
ppms_data_parser = DataParserEntryPoint(
    # Main file matching criteria: file name, MIME type and file contents.
    mainfile_name_re=r'.+\.dat',
    mainfile_mime_re='text/plain|application/x-wine-extension-ini',
    mainfile_contents_re='BYAPP,',
    # Descriptive metadata of the entry point.
    name='PpmsDataParser',
    description='New parser entry point configuration.',
)


class SqcParserEntryPoint(ParserEntryPoint):
    # Parser entry point that builds a `PPMSSequenceParser` on demand.

    def load(self):
        """Create the `PPMSSequenceParser` from this entry point's fields."""
        # Imported here rather than at module level, so the parser module is
        # only imported once the entry point is actually loaded.
        from nomad_measurements.ppms.parser import PPMSSequenceParser

        parser_kwargs = self.dict()
        return PPMSSequenceParser(**parser_kwargs)


# Entry point instance for PPMS sequence files; referenced from
# `pyproject.toml` as `nomad_measurements.ppms:ppms_sequence_parser`.
ppms_sequence_parser = SqcParserEntryPoint(
    # Main file matching criteria: file name and MIME type.
    mainfile_name_re=r'.+\.seq',
    mainfile_mime_re='text/plain',
    # Descriptive metadata of the entry point.
    name='PpmsSequenceParser',
    description='New parser entry point configuration.',
)


class PPMSSchemaEntryPoint(SchemaPackageEntryPoint):
    # Schema package entry point that exposes the PPMS schema package.

    # NOTE(review): not read anywhere in the visible code — presumably a
    # leftover from the plugin template; confirm before relying on it.
    parameter: int = Field(default=0, description='Custom configuration parameter')

    def load(self):
        """Return the `m_package` defined in `nomad_measurements.ppms.schema`."""
        # Imported here rather than at module level, so the schema module is
        # only imported once the entry point is actually loaded.
        from nomad_measurements.ppms.schema import m_package as ppms_package

        return ppms_package


# Entry point instance for the PPMS schema package; referenced from
# `pyproject.toml` as `nomad_measurements.ppms:ppms_schema`.
# NOTE(review): name and description look like plugin-template placeholders
# (the parser entry points use 'Ppms...' names) — confirm whether intended.
ppms_schema = PPMSSchemaEntryPoint(
    description='New schema package entry point configuration.',
    name='NewSchemaPackage',
)
121 changes: 121 additions & 0 deletions src/nomad_measurements/ppms/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from time import perf_counter, sleep
from typing import (
TYPE_CHECKING,
)

from nomad.datamodel import ClientContext, EntryArchive
from nomad.datamodel.data import (
EntryData,
)
from nomad.datamodel.metainfo.annotations import (
ELNAnnotation,
)
from nomad.metainfo import Quantity
from nomad.parsing import MatchingParser

if TYPE_CHECKING:
from nomad.datamodel.datamodel import (
EntryArchive,
)

from nomad.datamodel import EntryArchive
from nomad.datamodel.metainfo.basesections import (
BaseSection,
)

from nomad_measurements.ppms.schema import PPMSMeasurement
from nomad_measurements.utils import create_archive


def find_matching_sequence_file(
    archive, entry, logger, *, timeout=15, poll_interval=0.1
):
    """
    Link the PPMS sequence file entry of the current upload to `entry`.

    Repeatedly searches the upload of `archive` for entries containing a
    `PPMSSequenceFile` section until at least one is found or `timeout`
    seconds have elapsed. On success `entry.sequence_file` is set and the
    assigned value is logged at info level; on timeout a warning is logged
    and `entry` is left untouched.

    Args:
        archive: The archive being parsed. Its `m_context.upload_id` and
            `metadata.main_author.user_id` restrict the search.
        entry: The section that receives the `sequence_file` value.
        logger: Logger used for the info and warning messages.
        timeout: Seconds to keep polling before giving up.
        poll_interval: Seconds to sleep between two searches.

    Returns:
        Always `None`; the result is communicated by mutating `entry`.
    """
    # No search is attempted when running with a client-side context.
    if isinstance(archive.m_context, ClientContext):
        return None
    from nomad.search import search

    tic = perf_counter()
    while True:
        search_result = search(
            owner='user',
            query={
                'results.eln.sections:any': ['PPMSSequenceFile'],
                'upload_id:any': [archive.m_context.upload_id],
            },
            user_id=archive.metadata.main_author.user_id,
        )
        if search_result.data:
            # Every hit is assigned and logged in turn, so with several
            # sequence entries in the upload the last one wins.
            for sequence in search_result.data:
                # presumably the first search quantity holds the sequence file
                # path (`PPMSSequenceFile.file_path`) — TODO confirm
                sequence_file = sequence['search_quantities'][0]['str_value']
                entry.sequence_file = sequence_file
                logger.info(sequence_file)
            return None
        sleep(poll_interval)
        if perf_counter() - tic > timeout:
            # Adjacent literals instead of a backslash continuation inside the
            # string, which glued "were" and "not" together in the message.
            logger.warning(
                'The Sequence File entry/ies in the current upload were '
                "not found and couldn't be referenced."
            )
            return None


class PPMSFile(EntryData):
    # Entry data section that `PPMSParser.parse` stores in `archive.data`.
    # It only holds a reference to the measurement created from the data file.
    measurement = Quantity(
        type=PPMSMeasurement,
        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
    )


class PPMSParser(MatchingParser):
    """
    Parser for PPMS data files.

    For each main file a measurement section is created and written to a
    separate `<name>.archive.json` archive via `create_archive`; the parsed
    entry itself only stores a `PPMSFile` referencing that measurement.
    """

    # Section class instantiated for every parsed data file. It is a class
    # attribute so that specialised parsers can override it in a subclass,
    # instead of being reset on the instance on every `parse` call.
    ppms_measurement: type[PPMSMeasurement] = PPMSMeasurement

    def parse(self, mainfile: str, archive: EntryArchive, logger) -> None:
        """
        Create the measurement archive for `mainfile` and reference it.

        Args:
            mainfile: Path of the data file to parse.
            archive: Archive of the entry being parsed; receives `data` and
                `metadata.entry_name`.
            logger: Logger passed on to the sequence file lookup.
        """
        data_file = mainfile.split('/')[-1]
        # Path of the data file after the last 'raw/' component of `mainfile`.
        data_file_with_path = mainfile.split('raw/')[-1]

        entry = self.ppms_measurement()
        entry.data_file = data_file_with_path

        # Drop only the actual extension instead of a fixed four characters,
        # so names whose extension is not three letters long stay intact.
        stem = data_file.rsplit('.', 1)[0]
        file_name = f'{stem}.archive.json'

        find_matching_sequence_file(archive, entry, logger)
        archive.data = PPMSFile(measurement=create_archive(entry, archive, file_name))
        archive.metadata.entry_name = data_file + ' measurement file'


class PPMSSequenceFile(BaseSection, EntryData):
    # Entry data section that `PPMSSequenceParser.parse` stores in
    # `archive.data`; it records the path of the parsed sequence file.
    file_path = Quantity(
        type=str,
        a_eln={'component': 'FileEditQuantity'},
        a_browser={'adaptor': 'RawFileAdaptor'},
    )


class PPMSSequenceParser(MatchingParser):
    """
    Parser for PPMS sequence files.

    Stores a section holding the path of the sequence file as the data of
    the parsed entry.
    """

    # Section class instantiated for every parsed sequence file. It is a class
    # attribute so that specialised parsers can override it in a subclass,
    # instead of being reset on the instance on every `parse` call.
    ppms_sequence: type[PPMSSequenceFile] = PPMSSequenceFile

    def parse(self, mainfile: str, archive: EntryArchive, logger) -> None:
        """
        Populate `archive` with a sequence file section for `mainfile`.

        Args:
            mainfile: Path of the sequence file to parse.
            archive: Archive of the entry being parsed; receives `data` and
                `metadata.entry_name`.
            logger: Unused here; part of the parser interface.
        """
        data_file = mainfile.split('/')[-1]
        # Path of the sequence file after the last 'raw/' component of `mainfile`.
        data_file_with_path = mainfile.split('raw/')[-1]
        archive.data = self.ppms_sequence(file_path=data_file_with_path)
        archive.metadata.entry_name = data_file + ' sequence file'
Loading

0 comments on commit 81e3c1d

Please sign in to comment.