Added general PPMS plugin (#133)

* Added general PPMS plugin in its old structure * Adapted PPMS structure * Quick fix on the entry points * Fix in schema bound_logger import * Fixed formatting * Fixed some typos * Small Fix: ACT -> ETO * changed names and made test folder * added entrydata_definition for parser specilization * lint * Changed repeats to not is_scalar in merge_sections (#136) Updated `merge_sections` util function to work with the latest nomad-lab version * 137 merge sections break for non scalar quantities (#138) * Added breaking test for boolean array * Added more breaking test cases * Added fix for comparing non numpy arrays * Ruff * Enhance test_merge_sections to capture output and validate float_array values * Refine warning message for merging sections with differing quantity values * Refactor merge_sections to improve warning logic for differing quantity values * 139 merge sections still breaking for pint quantity arrays (#140) * Added breaking test for float array with units * Added fix for comparison of pint quantity arrays * Added test for multi dimensional array * Ruff * 114 looks for errors in the structlogger in tests (#115) * Add fixture to capture error from logs * Add nomad-lab infrastructure deps required by caplog fixture * Ruff * Refactoring fixtures; parameterize caplog to allow capturing different log levels * Removing the formatter from nomad.utils * testing: add logger.error in normalize * testing: adding the formatter from nomad.utils * Add logstash dep; cleaning * Cleaning * Specify caplog as an arg (from review) * moved nomad search import * lint * change create_archive inheritance * lint * fix test files path * Added ETO test file to tests/ppms * Added ETO sequence file to tests/ppms * Changed regex in parsers mainfile_name_re * Removed entry_type from PPMSSequenceParser * lint * Fixed PPMS parsers Added call of set_entrydata_definition to parse * Added test for PPMS data file * Update __init__.py * Fixed failing ppms test * Fixed ppms test again * PPMS 
data parser now matches two mime types --------- Co-authored-by: Andrea Albino <[email protected]> Co-authored-by: Hampus Näsström <[email protected]> Co-authored-by: Sarthak Kapoor <[email protected]>
FAIRmat-NFDI · Dec 19, 2024 · 81e3c1d · 81e3c1d
1 parent baa56e5
commit 81e3c1d
Show file tree

Hide file tree

Showing 19 changed files with 20,587 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -14,6 +14,10 @@ parsing of following vendor-specific file formats:
 - `.xrdml` (Malvern Panalytical)
 - `.brml` (Bruker)
 
+The `nomad_measurements.ppms` module supports
+parsing of the following file format:
+- `.dat` (in the structure of the Quantum Design PPMS)
+
 ## Getting started
 `nomad-measurements` can be installed from PyPI using `pip`.
 Currently we require features in `nomad-lab` which are not published to PyPI.

diff --git a/pyproject.toml b/pyproject.toml
@@ -134,5 +134,8 @@ where = ["src"]
 general_schema = "nomad_measurements:schema"
 xrd_schema = "nomad_measurements.xrd:schema"
 xrd_parser = "nomad_measurements.xrd:parser"
+ppms_schema = "nomad_measurements.ppms:ppms_schema"
+ppms_data_parser = "nomad_measurements.ppms:ppms_data_parser"
+ppms_sequence_parser = "nomad_measurements.ppms:ppms_sequence_parser"
 
 [tool.setuptools_scm]
diff --git a/src/nomad_measurements/ppms/__init__.py b/src/nomad_measurements/ppms/__init__.py
@@ -0,0 +1,51 @@
+from nomad.config.models.plugins import (
+    ParserEntryPoint,
+    SchemaPackageEntryPoint,
+)
+from pydantic import Field
+
+
+# Parser entry point for PPMS data files. A `#` comment is used instead of a
+# class docstring so the definition of the underlying model stays untouched.
+class DataParserEntryPoint(ParserEntryPoint):
+    def load(self):
+        """
+        Build the PPMS data parser.
+
+        The parser module is imported inside this method, so it is only
+        loaded when the entry point is actually asked for its parser.
+
+        Returns:
+            PPMSParser: A parser constructed from this entry point's fields.
+        """
+        from nomad_measurements.ppms.parser import PPMSParser
+
+        parser_kwargs = self.dict()
+        return PPMSParser(**parser_kwargs)
+
+
+# Instance exposed as the `ppms_data_parser` entry point in pyproject.toml.
+# NOTE(review): `description` reads like an unedited template placeholder.
+ppms_data_parser = DataParserEntryPoint(
+    name='PpmsDataParser',
+    description='New parser entry point configuration.',
+    mainfile_name_re=r'.+\.dat',
+    mainfile_mime_re='text/plain|application/x-wine-extension-ini',
+    mainfile_contents_re='BYAPP,',
+)
+
+
+# Parser entry point for PPMS sequence files. A `#` comment is used instead of
+# a class docstring so the definition of the underlying model stays untouched.
+class SqcParserEntryPoint(ParserEntryPoint):
+    def load(self):
+        """
+        Build the PPMS sequence parser.
+
+        The parser module is imported inside this method, so it is only
+        loaded when the entry point is actually asked for its parser.
+
+        Returns:
+            PPMSSequenceParser: A parser constructed from this entry point's
+            fields.
+        """
+        from nomad_measurements.ppms.parser import PPMSSequenceParser
+
+        parser_kwargs = self.dict()
+        return PPMSSequenceParser(**parser_kwargs)
+
+
+# Instance exposed as the `ppms_sequence_parser` entry point in pyproject.toml.
+# NOTE(review): `description` reads like an unedited template placeholder.
+ppms_sequence_parser = SqcParserEntryPoint(
+    name='PpmsSequenceParser',
+    description='New parser entry point configuration.',
+    mainfile_name_re=r'.+\.seq',
+    mainfile_mime_re='text/plain',
+)
+
+
+# Schema package entry point for the PPMS schema. A `#` comment is used instead
+# of a class docstring so the definition of the underlying model stays
+# untouched.
+class PPMSSchemaEntryPoint(SchemaPackageEntryPoint):
+    # NOTE(review): nothing in this module reads `parameter`; it looks like a
+    # leftover from a plugin template - confirm before relying on it.
+    parameter: int = Field(default=0, description='Custom configuration parameter')
+
+    def load(self):
+        """
+        Provide the PPMS schema package.
+
+        The schema module is imported inside this method, so it is only
+        loaded when the entry point is actually asked for its package.
+
+        Returns:
+            The `m_package` object defined in `nomad_measurements.ppms.schema`.
+        """
+        from nomad_measurements.ppms import schema
+
+        return schema.m_package
+
+
+# Instance exposed as the `ppms_schema` entry point in pyproject.toml.
+# NOTE(review): `name` and `description` read like template placeholders.
+ppms_schema = PPMSSchemaEntryPoint(
+    name='NewSchemaPackage',
+    description='New schema package entry point configuration.',
+)
diff --git a/src/nomad_measurements/ppms/parser.py b/src/nomad_measurements/ppms/parser.py
@@ -0,0 +1,121 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from time import perf_counter, sleep
+from typing import (
+    TYPE_CHECKING,
+)
+
+from nomad.datamodel import ClientContext, EntryArchive
+from nomad.datamodel.data import (
+    EntryData,
+)
+from nomad.datamodel.metainfo.annotations import (
+    ELNAnnotation,
+)
+from nomad.metainfo import Quantity
+from nomad.parsing import MatchingParser
+
+if TYPE_CHECKING:
+    from nomad.datamodel.datamodel import (
+        EntryArchive,
+    )
+
+from nomad.datamodel import EntryArchive
+from nomad.datamodel.metainfo.basesections import (
+    BaseSection,
+)
+
+from nomad_measurements.ppms.schema import PPMSMeasurement
+from nomad_measurements.utils import create_archive
+
+
+def find_matching_sequence_file(
+    archive, entry, logger, *, timeout=15.0, poll_interval=0.1
+):
+    """
+    Reference a `PPMSSequenceFile` entry of the current upload from `entry`.
+
+    The search index is polled for entries in the same upload that contain a
+    `PPMSSequenceFile` section. On the first hit, `entry.sequence_file` is set
+    from that hit's first search quantity and polling stops. If nothing is
+    found within `timeout` seconds, a warning is logged and `entry` is left
+    unchanged.
+
+    Nothing is searched when the archive is bound to a `ClientContext`.
+
+    Args:
+        archive: The archive being parsed. Its `m_context.upload_id` and
+            `metadata.main_author.user_id` restrict the search.
+        entry: The object that receives the `sequence_file` value.
+        logger: Logger used for the info and warning messages.
+        timeout: Seconds to keep polling before giving up.
+        poll_interval: Seconds to sleep between two searches.
+
+    Returns:
+        None in all cases; the result is communicated through `entry`.
+    """
+    if isinstance(archive.m_context, ClientContext):
+        return None
+    # Imported lazily so the search module is only loaded when it is needed.
+    from nomad.search import search
+
+    deadline = perf_counter() + timeout
+    while True:
+        search_result = search(
+            owner='user',
+            query={
+                'results.eln.sections:any': ['PPMSSequenceFile'],
+                'upload_id:any': [archive.m_context.upload_id],
+            },
+            user_id=archive.metadata.main_author.user_id,
+        )
+        if search_result.data:
+            # Only the first hit is used.
+            # NOTE(review): presumably `str_value` of the first search
+            # quantity is the sequence file path - confirm against the index.
+            first_hit = search_result.data[0]
+            sequence_file = first_hit['search_quantities'][0]['str_value']
+            entry.sequence_file = sequence_file
+            logger.info(sequence_file)
+            # Stop polling right away; continuing would waste the remaining
+            # timeout and end in a misleading "not found" warning.
+            return None
+        if perf_counter() >= deadline:
+            logger.warning(
+                'The Sequence File entry/ies in the current upload were '
+                "not found and couldn't be referenced."
+            )
+            return None
+        sleep(poll_interval)
+
+
+# Entry data written for a parsed PPMS data file. It only holds a reference to
+# the `PPMSMeasurement` created by the parser. A `#` comment is used instead of
+# a docstring so the section definition itself stays unchanged.
+class PPMSFile(EntryData):
+    measurement = Quantity(
+        type=PPMSMeasurement,
+        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
+    )
+
+
+class PPMSParser(MatchingParser):
+    """Parser that wraps a PPMS data file into a `PPMSMeasurement` entry."""
+
+    # NOTE(review): annotated as `str` but bound to a class; `parse` rebinds it
+    # to `PPMSMeasurement` on the instance before it is used.
+    ppms_measurement: str = MatchingParser
+
+    def parse(self, mainfile: str, archive: EntryArchive, logger) -> None:
+        """
+        Create a measurement for `mainfile` and reference it from `archive`.
+
+        Args:
+            mainfile: Path of the matched data file, using `/` separators.
+            archive: Archive to populate; `data` and `metadata.entry_name`
+                are set.
+            logger: Logger forwarded to the sequence-file lookup.
+        """
+        self.ppms_measurement = PPMSMeasurement
+
+        # Bare file name, and the part of the path after the last `raw/`.
+        file_name_only = mainfile.rsplit('/', 1)[-1]
+        path_after_raw = mainfile.rsplit('raw/', 1)[-1]
+
+        measurement = self.ppms_measurement()
+        measurement.data_file = path_after_raw
+        # NOTE(review): the author left an explicit `normalize` call on the
+        # measurement commented out here; confirm it is skipped on purpose.
+        find_matching_sequence_file(archive, measurement, logger)
+
+        # The last four characters are dropped to build the archive name; this
+        # assumes a three-letter extension such as `.dat`.
+        archive_name = f'{file_name_only[:-4]}.archive.json'
+        archive.data = PPMSFile(
+            measurement=create_archive(measurement, archive, archive_name)
+        )
+        archive.metadata.entry_name = f'{file_name_only} measurement file'
+
+
+# Entry data written for a PPMS sequence file; it stores the file's path in
+# `file_path`. A `#` comment is used instead of a docstring so the section
+# definition itself stays unchanged.
+class PPMSSequenceFile(BaseSection, EntryData):
+    file_path = Quantity(
+        type=str,
+        a_eln={'component': 'FileEditQuantity'},
+        a_browser={'adaptor': 'RawFileAdaptor'},
+    )
+
+
+class PPMSSequenceParser(MatchingParser):
+    """Parser that records a PPMS sequence file as a `PPMSSequenceFile` entry."""
+
+    # NOTE(review): annotated as `str` but bound to a class; `parse` rebinds it
+    # to `PPMSSequenceFile` on the instance before it is used.
+    ppms_sequence: str = MatchingParser
+
+    def parse(self, mainfile: str, archive: EntryArchive, logger) -> None:
+        """
+        Store the sequence file's path in `archive` and name the entry.
+
+        Args:
+            mainfile: Path of the matched sequence file, using `/` separators.
+            archive: Archive to populate; `data` and `metadata.entry_name`
+                are set.
+            logger: Not used by this method.
+        """
+        self.ppms_sequence = PPMSSequenceFile
+
+        # Bare file name, and the part of the path after the last `raw/`.
+        file_name_only = mainfile.rsplit('/', 1)[-1]
+        path_after_raw = mainfile.rsplit('raw/', 1)[-1]
+
+        archive.data = self.ppms_sequence(file_path=path_after_raw)
+        archive.metadata.entry_name = f'{file_name_only} sequence file'