From ae2bc0d469870d1f46e3e01a33b5795d766c3226 Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Tue, 5 Nov 2024 18:55:51 +0100 Subject: [PATCH 01/37] First version of edit extraction schema --- pyproject.toml | 3 +- .../__init__.py | 15 + .../llm_extraction_schema.py | 479 ++++++++++++++++++ 3 files changed, 496 insertions(+), 1 deletion(-) create mode 100644 src/perovskite_solar_cell_database/llm_extraction_schema.py diff --git a/pyproject.toml b/pyproject.toml index 9990d74..016246d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -135,4 +135,5 @@ perovskite_solar_cell = "perovskite_solar_cell_database:perovskite_solar_cell" solar_cell_app = "perovskite_solar_cell_database.apps:solar_cells" perovskite_composition = "perovskite_solar_cell_database:perovskite_composition" ion_parser = "perovskite_solar_cell_database:ion_parser" -perovskite_ions_app = "perovskite_solar_cell_database.apps:perovskite_ions" \ No newline at end of file +perovskite_ions_app = "perovskite_solar_cell_database.apps:perovskite_ions" +llm_extraction_schema = "perovskite_solar_cell_database:llm_extraction_schema" diff --git a/src/perovskite_solar_cell_database/__init__.py b/src/perovskite_solar_cell_database/__init__.py index f8e759f..182f035 100644 --- a/src/perovskite_solar_cell_database/__init__.py +++ b/src/perovskite_solar_cell_database/__init__.py @@ -66,3 +66,18 @@ def load(self): }, }, ) + + +class LLMSchemaExtractionPackageEntryPoint(SchemaPackageEntryPoint): + def load(self): + from perovskite_solar_cell_database.llm_extraction_schema import ( + m_package, + ) + + return m_package + + +llm_extraction_schema = LLMSchemaExtractionPackageEntryPoint( + name='LLMExtractionSchema', + description='Schema package defined for the perovskite solar cells database LLM extraction.', +) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py new file mode 100644 index 0000000..a567f52 --- /dev/null +++ 
b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -0,0 +1,479 @@ +from typing import ( + TYPE_CHECKING, +) + +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.basesections import PublicationReference +from nomad.datamodel.metainfo.eln import ELNAnnotation +from nomad.metainfo import JSON, Quantity, Section, SubSection +from nomad.metainfo.metainfo import MEnum + +if TYPE_CHECKING: + pass + +from nomad.datamodel.data import Schema +from nomad.metainfo import SchemaPackage + +m_package = SchemaPackage() + + +# LightSource class +class LightSource(ArchiveSection): + m_def = Section(label='Light Source') + + type = Quantity( + type=MEnum( + [ + 'AM 1.5G', + 'AM 1.5D', + 'AM 0', + 'Monochromatic', + 'White LED', + 'Other', + 'Outdoor', + ] + ), + description='Type of light source', + a_eln=ELNAnnotation(label='Light Source Type', component='EnumEditQuantity'), + ) + + description = Quantity( + type=str, + description='Additional details about the light source. 
This is very important.', + a_eln=ELNAnnotation(label='Description', component='StringEditQuantity'), + ) + + light_intensity = Quantity( + type=float, + unit='mW/cm**2', + description='Light intensity value', + a_eln=ELNAnnotation( + label='Light Intensity', + component='NumberEditQuantity', + defaultDisplayUnit='mW/cm**2', + props=dict(minValue=0), + ), + ) + + lamp = Quantity( + type=str, + description='Type of lamp used to generate the spectrum', + a_eln=ELNAnnotation(label='Lamp', component='StringEditQuantity'), + ) + + +# Stability class +class Stability(ArchiveSection): + time = Quantity( + type=float, + unit='hour', + description='Duration of the stability test', + a_eln=ELNAnnotation( + label='Time', defaultDisplayUnit='hour', component='NumberEditQuantity' + ), + ) + + light_intensity = Quantity( + type=float, + unit='mW/cm**2', + description='Light intensity during stability test', + a_eln=ELNAnnotation( + label='Light Intensity', + component='NumberEditQuantity', + defaultDisplayUnit='mW/cm**2', + props=dict(minValue=0), + ), + ) + + humidity = Quantity( + type=float, + description='Relative humidity during stability test', + a_eln=ELNAnnotation( + label='Humidity', + component='NumberEditQuantity', + props=dict(minValue=0, maxValue=100), + ), + ) + + temperature = Quantity( + type=float, + unit='°C', + description='Temperature during stability test', + a_eln=ELNAnnotation( + label='Temperature', defaultDisplayUnit='°C', component='NumberEditQuantity' + ), + ) + + PCE_T80 = Quantity( + type=float, + unit='hour', + description='Time after which the cell performance has degraded by 20%', + a_eln=ELNAnnotation( + label='PCE T80', defaultDisplayUnit='hour', component='NumberEditQuantity' + ), + ) + + PCE_at_start = Quantity( + type=float, + description='PCE at the start of the experiment', + a_eln=ELNAnnotation(label='PCE at Start', component='NumberEditQuantity'), + ) + + PCE_after_1000_hours = Quantity( + type=float, + description='PCE after 1000 hours', 
+ a_eln=ELNAnnotation( + label='PCE after 1000 Hours', component='NumberEditQuantity' + ), + ) + + PCE_at_end = Quantity( + type=float, + description='PCE at the end of the experiment', + a_eln=ELNAnnotation(label='PCE at End', component='NumberEditQuantity'), + ) + + +# ProcessingAtmosphere class +class ProcessingAtmosphere(ArchiveSection): + m_def = Section(label='Processing Atmosphere') + + type = Quantity( + type=str, + description='Type of atmosphere', + a_eln=ELNAnnotation(label='Atmosphere Type', component='StringEditQuantity'), + ) + + pressure = Quantity( + type=float, + unit='mbar', + description='Pressure during processing', + a_eln=ELNAnnotation( + label='Pressure', defaultDisplayUnit='mbar', component='NumberEditQuantity' + ), + ) + + relative_humidity = Quantity( + type=float, + description='Relative humidity during processing', + a_eln=ELNAnnotation( + label='Relative Humidity', + component='NumberEditQuantity', + props=dict(minValue=0, maxValue=100), + ), + ) + + +# ReactionSolution class +class ReactionSolution(ArchiveSection): + m_def = Section(label='Reaction Solution') + + compounds = Quantity( + type=str, + shape=['*'], + description='List of compounds', + a_eln=ELNAnnotation(label='Compounds', component='StringEditQuantity'), + ) + + concentrations = Quantity( + type=float, + shape=['*'], + description='Concentrations of compounds', + a_eln=ELNAnnotation(label='Concentrations', component='NumberEditQuantity'), + ) + + concentrations_unit = Quantity( + type=str, + description='Unit of the concentrations', + a_eln=ELNAnnotation( + label='Concentrations Unit', component='StringEditQuantity' + ), + ) + + volume = Quantity( + type=float, + unit='L', + description='Volume of the solution', + a_eln=ELNAnnotation( + label='Volume', defaultDisplayUnit='L', component='NumberEditQuantity' + ), + ) + + temperature = Quantity( + type=float, + unit='°C', + description='Temperature of the solution', + a_eln=ELNAnnotation( + label='Temperature', 
defaultDisplayUnit='°C', component='NumberEditQuantity' + ), + ) + + solvent = Quantity( + type=str, + description='Solvent used', + a_eln=ELNAnnotation(label='Solvent', component='StringEditQuantity'), + ) + + +# ProcessingStep class +class ProcessingStep(ArchiveSection): + m_def = Section(label='Processing Step') + + step_name = Quantity( + type=str, + description='Name of the processing step', + a_eln=ELNAnnotation(label='Step Name', component='StringEditQuantity'), + ) + + method = Quantity( + type=str, + description='Method used in the processing step (e.g., spin-coating, dropcasting)', + a_eln=ELNAnnotation(label='Method', component='StringEditQuantity'), + ) + + atmosphere = SubSection( + section_def=ProcessingAtmosphere, + a_eln=ELNAnnotation(label='Atmosphere'), + ) + + temperature = Quantity( + type=float, + unit='°C', + description='Temperature during the step', + a_eln=ELNAnnotation( + label='Temperature', defaultDisplayUnit='°C', component='NumberEditQuantity' + ), + ) + + duration = Quantity( + type=float, + unit='s', + description='Duration of the step', + a_eln=ELNAnnotation( + label='Duration', defaultDisplayUnit='s', component='NumberEditQuantity' + ), + ) + + antisolvent = Quantity( + type=str, + description='Antisolvent used', + a_eln=ELNAnnotation(label='Antisolvent', component='StringEditQuantity'), + ) + + gas = Quantity( + type=str, + description='Gas used in the process', + a_eln=ELNAnnotation(label='Gas', component='StringEditQuantity'), + ) + + solution = SubSection( + section_def=ReactionSolution, a_eln=ELNAnnotation(label='Solution') + ) + + additional_parameters = Quantity( + type=JSON, + description='Any additional parameters specific to this processing step', + a_eln=ELNAnnotation(label='Additional Parameters'), + ) + + +# Deposition class +class Deposition(ArchiveSection): + steps = SubSection( + section_def=ProcessingStep, + repeats=True, + description='List of processing steps in order of execution. 
Only report conditions that have been explicitly reported.', + ) + + reviewer_additional_notes = Quantity( + type=str, + description='Any additional comments or observations', + a_eln=ELNAnnotation(label='Additional Notes', component='RichTextEditQuantity'), + ) + + additional_notes = Quantity( + type=str, description='Any additional comments or observations' + ) + + +# Layer class +class Layer(ArchiveSection): + name = Quantity( + type=str, + description='Name of the layer', + a_eln=ELNAnnotation(label='Layer Name', component='StringEditQuantity'), + ) + + thickness = Quantity( + type=float, + unit='nm', + description='Thickness of the layer', + a_eln=ELNAnnotation( + label='Thickness', + component='NumberEditQuantity', + defaultDisplayUnit='nm', + props=dict(minValue=0), + ), + ) + + functionality = Quantity( + type=MEnum( + [ + 'Hole-transport', + 'Electron-transport', + 'Contact', + 'Absorber', + 'Other', + 'Substrate', + ] + ), + description='Functionality of the layer', + a_eln=ELNAnnotation(label='Functionality', component='EnumEditQuantity'), + ) + + deposition = SubSection( + section_def=Deposition, a_eln=ELNAnnotation(label='Deposition') + ) + + +# PerovskiteSolarCell class +class LLMExtractedPerovskiteSolarCell(PublicationReference, Schema): + m_def = Section(label='LLM Extracted Perovskite Solar Cell') + + review_completed = Quantity( + type=bool, + description='True if the review of the data is completed', + default=False, + a_eln=ELNAnnotation(label='Review Completed', component='BoolEditQuantity'), + ) + + DOI_number = Quantity( + type=str, + description='DOI number of the publication', + a_eln=ELNAnnotation(label='DOI Number', component='URLEditQuantity'), + ) + + cell_stack = Quantity( + type=str, + shape=['*'], + description='The stack sequence of the cell.', + a_eln=ELNAnnotation(label='Cell Stack', component='StringEditQuantity'), + ) + + perovskite_composition = Quantity( + type=str, + description='Chemical formula of the perovskite absorber', 
+ a_eln=ELNAnnotation( + label='Perovskite Composition', component='StringEditQuantity' + ), + ) + + device_architecture = Quantity( + type=MEnum(['pin', 'nip', 'back-contacted', 'front-contacted']), + description='Device architecture', + a_eln=ELNAnnotation(label='Device Architecture', component='EnumEditQuantity'), + ) + + pce = Quantity( + type=float, + description='Power Conversion Efficiency (PCE)', + a_eln=ELNAnnotation( + label='PCE', + component='NumberEditQuantity', + props=dict(minValue=0, maxValue=40), + ), + ) + + jsc = Quantity( + type=float, + unit='mA/cm**2', + description='Short-circuit current density (JSC)', + a_eln=ELNAnnotation( + label='JSC', defaultDisplayUnit='mA/cm**2', component='NumberEditQuantity' + ), + ) + + voc = Quantity( + type=float, + unit='V', + description='Open-circuit voltage (VOC)', + a_eln=ELNAnnotation( + label='VOC', component='NumberEditQuantity', props=dict(minValue=0) + ), + ) + + ff = Quantity( + type=float, + description='Fill Factor (FF)', + a_eln=ELNAnnotation( + label='Fill Factor', + component='NumberEditQuantity', + props=dict(minValue=0, maxValue=100), + ), + ) + + active_area = Quantity( + type=float, + unit='cm**2', + description='Reported active area of the solar cell.', + a_eln=ELNAnnotation( + label='Active Area', + component='NumberEditQuantity', + defaultDisplayUnit='cm**2', + props=dict(minValue=0), + ), + ) + + number_devices = Quantity( + type=int, + description='Number of devices over which the metrics have been averaged', + a_eln=ELNAnnotation(label='Number of Devices', component='NumberEditQuantity'), + ) + + averaged_quantities = Quantity( + type=bool, + description='True if metrics are averaged over multiple devices', + a_eln=ELNAnnotation(label='Averaged Quantities', component='BoolEditQuantity'), + ) + + light_source = SubSection( + section_def=LightSource, a_eln=ELNAnnotation(label='Light Source') + ) + + bandgap = Quantity( + type=float, + unit='eV', + description='Bandgap of the perovskite 
material in eV. Include this field only if the bandgap has been directly measured in the experiment.', + a_eln=ELNAnnotation( + label='Bandgap', + component='NumberEditQuantity', + props=dict(minValue=0.5, maxValue=4.0), + ), + ) + + encapsulation = Quantity( + type=str, + description='Encapsulation method, if any', + a_eln=ELNAnnotation(label='Encapsulation', component='StringEditQuantity'), + ) + + reviewer_additional_notes = Quantity( + type=str, + description='Any additional comments or observations', + a_eln=ELNAnnotation(label='Additional Notes', component='RichTextEditQuantity'), + ) + + additional_notes = Quantity( + type=str, description='Any additional comments or observations' + ) + + stability = SubSection( + section_def=Stability, a_eln=ELNAnnotation(label='Stability') + ) + + layers = SubSection( + section_def=Layer, repeats=True, a_eln=ELNAnnotation(label='Layers') + ) + + +m_package.__init_metainfo__() From d900bbc83f2d01108dbb3cc5cef9e6b51f24547b Mon Sep 17 00:00:00 2001 From: Kevin M Jablonka <32935233+kjappelbaum@users.noreply.github.com> Date: Wed, 6 Nov 2024 17:03:01 +0100 Subject: [PATCH 02/37] feat: migrate extraction schema to new version (#27) * feat: update schema to new LLM version * Added review base section which needs to be included in every section. 
* add additional field descriptions * Fixed typos and some other things --------- Co-authored-by: Pepe Marquez --- .DS_Store | Bin 0 -> 6148 bytes .../llm_extraction_schema.py | 179 ++++++++++++------ 2 files changed, 119 insertions(+), 60 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..29af2be25832289b77a036485b20994863661f54 GIT binary patch literal 6148 zcmeHK%}T>S5Z<-XrW7FuMUM+!3$~?*;w8lT0!H+pQWFw17_+5G&7l->)fe(jd>&_Z zH)1ho5jz9B-~8@oKgj+t#<(|+28=n3F&i2pN2Ni~-5A<1$%q`s2+MpLrZNQk-NgPn z;I~_>U@^;C@%{V5X`19kzyHBETGr-P+iu%k`_6xqrC$WaJokdx4O*8{reUoI;Z+is z6KCg4W<`)>@k|vYQ4A@!*GU%1(v$Nn%2lne19r#mOq{*NVle87;qZ9b6N{5!zb8gV zqvf(=?;aeUUQC|S=S;q8J~_~>WZPg3Z=ifF=f$68smvb0SLfGxgv0 Date: Wed, 6 Nov 2024 22:27:35 +0100 Subject: [PATCH 03/37] Draft of app for the extracted entries --- pyproject.toml | 1 + .../apps/__init__.py | 10 + .../apps/llm_extracted_solarcells.py | 215 ++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py diff --git a/pyproject.toml b/pyproject.toml index 016246d..3eec4b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,3 +137,4 @@ perovskite_composition = "perovskite_solar_cell_database:perovskite_composition" ion_parser = "perovskite_solar_cell_database:ion_parser" perovskite_ions_app = "perovskite_solar_cell_database.apps:perovskite_ions" llm_extraction_schema = "perovskite_solar_cell_database:llm_extraction_schema" +llm_extracted_solar_cells = "perovskite_solar_cell_database.apps:llm_extracted_solar_cells" \ No newline at end of file diff --git a/src/perovskite_solar_cell_database/apps/__init__.py b/src/perovskite_solar_cell_database/apps/__init__.py index 58839ef..0f646e4 100644 --- a/src/perovskite_solar_cell_database/apps/__init__.py +++ b/src/perovskite_solar_cell_database/apps/__init__.py @@ -1,6 +1,9 @@ from nomad.config.models.plugins import 
AppEntryPoint from perovskite_solar_cell_database.apps.perovskite_ions_app import perovskite_ions_app +from perovskite_solar_cell_database.apps.llm_extracted_solarcells import ( + llm_extracted_solar_cells, +) from perovskite_solar_cell_database.apps.solar_cell_app import solar_cell_app solar_cells = AppEntryPoint( @@ -24,3 +27,10 @@ """, app=perovskite_ions_app, ) +llm_extracted_solar_cells = AppEntryPoint( + name='LLM Extracted Solar Cells', + description=""" + This app allows you to search **LLM extracted solar cell data** within NOMAD. + """, + app=llm_extracted_solar_cells, +) diff --git a/src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py b/src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py new file mode 100644 index 0000000..513390a --- /dev/null +++ b/src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py @@ -0,0 +1,215 @@ +import yaml +from nomad.config.models.ui import ( + App, + Column, + Columns, + FilterMenu, + FilterMenus, + Filters, +) + +llm_extracted_solar_cells = App( + # Label of the App + label='LLM Extracted Solar Cells', + # Path used in the URL, must be unique + path='llm-extracted-solar-cells', + # Used to categorize apps in the explore menu + category='LLM strcutured data extraction', + # Brief description used in the app menu + description=""" + Explore the LLM extracted solar cells. + """, + # Longer description that can also use markdown + readme=""" + Explore LLM extracted solar cells. + """, + # Controls the available search filters. If you want to filter by + # quantities in a schema package, you need to load the schema package + # explicitly here. Note that you can use a glob syntax to load the + # entire package, or just a single schema from a package. 
+ filters=Filters( + include=[ + '*#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell', + ] + ), + # Controls which columns are shown in the results table + columns=Columns( + selected=[ + 'authors', + # 'results.material.elements', + 'entry_type', + 'data.review_completed#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell', + 'references', + # 'data.lab_id#nomad_material_processing.combinatorial.ThinFilmCombinatorialSample' + ], + options={ + 'entry_type': Column(label='Entry type', align='left'), + 'entry_name': Column(label='Name', align='left'), + 'entry_create_time': Column(label='Entry time', align='left'), + 'authors': Column(label='Authors', align='left'), + 'upload_name': Column(label='Upload name', align='left'), + 'references': Column(label='References', align='left'), + 'data.review_completed#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell': Column( + label='Review completed', align='left' + ), # noqa: E501 + 'data.publication_title#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell': Column( + label='Publication title', align='left' + ), # noqa: E501 + # 'data.lab_id#nomad_htem_database.schema_packages.htem_package.HTEMLibrary': Column( # noqa: E501 + # label='Library ID', align='left' + # ), + 'results.material.elements': Column(label='Elements', align='left'), + }, + ), + # Dictionary of search filters that are always enabled for queries made + # within this app. This is especially important to narrow down the + # results to the wanted subset. Any available search filter can be + # targeted here. This example makes sure that only entries that use + # MySchema are included. 
+ filters_locked={ + 'entry_type': 'LLMExtractedPerovskiteSolarCell', + }, + # Controls the filter menus shown on the left + filter_menus=FilterMenus( + options={ + 'material': FilterMenu(label='Material', level=0), + 'elements': FilterMenu(label='Elements / Formula', level=1, size='xl'), + 'eln': FilterMenu(label='Electronic Lab Notebook', level=0), + 'custom_quantities': FilterMenu( + label='User Defined Quantities', level=0, size='l' + ), + 'author': FilterMenu(label='Author / Origin / Dataset', level=0, size='m'), + 'metadata': FilterMenu(label='Visibility / IDs / Schema', level=0), + 'optimade': FilterMenu(label='Optimade', level=0, size='m'), + } + ), + # Controls the default dashboard shown in the search interface + dashboard=yaml.safe_load( + """ + widgets: + - type: terms + scale: linear + search_quantity: data.journal#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell + layout: + xxl: + minH: 3 + minW: 3 + h: 9 + w: 6 + y: 0 + x: 6 + xl: + minH: 3 + minW: 3 + h: 5 + w: 6 + y: 0 + x: 6 + lg: + minH: 3 + minW: 3 + h: 9 + w: 6 + y: 0 + x: 6 + md: + minH: 3 + minW: 3 + h: 5 + w: 4 + y: 0 + x: 4 + sm: + minH: 3 + minW: 3 + h: 5 + w: 3 + y: 0 + x: 4 + - type: terms + scale: linear + search_quantity: authors.name + title: Reviewer names + layout: + xxl: + minH: 3 + minW: 3 + h: 9 + w: 6 + y: 0 + x: 0 + xl: + minH: 3 + minW: 3 + h: 5 + w: 6 + y: 0 + x: 0 + lg: + minH: 3 + minW: 3 + h: 9 + w: 6 + y: 0 + x: 0 + md: + minH: 3 + minW: 3 + h: 5 + w: 4 + y: 0 + x: 0 + sm: + minH: 3 + minW: 3 + h: 5 + w: 4 + y: 0 + x: 0 + - type: histogram + autorange: false + nbins: 30 + y: + scale: linear + x: + search_quantity: data.publication_date#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell + layout: + xxl: + minH: 3 + minW: 3 + h: 3 + w: 8 + y: 0 + x: 12 + xl: + minH: 3 + minW: 3 + h: 3 + w: 7 + y: 0 + x: 12 + lg: + minH: 3 + minW: 3 + h: 3 + w: 8 + y: 0 + x: 12 + md: + minH: 3 + minW: 3 + h: 3 + w: 7 + y: 
0 + x: 8 + sm: + minH: 3 + minW: 3 + h: 3 + w: 5 + y: 0 + x: 7 + + """ + ), +) From 2e8145053c533e3f36789ef81213f9353facf6e6 Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Wed, 6 Nov 2024 22:36:53 +0100 Subject: [PATCH 04/37] Fixed key in app. --- .../apps/llm_extracted_solarcells.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py b/src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py index 513390a..7d28d84 100644 --- a/src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py +++ b/src/perovskite_solar_cell_database/apps/llm_extracted_solarcells.py @@ -89,7 +89,7 @@ widgets: - type: terms scale: linear - search_quantity: data.journal#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell + quantity: data.journal#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell layout: xxl: minH: 3 @@ -128,7 +128,7 @@ x: 4 - type: terms scale: linear - search_quantity: authors.name + quantity: authors.name title: Reviewer names layout: xxl: @@ -172,7 +172,7 @@ y: scale: linear x: - search_quantity: data.publication_date#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell + quantity: data.publication_date#perovskite_solar_cell_database.llm_extraction_schema.LLMExtractedPerovskiteSolarCell layout: xxl: minH: 3 From c38362700d0fdbe58e48f629da394fd00f46c6a6 Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Wed, 6 Nov 2024 22:56:18 +0100 Subject: [PATCH 05/37] Polishing some annotations in the ELN --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index b70fa6a..1d7dabc 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ 
b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -54,7 +54,7 @@ class PerovskiteComposition(SectionRevision): dimensionality = Quantity( type=MEnum(['0D', '1D', '2D', '3D', '2D/3D']), description='Dimensionality of the perovskite structure', - a_eln=ELNAnnotation(label='Dimensionality', component='EnumEditQuantity'), + a_eln=ELNAnnotation(label='Dimensionality', component='RadioEditQuantity'), ) ions_a_site = SubSection( @@ -274,7 +274,7 @@ class ReactionSolution(SectionRevision): # ProcessingStep class class ProcessingStep(SectionRevision): - m_def = Section(label='Processing Step') + m_def = Section(label='Processing Step', label_quantity='method') step_name = Quantity( type=str, From d6c5775234975e31fa93ebffb14f687671bda8b5 Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Thu, 7 Nov 2024 10:02:23 +0100 Subject: [PATCH 06/37] Added proper radio ELN component --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 1d7dabc..842f024 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -54,7 +54,7 @@ class PerovskiteComposition(SectionRevision): dimensionality = Quantity( type=MEnum(['0D', '1D', '2D', '3D', '2D/3D']), description='Dimensionality of the perovskite structure', - a_eln=ELNAnnotation(label='Dimensionality', component='RadioEditQuantity'), + a_eln=ELNAnnotation(label='Dimensionality', component='RadioEnumEditQuantity'), ) ions_a_site = SubSection( From 14314d503582d692217aec5d747f8439233aa21f Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Thu, 7 Nov 2024 10:03:32 +0100 Subject: [PATCH 07/37] fix typo in key --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 842f024..a6db5b9 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -61,7 +61,7 @@ class PerovskiteComposition(SectionRevision): section_def=Ion, repeats=True, a_eln=ELNAnnotation(label='A-site Ions') ) - b_ions_b_site = SubSection( + ions_b_site = SubSection( section_def=Ion, repeats=True, a_eln=ELNAnnotation(label='B-site Ions') ) From 0fd4a67e80bc088e280a34ae3691873ce40b6619 Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Thu, 7 Nov 2024 13:59:06 +0100 Subject: [PATCH 08/37] Changed the enum of the device architecture. --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index a6db5b9..d7cc19e 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -428,7 +428,7 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch ) device_architecture = Quantity( - type=MEnum(['pin', 'nip', 'back-contacted', 'front-contacted']), + type=MEnum(['pin', 'nip', 'Back contacted', 'Front contacted']), description='Device architecture', a_eln=ELNAnnotation(label='Device Architecture', component='EnumEditQuantity'), ) From 6a7d6e9502175447ec88a43e37f6f76dbe255d68 Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Thu, 7 Nov 2024 21:35:32 +0100 Subject: [PATCH 09/37] Unsaved fixes in pyproject --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3eec4b5..b562b1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,4 +137,4 @@ perovskite_composition =
"perovskite_solar_cell_database:perovskite_composition" ion_parser = "perovskite_solar_cell_database:ion_parser" perovskite_ions_app = "perovskite_solar_cell_database.apps:perovskite_ions" llm_extraction_schema = "perovskite_solar_cell_database:llm_extraction_schema" -llm_extracted_solar_cells = "perovskite_solar_cell_database.apps:llm_extracted_solar_cells" \ No newline at end of file +llm_extracted_solar_cells = "perovskite_solar_cell_database.apps:llm_extracted_solar_cells" From 8d85166633cdf7ef57d06b9be2c4b46006ea189f Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Fri, 8 Nov 2024 09:46:20 +0100 Subject: [PATCH 10/37] Changed atmosphere from section to enum --- .../llm_extraction_schema.py | 39 +++---------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index d7cc19e..5c32235 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -209,36 +209,6 @@ class Stability(SectionRevision): ) - - -# ProcessingAtmosphere class -class ProcessingAtmosphere(SectionRevision): - m_def = Section(label='Processing Atmosphere') - - type = Quantity( - type=str, - description='Type of atmosphere', - a_eln=ELNAnnotation(label='Atmosphere Type', component='StringEditQuantity'), - ) - - pressure = Quantity( - type=float, - unit='mbar', - description='Pressure during processing', - a_eln=ELNAnnotation( - label='Pressure', defaultDisplayUnit='mbar', component='NumberEditQuantity' - ), - ) - - relative_humidity = Quantity( - type=float, - description='Relative humidity during processing', - a_eln=ELNAnnotation( - label='Relative Humidity', - component='NumberEditQuantity', - props=dict(minValue=0, maxValue=100), - ), - ) - - - # ReactionSolution class class ReactionSolution(SectionRevision): m_def = Section(label='Reaction Solution') @@ -288,9 +258,12 @@ class
ProcessingStep(SectionRevision): a_eln=ELNAnnotation(label='Method', component='StringEditQuantity'), ) - atmosphere = SubSection( - section_def=ProcessingAtmosphere, - a_eln=ELNAnnotation(label='Atmosphere'), + atmosphere = Quantity( + type=MEnum( + ['Ambient air', 'Dry air', 'Air', 'N2', 'Ar', 'He', 'H2', 'Vacuum', 'Other'] + ), + description='Atmosphere during the step', + a_eln=ELNAnnotation(label='Atmosphere', component='EnumEditQuantity'), ) temperature = Quantity( From 8405cb87ba735ee72df09d4e8577f3b42680b17d Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Fri, 8 Nov 2024 09:53:40 +0100 Subject: [PATCH 11/37] Replaced Perovskite composition --- .../llm_extraction_schema.py | 28 ++----------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 5c32235..b328bf5 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -7,6 +7,7 @@ from nomad.datamodel.metainfo.eln import ELNAnnotation from nomad.metainfo import JSON, Quantity, Section, SubSection from nomad.metainfo.metainfo import MEnum +from perovskite_solar_cell_database.composition import PerovskiteCompositionSection if TYPE_CHECKING: pass @@ -42,32 +43,9 @@ class Ion(SectionRevision): ) -class PerovskiteComposition(SectionRevision): +class PerovskiteComposition(SectionRevision, PerovskiteCompositionSection): m_def = Section(label='Perovskite Composition') - - formula = Quantity( - type=str, - description='The perovskite composition according to IUPAC recommendations, where standard abbreviations are used for all ions', - a_eln=ELNAnnotation(label='Formula', component='StringEditQuantity'), - ) - - dimensionality = Quantity( - type=MEnum(['0D', '1D', '2D', '3D', '2D/3D']), - description='Dimensionality of the perovskite structure', - a_eln=ELNAnnotation(label='Dimensionality', 
component='RadioEnumEditQuantity'), - ) - - ions_a_site = SubSection( - section_def=Ion, repeats=True, a_eln=ELNAnnotation(label='A-site Ions') - ) - - ions_b_site = SubSection( - section_def=Ion, repeats=True, a_eln=ELNAnnotation(label='B-site Ions') - ) - - ions_x_site = SubSection( - section_def=Ion, repeats=True, a_eln=ELNAnnotation(label='X-site Ions') - ) + pass # LightSource class From e18a5c72cfb11560368b065fa10c6c6326c55443 Mon Sep 17 00:00:00 2001 From: Pepe Marquez Date: Fri, 8 Nov 2024 10:00:42 +0100 Subject: [PATCH 12/37] Organized imports --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index b328bf5..60526c5 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -7,6 +7,7 @@ from nomad.datamodel.metainfo.eln import ELNAnnotation from nomad.metainfo import JSON, Quantity, Section, SubSection from nomad.metainfo.metainfo import MEnum + from perovskite_solar_cell_database.composition import PerovskiteCompositionSection if TYPE_CHECKING: From b9849696b870c23fb8151ca06f6cce4f7656ede8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Fri, 8 Nov 2024 12:24:45 +0100 Subject: [PATCH 13/37] Changed coefficient to str --- src/perovskite_solar_cell_database/composition.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/perovskite_solar_cell_database/composition.py b/src/perovskite_solar_cell_database/composition.py index 2be94c0..75c2b33 100644 --- a/src/perovskite_solar_cell_database/composition.py +++ b/src/perovskite_solar_cell_database/composition.py @@ -463,7 +463,7 @@ class PerovskiteIonComponent(SystemComponent, PerovskiteIonSection): ) ) coefficient = Quantity( - type=float, + type=str, description='The stoichiometric 
coefficient', a_eln=ELNAnnotation(component=ELNComponentEnum.NumberEditQuantity), shape=[], @@ -831,12 +831,10 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: self.short_form += ion.abbreviation if ion.coefficient is None: continue - if ion.coefficient == 1: + if ion.coefficient == '1': coefficient_str = '' - elif ion.coefficient == int(ion.coefficient): - coefficient_str = str(int(ion.coefficient)) else: - coefficient_str = f'{ion.coefficient:.2}' + coefficient_str = ion.coefficient self.long_form += f'{ion.abbreviation}{coefficient_str}' if not isinstance(ion.molecular_formula, str): continue From b379b6967db8ab2a02f2d044ac02ab2d49e36bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Fri, 8 Nov 2024 12:27:56 +0100 Subject: [PATCH 14/37] Changed descriptive formula to long form --- src/perovskite_solar_cell_database/composition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/perovskite_solar_cell_database/composition.py b/src/perovskite_solar_cell_database/composition.py index 75c2b33..31a4c66 100644 --- a/src/perovskite_solar_cell_database/composition.py +++ b/src/perovskite_solar_cell_database/composition.py @@ -860,6 +860,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: label='Perovskite Composition', description='A system describing the chemistry and components of the perovskite.', system_relation=Relation(type='root'), + chemical_formula_descriptive=self.long_form, ) parent_system.structural_type = archive.results.material.structural_type From 32a571e7f1f9ff4b701b269d22ff338906e8bd47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Fri, 8 Nov 2024 14:05:47 +0100 Subject: [PATCH 15/37] Changed edit quantity for coefficient --- src/perovskite_solar_cell_database/composition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/composition.py 
b/src/perovskite_solar_cell_database/composition.py index 31a4c66..ed4f002 100644 --- a/src/perovskite_solar_cell_database/composition.py +++ b/src/perovskite_solar_cell_database/composition.py @@ -465,7 +465,7 @@ class PerovskiteIonComponent(SystemComponent, PerovskiteIonSection): coefficient = Quantity( type=str, description='The stoichiometric coefficient', - a_eln=ELNAnnotation(component=ELNComponentEnum.NumberEditQuantity), + a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), shape=[], ) system = Quantity( From ab8cb7ba1140b21dd84decf909e21c06d22a1fe0 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sun, 10 Nov 2024 11:33:06 +0100 Subject: [PATCH 16/37] feat: update schema to match the pydantic model and add field for updating ordering --- .../llm_extraction_schema.py | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 60526c5..9705709 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -367,13 +367,6 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch a_eln=ELNAnnotation(label='DOI Number', component='URLEditQuantity'), ) - cell_stack = Quantity( - type=str, - shape=['*'], - description='The stack sequence of the cell.', - a_eln=ELNAnnotation(label='Cell Stack', component='StringEditQuantity'), - ) - perovskite_composition = SubSection( section_def=PerovskiteComposition, a_eln=ELNAnnotation(label='Perovskite Composition'), @@ -486,5 +479,31 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch section_def=Layer, repeats=True, a_eln=ELNAnnotation(label='Layers') ) + layer_order = Quantity( + type=str, + description='Order of the layers in the device stack. 
Use the layer names as they appear in the "Layers" section, separated by commas.', + a_eln=ELNAnnotation(label='Layer Order', component='StringEditQuantity'), + ) + + # normalizer that reorderes the layers according to the layer_order + def normalize(self): + if self.layer_order: + layer_order = self.layer_order.split(',') + layers = self.layers + new_layers = [] + for layer_name in layer_order: + layer_name_stripped = layer_name.strip() + for layer in layers: + if layer.name == layer_name_stripped: + self.layers.append(layer) + break + + # if the new list is not the same length as the old one + # then the are some issues with the keys and we should raise an error + if len(new_layers) != len(layers): + raise ValueError( + 'The layer order is not valid. Please check the layer names and try again.' + ) + m_package.__init_metainfo__() From 9dfdf9358415ecd933640add450c33cdcc50afe9 Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sun, 10 Nov 2024 11:36:20 +0100 Subject: [PATCH 17/37] chore: update signature of normalizer function --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 9705709..6b9a18d 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -486,7 +486,7 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch ) # normalizer that reorderes the layers according to the layer_order - def normalize(self): + def normalize(self, archive, logger): if self.layer_order: layer_order = self.layer_order.split(',') layers = self.layers From 0d4ad8d628f5db9b50d8874c91a8f7547feb643d Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sun, 10 Nov 2024 11:39:32 +0100 Subject: [PATCH 18/37] fix: bug in normalizer --- 
src/perovskite_solar_cell_database/llm_extraction_schema.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 6b9a18d..8e1d8ca 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -495,7 +495,7 @@ def normalize(self, archive, logger): layer_name_stripped = layer_name.strip() for layer in layers: if layer.name == layer_name_stripped: - self.layers.append(layer) + new_layers.append(layer) break # if the new list is not the same length as the old one @@ -504,6 +504,7 @@ def normalize(self, archive, logger): raise ValueError( 'The layer order is not valid. Please check the layer names and try again.' ) - + else: + self.layers = new_layers m_package.__init_metainfo__() From c238409ed692e64a387a15adf46abbdb310c377d Mon Sep 17 00:00:00 2001 From: Kevin Maik Jablonka Date: Sun, 10 Nov 2024 11:41:52 +0100 Subject: [PATCH 19/37] chore: use better code from sourcery --- .../llm_extraction_schema.py | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 8e1d8ca..f05d4f2 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -487,24 +487,17 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch # normalizer that reorderes the layers according to the layer_order def normalize(self, archive, logger): - if self.layer_order: - layer_order = self.layer_order.split(',') - layers = self.layers - new_layers = [] - for layer_name in layer_order: - layer_name_stripped = layer_name.strip() - for layer in layers: - if layer.name == layer_name_stripped: - new_layers.append(layer) - 
break - - # if the new list is not the same length as the old one - # then the are some issues with the keys and we should raise an error - if len(new_layers) != len(layers): - raise ValueError( - 'The layer order is not valid. Please check the layer names and try again.' - ) - else: - self.layers = new_layers + if not self.layer_order: + return + + layer_dict = {layer.name: layer for layer in self.layers} + ordered_names = [name.strip() for name in self.layer_order.split(',')] + + if set(ordered_names) != set(layer_dict.keys()): + raise ValueError('Layer order does not match available layers') + + # Reorder in single pass + self.layers = [layer_dict[name] for name in ordered_names] + m_package.__init_metainfo__() From 43ed10f27f4bc0adf0b43b98a200b0059d9d65d8 Mon Sep 17 00:00:00 2001 From: Kevin M Jablonka <32935233+kjappelbaum@users.noreply.github.com> Date: Mon, 11 Nov 2024 08:25:06 +0100 Subject: [PATCH 20/37] Update src/perovskite_solar_cell_database/llm_extraction_schema.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Hampus Näsström --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index f05d4f2..4835cb0 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -494,7 +494,8 @@ def normalize(self, archive, logger): ordered_names = [name.strip() for name in self.layer_order.split(',')] if set(ordered_names) != set(layer_dict.keys()): - raise ValueError('Layer order does not match available layers') + logger.warn('The names in layer_order does not match available layers') + return # Reorder in single pass self.layers = [layer_dict[name] for name in ordered_names] From bdea31565b9991937484b60ebfc69f4bcabc8d89 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Wed, 13 Nov 2024 15:24:49 +0100 Subject: [PATCH 21/37] Added missing super normalize call and corrected default display unit --- src/perovskite_solar_cell_database/composition.py | 2 +- src/perovskite_solar_cell_database/llm_extraction_schema.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/composition.py b/src/perovskite_solar_cell_database/composition.py index ed4f002..0b8dc94 100644 --- a/src/perovskite_solar_cell_database/composition.py +++ b/src/perovskite_solar_cell_database/composition.py @@ -692,7 +692,7 @@ class Impurity(PureSubstanceComponent, PerovskiteChemicalSection): type=float, description='The concentration of the additive or impurity.', a_eln=ELNAnnotation( - component=ELNComponentEnum.NumberEditQuantity, defaultDisplayUnit='mol%' + component=ELNComponentEnum.NumberEditQuantity, defaultDisplayUnit='cm^-3' ), unit='cm^-3', shape=[], diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 4835cb0..16a65cc 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -499,6 +499,7 @@ def normalize(self, archive, logger): # Reorder in single pass self.layers = [layer_dict[name] for name in ordered_names] + super().normalize(archive, logger) m_package.__init_metainfo__() From 80c0d87f3b31993b66e8191101f2ab6b48c3d04e Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Fri, 15 Nov 2024 14:34:32 +0100 Subject: [PATCH 22/37] Implements all feedback from Jesper --- .../llm_extraction_schema.py | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 16a65cc..f3e65c1 100644 --- 
a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -63,6 +63,7 @@ class LightSource(SectionRevision): 'White LED', 'Other', 'Outdoor', + '' ] ), description='Type of light source', @@ -110,7 +111,7 @@ class Solute(SectionRevision): ) concentration_unit = Quantity( - type=MEnum(['mol/L', 'mmol/L', 'g/L', 'mg/L', 'wt%', 'vol%', 'M']), + type=MEnum(['mol/L', 'mmol/L', 'g/L', 'mg/L', 'wt%', 'vol%', 'M', '']), description='Unit of concentration', a_eln=ELNAnnotation(label='Concentration Unit', component='EnumEditQuantity'), ) @@ -187,6 +188,30 @@ class Stability(SectionRevision): a_eln=ELNAnnotation(label='PCE at End', component='NumberEditQuantity'), ) + potential_bias = Quantity( + type=MEnum( + ['Open circuit', 'MPPT', 'Constant potential', 'Constant current', 'Constant resistance', ''] + ), + description='Potential bias during stability test', + a_eln=ELNAnnotation(label='Potential Bias', component='EnumEditQuantity'), + ) + + + +class Solvent(SectionRevision): + m_def = Section(label='Solvent') + + name = Quantity( + type=str, + description='Name of the solvent', + a_eln=ELNAnnotation(label='Name', component='StringEditQuantity'), + ) + + ratio = Quantity( + type=float, + description='Ratio of this solvent with respect to others - (0-1)', + a_eln=ELNAnnotation(label='Concentration', component='NumberEditQuantity'), + ) # ReactionSolution class class ReactionSolution(SectionRevision): @@ -214,10 +239,8 @@ class ReactionSolution(SectionRevision): ), ) - solvent = Quantity( - type=str, - description='Solvent used', - a_eln=ELNAnnotation(label='Solvent', component='StringEditQuantity'), + solvents = SubSection( + section_def=Solvent, repeats=True, a_eln=ELNAnnotation(label='Solvents') ) @@ -239,7 +262,7 @@ class ProcessingStep(SectionRevision): atmosphere = Quantity( type=MEnum( - ['Ambient air', 'Dry air', 'Air', 'N2', 'Ar', 'He', 'H2', 'Vacuum', 'Other'] + ['Ambient air', 'Dry air', 
'Air', 'N2', 'Ar', 'He', 'H2', 'Vacuum', 'Other', ''] ), description='Atmosphere during the step', a_eln=ELNAnnotation(label='Atmosphere', component='EnumEditQuantity'), @@ -269,6 +292,12 @@ class ProcessingStep(SectionRevision): a_eln=ELNAnnotation(label='Gas Quenching', component='BoolEditQuantity'), ) + antisolvent_quenching = Quantity( + type=bool, + description='Whether antisolvent quenching was used', + a_eln=ELNAnnotation(label='Antisolvent Quenching', component='BoolEditQuantity'), + ) + solution = SubSection( section_def=ReactionSolution, a_eln=ELNAnnotation(label='Solution') ) @@ -328,6 +357,7 @@ class Layer(SectionRevision): 'Absorber', 'Other', 'Substrate', + '' ] ), description='Functionality of the layer', @@ -373,7 +403,7 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch ) device_architecture = Quantity( - type=MEnum(['pin', 'nip', 'Back contacted', 'Front contacted']), + type=MEnum(['pin', 'nip', 'Back contacted', 'Front contacted', 'Other', '']), description='Device architecture', a_eln=ELNAnnotation(label='Device Architecture', component='EnumEditQuantity'), ) From 41143603d7a16b63f8468873415c50f701cde669 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Fri, 15 Nov 2024 14:39:31 +0100 Subject: [PATCH 23/37] ruff --- .../llm_extraction_schema.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index f3e65c1..2384d9f 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -63,7 +63,7 @@ class LightSource(SectionRevision): 'White LED', 'Other', 'Outdoor', - '' + '', ] ), description='Type of light source', @@ -190,14 +190,20 @@ class Stability(SectionRevision): potential_bias = Quantity( type=MEnum( - ['Open circuit', 'MPPT', 'Constant potential', 'Constant current', 
'Constant resistance', ''] + [ + 'Open circuit', + 'MPPT', + 'Constant potential', + 'Constant current', + 'Constant resistance', + '', + ] ), description='Potential bias during stability test', a_eln=ELNAnnotation(label='Potential Bias', component='EnumEditQuantity'), ) - class Solvent(SectionRevision): m_def = Section(label='Solvent') @@ -213,6 +219,7 @@ class Solvent(SectionRevision): a_eln=ELNAnnotation(label='Concentration', component='NumberEditQuantity'), ) + # ReactionSolution class class ReactionSolution(SectionRevision): m_def = Section(label='Reaction Solution') @@ -262,7 +269,18 @@ class ProcessingStep(SectionRevision): atmosphere = Quantity( type=MEnum( - ['Ambient air', 'Dry air', 'Air', 'N2', 'Ar', 'He', 'H2', 'Vacuum', 'Other', ''] + [ + 'Ambient air', + 'Dry air', + 'Air', + 'N2', + 'Ar', + 'He', + 'H2', + 'Vacuum', + 'Other', + '', + ] ), description='Atmosphere during the step', a_eln=ELNAnnotation(label='Atmosphere', component='EnumEditQuantity'), @@ -295,7 +313,9 @@ class ProcessingStep(SectionRevision): antisolvent_quenching = Quantity( type=bool, description='Whether antisolvent quenching was used', - a_eln=ELNAnnotation(label='Antisolvent Quenching', component='BoolEditQuantity'), + a_eln=ELNAnnotation( + label='Antisolvent Quenching', component='BoolEditQuantity' + ), ) solution = SubSection( @@ -357,7 +377,7 @@ class Layer(SectionRevision): 'Absorber', 'Other', 'Substrate', - '' + '', ] ), description='Functionality of the layer', From a3b39235a6caf24b6ebc00a1a51067853efbe427 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Fri, 15 Nov 2024 15:21:20 +0100 Subject: [PATCH 24/37] Changed empty strings to Unknown and changed solvent ratio to volume_fraction --- .../llm_extraction_schema.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 2384d9f..0ce14b1 
100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -63,7 +63,7 @@ class LightSource(SectionRevision): 'White LED', 'Other', 'Outdoor', - '', + 'Unknown', ] ), description='Type of light source', @@ -111,7 +111,7 @@ class Solute(SectionRevision): ) concentration_unit = Quantity( - type=MEnum(['mol/L', 'mmol/L', 'g/L', 'mg/L', 'wt%', 'vol%', 'M', '']), + type=MEnum(['mol/L', 'mmol/L', 'g/L', 'mg/L', 'wt%', 'vol%', 'M', 'Unknown']), description='Unit of concentration', a_eln=ELNAnnotation(label='Concentration Unit', component='EnumEditQuantity'), ) @@ -196,7 +196,7 @@ class Stability(SectionRevision): 'Constant potential', 'Constant current', 'Constant resistance', - '', + 'Unknown', ] ), description='Potential bias during stability test', @@ -213,9 +213,9 @@ class Solvent(SectionRevision): a_eln=ELNAnnotation(label='Name', component='StringEditQuantity'), ) - ratio = Quantity( + volume_fraction = Quantity( type=float, - description='Ratio of this solvent with respect to others - (0-1)', + description='The volume fraction of the solvent with respect to the other solvents in the solution', a_eln=ELNAnnotation(label='Concentration', component='NumberEditQuantity'), ) @@ -279,7 +279,7 @@ class ProcessingStep(SectionRevision): 'H2', 'Vacuum', 'Other', - '', + 'Unknown', ] ), description='Atmosphere during the step', @@ -377,7 +377,7 @@ class Layer(SectionRevision): 'Absorber', 'Other', 'Substrate', - '', + 'Unknown', ] ), description='Functionality of the layer', @@ -423,7 +423,7 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch ) device_architecture = Quantity( - type=MEnum(['pin', 'nip', 'Back contacted', 'Front contacted', 'Other', '']), + type=MEnum(['pin', 'nip', 'Back contacted', 'Front contacted', 'Other', 'Unknown']), description='Device architecture', a_eln=ELNAnnotation(label='Device Architecture', component='EnumEditQuantity'), ) From 
90a152a398f191fe61398cb2731352d7deae7c70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Fri, 15 Nov 2024 15:23:37 +0100 Subject: [PATCH 25/37] Ruff --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 0ce14b1..143bf19 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -423,7 +423,9 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch ) device_architecture = Quantity( - type=MEnum(['pin', 'nip', 'Back contacted', 'Front contacted', 'Other', 'Unknown']), + type=MEnum( + ['pin', 'nip', 'Back contacted', 'Front contacted', 'Other', 'Unknown'] + ), description='Device architecture', a_eln=ELNAnnotation(label='Device Architecture', component='EnumEditQuantity'), ) From d56b9bbe1aeb71a2c011068c79c5ce991e0d3b03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Mon, 18 Nov 2024 09:03:04 +0100 Subject: [PATCH 26/37] Remove gas and antisolvent quenching quantities from ProcessingStep --- .../llm_extraction_schema.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 143bf19..172b65d 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -304,20 +304,6 @@ class ProcessingStep(SectionRevision): ), ) - gas_quenching = Quantity( - type=bool, - description='Whether gas quenching was used', - a_eln=ELNAnnotation(label='Gas Quenching', component='BoolEditQuantity'), - ) - - antisolvent_quenching = Quantity( - type=bool, - description='Whether antisolvent quenching was used', - 
a_eln=ELNAnnotation( - label='Antisolvent Quenching', component='BoolEditQuantity' - ), - ) - solution = SubSection( section_def=ReactionSolution, a_eln=ELNAnnotation(label='Solution') ) From 6dba7cddb001cb8aa9c17cb5bca30c3148ff1db1 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 18 Nov 2024 11:00:06 +0100 Subject: [PATCH 27/37] Fixes label for volume fraction --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 172b65d..fd20cbf 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -216,7 +216,7 @@ class Solvent(SectionRevision): volume_fraction = Quantity( type=float, description='The volume fraction of the solvent with respect to the other solvents in the solution', - a_eln=ELNAnnotation(label='Concentration', component='NumberEditQuantity'), + a_eln=ELNAnnotation(label='Volume Fraction', component='NumberEditQuantity'), ) From d144f3fc424713c8cdaebf9c539ebec73a593ebc Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 18 Nov 2024 13:25:59 +0100 Subject: [PATCH 28/37] Adds hints for some appropriate fields that might require a quick check --- .../llm_extraction_schema.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index fd20cbf..804a465 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -142,7 +142,7 @@ class Stability(SectionRevision): humidity = Quantity( type=float, - description='Relative humidity during stability test', + description='The ambient humidity as a percentage without the % sign (i.e. 
a value between 0 and 100). When measurements are done in an inert atmosphere, this should be 0. If the humidity is fluctuating, use the average value.', a_eln=ELNAnnotation( label='Humidity', component='NumberEditQuantity', @@ -170,13 +170,13 @@ class Stability(SectionRevision): PCE_at_start = Quantity( type=float, - description='PCE at the start of the experiment', + description='PCE at the start of the experiment as a percentage without the % sign.', a_eln=ELNAnnotation(label='PCE at Start', component='NumberEditQuantity'), ) PCE_after_1000_hours = Quantity( type=float, - description='PCE after 1000 hours', + description='PCE after 1000 hours as a percentage without the % sign.', a_eln=ELNAnnotation( label='PCE after 1000 Hours', component='NumberEditQuantity' ), @@ -184,7 +184,7 @@ class Stability(SectionRevision): PCE_at_end = Quantity( type=float, - description='PCE at the end of the experiment', + description='PCE at the end of the experiment as a percentage without the % sign.', a_eln=ELNAnnotation(label='PCE at End', component='NumberEditQuantity'), ) @@ -289,7 +289,7 @@ class ProcessingStep(SectionRevision): temperature = Quantity( type=float, unit='°C', - description='Temperature during the step', + description='The temperature during the deposition step. Depending on the circumstances the most relevant temperature could be either the ambient temperature, the substrate temperature, or the solution temperature.', a_eln=ELNAnnotation( label='Temperature', defaultDisplayUnit='°C', component='NumberEditQuantity' ), @@ -418,7 +418,8 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch pce = Quantity( type=float, - description='Power Conversion Efficiency (PCE)', + description="""This is the device efficiency in %. Make sure to convert it to a percentage if it's given as a fraction before reporting. +Sometimes several different PCE values are presented for the same device. 
It could be a stabilized efficiency, a value extracted from a reversed JV scan, a value extracted from a forward JV scan. Only state one value. If several values are present for the device The priority is: Stabilized values is preferred before JV data from the reverse scan which is preferred before JV values from the forward scan.""", a_eln=ELNAnnotation( label='PCE', component='NumberEditQuantity', @@ -446,7 +447,7 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch ff = Quantity( type=float, - description='Mostly the Fill factor is given as a percentage (%). In case is not make sure to convert it from ratio to percentage.', + description='This field requires the fill factor as a percentage without the % sign. If the fill factor is given as a fraction, e.g. 0.2, convert it to and write it as 20 without any percentage sign (%).', a_eln=ELNAnnotation( label='Fill Factor', component='NumberEditQuantity', @@ -519,7 +520,7 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch layer_order = Quantity( type=str, - description='Order of the layers in the device stack. Use the layer names as they appear in the "Layers" section, separated by commas.', + description='Order of the layers in the device stack. Use the layer names as they appear in the "Layers" section, separated by commas. If you want to add a missing layer, please add it first to the Layers section below. Then make sure to add the name of the layer, as you list it below, in this field in the right order. 
When you hit save on the top right, the correct order will be set on the layers in the Layers section below.', a_eln=ELNAnnotation(label='Layer Order', component='StringEditQuantity'), ) From 3b2e58143154a796dc292495f9e0765a00cc3003 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Tue, 19 Nov 2024 16:10:41 +0100 Subject: [PATCH 29/37] ruff isort --- src/perovskite_solar_cell_database/apps/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/apps/__init__.py b/src/perovskite_solar_cell_database/apps/__init__.py index 0f646e4..7cf3bc7 100644 --- a/src/perovskite_solar_cell_database/apps/__init__.py +++ b/src/perovskite_solar_cell_database/apps/__init__.py @@ -1,9 +1,9 @@ from nomad.config.models.plugins import AppEntryPoint -from perovskite_solar_cell_database.apps.perovskite_ions_app import perovskite_ions_app from perovskite_solar_cell_database.apps.llm_extracted_solarcells import ( llm_extracted_solar_cells, ) +from perovskite_solar_cell_database.apps.perovskite_ions_app import perovskite_ions_app from perovskite_solar_cell_database.apps.solar_cell_app import solar_cell_app solar_cells = AppEntryPoint( From d6f0be755cde6131f3c1b9555006d0e7a16c9a7e Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Fri, 22 Nov 2024 15:21:53 +0100 Subject: [PATCH 30/37] Removes bandgap at the top level --- .../llm_extraction_schema.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 804a465..3bd60b7 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -483,17 +483,6 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch section_def=LightSource, a_eln=ELNAnnotation(label='Light Source') ) - bandgap = Quantity( - type=float, - unit='eV', - description='Bandgap of 
the perovskite material in eV. Include this field only if the bandgap has been directly measured in the experiment.', - a_eln=ELNAnnotation( - label='Bandgap', - component='NumberEditQuantity', - props=dict(minValue=0.5, maxValue=4.0), - ), - ) - encapsulated = Quantity( type=bool, description='True if the device is encapsulated', From aa12679b270c389897f722768667b0f97c97bed6 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 25 Nov 2024 13:39:08 +0100 Subject: [PATCH 31/37] Adds Evaporation to ProcessingStep Method --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 3bd60b7..aa16629 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -263,7 +263,7 @@ class ProcessingStep(SectionRevision): method = Quantity( type=str, - description='This is the method for the processing of steps in the design of the cells. Some examples are: Spin-coating, Drop-infiltration, Co-evaporation, Doctor blading, Spray coating, Slot-die coating, Ultrasonic spray, Dropcasting, Inkjet printing, Electrospraying, Thermal-annealing, Antisolvent-quenching', + description='This is the method for the processing of steps in the design of the cells. 
Some examples are: Spin-coating, Drop-infiltration, Evaporation, Co-evaporation, Doctor blading, Spray coating, Slot-die coating, Ultrasonic spray, Dropcasting, Inkjet printing, Electrospraying, Thermal-annealing, Antisolvent-quenching', a_eln=ELNAnnotation(label='Method', component='StringEditQuantity'), ) From ca4221c4d50e13b8969e0f13e763df9acc6fd5a3 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 25 Nov 2024 15:32:32 +0100 Subject: [PATCH 32/37] Updates the description for ReactionSolution Volume --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index aa16629..267b2d1 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -231,7 +231,7 @@ class ReactionSolution(SectionRevision): volume = Quantity( type=float, unit='L', - description='Volume of the solution', + description='This volume is the volume of solution used in the experiment, e.g. 
the solvent volume that is spin-coated rather than the volume of the stock solution.', a_eln=ELNAnnotation( label='Volume', defaultDisplayUnit='L', component='NumberEditQuantity' ), From 489e22c6e74cf804032652077fa292f3ed77926d Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Thu, 28 Nov 2024 14:20:24 +0100 Subject: [PATCH 33/37] Fixes entry_type --- .../llm_extraction_schema.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 267b2d1..437d532 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -515,18 +515,16 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch # normalizer that reorderes the layers according to the layer_order def normalize(self, archive, logger): - if not self.layer_order: - return + if self.layer_order: + layer_dict = {layer.name: layer for layer in self.layers} + ordered_names = [name.strip() for name in self.layer_order.split(',')] - layer_dict = {layer.name: layer for layer in self.layers} - ordered_names = [name.strip() for name in self.layer_order.split(',')] + if set(ordered_names) != set(layer_dict.keys()): + logger.warn('The names in layer_order does not match available layers') + return - if set(ordered_names) != set(layer_dict.keys()): - logger.warn('The names in layer_order does not match available layers') - return - - # Reorder in single pass - self.layers = [layer_dict[name] for name in ordered_names] + # Reorder in single pass + self.layers = [layer_dict[name] for name in ordered_names] super().normalize(archive, logger) From 5a748736f3a810535e2bfc9b8d5bf1d327b6623b Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 2 Dec 2024 13:33:13 +0100 Subject: [PATCH 34/37] Adds mg/mL unit --- 
src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index 437d532..ceb2eef 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -111,7 +111,7 @@ class Solute(SectionRevision): ) concentration_unit = Quantity( - type=MEnum(['mol/L', 'mmol/L', 'g/L', 'mg/L', 'wt%', 'vol%', 'M', 'Unknown']), + type=MEnum(['mol/L', 'mmol/L', 'g/L', 'mg/L', 'mg/mL', 'wt%', 'vol%', 'M', 'Unknown']), description='Unit of concentration', a_eln=ELNAnnotation(label='Concentration Unit', component='EnumEditQuantity'), ) From bbf2608116b37f2b6a60f873e187383943c64666 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Mon, 2 Dec 2024 13:39:43 +0100 Subject: [PATCH 35/37] Ruff --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index ceb2eef..c6925de 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -111,7 +111,9 @@ class Solute(SectionRevision): ) concentration_unit = Quantity( - type=MEnum(['mol/L', 'mmol/L', 'g/L', 'mg/L', 'mg/mL', 'wt%', 'vol%', 'M', 'Unknown']), + type=MEnum( + ['mol/L', 'mmol/L', 'g/L', 'mg/L', 'mg/mL', 'wt%', 'vol%', 'M', 'Unknown'] + ), description='Unit of concentration', a_eln=ELNAnnotation(label='Concentration Unit', component='EnumEditQuantity'), ) From dfabf121bcbd8f1f0ca54ba27eb9cf6b47366b76 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Fri, 6 Dec 2024 16:40:27 +0100 Subject: [PATCH 36/37] Adds lightsource fields to stability section --- 
src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index c6925de..e5c2c6c 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -120,7 +120,7 @@ class Solute(SectionRevision): # Stability class -class Stability(SectionRevision): +class Stability(LightSource): time = Quantity( type=float, unit='hour', From 4b10f9927fb51d5779a386727867c7542c54f3f7 Mon Sep 17 00:00:00 2001 From: Sherjeel Shabih Date: Thu, 19 Dec 2024 13:32:30 +0100 Subject: [PATCH 37/37] Moves up super()normalizer to get EntryType --- src/perovskite_solar_cell_database/llm_extraction_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/perovskite_solar_cell_database/llm_extraction_schema.py b/src/perovskite_solar_cell_database/llm_extraction_schema.py index e5c2c6c..6df3e9e 100644 --- a/src/perovskite_solar_cell_database/llm_extraction_schema.py +++ b/src/perovskite_solar_cell_database/llm_extraction_schema.py @@ -517,6 +517,7 @@ class LLMExtractedPerovskiteSolarCell(PublicationReference, SectionRevision, Sch # normalizer that reorderes the layers according to the layer_order def normalize(self, archive, logger): + super().normalize(archive, logger) if self.layer_order: layer_dict = {layer.name: layer for layer in self.layers} ordered_names = [name.strip() for name in self.layer_order.split(',')] @@ -527,7 +528,6 @@ def normalize(self, archive, logger): # Reorder in single pass self.layers = [layer_dict[name] for name in ordered_names] - super().normalize(archive, logger) m_package.__init_metainfo__()