Skip to content

Commit

Permalink
refine_schema
Browse files Browse the repository at this point in the history
  • Loading branch information
fekad committed Oct 15, 2024
1 parent b203f8e commit d5cff0a
Show file tree
Hide file tree
Showing 31 changed files with 233 additions and 64 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ This `nomad`_ plugin was generated with `Cookiecutter`_ along with `@nomad`_'s `
### Install

You should create a virtual environment. You will need the `nomad-lab` package (and `pytest`).
We recommend using Python 3.9.
We recommend using Python 3.11.

```sh
python3 -m venv .pyenv
python -m venv .pyenv
source .pyenv/bin/activate
pip install --upgrade pip
pip install -e '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple
Expand Down
2 changes: 1 addition & 1 deletion notebooks/arise.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "ARISE - Robust recognition and exploratory analysis of crystal structures via Bayesian deep learning",
"description": "In this tutorial, we give an introduction to ARISE (ARtificial-Intelligence-based Structure Evaluation), a powerful Bayesian-deep-neural-network tool for the recognition of atomistic structures (A. Leitherer, A. Ziletti, and L.M. Ghiringhelli, Nat. Commun. 12, 6234, 2021). ARISE is robust to structural noise and can treat more than 100 crystal structures, a number that can be extended on demand. While being trained on ideal structures only, ARISE correctly characterizes strongly perturbed single- and polycrystalline systems, from both synthetic and experimental resources. The probabilistic nature of the Bayesian-deep-learning model allows to obtain principled uncertainty estimates. By applying unsupervised learning to the internal neural-network representations, one can reveal grain boundaries and (unapparent) structural regions sharing easily interpretable geometrical properties.",
"date": "2021-03-22",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/atomic-features.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Atomic-features-package usage demonstration",
"description": "In this tutorial, we show how the atomic-features-package can be accessed and used to explore the atomic features from various sources and to prepare the input features for machine-learning studies.",
"date": "2021-12-07",
"category": "query_tutorial",
"category": "Query tutorial",
"methods": [
{
"name": ""
Expand Down
2 changes: 1 addition & 1 deletion notebooks/clustering-tutorial.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to clustering",
"description": "In this tutorial, we introduce the most popular clustering algorithms. We focus on partitioning, hierarchical and density-based clustering algorithms. The methods are tested on synthetic datasets of increasing complexity.",
"date": "2021-01-21",
"category": "beginner_tutorial",
"category": "Beginner tutorial",
"methods": [
{
"name": "Unsupervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/cmlkit.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "cmlkit: Toolkit for Machine Learning in Materials Science and Quantum Chemistry",
"description": "In this tutorial, we will get to know cmlkit, a python package for specifying, evaluating, and optimising machine learning models, and use it to compete in the Nomad 2018 Kaggle challenge.",
"date": "2021-01-14",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/co2-sgd-tutorial.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Subgroup discovery of catalysts\u2019 genes for carbon-dioxide activation on semiconductor oxides",
"description": "In this interactive tutorial we show the application of subgroup discovery for the search for indicators of carbon-dioxide activation with the aim of its further conversion.",
"date": "2021-08-26",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Subgroup discovery"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/compressed-sensing.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Symbolic regression via compressed sensing: a tutorial",
"description": "In this tutorial we will show how to find descriptive parameters to predict materials properties using symbolic regression combined with compressed sensing tools. The relative stability of the zincblende (ZB) versus rocksalt (RS) structure of binary materials is predicted and compared against a model trained with kernel ridge regression.",
"date": "2020-09-20",
"category": "beginner_tutorial",
"category": "Beginner tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/convolutional-nn.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to convolutional neural networks",
"description": "In this tutorial, we briefly introduce the main ideas behind convolutional neural networks, build a neural network model with Keras, and explain the classification decision process using attentive response maps.",
"date": "2021-01-29",
"category": "intermediate_tutorial",
"category": "Intermediate tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/decision-tree.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to decision-trees methods",
"description": "In this tutorial we will introduce decision trees. We go through a toy model introducing the SKLearn API. We then discuss step by step the different theoretical aspects of trees. We then move to training a regression tree and classification tree on different datasets related to materials science. We end the tutorial by covering random forests and bagging classifiers.",
"date": "2020-12-08",
"category": "beginner_tutorial",
"category": "Beginner tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/descriptor-role.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Predicting energy differences between crystal structures: (Meta-)stability of octet-binary compounds",
"description": "A tool for predicting the difference in the total energy between different polymorphs for 82 octet binary compounds, which gives an indication of the stability of the material. This is accomplished by identifying a set of descriptive parameters (a descriptor) from the free-atom data for the binary atomic species comprising the material using the Sure Independent Screening (SIS) + l0-norm minimization approach.",
"date": "2021-10-18",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/domain-of-applicability.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Identifying domains of applicability of machine-Learning models for materials science",
"description": "In this tutorial, we present a method, based on subgroup discovery, for detecting domains of applicability (DA) of ML models within a materials class. The domain of applicability of an ML model is the region of input space where the model predicts the target property with the smallest uncertainty. The utility of this approach is demonstrated by analyzing three state-of-the-art ML models for predicting the formation energy of transparent conducting oxides.",
"date": "2021-01-27",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/dos-similarity-search.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Electronic density-of-states similarity search",
"description": "This notebook shows how to compute the similarity of materials in terms of their electronic density-of-states (DOS), from data retrieved from the NOMAD Archive.",
"date": "2022-03-30",
"category": "intermediate_tutorial",
"category": "Intermediate tutorial",
"methods": [
{
"name": "Similarity search"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/error-estimates.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Error estimates from high-accuracy electronic-structure reference calculations",
"description": "A set of tools to analyze the error in electronic structure calculations due to the choice of numerical settings. We use the NOMAD infrastructure to systematically investigate the deviances in total and relative energies as function of typical settings for basis sets, k-grids, etc. for 71 elemental and 81 binary solids in three different electronic-structure codes.",
"date": "2021-01-21",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/exploratory-analysis.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to exploratory analysis (unsupervised learning) of materials spaces",
"description": "Exploratory analyses make use of unsupervised learning techniques to extract information from unknown datasets. In this tutorial, we make use of some of the most popular clustering and dimension reduction algorithms to analyze a dataset composed of 82 octet-binary compounds.",
"date": "2021-02-04",
"category": "beginner_tutorial",
"category": "Beginner tutorial",
"methods": [
{
"name": "Clustering"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/gap-si-surface.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "The SOAP descriptor, Gaussian Approximation Potentials (GAP) and machine learning of force fields",
"description": "In this tutorial, we will be using a Gaussian Approximation Potentials to analyse results of TB DFT calculations on the Si surface. Along the way we will learn about different descriptors (2b, 3b, SOAP) to describe local atomic environment in order to predict energies and forces of the Si surface.",
"date": "2020-06-18",
"category": "intermediate_tutorial",
"category": "Intermediate tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/grain-boundaries.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Structure similarity and structure-property relationship: grain boundaries of alpha-Fe",
"description": "In this tutorial, we will be using a machine-learning method (clustering) to analyse results of grain-boundary (GB) calculations of alpha-iron. Along the way, we will learn about different methods to describe local atomic environment in order to calculate properties of GBs. We will use these properties to separate the different regions of the GB using clustering methods. Finally we will determine how the energy of the GB is changing according to the angle difference of the regions.",
"date": "2020-01-18",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Unsupervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/hierarchical-sisso.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Hierarchical symbolic regression for identifying key physical parameters correlated with materials properties",
"description": "In this notebook, we describe a hierarchical symbolic-regression approach for finding, based on data, analytical expressions relating materials properties to simpler physicochemical parameters associated with the underlying processes governing the properties.",
"date": "2022-8-3",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/kaggle-competition.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "2018 NOMAD-Kaggle research competition",
"description": "In this tutorial, we will explore the best results of the NOMAD 2018 Kaggle research competition. The goal of this competition was to develop machine-learning models for the prediction of two target properties: the formation energy and the bandgap energy of transparent semiconducting oxides. The purpose of the modelling is to facilitate the discovery of new such materials and allow for advancements in (opto)electronic technologies",
"date": "2021-01-19",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/krr4mat.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to kernel ridge regression for materials-property prediction",
"description": "In this tutorial, we will explore the application of kernel ridge regression to the prediction of materials properties. We will begin with a largely informal, pragmatic introduction to kernel ridge regression, including a rudimentary implementation, in order to become familiar with the basic terminology and considerations. We will then discuss representations, and re-trace the NOMAD 2018 Kaggle challenge.",
"date": "2020-12-15",
"category": "beginner_tutorial",
"category": "Beginner tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/nn-regression.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to multilayer perceptrons (deep neural networks)",
"description": "In this tutorial, we discuss how multilayer perceptrons, a standard neural-network architecture, can be employed for regression tasks. Specifically, we will use the ElemNet neural-network architecture to predict the volume per atom of inorganic compounds, where the Open Quantum Materials Database (OQMD) is used as a resource.",
"date": "2021-01-29",
"category": "beginner_tutorial",
"category": "Beginner tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/perovskite-tolerance-factor.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Finding a tolerance factor to predict perovskite stability with SISSO",
"description": "This tutorial shows how a tolerance factor for predicting perovskite stability can be learned from data with the sure-independence-screening-and-sparsifying-operator (SISSO) descriptor-identification approach.",
"date": "2022-05-18",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/query-nomad-archive.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Querying the NOMAD Archive and performing artificial-intelligence modeling",
"description": "In this tutorial, we demonstrate how to query the NOMAD Archive from the NOMAD Analytics toolkit. We then show examples of machine learning analysis performed on the retrieved data set.",
"date": "2022-04-06",
"category": "query_tutorial",
"category": "Query tutorial",
"methods": [
{
"name": "Unsupervised learning"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to subgroup discovery: Identifying outstanding transition-metal-alloy catalysts",
"description": "This tutorial introduces, by means of two applications in materials science, the artificial-intelligence technique subgroup discovery.",
"date": "2021-10-28",
"category": "intermediate_tutorial",
"category": "Intermediate tutorial",
"methods": [
{
"name": "Subgroup discovery"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/sgd-propylene-oxidation-hte.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Learning Design Rules for Catalysts from High-Throughput Experimentation and Theory via Subgroup Discovery",
"description": "This tutorial explores the application of subgroup discovery (SGD) to an experimental-theoretical data set in order to identify rules on key physicochemical parameters that describe the materials and environmental conditions associated with outstanding performance in heterogeneous catalysis.",
"date": "2022-2-09",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Subgroup discovery"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/soap-atomic-charges.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Machine learning atomic charges",
"description": "In this tutorial, we will use Gaussian process regression, GPR (or equivalently, Kernel Ridge Regression, KRR) to train and predict charges on atoms in small organic molecules.",
"date": "2019-09-26",
"category": "intermediate_tutorial",
"category": "Intermediate tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/tcmi.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Introduction to total cumulative mutual information",
"description": "This interactive notebook introduces the concepts and original implementation of total cumulative mutual information (TCMI), as presented in the related publication. The main results of the publication are also reproduced in a hands-on style",
"date": "2020-02-06",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
2 changes: 1 addition & 1 deletion notebooks/tetradymite-PRM2020.archive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "Discovery of new topological insulators in alloyed tetradymites",
"description": "Learn how to find descriptive parameters (short formulas) that predict whether alloyed materials are topological or trivial insulators, using the example of tetradymites. This notebook is based on the algorithm 'sure independence screening and sparsifying operator' (SISSO) that enables to search for optimal descriptor by scanning huge feature spaces.",
"date": "2020-09-15",
"category": "advanced_tutorial",
"category": "Advanced tutorial",
"methods": [
{
"name": "Supervised learning"
Expand Down
209 changes: 187 additions & 22 deletions notebooks/tutorial_stats.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ classifiers = [
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: Apache Software License",
]
name = "nomad-aitoolkit"
name = "nomad-schema-plugin-ai-toolkit"
description = "Schema and app for AI Toolkit notebooks."
version = "0.1.1"
version = "0.2.0"
readme = "README.rst"
requires-python = ">=3.9"
authors = [
Expand All @@ -28,7 +28,7 @@ license = { file = "LICENSE" }
# dependencies = ["nomad-lab>=1.2.2dev578"]

[project.urls]
Repository = "https://github.com/FAIRmat-NFDI/nomad-aitoolkit"
Repository = "https://github.com/FAIRmat-NFDI/nomad-schema-plugin-ai-toolkit"

[project.optional-dependencies]
dev = ["ruff", "pytest", "structlog"]
Expand Down
11 changes: 8 additions & 3 deletions src/nomad_aitoolkit/apps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@
),
WidgetTerms(
type='terms',
quantity='data.systems.name#nomad_aitoolkit.schema.AIToolkitNotebook',
title='Systems',
quantity='data.applications.name#nomad_aitoolkit.schema.AIToolkitNotebook',
title='Applications',
scale=ScaleEnum.POW1,
layout={
BreakpointEnum.XXL: Layout(h=6, w=6, x=12, y=0),
Expand All @@ -155,7 +155,12 @@
type='url',
path="data.references[?kind=='hub'].uri",
description='Launch Jupyter notebook',
)
),
'repository': RowActionURL(
type='downloadurl',
path="data.references[?kind=='repository'].uri",
description='Link to the repository',
),
},
),
details=RowDetails(enabled=True),
Expand Down
15 changes: 7 additions & 8 deletions src/nomad_aitoolkit/schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ class Method(ArchiveSection):
)


class System(ArchiveSection):
class Application(ArchiveSection):
m_def = Section(a_eln=ELNAnnotation(overview=True))

name = Quantity(
type=str,
a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity),
description='Specifying name of the system.',
description='Specifying name of the application.',
)


Expand Down Expand Up @@ -154,11 +154,10 @@ class AIToolkitNotebook(Schema):
component=ELNComponentEnum.EnumEditQuantity,
props=dict(
suggestions=[
'advanced tutorial',
'beginner tutorial',
'intermediate tutorial',
'query tutorial',
'thermal transport',
'Advanced tutorial',
'Beginner tutorial',
'Intermediate tutorial',
'Query tutorial',
]
),
),
Expand All @@ -175,7 +174,7 @@ class AIToolkitNotebook(Schema):

methods = SubSection(section=Method, repeats=True)

systems = SubSection(section=System, repeats=True)
applications = SubSection(section=Application, repeats=True)

references = SubSection(section=Reference, repeats=True)

Expand Down

0 comments on commit d5cff0a

Please sign in to comment.