diff --git a/README.md b/README.md index 03e61c9..953946b 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,10 @@ This `nomad`_ plugin was generated with `Cookiecutter`_ along with `@nomad`_'s ` ### Install You should create a virtual environment. You will need the `nomad-lab` package (and `pytest`). -We recommend using Python 3.9. +We recommend using Python 3.11. ```sh -python3 -m venv .pyenv +python -m venv .pyenv source .pyenv/bin/activate pip install --upgrade pip pip install -e '.[dev]' --index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple diff --git a/notebooks/arise.archive.json b/notebooks/arise.archive.json index 756b4cc..b91fa1d 100644 --- a/notebooks/arise.archive.json +++ b/notebooks/arise.archive.json @@ -4,7 +4,7 @@ "name": "ARISE - Robust recognition and exploratory analysis of crystal structures via Bayesian deep learning", "description": "In this tutorial, we give an introduction to ARISE (ARtificial-Intelligence-based Structure Evaluation), a powerful Bayesian-deep-neural-network tool for the recognition of atomistic structures (A. Leitherer, A. Ziletti, and L.M. Ghiringhelli, Nat. Commun. 12, 6234, 2021). ARISE is robust to structural noise and can treat more than 100 crystal structures, a number that can be extended on demand. While being trained on ideal structures only, ARISE correctly characterizes strongly perturbed single- and polycrystalline systems, from both synthetic and experimental resources. The probabilistic nature of the Bayesian-deep-learning model allows to obtain principled uncertainty estimates. By applying unsupervised learning to the internal neural-network representations, one can reveal grain boundaries and (unapparent) structural regions sharing easily interpretable geometrical properties.", "date": "2021-03-22", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/atomic-features.archive.json b/notebooks/atomic-features.archive.json index 232c662..8e46e44 100644 --- a/notebooks/atomic-features.archive.json +++ b/notebooks/atomic-features.archive.json @@ -4,7 +4,7 @@ "name": "Atomic-features-package usage demonstration", "description": "In this tutorial, we show how the atomic-features-package can be accessed and used to explore the atomic features form various sources and to prepare the input features for machine-learning studies.", "date": "2021-12-07", - "category": "query_tutorial", + "category": "Query tutorial", "methods": [ { "name": "" diff --git a/notebooks/clustering-tutorial.archive.json b/notebooks/clustering-tutorial.archive.json index 5ea359e..5102135 100644 --- a/notebooks/clustering-tutorial.archive.json +++ b/notebooks/clustering-tutorial.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to clustering", "description": "In this tutorial, we introduce to the most popular clustering algorithms. We focus on partitioning, hierarchical and density-based clustering algorithms. The methods are tested on synthetic datasets of increasing complexity", "date": "2021-01-21", - "category": "beginner_tutorial", + "category": "Beginner tutorial", "methods": [ { "name": "Unsupervised learning" diff --git a/notebooks/cmlkit.archive.json b/notebooks/cmlkit.archive.json index 4ae2ff6..c2e0297 100644 --- a/notebooks/cmlkit.archive.json +++ b/notebooks/cmlkit.archive.json @@ -4,7 +4,7 @@ "name": "cmlkit: Toolkit for Machine Learning in Materials Science and Quantum Chemistry", "description": "In this tutorial, we will get to know cmlkit, a python package for specifying, evaluating, and optimising machine learning models, and use it to compete in the Nomad 2018 Kaggle challenge.", "date": "2021-01-14", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/co2-sgd-tutorial.archive.json b/notebooks/co2-sgd-tutorial.archive.json index 86ad11a..7bcff79 100644 --- a/notebooks/co2-sgd-tutorial.archive.json +++ b/notebooks/co2-sgd-tutorial.archive.json @@ -4,7 +4,7 @@ "name": "Subgroup discovery of catalysts\u2019 genes for carbon-dioxide activation on semiconductor oxides", "description": "In this interactive tutorial we show the application of subgroup discovery for the search for indicators of carbond-dioxide activation with the aim of its further conversion.", "date": "2021-08-26", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Subgroup discovery" diff --git a/notebooks/compressed-sensing.archive.json b/notebooks/compressed-sensing.archive.json index b80ccd3..6a8c438 100644 --- a/notebooks/compressed-sensing.archive.json +++ b/notebooks/compressed-sensing.archive.json @@ -4,7 +4,7 @@ "name": "Symbolic regression via compressed sensing: a tutorial", "description": "In this tutorial we will show how to find descriptive parameters to predict materials properties using symbolic regrression combined with compressed sensing tools. The relative stability of the zincblende (ZB) versus rocksalt (RS) structure of binary materials is predicted and compared against a model trained with kernel ridge regression.", "date": "2020-09-20", - "category": "beginner_tutorial", + "category": "Beginner tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/convolutional-nn.archive.json b/notebooks/convolutional-nn.archive.json index f0becec..e5f1780 100644 --- a/notebooks/convolutional-nn.archive.json +++ b/notebooks/convolutional-nn.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to convolutional neural networks", "description": "In this tutorial, we briefly introduce the main ideas behind convolutional neural networks, build a neural network model with Keras, and explain the classification decision process using attentive response maps.", "date": "2021-01-29", - "category": "intermediate_tutorial", + "category": "Intermediate tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/decision-tree.archive.json b/notebooks/decision-tree.archive.json index e8187aa..3422e63 100644 --- a/notebooks/decision-tree.archive.json +++ b/notebooks/decision-tree.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to decision-trees methods", "description": "In this tutorial we will introduce decision trees. We go through a toy model introducing the SKLearn API. We then discuss step by step the different theoretical aspects of trees. We then move to training a regression tree and classification tree on different datasets related to materials science. We end the tutorial by covering random forests and bagging classfiers.", "date": "2020-12-08", - "category": "beginner_tutorial", + "category": "Beginner tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/descriptor-role.archive.json b/notebooks/descriptor-role.archive.json index 357116c..b7e5e4f 100644 --- a/notebooks/descriptor-role.archive.json +++ b/notebooks/descriptor-role.archive.json @@ -4,7 +4,7 @@ "name": "Predicting energy differences between crystal structures: (Meta-)stability of octet-binary compounds", "description": "A tool for predicting the difference in the total energy between different polymorphs for 82 octet binary compounds, which gives an indication of the stability of the material. This is accomplished by identifying a set of descriptive parameters (a descriptor) from the free-atom data for the binary atomic species comprising the material using the Sure Independent Screening (SIS) + l0-norm minimization approach.", "date": "2021-10-18", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/domain-of-applicability.archive.json b/notebooks/domain-of-applicability.archive.json index 2466c96..76d4322 100644 --- a/notebooks/domain-of-applicability.archive.json +++ b/notebooks/domain-of-applicability.archive.json @@ -4,7 +4,7 @@ "name": "Identifying domains of applicability of machine-Learning models for materials science", "description": "In this tutorial, we present a method, based on subgroup discovery, for detecting domains of applicability (DA) of ML models within a materials class. The domain of applicability of an ML model is the region of input space where the model predicts the target property with the smallest uncertainty. The utility of this approach is demonstrated by analyzing three state-of-the-art ML models for predicting the formation energy of transparent conducting oxides.", "date": "2021-01-27", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/dos-similarity-search.archive.json b/notebooks/dos-similarity-search.archive.json index 4e3e476..9107487 100644 --- a/notebooks/dos-similarity-search.archive.json +++ b/notebooks/dos-similarity-search.archive.json @@ -4,7 +4,7 @@ "name": "Electronic density-of-states similarity search", "description": "This notebook shows how to compute the similarity of materials in terms of their electronic density-of-states (DOS), from data retrieved from the NOMAD Archive.", "date": "2022-03-30", - "category": "intermediate_tutorial", + "category": "Intermediate tutorial", "methods": [ { "name": "Similarity search" diff --git a/notebooks/error-estimates.archive.json b/notebooks/error-estimates.archive.json index d579041..17aee0c 100644 --- a/notebooks/error-estimates.archive.json +++ b/notebooks/error-estimates.archive.json @@ -4,7 +4,7 @@ "name": "Error estimates from high-accuracy electronic-structure reference calculations", "description": "A set of tools to analyze the error in electronic structure calculations due to the choice of numerical settings. We use the NOMAD infrastructure to systematically investigate the deviances in total and relative energies as function of typical settings for basis sets, k-grids, etc. for 71 elemental and 81 binary solids in three different electronic-structure codes.", "date": "2021-01-21", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/exploratory-analysis.archive.json b/notebooks/exploratory-analysis.archive.json index e94d873..c20e8bc 100644 --- a/notebooks/exploratory-analysis.archive.json +++ b/notebooks/exploratory-analysis.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to exploratory analysis (unsupervised learning) of materials spaces", "description": "Exploratory analyses make use of unsupervised learning techniques to extract information from unknown datasets. In this tutorial, we make use of some of the most popular clustering and dimension reduction algorithms to analyze a dataset composed of 82 octet-binary compounds.", "date": "2021-02-04", - "category": "beginner_tutorial", + "category": "Beginner tutorial", "methods": [ { "name": "Clustering" diff --git a/notebooks/gap-si-surface.archive.json b/notebooks/gap-si-surface.archive.json index 8e69997..305481e 100644 --- a/notebooks/gap-si-surface.archive.json +++ b/notebooks/gap-si-surface.archive.json @@ -4,7 +4,7 @@ "name": "The SOAP descriptor, Gaussian Approximation Potentials (GAP) and machine learning of force fields", "description": "In this tutorial, we will be using a Gaussian Approximation Potentials to analyse results of TB DFT calculations on the Si surface. Along the way we will learn about different descriptors (2b, 3b, SOAP) to describe local atomic environment in order to predict energies and forces of the Si surface.", "date": "2020-06-18", - "category": "intermediate_tutorial", + "category": "Intermediate tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/grain-boundaries.archive.json b/notebooks/grain-boundaries.archive.json index 4774f5a..1fa6aed 100644 --- a/notebooks/grain-boundaries.archive.json +++ b/notebooks/grain-boundaries.archive.json @@ -4,7 +4,7 @@ "name": "Structure similarity and structure-property relationship: grain boundaries of alpha-Fe", "description": "In this tutorial, we will be using a machine-learning method (clustering) to analyse results of grain-boundary (GB) calculations of alpha-iron. Along the way, we will learn about different methods to describe local atomic environment in order to calculate properties of GBs. We will use these properties to separate the different regions of the GB using clustering methods. Finally we will determine how the energy of the GB is changing according to the angle difference of the regions.", "date": "2020-01-18", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Unsupervised learning" diff --git a/notebooks/hierarchical-sisso.archive.json b/notebooks/hierarchical-sisso.archive.json index 620d3e5..83f24c2 100644 --- a/notebooks/hierarchical-sisso.archive.json +++ b/notebooks/hierarchical-sisso.archive.json @@ -4,7 +4,7 @@ "name": "Hierarchical symbolic regression for identifying key physical parameters correlated with materials properties", "description": "In this notebook, we describe a hierarchical symbolic-regression approach for finding, based on data, analytical expressions relating materials properties to simpler physicochemical parameters associated with the underlying processes governing the properties.", "date": "2022-8-3", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/kaggle-competition.archive.json b/notebooks/kaggle-competition.archive.json index a99f70f..3c32daf 100644 --- a/notebooks/kaggle-competition.archive.json +++ b/notebooks/kaggle-competition.archive.json @@ -4,7 +4,7 @@ "name": "2018 NOMAD-Kaggle research competition", "description": "In this tutorial, we will explore the best results of the NOMAD 2018 Kaggle research competition. The goal of this competition was to develop machine-learning models for the prediction of two target properties: the formation energy and the bandgap energy of transparent semiconducting oxides. The purpose of the modelling is to facilitate the discovery of new such materials and allow for advancements in (opto)electronic technologies", "date": "2021-01-19", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/krr4mat.archive.json b/notebooks/krr4mat.archive.json index 9a84c9e..f3b74e5 100644 --- a/notebooks/krr4mat.archive.json +++ b/notebooks/krr4mat.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to kernel ridge regression for materials-property prediction", "description": "In this tutorial, we will explore the application of kernel ridge regression to the prediction of materials properties. We will begin with a largely informal, pragmatic introduction to kernel ridge regression, including a rudimentary implementation, in order to become familiar with the basic terminology and considerations. We will then discuss representations, and re-trace the NOMAD 2018 Kaggle challenge.", "date": "2020-12-15", - "category": "beginner_tutorial", + "category": "Beginner tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/nn-regression.archive.json b/notebooks/nn-regression.archive.json index 8b97a52..2e41a2a 100644 --- a/notebooks/nn-regression.archive.json +++ b/notebooks/nn-regression.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to multilayer perceptrons (deep neural networks)", "description": "In this tutorial, we discuss how multilayer perceptrons, a standard neural-network architecture, can be employed for regression tasks. Specifically, we will use the ElemNet neural-network architecture to predict the volume per atom of inorganic compounds, where the Open Quantum Materials Database (OQMD) is used as a resource.", "date": "2021-01-29", - "category": "beginner_tutorial", + "category": "Beginner tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/perovskite-tolerance-factor.archive.json b/notebooks/perovskite-tolerance-factor.archive.json index 8ee23c7..cce794c 100644 --- a/notebooks/perovskite-tolerance-factor.archive.json +++ b/notebooks/perovskite-tolerance-factor.archive.json @@ -4,7 +4,7 @@ "name": "Finding a tolerance factor to predict perovskite stability with SISSO", "description": "This tutorial shows how a tolerance factor for predicting perovskite stability can be learned from data with the sure-independece-screening-and-sparsifying-operator (SISSO) descriptor-identification approach.", "date": "2022-05-18", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/query-nomad-archive.archive.json b/notebooks/query-nomad-archive.archive.json index 5cde39e..f2a001a 100644 --- a/notebooks/query-nomad-archive.archive.json +++ b/notebooks/query-nomad-archive.archive.json @@ -4,7 +4,7 @@ "name": "Querying the NOMAD Archive and performing artificial-intelligence modeling", "description": "In this tutorial, we demonstrate how to query the NOMAD Archive from the NOMAD Analytics toolkit. We then show examples of machine learning analysis performed on the retrieved data set.", "date": "2022-04-06", - "category": "query_tutorial", + "category": "Query tutorial", "methods": [ { "name": "Unsupervised learning" diff --git a/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json b/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json index e842906..e6ea92c 100644 --- a/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json +++ b/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to subgroup discovery: Identifying outstanding transition-metal-alloy catalysts", "description": "This tutorial introduces, by means of two applications in materials science, the artificial-intelligence technique subgroup discovery.", "date": "2021-10-28", - "category": "intermediate_tutorial", + "category": "Intermediate tutorial", "methods": [ { "name": "Subgroup discovery" diff --git a/notebooks/sgd-propylene-oxidation-hte.archive.json b/notebooks/sgd-propylene-oxidation-hte.archive.json index eb3f533..e97b7b0 100644 --- a/notebooks/sgd-propylene-oxidation-hte.archive.json +++ b/notebooks/sgd-propylene-oxidation-hte.archive.json @@ -4,7 +4,7 @@ "name": "Learning Design Rules for Catalysts from High-Throughput Experimentation and Theory via Subgroup Discovery", "description": "This tutorial explores the application of subgroup discovery (SGD) to an experimental-theoretical data set in order to identify rules on key physicochemical parameters that describe the materials and environmental conditions associated with outstanding performance in heterogeneous catalysis.", "date": "2022-2-09", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Subgroup discovery" diff --git a/notebooks/soap-atomic-charges.archive.json b/notebooks/soap-atomic-charges.archive.json index ba247e7..e5bf849 100644 --- a/notebooks/soap-atomic-charges.archive.json +++ b/notebooks/soap-atomic-charges.archive.json @@ -4,7 +4,7 @@ "name": "Machine learning atomic charges", "description": "In this tutorial, we will use Gaussian process regression, GPR (or equivalently, Kernel Ridge Regression, KRR) to train and predict charges on atoms in small organic molecules.", "date": "2019-09-26", - "category": "intermediate_tutorial", + "category": "Intermediate tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/tcmi.archive.json b/notebooks/tcmi.archive.json index 9db412b..06ce148 100644 --- a/notebooks/tcmi.archive.json +++ b/notebooks/tcmi.archive.json @@ -4,7 +4,7 @@ "name": "Introduction to total cumulative mutual information", "description": "This interactive notebook introduces the concepts and original implementation of total cumulative mutual information (TCMI), as presented in the related publication. The main results of the publication are also reproduced in a hands-on style", "date": "2020-02-06", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/tetradymite-PRM2020.archive.json b/notebooks/tetradymite-PRM2020.archive.json index 21ba7fa..eb8ff80 100644 --- a/notebooks/tetradymite-PRM2020.archive.json +++ b/notebooks/tetradymite-PRM2020.archive.json @@ -4,7 +4,7 @@ "name": "Discovery of new topological insulators in alloyed tetradymites", "description": "Learn how to find descriptive parameters (short formulas) that predict whether alloyed materials are topological or trivial insulators, using the example of tetradymites. This notebook is based on the algorithm 'sure independence screening and sparsifying operator' (SISSO) that enables to search for optimal descriptor by scanning huge feature spaces.", "date": "2020-09-15", - "category": "advanced_tutorial", + "category": "Advanced tutorial", "methods": [ { "name": "Supervised learning" diff --git a/notebooks/tutorial_stats.ipynb b/notebooks/tutorial_stats.ipynb index aa1b3c1..fc4ba55 100644 --- a/notebooks/tutorial_stats.ipynb +++ b/notebooks/tutorial_stats.ipynb @@ -7,16 +7,25 @@ "# Migration tool \"analitics\" notebooks\n", "\n", "Generate legacy list of tools:\n", - "```bash \n", + "\n", + "```bash\n", "nomad dev toolkit-metadata > tutorials.jso\n", - "```" + "```\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'authors': ['Ahmetcik, Emre', 'Ziletti, Angelo', 'Ouyang, Runhai', 'Sbailò, Luigi', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Symbolic regression via compressed sensing: a tutorial', 'description': 'In this tutorial we will show how to find descriptive parameters to predict materials properties using symbolic regrression combined with compressed sensing tools. The relative stability of the zincblende (ZB) versus rocksalt (RS) structure of binary materials is predicted and compared against a model trained with kernel ridge regression.', 'notebook_name': 'compressed_sensing.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-compressed-sensing', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/compressed_sensing.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/compressed_sensing.ipynb', 'link_video': 'https://www.youtube.com/watch?v=73mLp6C2opY', 'link_paper': 'https://th.fhi-berlin.mpg.de/site/uploads/Publications/NJP-19-023017-2017.pdf', 'link_doi_paper': 'https://doi.org/10.1088/1367-2630/aa57bf', 'updated': '2020-09-20', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_keyword': [], 'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Octet binaries'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Compressed sensing', 'Symbolic regression', 'LASSO', 'SISSO', 'Kernel ridge regression', 'Features selection', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Liu, Xiangyue', 'Sutton, Christopher', 'Yamamoto, Takenori', 'Blumenthal, Lars', 'Golebiowski, Jacek', 'Ziletti, Angelo', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': '2018 NOMAD-Kaggle research competition', 'description': 'In this tutorial, we will explore the best results of the NOMAD 2018 Kaggle research competition. The goal of this competition was to develop machine-learning models for the prediction of two target properties: the formation energy and the bandgap energy of transparent semiconducting oxides. The purpose of the modelling is to facilitate the discovery of new such materials and allow for advancements in (opto)electronic technologies', 'notebook_name': 'kaggle_competition.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-kaggle-competition', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/kaggle_competition.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/kaggle_competition.ipynb', 'link_paper': 'https://th.fhi.mpg.de/site/uploads/Publications/s41524-019-0239-3.pdf', 'link_doi_paper': 'https://www.nature.com/articles/s41524-019-0239-3', 'updated': '2021-01-19', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Transparent conducting oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Kernel ridge regression', 'Neural networks', 'SOAP', 'n-gram'], 'platform': ['jupyter']}}, {'authors': ['Ziletti, Angelo', 'Leitherer, Andreas', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Introduction to convolutional neural networks', 'description': 'In this tutorial, we briefly introduce the main ideas behind convolutional neural networks, build a neural network model with Keras, and explain the classification decision process using attentive response maps.', 'notebook_name': 'convolutional_nn.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-convolutional-nn', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/convolutional_nn.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/convolutional_nn.ipynb', 'link_video': 'https://youtu.be/MST8X1yCWK8', 'updated': '2021-01-29', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Images'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Supervised learning', 'Classification', 'Neural networks', 'Convolutional neural networks', 'Attentive response map'], 'platform': ['jupyter']}}, {'authors': ['Fekete, Ádám', 'Stella, Martina', 'Lambert, Henry', 'De Vita, Alessandro', 'Csányi, Gábor'], 'email': 'adam.fekete@kcl.ac.uk', 'title': 'The SOAP descriptor, Gaussian Approximation Potentials (GAP) and machine learning of force fields', 'description': 'In this tutorial, we will be using a Gaussian Approximation Potentials to analyse results of TB DFT calculations on the Si surface. Along the way we will learn about different descriptors (2b, 3b, SOAP) to describe local atomic environment in order to predict energies and forces of the Si surface.', 'notebook_name': 'gap_si_surface.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-gap-si-surface', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/gap_si_surface.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/gap_si_surface.ipynb', 'updated': '2020-06-18', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Silicon', 'Surface'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Gaussian-process regression', 'Kernel ridge regression', 'SOAP', 'Gaussian approximation potentials (GAP)'], 'platform': ['jupyter']}}, {'authors': ['Csányi, Gábor', 'Kermode, James R.'], 'email': 'gc121@cam.ac.uk', 'title': 'Machine learning atomic charges', 'description': 'In this tutorial, we will use Gaussian process regression, GPR (or equivalently, Kernel Ridge Regression, KRR) to train and predict charges on atoms in small organic molecules.', 'notebook_name': 'soap_atomic_charges.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-soap-atomic-charges', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/soap_atomic_charges.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/soap_atomic_charges.ipynb', 'updated': '2019-09-26', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['GDB molecular database', 'GDB7'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Gaussian-process regression', 'Kernel ridge regression', 'SOAP'], 'platform': ['jupyter']}}, {'authors': ['Fekete, Ádám', 'Stella, Martina', 'Lambert, Henry', 'De Vita, Alessandro', 'Csányi, Gábor'], 'email': 'adam.fekete@kcl.ac.uk', 'title': 'Structure similarity and structure-property relationship: grain boundaries of alpha-Fe', 'description': 'In this tutorial, we will be using a machine-learning method (clustering) to analyse results of grain-boundary (GB) calculations of alpha-iron. Along the way, we will learn about different methods to describe local atomic environment in order to calculate properties of GBs. We will use these properties to separate the different regions of the GB using clustering methods. Finally we will determine how the energy of the GB is changing according to the angle difference of the regions.', 'notebook_name': 'grain_boundaries.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-grain-boundaries', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/grain_boundaries.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/grain_boundaries.ipynb', 'link_paper': 'https://www.sciencedirect.com/science/article/pii/S0010465518301450?via%3Dihub', 'link_doi_paper': 'https://www.sciencedirect.com/science/article/pii/S0010465518301450/pdfft?md5=f21651f69edad3505ed3dd3ba38aee18&pid=1-s2.0-S0010465518301450-main.pdf', 'updated': '2020-01-18', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Iron', 'Grain boundaries'], 'category': ['advanced_tutorial'], 'ai_methods': ['Unsupervised learning', 'Supervised learning', 'Clustering', 'Regression', 'k-means', 'Gaussian mixture'], 'platform': ['jupyter']}}, {'authors': ['Regler, Benjamin', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'regler@fhi-berlin.mpg.de', 'title': 'Introduction to total cumulative mutual information', 'description': 'This interactive notebook introduces the concepts and original implementation of total cumulative mutual information (TCMI), as presented in the related publication. The main results of the publication are also reproduced in a hands-on style', 'notebook_name': 'tcmi.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-tcmi', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/tcmi.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/tcmi.ipynb', 'link_paper': 'https://arxiv.org/pdf/2001.11212', 'link_doi_paper': 'https://arxiv.org/abs/2001.11212', 'updated': '2020-02-06', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Synthetic data', 'UCI regression dataset', 'Octet binaries'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Unsupervised learning', 'Features selection', 'Information theory', 'Mutual information', 'Cumulative entropy', 'Clustering', 'TCMI'], 'language': ['python'], 'platform': ['jupyter']}}, {'authors': ['Arif, Mohammad-Yasin', 'Sbailò, Luigi', 'Purcell, Thomas A. R.', 'Ghiringhelli, Luca M.', 'Scheffler, Matthias'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Predicting energy differences between crystal structures: (Meta-)stability of octet-binary compounds', 'description': 'A tool for predicting the difference in the total energy between different polymorphs for 82 octet binary compounds, which gives an indication of the stability of the material. This is accomplished by identifying a set of descriptive parameters (a descriptor) from the free-atom data for the binary atomic species comprising the material using the Sure Independent Screening (SIS) + l0-norm minimization approach.', 'notebook_name': 'descriptor_role.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-descriptor-role', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/descriptor_role.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/descriptor_role.ipynb', 'link_paper': 'https://th.fhi.mpg.de/site/uploads/Publications/PRL-114-105503-2015.pdf', 'link_doi_paper': 'http://dx.doi.org/10.1103/PhysRevLett.114.105503', 'updated': '2021-10-18', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Octet binaries', 'Rock salt', 'Zinc blende'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Features selection', 'SISSO', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Bieniek, Björn', 'Strange, Mikkel', 'Carbogno, Christian', 'Arif, Mohammad-Yasin', 'Sbailò, Luigi', 'Scheffler, Matthias'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Error estimates from high-accuracy electronic-structure reference calculations', 'description': 'A set of tools to analyze the error in electronic structure calculations due to the choice of numerical settings. We use the NOMAD infrastructure to systematically investigate the deviances in total and relative energies as function of typical settings for basis sets, k-grids, etc. for 71 elemental and 81 binary solids in three different electronic-structure codes.', 'notebook_name': 'error_estimates.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-error-estimates', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/error_estimates.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/error_estimates.ipynb', 'link_paper': 'https://th.fhi-berlin.mpg.de/site/uploads/Publications/2008.10402.pdf', 'link_doi_paper': 'https://arxiv.org/abs/2008.10402', 'updated': '2021-01-21', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'category': ['advanced_tutorial'], 'application_system': ['Binaries', 'Elemental solids'], 'ai_methods': ['Supervised learning', 'Regression', 'Linear least-squares regression'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Querying the NOMAD Archive and performing artificial-intelligence modeling', 'description': 'In this tutorial, we demonstrate how to query the NOMAD Archive from the NOMAD Analytics toolkit. We then show examples of machine learning analysis performed on the retrieved data set.', 'notebook_name': 'query_nomad_archive.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-query-nomad-archive', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/query_nomad_archive.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/query_nomad_archive.ipynb', 'updated': '2022-04-06', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Analysing the content of the Archive'], 'application_system': ['Ternaries'], 'category': ['query_tutorial'], 'ai_methods': ['Unsupervised learning', 'Supervised learning', 'Regression', 'Clustering', 'Dimension reduction', 'Random forest'], 'platform': ['jupyter']}}, {'authors': ['Langer, Marcel F.'], 'email': 'langer@fhi-berlin.mpg.de', 'title': 'cmlkit: Toolkit for Machine Learning in Materials Science and Quantum Chemistry', 'description': 'In this tutorial, we will get to know cmlkit, a python package for specifying, evaluating, and optimising machine learning models, and use it to compete in the Nomad 2018 Kaggle challenge.', 'notebook_name': 'cmlkit.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-cmlkit', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/cmlkit.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/cmlkit.ipynb', 'link_paper': 'https://arxiv.org/pdf/2003.12081.pdf', 'link_doi_paper': 'https://arxiv.org/abs/2003.12081', 'updated': '2021-01-14', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Transparent conducting oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Kernel ridge regression', 'SOAP', 'MBTR', 'Symmetry functions'], 'platform': ['jupyter']}}, {'authors': ['Speckhard, Daniel', 'Leitherer, Andreas', 'Ghiringhelli, Luca M.'], 'email': 'speckhard@fhi-berlin.mpg.de', 'title': 'Introduction to decision-trees methods', 'description': 'In this tutorial we will introduce decision trees. We go through a toy model introducing the SKLearn API. We then discuss step by step the different theoretical aspects of trees. We then move to training a regression tree and classification tree on different datasets related to materials science. We end the tutorial by covering random forests and bagging classfiers.', 'notebook_name': 'decision_tree.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-decision-tree', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/decision_tree.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/decision_tree.ipynb', 'link_video': 'https://www.youtube.com/watch?v=YBy9STVaqvU', 'updated': '2020-12-08', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Images', 'Metals', 'Insulators'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Classification', 'Decision tree', 'Random forest', 'Bagging classifier', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'sbailo@fhi-berlin.mpg.de', 'title': 'Introduction to clustering', 'description': 'In this tutorial, we introduce to the most popular clustering algorithms. We focus on partitioning, hierarchical and density-based clustering algorithms. The methods are tested on synthetic datasets of increasing complexity', 'notebook_name': 'clustering_tutorial.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-clustering-tutorial', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/clustering_tutorial.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/clustering_tutorial.ipynb', 'updated': '2021-01-21', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Synthetic data'], 'category': ['beginner_tutorial'], 'ai_methods': ['Unsupervised learning', 'Clustering', 'k-means', 'Hierarchical clustering', 'DBSCAN', 'HDBSCAN'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'sbailo@fhi-berlin.mpg.de', 'title': 'Introduction to exploratory analysis (unsupervised learning) of materials spaces', 'description': 'Exploratory analyses make use of unsupervised learning techniques to extract information from unknown datasets. In this tutorial, we make use of some of the most popular clustering and dimension reduction algorithms to analyze a dataset composed of 82 octet-binary compounds.', 'notebook_name': 'exploratory_analysis.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-exploratory-analysis', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/exploratory_analysis.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/exploratory_analysis.ipynb', 'link_video': 'https://www.youtube.com/watch?v=EJTjF9ehp7k', 'updated': '2021-02-04', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Octet binaries'], 'category': ['beginner_tutorial'], 'ai_methods': ['Clustering', 'Dimension reduction', 'k-means', 'Hierarchical clustering', 'DBSCAN', 'HDBSCAN', 'DenPeak', 'PCA', 't-SNE', 'MDS'], 'platform': ['jupyter']}}, {'authors': ['Arif, Mohammad-Yasin', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Identifying domains of applicability of machine-Learning models for materials science', 'description': 'In this tutorial, we present a method, based on subgroup discovery, for detecting domains of applicability (DA) of ML models within a materials class. The domain of applicability of an ML model is the region of input space where the model predicts the target property with the smallest uncertainty. The utility of this approach is demonstrated by analyzing three state-of-the-art ML models for predicting the formation energy of transparent conducting oxides.', 'notebook_name': 'domain_of_applicability.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-domain-of-applicability', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/domain_of_applicability.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/domain_of_applicability.ipynb', 'link_paper': ' https://th.fhi-berlin.mpg.de/site/uploads/Publications/s41467-020-17112-9.pdf', 'link_doi_paper': 'https://www.nature.com/articles/s41467-020-17112-9', 'updated': '2021-01-27', 'flags': {'featured': True, 'top_of_list': False, 'paper': True}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Transparent conducting oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Subgroup discovery', 'Kernel ridge regression', 'SOAP', 'MBTR', 'n-gram'], 'platform': ['jupyter']}}, {'authors': ['Leitherer, Andreas', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'leitherer@fhi-berlin.mpg.de', 'title': 'Introduction to multilayer perceptrons (deep neural networks)', 'description': 'In this tutorial, we discuss how multilayer perceptrons, a standard neural-network architecture, can be employed for regression tasks. Specifically, we will use the ElemNet neural-network architecture to predict the volume per atom of inorganic compounds, where the Open Quantum Materials Database (OQMD) is used as a resource.', 'notebook_name': 'nn_regression.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-nn-regression', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/nn_regression.ipynb', 'link_public': 'https://nomad-lab.eu/prod/analytics/public/user-redirect/notebooks/tutorials/nn_regression.ipynb', 'link_video': 'https://www.youtube.com/watch?v=U0lI5n8Hleo', 'updated': '2021-01-29', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Materials property prediction'], 'application_system': ['Inorganic compounds', 'OQMD database'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Neural networks', 'Deep neural networks', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Purcell, Thomas A. R.', 'Ghiringhelli, Luca M.', 'Scheffler, Matthias'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Discovery of new topological insulators in alloyed tetradymites', 'description': \"Learn how to find descriptive parameters (short formulas) that predict whether alloyed materials are topological or trivial insulators, using the example of tetradymites. This notebook is based on the algorithm 'sure independence screening and sparsifying operator' (SISSO) that enables to search for optimal descriptor by scanning huge feature spaces.\", 'notebook_name': 'tetradymite_PRM2020.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-tetradymite-PRM2020', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/tetradymite_PRM2020.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/tetradymite_PRM2020.ipynb', 'link_paper': 'https://th.fhi.mpg.de/site/uploads/Publications/PhysRevMaterials.4.034204.pdf', 'link_doi_paper': 'https://journals.aps.org/prmaterials/abstract/10.1103/PhysRevMaterials.4.034204', 'updated': '2020-09-15', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Tetradymites', 'Topological insulators'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Classification', 'Symbolic regression', 'Features selection', 'Atomic features', 'SISSO'], 'platform': ['jupyter']}}, {'authors': ['Leitherer, Andreas', 'Ziletti, Angelo', 'Ghiringhelli, Luca M.'], 'email': 'leitherer@fhi-berlin.mpg.de', 'title': 'ARISE - Robust recognition and exploratory analysis of crystal structures via Bayesian deep learning', 'description': 'In this tutorial, we give an introduction to ARISE (ARtificial-Intelligence-based Structure Evaluation), a powerful Bayesian-deep-neural-network tool for the recognition of atomistic structures (A. Leitherer, A. Ziletti, and L.M. Ghiringhelli, Nat. Commun. 12, 6234, 2021). ARISE is robust to structural noise and can treat more than 100 crystal structures, a number that can be extended on demand. While being trained on ideal structures only, ARISE correctly characterizes strongly perturbed single- and polycrystalline systems, from both synthetic and experimental resources. The probabilistic nature of the Bayesian-deep-learning model allows to obtain principled uncertainty estimates. By applying unsupervised learning to the internal neural-network representations, one can reveal grain boundaries and (unapparent) structural regions sharing easily interpretable geometrical properties.', 'notebook_name': 'ARISE.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-arise', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/ARISE.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/ARISE.ipynb', 'link_paper': 'https://www.nature.com/articles/s41467-021-26511-5.pdf', 'link_doi_paper': 'https://www.nature.com/articles/s41467-021-26511-5', 'updated': '2021-03-22', 'flags': {'featured': True, 'top_of_list': False, 'paper': True}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials science'], 'application_system': ['Grain boundaries', 'Binaries', 'Ternaries', 'Low-dimensional materials'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Neural networks', 'Bayesian deep learning', 'Unsupervised learning', 'Clustering', 'Dimension reduction', 'HDBSCAN', 'UMAP', 'SOAP'], 'platform': ['jupyter']}}, {'authors': ['Langer, Marcel F.'], 'email': 'langer@fhi-berlin.mpg.de', 'title': 'Introduction to kernel ridge regression for materials-property prediction', 'description': 'In this tutorial, we will explore the application of kernel ridge regression to the prediction of materials properties. We will begin with a largely informal, pragmatic introduction to kernel ridge regression, including a rudimentary implementation, in order to become familiar with the basic terminology and considerations. We will then discuss representations, and re-trace the NOMAD 2018 Kaggle challenge.', 'notebook_name': 'krr4mat.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-krr4mat', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/krr4mat.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/krr4mat.ipynb', 'link_video': 'https://www.youtube.com/watch?v=H_MVlljpYHw', 'updated': '2020-12-15', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Transparent conducting oxides'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Kernel ridge regression', 'SOAP'], 'platform': ['jupyter']}}, {'authors': ['Mazheika, Aliaksei', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.', 'Levchenko, Sergey', 'Scheffler, Matthias'], 'email': 'mazheika@fhi-berlin.mpg.de', 'title': 'Subgroup discovery of catalysts’ genes for carbon-dioxide activation on semiconductor oxides', 'description': 'In this interactive tutorial we show the application of subgroup discovery for the search for indicators of carbond-dioxide activation with the aim of its further conversion.', 'notebook_name': 'CO2_SGD.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-co2-sgd-tutorial', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/CO2_SGD.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/CO2_SGD.ipynb', 'link_paper': 'https://arxiv.org/pdf/1912.06515', 'link_doi_paper': 'https://arxiv.org/abs/1912.06515', 'updated': '2021-08-26', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['CO2 activation', 'Heterogeneous catalysis', 'Semicondictor oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Subgroup discovery', 'Decision tree'], 'platform': ['jupyter']}}, {'authors': ['Foppa, Lucas', 'Ghiringhelli, Luca M.'], 'email': 'foppa@fhi-berlin.mpg.de', 'title': 'Introduction to subgroup discovery: Identifying outstanding transition-metal-alloy catalysts', 'description': 'This tutorial introduces, by means of two applications in materials science, the artificial-intelligence technique subgroup discovery.', 'notebook_name': 'sgd_alloys_oxygen_reduction_evolution.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-sgd-alloys-oxygen-reduction-evolution', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/sgd_alloys_oxygen_reduction_evolution.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/sgd_alloys_oxygen_reduction_evolution.ipynb', 'link_paper': 'https://link.springer.com/content/pdf/10.1007/s11244-021-01502-4.pdf', 'link_doi_paper': 'https://doi.org/10.1007/s11244-021-01502-4', 'updated': '2021-10-28', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Heterogeneous catalysis', 'Oxygen evolution reaction', 'Oxygen reduction reaction', 'Scaling relations'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Subgroup discovery', 'Decision tree'], 'platform': ['jupyter']}}, {'authors': ['Naik ,Aakash A.', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Atomic-features-package usage demonstration', 'description': 'In this tutorial, we show how the atomic-features-package can be accessed and used to explore the atomic features form various sources and to prepare the input features for machine-learning studies.', 'notebook_name': 'atomic_features.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-atomic-features', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/atomic_features.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/atomic_features.ipynb', 'updated': '2021-12-07', 'labels': {'application_system': ['Atoms'], 'category': ['query_tutorial'], 'platform': ['jupyter'], 'ai_methods': ['']}}, {'authors': ['Foppa, Lucas', 'Hassanzada, Qaem', 'Bartel, Christopher', 'Purcell, Thomas', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Finding a tolerance factor to predict perovskite stability with SISSO', 'description': 'This tutorial shows how a tolerance factor for predicting perovskite stability can be learned from data with the sure-independece-screening-and-sparsifying-operator (SISSO) descriptor-identification approach.', 'notebook_name': 'perovskites_tolerance_factor.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-perovskite-tolerance-factor', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/perovskites_tolerance_factor.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/perovskites_tolerance_factor.ipynb', 'link_paper': 'https://advances.sciencemag.org/content/advances/5/2/eaav0693.full.pdf', 'link_doi_paper': 'https://doi.org/10.1126/sciadv.aav0693', 'updated': '2022-05-18', 'flags': {'featured': True, 'top_of_list': False, 'paper': True}, 'labels': {'application_section': ['Timely artificial-intelligence applications to materials science'], 'category': ['advanced_tutorial'], 'application_system': ['Perovskites'], 'ai_methods': ['Supervised learning', 'Classification', 'Symbolic regression', 'Compressed sensing', 'SISSO', 'Decision tree', 'Features selection', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Foppa, Lucas', 'Ghiringhelli, Luca M.', 'Scheffler, Matthias'], 'email': 'foppa@fhi-berlin.mpg.de', 'title': 'Learning Design Rules for Catalysts from High-Throughput Experimentation and Theory via Subgroup Discovery', 'description': 'This tutorial explores the application of subgroup discovery (SGD) to an experimental-theoretical data set in order to identify rules on key physicochemical parameters that describe the materials and environmental conditions associated with outstanding performance in heterogeneous catalysis.', 'notebook_name': 'sgd_propylene_oxidation_hte.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-sgd-propylene-oxidation-hte', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/sgd_propylene_oxidation_hte.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/sgd_propylene_oxidation_hte.ipynb', 'link_paper': 'https://pubs.acs.org/doi/10.1021/acscatal.1c04793', 'link_doi_paper': 'https://pubs.acs.org/doi/10.1021/acscatal.1c04793', 'updated': '2022-2-09', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Heterogeneous catalysis'], 'category': ['advanced_tutorial'], 'ai_methods': ['Subgroup discovery'], 'platform': ['jupyter']}}, {'authors': ['Gabaj, Šimon', 'Kuban, Martin', 'Rigamonti, Santiago', 'Draxl, Claudia'], 'email': 'gabajsim@physik.hu-berlin.de', 'title': 'Electronic density-of-states similarity search', 'description': 'This notebook shows how to compute the similarity of materials in terms of their electronic density-of-states (DOS), from data retrieved from the NOMAD Archive.', 'notebook_name': 'dos_similarity_search.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-dos-similarity-search', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/dos_similarity_search.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/dos_similarity_search.ipynb', 'updated': '2022-03-30', 'flags': {'featured': True, 'top_of_list': False, 'paper': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Binaries', 'Ternaries'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Similarity search', 'Fingerprint'], 'platform': ['jupyter']}}, {'authors': ['Foppa, Lucas', 'Purcell, Thomas A. R.', 'Levchenko, Sergey V.', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'foppa@fhi-berlin.mpg.de', 'title': 'Hierarchical symbolic regression for identifying key physical parameters correlated with materials properties', 'description': 'In this notebook, we describe a hierarchical symbolic-regression approach for finding, based on data, analytical expressions relating materials properties to simpler physicochemical parameters associated with the underlying processes governing the properties.', 'notebook_name': 'hierarchical_sisso.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-hierarchical-sisso', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/hierarchical_sisso.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/hierarchical_sisso.ipynb', 'link_paper': 'https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.129.055301', 'link_doi_paper': 'https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.129.055301', 'updated': '2022-8-3', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Bulk properties', 'Perovskites'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Compressed sensing', 'Symbolic regression', 'SISSO', 'Features selection', 'Atomic features'], 'platform': ['jupyter']}}]\n" + ] + } + ], "source": [ "import json\n", "\n", @@ -28,9 +37,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'authors',\n", + " 'description',\n", + " 'email',\n", + " 'flags',\n", + " 'labels',\n", + " 'link',\n", + " 'link_doi_paper',\n", + " 'link_paper',\n", + " 'link_public',\n", + " 'link_video',\n", + " 'notebook_name',\n", + " 'title',\n", + " 'updated',\n", + " 'url'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -40,9 +73,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "('flags', {'featured', 'paper', 'top_of_list'})" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -52,9 +96,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'ai_methods',\n", + " 'application_keyword',\n", + " 'application_section',\n", + " 'application_system',\n", + " 'category',\n", + " 'language',\n", + " 'platform'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -64,9 +125,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "set()" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -76,9 +148,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'Analysing the content of the Archive',\n", + " 'Materials property prediction',\n", + " 'Timely artificial-intelligence applications to Materials Science',\n", + " 'Timely artificial-intelligence applications to Materials science',\n", + " 'Timely artificial-intelligence applications to materials science',\n", + " 'Tutorials for artificial-intelligence methods'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -88,9 +176,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'Atoms',\n", + " 'Binaries',\n", + " 'Bulk properties',\n", + " 'CO2 activation',\n", + " 'Elemental solids',\n", + " 'GDB molecular database',\n", + " 'GDB7',\n", + " 'Grain boundaries',\n", + " 'Heterogeneous catalysis',\n", + " 'Images',\n", + " 'Inorganic compounds',\n", + " 'Insulators',\n", + " 'Iron',\n", + " 'Low-dimensional materials',\n", + " 'Metals',\n", + " 'OQMD database',\n", + " 'Octet binaries',\n", + " 'Oxygen evolution reaction',\n", + " 'Oxygen reduction reaction',\n", + " 'Perovskites',\n", + " 'Rock salt',\n", + " 'Scaling relations',\n", + " 'Semicondictor oxides',\n", + " 'Silicon',\n", + " 'Surface',\n", + " 'Synthetic data',\n", + " 'Ternaries',\n", + " 'Tetradymites',\n", + " 'Topological insulators',\n", + " 'Transparent conducting oxides',\n", + " 'UCI regression dataset',\n", + " 'Zinc blende'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -100,9 +230,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'advanced_tutorial',\n", + " 'beginner_tutorial',\n", + " 'intermediate_tutorial',\n", + " 'query_tutorial'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -110,6 +254,20 @@ "keys" ] }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "map_categories = {\n", + " 'advanced_tutorial': 'Advanced tutorial',\n", + " 'beginner_tutorial': 'Beginner tutorial',\n", + " 'intermediate_tutorial': 'Intermediate tutorial',\n", + " 'query_tutorial':'Query tutorial'\n", + "}" + ] + }, { "cell_type": "code", "execution_count": null, @@ -176,7 +334,14 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -187,9 +352,9 @@ " 'description': tutorial['description'],\n", " 'date': tutorial['updated'],\n", "\n", - " 'category': tutorial['labels']['category'][0],\n", + " 'category': map_categories[tutorial['labels']['category'][0]],\n", " 'methods': [ {'name': v } for v in tutorial['labels']['ai_methods'] ],\n", - " 'systems': [ {'name': v } for v in tutorial['labels']['application_system'] ],\n", + " 'application': [ {'name': v } for v in tutorial['labels']['application_system'] ],\n", " 'platform': 'Python'\n", " }\n", "\n", @@ -301,7 +466,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.11.10" }, "orig_nbformat": 4 }, diff --git a/pyproject.toml b/pyproject.toml index b4d9874..7ffb12d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,9 +13,9 @@ classifiers = [ "Programming Language :: Python :: 3.12", "License :: OSI Approved :: Apache Software License", ] -name = "nomad-aitoolkit" +name = "nomad-schema-plugin-ai-toolkit" description = "Schema and app for AI Toolkit notebooks." -version = "0.1.1" +version = "0.2.0" readme = "README.rst" requires-python = ">=3.9" authors = [ @@ -28,7 +28,7 @@ license = { file = "LICENSE" } # dependencies = ["nomad-lab>=1.2.2dev578"] [project.urls] -Repository = "https://github.com/FAIRmat-NFDI/nomad-aitoolkit" +Repository = "https://github.com/FAIRmat-NFDI/nomad-schema-plugin-ai-toolkit" [project.optional-dependencies] dev = ["ruff", "pytest", "structlog"] diff --git a/src/nomad_aitoolkit/apps/__init__.py b/src/nomad_aitoolkit/apps/__init__.py index cd69987..feb6efd 100644 --- a/src/nomad_aitoolkit/apps/__init__.py +++ b/src/nomad_aitoolkit/apps/__init__.py @@ -134,8 +134,8 @@ ), WidgetTerms( type='terms', - quantity='data.systems.name#nomad_aitoolkit.schema.AIToolkitNotebook', - title='Systems', + quantity='data.applications.name#nomad_aitoolkit.schema.AIToolkitNotebook', + title='Applications', scale=ScaleEnum.POW1, layout={ BreakpointEnum.XXL: Layout(h=6, w=6, x=12, y=0), @@ -155,7 +155,12 @@ type='url', path="data.references[?kind=='hub'].uri", description='Launch Jupyter notebook', - ) + ), + 'repository': RowActionURL( + type='downloadurl', + path="data.references[?kind=='repository'].uri", + description='Link to the repository', + ), }, ), details=RowDetails(enabled=True), diff --git a/src/nomad_aitoolkit/schema/__init__.py b/src/nomad_aitoolkit/schema/__init__.py index 44b6d7e..bcc842b 100644 --- a/src/nomad_aitoolkit/schema/__init__.py +++ b/src/nomad_aitoolkit/schema/__init__.py @@ -44,13 +44,13 @@ class Method(ArchiveSection): ) -class System(ArchiveSection): +class Application(ArchiveSection): m_def = Section(a_eln=ELNAnnotation(overview=True)) name = Quantity( type=str, a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), - description='Specifying name of the system.', + description='Specifying name of the application.', ) @@ -154,11 +154,10 @@ class AIToolkitNotebook(Schema): component=ELNComponentEnum.EnumEditQuantity, props=dict( suggestions=[ - 'advanced tutorial', - 'beginner tutorial', - 'intermediate tutorial', - 'query tutorial', - 'thermal transport', + 'Advanced tutorial', + 'Beginner tutorial', + 'Intermediate tutorial', + 'Query tutorial', ] ), ), @@ -175,7 +174,7 @@ class AIToolkitNotebook(Schema): methods = SubSection(section=Method, repeats=True) - systems = SubSection(section=System, repeats=True) + applications = SubSection(section=Application, repeats=True) references = SubSection(section=Reference, repeats=True)