From 6da4cc57c24004b95fda8c660a215212bf84b0b7 Mon Sep 17 00:00:00 2001 From: Adam Fekete Date: Mon, 29 Jul 2024 02:53:31 +0200 Subject: [PATCH 1/4] fix entrypoint name of the schema package --- notebooks/tutorial_stats.ipynb | 342 ++---------------- pyproject.toml | 5 +- src/nomad_aitoolkit/schema/__init__.py | 4 +- .../schema/{aitoolkit.py => package.py} | 2 +- 4 files changed, 39 insertions(+), 314 deletions(-) rename src/nomad_aitoolkit/schema/{aitoolkit.py => package.py} (99%) diff --git a/notebooks/tutorial_stats.ipynb b/notebooks/tutorial_stats.ipynb index 71a43ff..d771623 100644 --- a/notebooks/tutorial_stats.ipynb +++ b/notebooks/tutorial_stats.ipynb @@ -14,17 +14,9 @@ }, { "cell_type": "code", - "execution_count": 118, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'authors': ['Ahmetcik, Emre', 'Ziletti, Angelo', 'Ouyang, Runhai', 'Sbailò, Luigi', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Symbolic regression via compressed sensing: a tutorial', 'description': 'In this tutorial we will show how to find descriptive parameters to predict materials properties using symbolic regrression combined with compressed sensing tools. The relative stability of the zincblende (ZB) versus rocksalt (RS) structure of binary materials is predicted and compared against a model trained with kernel ridge regression.', 'notebook_name': 'compressed_sensing.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-compressed-sensing', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/compressed_sensing.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/compressed_sensing.ipynb', 'link_video': 'https://www.youtube.com/watch?v=73mLp6C2opY', 'link_paper': 'https://th.fhi-berlin.mpg.de/site/uploads/Publications/NJP-19-023017-2017.pdf', 'link_doi_paper': 'https://doi.org/10.1088/1367-2630/aa57bf', 'updated': '2020-09-20', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_keyword': [], 'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Octet binaries'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Compressed sensing', 'Symbolic regression', 'LASSO', 'SISSO', 'Kernel ridge regression', 'Features selection', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Liu, Xiangyue', 'Sutton, Christopher', 'Yamamoto, Takenori', 'Blumenthal, Lars', 'Golebiowski, Jacek', 'Ziletti, Angelo', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': '2018 NOMAD-Kaggle research competition', 'description': 'In this tutorial, we will explore the best results of the NOMAD 2018 Kaggle research competition. The goal of this competition was to develop machine-learning models for the prediction of two target properties: the formation energy and the bandgap energy of transparent semiconducting oxides. The purpose of the modelling is to facilitate the discovery of new such materials and allow for advancements in (opto)electronic technologies', 'notebook_name': 'kaggle_competition.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-kaggle-competition', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/kaggle_competition.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/kaggle_competition.ipynb', 'link_paper': 'https://th.fhi.mpg.de/site/uploads/Publications/s41524-019-0239-3.pdf', 'link_doi_paper': 'https://www.nature.com/articles/s41524-019-0239-3', 'updated': '2021-01-19', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Transparent conducting oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Kernel ridge regression', 'Neural networks', 'SOAP', 'n-gram'], 'platform': ['jupyter']}}, {'authors': ['Ziletti, Angelo', 'Leitherer, Andreas', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Introduction to convolutional neural networks', 'description': 'In this tutorial, we briefly introduce the main ideas behind convolutional neural networks, build a neural network model with Keras, and explain the classification decision process using attentive response maps.', 'notebook_name': 'convolutional_nn.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-convolutional-nn', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/convolutional_nn.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/convolutional_nn.ipynb', 'link_video': 'https://youtu.be/MST8X1yCWK8', 'updated': '2021-01-29', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Images'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Supervised learning', 'Classification', 'Neural networks', 'Convolutional neural networks', 'Attentive response map'], 'platform': ['jupyter']}}, {'authors': ['Fekete, Ádám', 'Stella, Martina', 'Lambert, Henry', 'De Vita, Alessandro', 'Csányi, Gábor'], 'email': 'adam.fekete@kcl.ac.uk', 'title': 'The SOAP descriptor, Gaussian Approximation Potentials (GAP) and machine learning of force fields', 'description': 'In this tutorial, we will be using a Gaussian Approximation Potentials to analyse results of TB DFT calculations on the Si surface. Along the way we will learn about different descriptors (2b, 3b, SOAP) to describe local atomic environment in order to predict energies and forces of the Si surface.', 'notebook_name': 'gap_si_surface.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-gap-si-surface', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/gap_si_surface.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/gap_si_surface.ipynb', 'updated': '2020-06-18', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Silicon', 'Surface'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Gaussian-process regression', 'Kernel ridge regression', 'SOAP', 'Gaussian approximation potentials (GAP)'], 'platform': ['jupyter']}}, {'authors': ['Csányi, Gábor', 'Kermode, James R.'], 'email': 'gc121@cam.ac.uk', 'title': 'Machine learning atomic charges', 'description': 'In this tutorial, we will use Gaussian process regression, GPR (or equivalently, Kernel Ridge Regression, KRR) to train and predict charges on atoms in small organic molecules.', 'notebook_name': 'soap_atomic_charges.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-soap-atomic-charges', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/soap_atomic_charges.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/soap_atomic_charges.ipynb', 'updated': '2019-09-26', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['GDB molecular database', 'GDB7'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Gaussian-process regression', 'Kernel ridge regression', 'SOAP'], 'platform': ['jupyter']}}, {'authors': ['Fekete, Ádám', 'Stella, Martina', 'Lambert, Henry', 'De Vita, Alessandro', 'Csányi, Gábor'], 'email': 'adam.fekete@kcl.ac.uk', 'title': 'Structure similarity and structure-property relationship: grain boundaries of alpha-Fe', 'description': 'In this tutorial, we will be using a machine-learning method (clustering) to analyse results of grain-boundary (GB) calculations of alpha-iron. Along the way, we will learn about different methods to describe local atomic environment in order to calculate properties of GBs. We will use these properties to separate the different regions of the GB using clustering methods. Finally we will determine how the energy of the GB is changing according to the angle difference of the regions.', 'notebook_name': 'grain_boundaries.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-grain-boundaries', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/grain_boundaries.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/grain_boundaries.ipynb', 'link_paper': 'https://www.sciencedirect.com/science/article/pii/S0010465518301450?via%3Dihub', 'link_doi_paper': 'https://www.sciencedirect.com/science/article/pii/S0010465518301450/pdfft?md5=f21651f69edad3505ed3dd3ba38aee18&pid=1-s2.0-S0010465518301450-main.pdf', 'updated': '2020-01-18', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Iron', 'Grain boundaries'], 'category': ['advanced_tutorial'], 'ai_methods': ['Unsupervised learning', 'Supervised learning', 'Clustering', 'Regression', 'k-means', 'Gaussian mixture'], 'platform': ['jupyter']}}, {'authors': ['Regler, Benjamin', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'regler@fhi-berlin.mpg.de', 'title': 'Introduction to total cumulative mutual information', 'description': 'This interactive notebook introduces the concepts and original implementation of total cumulative mutual information (TCMI), as presented in the related publication. The main results of the publication are also reproduced in a hands-on style', 'notebook_name': 'tcmi.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-tcmi', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/tcmi.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/tcmi.ipynb', 'link_paper': 'https://arxiv.org/pdf/2001.11212', 'link_doi_paper': 'https://arxiv.org/abs/2001.11212', 'updated': '2020-02-06', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Synthetic data', 'UCI regression dataset', 'Octet binaries'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Unsupervised learning', 'Features selection', 'Information theory', 'Mutual information', 'Cumulative entropy', 'Clustering', 'TCMI'], 'language': ['python'], 'platform': ['jupyter']}}, {'authors': ['Arif, Mohammad-Yasin', 'Sbailò, Luigi', 'Purcell, Thomas A. R.', 'Ghiringhelli, Luca M.', 'Scheffler, Matthias'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Predicting energy differences between crystal structures: (Meta-)stability of octet-binary compounds', 'description': 'A tool for predicting the difference in the total energy between different polymorphs for 82 octet binary compounds, which gives an indication of the stability of the material. This is accomplished by identifying a set of descriptive parameters (a descriptor) from the free-atom data for the binary atomic species comprising the material using the Sure Independent Screening (SIS) + l0-norm minimization approach.', 'notebook_name': 'descriptor_role.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-descriptor-role', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/descriptor_role.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/descriptor_role.ipynb', 'link_paper': 'https://th.fhi.mpg.de/site/uploads/Publications/PRL-114-105503-2015.pdf', 'link_doi_paper': 'http://dx.doi.org/10.1103/PhysRevLett.114.105503', 'updated': '2021-10-18', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Octet binaries', 'Rock salt', 'Zinc blende'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Features selection', 'SISSO', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Bieniek, Björn', 'Strange, Mikkel', 'Carbogno, Christian', 'Arif, Mohammad-Yasin', 'Sbailò, Luigi', 'Scheffler, Matthias'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Error estimates from high-accuracy electronic-structure reference calculations', 'description': 'A set of tools to analyze the error in electronic structure calculations due to the choice of numerical settings. We use the NOMAD infrastructure to systematically investigate the deviances in total and relative energies as function of typical settings for basis sets, k-grids, etc. for 71 elemental and 81 binary solids in three different electronic-structure codes.', 'notebook_name': 'error_estimates.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-error-estimates', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/error_estimates.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/error_estimates.ipynb', 'link_paper': 'https://th.fhi-berlin.mpg.de/site/uploads/Publications/2008.10402.pdf', 'link_doi_paper': 'https://arxiv.org/abs/2008.10402', 'updated': '2021-01-21', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'category': ['advanced_tutorial'], 'application_system': ['Binaries', 'Elemental solids'], 'ai_methods': ['Supervised learning', 'Regression', 'Linear least-squares regression'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Querying the NOMAD Archive and performing artificial-intelligence modeling', 'description': 'In this tutorial, we demonstrate how to query the NOMAD Archive from the NOMAD Analytics toolkit. We then show examples of machine learning analysis performed on the retrieved data set.', 'notebook_name': 'query_nomad_archive.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-query-nomad-archive', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/query_nomad_archive.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/query_nomad_archive.ipynb', 'updated': '2022-04-06', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Analysing the content of the Archive'], 'application_system': ['Ternaries'], 'category': ['query_tutorial'], 'ai_methods': ['Unsupervised learning', 'Supervised learning', 'Regression', 'Clustering', 'Dimension reduction', 'Random forest'], 'platform': ['jupyter']}}, {'authors': ['Langer, Marcel F.'], 'email': 'langer@fhi-berlin.mpg.de', 'title': 'cmlkit: Toolkit for Machine Learning in Materials Science and Quantum Chemistry', 'description': 'In this tutorial, we will get to know cmlkit, a python package for specifying, evaluating, and optimising machine learning models, and use it to compete in the Nomad 2018 Kaggle challenge.', 'notebook_name': 'cmlkit.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-cmlkit', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/cmlkit.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/cmlkit.ipynb', 'link_paper': 'https://arxiv.org/pdf/2003.12081.pdf', 'link_doi_paper': 'https://arxiv.org/abs/2003.12081', 'updated': '2021-01-14', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Transparent conducting oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Kernel ridge regression', 'SOAP', 'MBTR', 'Symmetry functions'], 'platform': ['jupyter']}}, {'authors': ['Speckhard, Daniel', 'Leitherer, Andreas', 'Ghiringhelli, Luca M.'], 'email': 'speckhard@fhi-berlin.mpg.de', 'title': 'Introduction to decision-trees methods', 'description': 'In this tutorial we will introduce decision trees. We go through a toy model introducing the SKLearn API. We then discuss step by step the different theoretical aspects of trees. We then move to training a regression tree and classification tree on different datasets related to materials science. We end the tutorial by covering random forests and bagging classfiers.', 'notebook_name': 'decision_tree.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-decision-tree', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/decision_tree.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/decision_tree.ipynb', 'link_video': 'https://www.youtube.com/watch?v=YBy9STVaqvU', 'updated': '2020-12-08', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Images', 'Metals', 'Insulators'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Classification', 'Decision tree', 'Random forest', 'Bagging classifier', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'sbailo@fhi-berlin.mpg.de', 'title': 'Introduction to clustering', 'description': 'In this tutorial, we introduce to the most popular clustering algorithms. We focus on partitioning, hierarchical and density-based clustering algorithms. The methods are tested on synthetic datasets of increasing complexity', 'notebook_name': 'clustering_tutorial.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-clustering-tutorial', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/clustering_tutorial.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/clustering_tutorial.ipynb', 'updated': '2021-01-21', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Synthetic data'], 'category': ['beginner_tutorial'], 'ai_methods': ['Unsupervised learning', 'Clustering', 'k-means', 'Hierarchical clustering', 'DBSCAN', 'HDBSCAN'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'sbailo@fhi-berlin.mpg.de', 'title': 'Introduction to exploratory analysis (unsupervised learning) of materials spaces', 'description': 'Exploratory analyses make use of unsupervised learning techniques to extract information from unknown datasets. In this tutorial, we make use of some of the most popular clustering and dimension reduction algorithms to analyze a dataset composed of 82 octet-binary compounds.', 'notebook_name': 'exploratory_analysis.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-exploratory-analysis', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/exploratory_analysis.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/exploratory_analysis.ipynb', 'link_video': 'https://www.youtube.com/watch?v=EJTjF9ehp7k', 'updated': '2021-02-04', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Octet binaries'], 'category': ['beginner_tutorial'], 'ai_methods': ['Clustering', 'Dimension reduction', 'k-means', 'Hierarchical clustering', 'DBSCAN', 'HDBSCAN', 'DenPeak', 'PCA', 't-SNE', 'MDS'], 'platform': ['jupyter']}}, {'authors': ['Arif, Mohammad-Yasin', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Identifying domains of applicability of machine-Learning models for materials science', 'description': 'In this tutorial, we present a method, based on subgroup discovery, for detecting domains of applicability (DA) of ML models within a materials class. The domain of applicability of an ML model is the region of input space where the model predicts the target property with the smallest uncertainty. The utility of this approach is demonstrated by analyzing three state-of-the-art ML models for predicting the formation energy of transparent conducting oxides.', 'notebook_name': 'domain_of_applicability.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-domain-of-applicability', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/domain_of_applicability.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/domain_of_applicability.ipynb', 'link_paper': ' https://th.fhi-berlin.mpg.de/site/uploads/Publications/s41467-020-17112-9.pdf', 'link_doi_paper': 'https://www.nature.com/articles/s41467-020-17112-9', 'updated': '2021-01-27', 'flags': {'featured': True, 'top_of_list': False, 'paper': True}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Transparent conducting oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Subgroup discovery', 'Kernel ridge regression', 'SOAP', 'MBTR', 'n-gram'], 'platform': ['jupyter']}}, {'authors': ['Leitherer, Andreas', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'leitherer@fhi-berlin.mpg.de', 'title': 'Introduction to multilayer perceptrons (deep neural networks)', 'description': 'In this tutorial, we discuss how multilayer perceptrons, a standard neural-network architecture, can be employed for regression tasks. Specifically, we will use the ElemNet neural-network architecture to predict the volume per atom of inorganic compounds, where the Open Quantum Materials Database (OQMD) is used as a resource.', 'notebook_name': 'nn_regression.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-nn-regression', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/nn_regression.ipynb', 'link_public': 'https://nomad-lab.eu/prod/analytics/public/user-redirect/notebooks/tutorials/nn_regression.ipynb', 'link_video': 'https://www.youtube.com/watch?v=U0lI5n8Hleo', 'updated': '2021-01-29', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Materials property prediction'], 'application_system': ['Inorganic compounds', 'OQMD database'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Neural networks', 'Deep neural networks', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Sbailò, Luigi', 'Purcell, Thomas A. R.', 'Ghiringhelli, Luca M.', 'Scheffler, Matthias'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Discovery of new topological insulators in alloyed tetradymites', 'description': \"Learn how to find descriptive parameters (short formulas) that predict whether alloyed materials are topological or trivial insulators, using the example of tetradymites. This notebook is based on the algorithm 'sure independence screening and sparsifying operator' (SISSO) that enables to search for optimal descriptor by scanning huge feature spaces.\", 'notebook_name': 'tetradymite_PRM2020.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-tetradymite-PRM2020', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/tetradymite_PRM2020.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/tetradymite_PRM2020.ipynb', 'link_paper': 'https://th.fhi.mpg.de/site/uploads/Publications/PhysRevMaterials.4.034204.pdf', 'link_doi_paper': 'https://journals.aps.org/prmaterials/abstract/10.1103/PhysRevMaterials.4.034204', 'updated': '2020-09-15', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Tetradymites', 'Topological insulators'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Classification', 'Symbolic regression', 'Features selection', 'Atomic features', 'SISSO'], 'platform': ['jupyter']}}, {'authors': ['Leitherer, Andreas', 'Ziletti, Angelo', 'Ghiringhelli, Luca M.'], 'email': 'leitherer@fhi-berlin.mpg.de', 'title': 'ARISE - Robust recognition and exploratory analysis of crystal structures via Bayesian deep learning', 'description': 'In this tutorial, we give an introduction to ARISE (ARtificial-Intelligence-based Structure Evaluation), a powerful Bayesian-deep-neural-network tool for the recognition of atomistic structures (A. Leitherer, A. Ziletti, and L.M. Ghiringhelli, Nat. Commun. 12, 6234, 2021). ARISE is robust to structural noise and can treat more than 100 crystal structures, a number that can be extended on demand. While being trained on ideal structures only, ARISE correctly characterizes strongly perturbed single- and polycrystalline systems, from both synthetic and experimental resources. The probabilistic nature of the Bayesian-deep-learning model allows to obtain principled uncertainty estimates. By applying unsupervised learning to the internal neural-network representations, one can reveal grain boundaries and (unapparent) structural regions sharing easily interpretable geometrical properties.', 'notebook_name': 'ARISE.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-arise', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/ARISE.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/ARISE.ipynb', 'link_paper': 'https://www.nature.com/articles/s41467-021-26511-5.pdf', 'link_doi_paper': 'https://www.nature.com/articles/s41467-021-26511-5', 'updated': '2021-03-22', 'flags': {'featured': True, 'top_of_list': False, 'paper': True}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials science'], 'application_system': ['Grain boundaries', 'Binaries', 'Ternaries', 'Low-dimensional materials'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Neural networks', 'Bayesian deep learning', 'Unsupervised learning', 'Clustering', 'Dimension reduction', 'HDBSCAN', 'UMAP', 'SOAP'], 'platform': ['jupyter']}}, {'authors': ['Langer, Marcel F.'], 'email': 'langer@fhi-berlin.mpg.de', 'title': 'Introduction to kernel ridge regression for materials-property prediction', 'description': 'In this tutorial, we will explore the application of kernel ridge regression to the prediction of materials properties. We will begin with a largely informal, pragmatic introduction to kernel ridge regression, including a rudimentary implementation, in order to become familiar with the basic terminology and considerations. We will then discuss representations, and re-trace the NOMAD 2018 Kaggle challenge.', 'notebook_name': 'krr4mat.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-krr4mat', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/krr4mat.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/krr4mat.ipynb', 'link_video': 'https://www.youtube.com/watch?v=H_MVlljpYHw', 'updated': '2020-12-15', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Transparent conducting oxides'], 'category': ['beginner_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Kernel ridge regression', 'SOAP'], 'platform': ['jupyter']}}, {'authors': ['Mazheika, Aliaksei', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.', 'Levchenko, Sergey', 'Scheffler, Matthias'], 'email': 'mazheika@fhi-berlin.mpg.de', 'title': 'Subgroup discovery of catalysts’ genes for carbon-dioxide activation on semiconductor oxides', 'description': 'In this interactive tutorial we show the application of subgroup discovery for the search for indicators of carbond-dioxide activation with the aim of its further conversion.', 'notebook_name': 'CO2_SGD.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-co2-sgd-tutorial', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/CO2_SGD.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/CO2_SGD.ipynb', 'link_paper': 'https://arxiv.org/pdf/1912.06515', 'link_doi_paper': 'https://arxiv.org/abs/1912.06515', 'updated': '2021-08-26', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['CO2 activation', 'Heterogeneous catalysis', 'Semicondictor oxides'], 'category': ['advanced_tutorial'], 'ai_methods': ['Subgroup discovery', 'Decision tree'], 'platform': ['jupyter']}}, {'authors': ['Foppa, Lucas', 'Ghiringhelli, Luca M.'], 'email': 'foppa@fhi-berlin.mpg.de', 'title': 'Introduction to subgroup discovery: Identifying outstanding transition-metal-alloy catalysts', 'description': 'This tutorial introduces, by means of two applications in materials science, the artificial-intelligence technique subgroup discovery.', 'notebook_name': 'sgd_alloys_oxygen_reduction_evolution.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-sgd-alloys-oxygen-reduction-evolution', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/sgd_alloys_oxygen_reduction_evolution.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/sgd_alloys_oxygen_reduction_evolution.ipynb', 'link_paper': 'https://link.springer.com/content/pdf/10.1007/s11244-021-01502-4.pdf', 'link_doi_paper': 'https://doi.org/10.1007/s11244-021-01502-4', 'updated': '2021-10-28', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Heterogeneous catalysis', 'Oxygen evolution reaction', 'Oxygen reduction reaction', 'Scaling relations'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Subgroup discovery', 'Decision tree'], 'platform': ['jupyter']}}, {'authors': ['Naik ,Aakash A.', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Atomic-features-package usage demonstration', 'description': 'In this tutorial, we show how the atomic-features-package can be accessed and used to explore the atomic features form various sources and to prepare the input features for machine-learning studies.', 'notebook_name': 'atomic_features.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-atomic-features', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/atomic_features.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/atomic_features.ipynb', 'updated': '2021-12-07', 'labels': {'application_system': ['Atoms'], 'category': ['query_tutorial'], 'platform': ['jupyter'], 'ai_methods': ['']}}, {'authors': ['Foppa, Lucas', 'Hassanzada, Qaem', 'Bartel, Christopher', 'Purcell, Thomas', 'Sbailò, Luigi', 'Ghiringhelli, Luca M.'], 'email': 'ghiringhelli@fhi-berlin.mpg.de', 'title': 'Finding a tolerance factor to predict perovskite stability with SISSO', 'description': 'This tutorial shows how a tolerance factor for predicting perovskite stability can be learned from data with the sure-independece-screening-and-sparsifying-operator (SISSO) descriptor-identification approach.', 'notebook_name': 'perovskites_tolerance_factor.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-perovskite-tolerance-factor', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/perovskites_tolerance_factor.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/perovskites_tolerance_factor.ipynb', 'link_paper': 'https://advances.sciencemag.org/content/advances/5/2/eaav0693.full.pdf', 'link_doi_paper': 'https://doi.org/10.1126/sciadv.aav0693', 'updated': '2022-05-18', 'flags': {'featured': True, 'top_of_list': False, 'paper': True}, 'labels': {'application_section': ['Timely artificial-intelligence applications to materials science'], 'category': ['advanced_tutorial'], 'application_system': ['Perovskites'], 'ai_methods': ['Supervised learning', 'Classification', 'Symbolic regression', 'Compressed sensing', 'SISSO', 'Decision tree', 'Features selection', 'Atomic features'], 'platform': ['jupyter']}}, {'authors': ['Foppa, Lucas', 'Ghiringhelli, Luca M.', 'Scheffler, Matthias'], 'email': 'foppa@fhi-berlin.mpg.de', 'title': 'Learning Design Rules for Catalysts from High-Throughput Experimentation and Theory via Subgroup Discovery', 'description': 'This tutorial explores the application of subgroup discovery (SGD) to an experimental-theoretical data set in order to identify rules on key physicochemical parameters that describe the materials and environmental conditions associated with outstanding performance in heterogeneous catalysis.', 'notebook_name': 'sgd_propylene_oxidation_hte.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-sgd-propylene-oxidation-hte', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/sgd_propylene_oxidation_hte.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/sgd_propylene_oxidation_hte.ipynb', 'link_paper': 'https://pubs.acs.org/doi/10.1021/acscatal.1c04793', 'link_doi_paper': 'https://pubs.acs.org/doi/10.1021/acscatal.1c04793', 'updated': '2022-2-09', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Heterogeneous catalysis'], 'category': ['advanced_tutorial'], 'ai_methods': ['Subgroup discovery'], 'platform': ['jupyter']}}, {'authors': ['Gabaj, Šimon', 'Kuban, Martin', 'Rigamonti, Santiago', 'Draxl, Claudia'], 'email': 'gabajsim@physik.hu-berlin.de', 'title': 'Electronic density-of-states similarity search', 'description': 'This notebook shows how to compute the similarity of materials in terms of their electronic density-of-states (DOS), from data retrieved from the NOMAD Archive.', 'notebook_name': 'dos_similarity_search.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-dos-similarity-search', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/dos_similarity_search.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/dos_similarity_search.ipynb', 'updated': '2022-03-30', 'flags': {'featured': True, 'top_of_list': False, 'paper': False}, 'labels': {'application_section': ['Tutorials for artificial-intelligence methods'], 'application_system': ['Binaries', 'Ternaries'], 'category': ['intermediate_tutorial'], 'ai_methods': ['Similarity search', 'Fingerprint'], 'platform': ['jupyter']}}, {'authors': ['Foppa, Lucas', 'Purcell, Thomas A. R.', 'Levchenko, Sergey V.', 'Scheffler, Matthias', 'Ghiringhelli, Luca M.'], 'email': 'foppa@fhi-berlin.mpg.de', 'title': 'Hierarchical symbolic regression for identifying key physical parameters correlated with materials properties', 'description': 'In this notebook, we describe a hierarchical symbolic-regression approach for finding, based on data, analytical expressions relating materials properties to simpler physicochemical parameters associated with the underlying processes governing the properties.', 'notebook_name': 'hierarchical_sisso.ipynb', 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-hierarchical-sisso', 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/hierarchical_sisso.ipynb', 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/hierarchical_sisso.ipynb', 'link_paper': 'https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.129.055301', 'link_doi_paper': 'https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.129.055301', 'updated': '2022-8-3', 'flags': {'featured': True, 'top_of_list': False}, 'labels': {'application_section': ['Timely artificial-intelligence applications to Materials Science'], 'application_system': ['Bulk properties', 'Perovskites'], 'category': ['advanced_tutorial'], 'ai_methods': ['Supervised learning', 'Regression', 'Compressed sensing', 'Symbolic regression', 'SISSO', 'Features selection', 'Atomic features'], 'platform': ['jupyter']}}]\n" - ] - } - ], + "outputs": [], "source": [ "import json\n", "\n", @@ -36,33 +28,9 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'authors',\n", - " 'description',\n", - " 'email',\n", - " 'flags',\n", - " 'labels',\n", - " 'link',\n", - " 'link_doi_paper',\n", - " 'link_paper',\n", - " 'link_public',\n", - " 'link_video',\n", - " 'notebook_name',\n", - " 'title',\n", - " 'updated',\n", - " 'url'}" - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -72,20 +40,9 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('flags', {'featured', 'paper', 'top_of_list'})" - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -95,26 +52,9 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'ai_methods',\n", - " 'application_keyword',\n", - " 'application_section',\n", - " 'application_system',\n", - " 'category',\n", - " 'language',\n", - " 'platform'}" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -124,20 +64,9 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "set()" - ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -147,25 +76,9 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Analysing the content of the Archive',\n", - " 'Materials property prediction',\n", - " 'Timely artificial-intelligence applications to Materials Science',\n", - " 'Timely artificial-intelligence applications to Materials science',\n", - " 'Timely artificial-intelligence applications to materials science',\n", - " 'Tutorials for artificial-intelligence methods'}" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -175,51 +88,9 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Atoms',\n", - " 'Binaries',\n", - " 'Bulk properties',\n", - " 'CO2 activation',\n", - " 'Elemental solids',\n", - " 'GDB molecular database',\n", - " 'GDB7',\n", - " 'Grain boundaries',\n", - " 'Heterogeneous catalysis',\n", - " 'Images',\n", - " 'Inorganic compounds',\n", - " 'Insulators',\n", - " 'Iron',\n", - " 'Low-dimensional materials',\n", - " 'Metals',\n", - " 'OQMD database',\n", - " 'Octet binaries',\n", - " 'Oxygen evolution reaction',\n", - " 'Oxygen reduction reaction',\n", - " 'Perovskites',\n", - " 'Rock salt',\n", - " 'Scaling relations',\n", - " 'Semicondictor oxides',\n", - " 'Silicon',\n", - " 'Surface',\n", - " 'Synthetic data',\n", - " 'Ternaries',\n", - " 'Tetradymites',\n", - " 'Topological insulators',\n", - " 'Transparent conducting oxides',\n", - " 'UCI regression dataset',\n", - " 'Zinc blende'}" - ] - }, - "execution_count": 124, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -229,23 +100,9 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'advanced_tutorial',\n", - " 'beginner_tutorial',\n", - " 'intermediate_tutorial',\n", - " 'query_tutorial'}" - ] - }, - "execution_count": 125, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -255,65 +112,9 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'',\n", - " 'Atomic features',\n", - " 'Attentive response map',\n", - " 'Bagging classifier',\n", - " 'Bayesian deep learning',\n", - " 'Classification',\n", - " 'Clustering',\n", - " 'Compressed sensing',\n", - " 'Convolutional neural networks',\n", - " 'Cumulative entropy',\n", - " 'DBSCAN',\n", - " 'Decision tree',\n", - " 'Deep neural networks',\n", - " 'DenPeak',\n", - " 'Dimension reduction',\n", - " 'Features selection',\n", - " 'Fingerprint',\n", - " 'Gaussian approximation potentials (GAP)',\n", - " 'Gaussian mixture',\n", - " 'Gaussian-process regression',\n", - " 'HDBSCAN',\n", - " 'Hierarchical clustering',\n", - " 'Information theory',\n", - " 'Kernel ridge regression',\n", - " 'LASSO',\n", - " 'Linear least-squares regression',\n", - " 'MBTR',\n", - " 'MDS',\n", - " 'Mutual information',\n", - " 'Neural networks',\n", - " 'PCA',\n", - " 'Random forest',\n", - " 'Regression',\n", - " 'SISSO',\n", - " 'SOAP',\n", - " 'Similarity search',\n", - " 'Subgroup discovery',\n", - " 'Supervised learning',\n", - " 'Symbolic regression',\n", - " 'Symmetry functions',\n", - " 'TCMI',\n", - " 'UMAP',\n", - " 'Unsupervised learning',\n", - " 'k-means',\n", - " 'n-gram',\n", - " 't-SNE'}" - ] - }, - "execution_count": 126, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -323,20 +124,9 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'python'}" - ] - }, - "execution_count": 127, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -346,20 +136,9 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'jupyter'}" - ] - }, - "execution_count": 128, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "keys=set()\n", "for tutorial in tutorials:\n", @@ -369,51 +148,9 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'authors': ['Ahmetcik, Emre',\n", - " 'Ziletti, Angelo',\n", - " 'Ouyang, Runhai',\n", - " 'Sbailò, Luigi',\n", - " 'Scheffler, Matthias',\n", - " 'Ghiringhelli, Luca M.'],\n", - " 'email': 'ghiringhelli@fhi-berlin.mpg.de',\n", - " 'title': 'Symbolic regression via compressed sensing: a tutorial',\n", - " 'description': 'In this tutorial we will show how to find descriptive parameters to predict materials properties using symbolic regrression combined with compressed sensing tools. The relative stability of the zincblende (ZB) versus rocksalt (RS) structure of binary materials is predicted and compared against a model trained with kernel ridge regression.',\n", - " 'notebook_name': 'compressed_sensing.ipynb',\n", - " 'url': 'https://gitlab.mpcdf.mpg.de/nomad-lab/analytics-compressed-sensing',\n", - " 'link': 'https://analytics-toolkit.nomad-coe.eu/hub/user-redirect/notebooks/tutorials/compressed_sensing.ipynb',\n", - " 'link_public': 'https://analytics-toolkit.nomad-coe.eu/public/user-redirect/notebooks/tutorials/compressed_sensing.ipynb',\n", - " 'link_video': 'https://www.youtube.com/watch?v=73mLp6C2opY',\n", - " 'link_paper': 'https://th.fhi-berlin.mpg.de/site/uploads/Publications/NJP-19-023017-2017.pdf',\n", - " 'link_doi_paper': 'https://doi.org/10.1088/1367-2630/aa57bf',\n", - " 'updated': '2020-09-20',\n", - " 'flags': {'featured': True, 'top_of_list': False},\n", - " 'labels': {'application_keyword': [],\n", - " 'application_section': ['Tutorials for artificial-intelligence methods'],\n", - " 'application_system': ['Octet binaries'],\n", - " 'category': ['beginner_tutorial'],\n", - " 'ai_methods': ['Supervised learning',\n", - " 'Regression',\n", - " 'Compressed sensing',\n", - " 'Symbolic regression',\n", - " 'LASSO',\n", - " 'SISSO',\n", - " 'Kernel ridge regression',\n", - " 'Features selection',\n", - " 'Atomic features'],\n", - " 'platform': ['jupyter']}}" - ] - }, - "execution_count": 129, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "tutorial = tutorials[0]\n", "tutorial\n" @@ -421,27 +158,9 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'last_name': 'Ahmetcik', 'first_name': 'Emre'},\n", - " {'last_name': 'Ziletti', 'first_name': 'Angelo'},\n", - " {'last_name': 'Ouyang', 'first_name': 'Runhai'},\n", - " {'last_name': 'Sbailò', 'first_name': 'Luigi'},\n", - " {'last_name': 'Scheffler', 'first_name': 'Matthias'},\n", - " {'last_name': 'Ghiringhelli',\n", - " 'first_name': 'Luca M.',\n", - " 'email': 'ghiringhelli@fhi-berlin.mpg.de'}]" - ] - }, - "execution_count": 130, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "authors=[]\n", "for author in tutorial.get('authors'):\n", @@ -457,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -551,6 +270,13 @@ " json.dump(new_tutorial, outfile, indent=2)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/pyproject.toml b/pyproject.toml index 527cb0b..c39bb40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ classifiers = [ ] name = "nomad-aitoolkit" description = "Schema and app for AI Toolkit notebooks." -version = "0.1.0" +version = "0.1.1" readme = "README.rst" requires-python = ">=3.9" authors = [ @@ -109,7 +109,6 @@ package-dir = { "" = "src" } where = ["src"] [project.entry-points.'nomad.plugin'] - -aitookitschema = "nomad_aitoolkit.schema:package" +aitookitschema = "nomad_aitoolkit.schema:aitoolkit" aitookitapp = "nomad_aitoolkit.apps:aitoolkit" diff --git a/src/nomad_aitoolkit/schema/__init__.py b/src/nomad_aitoolkit/schema/__init__.py index 6d20c5b..b51b104 100644 --- a/src/nomad_aitoolkit/schema/__init__.py +++ b/src/nomad_aitoolkit/schema/__init__.py @@ -3,12 +3,12 @@ class AIToolkitSchemaPackageEntryPoint(SchemaPackageEntryPoint): def load(self): - from nomad_aitoolkit.schema.aitoolkit import m_package + from nomad_aitoolkit.schema.package import m_package return m_package -package = AIToolkitSchemaPackageEntryPoint( +aitoolkit = AIToolkitSchemaPackageEntryPoint( name='AIToolkit', description='Describes the basic schemas for AI Toolkit notebooks.', ) diff --git a/src/nomad_aitoolkit/schema/aitoolkit.py b/src/nomad_aitoolkit/schema/package.py similarity index 99% rename from src/nomad_aitoolkit/schema/aitoolkit.py rename to src/nomad_aitoolkit/schema/package.py index 31455eb..61a9a4f 100644 --- a/src/nomad_aitoolkit/schema/aitoolkit.py +++ b/src/nomad_aitoolkit/schema/package.py @@ -24,7 +24,7 @@ SubSection, ) -configuration = config.get_plugin_entry_point('nomad_aitoolkit.schema:package') +configuration = config.get_plugin_entry_point('nomad_aitoolkit.schema:aitoolkit') def remove_tags(text): From f4381bae0a423dec7149873d7bcdc0a61789986d Mon Sep 17 00:00:00 2001 From: Adam Fekete Date: Mon, 29 Jul 2024 04:19:21 +0200 Subject: [PATCH 2/4] refactor schema --- notebooks/arise.archive.json | 2 +- notebooks/atomic-features.archive.json | 2 +- notebooks/clustering-tutorial.archive.json | 2 +- notebooks/cmlkit.archive.json | 2 +- notebooks/co2-sgd-tutorial.archive.json | 2 +- notebooks/compressed-sensing.archive.json | 2 +- notebooks/convolutional-nn.archive.json | 2 +- notebooks/decision-tree.archive.json | 2 +- notebooks/descriptor-role.archive.json | 2 +- .../domain-of-applicability.archive.json | 2 +- notebooks/dos-similarity-search.archive.json | 2 +- notebooks/error-estimates.archive.json | 2 +- notebooks/exploratory-analysis.archive.json | 2 +- notebooks/gap-si-surface.archive.json | 2 +- notebooks/grain-boundaries.archive.json | 2 +- notebooks/hierarchical-sisso.archive.json | 2 +- notebooks/kaggle-competition.archive.json | 2 +- notebooks/krr4mat.archive.json | 2 +- notebooks/nn-regression.archive.json | 2 +- .../perovskite-tolerance-factor.archive.json | 2 +- notebooks/query-nomad-archive.archive.json | 2 +- ...ys-oxygen-reduction-evolution.archive.json | 2 +- .../sgd-propylene-oxidation-hte.archive.json | 2 +- notebooks/soap-atomic-charges.archive.json | 2 +- notebooks/tcmi.archive.json | 2 +- notebooks/tetradymite-PRM2020.archive.json | 2 +- notebooks/tutorial_stats.ipynb | 4 +- pyproject.toml | 2 +- src/nomad_aitoolkit/__init__.py | 14 ++ src/nomad_aitoolkit/apps/__init__.py | 36 +-- src/nomad_aitoolkit/schema/__init__.py | 210 +++++++++++++++++- src/nomad_aitoolkit/schema/package.py | 206 ----------------- 32 files changed, 262 insertions(+), 262 deletions(-) delete mode 100644 src/nomad_aitoolkit/schema/package.py diff --git a/notebooks/arise.archive.json b/notebooks/arise.archive.json index f738a7c..756b4cc 100644 --- a/notebooks/arise.archive.json +++ b/notebooks/arise.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "ARISE - Robust recognition and exploratory analysis of crystal structures via Bayesian deep learning", "description": "In this tutorial, we give an introduction to ARISE (ARtificial-Intelligence-based Structure Evaluation), a powerful Bayesian-deep-neural-network tool for the recognition of atomistic structures (A. Leitherer, A. Ziletti, and L.M. Ghiringhelli, Nat. Commun. 12, 6234, 2021). ARISE is robust to structural noise and can treat more than 100 crystal structures, a number that can be extended on demand. While being trained on ideal structures only, ARISE correctly characterizes strongly perturbed single- and polycrystalline systems, from both synthetic and experimental resources. The probabilistic nature of the Bayesian-deep-learning model allows to obtain principled uncertainty estimates. By applying unsupervised learning to the internal neural-network representations, one can reveal grain boundaries and (unapparent) structural regions sharing easily interpretable geometrical properties.", "date": "2021-03-22", diff --git a/notebooks/atomic-features.archive.json b/notebooks/atomic-features.archive.json index ddc3200..232c662 100644 --- a/notebooks/atomic-features.archive.json +++ b/notebooks/atomic-features.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Atomic-features-package usage demonstration", "description": "In this tutorial, we show how the atomic-features-package can be accessed and used to explore the atomic features form various sources and to prepare the input features for machine-learning studies.", "date": "2021-12-07", diff --git a/notebooks/clustering-tutorial.archive.json b/notebooks/clustering-tutorial.archive.json index d59b10f..5ea359e 100644 --- a/notebooks/clustering-tutorial.archive.json +++ b/notebooks/clustering-tutorial.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to clustering", "description": "In this tutorial, we introduce to the most popular clustering algorithms. We focus on partitioning, hierarchical and density-based clustering algorithms. The methods are tested on synthetic datasets of increasing complexity", "date": "2021-01-21", diff --git a/notebooks/cmlkit.archive.json b/notebooks/cmlkit.archive.json index 11e5f14..4ae2ff6 100644 --- a/notebooks/cmlkit.archive.json +++ b/notebooks/cmlkit.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "cmlkit: Toolkit for Machine Learning in Materials Science and Quantum Chemistry", "description": "In this tutorial, we will get to know cmlkit, a python package for specifying, evaluating, and optimising machine learning models, and use it to compete in the Nomad 2018 Kaggle challenge.", "date": "2021-01-14", diff --git a/notebooks/co2-sgd-tutorial.archive.json b/notebooks/co2-sgd-tutorial.archive.json index a887551..86ad11a 100644 --- a/notebooks/co2-sgd-tutorial.archive.json +++ b/notebooks/co2-sgd-tutorial.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Subgroup discovery of catalysts\u2019 genes for carbon-dioxide activation on semiconductor oxides", "description": "In this interactive tutorial we show the application of subgroup discovery for the search for indicators of carbond-dioxide activation with the aim of its further conversion.", "date": "2021-08-26", diff --git a/notebooks/compressed-sensing.archive.json b/notebooks/compressed-sensing.archive.json index abeabab..b80ccd3 100644 --- a/notebooks/compressed-sensing.archive.json +++ b/notebooks/compressed-sensing.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Symbolic regression via compressed sensing: a tutorial", "description": "In this tutorial we will show how to find descriptive parameters to predict materials properties using symbolic regrression combined with compressed sensing tools. The relative stability of the zincblende (ZB) versus rocksalt (RS) structure of binary materials is predicted and compared against a model trained with kernel ridge regression.", "date": "2020-09-20", diff --git a/notebooks/convolutional-nn.archive.json b/notebooks/convolutional-nn.archive.json index 7195f94..f0becec 100644 --- a/notebooks/convolutional-nn.archive.json +++ b/notebooks/convolutional-nn.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to convolutional neural networks", "description": "In this tutorial, we briefly introduce the main ideas behind convolutional neural networks, build a neural network model with Keras, and explain the classification decision process using attentive response maps.", "date": "2021-01-29", diff --git a/notebooks/decision-tree.archive.json b/notebooks/decision-tree.archive.json index e265dea..e8187aa 100644 --- a/notebooks/decision-tree.archive.json +++ b/notebooks/decision-tree.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to decision-trees methods", "description": "In this tutorial we will introduce decision trees. We go through a toy model introducing the SKLearn API. We then discuss step by step the different theoretical aspects of trees. We then move to training a regression tree and classification tree on different datasets related to materials science. We end the tutorial by covering random forests and bagging classfiers.", "date": "2020-12-08", diff --git a/notebooks/descriptor-role.archive.json b/notebooks/descriptor-role.archive.json index 315a04a..357116c 100644 --- a/notebooks/descriptor-role.archive.json +++ b/notebooks/descriptor-role.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Predicting energy differences between crystal structures: (Meta-)stability of octet-binary compounds", "description": "A tool for predicting the difference in the total energy between different polymorphs for 82 octet binary compounds, which gives an indication of the stability of the material. This is accomplished by identifying a set of descriptive parameters (a descriptor) from the free-atom data for the binary atomic species comprising the material using the Sure Independent Screening (SIS) + l0-norm minimization approach.", "date": "2021-10-18", diff --git a/notebooks/domain-of-applicability.archive.json b/notebooks/domain-of-applicability.archive.json index aa9ae54..2466c96 100644 --- a/notebooks/domain-of-applicability.archive.json +++ b/notebooks/domain-of-applicability.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Identifying domains of applicability of machine-Learning models for materials science", "description": "In this tutorial, we present a method, based on subgroup discovery, for detecting domains of applicability (DA) of ML models within a materials class. The domain of applicability of an ML model is the region of input space where the model predicts the target property with the smallest uncertainty. The utility of this approach is demonstrated by analyzing three state-of-the-art ML models for predicting the formation energy of transparent conducting oxides.", "date": "2021-01-27", diff --git a/notebooks/dos-similarity-search.archive.json b/notebooks/dos-similarity-search.archive.json index 74fddc8..4e3e476 100644 --- a/notebooks/dos-similarity-search.archive.json +++ b/notebooks/dos-similarity-search.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Electronic density-of-states similarity search", "description": "This notebook shows how to compute the similarity of materials in terms of their electronic density-of-states (DOS), from data retrieved from the NOMAD Archive.", "date": "2022-03-30", diff --git a/notebooks/error-estimates.archive.json b/notebooks/error-estimates.archive.json index 68c0735..d579041 100644 --- a/notebooks/error-estimates.archive.json +++ b/notebooks/error-estimates.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Error estimates from high-accuracy electronic-structure reference calculations", "description": "A set of tools to analyze the error in electronic structure calculations due to the choice of numerical settings. We use the NOMAD infrastructure to systematically investigate the deviances in total and relative energies as function of typical settings for basis sets, k-grids, etc. for 71 elemental and 81 binary solids in three different electronic-structure codes.", "date": "2021-01-21", diff --git a/notebooks/exploratory-analysis.archive.json b/notebooks/exploratory-analysis.archive.json index 0152dc1..e94d873 100644 --- a/notebooks/exploratory-analysis.archive.json +++ b/notebooks/exploratory-analysis.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to exploratory analysis (unsupervised learning) of materials spaces", "description": "Exploratory analyses make use of unsupervised learning techniques to extract information from unknown datasets. In this tutorial, we make use of some of the most popular clustering and dimension reduction algorithms to analyze a dataset composed of 82 octet-binary compounds.", "date": "2021-02-04", diff --git a/notebooks/gap-si-surface.archive.json b/notebooks/gap-si-surface.archive.json index 4934e48..8e69997 100644 --- a/notebooks/gap-si-surface.archive.json +++ b/notebooks/gap-si-surface.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "The SOAP descriptor, Gaussian Approximation Potentials (GAP) and machine learning of force fields", "description": "In this tutorial, we will be using a Gaussian Approximation Potentials to analyse results of TB DFT calculations on the Si surface. Along the way we will learn about different descriptors (2b, 3b, SOAP) to describe local atomic environment in order to predict energies and forces of the Si surface.", "date": "2020-06-18", diff --git a/notebooks/grain-boundaries.archive.json b/notebooks/grain-boundaries.archive.json index 7a61c3e..4774f5a 100644 --- a/notebooks/grain-boundaries.archive.json +++ b/notebooks/grain-boundaries.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Structure similarity and structure-property relationship: grain boundaries of alpha-Fe", "description": "In this tutorial, we will be using a machine-learning method (clustering) to analyse results of grain-boundary (GB) calculations of alpha-iron. Along the way, we will learn about different methods to describe local atomic environment in order to calculate properties of GBs. We will use these properties to separate the different regions of the GB using clustering methods. Finally we will determine how the energy of the GB is changing according to the angle difference of the regions.", "date": "2020-01-18", diff --git a/notebooks/hierarchical-sisso.archive.json b/notebooks/hierarchical-sisso.archive.json index af1460b..620d3e5 100644 --- a/notebooks/hierarchical-sisso.archive.json +++ b/notebooks/hierarchical-sisso.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Hierarchical symbolic regression for identifying key physical parameters correlated with materials properties", "description": "In this notebook, we describe a hierarchical symbolic-regression approach for finding, based on data, analytical expressions relating materials properties to simpler physicochemical parameters associated with the underlying processes governing the properties.", "date": "2022-8-3", diff --git a/notebooks/kaggle-competition.archive.json b/notebooks/kaggle-competition.archive.json index 7100fdd..a99f70f 100644 --- a/notebooks/kaggle-competition.archive.json +++ b/notebooks/kaggle-competition.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "2018 NOMAD-Kaggle research competition", "description": "In this tutorial, we will explore the best results of the NOMAD 2018 Kaggle research competition. The goal of this competition was to develop machine-learning models for the prediction of two target properties: the formation energy and the bandgap energy of transparent semiconducting oxides. The purpose of the modelling is to facilitate the discovery of new such materials and allow for advancements in (opto)electronic technologies", "date": "2021-01-19", diff --git a/notebooks/krr4mat.archive.json b/notebooks/krr4mat.archive.json index 923faa2..9a84c9e 100644 --- a/notebooks/krr4mat.archive.json +++ b/notebooks/krr4mat.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to kernel ridge regression for materials-property prediction", "description": "In this tutorial, we will explore the application of kernel ridge regression to the prediction of materials properties. We will begin with a largely informal, pragmatic introduction to kernel ridge regression, including a rudimentary implementation, in order to become familiar with the basic terminology and considerations. We will then discuss representations, and re-trace the NOMAD 2018 Kaggle challenge.", "date": "2020-12-15", diff --git a/notebooks/nn-regression.archive.json b/notebooks/nn-regression.archive.json index 3ced614..8b97a52 100644 --- a/notebooks/nn-regression.archive.json +++ b/notebooks/nn-regression.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to multilayer perceptrons (deep neural networks)", "description": "In this tutorial, we discuss how multilayer perceptrons, a standard neural-network architecture, can be employed for regression tasks. Specifically, we will use the ElemNet neural-network architecture to predict the volume per atom of inorganic compounds, where the Open Quantum Materials Database (OQMD) is used as a resource.", "date": "2021-01-29", diff --git a/notebooks/perovskite-tolerance-factor.archive.json b/notebooks/perovskite-tolerance-factor.archive.json index 45e2559..8ee23c7 100644 --- a/notebooks/perovskite-tolerance-factor.archive.json +++ b/notebooks/perovskite-tolerance-factor.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Finding a tolerance factor to predict perovskite stability with SISSO", "description": "This tutorial shows how a tolerance factor for predicting perovskite stability can be learned from data with the sure-independece-screening-and-sparsifying-operator (SISSO) descriptor-identification approach.", "date": "2022-05-18", diff --git a/notebooks/query-nomad-archive.archive.json b/notebooks/query-nomad-archive.archive.json index 2806da2..5cde39e 100644 --- a/notebooks/query-nomad-archive.archive.json +++ b/notebooks/query-nomad-archive.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Querying the NOMAD Archive and performing artificial-intelligence modeling", "description": "In this tutorial, we demonstrate how to query the NOMAD Archive from the NOMAD Analytics toolkit. We then show examples of machine learning analysis performed on the retrieved data set.", "date": "2022-04-06", diff --git a/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json b/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json index 08ec8c8..e842906 100644 --- a/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json +++ b/notebooks/sgd-alloys-oxygen-reduction-evolution.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to subgroup discovery: Identifying outstanding transition-metal-alloy catalysts", "description": "This tutorial introduces, by means of two applications in materials science, the artificial-intelligence technique subgroup discovery.", "date": "2021-10-28", diff --git a/notebooks/sgd-propylene-oxidation-hte.archive.json b/notebooks/sgd-propylene-oxidation-hte.archive.json index 2419208..eb3f533 100644 --- a/notebooks/sgd-propylene-oxidation-hte.archive.json +++ b/notebooks/sgd-propylene-oxidation-hte.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Learning Design Rules for Catalysts from High-Throughput Experimentation and Theory via Subgroup Discovery", "description": "This tutorial explores the application of subgroup discovery (SGD) to an experimental-theoretical data set in order to identify rules on key physicochemical parameters that describe the materials and environmental conditions associated with outstanding performance in heterogeneous catalysis.", "date": "2022-2-09", diff --git a/notebooks/soap-atomic-charges.archive.json b/notebooks/soap-atomic-charges.archive.json index e3d7f16..ba247e7 100644 --- a/notebooks/soap-atomic-charges.archive.json +++ b/notebooks/soap-atomic-charges.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Machine learning atomic charges", "description": "In this tutorial, we will use Gaussian process regression, GPR (or equivalently, Kernel Ridge Regression, KRR) to train and predict charges on atoms in small organic molecules.", "date": "2019-09-26", diff --git a/notebooks/tcmi.archive.json b/notebooks/tcmi.archive.json index 746b9c1..9db412b 100644 --- a/notebooks/tcmi.archive.json +++ b/notebooks/tcmi.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Introduction to total cumulative mutual information", "description": "This interactive notebook introduces the concepts and original implementation of total cumulative mutual information (TCMI), as presented in the related publication. The main results of the publication are also reproduced in a hands-on style", "date": "2020-02-06", diff --git a/notebooks/tetradymite-PRM2020.archive.json b/notebooks/tetradymite-PRM2020.archive.json index bf96145..21ba7fa 100644 --- a/notebooks/tetradymite-PRM2020.archive.json +++ b/notebooks/tetradymite-PRM2020.archive.json @@ -1,6 +1,6 @@ { "data": { - "m_def": "nomad_aitoolkit.schema.package.AIToolkitNotebook", + "m_def": "nomad_aitoolkit.schema.AIToolkitNotebook", "name": "Discovery of new topological insulators in alloyed tetradymites", "description": "Learn how to find descriptive parameters (short formulas) that predict whether alloyed materials are topological or trivial insulators, using the example of tetradymites. This notebook is based on the algorithm 'sure independence screening and sparsifying operator' (SISSO) that enables to search for optimal descriptor by scanning huge feature spaces.", "date": "2020-09-15", diff --git a/notebooks/tutorial_stats.ipynb b/notebooks/tutorial_stats.ipynb index d771623..aa1b3c1 100644 --- a/notebooks/tutorial_stats.ipynb +++ b/notebooks/tutorial_stats.ipynb @@ -176,13 +176,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "def build_new_tutorial(tutorial):\n", " new = {\n", - " \"m_def\": \"nomad_aitoolkit.schema.package.AIToolkitNotebook\",\n", + " \"m_def\": \"nomad_aitoolkit.schema.AIToolkitNotebook\",\n", " 'name': tutorial['title'],\n", " 'description': tutorial['description'],\n", " 'date': tutorial['updated'],\n", diff --git a/pyproject.toml b/pyproject.toml index c39bb40..b4d9874 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,6 @@ package-dir = { "" = "src" } where = ["src"] [project.entry-points.'nomad.plugin'] -aitookitschema = "nomad_aitoolkit.schema:aitoolkit" +aitookitschema = "nomad_aitoolkit:aitoolkit" aitookitapp = "nomad_aitoolkit.apps:aitoolkit" diff --git a/src/nomad_aitoolkit/__init__.py b/src/nomad_aitoolkit/__init__.py index e69de29..5e80b22 100644 --- a/src/nomad_aitoolkit/__init__.py +++ b/src/nomad_aitoolkit/__init__.py @@ -0,0 +1,14 @@ +from nomad.config.models.plugins import SchemaPackageEntryPoint + + +class AIToolkitSchemaPackageEntryPoint(SchemaPackageEntryPoint): + def load(self): + from nomad_aitoolkit.schema import m_package + + return m_package + + +aitoolkit = AIToolkitSchemaPackageEntryPoint( + name='AIToolkit', + description='Describes the basic schemas for AI Toolkit notebooks.', +) diff --git a/src/nomad_aitoolkit/apps/__init__.py b/src/nomad_aitoolkit/apps/__init__.py index 3e85f01..cd69987 100644 --- a/src/nomad_aitoolkit/apps/__init__.py +++ b/src/nomad_aitoolkit/apps/__init__.py @@ -36,13 +36,13 @@ filters_locked = { 'upload_id': upload_ids, 'section_defs.definition_qualified_name': [ - 'nomad_aitoolkit.schema.package.AIToolkitNotebook' + 'nomad_aitoolkit.schema.AIToolkitNotebook' ], } else: filters_locked = { 'section_defs.definition_qualified_name': [ - 'nomad_aitoolkit.schema.package.AIToolkitNotebook' + 'nomad_aitoolkit.schema.AIToolkitNotebook' ] } @@ -56,8 +56,8 @@ path='ai-toolkit', category='Tools', filters=Filters( - include=['*#nomad_aitoolkit.schema.package.AIToolkitNotebook'], - exclude=['*#nomad.datamodel.metainfo.eln.BasicEln'], + include=['*#nomad_aitoolkit.schema.AIToolkitNotebook'], + # exclude=['*#nomad.datamodel.metainfo.eln.BasicEln'], ), filters_locked=filters_locked, columns=Columns( @@ -65,31 +65,31 @@ 'entry_id', 'entry_type', 'authors', - 'data.name#nomad_aitoolkit.schema.package.AIToolkitNotebook', - 'data.category#nomad_aitoolkit.schema.package.AIToolkitNotebook', - 'data.platform#nomad_aitoolkit.schema.package.AIToolkitNotebook', - 'data.date#nomad_aitoolkit.schema.package.AIToolkitNotebook', + 'data.name#nomad_aitoolkit.schema.AIToolkitNotebook', + 'data.category#nomad_aitoolkit.schema.AIToolkitNotebook', + 'data.platform#nomad_aitoolkit.schema.AIToolkitNotebook', + 'data.date#nomad_aitoolkit.schema.AIToolkitNotebook', ], selected=[ - 'data.name#nomad_aitoolkit.schema.package.AIToolkitNotebook', + 'data.name#nomad_aitoolkit.schema.AIToolkitNotebook', 'authors', - 'data.category#nomad_aitoolkit.schema.package.AIToolkitNotebook', - 'data.date#nomad_aitoolkit.schema.package.AIToolkitNotebook', + 'data.category#nomad_aitoolkit.schema.AIToolkitNotebook', + 'data.date#nomad_aitoolkit.schema.AIToolkitNotebook', ], options={ 'entry_id': Column(), 'entry_type': Column(label='Entry type', align=AlignEnum.LEFT), 'authors': Column(label='Authors', align=AlignEnum.LEFT), - 'data.name#nomad_aitoolkit.schema.package.AIToolkitNotebook': Column( + 'data.name#nomad_aitoolkit.schema.AIToolkitNotebook': Column( label='Name', align=AlignEnum.LEFT ), - 'data.category#nomad_aitoolkit.schema.package.AIToolkitNotebook': Column( + 'data.category#nomad_aitoolkit.schema.AIToolkitNotebook': Column( label='Category' ), - 'data.platform#nomad_aitoolkit.schema.package.AIToolkitNotebook': Column( + 'data.platform#nomad_aitoolkit.schema.AIToolkitNotebook': Column( label='Platform', align=AlignEnum.LEFT ), - 'data.date#nomad_aitoolkit.schema.package.AIToolkitNotebook': Column( + 'data.date#nomad_aitoolkit.schema.AIToolkitNotebook': Column( label='Last update', align=AlignEnum.LEFT, format=Format(mode=ModeEnum.DATE), @@ -109,7 +109,7 @@ widgets=[ WidgetTerms( type='terms', - quantity='data.category#nomad_aitoolkit.schema.package.AIToolkitNotebook', + quantity='data.category#nomad_aitoolkit.schema.AIToolkitNotebook', scale=ScaleEnum.POW1, layout={ BreakpointEnum.XXL: Layout(h=6, w=6, x=0, y=0), @@ -121,7 +121,7 @@ ), WidgetTerms( type='terms', - quantity='data.methods.name#nomad_aitoolkit.schema.package.AIToolkitNotebook', + quantity='data.methods.name#nomad_aitoolkit.schema.AIToolkitNotebook', title='Methods', scale=ScaleEnum.POW1, layout={ @@ -134,7 +134,7 @@ ), WidgetTerms( type='terms', - quantity='data.systems.name#nomad_aitoolkit.schema.package.AIToolkitNotebook', + quantity='data.systems.name#nomad_aitoolkit.schema.AIToolkitNotebook', title='Systems', scale=ScaleEnum.POW1, layout={ diff --git a/src/nomad_aitoolkit/schema/__init__.py b/src/nomad_aitoolkit/schema/__init__.py index b51b104..a1c4a08 100644 --- a/src/nomad_aitoolkit/schema/__init__.py +++ b/src/nomad_aitoolkit/schema/__init__.py @@ -1,14 +1,206 @@ -from nomad.config.models.plugins import SchemaPackageEntryPoint +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger -class AIToolkitSchemaPackageEntryPoint(SchemaPackageEntryPoint): - def load(self): - from nomad_aitoolkit.schema.package import m_package +import xml - return m_package +from nomad.config import config +from nomad.datamodel.data import ( + ArchiveSection, + EntryDataCategory, + Schema, +) +from nomad.datamodel.data import Author as NomadAuthor +from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum +from nomad.metainfo import ( + Category, + Datetime, + MEnum, + Quantity, + SchemaPackage, + Section, + SubSection, +) +# configuration = config.get_plugin_entry_point('nomad_aitoolkit:aitoolkit') -aitoolkit = AIToolkitSchemaPackageEntryPoint( - name='AIToolkit', - description='Describes the basic schemas for AI Toolkit notebooks.', -) + +m_package = SchemaPackage(name='AI Toolkit Notebook schema') + + +class ToolsCategory(EntryDataCategory): + m_def = Category(label='Tools', categories=[EntryDataCategory]) + + +def remove_tags(text): + return ''.join(xml.etree.ElementTree.fromstring(text).itertext()) + + +class Method(ArchiveSection): + m_def = Section(a_eln=ELNAnnotation(overview=True)) + + name = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), + description='Specifying the name of method.', + ) + + +class System(ArchiveSection): + m_def = Section(a_eln=ELNAnnotation(overview=True)) + + name = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), + description='Specifying name of the system.', + ) + + +class Author(ArchiveSection): + m_def = Section(a_eln=ELNAnnotation(overview=True)) + + first_name = Quantity( + type=str, + a_eln=ELNAnnotation( + component=ELNComponentEnum.StringEditQuantity, label='First Name' + ), + description='First name of the author', + ) + + last_name = Quantity( + type=str, + a_eln=ELNAnnotation( + component=ELNComponentEnum.StringEditQuantity, label='Last Name' + ), + description='Last name of the author.', + ) + + +class Reference(ArchiveSection): + m_def = Section(a_eln=ELNAnnotation(overview=True)) + + kind = Quantity( + type=str, + a_eln=ELNAnnotation( + component=ELNComponentEnum.EnumEditQuantity, + props=dict( + suggestions=[ + 'article url', + 'article doi', + 'repository', + 'video', + 'docker image', + 'documentation', + 'hub', + 'other', + ] + ), + ), + ) + + name = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), + description='Human readable name for the reference.', + ) + + description = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.RichTextEditQuantity), + description='Extra details about the reference.', + ) + + uri = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.URLEditQuantity, label='URI'), + description='External URI for the reference.', + ) + + version = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), + description='Optional field to adding version information.', + ) + + +class AIToolkitNotebook(Schema): + m_def = Section( + label='AI Toolkit Notebook', + categories=[ToolsCategory], + a_eln=ELNAnnotation(), + ) + + name = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), + label='Name/Title', + description='The short name of the AI Toolkit.', + ) + + description = Quantity( + type=str, + a_eln=ELNAnnotation(component=ELNComponentEnum.RichTextEditQuantity), + description='Short description of the AI Toolkit', + ) + + date = Quantity( + type=Datetime, + a_eln=ELNAnnotation(component=ELNComponentEnum.DateEditQuantity), + label='Last update', + description='The date of the last update.', + ) + + category = Quantity( + type=str, + a_eln=ELNAnnotation( + component=ELNComponentEnum.EnumEditQuantity, + props=dict( + suggestions=[ + 'advanced tutorial', + 'beginner tutorial', + 'intermediate tutorial', + 'query tutorial', + 'thermal transport', + ] + ), + ), + ) + + platform = Quantity( + type=MEnum(['Python', 'Julia', 'R', 'other']), + a_eln=ELNAnnotation( + component=ELNComponentEnum.AutocompleteEditQuantity, + ), + ) + + authors = SubSection(section=Author, repeats=True) + + methods = SubSection(section=Method, repeats=True) + + systems = SubSection(section=System, repeats=True) + + references = SubSection(section=Reference, repeats=True) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + if self.name: + archive.metadata.entry_name = self.name + + if self.description: + if self.description.startswith('<'): + comment = remove_tags(self.description) + else: + comment = self.description + + archive.metadata.comment = comment + + if self.authors: + archive.metadata.entry_coauthors = [ + NomadAuthor(**author.m_to_dict()) for author in self.authors + ] + + +m_package.__init_metainfo__() diff --git a/src/nomad_aitoolkit/schema/package.py b/src/nomad_aitoolkit/schema/package.py deleted file mode 100644 index 61a9a4f..0000000 --- a/src/nomad_aitoolkit/schema/package.py +++ /dev/null @@ -1,206 +0,0 @@ -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from nomad.datamodel.datamodel import EntryArchive - from structlog.stdlib import BoundLogger - -import xml - -from nomad.config import config -from nomad.datamodel.data import ( - ArchiveSection, - EntryDataCategory, - Schema, -) -from nomad.datamodel.data import Author as NomadAuthor -from nomad.datamodel.metainfo.annotations import ELNAnnotation, ELNComponentEnum -from nomad.metainfo import ( - Category, - Datetime, - MEnum, - Quantity, - SchemaPackage, - Section, - SubSection, -) - -configuration = config.get_plugin_entry_point('nomad_aitoolkit.schema:aitoolkit') - - -def remove_tags(text): - return ''.join(xml.etree.ElementTree.fromstring(text).itertext()) - - -m_package = SchemaPackage(name='AI Toolkit Notebook schema') - - -class ToolsCategory(EntryDataCategory): - m_def = Category(label='Tools', categories=[EntryDataCategory]) - - -class Method(ArchiveSection): - m_def = Section(a_eln=ELNAnnotation(overview=True)) - - name = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), - description='Specifying the name of method.', - ) - - -class System(ArchiveSection): - m_def = Section(a_eln=ELNAnnotation(overview=True)) - - name = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), - description='Specifying name of the system.', - ) - - -class Author(ArchiveSection): - m_def = Section(a_eln=ELNAnnotation(overview=True)) - - first_name = Quantity( - type=str, - a_eln=ELNAnnotation( - component=ELNComponentEnum.StringEditQuantity, label='First Name' - ), - description='First name of the author', - ) - - last_name = Quantity( - type=str, - a_eln=ELNAnnotation( - component=ELNComponentEnum.StringEditQuantity, label='Last Name' - ), - description='Last name of the author.', - ) - - -class Reference(ArchiveSection): - m_def = Section(a_eln=ELNAnnotation(overview=True)) - - kind = Quantity( - type=str, - a_eln=ELNAnnotation( - component=ELNComponentEnum.EnumEditQuantity, - props=dict( - suggestions=[ - 'article url', - 'article doi', - 'repository', - 'video', - 'docker image', - 'documentation', - 'hub', - 'other', - ] - ), - ), - ) - - name = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), - description='Human readable name for the reference.', - ) - - description = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.RichTextEditQuantity), - description='Extra details about the reference.', - ) - - uri = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.URLEditQuantity, label='URI'), - description='External URI for the reference.', - ) - - version = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), - description='Optional field to adding version information.', - ) - - -class AIToolkitNotebook(Schema): - m_def = Section( - label='AI Toolkit Notebook', - categories=[ToolsCategory], - a_eln=ELNAnnotation(), - ) - - name = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.StringEditQuantity), - label='Name/Title', - description='The short name of the AI Toolkit.', - ) - - description = Quantity( - type=str, - a_eln=ELNAnnotation(component=ELNComponentEnum.RichTextEditQuantity), - description='Short description of the AI Toolkit', - ) - - date = Quantity( - type=Datetime, - a_eln=ELNAnnotation(component=ELNComponentEnum.DateEditQuantity), - label='Last update', - description='The date of the last update.', - ) - - category = Quantity( - type=str, - a_eln=ELNAnnotation( - component=ELNComponentEnum.EnumEditQuantity, - props=dict( - suggestions=[ - 'advanced tutorial', - 'beginner tutorial', - 'intermediate tutorial', - 'query tutorial', - 'thermal transport', - ] - ), - ), - ) - - platform = Quantity( - type=MEnum(['Python', 'Julia', 'R', 'other']), - a_eln=ELNAnnotation( - component=ELNComponentEnum.AutocompleteEditQuantity, - ), - ) - - authors = SubSection(section=Author, repeats=True) - - methods = SubSection(section=Method, repeats=True) - - systems = SubSection(section=System, repeats=True) - - references = SubSection(section=Reference, repeats=True) - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - - if self.name: - archive.metadata.entry_name = self.name - - if self.description: - if self.description.startswith('<'): - comment = remove_tags(self.description) - else: - comment = self.description - - archive.metadata.comment = comment - - if self.authors: - archive.metadata.entry_coauthors = [ - NomadAuthor(**author.m_to_dict()) for author in self.authors - ] - - -m_package.__init_metainfo__() From c28b1dde40277fd0f586c92ecbb01d1a9a51a3d7 Mon Sep 17 00:00:00 2001 From: Adam Fekete Date: Mon, 29 Jul 2024 04:42:40 +0200 Subject: [PATCH 3/4] fix format --- src/nomad_aitoolkit/schema/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/nomad_aitoolkit/schema/__init__.py b/src/nomad_aitoolkit/schema/__init__.py index a1c4a08..88c11e5 100644 --- a/src/nomad_aitoolkit/schema/__init__.py +++ b/src/nomad_aitoolkit/schema/__init__.py @@ -6,7 +6,6 @@ import xml -from nomad.config import config from nomad.datamodel.data import ( ArchiveSection, EntryDataCategory, @@ -24,8 +23,6 @@ SubSection, ) -# configuration = config.get_plugin_entry_point('nomad_aitoolkit:aitoolkit') - m_package = SchemaPackage(name='AI Toolkit Notebook schema') From 288710a00f51051983c12d86fd855d11fe1303f7 Mon Sep 17 00:00:00 2001 From: Adam Fekete Date: Mon, 29 Jul 2024 04:45:16 +0200 Subject: [PATCH 4/4] fix linter warnings --- src/nomad_aitoolkit/schema/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/nomad_aitoolkit/schema/__init__.py b/src/nomad_aitoolkit/schema/__init__.py index 88c11e5..44b6d7e 100644 --- a/src/nomad_aitoolkit/schema/__init__.py +++ b/src/nomad_aitoolkit/schema/__init__.py @@ -23,7 +23,6 @@ SubSection, ) - m_package = SchemaPackage(name='AI Toolkit Notebook schema')