diff --git a/src/nomad_bayesian_optimization/apps/tasks.py b/src/nomad_bayesian_optimization/apps/tasks.py index 1dba981..144b24a 100644 --- a/src/nomad_bayesian_optimization/apps/tasks.py +++ b/src/nomad_bayesian_optimization/apps/tasks.py @@ -8,7 +8,7 @@ SearchQuantities, ) -schema_name = 'nomad_bayesian_optimization.schema_packages.bayesian_optimization.BayesianOptimization' +schema_name = 'nomad_bayesian_optimization.schema_packages.bayesian_optimization.BayesianOptimization' # noqa: E501 app = App( label='Bayesian Optimizations Tasks', path='bayesian-optimization-tasks', diff --git a/src/nomad_bayesian_optimization/example_uploads/getting_started/notebook.ipynb b/src/nomad_bayesian_optimization/example_uploads/getting_started/notebook.ipynb index c1d5c0e..1d11e92 100644 --- a/src/nomad_bayesian_optimization/example_uploads/getting_started/notebook.ipynb +++ b/src/nomad_bayesian_optimization/example_uploads/getting_started/notebook.ipynb @@ -1,255 +1,257 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 1. Introduction\n", - "\n", - "This notebook demonstrates how to build a Bayesian optimization task with manual acquisition. Manual acquisition means that although this notebook will propose what kind of new samples should be created, NOMAD users will need to create them manually. This is required when the sample creation cannot be automated or fully controlled by the notebook, e.g. they require manual work in a lab.\n", - "\n", - "## What is Bayesian Optimization?\n", - "\n", - "Bayesian Optimization is a technique for finding the best solution to a problem when testing every possible option is too time-consuming or expensive. Imagine you're trying to make the perfect cake, but each ingredient combination takes a lot of time and effort to try. 
Instead of baking every possible cake, Bayesian Optimization helps you decide which combinations to try next, based on what you’ve learned so far.\n", - "\n", - "It does this by building a \"probability model\" that predicts how good different options might be. After trying a few options, the model suggests the next best option to test—one that has a good chance of being the best or teaches you something new. Over time, this approach zeroes in on the best solution without needing to try everything.\n", - "\n", - "This method is widely used in areas like machine learning, where testing models can be very expensive, or in engineering, where designing experiments can be costly. It’s like having a smart assistant that helps you explore the most promising paths first, saving you time and resources." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 2. Defining the search space\n", - "\n", - "The first task is to define the search space for the optimization. This simply means that we give a list of the parameters that can be controlled, and some kind of reasonable limits for them.\n", - "\n", - "In the NOMAD context, this means that we define a list of `quantities` that are used as the inputs, and give them some ranges. These quantities need to be part of some existing schema that describes the samples that will be created. E.g. for an experiment, they could be quantities in a schema that describes how the sample is created. If you do not already have a schema for the samples you are working with, please head over to our documentation on creating new schemas." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "Bc5bQEtUsBs_" - }, - "outputs": [], - "source": [ - "from baybe.parameters import CategoricalParameter, NumericalContinuousParameter\n", - "from baybe.searchspace import SearchSpace\n", - "api_url = ''\n", - "schema_name = 'nomad_bayesian_optimization.schema_packages.experiments.CVDExperiment'\n", - "\n", - "parameters = [\n", - " CategoricalParameter(\n", - " name='substrate',\n", - " values=['Silicon carbide', 'Silicon', 'Gallium nitride'],\n", - " encoding='OHE', # one-hot encoding of categories\n", - " ),\n", - " NumericalContinuousParameter(\n", - " name='temperature',\n", - " bounds=(300, 600),\n", - " ),\n", - " NumericalContinuousParameter(\n", - " name='gas_flow_rate',\n", - " bounds=(0.2, 5),\n", - " ),\n", - "]\n", - "\n", - "searchspace = SearchSpace.from_product(parameters)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 3. Define the optimization target\n", - "\n", - "Next we need to define the optimization target. Depending on the use case, it might be that we wish for a certain property to achive a specific value, or we wish to minimize/maximize some value.\n", - "\n", - "In the NOMAD context, we need to define a quantity in our sample schema that we wish to use as an optimization target, and then specify what an optimal value is." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "RUBqNtLjsO-Y" - }, - "outputs": [], - "source": [ - "from baybe.objectives import SingleTargetObjective\n", - "from baybe.targets import NumericalTarget\n", - "\n", - "refractive_index_target = 2.6473\n", - "refractive_index_sigma = 0.2\n", - "target = NumericalTarget(\n", - " name='refractive_index',\n", - " mode='MATCH',\n", - " bounds=(\n", - " refractive_index_target - refractive_index_sigma,\n", - " refractive_index_target + refractive_index_sigma,\n", - " ),\n", - " transformation='BELL',\n", - ")\n", - "objective = SingleTargetObjective(target=target)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 4. Define acquisition strategy\n", - "\n", - "Acquisition is the process of suggesting new samples that should be tried out. Here you can control how this acquisition step should be performed by the algorithm by e.g. balancing between \"exploration\" and \"exploitation\": we can control how adventurously we wish the algorithm to pick new samples, taking into account the existing samples. If starting a new acquisition from scratch, we often need to make the process in two steps: First we pick samples more or less randomly from the search space, after which we start another approach that takes the gained knowledge into account.\n", - "\n", - "Fully understanding and properly tuning this step requires knowledge about the Bayesian Optimization theory. We do, however, give some reasonable defaults here for different optimization use cases." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "Qi_-6nSxscPR" - }, - "outputs": [], - "source": [ - "from baybe.recommenders import TwoPhaseMetaRecommender, NaiveHybridSpaceRecommender\n", - "\n", - "recommender = TwoPhaseMetaRecommender(recommender=NaiveHybridSpaceRecommender())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 5. 
Define how new samples are fetched" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "\n", - "import numpy as np\n", - "\n", - "from nomad_bayesian_optimization.schema_packages.experiments import CVDExperiment\n", - "\n", - "\n", - "def get_samples(recommendations):\n", - " \"\"\"In this function you can decide how the actual experiment/simulation is\n", - " performed. There are several alternatives:\n", - "\n", - " - Maybe you can control measurement devices directly through API calls.\n", - " - Maybe you create a loop that waits until someone manually inserts the\n", - " experiment results into NOMAD, and then query the results from it using\n", - " the NOMAD API.\n", - " - Maybe you run a simulation in this notebook\n", - " - Maybe you run a simulation using an HPC batch system\n", - "\n", - " In this example, we will create entries by sampling from a\n", - " fake model.\n", - " \"\"\"\n", - " for _, row in recommendations.iterrows():\n", - " cvd_experiment = CVDExperiment().m_from_dict(row.to_dict())\n", - " temp_mu = 400\n", - " temp_sigma = 200\n", - " gas_flow_mu = 2\n", - " gas_flow_sigma = 3\n", - " ideal_substrate = 'Silicon carbide'\n", - " refractive_index = float(\n", - " 2.6473\n", - " * np.exp(-((cvd_experiment.temperature.m - temp_mu) ** 2 / temp_sigma**2))\n", - " * np.exp(\n", - " -(\n", - " (cvd_experiment.gas_flow_rate.m - gas_flow_mu) ** 2\n", - " / gas_flow_sigma**2\n", - " )\n", - " )\n", - " )\n", - " if cvd_experiment.substrate != ideal_substrate:\n", - " refractive_index *= 0.9\n", - " cvd_experiment.refractive_index = refractive_index\n", - " return cvd_experiment\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 6. 
Start the optimization loop" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4PSMvrp-sdDL" - }, - "outputs": [], - "source": [ - "import json\n", - "from baybe import Campaign\n", - "from nomad.datamodel import EntryArchive\n", - "from nomad_bayesian_optimization.schema_packages.bayesian_optimization import (\n", - " BayesianOptimization, Optimization\n", - ")\n", - "\n", - "# Start a new optimization task. This task will run until the desired accuracy\n", - "# has been achieved. You can leave this running as a NORTH tool and come back to\n", - "# it later.\n", - "campaign = Campaign(searchspace, objective, recommender)\n", - "i = 0\n", - "result = 0\n", - "threshold = 0.05\n", - "while abs(refractive_index_target - result) > threshold:\n", - " df = campaign.recommend(batch_size=1)\n", - " print('New recommendation:')\n", - " print(df)\n", - " print('Start testing recommendation...')\n", - " archive = get_samples(df)\n", - " result = archive.refractive_index\n", - " print(f'Testing finished, refractive_index: {result}')\n", - " df['refractive_index'] = [result]\n", - " campaign.add_measurements(df)\n", - "print('Optimization finished!')\n", - "\n", - "# At the end of the run, lets store the whole optimization run into an entry\n", - "archive = EntryArchive()\n", - "bopt = BayesianOptimization.from_baybe_campaign(campaign)\n", - "archive.data = bopt\n", - "bopt.normalize(archive, None)\n", - "with open('example.archive.json', 'w') as fout:\n", - " json.dump(archive.m_to_dict(), fout, indent=2)" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "nomad", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0rc1" - } + "cells": [ + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Introduction\n", + "\n", + "This notebook demonstrates how to build a Bayesian optimization task with manual acquisition. Manual acquisition means that although this notebook will propose what kind of new samples should be created, NOMAD users will need to create them manually. This is required when the sample creation cannot be automated or fully controlled by the notebook, e.g. they require manual work in a lab.\n", + "\n", + "## What is Bayesian Optimization?\n", + "\n", + "Bayesian Optimization is a technique for finding the best solution to a problem when testing every possible option is too time-consuming or expensive. Imagine you're trying to make the perfect cake, but each ingredient combination takes a lot of time and effort to try. Instead of baking every possible cake, Bayesian Optimization helps you decide which combinations to try next, based on what you’ve learned so far.\n", + "\n", + "It does this by building a \"probability model\" that predicts how good different options might be. After trying a few options, the model suggests the next best option to test—one that has a good chance of being the best or teaches you something new. Over time, this approach zeroes in on the best solution without needing to try everything.\n", + "\n", + "This method is widely used in areas like machine learning, where testing models can be very expensive, or in engineering, where designing experiments can be costly. It’s like having a smart assistant that helps you explore the most promising paths first, saving you time and resources." + ] }, - "nbformat": 4, - "nbformat_minor": 0 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2. Defining the search space\n", + "\n", + "The first task is to define the search space for the optimization. 
This simply means that we give a list of the parameters that can be controlled, and some kind of reasonable limits for them.\n", + "\n", + "In the NOMAD context, this means that we define a list of `quantities` that are used as the inputs, and give them some ranges. These quantities need to be part of some existing schema that describes the samples that will be created. E.g. for an experiment, they could be quantities in a schema that describes how the sample is created. If you do not already have a schema for the samples you are working with, please head over to our documentation on creating new schemas." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "Bc5bQEtUsBs_" + }, + "outputs": [], + "source": [ + "from baybe.parameters import CategoricalParameter, NumericalContinuousParameter\n", + "from baybe.searchspace import SearchSpace\n", + "\n", + "api_url = ''\n", + "schema_name = 'nomad_bayesian_optimization.schema_packages.experiments.CVDExperiment'\n", + "\n", + "parameters = [\n", + " CategoricalParameter(\n", + " name='substrate',\n", + " values=['Silicon carbide', 'Silicon', 'Gallium nitride'],\n", + " encoding='OHE', # one-hot encoding of categories\n", + " ),\n", + " NumericalContinuousParameter(\n", + " name='temperature',\n", + " bounds=(300, 600),\n", + " ),\n", + " NumericalContinuousParameter(\n", + " name='gas_flow_rate',\n", + " bounds=(0.2, 5),\n", + " ),\n", + "]\n", + "\n", + "searchspace = SearchSpace.from_product(parameters)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Define the optimization target\n", + "\n", + "Next we need to define the optimization target. 
Depending on the use case, it might be that we wish for a certain property to achieve a specific value, or we wish to minimize/maximize some value.\n", + "\n", + "In the NOMAD context, we need to define a quantity in our sample schema that we wish to use as an optimization target, and then specify what an optimal value is." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "RUBqNtLjsO-Y" + }, + "outputs": [], + "source": [ + "from baybe.objectives import SingleTargetObjective\n", + "from baybe.targets import NumericalTarget\n", + "\n", + "refractive_index_target = 2.6473\n", + "refractive_index_sigma = 0.2\n", + "target = NumericalTarget(\n", + " name='refractive_index',\n", + " mode='MATCH',\n", + " bounds=(\n", + " refractive_index_target - refractive_index_sigma,\n", + " refractive_index_target + refractive_index_sigma,\n", + " ),\n", + " transformation='BELL',\n", + ")\n", + "objective = SingleTargetObjective(target=target)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. Define acquisition strategy\n", + "\n", + "Acquisition is the process of suggesting new samples that should be tried out. Here you can control how this acquisition step should be performed by the algorithm by e.g. balancing between \"exploration\" and \"exploitation\": we can control how adventurously we wish the algorithm to pick new samples, taking into account the existing samples. If starting a new acquisition from scratch, we often need to make the process in two steps: First we pick samples more or less randomly from the search space, after which we start another approach that takes the gained knowledge into account.\n", + "\n", + "Fully understanding and properly tuning this step requires knowledge about the Bayesian Optimization theory. We do, however, give some reasonable defaults here for different optimization use cases."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "Qi_-6nSxscPR" + }, + "outputs": [], + "source": [ + "from baybe.recommenders import NaiveHybridSpaceRecommender, TwoPhaseMetaRecommender\n", + "\n", + "recommender = TwoPhaseMetaRecommender(recommender=NaiveHybridSpaceRecommender())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. Define how new samples are fetched" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import numpy as np\n", + "\n", + "from nomad_bayesian_optimization.schema_packages.experiments import CVDExperiment\n", + "\n", + "\n", + "def get_samples(recommendations):\n", + " \"\"\"In this function you can decide how the actual experiment/simulation is\n", + " performed. There are several alternatives:\n", + "\n", + " - Maybe you can control measurement devices directly through API calls.\n", + " - Maybe you create a loop that waits until someone manually inserts the\n", + " experiment results into NOMAD, and then query the results from it using\n", + " the NOMAD API.\n", + " - Maybe you run a simulation in this notebook\n", + " - Maybe you run a simulation using an HPC batch system\n", + "\n", + " In this example, we will create entries by sampling from a\n", + " fake model.\n", + " \"\"\"\n", + " for _, row in recommendations.iterrows():\n", + " cvd_experiment = CVDExperiment().m_from_dict(row.to_dict())\n", + " temp_mu = 400\n", + " temp_sigma = 200\n", + " gas_flow_mu = 2\n", + " gas_flow_sigma = 3\n", + " ideal_substrate = 'Silicon carbide'\n", + " refractive_index = float(\n", + " 2.6473\n", + " * np.exp(-((cvd_experiment.temperature.m - temp_mu) ** 2 / temp_sigma**2))\n", + " * np.exp(\n", + " -(\n", + " (cvd_experiment.gas_flow_rate.m - gas_flow_mu) ** 2\n", + " / gas_flow_sigma**2\n", + " )\n", + " )\n", + " )\n", + " if cvd_experiment.substrate != ideal_substrate:\n", + " refractive_index *= 0.9\n", + 
" cvd_experiment.refractive_index = refractive_index\n", + " return cvd_experiment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 6. Start the optimization loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4PSMvrp-sdDL" + }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "from baybe import Campaign\n", + "from nomad.datamodel import EntryArchive\n", + "\n", + "from nomad_bayesian_optimization.schema_packages.bayesian_optimization import (\n", + " BayesianOptimization,\n", + ")\n", + "\n", + "# Start a new optimization task. This task will run until the desired accuracy\n", + "# has been achieved. You can leave this running as a NORTH tool and come back to\n", + "# it later.\n", + "campaign = Campaign(searchspace, objective, recommender)\n", + "i = 0\n", + "result = 0\n", + "threshold = 0.05\n", + "while abs(refractive_index_target - result) > threshold:\n", + " df = campaign.recommend(batch_size=1)\n", + " print('New recommendation:')\n", + " print(df)\n", + " print('Start testing recommendation...')\n", + " archive = get_samples(df)\n", + " result = archive.refractive_index\n", + " print(f'Testing finished, refractive_index: {result}')\n", + " df['refractive_index'] = [result]\n", + " campaign.add_measurements(df)\n", + "print('Optimization finished!')\n", + "\n", + "# At the end of the run, let's store the whole optimization run into an entry\n", + "archive = EntryArchive()\n", + "bopt = BayesianOptimization.from_baybe_campaign(campaign)\n", + "archive.data = bopt\n", + "bopt.normalize(archive, None)\n", + "with open('example.archive.json', 'w') as fout:\n", + " json.dump(archive.m_to_dict(), fout, indent=2)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "nomad", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", +
"mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0rc1" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/src/nomad_bayesian_optimization/schema_packages/__init__.py b/src/nomad_bayesian_optimization/schema_packages/__init__.py index 522d3d5..911cf44 100644 --- a/src/nomad_bayesian_optimization/schema_packages/__init__.py +++ b/src/nomad_bayesian_optimization/schema_packages/__init__.py @@ -17,13 +17,6 @@ def load(self): return m_package -# class JobsPackageEntryPoint(SchemaPackageEntryPoint): -# def load(self): -# from nomad_bayesian_optimization.schema_packages.jobs import m_package - -# return m_package - - experiments = ExperimentsPackageEntryPoint( name='Experiments', description='Dummy schema package for experiments.', @@ -33,8 +26,3 @@ def load(self): name='Bayesian Optimization', description='Schema package for Bayesian optimization runs.', ) - -# jobs = JobsPackageEntryPoint( -# name='Jobs', -# description='Schema package for Jobs.', -# ) diff --git a/src/nomad_bayesian_optimization/schema_packages/bayesian_optimization.py b/src/nomad_bayesian_optimization/schema_packages/bayesian_optimization.py index 02eef30..e520320 100644 --- a/src/nomad_bayesian_optimization/schema_packages/bayesian_optimization.py +++ b/src/nomad_bayesian_optimization/schema_packages/bayesian_optimization.py @@ -196,7 +196,6 @@ class BayesianOptimization(PlotSection, Schema): Optimization. 
""", ) - my_subsection = SubSection(section_def=MySection) def from_baybe_campaign(campaign): # Populate the parts that are directly compatible with a BayBE campaign @@ -215,10 +214,6 @@ def from_baybe_campaign(campaign): def normalize(self, archive, logger): super().normalize(archive, logger) - self.my_subsection = MySection(my_quantity='Test') - self.reference_direct = archive.data.my_subsection - self.reference_inherited = archive.data.my_subsection - # If this entry has been created from a BayBE run, we use that data to # create plots. if self.baybe_campaign: diff --git a/src/nomad_bayesian_optimization/schema_packages/jobs.py b/src/nomad_bayesian_optimization/schema_packages/jobs.py deleted file mode 100644 index a6ddadd..0000000 --- a/src/nomad_bayesian_optimization/schema_packages/jobs.py +++ /dev/null @@ -1,67 +0,0 @@ -# from nomad.datamodel.data import Schema -# from nomad.datamodel.metainfo.action import ActionSection -# from nomad.datamodel.metainfo.annotations import ELNAnnotation -# from nomad.metainfo import MEnum, MSection, Quantity, SchemaPackage, Section, SubSection - -# m_package = SchemaPackage() - - -# class Job(MSection): -# """Represents a single job.""" - -# status = Quantity( -# type=MEnum('Initialized', 'Finished', 'Error'), -# description='Job status.', -# a_eln=ELNAnnotation( -# component='EnumEditQuantity', -# ), -# ) -# batch_id = Quantity( -# type=str, -# description='Batch identifier for this job', -# a_eln=dict(component='StringEditQuantity'), -# ) - - -# class MyAction(ActionSection): -# """Used to create jobs in a batch.""" - -# action_trigger = Quantity( -# description='Press to create a batch of jobs.', -# a_eln=dict(component='ActionEditQuantity', label='Submit batch'), -# ) - -# def perform_action(self, archive, logger): -# if self.action_trigger: -# for i in range(self.n_jobs or 0): -# self.m_add_sub_section( -# Jobs.jobs, Job(batch_id=self.batch_id, status='Initialized') -# ) - - -# class Jobs(MyAction, Schema): -# """Used to 
create jobs in batches.""" - -# m_def = Section( -# a_eln=ELNAnnotation( -# lane_width='600px', -# ) -# ) - -# batch_id = Quantity( -# type=str, -# description='Identifier for this batch', -# a_eln=dict(component='StringEditQuantity'), -# ) -# n_jobs = Quantity( -# type=int, -# description='How many jobs to create.', -# a_eln=dict(component='NumberEditQuantity'), -# ) -# jobs = SubSection(section_def=Job, repeats=True) - -# def normalize(self, archive, logger): -# super().normalize(archive, logger) - - -# m_package.__init_metainfo__() diff --git a/tests/example_uploads/test_getting_started.py b/tests/example_uploads/test_getting_started.py index 14e00be..e21d839 100644 --- a/tests/example_uploads/test_getting_started.py +++ b/tests/example_uploads/test_getting_started.py @@ -1,4 +1,3 @@ def test_importing_app(): # this will raise an exception if pydantic model validation fails for th app - from nomad_bayesian_optimization.apps import myapp - + from nomad_bayesian_optimization.apps.tasks import app # noqa: F401