diff --git a/docs/source/rst_doc_files/gallery.rst b/docs/source/rst_doc_files/gallery.rst index 6b1f0282..0632d685 100644 --- a/docs/source/rst_doc_files/gallery.rst +++ b/docs/source/rst_doc_files/gallery.rst @@ -47,3 +47,14 @@ The :mod:`f3dasm` package comes with built-in implementations of the :class:`~f3 :glob: notebooks/builtins/* + +Integration with hydra +---------------------- + +Examples that integrate the :mod:`f3dasm` package with the configuration manager `hydra <https://hydra.cc/>`_ + +.. nblinkgallery:: + :name: hydra + :glob: + + notebooks/hydra/* diff --git a/docs/source/rst_doc_files/notebooks/builtins/builtinoptimizers.ipynb b/docs/source/rst_doc_files/notebooks/builtins/builtinoptimizers.ipynb index 7f602c99..78ea3fa8 100644 --- a/docs/source/rst_doc_files/notebooks/builtins/builtinoptimizers.ipynb +++ b/docs/source/rst_doc_files/notebooks/builtins/builtinoptimizers.ipynb @@ -16,9 +16,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Domain(input_space={'x0': ContinuousParameter(lower_bound=0.0, upper_bound=1.0, log=False), 'x1': ContinuousParameter(lower_bound=0.0, upper_bound=1.0, log=False)}, output_space={})" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from f3dasm.design import make_nd_continuous_domain\n", "domain = make_nd_continuous_domain(bounds=[[0., 1.], [0., 1.]])\n", @@ -34,9 +45,137 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jobsinputoutput
x0x1y
0finished0.7739560.43887818.573435
1finished0.8585980.69736821.308662
2finished0.0941770.97562221.815402
3finished0.7611400.78606420.719672
4finished0.1281140.45038621.396762
5finished0.3707980.92676521.396784
6finished0.6438650.82276221.112180
7finished0.4434140.22723919.952145
8finished0.5545850.06381721.959833
9finished0.8276310.63166421.527180
\n", + "
" + ], + "text/plain": [ + " jobs input output\n", + " x0 x1 y\n", + "0 finished 0.773956 0.438878 18.573435\n", + "1 finished 0.858598 0.697368 21.308662\n", + "2 finished 0.094177 0.975622 21.815402\n", + "3 finished 0.761140 0.786064 20.719672\n", + "4 finished 0.128114 0.450386 21.396762\n", + "5 finished 0.370798 0.926765 21.396784\n", + "6 finished 0.643865 0.822762 21.112180\n", + "7 finished 0.443414 0.227239 19.952145\n", + "8 finished 0.554585 0.063817 21.959833\n", + "9 finished 0.827631 0.631664 21.527180" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from f3dasm import ExperimentData\n", "# 1. Sample the points from the domain\n", @@ -74,9 +213,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/martin/mambaforge/envs/f3dasm_env3/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "/home/martin/mambaforge/envs/f3dasm_env3/lib/python3.8/site-packages/scipy/optimize/_minimize.py:549: RuntimeWarning: Method Nelder-Mead does not use gradient information (jac).\n", + " warn('Method %s does not use gradient information (jac).' % method,\n" + ] + } + ], "source": [ "experiment_data.optimize(optimizer='nelder mead', data_generator='ackley',\n", " kwargs={'scale_bounds': [[0., 1.], [0., 1.]], 'offset': False},\n", @@ -85,9 +235,217 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jobsinputoutput
x0x1y
0finished0.7739560.43887818.573435
1finished0.8585980.69736821.308662
2finished0.0941770.97562221.815402
3finished0.7611400.78606420.719672
4finished0.1281140.45038621.396762
5finished0.3707980.92676521.396784
6finished0.6438650.82276221.112180
7finished0.4434140.22723919.952145
8finished0.5545850.06381721.959833
9finished0.8276310.63166421.527180
10finished0.7739560.43887821.465102
11finished0.7739560.43887821.465102
12finished0.7739560.43887821.465102
13finished0.7739560.43887821.465102
14finished0.7739560.43887821.465102
15finished0.7739560.43887821.465102
16finished0.7739560.43887821.465102
17finished0.7739560.43887821.465102
18finished0.7739560.43887821.465102
19finished0.7739560.43887821.465102
\n", + "
" + ], + "text/plain": [ + " jobs input output\n", + " x0 x1 y\n", + "0 finished 0.773956 0.438878 18.573435\n", + "1 finished 0.858598 0.697368 21.308662\n", + "2 finished 0.094177 0.975622 21.815402\n", + "3 finished 0.761140 0.786064 20.719672\n", + "4 finished 0.128114 0.450386 21.396762\n", + "5 finished 0.370798 0.926765 21.396784\n", + "6 finished 0.643865 0.822762 21.112180\n", + "7 finished 0.443414 0.227239 19.952145\n", + "8 finished 0.554585 0.063817 21.959833\n", + "9 finished 0.827631 0.631664 21.527180\n", + "10 finished 0.773956 0.438878 21.465102\n", + "11 finished 0.773956 0.438878 21.465102\n", + "12 finished 0.773956 0.438878 21.465102\n", + "13 finished 0.773956 0.438878 21.465102\n", + "14 finished 0.773956 0.438878 21.465102\n", + "15 finished 0.773956 0.438878 21.465102\n", + "16 finished 0.773956 0.438878 21.465102\n", + "17 finished 0.773956 0.438878 21.465102\n", + "18 finished 0.773956 0.438878 21.465102\n", + "19 finished 0.773956 0.438878 21.465102" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "experiment_data" ] @@ -103,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -117,7 +475,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -133,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -149,9 +507,225 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/martin/mambaforge/envs/f3dasm_env3/lib/python3.8/site-packages/scipy/optimize/_minimize.py:549: RuntimeWarning: Method Nelder-Mead does not use gradient information (jac).\n", + " warn('Method %s does not use gradient information (jac).' 
% method,\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jobsinputoutput
x0x1y
0finished0.7739560.43887818.573435
1finished0.8585980.69736821.308662
2finished0.0941770.97562221.815402
3finished0.7611400.78606420.719672
4finished0.1281140.45038621.396762
5finished0.3707980.92676521.396784
6finished0.6438650.82276221.112180
7finished0.4434140.22723919.952145
8finished0.5545850.06381721.959833
9finished0.8276310.63166421.527180
10finished0.7739560.43887821.465102
11finished0.7739560.43887821.465102
12finished0.7739560.43887821.465102
13finished0.7739560.43887821.465102
14finished0.7739560.43887821.465102
15finished0.7739560.43887821.465102
16finished0.7739560.43887821.465102
17finished0.7739560.43887821.465102
18finished0.7739560.43887821.465102
19finished0.7739560.43887821.465102
\n", + "
" + ], + "text/plain": [ + " jobs input output\n", + " x0 x1 y\n", + "0 finished 0.773956 0.438878 18.573435\n", + "1 finished 0.858598 0.697368 21.308662\n", + "2 finished 0.094177 0.975622 21.815402\n", + "3 finished 0.761140 0.786064 20.719672\n", + "4 finished 0.128114 0.450386 21.396762\n", + "5 finished 0.370798 0.926765 21.396784\n", + "6 finished 0.643865 0.822762 21.112180\n", + "7 finished 0.443414 0.227239 19.952145\n", + "8 finished 0.554585 0.063817 21.959833\n", + "9 finished 0.827631 0.631664 21.527180\n", + "10 finished 0.773956 0.438878 21.465102\n", + "11 finished 0.773956 0.438878 21.465102\n", + "12 finished 0.773956 0.438878 21.465102\n", + "13 finished 0.773956 0.438878 21.465102\n", + "14 finished 0.773956 0.438878 21.465102\n", + "15 finished 0.773956 0.438878 21.465102\n", + "16 finished 0.773956 0.438878 21.465102\n", + "17 finished 0.773956 0.438878 21.465102\n", + "18 finished 0.773956 0.438878 21.465102\n", + "19 finished 0.773956 0.438878 21.465102" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "experiment_data.optimize(optimizer='nelder mead', data_generator='ackley',\n", " kwargs={'scale_bounds': [[0., 1.], [0., 1.]], 'offset': False},\n", diff --git a/docs/source/rst_doc_files/notebooks/data-driven/blocks.ipynb b/docs/source/rst_doc_files/notebooks/data-driven/blocks.ipynb index 9cb6de97..5f6a8cba 100644 --- a/docs/source/rst_doc_files/notebooks/data-driven/blocks.ipynb +++ b/docs/source/rst_doc_files/notebooks/data-driven/blocks.ipynb @@ -27,10 +27,164 @@ "\n", "```\n", "\n", - "To create a new block, subclass the `Block` class and implement the `call` method. This method is executed when the block is invoked, accepting any keyword arguments and returning an `ExperimentData` instance. 
Before the `call` method runs, the `arm` method is used to equip the block with the `ExperimentData` instance it will process.\n", + "To create a new block, subclass the `Block` class and implement the `call` method. This method is executed when the block is invoked, accepting any keyword arguments and returning an `ExperimentData` instance. Before the `call` method runs, the `arm` method is used to equip the block with the `ExperimentData` instance it will process.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "\n", + "class CustomBlock(Block):\n", + " def call(self):\n", + " ...\n", + " # Any method that manipulates the experiments\n", + " ...\n", + " return self.data\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "\n", "In order to start the data-driven process, you need to create an `ExperimentData` instance and call the `run()` method of experiment data instance with the block object(s) you want to run." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "custom_block = CustomBlock()\n", + "\n", + "experiment_data.run(block=custom_block)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from f3dasm import ExperimentData, Block\n", + "from f3dasm.design import Domain\n", + "\n", + "\n", + "# Create a new domain object\n", + "domain = Domain()\n", + "\n", + "domain.add_parameter(name='word')\n", + "\n", + "input_data = [\n", + " {'word': 'bar'},\n", + " {'word': 'foo'},\n", + " {'word': 'bar'},\n", + "]\n", + "\n", + "experiment_data = ExperimentData(domain=domain, input_data=input_data)\n", + "experiment_data\n", + "\n", + "class ReadWord(Block):\n", + " def call(self):\n", + " for _, experiment_sample in self.data:\n", + " if experiment_sample.input_data['word'] == 'bar':\n", + " result = 1\n", + "\n", + " else:\n", + " result = 0\n", + "\n", + "
experiment_sample.store(name='result', object=result)\n", + " experiment_sample.mark('finished')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jobsinputoutput
wordresult
0finishedbar1
1finishedfoo0
2finishedbar1
\n", + "
" + ], + "text/plain": [ + " jobs input output\n", + " word result\n", + "0 finished bar 1\n", + "1 finished foo 0\n", + "2 finished bar 1" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "read_word = ReadWord()\n", + "experiment_data.run(read_word)\n", + "experiment_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -40,7 +194,15 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", "version": "3.8.17" } }, diff --git a/docs/source/rst_doc_files/notebooks/design/domain_creation.ipynb b/docs/source/rst_doc_files/notebooks/design/domain_creation.ipynb index 8ff94f0c..230fc604 100644 --- a/docs/source/rst_doc_files/notebooks/design/domain_creation.ipynb +++ b/docs/source/rst_doc_files/notebooks/design/domain_creation.ipynb @@ -68,9 +68,7 @@ "\n", "### Input Parameters\n", "\n", - "Now we will add some input parameters. There are four types of parameters that can be created:\n", - "\n", - "- floating point parameters" + "Now we will add some input parameters. You can use the `add_parameter` method to add an input parameter:" ] }, { @@ -79,15 +77,16 @@ "metadata": {}, "outputs": [], "source": [ - "domain.add_float(name='x1', low=0.0, high=100.0)\n", - "domain.add_float(name='x2', low=0.0, high=4.0)" + "domain.add_parameter(name='x0')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "- discrete integer parameters" + "Parameters can be of any type. 
`f3dasm` has built-in support for the following types:\n", + "\n", + "- floating point parameters" ] }, { @@ -96,15 +95,15 @@ "metadata": {}, "outputs": [], "source": [ - "domain.add_int(name='x3', low=2, high=4)\n", - "domain.add_int(name='x4', low=74, high=99)" + "domain.add_float(name='x1', low=0.0, high=100.0)\n", + "domain.add_float(name='x2', low=0.0, high=4.0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "- categorical parameters" + "- discrete integer parameters" ] }, { @@ -113,15 +112,15 @@ "metadata": {}, "outputs": [], "source": [ - "domain.add_category(name='x5', categories=['test1', 'test2', 'test3', 'test4'])\n", - "domain.add_category(name='x6', categories=[0.9, 0.2, 0.1, -2])" + "domain.add_int(name='x3', low=2, high=4)\n", + "domain.add_int(name='x4', low=74, high=99)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "- constant parameters" + "- categorical parameters" ] }, { @@ -130,14 +129,15 @@ "metadata": {}, "outputs": [], "source": [ - "domain.add_constant(name='x7', value=0.9)" + "domain.add_category(name='x5', categories=['test1', 'test2', 'test3', 'test4'])\n", + "domain.add_category(name='x6', categories=[0.9, 0.2, 0.1, -2])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "if you want to create a parameter that does not have one of the above types, you can use the `add_parameter` method to add a parameter:" + "- constant parameters" ] }, { @@ -146,7 +146,7 @@ "metadata": {}, "outputs": [], "source": [ - "domain.add_parameter(name='x8')" + "domain.add_constant(name='x7', value=0.9)" ] }, { @@ -166,7 +166,7 @@ "output_type": "stream", "text": [ "Domain(\n", - " Input Space: { x1: ContinuousParameter(lower_bound=0.0, upper_bound=100.0, log=False), x2: ContinuousParameter(lower_bound=0.0, upper_bound=4.0, log=False), x3: DiscreteParameter(lower_bound=2, upper_bound=4, step=1), x4: DiscreteParameter(lower_bound=74, upper_bound=99, step=1), x5: CategoricalParameter(categories=['test1', 'test2', 
'test3', 'test4']), x6: CategoricalParameter(categories=[0.9, 0.2, 0.1, -2]), x7: ConstantParameter(value=0.9), x8: Parameter(type=object, to_disk=False) }\n", + " Input Space: { x0: Parameter(type=object, to_disk=False), x1: ContinuousParameter(lower_bound=0.0, upper_bound=100.0, log=False), x2: ContinuousParameter(lower_bound=0.0, upper_bound=4.0, log=False), x3: DiscreteParameter(lower_bound=2, upper_bound=4, step=1), x4: DiscreteParameter(lower_bound=74, upper_bound=99, step=1), x5: CategoricalParameter(categories=['test1', 'test2', 'test3', 'test4']), x6: CategoricalParameter(categories=[0.9, 0.2, 0.1, -2]), x7: ConstantParameter(value=0.9) }\n", " Output Space: { }\n", ")\n" ] @@ -207,7 +207,13 @@ "source": [ "### Storing parameters on disk\n", "\n", - "When the data associated with a parameter is very large (e.g., large arrays or matrices), you can choose to only store a reference in the `ExperimentData` object and store the data on disk. This can be done by setting the `to_disk` parameter to `True` when adding the parameter to the domain.\n", + "As you will see in the next section, the `ExperimentData` object stores data associated with parameters. The data is stored in a tabular format, where each row corresponds to a single evaluation of the design space. The columns of the table correspond to the input parameters and the output values.\n", + "\n", + "Sometimes it is wise to store the data associated with a parameter separately outside this table:\n", + "- when the data associated with a parameter is very large (e.g., large arrays or matrices), it allows you to lazy-load the data when needed\n", + "- when the data should not or cannot be cast to a `.csv` file (e.g., a custom object)\n", + "\n", + "You can choose to only store a reference in the `ExperimentData` object and store the data on disk. 
This can be done by setting the `to_disk` parameter to `True` when adding the parameter to the domain.\n", "\n", "`f3dasm` supports storing and loading data for a few commonly used data types:\n", "\n", @@ -215,7 +221,7 @@ "- pandas dataframes\n", "- xarray datasets and data arrays\n", "\n", - "For any other data types, you can define custom functions to store and load data. These functions should take the data as input and return a string that can be used to identify the data when loading it. You can define these functions using the `store_function` and `load_function` parameters when adding the parameter to the domain.\n", + "For any other data types, you have to define custom functions to store and load data. These functions should take the data as input and return a string that can be used to identify the data when loading it. You can define these functions using the `store_function` and `load_function` parameters when adding the parameter to the domain.\n", "\n", "The following example demonstrates how to store and load a numpy array to and from disk. 
We will use a custom store and load function for this example, but these functions are not necessary for numpy arrays, as `f3dasm` provides built-in support for storing and loading numpy arrays:" ] @@ -374,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -390,16 +396,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Domain(input_space={'x0': ContinuousParameter(lower_bound=-1.0, upper_bound=1.0, log=False), 'x1': ContinuousParameter(lower_bound=-1.0, upper_bound=1.0, log=False)}, output_space={})" + "Domain(input_space={'x0': Parameter(to_disk=False), 'x1': ContinuousParameter(lower_bound=0.0, upper_bound=100.0, log=False), 'x2': ContinuousParameter(lower_bound=0.0, upper_bound=4.0, log=False), 'x3': DiscreteParameter(lower_bound=2, upper_bound=4, step=1), 'x4': DiscreteParameter(lower_bound=74, upper_bound=99, step=1), 'x5': CategoricalParameter(categories=['test1', 'test2', 'test3', 'test4']), 'x6': CategoricalParameter(categories=[0.9, 0.2, 0.1, -2]), 'x7': ConstantParameter(value=0.9), 'array_input': Parameter(to_disk=True)}, output_space={'y': Parameter(to_disk=False), 'array_output': Parameter(to_disk=True)})" ] }, - "execution_count": 19, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -408,6 +414,13 @@ "Domain.from_file('my_domain.json')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> Custom storing and loading functions will be encoded with `pickle` and converted to hexadecimal strings. This allows you to store and load custom functions without having to define them again." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -427,7 +440,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -436,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { diff --git a/docs/source/rst_doc_files/notebooks/design/my_domain.json b/docs/source/rst_doc_files/notebooks/design/my_domain.json index dd4104ea..8e861606 100644 --- a/docs/source/rst_doc_files/notebooks/design/my_domain.json +++ b/docs/source/rst_doc_files/notebooks/design/my_domain.json @@ -1,23 +1,97 @@ { "input_space": { "x0": { + "type": "object", + "to_disk": false, + "store_function": null, + "load_function": null + }, + "x1": { "type": "float", "to_disk": false, "store_function": null, "load_function": null, - "lower_bound": -1.0, - "upper_bound": 1.0, + "lower_bound": 0.0, + "upper_bound": 100.0, "log": false }, - "x1": { + "x2": { "type": "float", "to_disk": false, "store_function": null, "load_function": null, - "lower_bound": -1.0, - "upper_bound": 1.0, + "lower_bound": 0.0, + "upper_bound": 4.0, "log": false + }, + "x3": { + "type": "int", + "to_disk": false, + "store_function": null, + "load_function": null, + "lower_bound": 2, + "upper_bound": 4, + "step": 1 + }, + "x4": { + "type": "int", + "to_disk": false, + "store_function": null, + "load_function": null, + "lower_bound": 74, + "upper_bound": 99, + "step": 1 + }, + "x5": { + "type": "category", + "to_disk": false, + "store_function": null, + "load_function": null, + "categories": [ + "test1", + "test2", + "test3", + "test4" + ] + }, + "x6": { + "type": "category", + "to_disk": false, + "store_function": null, + "load_function": null, + "categories": [ + 0.9, + 0.2, + 0.1, + -2 + ] + }, + "x7": { + "type": "constant", + "to_disk": false, + "store_function": null, + "load_function": null, + "value": 0.9 + }, + "array_input": { + "type": "object", + "to_disk": true, + "store_function": 
"8004951c000000000000008c085f5f6d61696e5f5f948c0b6e756d70795f73746f72659493942e", + "load_function": "8004951b000000000000008c085f5f6d61696e5f5f948c0a6e756d70795f6c6f61649493942e" } }, - "output_space": {} + "output_space": { + "y": { + "type": "object", + "to_disk": false, + "store_function": null, + "load_function": null + }, + "array_output": { + "type": "object", + "to_disk": true, + "store_function": "8004951c000000000000008c085f5f6d61696e5f5f948c0b6e756d70795f73746f72659493942e", + "load_function": "8004951b000000000000008c085f5f6d61696e5f5f948c0a6e756d70795f6c6f61649493942e" + } + } } \ No newline at end of file diff --git a/docs/source/rst_doc_files/notebooks/hydra/config.yaml b/docs/source/rst_doc_files/notebooks/hydra/config.yaml new file mode 100644 index 00000000..38b6bf81 --- /dev/null +++ b/docs/source/rst_doc_files/notebooks/hydra/config.yaml @@ -0,0 +1,19 @@ +domain: + input: + param_1: + type: float + low: -1.0 + high: 1.0 + param_2: + type: int + low: 1 + high: 10 + param_3: + type: category + categories: ['red', 'blue', 'green', 'yellow', 'purple'] + param_4: + type: constant + value: some_value + output: + y: + to_disk: False diff --git a/docs/source/rst_doc_files/notebooks/hydra/usehydra.ipynb b/docs/source/rst_doc_files/notebooks/hydra/usehydra.ipynb new file mode 100644 index 00000000..893deb55 --- /dev/null +++ b/docs/source/rst_doc_files/notebooks/hydra/usehydra.ipynb @@ -0,0 +1,203 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Combining hydra configurations with `f3dasm`\n", + "\n", + "[hydra](https://hydra.cc/) is an open-source configuration management framework that is widely used in machine learning and other software development domains. 
It is designed to help developers manage and organize complex configuration settings for their projects, making it easier to experiment with different configurations, manage multiple environments, and maintain reproducibility in their work.\n", + "\n", + "[hydra](https://hydra.cc/) can be seamlessly integrated with the workflows in f3dasm to manage the configuration settings for the project." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from hydra import compose, initialize\n", + "\n", + "from f3dasm import ExperimentData\n", + "from f3dasm.design import Domain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `Domain` from a [hydra](https://hydra.cc/) configuration file\n", + "\n", + "If you are using [hydra](https://hydra.cc/) to manage your configuration files, you can create a `Domain` from a configuration file. Your config needs to have a key (e.g. `'domain'`) that has two keys: `'input'` and `'output'`. Each design space dictionary can have parameter names (e.g. `'param_1'`) as keys and a dictionary with an optional parameter type (`'type'`) and the corresponding arguments as values:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```yaml\n", + "domain:\n", + " input:\n", + " param_1:\n", + " type: float\n", + " low: -1.0\n", + " high: 1.0\n", + " param_2:\n", + " type: int\n", + " low: 1\n", + " high: 10\n", + " param_3:\n", + " type: category\n", + " categories: ['red', 'blue', 'green', 'yellow', 'purple']\n", + " param_4:\n", + " type: constant\n", + " value: some_value\n", + " output:\n", + " y:\n", + " to_disk: False\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to run the following code snippet, you need to have a configuration file named `'config.yaml'` in the current working directory." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "with initialize(version_base=None, config_path=\".\"):\n", + " config = compose(config_name=\"config\")\n", + "\n", + "domain = Domain.from_yaml(config.domain)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `ExperimentData` from a [hydra](https://hydra.cc/) configuration file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are using [hydra](https://hydra.cc/) for configuring your experiments, you can use it to construct an `ExperimentData` object from the information in the `'config.yaml'` file with the `from_yaml()` method:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### `ExperimentData` from file\n", + "\n", + "You can create an `ExperimentData` object in the same way as the `from_file()` method, but with the `'from_file'` key in the `'config.yaml'` file:\n", + "\n", + "```yaml\n", + "experimentdata:\n", + " from_file: ./example_project_dir\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "\n", + "with initialize(version_base=None, config_path=\".\"):\n", + " config = compose(config_name=\"config\")\n", + "\n", + "\n", + "experiment_data = ExperimentData.from_yaml(config.experimentdata)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## `ExperimentData` from sampling with [hydra](https://hydra.cc/)\n", + "\n", + "To create the `ExperimentData` object with the `from_sampling()` method, you can use the following configuration:\n", + "\n", + "```yaml\n", + "domain:\n", + " input:\n", + " param_1:\n", + " type: float\n", + " low: -1.0\n", + " high: 1.0\n", + " param_2:\n", + " type: int\n", + " low: 1\n", + " high: 10\n", + " param_3:\n", + " type: category\n", + " categories: ['red', 'blue', 'green', 'yellow', 'purple']\n", + " param_4:\n", + " type: constant\n", + " value: 
some_value\n", + " output:\n", + " y:\n", + " to_disk: False\n", + "\n", + "experimentdata:\n", + " from_sampling:\n", + " domain: ${domain}\n", + " sampler: random\n", + " seed: 1\n", + " n_samples: 1\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "\n", + "with initialize(version_base=None, config_path=\".\"):\n", + " config = compose(config_name=\"config\")\n", + "\n", + "experiment_data = ExperimentData.from_yaml(config.experimentdata)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> You can also combine both approaches to create the `ExperimentData` object by continuing an existing experiment with new samples. This can be done by providing both keys `'from_sampling'` and `'from_file'`." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "f3dasm_env3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/f3dasm/__init__.py b/src/f3dasm/__init__.py index 080f6221..4fc27698 100644 --- a/src/f3dasm/__init__.py +++ b/src/f3dasm/__init__.py @@ -15,7 +15,7 @@ from .__version__ import __version__ from ._src._argparser import HPC_JOBID -from ._src.core import Block +from ._src.core import Block, loop from ._src.experimentdata import ExperimentData from ._src.experimentsample import ExperimentSample from ._src.logger import DistributedFileHandler, logger @@ -36,6 +36,7 @@ 'ExperimentData', 'ExperimentSample', 'Block', + 'loop', 'DistributedFileHandler', 'logger', 'HPC_JOBID', diff --git a/src/f3dasm/_src/core.py b/src/f3dasm/_src/core.py index c30221d2..a9776c4d 100644 --- a/src/f3dasm/_src/core.py +++ b/src/f3dasm/_src/core.py @@ -39,6 +39,53 @@ def 
arm(self, data: ExperimentData) -> None: def call(self, **kwargs) -> ExperimentData: pass + +class LoopBlock(Block): + def __init__(self, blocks: Block | Iterable[Block], n_loops: int): + """ + LoopBlock constructor + + Parameters + ---------- + blocks : Block | Iterable[Block] + The block or blocks to loop over + n_loops : int + The number of loops to perform + """ + if isinstance(blocks, Block): + blocks = [blocks] + + self.blocks = blocks + self.n_loops = n_loops + + def call(self, **kwargs) -> ExperimentData: + for _ in range(self.n_loops): + for block in self.blocks: + block.arm(self.data) + self.data = block.call(**kwargs) + + return self.data + + +def loop(blocks: Block | Iterable[Block], n_loops: int) -> Block: + """ + Loop function + + Parameters + ---------- + blocks : Block | Iterable[Block] + The block or blocks to loop over + n_loops : int + The number of loops to perform + + Returns + ------- + + Block + The loop block + """ + return LoopBlock(blocks=blocks, n_loops=n_loops) + # ============================================================================= diff --git a/src/f3dasm/_src/experimentdata.py b/src/f3dasm/_src/experimentdata.py index 879fb1d5..e3348f58 100644 --- a/src/f3dasm/_src/experimentdata.py +++ b/src/f3dasm/_src/experimentdata.py @@ -1161,14 +1161,14 @@ def mark_all(self, for _, es in self: es.mark(status) - def run(self, block: Block, **kwargs) -> ExperimentData: + def run(self, block: Block | Iterable[Block], **kwargs) -> ExperimentData: """ Run a block over the entire ExperimentData object. Parameters ---------- block : Block - The block to run. + The block(s) to run. **kwargs Additional keyword arguments passed to the block. 
@@ -1181,8 +1181,14 @@ def run(self, block: Block, **kwargs) -> ExperimentData: -------- >>> experiment_data.run(block) """ - block.arm(data=self) - return block.call(**kwargs) + if isinstance(block, Block): + block = [block] + + for b in block: + b.arm(data=self) + self = b.call(**kwargs) + + return self # Datageneration # =========================================================================