diff --git a/docs/source/notebooks/Create validation plot from raw data.ipynb b/docs/source/notebooks/Create validation plot from raw data.ipynb new file mode 100644 index 0000000..5fc93d0 --- /dev/null +++ b/docs/source/notebooks/Create validation plot from raw data.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook contains the code for creating the validation plot depicted in the documentation chapter titled \"Validation of `PeakPerformance`\" from the processed data of test 1 and raw data of tests 2 and 3. \n", + "For the data processing of test 1, the reader is referred to the `Processing test 1 raw data` notebook. \n", + "The raw data files are located within the `PeakPerformance` repository under `/docs/source/notebooks`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import arviz as az\n", + "import json\n", + "import numpy as np\n", + "import pandas\n", + "import pymc as pm\n", + "from matplotlib import pyplot as plt\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1) Preparation of evaluation of synthetic data (test 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "with open('test1_all_data.txt', 'r') as file:\n", + " all_data = json.loads(file.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2) Preparation of border-line cases normal vs. 
skew normal (test 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pandas.read_excel(\"test2_summary.xlsx\")\n", + "df_normal = df[(df.loc[:, \"model\"] == \"normal\") & (df.loc[:, \"Unnamed: 0\"].isin([\"area\", \"height\"]))]\n", + "df_normal.reset_index(inplace=True)\n", + "df_skew = df[(df.loc[:, \"model\"] == \"skew_normal\") & (df.loc[:, \"Unnamed: 0\"].isin([\"area\", \"height\"]))]\n", + "df_skew.reset_index(inplace=True)\n", + "df_comparison = pandas.DataFrame()\n", + "df_comparison.loc[:, \"ratio_mean_normal_to_skew\"] = df_normal.loc[:, \"mean\"] / df_skew.loc[:, \"mean\"]\n", + "df_comparison[\"ratio_sd_normal_to_skew\"] = df_normal[\"sd\"] / df_skew[\"sd\"]\n", + "df_comparison[\"parameter\"] = df_normal[\"Unnamed: 0\"]\n", + "df_comparison[\"test_iteration\"] = df_normal[\"test_iteration\"]\n", + "df_comparison_area = df_comparison[df_comparison[\"parameter\"] == \"area\"]\n", + "df_comparison_height = df_comparison[df_comparison[\"parameter\"] == \"height\"]\n", + "comparison_dict = {}\n", + "comparison_dict[\"fraction of mean (normal / skew normal)\"] = [[df_comparison_area[\"ratio_mean_normal_to_skew\"].mean(), df_comparison_height[\"ratio_mean_normal_to_skew\"].mean()], [df_comparison_area[\"ratio_mean_normal_to_skew\"].std(), df_comparison_height[\"ratio_mean_normal_to_skew\"].std()]]\n", + "comparison_dict[\"fraction of standard deviation (normal / skew normal)\"] = [[df_comparison_area[\"ratio_sd_normal_to_skew\"].mean(), df_comparison_height[\"ratio_sd_normal_to_skew\"].mean()], [df_comparison_area[\"ratio_sd_normal_to_skew\"].std(), df_comparison_height[\"ratio_sd_normal_to_skew\"].std()]]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3) Preparation of comparison to MultiQuant (test 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df_comparison_total = 
pandas.read_excel(\"test3_df_comparison.xlsx\")\n", + "df_comparison_single = df_comparison_total[~df_comparison_total[\"PP experiment\"].isin([23, 24])]\n", + "df_comparison_double = df_comparison_total[df_comparison_total[\"PP experiment\"].isin([23, 24])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4) Plotting in one graph (for PeakPerformance paper)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure(figsize=(7.5, 4.8))\n", + "ax1= fig.add_subplot(2,1,1)\n", + "ax2= fig.add_subplot(2,2,3)\n", + "ax3= fig.add_subplot(2,2,4)\n", + "# moving the lower graphs down to create space for the legend\n", + "pos1 = ax1.get_position()\n", + "ax1.set_position([pos1.x0, pos1.y0-0.08, pos1.width, pos1.height])\n", + "\n", + "# add a - c to the graphs\n", + "ax1.text(-0.62, 1.2, \"A)\", fontsize=14, fontweight=\"bold\")\n", + "ax2.text(-0.5, 1.2, \"B)\", fontsize=14, fontweight=\"bold\")\n", + "ax3.text(-0.9, 1.2, \"C)\", fontsize=14, fontweight=\"bold\")\n", + "\n", + "# graph 1\n", + "params = ['mean', 'std', 'area', 'height']\n", + "x = np.arange(len(params)) # the label locations\n", + "width = 0.12 # the width of the bars\n", + "colors = [\n", + " (\"#023d6b\"), # Jülich dark blue\n", + " (\"#adbde3\"), # Jülich light blue\n", + " (\"#af82b9\"), # Jülich hyancith violet\n", + " (\"#eb5f73\"), # Jülich raspberry\n", + " (\"#fab45a\"), # Jülich apricot\n", + " (\"#faeb5a\"), # Jülich lemon\n", + "]\n", + "multiplier = 0\n", + "for metric, result in all_data.items():\n", + " offset = width * multiplier\n", + " if metric in [\"skew normal data, normal model\", \"normal data, skew normal model\"]:\n", + " rects = ax1.bar(x + offset, result[0], width, label=metric, yerr=result[1], color=colors[multiplier], hatch=\"///\")\n", + " else:\n", + " rects = ax1.bar(x + offset, result[0], width, label=metric, yerr=result[1], color=colors[multiplier])\n", + " multiplier += 1 \n", + "ax1.set_ylabel(r\"$\\bf{F_{y / \\^y}}}$ (-)\", fontsize=9, fontweight=\"bold\")\n", + "box = ax1.get_position()\n", + "# legend below\n", + "h, l = ax1.get_legend_handles_labels()\n", + "ph = [ax1.plot([],marker=\"\", ls=\"\")[0]]*2\n", + "handles = ph[:1] + h[:3] + ph[1:] + h[3:]\n", + "# labels = [\"normal model:\"] + l[:3] + [\"skew normal model:\"] + l[3:]\n", + "labels = [\"normal model:\", \"normal 
data\", \"normal data (higher noise)\", \"skew normal data\",\"skew normal model:\", \"skew normal data\", \"skew normal data (higher noise)\", \"normal data\"]\n", + "order = []\n", + "leg = plt.legend(handles, labels, ncol=2)\n", + "for vpack in leg._legend_handle_box.get_children():\n", + " for hpack in vpack.get_children()[:1]:\n", + " hpack.get_children()[0].set_width(0)\n", + "\n", + "ax1.set_position([box.x0, box.y0 + box.height * 0.15, box.width, box.height * 0.85])\n", + "ax1.legend(handles, labels, loc=\"upper center\", bbox_to_anchor=(0.5, -0.32), fancybox=True, shadow=True, ncol=2, fontsize=8)\n", + "# legend on the right\n", + "# ax.legend(loc=\"center left\", bbox_to_anchor=(1, 0.5), fancybox=True, shadow=True, ncol=1)\n", + "params = [x if not x == \"baseline_intercept\" else \"baseline\\nintercept\" for x in params]\n", + "params = [x if not x == \"baseline_slope\" else \"baseline\\nslope\" for x in params]\n", + "params = [x if not x == \"std\" else \"standard\\ndeviation\" for x in params]\n", + "ax1.set_xticks(x + 2.5 * width, params, fontsize=9, fontweight=\"bold\")\n", + "\n", + "# graph 2\n", + "params = [\"area\", \"height\"]\n", + "x = np.arange(len(params)) # the label locations\n", + "width = 0.2 # the width of the bars\n", + "multiplier = 0\n", + "for metric, result in comparison_dict.items():\n", + " offset = width * multiplier\n", + " rects = ax2.bar(x + offset, result[0], width, label=metric, yerr=result[1], color=colors[multiplier])\n", + " multiplier += 1 \n", + "ax2.set_ylabel(r\"$\\bf{F_{n / sn}}}$ (-)\", fontsize=9, fontweight=\"bold\")\n", + "box = ax2.get_position()\n", + "# legend below\n", + "# ax2.set_position([box.x0, box.y0 + box.height * 0.15, box.width, box.height * 0.85])\n", + "ax2.legend(loc=\"upper center\", bbox_to_anchor=(0.5, -0.2), fancybox=True, shadow=True, ncol=1, fontsize=8)\n", + "# legend on the right\n", + "# ax2.legend(loc=\"center left\", bbox_to_anchor=(1, 0.5), fancybox=True, shadow=True, ncol=1)\n", + 
"params = [x if not x == \"baseline_intercept\" else \"baseline\\nintercept\" for x in params]\n", + "params = [x if not x == \"baseline_slope\" else \"baseline\\nslope\" for x in params]\n", + "params = [x if not x == \"std\" else \"standard\\ndeviation\" for x in params]\n", + "ax2.set_xticks(x + 0.5 * width, params, fontsize=9, fontweight=\"bold\")\n", + "ax2.set_ylim(0.0, 1.264356410280364)\n", + "\n", + "# graph 3\n", + "categories = [\"overall\", \"single\\npeaks\", \"double\\npeaks\"]\n", + "means = [\n", + " df_comparison_total[\"area MQ / PP\"].mean(), \n", + " df_comparison_single[\"area MQ / PP\"].mean(),\n", + " df_comparison_double[\"area MQ / PP\"].mean(),\n", + "]\n", + "sds = [\n", + " df_comparison_total[\"area MQ / PP\"].std(),\n", + " df_comparison_single[\"area MQ / PP\"].std(),\n", + " df_comparison_double[\"area MQ / PP\"].std(),\n", + "]\n", + "ax3.bar(\n", + " x=categories,\n", + " width=0.5,\n", + " height=means,\n", + " yerr=sds,\n", + " color=(\"#023d6b\"), # Jülich dark blue\n", + " label=\"fraction of area (MultiQuant / PeakPerformance)\",\n", + ")\n", + "ax3.set_ylabel(r\"$\\bf{F_{MQ / PP}}}$ (-)\", fontsize=9, fontweight=\"bold\")\n", + "ax3.set_xticks(np.arange(len(categories)), categories, fontsize=9, fontweight=\"bold\")\n", + "ax3.legend(loc=\"upper center\", bbox_to_anchor=(0.5, -0.35), fancybox=True, shadow=True, ncol=1, fontsize=8)\n", + "\n", + "fig.tight_layout()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Last updated: 2024-10-11T17:56:34.996952+02:00\n", + "\n" + ] + } + ], + "source": [ + "%load_ext watermark\n", + "%watermark -idu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nutpie_env", + "language": "python", + "name": "python3" + }, + "language_info": { + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/notebooks/Normal model_normal data_noise level 0.6.xlsx b/docs/source/notebooks/Normal model_normal data_noise level 0.6.xlsx new file mode 100644 index 0000000..47b00fa Binary files /dev/null and b/docs/source/notebooks/Normal model_normal data_noise level 0.6.xlsx differ diff --git a/docs/source/notebooks/Normal model_normal data_noise level 1.2.xlsx b/docs/source/notebooks/Normal model_normal data_noise level 1.2.xlsx new file mode 100644 index 0000000..9b8ef63 Binary files /dev/null and b/docs/source/notebooks/Normal model_normal data_noise level 1.2.xlsx differ diff --git a/docs/source/notebooks/Normal model_skew normal data_noise level 0.6.xlsx b/docs/source/notebooks/Normal model_skew normal data_noise level 0.6.xlsx new file mode 100644 index 0000000..f25f740 Binary files /dev/null and b/docs/source/notebooks/Normal model_skew normal data_noise level 0.6.xlsx differ diff --git a/docs/source/notebooks/Processing test 1 raw data.ipynb b/docs/source/notebooks/Processing test 1 raw data.ipynb new file mode 100644 index 0000000..5518ff1 --- /dev/null +++ b/docs/source/notebooks/Processing test 1 raw data.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import arviz as az\n", + "import json\n", + "import numpy as np\n", + "import pandas\n", + "import pymc as pm\n", + "from matplotlib import pyplot as plt\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "raw_data_files = [\n", + " \"Normal model_normal 
data_noise level 0.6.xlsx\",\n", + " \"Normal model_normal data_noise level 1.2.xlsx\",\n", + " \"Normal model_skew normal data_noise level 0.6.xlsx\",\n", + " \"Skew normal model_skew normal data_noise level 0.6.xlsx\",\n", + " \"Skew normal model_skew normal data_noise level 1.2.xlsx\",\n", + " \"Skew normal model_normal data_noise level 0.6.xlsx\",\n", + "]\n", + "\n", + "parameters = [\"mean\", \"std\", \"area\", \"height\", \"alpha\", \"baseline_intercept\", \"baseline_slope\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare data in df_results" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'mean'}\n", + "{'std'}\n", + "{'area'}\n", + "{'height'}\n", + "alpha skipalpha\n", + "{'baseline_intercept'}\n", + "{'baseline_slope'}\n", + "{'mean'}\n", + "{'std'}\n", + "{'area'}\n", + "{'height'}\n", + "alpha skipalpha\n", + "{'baseline_intercept'}\n", + "{'baseline_slope'}\n", + "{'mean'}\n", + "{'std'}\n", + "{'area'}\n", + "{'height'}\n", + "alpha skipalpha\n", + "{'baseline_intercept'}\n", + "{'baseline_slope'}\n", + "{'mean'}\n", + "{'std'}\n", + "{'area'}\n", + "{'height'}\n", + "{'alpha'}\n", + "{'baseline_intercept'}\n", + "{'baseline_slope'}\n", + "{'mean'}\n", + "{'std'}\n", + "{'area'}\n", + "{'height'}\n", + "{'alpha'}\n", + "{'baseline_intercept'}\n", + "{'baseline_slope'}\n", + "{'mean'}\n", + "{'std'}\n", + "{'area'}\n", + "{'height'}\n", + "{'alpha'}\n", + "{'baseline_intercept'}\n", + "{'baseline_slope'}\n" + ] + } + ], + "source": [ + "df_results = pandas.DataFrame()\n", + "\n", + "for path in raw_data_files:\n", + " for param in parameters:\n", + " # print(path, param)\n", + " # normal distribution does not have the alpha parameter so skip that when necessary\n", + " if path in [raw_data_files[0], raw_data_files[1], raw_data_files[2]] and param == \"alpha\":\n", + " print(\"alpha skip\" + param)\n", 
+ " continue\n", + " # summary laden\n", + " summary = pandas.read_excel(path, index_col=0)\n", + " # sort summary and calculate differences between true and simulated values\n", + " df = summary.loc[param, [\"mean\", \"sd\", \"true_values\"]]\n", + " print(set(df.index))\n", + " df[\"ratio_mean_to_truth\"] = np.abs(df.loc[:, \"mean\"] / df.loc[:, \"true_values\"])\n", + " df[\"absolute_difference\"] = df.loc[:, \"mean\"] - df.loc[:, \"true_values\"]\n", + " df[\"ratio_std_to_mean\"] = df.loc[:, \"sd\"] / df.loc[:, \"mean\"]\n", + " df[\"within_range_of_1_std\"] = [True if df.iloc[x, 0] - df.iloc[x, 1] <= df.iloc[x, 2] <= df.iloc[x, 0] + df.iloc[x, 1] else False for x in range(len(df))]\n", + " df[\"within_range_of_3_stds\"] = [True if df.iloc[x, 0] - 3 * df.iloc[x, 1] <= df.iloc[x, 2] <= df.iloc[x, 0] + 3 * df.iloc[x, 1] else False for x in range(len(df))]\n", + " df[\"noise_level\"] = len(df) * [list(set(summary.loc[:,\"noise_scale\"]))[0]]\n", + " df[\"draws\"] = len(df) * [list(set(summary.loc[:,\"draws\"]))[0]]\n", + " df[\"tuning\"] = len(df) * [list(set(summary.loc[:,\"tuning_samples\"]))[0]]\n", + " # calculate mean and std of differences\n", + " df2 = pandas.DataFrame()\n", + " df2[\"path\"] = [path]\n", + " df2[\"parameter\"] = [\"\".join(set(df.index))]\n", + " df2[\"ratio_mean_to_truth\"] = [(np.mean(df.loc[:, \"ratio_mean_to_truth\"]), np.std(df.loc[:, \"ratio_mean_to_truth\"]))]\n", + " df2[\"absolute_difference\"] = [(np.mean(df.loc[:, \"absolute_difference\"]), np.std(df.loc[:, \"absolute_difference\"]))]\n", + " df2[\"within_range_of_3_stds\"] = np.count_nonzero(df.loc[:, \"within_range_of_3_stds\"]) / len(df)\n", + " df2[\"within_range_of_1_std\"] = np.count_nonzero(df.loc[:, \"within_range_of_1_std\"]) / len(df)\n", + " df2[\"noise_level\"] = list(set(df[\"noise_level\"]))[0]\n", + " df2[\"tuning samples\"] = list(set(df[\"tuning\"]))[0]\n", + " df2[\"draws\"] = list(set(df[\"draws\"]))[0] \n", + " if path in [raw_data_files[0], 
raw_data_files[1]]:\n", + " df2[\"data_distribution\"] = [\"normal\"]\n", + " df2[\"model_distribution\"] = [\"normal\"]\n", + " elif path == raw_data_files[2]:\n", + " df2[\"data_distribution\"] = [\"skew normal\"]\n", + " df2[\"model_distribution\"] = [\"normal\"]\n", + " elif path in [raw_data_files[3], raw_data_files[4]]:\n", + " df2[\"data_distribution\"] = [\"skew normal\"]\n", + " df2[\"model_distribution\"] = [\"skew normal\"]\n", + " elif path == raw_data_files[5]:\n", + " df2[\"data_distribution\"] = [\"normal\"]\n", + " df2[\"model_distribution\"] = [\"skew normal\"] \n", + " # save results in one DataFrame for subsequent plotting\n", + " df_results = pandas.concat([df_results, df2])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "model: normal, data: normal, noise level: 0.6\n", + "model: normal, data: normal, noise level: 1.2\n", + "model: normal, data: skew normal, noise level: 0.6\n", + "model: skew normal, data: normal, noise level: 0.6\n", + "model: skew normal, data: skew normal, noise level: 0.6\n", + "model: skew normal, data: skew normal, noise level: 1.2\n" + ] + } + ], + "source": [ + "for model in set(df_results.loc[:, \"model_distribution\"]):\n", + " dfdf = df_results[df_results.loc[:, \"model_distribution\"] == model]\n", + " for data in set(dfdf.loc[:, \"data_distribution\"]):\n", + " dfdf2 = dfdf[dfdf.loc[:, \"data_distribution\"] == data]\n", + " for noise_level in set(dfdf2.loc[:, \"noise_level\"]):\n", + " dfdf3 = dfdf2[dfdf2.loc[:, \"noise_level\"] == noise_level]\n", + " model = list(dfdf3.loc[:,\"model_distribution\"])[0]\n", + " data = list(dfdf3.loc[:,\"data_distribution\"])[0]\n", + " noise = list(dfdf3.loc[:,\"noise_level\"])[0]\n", + " print(f\"model: {model}, data: {data}, noise level: {noise}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "dfdf = 
df_results[df_results.loc[:, \"model_distribution\"] == \"skew normal\"]\n", + "dfdf2 = dfdf[dfdf.loc[:, \"data_distribution\"] == \"skew normal\"]\n", + "dfdf3 = dfdf2[dfdf2.loc[:, \"noise_level\"] == 0.6]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'normal data, normal model': [[0.9999001336005343,\n", + " 1.0025702609677298,\n", + " 1.0017147282600856,\n", + " 1.000123572878139],\n", + " [0.002286555631402294,\n", + " 0.028900726978078068,\n", + " 0.02958680525019264,\n", + " 0.022445046960539197]],\n", + " 'normal data (higher noise), normal model': [[0.9997316666666668,\n", + " 1.0059567381829964,\n", + " 1.001356598861276,\n", + " 0.9977187067316658],\n", + " [0.004410296979166418,\n", + " 0.05488690135089093,\n", + " 0.055093378982298734,\n", + " 0.04168657187789078]],\n", + " 'skew normal data, normal model': [[0.9990176666666667,\n", + " 0.7598253910963016,\n", + " 0.9869124703934096,\n", + " 0.9889579711666672],\n", + " [0.04540922653553522,\n", + " 0.1425229338854569,\n", + " 0.029251994462966387,\n", + " 0.02178598822049324]],\n", + " 'normal data, skew normal model': [[0.9993873333333333,\n", + " 1.145324094260921,\n", + " 1.0038603930164334,\n", + " 1.0021702322498285],\n", + " [0.025492314214288193,\n", + " 0.06460165579288266,\n", + " 0.0295645094605588,\n", + " 0.022277250178015084]],\n", + " 'skew normal data, skew normal model': [[1.0003276666666665,\n", + " 1.0178059537564914,\n", + " 0.9995769654521169,\n", + " 0.9994046368514812],\n", + " [0.022164664598810824,\n", + " 0.08144664654979102,\n", + " 0.02553221429137138,\n", + " 0.019596288333603468]],\n", + " 'skew normal data (higher noise), skew normal model': [[0.9975454545454545,\n", + " 1.062975971807339,\n", + " 1.0078594345558298,\n", + " 1.0013061414928683],\n", + " [0.029588612507556917,\n", + " 0.13828870506270582,\n", + " 0.050852728197426554,\n", + " 0.03782158437972263]]}" + ] + }, + 
"execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_data = {}\n", + "for model in set(df_results.loc[:, \"model_distribution\"]):\n", + " dfdf = df_results[df_results.loc[:, \"model_distribution\"] == model]\n", + " for data in set(dfdf.loc[:, \"data_distribution\"]):\n", + " dfdf2 = dfdf[dfdf.loc[:, \"data_distribution\"] == data]\n", + " for noise_level in set(dfdf2.loc[:, \"noise_level\"]):\n", + " dfdf3 = dfdf2[dfdf2.loc[:, \"noise_level\"] == noise_level]\n", + " model = list(dfdf3.loc[:,\"model_distribution\"])[0]\n", + " data = list(dfdf3.loc[:,\"data_distribution\"])[0]\n", + " noise = list(dfdf3.loc[:,\"noise_level\"])[0]\n", + " # print(f\"model: {model}, data: {data}, noise level: {noise}\")\n", + " # print(noise)\n", + " dfdf4 = dfdf3[~dfdf3.loc[:, \"parameter\"].isin([\"alpha\", \"baseline_intercept\", \"baseline_slope\"])]\n", + " if noise == 1.2:\n", + " all_data[f\"{data} data (higher noise), {model} model\"] = [[x[0] for x in list(dfdf4.loc[:,\"ratio_mean_to_truth\"])], [x[1] for x in list(dfdf4.loc[:,\"ratio_mean_to_truth\"])]]\n", + " else:\n", + " all_data[f\"{data} data, {model} model\"] = [[x[0] for x in list(dfdf4.loc[:,\"ratio_mean_to_truth\"])], [x[1] for x in list(dfdf4.loc[:,\"ratio_mean_to_truth\"])]]\n", + "all_data" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['normal data, normal model', 'normal data (higher noise), normal model', 'skew normal data, normal model', 'skew normal data, skew normal model', 'skew normal data (higher noise), skew normal model', 'normal data, skew normal model'])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rearrange = ['normal data, normal model', 'normal data (higher noise), normal model', 'skew normal data, normal model', 'skew normal data, skew normal model', 'skew normal data (higher noise), 
skew normal model','normal data, skew normal model']\n", + "reordered_dict = {k: all_data[k] for k in rearrange}\n", + "reordered_dict.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# save processed data in file\n", + "\n", + "# with open('test1_all_data.txt', 'w') as file:\n", + "# file.write(json.dumps(reordered_dict)) # use `json.loads` to do the reverse" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Last updated: 2024-10-11T18:34:57.629742+02:00\n", + "\n" + ] + } + ], + "source": [ + "%load_ext watermark\n", + "%watermark -idu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nutpie_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/notebooks/Skew normal model_normal data_noise level 0.6.xlsx b/docs/source/notebooks/Skew normal model_normal data_noise level 0.6.xlsx new file mode 100644 index 0000000..6686fe5 Binary files /dev/null and b/docs/source/notebooks/Skew normal model_normal data_noise level 0.6.xlsx differ diff --git a/docs/source/notebooks/Skew normal model_skew normal data_noise level 0.6.xlsx b/docs/source/notebooks/Skew normal model_skew normal data_noise level 0.6.xlsx new file mode 100644 index 0000000..451f204 Binary files /dev/null and b/docs/source/notebooks/Skew normal model_skew normal data_noise level 1.2.xlsx b/docs/source/notebooks/Skew normal model_skew normal data_noise 
level 1.2.xlsx b/docs/source/notebooks/Skew normal model_skew normal data_noise level 1.2.xlsx new file mode 100644 index 0000000..1e20add Binary files /dev/null and b/docs/source/notebooks/Skew normal model_skew normal data_noise level 1.2.xlsx differ diff --git a/docs/source/notebooks/test1_all_data.txt b/docs/source/notebooks/test1_all_data.txt new file mode 100644 index 0000000..6cdfcb4 --- /dev/null +++ b/docs/source/notebooks/test1_all_data.txt @@ -0,0 +1 @@ +{"normal data, normal model": [[0.9999001336005343, 1.0025702609677298, 1.0017147282600856, 1.000123572878139], [0.002286555631402294, 0.028900726978078068, 0.02958680525019264, 0.022445046960539197]], "normal data (higher noise), normal model": [[0.9997316666666668, 1.0059567381829964, 1.001356598861276, 0.9977187067316658], [0.004410296979166418, 0.05488690135089093, 0.055093378982298734, 0.04168657187789078]], "skew normal data, normal model": [[0.9990176666666667, 0.7598253910963016, 0.9869124703934096, 0.9889579711666672], [0.04540922653553522, 0.1425229338854569, 0.029251994462966387, 0.02178598822049324]], "skew normal data, skew normal model": [[1.0003276666666665, 1.0178059537564914, 0.9995769654521169, 0.9994046368514812], [0.022164664598810824, 0.08144664654979102, 0.02553221429137138, 0.019596288333603468]], "skew normal data (higher noise), skew normal model": [[0.9975454545454545, 1.062975971807339, 1.0078594345558298, 1.0013061414928683], [0.029588612507556917, 0.13828870506270582, 0.050852728197426554, 0.03782158437972263]], "normal data, skew normal model": [[0.9993873333333333, 1.145324094260921, 1.0038603930164334, 1.0021702322498285], [0.025492314214288193, 0.06460165579288266, 0.0295645094605588, 0.022277250178015084]]} \ No newline at end of file diff --git a/docs/source/notebooks/test2_summary.xlsx b/docs/source/notebooks/test2_summary.xlsx new file mode 100644 index 0000000..5a05df3 Binary files /dev/null and b/docs/source/notebooks/test2_summary.xlsx differ diff --git 
a/docs/source/notebooks/test3_df_comparison.xlsx b/docs/source/notebooks/test3_df_comparison.xlsx new file mode 100644 index 0000000..7862a78 Binary files /dev/null and b/docs/source/notebooks/test3_df_comparison.xlsx differ