From 0993e21b7e3be94d790c1679fb620f46330a9a0c Mon Sep 17 00:00:00 2001 From: kta-intel Date: Fri, 20 Sep 2024 21:09:19 +0000 Subject: [PATCH 1/2] enable digress in workflow interface api Signed-off-by: kta-intel --- .../DiGress/Workflow_Interface_DiGress.ipynb | 554 ++++++++++++++ .../experimental/DiGress/digress/__init__.py | 0 .../DiGress/digress/analysis/__init__.py | 0 .../digress/analysis/rdkit_functions.py | 339 +++++++++ .../DiGress/digress/analysis/visualization.py | 221 ++++++ .../digress/configs/qm9_config_guidance.yaml | 48 ++ .../DiGress/digress/datasets/__init__.py | 0 .../digress/datasets/abstract_dataset.py | 147 ++++ .../DiGress/digress/datasets/qm9_dataset.py | 415 +++++++++++ .../DiGress/digress/diffusion/__init__.py | 0 .../digress/diffusion/diffusion_utils.py | 399 ++++++++++ .../digress/diffusion/distributions.py | 34 + .../digress/diffusion/extra_features.py | 278 +++++++ .../diffusion/extra_features_molecular.py | 60 ++ .../digress/diffusion/noise_schedule.py | 226 ++++++ .../digress/diffusion_model_discrete.py | 696 +++++++++++++++++ .../DiGress/digress/guidance/__init__.py | 0 .../guidance_diffusion_model_discrete.py | 698 ++++++++++++++++++ .../guidance/qm9_regressor_discrete.py | 306 ++++++++ .../DiGress/digress/metrics/__init__.py | 0 .../digress/metrics/abstract_metrics.py | 137 ++++ .../digress/metrics/molecular_metrics.py | 407 ++++++++++ .../metrics/molecular_metrics_discrete.py | 198 +++++ .../DiGress/digress/metrics/train_metrics.py | 129 ++++ .../DiGress/digress/models/__init__.py | 0 .../DiGress/digress/models/layers.py | 49 ++ .../digress/models/transformer_model.py | 287 +++++++ .../experimental/DiGress/digress/utils.py | 142 ++++ .../experimental/DiGress/requirements.txt | 14 + 29 files changed, 5784 insertions(+) create mode 100644 openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb create mode 100644 openfl-tutorials/experimental/DiGress/digress/__init__.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/analysis/__init__.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/analysis/rdkit_functions.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/analysis/visualization.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/configs/qm9_config_guidance.yaml create mode 100644 openfl-tutorials/experimental/DiGress/digress/datasets/__init__.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/datasets/abstract_dataset.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/datasets/qm9_dataset.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/diffusion/__init__.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/diffusion/diffusion_utils.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/diffusion/distributions.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features_molecular.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/diffusion/noise_schedule.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/diffusion_model_discrete.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/guidance/__init__.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/guidance/guidance_diffusion_model_discrete.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/guidance/qm9_regressor_discrete.py create mode 
100644 openfl-tutorials/experimental/DiGress/digress/metrics/__init__.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/metrics/abstract_metrics.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics_discrete.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/metrics/train_metrics.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/models/__init__.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/models/layers.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/models/transformer_model.py create mode 100644 openfl-tutorials/experimental/DiGress/digress/utils.py create mode 100644 openfl-tutorials/experimental/DiGress/requirements.txt diff --git a/openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb b/openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb new file mode 100644 index 0000000000..e8c1bb8d57 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb @@ -0,0 +1,554 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1448948b-5667-4f40-b1c8-8b91fbf5bf6b", + "metadata": {}, + "source": [ + "# Federated Discrete Denoising Diffusion Model for Molecular Generation" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a7989e72", + "metadata": {}, + "source": [ + "Using OpenFL's workflow interface API, we will train a discrete denoising diffusion model for moleculuar generation, a crucial step in the drug discovery process.\n", + "\n", + "The workflow interface is a new way of composing federated learning expermients with OpenFL. It was borne through conversations with researchers and existing users who had novel use cases that didn't quite fit the standard paradigm. \n", + "\n", + "We will be using the [DiGress model (Vignac et al., 2023)](https://arxiv.org/pdf/2209.14734) (for conditional generation) and the [QM9 dataset](http://quantum-machine.org/datasets/)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "fc8e35da", + "metadata": {}, + "source": [ + "## Installation" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4dbb89b6", + "metadata": {}, + "source": [ + "First we start by installing the necessary dependencies for the workflow interface and DiGress model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7f98600", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# !pip install git+https://github.com/intel/openfl.git\n", + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "id": "30ce388e-46a2-4786-a504-04feacbaaaa3", + "metadata": {}, + "source": [ + "## Getting Started" + ] + }, + { + "cell_type": "markdown", + "id": "bf644bee-3a34-44a4-adad-a5d1d39f6d38", + "metadata": {}, + "source": [ + "Next, we will initialize the denoising model and the regressor model using the `qm9_config_guidance.yaml`. 
\\\n", + "When constructing the `datamodule`, the qm9 dataset will also be automatically downloaded, verified, and processed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "379b67d4-b7d8-4eb1-a3d8-e9acac338e16", + "metadata": {}, + "outputs": [], + "source": [ + "from omegaconf import OmegaConf\n", + "cfg = OmegaConf.load('./digress/configs/qm9_config_guidance.yaml')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41982963-db85-4919-8b21-dc6e998f197a", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "torch.cuda.empty_cache()\n", + "from pytorch_lightning import Trainer\n", + "\n", + "from digress import utils\n", + "\n", + "# from digress.analysis.visualization import MolecularVisualization\n", + "\n", + "from digress.datasets import qm9_dataset\n", + "\n", + "from digress.diffusion_model_discrete import DiscreteDenoisingDiffusion\n", + "from digress.diffusion.extra_features import DummyExtraFeatures, ExtraFeatures\n", + "from digress.diffusion.extra_features_molecular import ExtraMolecularFeatures\n", + "\n", + "from digress.guidance.qm9_regressor_discrete import Qm9RegressorDiscrete\n", + "from digress.guidance.guidance_diffusion_model_discrete import DiscreteDenoisingDiffusion as DiscreteDenoisingDiffusionGuidance\n", + "\n", + "from digress.metrics.abstract_metrics import TrainAbstractMetricsDiscrete, TrainAbstractMetrics\n", + "from digress.metrics.molecular_metrics import TrainMolecularMetrics, SamplingMolecularMetrics\n", + "from digress.metrics.molecular_metrics_discrete import TrainMolecularMetricsDiscrete\n", + "\n", + "\n", + "dataset_config = cfg[\"dataset\"]\n", + "\n", + "# Create separate datamodules and dataset_infos objects for each \n", + "datamodule = qm9_dataset.QM9DataModule(cfg, regressor=True)\n", + "dataset_infos = qm9_dataset.QM9infos(datamodule=datamodule, cfg=cfg)\n", + "# datamodule.prepare_data()\n", + "train_smiles = None\n", + "\n", + "datamodule_diff = qm9_dataset.QM9DataModule(cfg)\n", + "dataset_infos_diff = qm9_dataset.QM9infos(datamodule=datamodule_diff, cfg=cfg)\n", + "# datamodule_diff.prepare_data()\n", + "train_smiles_diff = qm9_dataset.get_train_smiles(cfg=cfg, train_dataloader=datamodule_diff.train_dataloader(),\n", + " dataset_infos=dataset_infos_diff, evaluate_dataset=False)\n", + "\n", + "# We do not use extra features when we plan to use conditional generation\n", + "extra_features = DummyExtraFeatures() # Extra features not used for conditional generation\n", + "domain_features = DummyExtraFeatures() # Extra features not used for conditional generation\n", + "\n", + "dataset_infos.compute_input_output_dims(datamodule=datamodule, extra_features=extra_features,\n", + " domain_features=domain_features)\n", + "dataset_infos.output_dims = {'X': 0, 'E': 0, 'y': 2 if cfg.general.guidance_target == 'both' else 1}\n", + "\n", + "dataset_infos_diff.compute_input_output_dims(datamodule=datamodule_diff, extra_features=extra_features,\n", + " domain_features=domain_features)\n", + "\n", + "# Training and Sampling metrics as well as visualization tools are different for the 2 models\n", + "train_metrics = TrainMolecularMetricsDiscrete(dataset_infos)\n", + "sampling_metrics = SamplingMolecularMetrics(dataset_infos, train_smiles)\n", + "# visualization_tools = MolecularVisualization(cfg.dataset.remove_h, dataset_infos=dataset_infos)\n", + "\n", + "train_metrics_diff = TrainMolecularMetricsDiscrete(dataset_infos_diff)\n", + "sampling_metrics_diff = 
SamplingMolecularMetrics(dataset_infos_diff, train_smiles_diff)\n", + "# visualization_tools_diff = MolecularVisualization(cfg.dataset.remove_h, dataset_infos=dataset_infos_diff)\n", + "\n", + "# for Regressor\n", + "model_kwargs = {'dataset_infos': dataset_infos, 'train_metrics': train_metrics,\n", + " 'sampling_metrics': sampling_metrics, 'visualization_tools': None,\n", + " 'extra_features': extra_features, 'domain_features': domain_features}\n", + "\n", + "# for Diffusion model\n", + "model_kwargs_diff = {'dataset_infos': dataset_infos_diff, 'train_metrics': train_metrics_diff,\n", + " 'sampling_metrics': sampling_metrics_diff, 'visualization_tools': None,\n", + " 'extra_features': extra_features, 'domain_features': domain_features}\n", + "\n", + "model = Qm9RegressorDiscrete(cfg=cfg, **model_kwargs)\n", + "diff_model = DiscreteDenoisingDiffusion(cfg=cfg, **model_kwargs_diff)" + ] + }, + { + "cell_type": "markdown", + "id": "c02b8ace-07ac-4ed7-9e49-1d68eea62d4a", + "metadata": {}, + "source": [ + "Now, let's define some helper functions that will allow us to properly build the model and execute the proper training and validation logic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68e7c15e-c333-448a-8c9c-be2b5c996790", + "metadata": {}, + "outputs": [], + "source": [ + "from copy import deepcopy\n", + "from digress.diffusion_model_discrete import DiscreteDenoisingDiffusion\n", + "from digress.guidance.qm9_regressor_discrete import Qm9RegressorDiscrete\n", + "from digress.guidance.guidance_diffusion_model_discrete import DiscreteDenoisingDiffusion as DiscreteDenoisingDiffusionGuidance\n", + "\n", + "def build_model(cfg, model_kwargs, model_type='regressor', model_state_dict=None, optimizer_state_dict=None):\n", + " model_kwargs_copy = deepcopy(model_kwargs)\n", + " \n", + " if cfg.model.type == 'discrete':\n", + "\n", + " if model_type == \"regressor\":\n", + " model = Qm9RegressorDiscrete(cfg=cfg, **model_kwargs_copy)\n", + " elif model_type == \"diffusion\":\n", + " model = DiscreteDenoisingDiffusion(cfg=cfg, **model_kwargs_copy)\n", + " else:\n", + " raise ValueError(\"Invalid phase. Choose 'regressor' or 'diffusion'.\")\n", + "\n", + " if model_state_dict:\n", + " model.load_state_dict(model_state_dict)\n", + " if optimizer_state_dict:\n", + " model.configure_optimizers().load_state_dict(optimizer_state_dict)\n", + "\n", + " return model\n", + "\n", + "def run_model(model_args, dataloader, model_type='regressor', phase='validate', model_state_dict=None, optimizer_state_dict=None):\n", + "\n", + " trainer = Trainer(accelerator='gpu', devices=[0], max_epochs=1, enable_checkpointing=False, logger=False)\n", + "\n", + " if model_type == \"regressor\":\n", + " model = build_model(model_args[0], model_args[1], model_type, model_state_dict, optimizer_state_dict)\n", + " elif model_type == \"diffusion\":\n", + " model = build_model(model_args[0], model_args[2], model_type, model_state_dict, optimizer_state_dict)\n", + "\n", + " if phase == 'train':\n", + " trainer.fit(model, dataloader)\n", + " loss = trainer.logged_metrics['train loss'].item()\n", + " elif phase == 'validate':\n", + " trainer.validate(model, dataloader)\n", + " loss = trainer.logged_metrics['val_nll'].item()\n", + " else:\n", + " raise ValueError(\"Invalid phase. 
Choose 'train' or 'validate'.\")\n", + "\n", + " return loss, model.state_dict(), model.configure_optimizers()\n", + "\n", + "\n", + "def build_guidance(cfg, model_kwargs, model_kwargs_r, diff_model_state_dict=None):\n", + " model_kwargs_copy = deepcopy(model_kwargs)\n", + " model_kwargs_r_copy = deepcopy(model_kwargs_r)\n", + "\n", + " regressor = Qm9RegressorDiscrete(cfg=cfg, **model_kwargs_r_copy)\n", + " conditional_model = DiscreteDenoisingDiffusionGuidance(cfg=cfg, **model_kwargs_copy)\n", + " \n", + " conditional_model.load_state_dict(diff_model_state_dict, strict=False)\n", + " conditional_model.guidance_model = regressor\n", + "\n", + " return conditional_model\n", + "\n", + "\n", + "def run_guidance(model_args, dataloader, model_state_dict=None):\n", + "\n", + " trainer = Trainer(accelerator='gpu', devices=[0], max_epochs=1, enable_checkpointing=False, logger=False)\n", + "\n", + " model = build_guidance(model_args[0], model_args[2], model_args[1] , model_state_dict)\n", + "\n", + " trainer.validate(model, dataloader)\n", + " \n", + " validity = trainer.logged_metrics['Validity'].item()\n", + " uniqueness = trainer.logged_metrics['Uniqueness'].item()\n", + "\n", + " return validity, uniqueness" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "cd268911", + "metadata": {}, + "source": [ + "Next we import the `FLSpec`, `LocalRuntime`, and placement decorators. We also define the aggregation algorithm.\n", + "\n", + "- `FLSpec` – Defines the flow specification. User defined flows are subclasses of this.\n", + "- `Runtime` – Defines where the flow runs, infrastructure for task transitions (how information gets sent). The `LocalRuntime` runs the flow on a single node.\n", + "- `aggregator/collaborator` - placement decorators that define where the task will be assigned" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "precise-studio", + "metadata": {}, + "outputs": [], + "source": [ + "from openfl.experimental.interface import FLSpec, Aggregator, Collaborator\n", + "from openfl.experimental.runtime import LocalRuntime\n", + "from openfl.experimental.placement import aggregator, collaborator\n", + "import numpy as np\n", + "from copy import deepcopy\n", + "\n", + "def FedAvg(model_state_dicts, weights=None):\n", + " state_dict = deepcopy(model_state_dicts[0])\n", + " for key in state_dict:\n", + " tensors = [state[key] for state in model_state_dicts]\n", + " non_empty_tensors = [tensor for tensor in tensors if tensor.numel() > 0]\n", + " \n", + " if non_empty_tensors:\n", + " state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in model_state_dicts],\n", + " axis=0, \n", + " weights=weights))\n", + " else:\n", + " print(f\"All tensors for key '{key}' are empty. 
Skipping.\")\n", + " continue\n", + "\n", + " return state_dict" + ] + }, + { + "attachments": { + "image.png": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAt0AAAI6CAYAAAD7dvTIAAAgAElEQVR4nOzde3RUVZ43/C8mIVW5VlJFQqBCCk3R3BKCIB0gQRAaW2Y6QDuCtHar8D79rofhomucfp9xtThqr561enyWArbvWj1L2m7bB9RRSGba+wUJQqRBYkBAKkoCBSFQRSrXSkjFPH8Ue3NO3VKV5FTl8v2s5ZJUqs4+lfrVPr/z2/vsM6a3t7cXRERERESkmVtivQNERERERCMdk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSWHysd4CIhof9tS7sr3Wh/KQTddc6AQAutyfGe0UUHoM+HovzDbBk6rByphGL8w2x3iUiGmXG9Pb29sZ6J4ho6Npf68Jj+75F9cW2WO8K0aApmpiCrYsm4uF542O9K0Q0SjDpJqKAXG4PHtn9DfadcMR6V4g0UzQxBZ/+4ywY9Bz4JSJtMekmIj/VF9uwetfXchoJ0Uhm0Mfj03+chaKJKbHeFSIawZh0E5GKy+3B7OeOMeGmUcWgj8fxx+fAkqmL9a4Q0QjF1UuISOWR3d8w4aZRR0ynIiLSCpNuIpL2nXBwDjeNWvtrXXjlyOVY7wYRjVBMuolIevr9+ljvAlFMPbbv21jvAhGNUEy6iQiAt8rNZQFptHO5Pax2E5EmmHQTEQAw4Sa64bNvm2O9C0Q0AjHpJiIAwFeX2mO9C0RDwv5aV6x3gYhGICbdRASAlW4iweX2xHoXiGgEYtJNRACYaBAJ/C4QkRaYdBMRACYaREREWmLSTURERESksfhY7wAR0XC086f5WHSbAfkmPZLGeusXzvZuXGy+jgPfurD57dqgr33mHgsmZ+rw89fORGt3Y9ImERHdxKSbiCgCeRk6HNxSBLMh0e93xuQEGJMTUDghGasKTCjZUY36pk7Vcz76n4VYOiUDFSed0drlmLRJRERqTLqJiCLw4f8shNmQiI7r32PXFw14o/oqKr/zrutcems6Hl+Si2VTMmA2JOLgliLkPl2lev2kDF3U9zkWbRIRkRrndBMRhWnt7HGwjtMDANbvOYPNb9fKhBsAKr9rxsqXT+Jf/vodAMBsSMQz91hisq9ERDS0MOkmIgrTz27PBgDYXV14/fjVoM/bceAi7K4uAMDkTFaZiYgIGNPb29sb650gotgb89hnsd6FIW/nT/OxqXQiAMDyzBd+87VD2bJoIravzvd73HbVjSm/PSJ/Lr01Hb8ruxXm9ETVvHHbVTe+qG/xuxCyfMNMlM00ouKkE19dasPGhRNgTE6As70bNocbxXlpfbZJ/nqfvzPWu0BEIwwr3UREYXqj+mZ1++CWooimjjS2Xoftqhsd178H4F3pxHbVjfOKxH3nT/NxYHMRivPSkJmUANtVt3yNdZweD87Nxtkn5gXc/rTsJDy5PA+AN6nWJ8ShVvH6YG0SEVF0sNJNRABY6Q6Xstot2K66cbqxAx/bmrDjwMWQrz/7xDxYx+lRcdKJlS+flI/nZehw6n/dgaSxt/j9DrhZ0QaA+/98Sk5vUT5ec6kds/79qNyeqMQHa5OCY6WbiAYbK91ERBHY/HYttu6tlXO2AcA6To+ymUZsX52P3ufvxFf/PBdbFk0MsRV/G4rH41pHN5zt3QET45Uvn5QV6xnjkwNu47cf1ct/RzL1hYiItMekm4goQjsOXETu01VYtLMafznaiJpL7TIhBoDCCcnYvjofhx+dHfY2t71bh9ynq2D69aGgz7nY7E30Z01ICfj7UBd3EhFRbHGdbiKifqr8rlm1ZODa2ePw99ONuGdaJozJCSjOS0P5hpn9mtKxdvY4ZKeOxR25qchJG4sfZCUFvCGPYLvq7td7ICKi6GDSTUQ0SF4/fhWvH7+KvAwdKv6fmSickIxlUzLCfr1YuSTQiiMd179Hx/Xv5S3niYhoeGHvTUQUJsdvFqD3+Tv7nK9d39SJl79oAICwk+S1s8fhvf+3UCbctqtuVJx04i9HG7F1by2S/79KOb2EiIiGH1a6iYjCpE+IAwAstWb0uUpJpJ5YloeksbfA7upCyY7qgBdCTkwPPr2EiIiGNla6iYjCVNPQBgBYNiUDa2ePC/ncDT/MARD+XGt9grc7/tLeFjDh3rJoIqeWEBENY+zBiYjCdP+fTst51Xt+MR2HH52tmmqSl6HDM/dYcPaJeSic4F3W78WD6oq4u9u7ykmyTwItHl84OQ15Gepbx+/8aT7+7e9u7fd+B2uTiIiih9NLiIjCVN/UifV7zuC5sttgNiSiOC8NxXlpAW/v3nH9e+z6osFvGkrNpTYUTkjG0ikZOPvEPJxv6sSy/78Gv/2oHrvunwpjcgLqtv1QVsgnpiciaewtcLZ342JzF6zj9BHvd7A2iYgoeph0ExFFQKxQsvOn+Vh0mwET08fCmJwgfy/uTrnl7dqA00R+/toZ5KSNxXxLOqzj9HKetlhj+4llecg36WVybbvqxvtnrmHz27XY+dN8WMdNxMLJ/qubhBKsTSIiih7eBp6IAPA28ERKvA08EQ02TvAjIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIiIiIo0x6SYiIiIi0hiTbiIiIiIijTHpJiIiIiLSGJNuIiIiIiKNMekmIiIiItIYk24iIh+lt6aj9/k7cfaJebHelX4T76H01vRY7woREYFJNxGRn8eX5MJ21Q3rOD22LJoY690hIqIRID7WO0BENNQsnJyG3V9eAZCJdbdnYceBi6rfn31iHqzj9ACAipNOlM00YtHOalR+14wtiyZi++p8AICzvRs2hxsAMP+F4yjfMBNZqQkozksDAGzdW4sdBy6qtuds74bp14dkW+UbZqJsphEAUFXfAqtJj91fXsHmt2sBAL3P36nat4qTTjz36QUc2FwEADiwuQgvVl7E5rdrVfsGQD4OAI7fLIDN4UZxXprfPhAR0cCx0k1EpLDzp
96kdPPbtXj/zDWZIAuHH52NzKR4jHnsM4x57DMsnHzz96W3pmP76ny8WHkRYx77DLu/vOL3+uK8NGzdW4sxj30mE25nR7fc3ufnWuD4zQK5LyKhH/PYZwAAY3KC3JbjNwtQcdIpXytOAABg0c5q+X9lwi3a3rq3FptKJ6oq+VaTHmMe+4wJNxGRBph0ExEp3D01E5+fawEAWQUWiTjgTZqf+aBe/qz8t5iWIl63+e1a2K66Vdt3tnfLynnpremwjtNj/gvH5e9XvnwSxuQEbFk0EXdPzUTFSScqv2sGANXzAMD060NY+fJJ+fPHtqag72vd7Vmoqm+Rbe84cBFV9S3YVHIz6Rbvm4iIBh+nlxAR3SCS4A17vpGPVdW34O6pmfL3AHDc3iZ/r/x3VmoCnB3dqm36/nytwyP/vaZoHAD/KSKAt+qcmRSP802d6u21d/s9Vzk9JRhjUgJON3aoHjt6vhXrbs+SP/u2RUREg4dJNxHRDY8vyQUAOR9aacuiiaoEe7CEmj+tTIgDEcm27aobYx77zG/ONhERDR1MuomIblg4OU11caHg+M0C1QWVs80pcsrHbHOKfN6V1m5My05SvdaY5F/9FmwON4zJCSi9NV1uT+lahweTMnTq7d2Y0y2q8uICzr44O7qRlZqgemzupFRV5Z2IiLTDOd1ERPDO2zYmJ/gl3IB3rrO4ILKqvgXblufJ3yn//dynF2Adp5dzwHf+ND/ktI8dBy7CdtWNvetnyMe2LJqI3ufvxJZFE/H+mWsom2mU01oOPzrbbxvKpD9UlVtc1CkunNyyaCKK89Lw4sGLQV9DRESDh5VuIiJ4L6Csqg98IeFzn15A2UwjyjfMxPwXjuPsE/PkPGzliiGV3zVj695abF+dj02lE+Fs7w66TWHKb4+otgdAVb2elKGT012q6lvknO7K75rxYuVFbF+dL5PtRTurcWBzEdYUjZMXcSqXDASger5YspCIiLQ3pre3tzfWO0FEsZfxxOdwuTnVIFJiHrVY0s/X2Sfm4XRjh2qVkYHoff5OJstREOjiViKigeD0EiKiCPQ+fyfKN8yUP6+7PUsuC1i+YaYqWRPzrkMt5RfK2SfmqW5FL6atMOEmIhp+WOkmIgDA7OeOofri4K/OMdL4rhBiu+rGlN8ekT/7Lt830Kq0b8U1WEWdBk/RxBQcf3xOrHeDiEYYJt1EBAB4bN+3eOEze6x3gyjmHp43Hn9c94NY7wYRjTCcXkJEAICVNy4GJBrt+F0gIi0w6SYiAMDifAMsmbq+n0g0glkydVicb4j1bhDRCMSkm4ikp+7O6/tJRCPYQ3dkw6DnarpENPiYdBOR9PC88azy0ai1ON+AR+80x3o3iGiEYtJNRCp7189gpY9GHYM+Hs+vuo2xT0SaYdJNRCoGfTyOPz6HyQeNGgZ9PPaun4GiiSmx3hUiGsG4ZCARBeRye7Dk919x7W4a0RbnG/DHdT/gRcREpDkm3UQU0guf2bH9wEXUXeuM9a4QDRqDPh5P3Z2Hh+eN56gOEUUFk24iCsu+Ew589m0z9te6WP0OR1cb0HJZ/d/VWu/js/8BmP5j9XOP/ydgrwYazwAJeiBjEjDuNiA1G0gbD6Td+L+eF7r2h0EfD0umDpZMHR66IxuL8w1Mtokoqph0ExH1g8vlQl1dnfyvvr4e1dXV8nGXyxX0tU899RT+9V//Ffv370d5eTleeeWVkM9XSklJQV5enuo/i8Ui/52TkzNYb5GIiAYRk24ioiCCJdYiqQ6VKBsMBlgsFvlfXl4eioqKEBcXh7/+9a94//33UV1dLZ9bVFSErVu3or29HXv37sX777+PtrabIwppaWno6elBe3t7yH3W6XR+ibgyOTebuSQeEVEsMOkmolFrINVqg8GgSqxFUltUVASLxQKDQT0NJFBV22AwYOvWrVi1ahWKior82njrrbfwxhtv4I033pCPxcXFYfny5SgqKkJOTg7q6+tRX18v9//q1ash33NCQkLQKrn4b8yYMZH8GYmIKAxMuoloxBLV6FDV6lCCVavF476JdaD2X3jhBZSXlwesai9evLjPbQBAc3OzTL4/+ugj+bjJZMKaNWuwZs0a3HnnnQCAtrY2v0Rc+V9DQ0PItsaMGRMwGVf+HB/PudBERJFi0k1Ew9pAq9UDSaqD2b9/P/70pz9h3759sn2LxYKHHnooaFU7XOfOncMbb7yBN998E8eOHZOPT5kyRSbgBQUFQV/f2dnpl4wrf7bb7X3uQ25ubsgpLImJif1+f0REIxWTbiIa0gaSVAOBE2tRYRb/DdZ+BqtqP/XUUzKZH0xffvmlrICfO3dOPl5cXIz77rsPa9asiXgOd3d3d9BKufi5LxMmTAg5hSU5OTni90pENNwx6SaimNPigsWBVqvD3e/q6mps374d+/fv96tqP/zww7BYLJq1r/TRRx/JBLy5uVk+fs8998gKeFJS0oDb+f777/2mrPgm5x6PJ+Q2srKyQk5hSUtLG/B+EhENNUy6iUhz0bxgMRpCVbWff/75mO2X8Oabb+KNN97Af/7nf8rHEhISZPJdVlamafsXLlwIOJ9cPNbV1RXy9UajMWSlPDMzU9P9JyLSApNuIhqwWF+wGA1DqaodrqamJln9/uSTT+TjWVlZMgEvLS2N+n5dunQp5MWefS2LmJ6eHnQ+eV5eHsaNGxeld0JEFD4m3UQUlqF4wWI01NXV4ZVXXsGf/vQn1NXVARhaVe1wffvttzIBF9V5AJg6dapMwGfMmBHDPbzpypUrIS/2bGlpCfl65Q2EAiXnvIEQEcUCk24iAjB4FywWFRUhLy8PBoNBkwsWo0FUtR977DHVe1+8eDFWrlyJhx9+eFi9H19Hjx6VCbjywsgFCxbIBHwoJ6ZOpzPkxZ5NTU0hX6/T6UIuicgbCBGRFph0E40iw/WCxWjpq6o9kKX+hqoPPvhAJuCtra3y8b/7u7+TCbhOp4vhHkbO5XKFvNgzkhsIBUvOiYgixaSbaAQZaRcsRsNIr2qH6/vvv5cXYL799tvy8cTERJl8//3f/30M93DwiBsIBbvY8/LlyyFff8stt/R5V0/eQIiIfDHpJhpGRsMFi9FSXV2NP/3pT363ZV+8eLFcV3u0cjqdsvq9f/9++XhOTg7WrFmD++67DwsXLozhHmrL7XaHXBIx3BsIhbrYkzcQIhp9mHQTDTGj9YLFaFBWtZUXE462qnYkbDabTMBramrk49OnT5cV8GnTpsVwD6Ovu7s75JKIkd5AKFByzhsIEY08TLqJomwwLlgUUz6UU0BEYk3+9u/fj/Lycr+q9qpVq7B169ZRXdWOxJEjR2QCfuHCBfl4SUmJTMCzs7NjuIdDg/IGQsGS856enpDbyM7ODlol5w2EiIYnJt1EGgiUWIu1nXnBYnS4XC7s378fTz/9NKvaGnjvvfdkAq5cV/snP/mJTMDHjh0bwz0c2s6fPx9yCku4NxAKNoUlIyMjSu+EiMLFpJuoHwbjgkVRrU5PT5f/ZlI9cKxqR5fH45HJd3l5uXxcr9fL5HvFihUx3MPh
SdxAKNh65R0dHSFfbzAYQl7syRsIEUUfk26iAHjB4vDicrmwb98+bN++nVXtGLp69apMwA8cOCAfnzhxokzAi4uLY7iHI0djY2PISnm4NxAKtiTi+PHjo/ROiEYPJt00avGCxeEvWFX74YcfxkMPPcSqdgx98803MgE/efKkfHzmzJkyAf/BD34Qwz0c2ZxOZ8iLPfu6gZBerw9ZKecNhIgix6SbRqyBXrDom1TzgsWhgVXt4aeqqkom4BcvXpSPL1q0SCbgnO4QXcobCAVKzsO5gVCoJRF5AyEif0y6aVhTJtbV1dVobm7mBYsjVLCq9tatW7Fq1SpWtYeJd955RybgbrdbPr5q1SqZgMfFxcVwDwnw3kAo2Hzy+vr6iG4gFCw55+dMow2TbhrSOAVkdHO5XPK27KxqjyzXr1+Xyfd//dd/yceTk5Nl8v3jH/84hntIoShvIBQoOVeOaAQzadKkkFNYeAMhGmmYdFNMaXXBokiqmZANT6xqjy6NjY0yAT948KB8PDc3Vybg8+bNi+EeUqSuX78e8uZB4d5AKNQUlqSkpCi8E6LBw6SbNMdqNYXD5XLhhRdeQHl5Oavao9jp06dlAn7q1Cn5+KxZs3DfffdhzZo1sFqtMdxDGgziBkKhLvaM5AZCgZJz3kCIhhom3TRgg7FmNS9YHL1Y1aZgPv/8c7zxxht488030dDQIB9fvHixrIAbjcYY7iFpSXkDoUDJeTg3EApVKecNhCjamHRTWEQCLaZ9KC9YrKurC/laVqvJV6Cqtrhh0EMPPYRVq1YxLkjlv//7v2UFXJls/fSnP5UJ+JgxY2K4hxRtFy9eDDmFJZIbCAVKzrmiDg02Jt0EgFNAKDpY1aaB6uzslMn3X//6V/l4amqqTL6XL18ewz2koULcQCjYFJbW1taQr09NTQ25JCJvIESRYtI9yuzfv58XLFJUhapqb926FYsXL2b8UL80NDTIBPzQoUPy8by8PKxZswa/+93vYrh3NNQ5HI6QF3qGewMh3yr5z372syi9AxpumHSPIi6XK+gcNlarSSvKuLNYLHL6CKvaNJi+/vprmYCfOXMGAPDmm2/iH/7hH2K8ZzRciRsIBVuv3OFwBHzdtm3b8PTTT0d5b2k4GLVJt6vLg1fONKC8zgFXlwd1rZ1wdXlivVvae/VG5SczGzBmA+Z8wDge0KfEdr8GmSVVhyJTKmYZk/HorFwYEuNjvUsARnHcffoWYL7NG28jLNaUGHdDRO0J4MxR4O8fifWeRAXjLkY6O4BrjTf/c172/n/FL4Cc0bEIwFCNvaFq1CXdri4Pnj5ah1fONIysLz+F9OisXDy/MD9m7TPuRifGHcUC445iJdaxN9SNqqS72tGG1e+eQF1rZ6x3hWLAkqrD3nsKUGSKbqWVcTe6Me4oFhh3FCuxir3hYNQk3dWONiwpP86z7lHOkBiPT1fOjlpnwLgjgHFHscG4o1iJduwNF7fEegeiwdXlwSOfnGZHQHB1eaJ2UGDckcC4o1hg3FGsRDP2hpNRkXS/8NUFVDvaYr0bNES4ujx47PNazdth3JES445igXFHsRKt2BtORnzS7eryYPsJu+btrM3PQu/GJejduASlOVxeDwDKVxSgd+MSbCk0R/W14XjlTIOmcw6jFXcA0P7LRejduAQ7S61RaW+o21JoRu/GJShfURDV14ZjJMXdV2vvQO/GJTh875yotDcc9G5cgrMPFEf9tX0ZSXH36rLp6N24BI71JVFpbzg4+0Axejcuifprw6F17A03Iz7pjtbV04/OyoWzsxsA8PjsXM3bo4Hb991VzbYdrbjbWWpFUnwcnJ3duHuSUfP2aOBGQtyV5hhQaEyBs7MbhcZk5KXqNG+TBmYkxB0A3DMpE87Obhh1CZoVZWhwaRl7w82IT7o/u9QclXYKjcmwNbthb+vCwvHpUWmTBkbL2IhW3C2aYICzsxvVjjZY0/UcZRkGRkLc/XLGBADAu+evISk+Do8XsdAw1I2EuFubnwWjLgHvnr8GAFhnzY5KuzQw0YqP4WDEJ93VjlbN2xDVxg8vXMP+Sy4YdQl4Zt5kzdulgdl/KfQtfgciGnEnqo3Vjjb8x6lLAICn7hgdN2QYzoZ73AHeaqO9rQs//+gUOjw9WDV5XFTapf4bCXH36Czvyd2vv/gONc42FGencZRlGNAy9oabEX/rINd17Ye8Fk0woMPTg21HziEvVYcHp2Rj5WQTth05F/D5ry6bjnsmZcKoS4Czsxvvnr+GtLFxKLOYMOalT+XzSnMMeHGRFYVG75I7Nc42bDpgw4HVs1FR58DKd04E3afyFQUos5hgefUwKlYUqLZR9s4JFGen4dkf3gpruh4dnh4cvtyCDZ+eQb1i7lVeqg57ls9AoTEZSfFx6PD0oMbZjvs/+Fr1PAB4Zt5kPDI1B+aURLm9YJTPBQB7Wxf+vfo8dtREZ06goOVwaDTiTlQb/+PUJbxeewXPLcjH/PFpQZ+/pdCMfy6apPqMDl1uxpNzLdh60Kb6+4cbo4Ha2F5ixdaDNpRZTJg/Pg1J8XGwt3Xh8UO1qGpswZ7lM1Cc7d1PEdOVDS7VdspXFGDh+HQYdQnyeb89Vo/Xa6+onrc2PwtPzMlTxfeBS+ptBXuueF8//+hU0PejheEed6La+JezjQCAw5dbsNScgbX5WX6fDxBZPxZJjPrq3bgEFXUOnG/txDprtozd3bZGbK60oXxFAZaZM2Q8BupzAvVNfzzT4NeX+/aNYnuBBOpHA/W3WhvucQd4R5RrnG2ob+1E+TkHCo0p+M0Pbw34Hc5L1eHlJVNVfdC/V5/HpgLvlJQpr1XJ5w7kWCvm4T/5xXd4bkG+KnaXVVTjmXmTsXHmRFVf6ru/gfqmzy83B2xX2TeK7QUTqB8N1N9qjSuY3DTyk26NP2xRbaxq9CaZ9a2dqHG2odCYgtIcg19wf1RWhKXmDDg7u1FR50CWfiwenJKNDk+P33bf+0khkuLjUNXYgivu61g4Ph3v/aQwov07uPp2uHu+R0WdA5ZUHQqNKfiwrAgTk8eixtmO003tuN2UiqXmDOwotcoveaD2s/RjUZydhlPr5mH67iPygPHMvMl4cq4FHZ4efGz3ntGKjs7XzlIrNhWY5XPbPT1YOD4d20usMOkSgp6oDDfR6GTE3EaR6Oy/5MKDU7Kxs9SKzZU21XODfUaBkvRwYzSUfy6aBH38LfjI3oTk+DgsNWfg94umwO353i8eX75rquoAeOEXC2BOSYSt2Y3PLzcjOT4O88enYc/yGchOGisTpdIcA3bdNdXvOyIOqkpr87P8njstIxkPTslGTtJYLKuoDvu9DWXRun4FAP7wtXd05T9OXcJScwYenZXrl3RH0o9FEqPB3G5KxTJzBg5fbkG7pwfLzBnYVGDGogkGTExOVMXjvxXfivJzDtmP+cY9ACwcn44n51owy5SiSoAOrr5dxujppnZMy0jG9hL/C5nzUnV+z83Sj8VScwYOrr4duX8+FPZ7G8qief2KOKneduQc/qkoF/dMygz4/GCfUYenBxfbr8v
nDcaxNjMxHrvumooaZzu+dLTKY+pXa+9AfrpeFY8PTsnG3660yH7MN+7bPT2YlpGMMosJZx8oVvWNwfrmQA7fOwfF2WnyuaIffe8nhfjxf9VEPfEmrxGfdGtNVBtf+OqCfEycgf9yxgRVYJfmGLDUnAFbs1v1RRLVQaXfLbgNSfFxftWdsw8Uw5quD3v/rnV1Y9brf5M/X/jFAljT9aqz97xUHU6tm4dpGcl+7b94wq5K4ETSvGf5DMx/6xgAYOPMiejw9KgScbFNZeKdl6rD+mk5cHZ2Y86bR1XPPXbfXPxTUe6ISbq15lttBLxDrg9OycaiCf7zukN9RkqRxGgo+vhbVJ+xOFj4btc3nneWWmFOSURVY4uML/F+d901FdvmWuT3IZLvyLM/vBVJ8XG4/4OvVYnhV2vvCFmlJX+i2ij6NjHKUmhM9ntuJAK2C/kAACAASURBVJ9RuDEaijklUfUZi4QmP12v2q4YCVw52YQdNXYZ9/a2LpTs/VLV/sHVt6PMYpJFFBGjvhVQsU2lHTee+5ezjarq5qvLpuPBKdl4ddn0qI+0DFdiRFl5PAo2yhLJZzQYx1rRF4vPMi9Vh7qfz0ehMUW1XdGPLjVnyMdE3K//5IzqPYikWRRRIumbtxSaUZydhhpnm+r4vzY/C3uWz8CLi6yqxyl6Rvycbq35VhsB7xl4h6fH7wxcJOh7bI2qx3fU2FHjVK9vWmhMhr2ty2/488kvvoto/14+3aD62d3zPQBgi6Ljqm/tVJ35A0BxdhrsbV1+FdPNlTbY27rk9ACR/B2+3KIaKq1v7fSbYvJ4US6S4uPw7vlrfs/dbWtEUnwc58KHybfaCPiPsgihPqO3v3OothtJjIby+eVmVVvtN6rkvts93dSu+lmswHL/B1+rHn+99goOX25RrVgQ7Dvi20ZpjgHWdD1qnG1+ifWmA974/h/TJ4T93kYz32qjsP+SC0nxcX7LVobbj0USo6HYmt2qz9hxY0Up3+2KSrogVpz645kGv/b/eKZB9RxxUrvFp2/0/RnwVso7PD1+ifXPPzoFZ2c3Fgc4QSZ/YkS5xqnuL8S1LKI/FIJ9RivfOeE3YjdYx1rlZyxiyHe7vm1sKTTLuPftm0QfKPrEUH2zva1L9Zi4wPS3x+pVj79eewVVjS0oNKZwLnyMsNI9AOJAASDgOpdJ8XHYUmiWX7T8G2fNvokwANS1dsr5XKU5BiTFx+Gwy39e9Ou1V7Bn+YwB73s4cwm/cXUEfNze3iXnPGYnjQXgnzwBwKHLzVhqzpA/T7rxJc9P1/utg5yl925nFm8ZGxZRVTywenbA3z8+O1dWIktyvKvpHLrsfwX53660qIYnw43R/hJJUCj2tq6A8Xm6qV0VT0nxcahp94+7l0834Mm5Ny8onT3Ou8/6+Lig629P4gEoLCKZ2VRgDjiNx5sgeBOdSPqxSGK0P9rDnBoVKO4/vNCkiif9jSU6fWO0vrXTL/kR826DxZ3oRyk0kXAWZ6cFPNaKZSvFZzIxOTFoP+I7tUTLY60ocvUlUNzXt3bKZYgBIG2sd9T4wwv+FyV+4+pQxZLIS342JRs/8/nuiN+JUR6KLibdAyDOrsX8PyUxb3CdNXvUBnawBEtUyal/RLVRzD/0tcycMaqXrQx2QmlN10c0NYvURLVRzJH1dbspVS5bOVrniwZKsoy6BL8pDRQZMaL8eYDkVFwb8nhRrt/I7GgR7KSScTf0MOkeADEsFezKZsf6ErmkUX1rp0yQNkzL8Zu7bFFU2sQB6weGJL9trs3PGqzd71Og9gHAnHzzjLqxw/uelPPBBWVVEgBarns7hr5WIaDQRLUx0CoywM25gM/Mm4xtR87B1uwGACwIkIjfkaU+AQo3RrVkTklUVa0E3xjr8PSoYlHwvWGGiNG+ViGg0JTD24GuvRDXezx1hwXLKqoj6sciiVEtBYr7H+Wq+zG3pwfGdH3AGLWm6+V7ASAv2lPOwaXIiBHlj+1NAb+/pTkGHFg9G6smj5NJ97UuD6xBPqOJyWNltXuoHGsDxX1eqg5GXQKu3bhIVRw/f5Sb4XdS69s3um8k4aFWmqLY4JzufhLVxv1BlicDIJfy+c0PbwUAPHfce7Hl/T4L+q/Nz/Ibtq9qbIE5JdEvgXhiTt6A9z0con3fOZrKC90A7xCcs7Mb88enqTqpvFSdX7VVzD8ONCx9+N45mt72faS4ObexLWhFV1zUu3Kyt8qxo8YuPyPlPL68VJ3fdQeRxKgW3j/vBAC/Yd21+VmYP957Jb44YatxtgeM0Q3TclQ/ixhdZs7wm8e4s9Sq6W3fR5J7JmXKpVED2VxpQ4enR7XaSLj9WCQxqgUR949MzfFr/5GpOarniPnsvjH66rLpftutcbbDmq73e/9r87PQ/stFmt32fSQRI8pi/ravygYXapxtMKckymOQ6Ed2+PQNry6b7reqViyPtcq4903yRXyJ9yKOn74xujY/y28ET8Sob7+Wl6rDhV8sQPsvF3FOd4yw0t1PotqovJDN1x++voQHp2TLg0Zlgwsf25uw1JwBx/oSfH65GVn6sSg0JqPD06PqDH516Fu895NCbC+xYp01G1fc13G7KRWZuuh8ZKL9TQVmzM1KUy0Z2OHpwa8OfSuf+8zROmwvsWLXXVPlBWmBlvmqbHDhxRN2bCoww7G+BNWONrk8kjVdH/BiFlIT1cZga1ED3iTz94umqJatfOnkRTw514JT6+bJC1yLTCnQx6vPuyOJUS1srrRh1eRxKM5Ow9kHinG6qV0udZUUH4f/XX1zlaD7P/gap9bNU8XowvHpfu8JAP7xwFnsumsqTq2bhxpnu188i4SKAhPVRnGyHUyNs1214kIk/Vi4MaoFZdwfu2+unMYg1jj+2N4kq4ubK224e5JRFaOiD/O9SO/+D77GwdW3Y3uJFRum5aCutVMVz74XxZG/QmOy32IFvg5ccqHQmCKXrRT9iFh273RTOyypOnnNilKsj7Ui7sXx0/eYKKr3lQ0uVNQ5UGYxyRhV9mHKvnlzpQ1zs9JQZjHhwi8W4MsbNy9SxnM014inm1jp7gdRbbS3dYWcu1jZ4IK9rUu14sKyimo5B7zMYoI1XY9dpxv8Vg+pbHBh/Sdn5F23yiwmXOvqViUdWqpscGH67iM3rnT2rhlaaExGVWMLpu8+onrfO2rsuP+Dr1Hb7MZScwaWmjNQ42wPuK+bK2149mgdrnV5sNScgTKLCZmJ8aioc6Bk75dReW/DmTiBe66POKh2eFcaESsubDtyzvt37/T+3eePT0O1ow0f2f0vygk3RrWS++dDqKhzIDMxHmUWE5aaM1Db7Mb9H3ytqrLWt3b6xei1Lg/Wf3LGb5uv117B+k/OoLbZ7RfPXLO2b6La+OGF4DfiUP5erLgQST8WSYxqYVlFNZ49Wge353uUWUwos5jg9nyPZ4/W+a3jPuW1KlWMZibG49mjdX7fkfrWTpTs/RJVjS2YmJyoiuetB21cIrUPYkRZ9GfBiP5QXFAJQPF3H3vjM0oI2DfE+li77cg51fFTeUz0Xcd95T
snVDFaaExGRZ0j4Hdk/lvHVP24iOcXT9hHzH0JhqMxvb29vbHeCS0NlzlNYk3QvvZXrMnJ+amDI9CV8INhuMSdWLc2nHn24cYo9W20x10k/VgkMUqhjfa4A7x/A9+1rgPhsXZwaRV7ww0r3VG0pdCM9l8u8luLWszJUq6DfPaB4oDz/cR81f9zlsOSFL5A85bFvPsOT4/q5g3hxihRXyLpx8KNUaK+lK8ogGN9iep+BcDNefdfKKZJ8VhL0cQ53VFUfs6Bfyu+FU/OteBHuZm44r4u5/d1eHpUC9mfbmpHmcUk59UCkPO8qhr9F9InCsXW7FbNbwRuzu978cTNZCaSGCXqSyT9WLgxStSXj+1NKLOY8N5PCuUt2MX8Z3tbF36tuPENj7UUTZxeEmWlOQb8bsFtKDQmywsfapxt+O2xer8v985SK1ZNHicXvRfrlHKoa/CMluHWvFQddpRaZRIDeG9C88czDX7zSiOJUeqf0RJ3QPj9WCQxSv0zmuJuS6EZG6blyFWXOjw9qHG2B1xqlcda7XF6iReTbhrVRtNBiIYOxh3FAuOOYoVJtxfndBMRERERaYxJNxERERGRxph0ExERERFpjEk3EREREZHGmHQPcVsKzX7r1/ZuXBJwXVFf5SsK0Ltxibwb5mAIt20a/nw/67MPFId1MUygmB2ocNum4c/3s46kHxvs/kmLPpSGJt/POpJ+bLD7Jy36UBoamHRTQHmpOpSvKJA3EyCKlp2lVhy+d06sd4NGmS2FZhYUKOpKcww4fO8cntiNErw5zjAUjeWZVk42ocxiQkWdI+pt09DU122TB8umAjNsze6YtE1DT7TWSt5UYIY1XR+Ttmno2VFjj8pdUB+fnYvi7DTstt2882W02qboY6WbiIiIiEhjrHQPop2lVmwqMOMvZxvx849OBfzdiyfs2Fxpk3dfu92UqroLlq3ZHfCOWUq9G5fA1uxWVf+emTcZj0zNgTklER2eHhy+3BLwteG0W76iAGUWEwCgzGJC78Yl2HrQhh019oBtr83PwhNz8uSdvwLdzWtLoRnbS6zYetCGpeYMLDNnICk+Ds7Obrx7/prf34vCV5pjwIHVs1HjbMOs1/8W8HdVjS2Y/9YxAN5YvHuSUVb1Ojw9qG1293nHybMPFMOarleNdvh+9jXONhy45Ar4+r7aFTECANZ0PXo3LkFFnQMr3zkRsO28VB32LJ8h75wZ7I5zYsrAk198h2d/eKtsv8bZhk0HbKhsCLy/1DfH+hLo429B8h8OBPwdAJh2HQRw8w6B+el6eadTW7Mbe2yNIe84Kfoj0QcB/p+9va0L/159PuDrw2lXOR9X2ccFalvsk/LOmYHu2Cpea3n1MPYsn4Hi7DQAkPvKSmb/Hb53Doqz03D/B1/79Vnid4v2Hkdlg0veYdearlfd6fRLR2vIkQzRH4k+SFB+9uL4FUg47Yp+DQC2l1ixvcSKMS99GrRt5XFebM/3jq081g5trHQPos2VNnR4erB4gsHvd4smGNDh6cHmShsA4MOyIpRZTLjW1Y2KOoecxlGcnYY9y2dE1O4z8ybjybkWZOri8bG9CYcvt2D++DQsNWf4PTecdj+2N6Gq0Zu025rdqKhz4PjVtqBt71k+A/npenxsb0JFnQPXujwos5gCzo/856JJWDg+HYcvt+BjexP08bfgwSnZ2Flqjeg9002VDS7UONtQaExBXqpO9btfzpgAAHjhqwsAvAeMTQVm6ONukZ//xfbrKDSm4PeLpkTUbmmOAbvumopCYwqqGltQUefAxOREbCrwn5sYTrvHr7bJeHR2euPzY3tT0LZPrZuH4uw01DjbUVHnQI2zHcXZaTi1bp7f3yEzMR677poKt6cHFXUO2JrdKDSmYO89MyN6z6T27vlrSIqPwzPzJqseX5ufBaMuQSYkIhGYmJyIw5e9sVLV2AJruh5PzrWgNMe/zwzl4OrbUZydhovt11FR54C753t5wqYUbrsVdQ44O7vlv98/7wza9oVfLLjRh3pkjOan67Fn+YyA83IPrr4d5uRE2bY5JRHbS6wRv2e6SfRn/2P6BL/fFRqTUeNsQ2WDC3mpOrz3k0IUGpPlsexjexMydfEos5giPu58dOP4CUD2Iw9OyfablhRuu++fd8qpdKIPDdX2k3Mt0Mff7EP18bfgybmWgBdc8lg7NLHSPcjEgb80xyAraKU5BpmYiJ8zE+NV1UfBsb5EVkTCtXHmRHR4ejB99xFZ4ctL1eHUunmyshNJu6ICU5ydhtNN7SGrAaLt9Z+cUVUcRLVhZ6lVnmgAgD7+Fsx586jcz7X5WdizfAYWBThRofAduORCoTEFjxflqv7e90zKhLOzW3420zKS4ezsRu6fD6leLz6vLYXmsCtwv1twG5Li4/yqgMrqjRBuu5UNLvRuXIJrXZ6QcSfaFiNHghhR2rN8hirGjboEvxGor9begUJjCtbmZ4Ws8FNwf/j6Eh6cko0f5Waqqm2PzsqVvwcgCwCr3z2pGlkQn9fjs3PDHnHYWWqFOSUxYAVSJERCuO2K0RSjLiFk3Im2ffvQtflZ2HXXVGyba/H7/lzr6laNQL26bDoenJKNX86YwFGWfnq99gp+v2gKikwpqsd3llqRFB8nR9s2TMsBAOw63aDqJ8QI4N2TjABsCEdpjgFLzRl+I73KEToh3HY3V9pQvkIHa7oeu22NQfte0ba9rQsle79UHecPrr4dZRaTKucAeKwdqljpHmTiDFxUGJX/Fr+rbHDBtOugX+ILANe6PBG1JypKhy+3qIbU61s7/aaYDGa7gLezEW37Ji33f/A1ANzoXG76/HKzaj/F6/SKkwOKnBhlUXaovtVGwHtBohjuV7rivh5xm4XGZNjbuvwOFHsUFwRp0S7gPSG0t3WpDmiA9+9gb+sKeOLqO6xadyMOs5PG9msfSDnKkqx6XFltBLwXJI556VO/JNP3gtlwiBjf4vPZ+/482O0CN/sz0b8Jr9deweHLLTDqEvyq3S+fblD9/Lcr3n45bSz7vIF49/w1GHUJWJufJR/zHVHeduQckv9wwK+f6M/JjjiO+/ZvO2rssLd1qR4bzHYB78WWAPDHMw1+x/k/nmlQPUfgsXZoYqV7kIkz8HsmZcrHFk8wqKqNSlsKvVfMT0rVYVpGsl+FsC8iYTjd1O73u0OXmwNOMRmMdn3b8VXf2imHayk6fEdZxNCrqDYqrc3PwozMZMwypcCSqkN+Pz7/pPg41LT7x93Lpxvw5FxLwNcMRrvCN66OgI/b27vknEfSnhhlEaNaz8ybrKo2KpXmGDB7XAqWmjOQpR/br35Hf2N+qu91L/WtnX7Jz2C2K9jbugJec3O6qT1of0uDT4yyPDorF6/XXkFeqk41oqyUl6rDyskm3JGVhpyksfiBISni9sRJ0ocX/Ke8fePqCNjnDEa7Sr4ncGJ/gvW3NPQw6dbAu+ev4cEp2Vibn4VL7ddhTknEX86qz44/KitSddDOzm5cbO+Cs7NbXnQxUI4ASW802qXYeOGrC9izfIYcti4ypaiqjYB3+HX9tBw57
ajD04OL7ddxsf36gBIRpUAJSTTapdjYXGnD+mk5sgL9o9xMVbUR8J5s/X7RFFUfY2t242J716D2O+6e71U/R6tdij7fUZbHi7yVXjGiDHiT3g/LilR9jL2ta9BPzNs9Paqfo9UuDT9MujUgzsCVF3koq407S61Yas5AVWMLXvjqgqoCLuYVhquxwzs8Py0j2e93vlWXwWxXacH4dL/H8lJ1MOoS+jVthfpHjLIsnmDAM/Mmw6hLUK39WppjwKYCM+xtXfiX6u/8VmOINPnt8PTAnOx/APEdXh/sdoVgVaNA+0TaUo6yFGen+VUbf79oCvTxt+DZo3V4+fTNIfJA82H74vb0wJiuR16qzu8Ez5quV00dGcx2BXNKYsC2A/XBpC0xyvLMvMlYNXmc34jynuUzYE3X4y9nG/GHry+pChCR3kGy5bo3sf5RbobfNBHfz34w21XaMC3Hb6WfH+VydGU44ZxuDYgz8CJTSsBq46QbKyt8eOGaqoNYm58VcQLyeu0VODu7MX98mmpuW16qDgt9kuHBbBfwzmUL1DYAuRJKqFUAaPC9e/4azCmJuN+a7VdtnD3Oe9HRN64OVeIbKFbCUeNshzkl0e9qeHERkVbtApCrQPi2rbzQjaJHVBdfvmuq6mfBqEvAtU4Pth05p0pWfWMlHGLaiu8qT4HunjuY7QI3+zPfttfmZ2H++DQ4O7u5FGAUiWtZ7rdmw5yS6Ld8nygk/fqL71TH4P7caVkUzh6ZmqNaHSnQ8XMw2wWA545fCNh2XqoOj0zNUT2HhjZWujVy4JJLLp222+fCi68cbSizmPBPRblYMD4d7Z4eWG7MR+vw9KhWHAnHM0frsL3Eil13TZXV9fnj/S8ki6Td8nMObC/xruddvqIAzx2/EPAikJdOXsSTcy2y7XZPj5wjHuhCN9KWGGWxpuv9Es/jV9vQ4enBUnMGDt87B1fc15GlH+t3EVy47v/ga5xaNw+bCsyYm5WGK+7rWDg+Hfp49bl8pO3a27owMXksylcU4GN7U8Ak5leHvsV7PylUtZ2lH4vi7DR0eHrwq0Pf9us9Uf+IURZruj7g9Sv2Nu+w+tkHinG6qR3J8XEoMqX4xUo4NlfacPckI4qz0+T2RJ/T4TPMH0m751s7YU3X46OyIpxuag/Yd22utGHV5HGqtpPj4zB/fBqS4uPwv6uZ+ESbGGUB/K9fEZ/psfvm4vMb1x6Je1T4xkpfKhtcqKhzoMxikttT9jnK42ck7Yrj8oZpOVhqzgi4ek5lgwsf25uw1Jyh2qZYL/xjexNXwhkmWOnWiDgD9602At4rm188YYfb8z2WmjNQZjFBHx+HZ4/W4e3vvOt0+q57G8qOGjvu/+Br1Da7sdScgaXmDNQ42/0OAJG0W9/aiY/tTTCnJKLMYpLVSl/bjpxTtV1mMSEzMR4VdQ6/5eFIe2KUBfCvNlY2uLD+kzNydY8yiwnm5ER8ZG/C+k/OAIDfkmuh1Ld2YvruI6hqbEGhMVmuXSy21d929527Kh8LdmFaZYPLr+1CYzKqGlswffcRHoBiQFQZA90spGTvl6hxtsGarkeZxYQiUwpszW5M330EHZ4e3G5KjaitKa9VoaLOgczEeNnnPHu0Dhfb1avhRNLuf5y6BGdnN5aaM/xWXVLK/fMhVdtLzRmovXFzsVA3+SFtiH7Od0QZAJZVVMs1qsssJiwzZ+BaV7c8ZllvTFMK18p3TuDZo3Vwe76XfU5FnQMf+dxPIJJ2Xz7dIO8bEKr/XVZRrWq7zGKC2/M9nj1ah2UV1WG/B4qtMb29vb2x3gktKe9gR+RrIPPrQmHcUSiMO4oFxh3FilaxN9yw0k1EREREpDEm3UREREREGmPSTURERESkMSbdREREREQaY9JNRERERKQxJt1ERERERBpj0k1EREREpDEm3QPkWF+C3o1L5H/lKwrCfm35igKcfaAYgPf21Y71JWG97uwDxRG1MxBbCs2arK+p3ObZB4qxpdAc8DmlOYZBb3skOHzvHFXciTgKh/IzLc0xhP13VsZrNPRuXBIwLgZCGWvlKwpU3yPl3zPc7+Jos7PUqvo7Rdo3KD9Tx/oS7Cy19vkarfqgYLToX5WxtqXQrPoela8oYNz1QfRTyv8i6RuUn+nhe+fg8L1zwnqdFn1QMFr0r76xFux7dPaB4rD/JjQwTLr7SXQCu22NGPPSp/K/MouJwduHLYVm2Jrd8mdrut7vVt9cSD84cWBWxp3ycQpOGWvTMpLx8Y07yTnWl6CiziH/nte6PFE9wRgODt87B+us2aq4q6hz8OQ4DMpYW2rOwOmmdgDek5gyi0n+PW3Nbsadj52lVhxYPRuL9h6Xf6etB23YXmIN66RtNFPGmu9xV9hZaoU1XR/tXRu14mO9A8PV3ntmoqLO4XeL90V7j+PA6tnYUmiWB3dlAlnV2IL5bx3rc/uH752D4uw0+bOzsxumXQdVz1Fud9He4/IWuKU5BhxYPVv+rqLOgZXvnJDbBSC3vWjvccwel4LtJerOa+tB7/sSj/duXIKtB23YUWNH+YoC1e1qRdui3arGFhRnpwV9r9Z0PZyd3XJfxb+V71tsg9QO3zsH17o8fn/XKa9VwbG+BOUrClSftfJvGM5d47YUmgPGgvKk6OwDxbKTfvGEXfUdUP5OGbM7S624e5IRmYnxMOoS5Ot8T65ErIrHt5dYsdScgZXvnPDbN2XbjvUlsDW7UZydFvC7AvjHWmZiPI5fbUNpjgFGXYL8u4lt+/4dRrOdpVYUZ6f5xdDKd07g8L1z8PJdUzHltSr53E0FN6uDvvETTLBYEJT9jm/fEmmfpIxTALA1uzHltSr5uDVdj7MPFMv3FKwPD9Sf+t6KHLgZawCQpR+Lo1daAAB3TzKios4hn7fb1si4UyjNMWBTgRlbD9pUf9cdNXZY0/XYVGCWfUCo414owWJBWGrOkJ+Jb98SaZ/kG6eAt19WPt67cYn8nkXan/pSxpryuKu0qSBwMk7aGPFJtyExHq4uz6BuUxyknzt+we93lQ0u1YGpd+MS1ZdfTEEJ1RmUryiANV0vtyM6E+XryiwmVRJ8YPVsjHnpU/lc8SUUP+8stcovZXF2muqgtL3Eqjownn2gGNvmWuQXfHuJVe6L6ByUP4u2lQIleL5JoPJAJjqaK+7rqvcxXGkRd4C349xtawz4O+XB4PC9c1QxdPjeOXCsLwmYjAqhYkH8bE3Xo6LOgSmvVckDjq3ZjR01dlmhUx4wlImLNV3vd1BSfjdEbJXmGDDmpU9VJ3qiLd+fRdti+4HizjcJVMbdgdWzsfWgze91wQ5QQ51WcTc3y5uwBqJMfsXfWvQvO0ut2F5ixfGrbQGTUSFULAjTMpLl56TsRyPtk0SiLH4WsbSz1CoT79NN7ao+O1QfruxPlXz7MOW/i7PTMClVp0ruAGCdNXtYJkBaxd2a/HFwdnYHPGnbXGnzS7hDHfcCCRUL4nXK2FL2aZH2ScpRDaF34xIcvncO5r91DOUrCjAtI1nGRKT9qZJjfQmMugQA3lhT9n/KbRy+dw5ePGHH
3ZOMQf9GNLhG/PQSw9jBP6+YPS4FAEIeRADvl8zZ2e1XQfM90/W18p0TquSossHllwBUNbbIL/bKd07A2dmNnaVWPD47F7Zmt/wiVja4UFHnwDprtnytrdkt912cJCg7NTEcFcjC8el48cTN54q2lXMggyWF8986hjEvfQpnZ7ccKqxqbMGLJ+yyYwmnMjFYDInanXNqEXcAYNQlhHVQLs5OwzNH6+TP8986BqMuIeRwbDixoIznHTV2VDW2YJ01G6U5BljT9djwyRn53A2fnIE1Xa+aE6k8QJh2HVR93mL4PZB11mxVzIu2lQeTzy83B3zt5kqbX6y9eML7et/3q2wv2PYGarjG3RX39T6fJyq3on/ZXGmDrdmN3y24LeTrwokFZWy9eMKOhePTAUTeJ81/65gq2Q1VhQ+nD1f2p0ri+6SMtUV7j8PZ2Y0xL33qV8Xv3bgExdlpqvc5mIZj3E1K1eFaGMl8OMe9QMKJBWVsiT6tNMcQcZ8k+iGlYH15f/pTJdOug6pYUx53xfvdUmiGUZcQ8qRksGgZe8PNiP9LWNJ0qGvtjEnbgTqMN2qvYlOBOaw5kL4VOiUxZCRc6/JgUqoOWfqxfgn6x/Ym1UEiWAVPWQEM9pxASZ+t2Y0s/Vj5sxhGDcaoS5AHqXCTSC1YUnXabTuGcSdiy/dzsDW7MSlVF9bfO1gs+L72ivs6pmUkBzwRFSeLlyMbGgAAIABJREFUyuHRQHyHdwMx6hL8TgCOXmlRHVTP9/H3VsbapFRd0CTSsb4E17o8mp0AjtS4A7zTKHw/h9NN7ZiWkRzW64PFgrOzWxVbtma3rOT1t08KNNTvK5w+vK8REWWszR6XEjCJFLEmKrThTsmJxEiOu3COe6GEioU3aq/Kf4sYnD0upd99ku+UlEDxM5D+VLkNZawpj7sAsG2uBavfPRlyG4NFy9gbbkZ80r3SYsL+i6Er0pESHXhpjiFghSPY4+FSJtti2FLLi+REgiWGUMtXFMgq0mBSTi9RJnXbS6yqKSzRsjLMDrm/2x7suAOg6nR9DTTugOjFAnAzwbI1uzHmpU8DzicfDMrvk4g1QTmFRfzsO6dzsA3XuFMmsUqDEXfRigVAnWApp6sMNt/pJb7T6QLNOa5scMHW7MZSc8agJ93DMe7Ot3aG7H8GGnvRigVAnWyLPkeri2aV00t8487W7MbppnZ8frl5wN/bcGkZe8PNiJ9e8vDUnEHfpugYH5+dG/D3Yv71+dZOZPoMq6zJHye3EYwYoh3z0qdBnzfJ58xRVJiuuK/LL5uw1JwR9KxYDJ/6DncGEyjps6brwxp6nv/WMVTUOeR723rQJg+y0U64AeDhaYMfG3LbGsQd4B2uDDb/7uW7puLsA8WqaoySNV0fshocTiz4xpaoMClPRAVx7UOgyroYPlUOd4YSKOmbm5UW1tDz5kpbwFgT/1Ym3FWNLZom3MDwjLvdtsagFzY/PjtXHtjFiJvStIzkkFW5cGLBN+6Uc+4j7ZPEdJRw+pz+9uHi976xZmt2y2sIVr5zIuxlEwfDcIy7zZU2GHUJAZft21JolosWRHrcE8KJBWU/qhxFjLRPEtNRgk1pU4q0P/Vl2nVQFWvK4+6U16qwcHw6yiwmufyiNV2P4uw0zYp7WsbecDPik25DYrwmHYKY1+e7nqs4k1z5zgnZYSifs6nArLpaPRjll/nwvXP8OhTlRUblKwrk3Kznjl+ANV0vO/LSHAPKLKag86wB9QFtS6E55JDc55ebVVNeRNvhDsVn6cfKxC+WF6s9OitX0yEvreJu5Tsn5MoKSqJSKJKWqsYWbJtrkb8/fO8cODu7+5y/11csKGNrS6EZxdlp2G1rlCeiL981VT735bumqi4qCkR5QAtV2RRJnzj4iraV8y1DCbViDuCtDIW7stBADNe421Fjh63Z7VcFFJVCsdrR++edqr5JLEf2q0Pf9tlGX7GgXIp1U4FZzpftT5+kPDEIVW0cSB8OBF8xR/Ddd/H3CnSR/kAM17gDvKNu20usqsRbVI0r6hzYUWPv13FP6CsWlP2o6NMqG1z96pOU/atYMCGQ/vanSr4r5igLLqZdB1VLf9qa3ahqbAl5oX1/aR17w82In14CAM8vzMf+i02DOudsR40dO2rsfjeI8D1wi1UYfIftQ5nyWpXfa6oaW1TzIivqHKqhS3GmXtngkssWis482BXOgPegMjcrTbbl7OyWa6CW5hiwo8aObXMtchheTDlQvudIqtTKA/DcrDS/uenRYEnV4SlFR6oVLeIO8P69zz5Q7DfvWvk5zH/rmLyBjvh9Xx1qX7EAeOfK3j3JiN6N3thSTs0QKz+I14eaplHZ4JLL8okES8TtmvxxqGxwoaqxRbVkIKCeHhLJvFdlrK3JH6eqFu0stcKoS4BRl+CXVAZbAq4/hnvcTXmtyu+7D6j/RqKfUfZNff0N+4oF8VldcV8P2I9G2ietfvckDqyeLZ//4gk7MMmIuVneSv77553YVGCWqzz0pw8XlPsvvkPKv4VYcjHY8q+DYbjHnXK50GDL80V63BP6igXAe2IUqE8TfU+4fdL8t47Jm+kB3lyhos4hp888d/yC3BdRkQ63P/XlG2vhnvgOtmjF3nAypre3tzfWOxEN1Y42zH7jb7HeDYoxQ2I8/njXVKyaPC4q7THuCGDcUWww7ihWoh17w8WIn14iFJlScHzNHRzmGMVi0Qkw7ohxR7HAuKNYYcId3KipdCs98skZvHKmIda7QVFiSIzHqsnj8NQdlpgeDBh3owvjjmKBcUexMlRibygblUk3ANS1dmLfd1dRXufQZKkjii1DYjyKTCm4M8eAVbeOQ5Eppe8XRQHjbmRj3FEsMO4oVoZq7A1VozbpHo3GjBkDAOBHTtHEuKNYYNxRLDDuKJRRM6ebiIiIiChWmHQTEREREWmMSTcRERERkcaYdBMRERERaYxJNxERERGRxph0ExERERFpjEk3EREREZHGmHQTEREREWmMSTcRERERkcaYdBMRERERaYxJNxERERGRxph0ExERERFpjEk3EREREZHGmHQTEREREWmMSTcRERERkcbiY70DFD379u2L9S4QERERjUpjent7e2O9E7Hg6vLglTMNKK9zwNXlQV1rJ1xdnljvFg0SS6oORaZUzDIm49FZuTAkDo3zS1eXB/vOXUX5OSfqWt2jI+4ufef9/4RbY7sfGjMkxsMwNh5FplSsnGzEw1NzYr1LEuNu5GLcDTGjJO6AoR17Q9WoS7pdXR48fbQOr5xpGPlffpIenZWL5xfmx6x9xt3oxLijWGDcUazEOvaGulGVdFc72rD63ROoa+2M9a5QDFhSddh7TwGKTClRbbeutROPfHIa+y+6otouDQ1FphTsvacAllRdVNtl3I1ujDuKlVjF3nAwapLuakcblpQf51n3KGdIjMenK2dHLfGua+3Ekn3HeaI3yhkS43F8zR1ROwgx7ghg3FHsRDv2hotRsXqJq8uDRz45zYSb4OryRO3kS8QdD0Dk6vJgyb7jUWuLcUcA445iJ5qxN5yMiqT
7ha8uoNrRFuvdoCHC1eXBY5/Xat7OC19d4BArSXWtnfjXI+c0b4dxR0qMO4qVaMXecDLik25XlwfbT9hjvRs0xLxypkHTigzjjgLZfsLOuKOoY9xRrGgde8PNiE+6efU0BbPvu6vabfvcVcYd+XF1eRh3FHWMO4oVrWNvuBnxSfdnl5pjvQs0RGkZG+XnnJptm4Y3xh3FAuOOYoV52E0jPumudrTGehdoiNp/qUmzbTPuKBjGHcUC445iRcvYG25GfNLtus4hLwpMy+FQxh0Fw7ijWGDcUaxw6tFNIz/p5odNMcC4o1hg3FEsMO6IwjPik24iIiIiolhj0k1EREREpDEm3UREREREGouP9Q6Mdr0bl/TrdVsP2rCjRrubEZSvKECZxYSKOgdWvnNCs3aIiIiIRgMm3USj0NkHimFN10f8Oq1PwrYUmrG9xApbsxtTXqvSrB2KDXEyH6loxIMogIx56VNN26HoE/1Kf2gdD6Iv1rqQRkMDk+4YC/aFFgcAfhGJiIiIhj8m3RQQp5SMbMGqhqLqwmlFpIVgMSUq4BzhIC3sqLEHLF4pK+Ac4aBoYNJNRENGsIMjkdaYdFEs8CRzdGHSPcwp54MtNWdgmTkDSfFxsLd14fFDtXi99goA7xn9hmk5mJicCKMuAQDQ4elBbbMb5ecc2HbknGq7wS6kVM57fHXZdCyeYIA5JREAYG/rwr5zV7G50haNt05EREQ0bDDpHiE2FZhhTdfD3taFiz3XMTF5LKoaWwAAh++dg+LsNACAs7MbtmY3AMCarkehMQWFxhTMMqVENJ1AbFNsLzMxHuaURGwqMGNaRjKWVVQP/pukIUN5UvaVow0bZ06EUZcAZ2c3dtsa5YlXaY4Bv1twG8zJifLkDPBeGPdFYwt+/tEp1XaDXUipPLk06RKwcrIJhcYUAN6Y/vxyM7ZU2lDf2hmFd0+xooyPJ7/4Ds/+8FZY0/Xo8PTg8OUW2e/kpeqwo9SKaRnJqguG7W1d+MbVgQ2fnvGLlUAXUirj/P+cbcSjs3JRaExGUnwcOjw9qHG241eHvkVlgysK755iScSH5dXD2LN8hjym1jjbUPbOCRlPO0utuHuSEROTxyIpPg7AzePuC19dkIUwIdCFlMo4/1FFNV5eMhVFphRZMKtxtuHl0w0cFRyGmHSPENZ0Pf5ytlEmMXmpOtS3dmJLoVl2Dr4XZeal6lCxogCFxhQsM2dE1F5xdhpePGFXVbXFAWqpOQOlOQYeiEaBaRnJKLOY5EFlYvJYeVK3s9SKTQVmAN5RFfH4xOSxsKbrYU3X44fZaRENr66zZqM4O01uTx93C8wpiSizmHC7KRW5fz40+G+Shhx93C3YdddUAJAn/e2eHgDA2vws7Lprqkx4RNyJwoA5JRGn1s3D9N1Hwj5Js6Tq5DaVRYvi7DS895NC/Pi/atjfjRIflhXBmq6X/Y8+Pg71rZ3IS9Xh4OrbVSO/F3uuyz7KqEvAnuUzkJ00NuxkWR93i9ymva1L9rGFxhRsL7HCpEvwG6WmoY1J9wjR4elRVQ3FwWSpOUMmRL5f9PrWTvz2WD32LJ+BpPg4maiHo6LO4TeNZOU7J9D+y0VIio/DmvxxPAiNAtZ0PWqcbZj1+t8A3DzZy0vVYf20HACBlxkUJ2jWdD3W5mf5VX+CKc5OQ1VjC+a/dUw+JqpC5pRE7Cy1cnrTKGBOSYSzsxtz3jwq+6y8VB0A4Nkf3oqk+Di/CiRwM1aS4uPwmx/e6jfSEkyhMQX2ti5M33szUS/NMWDvPTNh1CXgdwtuU8UkjVzWdD3u/+Br2WeJuNtRapVxufrdk6rjnzJWNkzLCTvpNqckosPT49eeKJZtnDmRSfcwwztSjhAX268HfHzlOydg2nUw6AFBmeysnBz++rkf25tC7sekGx0RjXy/PVYv/y0Skg3TcnCt0wNnZ3fAaUsr3zmBjhuVyRmZyWG3ZWt2+8Xyjho7apxtAIC5WWkR7z8NT++ev6ZKqOtbO1GaY5A/bzrgP91IGSs5SWPDbqvD04OSvV+qtlfZ4MK7568BQL/WvKfhqcbZpjpuipiwpOrQ4enBblujX8FJGSuZiQkRtfcvVd/5tSf6XKMuQRXzNPSx0j1CnG5qD+t5Wwq9w/1LzRnI0o/t98Gi/JyjX6+jkSdQlXrbkXN9VmAutl+HNV2PWaaUsNsKFud1rZ0oNN6c80gj39+utPg9Vtng6nO6koiVSAoDF9uvBxwF/NuVFjw4JZtxN4rUBRkNFqN9wYhYUV7b8n/Zu/vgJs48X/RfBxtL+EUykm0cZFskFgMBGxt7GBMsEiCTnHBrTRjuhrDMbM3APVP3csFkarNza1ObzG6yNeeeOTmV4WWyVXMrZGqyuSR7KsPYt06ymUlCgp3Yw0AwdhIIYhIDIsYg2bKxLRnL8f1DPE23uiVLtlryy/dTRWG99fOo9eunf/3000/HQqtX/I2L1/H6wysAAFX52TyrPIMw6Z4DGips0oWW4byB0UktkxesEXBnvOxEtpcVoHDBfHy7IBdFC+bjW+YFce98AOCsZzDuz9DsFMspemeRGVX52XCYjCjJMagurIzVZbZ3dFuks7xypTkGbFlihdWQgVXWbNhzDCibRNy5B0cmU0Waxph0z3LhF7Nd7Pej62YAl28G0NLdjzcuXpeuyiZKJDFzibiQV244OIbh4Jh0sVusPJM8SKS5Q8xcIqZPDecNjMbdMy0u0iSK5pDTgR0O7TMf3sBo3O2df+ybRFWNpgkm3bOcuJgt/OIzYXtZQbKrRHOA1gwS5/qGMHBrDH++PoCDHW5pqiyiRNGaQeIL3zCGgmN4z92Hxq88OOh0oN4e+/UrRLHQmpr3uv8WznoG8ccrfajKz5bufklzF5PuWU4kPUddPZqv/83SwmRWh+aIp6tLpZs0hV+AJizOiv1CNqJY/Mt37pFmfNj1/nnN6w3svMibEmx7WYGUcIdPpSv8eMXdya4WTUOcvWSWEzNE7L7d4y2U5hikaduIEs14+2DvE89NzYS7ocIW96lWoonkzg/F1NWhW5oJd+kkx9YSRVMomwkn0pSl39EYZkdzD5PuWe7IuW4AoXlmh368Hhd21uLCzlp0/WAt6u1WuPr9k76YkigS/+2DvXWLTNI8tsIhpwP/pfaeVFSLZrmBW6G4E/O/yzVU2NCydTUP9ijheobvTNn76kP3KV5zFplxdvu3OZSOAHB4yay3r9mFvpEgnnAUSncBDN2++M5tZFu3VcNiyMAORyFvK0sJ8fPTl3Bk4zJYDBno+sFaxd0oF6TPgzcwKk0ZSJQo//inL/Hg3WbYsjPx+sMr8ML9ZfCPfYOFmemwGDKktq/CEvs0lUQTeePidTy5qhi1hbn4/tJCPFqyEL0jQelulEDouiqti8ppbmHSPU2lvXQ8pvfFcgvtieZM1rrAUuuGJrHUK55betPsJU7tP11dirLbB3tA6ILKdy57sa/ZhUNOBxzlNqxbZEplVWkWuXQzgLpjn+D1h1fAYTIqLqhs6vKgodmFkmwDTmytgsNkhLPIzDmOKSHWvn
kajZvLsdqaI9323RsYRVvPAH768V/Q3O2DZ1cdLIYM3jl3DksbHx8fT3Ul9BRr8kpzk17TJTLuKBrGHaUC445ShVMTh3BMNxERERGRzph0ExERERHpjEk3EREREZHOmHQTEREREemMSTcRERERkc6YdBMRERER6YxJNxERERGRzph0T5FnVx3G92yQ/jVuLo/5s42by3FhZy2A0K2xPbvqYvrchZ21cZUzFQ0VNl3m15Qv88LOWjRU2DTf4ywyJ7xsIiIiomRj0j1JziIzxvdswFFXD9JeOi79q7db0bqtOtXVm9YaKmzSbcEBwGEyqm4/z4n0o2vdVq042BMHb7GQH0iJOI7l4EZ+kJgM43s2aB6MTYX8AK9xc7ni4FW+PmM9AJ5rDjkdivUU73Yq/009u+pwyOmY8DN6HfhHokenhjzWGipsiu2ocXM5424Cop2S/4unbZD/pq3bqmPeR+vRBkWiR/saHmuRtqMLO2uZtyQJbwM/ScceXYmmLo/qVq7rj53Bia1VaKiwSYmkPNDbegY0b7sernVbNWoLc6XH3sAorEdaFO+RL3f9sTPS7YydRWac2FolvdbU5ZFu6y42LLHs9cfOoCo/GwfqlDu//S2h7yWeH9+zAftbXDjY4Ubj5nLU262qskW5bT0DqC3MjfhdHSYjvIFRqa7ib/n3FssgNc+uOrj6/Yq7wF3YWQvPrjpVjJCS/ABveV4WDneG/vbsqlNsJxd21uLCzlosfa0tZXWdblq3VcNhMiriTiSM8vaH1OSxtsmWh3N9QwBCBzH1dqu0Tlu3VTPuwhxyOrC33KaIsYYKGw7UOeAwGXk79SjksRbe2SUccjoU+2TSF5PuSXAWmWExZOCFM1dUrzV3+xQ7pfE9GxQ7czEERTzW0ri5XLFzE8ms/HP1dqsiCT6xtQppLx2X3nu40419zS7p8SGnQ2qcagtzFYnygTqHtCwglHA8W2OXErgDdQ6pLiLhlj8WZctp3RY4/EBCftAwvmcD0l46juv+W4rvQUqt26rROxJUHcwsfa0Nnl11ihgJX9+x3KpZ7Mzk5LEBhOLDYTICgBRnWq/JDxQPOR14pMSChZnpsBgypM+F97yIbUU8f6DOgU22PGx5q1NVN3nZ4kCktjBX8wAVUB/gLcxMx5kbg9L2LN8mD3e6VethLjvkdKC2MFcVQ1ve6kTrtmq8vHGZlCiKJEkIj59IIsWCID/YDz+gj7cjQB6nAODq92Ppa23S8w6TUZH8Ruo40erE0Dr4ELEGAAXG+Th1fQAA8EiJBU1dHul9R109jDsZZ5EZe8tt2N/iUqzXgx1uOExG7C23SW1AtM6maCLFgrDJlif9JuFtS7xtUnicAqF2Wf682BeG1y2W9jScPNYiJdZ7y7WTcdIHk+5JqMrPBoAJe3YOOR3wBkZVO3P5DklLeEPR3O1TbSxtPQPSjmzLW53SqdqSHANc/X5pA2zu9qGpy4MdjkLpOVe/X6p7+EECAJzrG8K6RSbNuq1bZJJ6bORlN24ulw5Cjrp6ND8rdlSeXXXY+vanaO72oXVbNU5dH5DqFksjOZc5TMaI61e+MwjvlWzdVj1hT3i0AzDx2GEyoqnLg6WvtUk7HFe/Hwc73NJpTPkOQ564OExG1U5JvmMUOx5nkRlpLx1XnF0RZYU/FmWL5WsdWIQngfIE6sTWKuxvcak+x54fpZqCUMKqRZ78hvdKHnI6cKDOgTM3BqO2l9FiQVielyX9TvLOi3g7AkSiLB6LWDrkdEiJ97m+IUVHSbSOE3knhlx4Eij/u7YwFyU5BlWP9g5HIRMgmcfL8uENjGoetO1rdqkS7midTVqixYL4nDy25G1avG1S+FkNIBRLrduqsfbN02jcXI7leVlSTMTbnsp5dtXBYsgAEIo1efsnX0brtmoc7nTjkRJLxHVEicUx3ToqyTGgdySoeO7fL94AgJjG0MrHT4oNSBBHr0LvSBAlOQYUGOerkoX33H2Kz0dKJkRZ4UfichZDhmqn4Or3o8A4X3osenSiLUPsoLSWR5HFur5qC3Px3Kku6fHaN0/DYsiIOoZWHIDJd3Di1KQgP4g82OFGW88AdjgK4Swyw2EyYvf756X37n7/PBwmo2JMpHwHYT3SojjIes/dF7FuOxyFigNNUbZ8Z/LRtX7Nz+5rDiXVbT0DONzpRtpLx3G4M/T58O8rLy/S8uYiiyED1/23Jnyf6LkV2/e+Zhdc/X784v57o34ulliQx9bhTrfUMaDVEeANjCrGZcsPVNe+eVqR7EbrhY/UcSJvI+WdGHJie5LH2vpjZ+ANjCLtpeOqXvzxPRtQW5ir+J5zndY+VMtTVcURO5uiiSUW5LEl2jRnkTnuNkm0Q3KR2vLJtKdy1iMtilhLe+k4vIFRrD92Rvq+DRU2WAwZHJ6TZOzpngSRVDqLzJqNbaTnYyXvmRM9KHpeYCN6/kRvTuPm8og93VMhH+4g7208UOdQDGGhqREHdOEHP65+v3QmZCLy30d+kBb+2ev+W1iel6V59kecoZGfHtUSfnpXi8WQoToAOHV9QLFTvXwzMOEyRP1LcgwRk0jPrjr0jgR51mUSFmamq36Hc31DWJ6XFdPnI8WCNzCqiC1Xv1/qSJhsR4DWqf5wkTpO9pbbpO1sojMi8lirys/WTCJFrIke2liH5FBIpM6miX5fIVosiI4y4E77VpWfPek2KXxIilb8TKU9lS9DHmvyzi4AeLbGjq1vfxp1GZR4TLonobnbB1e/H09VFWsm1ye2VqGpy4PLNwOq5PXxsnxpGU9VFWsuX/QWRdvpl+QYFI/lO7vwHdwmW17EDVT05MR6AZ58oxccJmNMvYLiFBoAaYzu3nIbLxqKg9b6F6Z6sAck7wAMuJNgiYtCtcaTJ4L8IFYc4AnyISzicfiYTgrFnTyJlUtE3CUrFgBlgiUfrpJo4cNLwq9h0Wrjxb5lky2PSTeguQ+Vm2rsJSsWAGWyLdocvWaDkg8vCY87V78f5/qG8NG1fl78nAIcXjJJ4hRj+NRSIqi3vNWJfc0uWAwZivfsLbcpLpyJRL6Da91WrRpeIh/v2Li5XDpN9MKZK3CYjNIwAmeRGfV2a8RxwAAUy26osEXtHfjoWr/i9JkoO9ZewQLjfOnggONm4/fRtf6I4+9e3rgMF3bWKnpj5BwmY9TeYHEAFn7qWy48DkUPk/zsjyAuUNTqWRenT+WnO6PRSvpqCnJjOvW8r9mF/S0uKaETO1fxtzzhbusZYMKt4airJ+JsQk9VFUs7djHMTW55XlbU7TyWWAiPO3nbEakjINKZDDEcJZYza5dvBrAwU9k3Je84iUZ+vYyINVe/X7qGQH4tDmkT+1CtafsaKmzSTGHX/bdUMRKts0mIJRbk7aj8LGK8bZIYjhJpSJtcvO1pOOuRFkWsNXV50NTlQdpLx7H0tTasW2RCvd0qDSl1mIyoLczllJVJwKR7kg52uKV5ueVzh4bvtMPfE8sV1Utfa0NtYa70mev+W2jrGVD0YDd1eXBia5U0Bls0Gs3dPqw/dgZ7y0Nz28ovLtGyr9mFt
p4Bqaxna+zY3xJq6JxFZhzscMMbGJXmK93yVieaujyK8d/xDAtxmIzS6bqaglzV2HSKbstbndLMCnKip1DEXlvPAJ6tsUuvt26rhjcwOuH4vYkOwOQHdA0VNtQW5uKoq0fqoXt54zLpvS9vXKa4qEiLfIcWrWdTJH1i5yvKlo+3jCbaNJVAqGco1uk856KDHW64+v2qXkDRUyimGH3nslfRISCmI/vpx3+ZsIyJYkE+j/Decpt0dm0yHQHyA4NovY1T6TgBIs+YI4TXXawvrZmx5qqmLg8O1DkUibfoNW7q8uBgh3tSnU3CRLEgb0dFm9bc7ZtUmyRvX8UsZVom257Khc+YI+9wsR5pUdxfxNXvR1vPAKecTQIOL5miWBLOSO+R7xTkV2JPtNyJeuK0ZiQRtJIKrefkG3b4hhhpZxatXK1lRUtwYlnWXJX20nFc2FmrGnctX19r3zwt3UBHvD5Rg7qv2YWaglzFZ/a3uHCgziElUa5+Px4psWB8T2hHIx+aIWZ+EJ+PNkyjudsnTcsnEiwxx/3jZflo7vahrWdAMWUgoBweEs+4V/kB3uNl+YreokNOByyGDFgMGaqkkvNP37H0tTbpgj85+ToSbZh8WMVE63CiWBC/1XX/LdXwJwDSMCh5vaK1HVvf/lTqsABuXyhXYkFNQagn/53LXuwtt0mzPIiZdLTKnoi8/mIbkq8LMeVipHsu0J2hiOFDw+SdSaKz6cTWKukgJlpnkzBRLAChAyOtNk20PbG2SWvfPC3dwRoIdYw0dXmk4TMvnLki1UX0SMfanoYLj7VYD3xJf2nj4+Pjqa6Enpi4UTR6jd9j3FE0jDtKBcYdpQrvMh0y64eXmDPZmU9EREREqTX7k+75TLpJm54HZDzYo1Rg3FEqMO6IYjPrk257rmHiN9GcZM/RLzZ4sEeRMO4oFRh3lCp6xt5MM+uT7i0xTo5Pc4+esfHg4jzdlk0zW6U1R7dlM+4oEsYwc97hAAAgAElEQVQdpYqesTfTzPqk+4fLilJdBZqmfrhcv9h44G59bihDM98qS2x3Z5wMxh1FwrijVNEz9maaWZ90mzPTmXiTypOrinU95fXYknzdlk0zlzkzXdeDPcYdaWHcUaroHXszzaxPugHgxXVlHFNEEnuOAT+T3fBAD+bMdN3LoJnnh8uK9B1by7gjDYw7ShW9Y2+mmRNJtzkzHcceLZ/4jTTrmTPT8WJdWVKutv+nNUtQac2e+I00JyTjYA9g3JES445SJVmxN5PMiaQbACqt2Tjz+Ld5xDWHmTPT8crGZUk9FXrs0XJOp0XSgX+yYoFxRwDjjlIn2bE3U8z6O1Jq+dH75/Gb892prgYliTkzHY8tycfPvm1PyUFX180Atr7diXbPYNLLptSrtGbj2KPlSY89xt3cxrijVElV7M0EczLpBkINw++/vIHGLg8+uOpLdXUowcyZ6ai0ZuOBIjMeuyd/Wpz2/KeTX+GfT3WluhqUJObMdOwvt+HJVcUp7e1h3M0tjDtKlekSe9PZnE2656K0tDQAAH/y1Prl2Sto7PKg3TMI30gw1dWhBJIf7P1w+fS6gIhxN3sx7ihVpnPsTUdMuucQJt2UCow7SgXGHaUC446imTMXUhIRERERpQqTbiIiIiIinTHpJiIiIiLSGZNuIiIiIiKdMekmIiIiItIZk24iIiIiIp0x6SYiIiIi0hmTbiIiIiIinTHpJiIiIiLSGZNuIiIiIiKdMekmIiIiItIZk24iIiIiIp0x6SYiIiIi0hmTbiIiIiIinaWNj4+Pp7oSqeAbCeI357vR2OWBbySIrpsB+EaCqa6Wvjo+Cv1fsS619UgCe44BldYcrLJk4clVxTBnpqe6SgDmaNwdfzP0/4Ztqa1HEjDuphHGXcox7ma/6Rp709WcS7p9I0H886ku/OZ89+zf+Eny5KpivLiuLGXlM+7mJsYdpQLjjlIl1bE33c2ppLvdM4itb3ei62Yg1VWhFLDnGHDs0XJUWrOTWi7jbm5j3FEqMO4oVVIVezPBnEm62z2D2NB4hkfdc5w5Mx3Ht1QlrTFg3BHAuKPUYNxRqiQ79maKOXEhpW8kiB+9f44NAcE3EkzaToFxRwLjjlKBcUepkszYm0nmRNL9y7NX0O4ZTHU1aJrwjQTxk48u6l4O447kGHeUCow7SpVkxd5MMuuTbt9IEAc63QlbXuPmcozv2YCGClvCljlZDRU2jO/ZgMbN5amuyoQu7KzF+J4NSf9sJL85363rmMNEx910+63H92zAhZ21qa7GhKayveqxrc+0uAOm12+tR1ugh6lsr3ps6zMl7mbC7zudcoCJTGXbTdRvoXfszTSzPunm1dMUye+/vKHbshl3FAnjjlKBcUepomfszTSzPun+8Ov+VFeBpik9Y4NxR5Ew7igVGHeUKoyPO2Z90t3uuZnqKtA09cHXfbotm3FHkTDuKBUYd5QqesbeTDPrbx3ku5WcU16Nm8uxbpEJFkMGAKDDO4ifn76ENy5eV7xve1kBnq4uRYUlNI2ONzCKj671Y8tbndJ7SnMMOOh0YLU1B7bsTOl9rn4/nvjDZ7gU5/goMabrmT99iRfuL4MtOxPDwTG0XhvAQ03teG7NEuxZuRgWQwa8gVG8fbkXP3j387jrrbUuxPK0lOYY8PrDK1BhycKC9HlSnXYfPx/3d5wMPU+HJivunluzBD9aViTFiXtwBK+c78azJ79SvE9rXXd4h1TxdMjpwCMlFjhMRgDAcHAMF/v9mrE8kcbN5ai3W2F/tRVNm8ul2OnwDqL+rU7UFubi+e/cA4fJGPG3j7Xe4etCLC9a3cK3170nXGju9sX1HSdjNsTdZNsDQLttbKiwYffyIpSZjFiQPg8A4Or343VXjyqWJ9JQYcOBOgf2t7hQb7di7aJcLEifB/fgCJ76+CLaegbw+sMrUFuYK9VH67efbJve4R3Eia+140hre/1v7ZdxsCOx4/C1zIa4C8f97p26y9tJEVdaUrHf5dCjO2Z/0p2EH/vK394PW3YmXP1+fHStH1np87B2US5ef3gFChfMlxpUsTMYDo6hrWcA1/23sDwvC/V2K96tr8RDTe0AgD/WV8JhMqLDO4hPbvcgrFtkQm1haJlr3zwddx0XZqbjyMZl6PAO4RPPTay25mCTLQ9nt38bZSYjWq8NYCg4hodsefj+0kL8+fqAVO/n1izBMzV2DAfH8J67D0PBManeF3bWYulrbVI579ZXYpMtD97AKJq6PCgwzsf3lxaq6lOaY0DL1tXSejvXN4QC43xssuWhZetqFP/247i/43SSjLgLX9dAKE6eqbFjlTVbaphLcwz4fMcaLEifhw7vILpuBlBgnI/awlzFuhZJsntwRFre8rwsVFiy8av1S+NOuoWWravhH/sGTV0e2HMMqLBk44/1lVicNR8d3iGc6xuS4vGg0yHV21lkxn/8VQUWpM+TthdR7893rMF9R09KO4nwGAUgJVvhWrdVo7YwV1pvYnv9j7+qwH/6/zqSknjrJRlxF097cGFnLRwmoxRTYl0f2bgMXw/dQnO3
T2oXvYFRqR0Sv/MzNXb88UrfpH6Tv68sgTH9Lrzr7kNW+jxssuXhV+uXwh/8RhWPL29cpqh3rG26s8iMIxuXKWJ03SIT9parL7I75HRgb7lNsd7WLTLhQJ0DVkNG3AcX00kqkirud+/Ea/i+dHleFg7UOVT1me373Zlg1ifdejvkdMCWnYm2ngHFRrm9rABHNi7DszV2aSP6+8oSDAfHsOv984oE5uz2b+Nb5gUozTGgJNuAhZnpquUBgGdXndQ7Ey+LIQP/dqFHOpIuzTGg6wdrUWHJxv4Wl6qB2mTLk57bs3KxZr1F8nLI6cC+ZhecRWZssuXB1e9XNAhimXIHb683eZ0A4NWH7sP3lxbi1YfuUx310x1iXbsHR1B37BMp+RSNar3dCmeRGc3dPry8IZQUHO50Y1+zS1rGqw/dh0dLFqKhwoaDHW4sz8uCNzCqanjF7yzeF6/ekVGseuPP0uMrf3s/HCYjmro8qgOD5XlZ0vt+cf+9mvUWyYt8RyhiVJ6Iyw82hIYKG2oLc9HhHVTUaXtZAV5/eAUOr3conie1WNuD59YsgcNkjNg2/njF3Wju9mGTLQ8AsPXtTxXJtfidn6oqnlTSbUy/C9X/45QUD+IgNbx9EgcG8nJjbdNFjMrbUK1lluYYsGt5EbyBUUWdSnMMOP3XNfi7yuIZnXQnG/e7d7YzsS7k7SlwpxNFjvvd1Jv1Y7r19kiJBQDwxB8+Uzz/xsXraL02AIshAw0VNjiLzLBlZ6LDO6TqMVz1xp9R/NuPcelmAM3dPliPtGgeVfdOsTdBvjGJRt89OKLYWYQnVQ0VNlgMGWi9NqCqt/jOYh38eMXdAIDXXT2K9x3scMM9OKJ4bt0iE4aDY6oN/Afvfg5vYBQP3m2O+/vNJU9VFQMAXjnfrTgleOlmAK+c71a8p9KaDW9gVJG4AqF1bT3SIv3mS19rg/VIi6qs6/5bU6rry+e6FY/9Y98AABpk9bl0M4CrQ8pyagtz4R4cUdV7X7ML7sERaUe4vaxAitHwdRE+xGSHI3TW5eenLymef+PidbT1DKDCko3SHMNkvuacEE978N3ihQCAn378F8X73rh4HVm/PiFt+1ve6kTaS8dVibWr3z+lun50rV8RD0PBMQDq9ulc35DicaxtOgBUWLJUbahWGU9VFmNB+jy8fblXFaNHXT1YkD4Pz61ZMpmvOSdxv3tnHay/va9sCGsnwx8D3O9OB+zpTgD34IjmWKhzfUNSL05VfmhMVjwJTEOFDQ6TESU5BizPy1L0nCSKSIAm8vE19dXHl24G4A2MSo9z54d6FP94RX3RxBe+YWmcHABpHFukuWjl76XIwhNaILT+n6mxS48thoy4EpjtZQVYsTALq6zZsOcYUKZD3AGIafzgF75hzefdQyNSjBQumA9AnTwBobgV2yAAaezn3ywtxN+EDXsSr21ZYk3KGNuZLJb2QKzPWHupnUVmVOVnY5MtDwXG+bq0dwDgkdUxkljadAChIVtD6rh7+Vy3YhssuX0gV2Yyqtq8AmMoflfxdtlx4X43xJg+D97AqGpdXLoZUHV2cb+beky6pyFxGlTwBkZxdWgE3sCotCObaUQvk5zFkKE6/UWpc8jpwK7lRdJwjOHgGK4O3cLVoVu6JUB6i5RgMe6mj+1lBfjV+qWKts3V78fVoZEZ295FOqCc7DAF0t9s3O9qJffc76YWk+4EsGVnojTHoGpo5eNTz9wI3R5X9GrIvfrQffjePVbsev886opM2GTLQ1vPAH559ori1NKFnbUp2/jvX2RSPVeaY4DFkCGdfhu4FUqsv1ucp+rdkq8L4E5CJx9bSfHbvbxINRb0u8V5isfewCgWZqo3dWkcc6cb/37xBvaW2+AeHME/tH+p6Olt3FyesqT7W+YFms/bsu70yPQMh3qxwmMMgGInCgD+2wd/aS8dT1QV56RY2gNvYBQOk1G6tkBu6Mfr0eEdwto3T+NX65fCmH4Xnj/VhZfP3RkupXUtSLLE0qYDoXZMHotC+N0KRdsYPvabJo/73dB25g+OwWIyaq4Lh8moOMvJ/W7qcUz3FL1z2QsAeP3hFYrnt5cVYO2i0AwJBzvcaO72wT04ggpLFraXFUjvK80xSOOo3rh4XToN+ccrvYoNf3tZQUoSn4MdbngDo1i7KFdRb+DOdxbr4NeffQ0A+NGyIsW4WK26d3iH4DAZVTun7WUFGPrx+mlz2+np6oUzVwCo13VpjgE/WlakeE+7ZxAWQwYOOZUJzJOrQmO+W7r7pdOwX/iGFUlBaY4B6zQa/mRo6xmALTtTVW/5RVRAaLvRilGtuoup3MJPr5bmGHDlb+/H0I/Xc0x3FPG0B3+8Epoq9Bf336t43yGnAwvS5+Hi7WTAYshAbyCIZ09+pUgadi8v0u17RBNrmw6E2jGtGA2vu2gbtWY1ad1WPWNuKz5dcL97Zx2INi18Xbz60H2q5XK/m3rs6Z6ifc0uPLYkH7WFubiwsxbn+oakqYsWpM/Df2+/Ir33qY8v4sjGZTiycRn+8313S1NGWQwZONwZasTPegZRb7fi7yqLcf8iE4aCY9K0VsPBMc0p0PT20qdX8UyNXVFvMdZNfqFbc7cPTV0e1NutOP3XNfjoWr809Vd43Z/4w2do2boaB+oc2L28CF03A4r1Fn4hEik1d/vwnrsPm2x50roGIMXTe+4706ztPn4eLVtXY2+5DTUFudKUWWJmiTcuXoezyIzh4Bg22fLQuq1amp6vwqLuPU6Wn378F/zHX1Uo6i2PJ/kFes+d6sKBOocUo0BoysBw+5pdqCnIRb3diit/e79iajCx3pIxR/xMFmt78OzJr/Dd4oWKtlH8fu7BEfzjn74EEBqba8vOVLSfldZsGNNT0ycUT5v+xB8+w+c71ihidN0ik6ruzd0+HO50Y2+5DZ5ddWj3DKrWG3vA74g05vjyzQD2Nbu435VtZ/uaXXikxKJYF+J9w2HDOrnfTT32dCdA8W8/RlOXBwsz01Fvt2KTLQ8Xb0+oLz/1/8bF69j1/nlc7Pdjky0P9XYr/MFv8PypLsWO6nCnG/7gN9J7jOnz8PypLvzuy9Dcycm+yv3Zk1/hiT98pqj3wsx0NHV5VNPLbXmrE8+f6oI/+A3q7VZUWLLQ1OXBu27lxZWXbgZQd+wTtPUMYHFWpmK97W9xcfqsGDzU1K5Y1/J4EnPPAsp1XWHJUvx+4mr95m4fdr1/XpoVpN5uhS0rE++6+7Dr/fMAkj8Ournbh/uOnlTUu8KShbaeAdx39KRiyMLBDrciRjfZ8tDhHVLsfIW1b56W5iGXr7fDnW7FeiNt8bQHYl2LtlH8fvJpLuuOfYIO7yAcJiPq7VZUWrPh6vfjvqMnMRwcw2prTtK/Y6xt+qWbAVWM9o4EpW1Gbl+zC8+f6kLvSFC13uqOfZLMrzftie0y/J+YsQPgfldu6WttinWxMDMdz5/qUs0Ixf1u6qWNj4+Pp7oSeuLYTYpmfM8GXZbLuKNoGHeUCow7ShW9Ym+mYU83EREREZHOmHQTEREREemMSTcRERERkc6
YdBMRERER6YxJNxERERGRzph0ExERERHpjEk3EREREZHOmHQTEREREemMSfc011Bhw/ieDYrb4o7v2YALO2sn/Gzj5nKM79mAhgpbwuoTa9k084X/1hd21sZ0gwOtmJ2qWMummS/8t46nHUt0+6RHG0rTU/hvHU87luj2SY82lKYHJt2kqTTHgMbN5Xj1oftSXRWaYw45HWjdVp3qatAc01BhY4cCJZ2zyIzWbdU8sJsj0lNdAYpfMm65u2WJFfV2K5q6PEkvm6anpa+1JaWcveU2uPr9KSmbpp8tb3UmpZy95TY4TMaUlE3Tz8EONw52uHUv56mqYtQW5uKoqyfpZVPysaebiIiIiEhn7OlOoENOB/aW2/BvF3rwg3c/13ztcKcb+5pdKM0x4KDTgdXWHNiyMwEA3sAoXP1+PPGHz3DpZiBiOeN7NsDV71f0/j23Zgl+tKwItuxMDAfH0HptQPOzsZTbuLkc9XYrAKDebsX4ng3Y3+LCwQ63ZtnbywrwdHUpKizZ0vI+utav6iUa37MBTV0enPUMYs/KxbAYMqS67j5+Pup3psicRWac2FqFDu8gVr3xZ83X2noGsPbN0wBCsfhIiUXq1RsOjuFivx8/P30Jb1y8HrGcCztr4TAZFWc7wn/7Du8gTnzt0/z8ROU2VNhwoM4BAHCYjFK8bHmrU7Ps0hwDXn94BSosWViQPg/DwTF0eIdU248YMvDMn77E89+5Ryq/wzuIvSdcaO7Wri9NzLOrDsb0u5D16xOarwGA9UgLgNDwjd3Li1BmMmJB+jwAgKvfj9ddPXj25FcRyxDtkWiDAPVv7x4cwX9rv6z5+VjKlY/HlbdxWmWLOq1bZILFkAEgFEvh24/4rP3VVrz+8ArUFuYCgFRX9mROXuu2atQW5uKJP3ymarPEa+uPnUFztw/OIjN+cf+9cJiM0u/lHhzBJ56bUc9kiPZItEGC/Lf3Bkbx9uVezc/HUq5o1wDgQJ0DB+ocSHvpeMSy5ft5sbxXzncrth/x2f0tLmyy5eEhWx4WpM+T6hqem1Bysac7gfY1uzAcHMODd5tVr62/24zh4Bj2NbsAAH+sr0S93YrekVE0dXmkYRy1hbl4/eEVcZX73JoleKbGjoWGdLzn7kPrtQGsXZSLTbY81XtjKfc9dx/aekJJu6vfj6YuD87cGIxY9usPr0CZyYj33H1o6vKgdySIertVc3zkamsO/q6yWFquP/gNNtny4v7OdEdztw8d3kFUWLJRmmNQvPbjFXcDAH559gqA0A5jb7kNxnl3Sb//1aFbqLBk41frl8ZVrrPIjCMbl6HCko22ngE0dXmwOCsTe8vVYxNjKffMjUEpHr2BUHy+5+6LWPbnO9agtjAXHd4hNHV50OEdQm1hLj7fsUa1HhZmpuPIxmXwB8fQ1OWBq9+PCks2jj26Mq7vTEpvX+7FgvR5eG7NEsXz28sKYDFkSAmJSAQWZ2Wi9VooVtp6BuAwGfFMjR3OInWbGU3L1tWoLczF1aFboXZk7BvpgE0u1nKbujzwBkalv9+57I1Y9pW/vf92GxqUYrTMZMTrD6/QHJfbsnU1bFmZUtm27EwcqHPE/Z3pDtGe/ef77la9VmHJQod3EM3dPpTmGPAff1WBCkuWtM95z92HhYZ01NutOORUx0w0797efwKQ2pHvLy1UDUuKtdx3LnuloXSiDY1W9jM1dhjT77ShxvS78EyNXfOCy7+vLMG6RSa0XhvAe+4+GNPvwveXFsb9nSmx2NOdYGLH7ywySz1oziKzlJiIxwsz0xW9j4JnV53UIxKrPSsXYzg4hvuOnpR6+EpzDPh8xxqpZyeeckUPTG1hLs71DUXtDRBl73r/vKLHQfQ2HHI6pAMNALBlZyp6J0pzDDj91zWosGTF9Z1J6cTXPlRYsvFUZbFifT9ashDewKi0vpfnZcEbGEXxbz9WfF78Xg0Vtph74H5x/71YkD5P1Qso770RYi23uduH8T0b0DsSjBp3omxx5kgQZ5Ref3iFIsYthgzVGaiz27+NCks2tpcVRO3hp8h+/dnX+P7SQny3eKGit+3JVcXS6wCkDoCtb3+qOLMgfq+nqopjPuNwyOmALTtTswdSJERCrOWKsykWQ0bUuBNlh7eh28sKcGTjMjxbY1dtP70jo4ozUK8+dB++v7QQP15xN8+yTNIbF6/jV+uXotKarXj+kNOBBenzpLNtu5cXAQCOnOtWtBPiDOAjJRYALsTCWWTGJlue6kyv/AydEGu5+5pdaNxsgMNkxFFXT8S2V5TtHhxB3bFPFPv5lq2rUW+3KnIOADCm34Xq/3FKeu/2sgK8/vAKrNfoFKTkYU93gokjcNHDKP9bvNbc7YP1SIsq8QWA3pFgXOWJHqXWawOKU+qXbgZUQ0wSWS4QamxE2eFJyxN/+AwAbjcud7j6/Yr3XroZQO9IUHFwQPETZ1nkDWp4byMQuiBRnO6Xu+6/FXeZFZYsuAdHVDuK12UXBOlRLhA6IHQPjih2aEBoPbgHRzQPXMNPq3bd3l4KF8yfVB1IfpZFedAs720EQhckpr10XJVkhl8wGwsR4w1hv33440SXC9xpz0T7Jrxx8Tparw3AYshQ9Xa/fK5b8fjP10Ptcu58tnlT8fblXlgMGdheViA9F35G+dmTXyHr1ydU7cRkDnbEfjy8fTvY4YZ7cETxXCLLBUIXWwLAK+e7Vfv5V853K94jfHStX/Fesd81cl+bUuzpTjBxBP5oyULpuQfvNit6G+UaKkJXzJfkGLA8L0vVQzgRkTCc6xtSvfbxtX7NISaJKDe8nHCXbgak07WUHOFnWcSpV9HbKLe9rAArFmZhlTUb9hwDyibx+y9In4eOIXXcvXyuG8/U2DU/k4hyhS98w5rPu4dGpDGPpD9xlkWc1XpuzRJFb6Ocs8iMqvxsbLLlocA4f1LtjvH2+NTwa0Au3Qyokp9Eliu4B0c0rz851zcUsb2lxBNnWZ5cVYw3Ll5HaY5BcUZZrjTHgC1LrPh2QS6KFszHt8wL4i5PHCT98Yp6yNsXvmHNNicR5cqFH8CJ+kRqb2n6YdKtg7cv9+L7SwuxvawAXw/dgi07E/92QXl0/G59paKB9gZGcXVoBN7AqHTRxVR5NJLeZJRLqfHLs1fw+sMrpNPWldZsRW8jEDr9umt5kXRmYTg4hqtDt3B16NaUEhE5rYQkGeVSauxrdmHX8iKpB/q7xQsVvY1A6GDrV+uXKtoYV78fV4dGEtru+Me+UTxOVrmUfOFnWZ6qDPX0ijPKQCjp/WN9paKNcQ+OJPzAfCg4pnicrHJp5mHSrQNxBC6/yEPe23jI6cAmWx7aegbwy7NXFD3gYlxhrHqGQ6fnl+epx0SH97oksly5+xeZVM+V5hhgMWRMatgKTY44y/Lg3WY8t2YJLIYMxdyvziIz9pbb4B4cwT+0f6majSHe5Hc4OAZblnoHEn56PdHlCpF6jbTqRPqSn2WpLcxV9Tb+av1SGNPvwvOnuvDyuTunyLXGw07EHxyDxWREaY5BdYDnMBkVQ0cSWa5gy87ULFurDS
Z9ibMsz61ZgseW5KvOKL/+8Ao4TEb824Ue/PqzrxUdEPHeQXLgViix/m5xnmqYSPhvn8hy5XYvL1LN9PPdYp5dmUk4plsH4gi80pqt2dtYcntmhT9e6VU0ENvLCuJOQN64eB3ewCjWLspVjG0rzTFgXVgynMhygdBYNq2yAUizkUSbBYAS7+3LvaGLVR2Fqt7GqvzQRUdf+IYVia9WrMSiwzsEW3am6mp4cRGRXuUCkGaBCC9bfqEbJY/oXXx54zLFY8FiyEBvIIhnT36lSFbDYyUWYthK+IxHWnfPTWS5wJ32LLzs7WUFWLsoF97AKKcCTCJxLcsTjkLYsjNV0/eJjqR//NOXin3wZO60LDrOfrSsSDE7ktb+M5HlAsALZ65oll2aY8CPlhUp3kPTG3u6dXLia580ddrRsAsvznoGUW+34u8qi3H/IhOGgmOw3x6PNhwci/uiwudOdeFAnQNHNi6TetfXLlJfSBZPuY1feXCgLjSfd+Pmcrxw5ormRSAvfXoVz9TYpbKHgmPSGHGtC91IX+Isi8NkVCWeZ24MYjg4hk22PLRuq8Z1/y0UGOdPeuaYJ/7wGT7fsQZ7y22oKcjFdf8trFtkgjFdeSwfb7nuwREszpqPxs3leM/dp5nE/PTjv+A//qpCUXaBcT5qC3MxHBzDTz/+y6S+E02OOMviMBk1r19xD4ZOq1/YWYtzfUPISp+HSmu2KlZisa/ZhUdKLKgtzJWWJ9qc4bDT/PGUe/lmAA6TEe/WV+Jc35Bm27Wv2YXHluQrys5Kn4e1i3KxIH0e/ns7E59kE2dZAPX1K+I3Pf3XNfjo9rVH4h4V4bEykeZuH5q6PKi3W6Xlydsc+f4znnLFfnn38iJssuVpzp7T3O3De+4+bLLlKZYp5gt/z93HmXBmCPZ060QcgYf3NgKhK5sPd7qlOarr7VYY0+fh+VNd+N2XoXk6w+e9jeZghxtP/OEzXOz3Y5MtD5tseejwDql2APGUe+lmAO+5+2DLzkS93Sr1VoZ79uRXirLr7VYszExHU5dHNT0c6U+cZQHUvY3N3T7sev+8NLtHvd0KW1Ym3nX3Ydf75wFANeVaNJduBnDf0ZNo6xlAhSVLmrtYLGuy5f7+qxvSc5EuTGvu9qnKrrBkoa1nAPcdPckdUAqIXkatm4XUHfsEHd5BOExG1NutqLRmw9Xvx31HT2I4OIbV1py4ylr6WhuaujxYmJkutTnPn+rC1SHlbDjxlPv/fP41vIFRbLLlqWZdkg/vrDUAACAASURBVCv+7ceKsjfZ8nDx9s3Fot3kh/Qh2rnwM8oA8FBTuzRHdb3diodseegdGZX2WY7bw5RiteWtTjx/qgv+4DdSm9PU5cG7YfcTiKfcl891S/cNiNb+PtTUrii73m6FP/gNnj/VhYea2mP+DpRaaePj4+OproSe5HewIwo3lfF10TDuKBrGHaUC445SRa/Ym2nY001EREREpDMm3UREREREOmPSTURERESkMybdREREREQ6Y9JNRERERKQzJt1ERERERDpj0k1EREREpDMm3VPk2VWH8T0bpH+Nm8tj/mzj5nJc2FkLIHT7as+uupg+d2FnbVzlTEVDhU2X+TXly7ywsxYNFTbN9ziLzAkvezZo3VatiDsRR7GQ/6bOInPM61ker8kwvmeDZlxMhTzWGjeXK7Yj+fqMdVucaw45HYr1FG/bIP9NPbvqcMjpmPAzerVBkejRvspjraHCptiOGjeXM+4mINop+b942gb5b9q6rRqt26pj+pwebVAkerSv4bEWaTu6sLM25nVCU8Oke5JEI3DU1YO0l45L/+rtVgbvBBoqbHD1+6XHDpNRdatvTqQfmdgxy+NO/jxFJo+15XlZeO/2neQ8u+rQ1OWR1mfvSDCpBxgzQeu2auxwFCrirqnLw4PjGMhjbZMtD+f6hgCEDmLq7VZpfbr6/Yy7MIecDpzYWoX1x85I62l/iwsH6hwxHbTNZfJYC9/vCoecDjhMxmRXbc5KT3UFZqpjj65EU5dHdYv39cfO4MTWKjRU2KSdu2dXHSyGDACAq9+Ppa+1Tbj8xs3lqlvCht/1S56Yrj92RnELXPlrbT0DWPvmaQCQDghqC3Olz1XlZ+NAnbLx2t8S+l7i+fE9G7C/xYWDHW5V3UTZziIzTmytQlvPAGoLcxXlyjlMRngDowBCBy/ib1E/8VlRR7qjdVs1ekeCqvW69LU2eHbVoXFzOba81Sm9V74OY7lrXEOFTTMW5AdFF3bWSo304U63YhuQv+YNjMJ6pAVAqGF/pMSChZnpsBgypM+FH1w1dXmw5a1O6fkDdQ5ssuVhy1udqrrJy/bsqoOr34/awlxFuXLhsbYwMx1nbgzCWWSGxZAhrTex7PD1MJcdcjpQW5iriqEtb3WidVs1Xt64TGrXDjkd2Ft+p3cwPH4iiRQLgrzdCW9b4m2T5HEK3GmXxfMOkxEXdtZK3yme9jT8VuTAnVgDgALjfJy6PgAAeKTEgqYuj/S+o64exp2Ms8iMveU27G9xKdbrwQ43HCYj9pbbpDZA/NZCePxEEikWhE22POk3CW9b4m2TIu3X5c+P79kgbWfxtqfh5LEm3+/K7S3XTsZJH7M+6TZnpsM3EkzoMsVO+oUzV1SvNXf7FDsmseGJjcWzqw6t26o1k1FB3vshjO/ZoPhcvd2qSIJPbK2S3j++Z4OiwRHDXsTj2sJcxU7pQJ1DsWO8sLMWz9bYpTofqHNIyxaNg/yxvGxBK8ELTwLlOzLR0Fz330LaS8dVDehMo0fcAaGG86irR/M1+c6gdVs1HCaj9Du0bquGZ1edZjIqRIsF8dhhMqKpy4Olr7VJOxxXvx8HO9xSD518hyFPXBwmo2qnJI9TEVvOIjPSXjquONATZYU/FmWL5WvFXXgSKI+7E1ursL/FpfpcpB3UdKdX3NUUhBJWLfK2TKxr0b4ccjpwoM6BMzcGNZNRIVosCMvzshRtnGjT4m2TRKIsHotYOuR0SIn3ub4hRfsZa3sqF96Gyf+uLcxFSY5B1QGzw1E4IxMgveLu8bJ8eAOjmgdt+5pdqoRbtC/i8SGnQzMZFaLFgvicPLbkbVq8bdJE+/XGzeVYnpclxUS87amcvKOvtjBX0f7Jl9G6rRqHO914pMQScR1RYs364SXm+Yk/rqjKzwaAqDsRILQBWwwZip3Sc6e6UFuYG/V07L5mdRIQ3hC39QxIG/aWtzrhDYzikDPUWHgDo6peO/nRtavfL9VdHCTIGzVxOkrLukUmHO68815RtnwMZKSkcO2bp5H20nF4A6PSqcK2ngEc7nRL3zeWnolEMWfqd8ypR9wBgMWQEdNOubYwF8+d6pIer33zNCyGjKinY2OJBXlsHexwo61nADschXAWmeEwGbH7/fPSe3e/fx4Ok1ExJlK+g7AeaVH83uL0u5YdjkJFzIuy5TuTj671a35WbE/yWDvcGfp8+PeVlxdpeVM1U+Puuv/WhO8TPbeifdnX7IKr349f3H9v1M/FEgvy2Drc6ca6RSYA8bdJa988rUh2o/XCx9ueyontS
R5r64+dgTcwirSXjqt68cf3bEBtYa7ieybSTIy7khwDemNI5p+qKoar3y+1L83dPjR1ebDDURj1c7HEgjy2RJvmLDLH3SbFsl8XJtOeylmPtChiTb7fFd9X5CfRDkoSRc/Ym2lm/Zqw5xrQdTOQkrK1essOdoROW4vEPZrwU1fyZYlTRkLvSBAlOQbpb7l/v3gDe8ttUqIfqQdP3gMY6T1aSZ+r348C43zpsTiNGonFkCHtpGJNIvVgv72+dFl2CuNO/M7hv4Or34+SHENM6ztSLIR/9rr/FpbnZWkeiDZ3++ANjCpOj2oJP72rxWLIUB0AnLo+oNipXp5gfctjrSTHEDGJ9OyqQ+9IULcDwNkad0BoGEX473CubwjL87Ji+nykWPAGRhWx5er3Sz15k22TtE71h9NK+mJtT+XLELFWlZ+tmUSKWBM9tLEOyYnHbI67AuN81e/wnrtvwt9XiBYL/37xhvS3iMGq/OxJt0nR9uvCVNpT+TLksSbf7wLAszV2bH3706jLSBQ9Y2+mmfVJ9xa7FR9cjd4jHS/RgDuLzJo9HJGej5V8oxSNr54X14gES5xCbdxcLvUiJZJ8eIk8qTtQ51AMYUmWLTE2yJNddqLjDoCi0Q031bgDkhcLwJ0Ey9XvR9pLxzXHkyeCfHiJiDVBPoRFPI71uovJmqlxJ09i5RIRd8mKBUCZYMmHqyRa+PCS8OF0WmOOm7t9cPX7scmWl/CkeybG3eWbgajtz1RjL1mxACR3vy4fXhIed65+P871DeGja/1T3m5jpWfszTSzfnjJD5cVJXyZomF8qqpY8/UTW6vQuLlc0RsjiFND0XqDxWmrSKe+AUi92oLoYbp8M4CFYadyHi/Ll+odTpw+DT/dGYlW0ucwGWM69bz2zdNo6vJIs0Tsb3FJO9lkJ9wA8MPliY8Nadk6xB0QOl0ZafzdyxuX4cLOWkVvjJzDZIzaGxxLLITHs+hhkh+ICuLaB62edXH6VH66MxqtpK+mIDemU8/7ml2asSb+lifcbT0DuibcwMyMu6OunogXNj9VVSzt2OVn3ITleVlRe+ViiYXwuJOfRYy3TRLDUWJpc+JtT+Xk1/fIZycR1xBseasz5mkTE2Emxt2+ZhcshgzNafsaKmzSpAXX/bdUMbLJljdhb3AssSBvR+VnEeNtk2LZrwvxtqfhrEdaFLEm3+8ufa0N6xaZUG+3StMvOkxG1Bbm6jYDlp6xN9PM+qTbnJmuS4MgxvWFz+cqjiS3vNWJgx1ueAOjiikEn62xo61nYMIGW96ANG4uV+1U5BcZNW4ul8ZmiUZKXq+95TbFFfLRymqosEU9JffRtX7FmDVRdqyn4guM86XEL5UXqz25qljXU156xd2WtzqlmRXkRE+hSFraegbwbI1der11WzW8gdEJx+9NFAsOk1FKEhoqbKgtzMVRV490IPryxmXSe1/euExxUZEW+Q4tWs+mSPrEzleULR9vGU20GXOAUM9QpNl2Emmmxt3BDjdc/X5VL6DoKRSzHb1z2atom8R0ZD/9+C8TljFRLMjb0b3lNmm87GTaJPmBQbTexsm0p3KRZswRwusu1pfWRfpTMVPjDgiddTtQ51Ak3qLXuKnLg4Mdbrxw5oqibXIWmVFvt0a8vkhuoliQt6OiTWvu9k2qTZpovy5Mtj2VC58xR97hYj3Sopj609XvR1vPQNQL7SdL79ibaWb98BIAeHFdGT642pfQMWcHO9w42OFW3SAifMdtPdIi3UBH63Uta988rfpMU5dHcZqtqcujOHUpP1IXMz+EDxXQsq/ZhZqCXOm93sCoNAeqs8iMgx1uPFtjl07DiyEH8u8cTy+1fAdcU5CrGpueDPYcA34ma0j1okfcAaH1fWFnrWrctfx3WPvmaekGOuL1iRrUiWIBCI2VfaTEgvE9oR2NfGiGmPlBfD7aMI3mbp80LZ9IsMR0m4+X5aO524e2ngHFlIGAcnhIPONe5bH2eFm+orfokNMBiyEDFkOGKqmMNAXcZMz0uFv6Wptq2weU60gc1MnbponW4USxIH6r6/5bmm1avG3S1rc/xYmtVdL7D3e6gRILagpCPfnvXPZib7lNmuUhnvY0nLz+YhuSrwsx5WK06V+naqbHnXy60EjT8zV3+6SYEQcxkWb2kJsoFoDQgZFWmybanljbpIn26y+cuSLVRfRIx9qehguPtVgPfBMtWbE3k6SNj4+Pp7oSydDuGUTVv/851dWgFDNnpuOVjcvw2JL8pJTHuCOAcUepwbijVEl27M0Us354iVBpzcaZx7/N0xxzWCoaAcYdMe4oFRh3lCpMuCObMz3dcj96/zx+c7471dWgJDFnpuOxJfn42bftKd0ZMO7mFsYdpQLjjlJlusTedDYnk24A6LoZwO+/vIHGLo8uUx1NW3+/BciYDyzIBow5wIKc0N8Lsm//nQMY5Y9lz8+fORuROTMdldZsPFBkxmP35KPSOvG86MkwZ+Puv/7vQE4eUPotoHhp6H/T7LsLGuMuhfyDwIV2oLcHcJ0FvNeAHz8HWBaluma6Y9zpwH/7gtfhwdDf3mt3nvcPyR7f/lu833sN+F/3ALX/Kfl1ToHpGnvT1ZxNuueqtLS0SX82MzMTZrMZeXl5in+xPJeTk5PAb0EzyejoKObPV8/xXFJSgpqaGlRXV0v/WyyzLxGnxPL5fOjq6kJXVxfa29vx4Ycfor29HT6fOql75ZVX8MMf/jAFtaTpwOfzRfwHAJcuXZLiSev9k/Wzn/0M//RP/5SQ70CzC5PuOWZ4eBg+nw99fX2Kf7E8Nzw8POlyMzIyJp2wm0z63JyFkufLL7/E6dOncfr0aZw6dQqnT5/W3Kk5HA5VIp6dzZ6Tucrn86G9vR3t7e24dOkSPvjgA3R1dWnGjtlsRmVlJSorK1FaWooHH3wQdrsdZrNZY8k0E4jfWSTF4f9funRJeqyVME8lcRZxYzabYTabYbfbFY9LS0ulx/I4k7+PKByTbopZIBCIOUEPf25wMPqt4aOZN29ezAm61nM0PZ07d06ViPv96hs/rFy5EjU1NVISXl1djYyMDI0l0kzW3t6u6L0WCXc4keTY7XasWrUKDz74oJRw0/QjT4TDe5RF0ixekyfLU+1tBu4kyFr/AKC0tFQzoZa/hyiRmHRTUty6dWvSCfvAwOTn8k5LS4srYQ9//q675swEP9NCe3u7Igk/ffo0vvnmG9X7wnvDq6qqNJZG05FIsD744IOYeq/tdjsefPBB9l6nSHgCLO9VBu4M0Yg2jGOywnuPw/8Xvc0iJsITZsYJTTdMumnaCwaDcQ2DkT+XiEY/3qRdvJaePifuPaWrYDCoSsI7OjpU78vMzFQl4vfdd18Kakxy8uEhH374oTQWO1KCLZLqBx54QOrNZuI0NZMZoiF/PZG9zbEO0WBvM81WTLppVvvmm2/iHrsuf24qm0dubm5cw2Dk/zh8IrKhoSFVIv7FF1+o3mcymRRJeHV1Ne69994U1Hj2i+fiRq3hIZWVlUywoojWk+zz+dDf35+UIRqAslcZuDNEI9owDiIK
YdJNFMVUEvaxsbFJl5udnR3XMBj5v8zMzASugZmht7dXkYSfOnVK6sGTKygoUCTiNTU1WLx4cQpqPHPx4sb4zbQhGvLX59pvRaQnJt1EOunv75900j46OjrpchcsWBD32HXxz2g0JnANpNbXX3+tulDz2rVrqvcVFxcresNrampgtVpTUOPpR/RcT3RxIwBUVlbO2osbw3uPIw3RCO9t1mOIRvg/k8kUcfgGe5uJphcm3UTT0M2bNyedsI+MjEy6XIPBMKkLTvPy8pCVlZXANaCPr776SpWI9/X1qd5XVlamGiM+m+eaF8lie3s7zp49OysvboxniIbW+6eCQzSICGDSTTTrDA0NTWoe9r6+PgQCgUmXO3/+/LjGrctfT2VCe/78edUYca056VeuXKnoDa+urta86c90Jx8ecvbsWWmqvomGh6T64sbpMESDczYT0VQw6SYiyUy4eVL463rcPOns2bOqRFxrjH54Er569eqE12WytC5uFI/DJePixkhDNCaas5lDNIhotmDSTUQJEQgEJp2wT/XmSRNN3xjttViMjY2pkvCzZ8+q3peZmalKxFesWDHp7xarZF3cmOohGgDnbCaimYtJNxGl3HS8eVIss8SIBFwk5FpTF+bm5qqmLiwrK5t0neO5uNFut6OyslLqvZ43bx7Ky8t5W20iohRg0k1EM5q4eZJWcq71vPxxom+elJWVhWAwiKGhIfT29qK7uxter1f1ufz8fFUiXlxcrHiP1sWNX331Ffr7+1XLEzPWmEwm5Ofnw2Qyobe3VzqDoMcFgRyiQUQUHybdRDRnyW+eNJle9kQ2n0ajEQUFBcjKysK1a9cwMDCAYDCYsOUDHKJBRJRKTLqJiCZJj5snpaWlRU3m5Ykwb6tNRDRzMOkmoqh8/iB+c/IaGj/1wucPoqs3AJ8/sT2wc9LIEDByEwjcBEYGAe9XwK1BwHsJWFgCmBYD2VYgM1v5j6bEvtCAysXZWHV3Fp58wAazMT3VVSKiOYJJNxFp8vmD+Od3LuE3J68xyaZZ68kHbHjxsXtTXQ0imgOYdBORSvvVQWw98hm6eid/sxyimcK+0IBju1agcjHPJBCRfph0E5FC+9VBbPjVWfZu05xiNqbj+P+5iok3EenmrlRXgIimD58/iB8d/YIJN805Pn+QB5tEpCsm3UQk+eWHbrRfnfzdIYlmMp8/iJ/8/i+prgYRzVJMuokIQCjhOHDiaqqrQZRSvzl5jdcyEJEumHQTEQBwlhKi237f6Ul1FYhoFmLSTUQAgA//or69OE1O4+6VGH/xAVx4ek1Clnfh6TUYf/EBNKxfnJDlUXTcFohID0y6iQgAOJab6LYPLvpSXQUimoWYdBMRAHBoCdFt3BaISA+8/y0RAWCiMZ0t/fnJVFeBiIimiD3dREREREQ6Y083EVGSNaxfjN3fKUKZ1YgF80N9H64bfrxzvhf7fndR9f4LT6+BI9+I/ccu4uDtaR0b1i/Gga1lcN3w47v/2oGXn1iKysXZsGRlAAA6vh7Cy3/qlt5PRESpxaSbiCiJ3v0/KrBpaR4AYPjWN3Dd8MOYcRcc+UY48hfjsXIr6g6241JfbHNFGzPuQktDJWzmTLh9I3Dd8GOxKRMVd2fhwNYyWLMy8OzbXXp+JSIiigGHlxARJcmrO5dJCfe/nepB1v/VjKU/P4nif27D/mMXMXzrG9jMmWj631bGvEybORMLF2Tgid9+juJ/bsPSn5/Eff/3n9Hx9RAAYM+6u3X5LkREFB8m3URESfLo8oUAgKZPvfjBa+cVrx08cRX/8D+/BABU3J2F7VX5MS/3H/7nl3jjzA3p8aW+AH7+7iUAgCUrA857TFOtOhERTRGTbiKiJGhYv1gab/3C8Sua7zl44ircvhEAwN+sLox52VrjtuVJeJUtO56qEhGRDph0ExElgcNqBBAax938ZeQ7Hn5xfRgAsLxwQUzLFUk6ERFNb0y6iYiSoCTPAAC42h89SR669U1cy/WPxvd+IiJKDSbdRESTcOHpNRj6r86Yx15fvj0byWJTZtT3Zc1ns0xENBuxdScimgRHfmiO7cKc+arXCnIyVM+5PH4AwIL5d0W9sPFbBaFhJed6hhNUUyIimg6YdBMRTYIYS12/wqJ6zXa7N/uybK7tgyeuwjs0CgB4akOx5jIb1i+GzRz67P/7SU9C60tERKnFpJuIaBI+cQ8CANbaTTj0vTIAQGmeAa1PVkmJc9NnXsVn3j7XCwCoX2nBqzuXKV5rWL8Y/+V/uQdA6O6U8tlHiIho5uMdKYmIJqHhdxex2pYNmzkTe52Lsde5WPF626UB1VR+P3jtPIpy52PT0jx8v6YQ36vIx9X+ERgz7pISdbdvBN/9146kfQ8iIkoO9nQTEU3Cpb4A6g62o+lTrzRsBAj1Uh9uvoq1vzyj+bmH/rUD+49dlO4Y6cg3wmbOlD5X/M9tMd8CnoiIZo608fHx8VRXgohSL+0nH6a6CkTTxviLD6S6CkQ0y7Cnm4iIiIhIZ0y6iYiIiIh0xqSbiIiIiEhnTLqJiIiIiHTGpJuIiIiISGdMuomIiIiIdMakm4iIiIhIZ0y6iYiIiIh0xqSbiIiIiEhnTLqJiIiIiHTGpJuIiIiISGdMuomIiIiIdMakm4iIiIhIZ0y6iYiIiIh0xqSbiIiIiEhnTLqJiIiIiHTGpJuIiIiISGdMuomIiIiIdMakm4iIiIhIZ0y6iYjCOO8xYfzFB3Dh6TWprsqkie/gvMeU6qoQERGYdBMRqTy1oRiuG3448o1oWL841dUhIqJZID3VFSAimm7WLcnF0U+uA1iIHasLcPDEVcXrF55eA0e+EQDQ9KkX9SstWH+oHc1f9qNh/WIc2FoGAPAOjcLl8QMA1v7yDBp3r0RBTgZqS3MBAPuPXcTBE1cVy/MOjcL6jx9LZTXuXon6lRYAQNulATisRhz95Dr2/e4iAGD8xQcUdWv61IsXjl/BiX2VAIAT+ypxuPkq9v3uoqJuAKTnAcDzL/fD5fGjtjRXVQciIpo69nQTEckc+l4oKd33u4t453yvlCALrU9WYeGCdKT95EOk/eRDrFty53XnPSYc2FqGw81XkfaTD3H0k+uqz9eW5mL/sYtI+8mHUsLtHR6VlvfRVwPw/Mv9Ul1EQp/2kw8BAJasDGlZnn+5H02feqXPigMAAFh/qF36X55wi7L3H7uIvc7Fip58h9WItJ98yISbiEgHTLqJiGQeWbYQH301AABSL7BIxIFQ0vzcHy5Jj+V/i2Ep4nP7fncRrht+xfK9Q6NSz7nzHhMc+Uas/eUZ6fUtL38KS1YGGtYvxiPLFqLpUy+av+wHAMX7AMD6jx9jy8ufSo/fc/VF/F47Vheg7dKAVPbBE1fRdmkAe+vuJN3iexMRUeJxeAkR0W0iCd79+hfSc22XBvDIsoXS6wBwxj0ovS7/uyAnA97hUcUywx/3Dgelvx+vzAegHiIChHqdFy5Ix+W+gHJ5Q6Oq98qHp0RiWZCBcz3DiudOXb6JHasLpMfhZRERUeIw6SYiuu2pDcUAII2Hlmt
Yv1iRYCdKtPHT8oRYi0i2XTf8SPvJh6ox20RENH0w6SYium3dklzFxYWC51/uV1xQWWXLloZ8VNmypfddvzmK5YULFJ+1LFD3fgsujx+WrAw47zFJy5PrHQ6iJM+gXN7tMd2iV15cwDkR7/AoCnIyFM/VlOQoet6JiEg/HNNNRAAAs3FuH4Mf+l4ZLFkZqoQbCI11FhdEtl0awLMPl0qvyf9+4fgVOPKN0hjwQ98rizrs4+CJq3Dd8OPYrhXScw3rF2P8xQfQsH4x3jnfi/qVFmlYS+uTVaplyJP+aL3c4qJOceFkw/rFqC3NxeGWqxE/Q0REiTO397JEJDEb0+Hzz91ez0eWLUTbJe0LCV84fgX1Ky1o3L0Sa395BheeXiONw5bPGNL8ZT/2H7uIA1vLsNe5GN6h0YjLFJb+/KRieQAUvdcleQZpuEvbpQFpTHfzl/043HwVB7aWScn2+kPtOLGvEo9X5ksXccqnDASgeL+YspCU5voBKBHpI218fHw81ZUgotTb8Kuz+OCiL9XVmHHEOGoxpV+4C0+vwbmeYcUsI1Mx/uIDTJZ1Vrk4G2eeqk51NYholuHwEiICAGy53VtL0Y2/+AAad6+UHu9YXSBNC9i4e6Wix1qMu442lV80F55eo7gVvRi2woRbX9wWiEgP7OkmIgCAzx9E3tMfpboa0174DCGuG34s/flJ6XH49H1T7ZUOn04wUo86Jc5Xz3wH9oWGid9IRBQHJt1EJPnR0S/wm5PXUl0NopR58gEbXnzs3lRXg4hmIQ4vISLJi4/dyx4+mrPsCw342SOlE7+RiGgSmHQTkcRsTFdMX0c0V5iN6XjxsXs5cwkR6YZJNxEpiJkb2ONNc4XZmI5XdnwLj5VbU10VIprFOKabiCLiGG+azczGdDxWbsXPHinlQSYR6Y5JNxFF1dUbwO87PWj81Mt5vGnGMxvTUbk4Gw/ca8Jj5VZULs6e+ENERAnApJuIaJpIS0sDALBZJiKafTimm4iIiIhIZ0y6iYiIiIh0xqSbiIiIiEhnTLqJiIiIiHTGpJuIiIiISGdMuomIiIiIdMakm4iIiIhIZ0y6iYiIiIh0xqSbiIiIiEhnTLqJiIiIiHTGpJuIiIiISGdMuomIiIiIdMakm4iIiIhIZ0y6iYiIiIh0ljY+Pj6e6koQ0fTl8wfxm5PX0PipFz5/EF29Afj8wf+/vbuLrbO+7wD+NYohzquxk2xgg0OIO6YG6oQqC7A4QpOWSZVCQqd2uVvJxS5GDJaiXUTVuk6olSamvMHd2nFHJ00JydWYVFE7pURoIynNBMTmxQUzIHFIUmwznOFdhPPgE+cFSJ4dx/58bnzO83Z+5+JYX//8e/6n1mVNT4f/9dzPlX9e2zqmsaVNs9PRMi/fuHluHl3XmsaGWbUuCZghhG7ggk6Nns0Pnx3IUy++J2QzbT26rjU7Nt5e6zKAGUDoBiY5MvhRNv30v/LWyY9rXQqUbmnT7Ox76OvpaJlX61KAaUzoBqocGfwo9z/5a91tZpTGhll57q+/wQDeQwAACcNJREFUIXgDpXEjJVA4NXo233v6NYGbGefU6Fl/bAKlErqBws6ed3Jk8KNalwE1cWr0bLqfeb3WZQDTlNANJDkXOHb1Dta6DKipp158z70MQCmEbiBJrFICn3nmNydqXQIwDQndQJKk5/XTtS4BpgSfBaAMvhUASBKz3F/R+I51X/qcuu6eEiqpdmz76rQvbsgj+/qz29jQl/KL/lO1LgGYhnS6gSQxWgKf8VkAyqDTDSQRNK6UjjIAl6LTDQAAJRO6AQCgZMZLAGpo4g2Pi+bW54EVi3LXzXOTJEPDY3n+zTPp2tufgQ8nrx29dtnC/MOGZbnrpnmZc/11Gfnk07z83x/lbw688f/9NgC4DKEbYArYvGpJ1rQtyMgnn6bv+Gga6q9La+MN2bCiOata5+WWHx6qOv67Kxfnp39xR+Zcf+4flpVz1rQtyL/91V0ZHfvfWrwNAC5C6AaYAta0LcihgTO5Z+fhYltXZ0t2bVqe1sYbsufB5dm6tz9J0nbj7Dz57fbMuf66vPzucDb809GiE97V2ZIff2tZmufW1+R9AHBhQjfAVbBr0/Ls2rT8ssddbJWTvuOjVYE7SXb3DmbLH92Uu26em2/eOr/Yvu3+1jTPrc/IJ59WBe7KOZV6AJg63EgJMAW88v7IBbe/dfJcoG6e83nnuvP2xiTJC2+dvuCs9+7ewQwNj5VQJQBflU43wFVwpet0//rdL/6NoA315/olw598etFjTo6cNWICMIXodANMASe+RGe6fXFDkuTnfR+WVQ4AV5nQDXCN6Ts+miT5k/Yba1wJAF+U0A1wjRkdOzdWsmT+xcdHmuaYHgSYSoRugGtM7+unkiR33TQvbTfOnrS/q7PFPDfAFCN0A1xjtu7tzzun/idzrr8uv+zqyNplC4t93125OD/+1rIaVgfAhfj/I8BV8EXX6U6ufKWTJNl24PU8+e32tDbekN6tHVXfYjnyyacZGh7T7QaYQnS6Aa5B/3L4eO7+x5dy4OhQhobH0r64Ia2NN+Tld4fz0M9ezcmRs7UuEYAJ6sbHx8drXQRQe3XdPbUuAaaM8R3ral0CMM3odAMAQMmEbgAAKJnQDQAAJRO6AQCgZEI3AACUTOgGAICSCd0AAFAyoRsAAEomdAMAQMmEbgAAKJnQDQAAJRO6AQCgZEI3AACUTOgGAICSCd0AAFAyoRsAAEomdAMAQMmEbgAAKJnQDQAAJRO6Ac6zdtnCjO9Yl2PbV9e6lK+s8h7WLltY61IAiNANMMm2+29J3/HRtC9uSFdnS63LAWAamFXrAgCmmvtuW5CnX/ogSVM2r1qS3b2DVfuPbV+d9sUNSZIDR4eyYUVzOvccycE3TqersyW7Ni1PkgwNj6XvxGiS5J6dh7N/y4osmV+fNW0LkiSP7OvP7t7BqusNDY9l0fd/VbzW/i0rsmFFc5Lk0MCZtC9qyNMvfZCte/uTJOM71lXVduDoUB5/7u30bu1IkvRu7cgTBwezdW9/VW1Jiu1JcuKxe9N3YjRr2hZMqgGAK6fTDTDBngfPhdKte/vz7Ksni4Bc8cKjK9M0Z1bquntS192T+277fP/aZQuza9PyPHFwMHXdPXn6pQ8mnb+mbUEe2defuu6eInAPjYwV13v+zTM58di9RS2VQF/X3ZMkaZ5bX1zrxGP35sDRoeLcyh8ASdK550jxc2Lgrrz2I/v68/DalqpOfvuihtR19wjcACUQugEmWH9HU55/80ySFF3gShBPzoXmv//3geL5xMeVsZTKeVv39qfv+GjV9YeGx4rO+dplC9O+uCH37Dxc7H/gJ0fTPLc+XZ0tWX9HUw4cHcrBN04nSdVxSbLo+7/KAz85Wjz/ed+HF31fm1ctyaGBM8Vr7+4dzKGBM3n4jz8P3ZX3DcDVZ7wE4DOVELzlZ68V2w4NnMn6O5qK/Uly+J2Piv0THy+ZX5+hkbGqa57//OTI2eLxdzoWJ5k8IpKc6zo3zZmV3374cfX1hscmHTtxPOVimufU55
X3R6q2/cdvf5fNq5YUz89/LQCuHqEb4DPb7r8lSYp56Im6OluqAvbVcqn56YmB+EIqYbvv+GjqunsmzWwDMHUI3UCSpLFhVk6Nnr38gdPYfbctqLq5sOLEY/dW3VC5snVeMfKxsnVecdwHvxvLH/7enKpzm+dM7n5X9J0YTfPc+qxdtrC43kQnR87m1htnV1/vs5nuSle+cgPn5QyNjGXJ/Pqqbd+8dX5V5x2A8pjpBpKcC90z2Z4Hl6d5bv2kwJ2cm3Wu3BB5aOBM/vZP24p9Ex8//tzbaV/cUMyA73lw+SXHPnb3Dqbv+Gj2PfT1YltXZ0vGd6xLV2dLnn31ZDasaC7GWl54dOWka0wM/Zfqcldu6qzcONnV2ZI1bQvyxC8HL3rOTDXTPwtAOfxmAZIkS5tm562TM3emd/0dTTk0cOEbCR9/7u1sWNGc/VtW5J6dh3Ns++piDnviiiEH3zidR/b1Z9em5Xl4bUuGhscues2Kr/3oxarrJanqXt964+xi3OXQwJlipvvgG6fzxMHB7Nq0vAjbnXuOpHdrR77Tsbi4iXPikoFJqo6vLFlItaVNsy9/EMCXVDc+Pj5e6yKA2tvZ8066n3m91mVccypz1JUl/c53bPvqvPL+SNUqI1difMc6YblkP1jflr/7s6W1LgOYZoyXAEmSv1z9+7Uu4ZowvmNd9m9ZUTzfvGpJsSzg/i0rqjrWlbnrSy3ldynHtq+u+ir6ytiKwF0unwWgDDrdQOF7T7+Wp158r9ZlTGnnrxDSd3w0X/vRi8Xz85fvu9Ku9PnLCV6so87V8ei61uzYeHutywCmIaEbKJwaPZuVj//njJ7tZuZa2jQ7h7fd7UZKoBTGS4BCY8OsqpU0YKZobJiVHRtvF7iB0gjdQJWOlnk5vO1uKzgwYzQ2zMo/b/6DbLxzUa1LAaYx4yXARZnxZjprbJiVjXcuyg/Wt/kjEyid0A1c0lsnP84zvzmR/UeH8ov+U7UuB65IY8OsdLTMy7rbF2bjnYvS0TLv8icBXAVCNwAAlMxMNwAAlEzoBgCAkgndAABQMqEbAABKJnQDAEDJhG4AACiZ0A0AACUTugEAoGRCNwAAlEzoBgCAkgndAABQMqEbAABKJnQDAEDJhG4AACiZ0A0AACUTugEAoGRCNwAAlEzoBgCAkgndAABQMqEbAABKJnQDAEDJhG4AACiZ0A0AACUTugEAoGRCNwAAlEzoBgCAkgndAABQMqEbAABKJnQDAEDJhG4AACiZ0A0AACUTugEAoGRCNwAAlEzoBgCAkgndAABQMqEbAABKJnQDAEDJhG4AACjZ/wG/aKvqTLZkfAAAAABJRU5ErkJggg==" + } + }, + "cell_type": "markdown", + "id": "8e406db6", + "metadata": { + "scrolled": true + }, + "source": [ + "Now we come to the flow definition. The OpenFL Workflow Interface adopts the conventions set by Metaflow, that every workflow begins with `start` and concludes with the `end` task. The aggregator begins with an optionally passed in model and optimizer. The aggregator begins the flow with the `start` task, where the list of collaborators is extracted from the runtime (`self.collaborators = self.runtime.collaborators`) and is then used as the list of participants to run the task listed in `self.next`, `aggregated_model_validation`. The model, optimizer, and anything that is not explicitly excluded from the next function will be passed from the `start` function on the aggregator to the `aggregated_model_validation` task on the collaborator. Where the tasks run is determined by the placement decorator that precedes each task definition (`@aggregator` or `@collaborator`). Once each of the collaborators (defined in the runtime) complete the `aggregated_model_validation` task, they pass their current state onto the `train` task, from `train` to `local_model_validation`, and then finally to `join` at the aggregator. 
It is in `join` that the model weights are averaged, and the next round can begin.\n",
+    "\n",
+    "![image.png](attachment:image.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcf91e3c-afbd-4ac8-b9f3-249960c176b6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class FederatedFlow(FLSpec):\n",
+    "\n",
+    "    def __init__(self, model_state_dict=None, optimizer=None,\n",
+    "                 diffmodel_state_dict=None, diff_optimizer=None,\n",
+    "                 model_args=None, rounds=3, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        self.model_args = model_args\n",
+    "        self.model_state_dict = model_state_dict\n",
+    "        self.optimizer = optimizer\n",
+    "\n",
+    "        self.model_state_dict_diff = diffmodel_state_dict\n",
+    "        self.optimizer_diff = diff_optimizer\n",
+    "\n",
+    "        self.rounds = rounds\n",
+    "\n",
+    "    @aggregator\n",
+    "    def start(self):\n",
+    "        print('Performing initialization for model')\n",
+    "        self.collaborators = self.runtime.collaborators\n",
+    "        self.private = 10\n",
+    "        self.current_round = 0\n",
+    "        self.next(self.aggregated_model_validation, foreach='collaborators', exclude=['private'])\n",
+    "\n",
+    "    @collaborator\n",
+    "    def aggregated_model_validation(self):\n",
+    "        # build model each time to avoid deepcopy issue (intermediate tensor, file handles, callback, etc.)\n",
+    "        print(f'Performing aggregated model validation for collaborator {self.input}')\n",
+    "        self.agg_validity, self.agg_uniqueness = run_guidance(self.model_args,\n",
+    "                                                              self.val_loader,\n",
+    "                                                              model_state_dict=self.model_state_dict_diff)\n",
+    "\n",
+    "        self.agg_validation_score_diff, self.model_state_dict_diff, self.optimizer_diff = run_model(self.model_args,\n",
+    "                                                                                                    self.val_loader_diff,\n",
+    "                                                                                                    model_type='diffusion',\n",
+    "                                                                                                    phase='validate',\n",
+    "                                                                                                    model_state_dict=self.model_state_dict_diff,\n",
+    "                                                                                                    optimizer_state_dict=None)\n",
+    "\n",
+    "        print(f'{self.input} value of Val NLL: {self.agg_validation_score_diff}, Validity: {self.agg_validity}, Uniqueness: {self.agg_uniqueness}')\n",
+    "        self.next(self.train)\n",
+    "\n",
+    "    @collaborator\n",
+    "    def train(self):\n",
+    "        # build model each time to avoid deepcopy issue (intermediate tensor, file handles, callback, etc.)\n",
+    "        print(f'Performing model training for collaborator {self.input}')\n",
+    "        self.loss, self.model_state_dict, self.optimizer = run_model(self.model_args,\n",
+    "                                                                     self.train_loader,\n",
+    "                                                                     model_type='regressor',\n",
+    "                                                                     phase='train',\n",
+    "                                                                     model_state_dict=self.model_state_dict,\n",
+    "                                                                     optimizer_state_dict=self.optimizer.state_dict())\n",
+    "\n",
+    "        self.loss_diff, self.model_state_dict_diff, self.optimizer_diff = run_model(self.model_args,\n",
+    "                                                                                    self.train_loader_diff,\n",
+    "                                                                                    model_type='diffusion',\n",
+    "                                                                                    phase='train',\n",
+    "                                                                                    model_state_dict=self.model_state_dict_diff,\n",
+    "                                                                                    optimizer_state_dict=self.optimizer_diff.state_dict())\n",
+    "\n",
+    "        self.training_completed = True\n",
+    "        self.next(self.local_model_validation)\n",
+    "\n",
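+    "    # NOTE: run_guidance and run_model are helper functions defined earlier in\n",
+    "    # this notebook (not shown in this excerpt). Inferred from the call sites:\n",
+    "    # run_guidance generates molecules with the guided diffusion model and returns\n",
+    "    # (validity, uniqueness); run_model rebuilds the requested model from\n",
+    "    # model_args and the given state dicts, runs one 'train'/'validate' phase,\n",
+    "    # and returns (metric, model state dict, optimizer).\n",
+    "\n",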
+    "    @collaborator\n",
+    "    def local_model_validation(self):\n",
+    "        self.local_validity, self.local_uniqueness = run_guidance(self.model_args,\n",
+    "                                                                   self.val_loader,\n",
+    "                                                                   model_state_dict=self.model_state_dict_diff)\n",
+    "\n",
+    "        self.local_validation_score_diff, self.model_state_dict_diff, self.optimizer_diff = run_model(self.model_args,\n",
+    "                                                                                                      self.val_loader_diff,\n",
+    "                                                                                                      model_type='diffusion',\n",
+    "                                                                                                      phase='validate',\n",
+    "                                                                                                      model_state_dict=self.model_state_dict_diff,\n",
+    "                                                                                                      optimizer_state_dict=None)\n",
+    "\n",
+    "        print(f'{self.input} value of Val NLL: {self.local_validation_score_diff}, Validity: {self.local_validity}, Uniqueness: {self.local_uniqueness}')\n",
+    "        self.next(self.join, exclude=['training_completed'])\n",
+    "\n",
+    "    @aggregator\n",
+    "    def join(self, inputs):\n",
+    "        self.average_loss = sum(input.loss_diff for input in inputs) / len(inputs)\n",
+    "\n",
+    "        self.aggregated_model_accuracy = sum(\n",
+    "            input.agg_validation_score_diff for input in inputs) / len(inputs)\n",
+    "        self.local_model_accuracy = sum(\n",
+    "            input.local_validation_score_diff for input in inputs) / len(inputs)\n",
+    "        print(f'Average aggregated model validation losses = {self.aggregated_model_accuracy}')\n",
+    "        print(f'Average local model validation losses = {self.local_model_accuracy}')\n",
+    "\n",
+    "        self.aggregated_model_validity = sum(\n",
+    "            input.agg_validity for input in inputs) / len(inputs)\n",
+    "        self.local_model_validity = sum(\n",
+    "            input.local_validity for input in inputs) / len(inputs)\n",
+    "        print(f'Average aggregated validity = {self.aggregated_model_validity}')\n",
+    "        print(f'Average local validity = {self.local_model_validity}')\n",
+    "\n",
+    "        self.aggregated_model_uniqueness = sum(\n",
+    "            input.agg_uniqueness for input in inputs) / len(inputs)\n",
+    "        self.local_model_uniqueness = sum(\n",
+    "            input.local_uniqueness for input in inputs) / len(inputs)\n",
+    "        print(f'Average aggregated uniqueness = {self.aggregated_model_uniqueness}')\n",
+    "        print(f'Average local uniqueness = {self.local_model_uniqueness}')\n",
+    "\n",
+    "        self.model_state_dict = FedAvg([input.model_state_dict for input in inputs])\n",
+    "        self.model_state_dict_diff = FedAvg([input.model_state_dict_diff for input in inputs])\n",
+    "\n",
+    "        torch.save(self.model_state_dict, 'model_regressor.pth')\n",
+    "        torch.save(self.model_state_dict_diff, 'model_diffusion.pth')\n",
+    "\n",
+    "        self.current_round += 1\n",
+    "        if self.current_round < self.rounds:\n",
+    "            self.next(self.aggregated_model_validation,\n",
+    "                      foreach='collaborators', exclude=['private'])\n",
+    "        else:\n",
+    "            self.next(self.end)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def end(self):\n",
+    "        print('This is the end of the flow')"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "2aabf61e",
+   "metadata": {},
+   "source": [
+    "Now let's define some collaborators and partition the datasets. 
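(Aside: the `join` step above delegates aggregation to a `FedAvg` helper that is defined earlier in the notebook and is not shown in this excerpt. A minimal sketch of plain state-dict averaging, assuming equally weighted collaborators and PyTorch tensors - illustrative only, not the notebook's actual implementation:)\n",
+    "\n",
+    "```python\n",
+    "import torch\n",
+    "\n",
+    "def fedavg_sketch(state_dicts):\n",
+    "    avg = {}\n",
+    "    for key in state_dicts[0]:\n",
+    "        # element-wise mean across collaborators; casting to float guards\n",
+    "        # against integer buffers such as BatchNorm's num_batches_tracked\n",
+    "        avg[key] = torch.stack([sd[key].float() for sd in state_dicts]).mean(dim=0)\n",
+    "    return avg\n",
+    "```\n",
+    "\n",
+    "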
For this implementation, we will use two collaborators (Portland and Seattle) and randomly partition the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "forward-world", + "metadata": {}, + "outputs": [], + "source": [ + "# Setup participants\n", + "aggregator = Aggregator()\n", + "aggregator.private_attributes = {}\n", + "\n", + "# Setup collaborators with private attributes\n", + "collaborator_names = ['Portland', 'Seattle']\n", + "collaborators = [Collaborator(name=name) for name in collaborator_names]\n", + "for idx, collaborator in enumerate(collaborators):\n", + " num_colab = idx+1\n", + "\n", + " if cfg[\"dataset\"][\"name\"] == 'qm9':\n", + " datamodule = qm9_dataset.QM9DataModule(cfg, num_colab, \"random\", regressor=True)\n", + " print(datamodule)\n", + " datamodule_diff = qm9_dataset.QM9DataModule(cfg, num_colab, \"random\")\n", + " print(datamodule_diff)\n", + "\n", + " collaborator.private_attributes = {\n", + " 'train_loader':datamodule.train_dataloader(),\n", + " 'val_loader':datamodule.val_dataloader(),\n", + " 'train_loader_diff':datamodule_diff.train_dataloader(),\n", + " 'val_loader_diff':datamodule_diff.val_dataloader()\n", + " }\n", + " else:\n", + " raise ValueError(\"Not yet supporting datasets other than QM9\")\n", + "\n", + "local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend='single_process')\n", + "print(f'Local runtime collaborators = {local_runtime.collaborators}')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "278ad46b", + "metadata": {}, + "source": [ + "Now that we have our flow and runtime defined, let's run the experiment! " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a175b4d6", + "metadata": {}, + "outputs": [], + "source": [ + "model_args = [cfg, model_kwargs, model_kwargs_diff]\n", + "\n", + "flflow = FederatedFlow(model.state_dict(), model.configure_optimizers(), \\\n", + " diff_model.state_dict(), diff_model.configure_optimizers(), \\\n", + " model_args, rounds=3)\n", + "flflow.runtime = local_runtime\n", + "flflow.run()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "86b3dd2e", + "metadata": {}, + "source": [ + "Now that the flow has completed, let's get the final model and accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "863761fe", + "metadata": {}, + "outputs": [], + "source": [ + "print(f'\\nFinal aggregated model accuracy for {flflow.rounds} rounds of training: {flflow.aggregated_model_accuracy}')\n", + "print(f'\\nFinal aggregated model validity for {flflow.rounds} rounds of training: {flflow.aggregated_model_validity}')\n", + "print(f'\\nFinal aggregated model uniqueness for {flflow.rounds} rounds of training: {flflow.aggregated_model_uniqueness}')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (digress_openfl_2.0)", + "language": "python", + "name": "digress_openfl_2.0" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openfl-tutorials/experimental/DiGress/digress/__init__.py b/openfl-tutorials/experimental/DiGress/digress/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-tutorials/experimental/DiGress/digress/analysis/__init__.py 
b/openfl-tutorials/experimental/DiGress/digress/analysis/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-tutorials/experimental/DiGress/digress/analysis/rdkit_functions.py b/openfl-tutorials/experimental/DiGress/digress/analysis/rdkit_functions.py new file mode 100644 index 0000000000..49fa561e19 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/analysis/rdkit_functions.py @@ -0,0 +1,339 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import numpy as np +import torch +import re +# import wandb +try: + from rdkit import Chem + print("Found rdkit, all good") +except ModuleNotFoundError as e: + use_rdkit = False + from warnings import warn + warn("Didn't find rdkit, this will fail") + assert use_rdkit, "Didn't find rdkit" + +from rdkit import RDLogger +RDLogger.DisableLog('rdApp.*') + +allowed_bonds = {'H': 1, 'C': 4, 'N': 3, 'O': 2, 'F': 1, 'B': 3, 'Al': 3, 'Si': 4, 'P': [3, 5], + 'S': 4, 'Cl': 1, 'As': 3, 'Br': 1, 'I': 1, 'Hg': [1, 2], 'Bi': [3, 5], 'Se': [2, 4, 6]} +bond_dict = [None, Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE, Chem.rdchem.BondType.TRIPLE, + Chem.rdchem.BondType.AROMATIC] +ATOM_VALENCY = {6: 4, 7: 3, 8: 2, 9: 1, 15: 3, 16: 2, 17: 1, 35: 1, 53: 1} + + +class BasicMolecularMetrics(object): + def __init__(self, dataset_info, train_smiles=None): + self.atom_decoder = dataset_info.atom_decoder + self.dataset_info = dataset_info + + # Retrieve dataset smiles only for qm9 currently. + self.dataset_smiles_list = train_smiles + + def compute_validity(self, generated): + """ generated: list of couples (positions, atom_types)""" + valid = [] + num_components = [] + all_smiles = [] + for graph in generated: + atom_types, edge_types = graph + mol = build_molecule(atom_types, edge_types, self.dataset_info.atom_decoder) + smiles = mol2smiles(mol) + try: + mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True) + num_components.append(len(mol_frags)) + except: + pass + if smiles is not None: + try: + mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True) + largest_mol = max(mol_frags, default=mol, key=lambda m: m.GetNumAtoms()) + smiles = mol2smiles(largest_mol) + valid.append(smiles) + all_smiles.append(smiles) + except Chem.rdchem.AtomValenceException: + print("Valence error in GetmolFrags") + all_smiles.append(None) + except Chem.rdchem.KekulizeException: + print("Can't kekulize molecule") + all_smiles.append(None) + else: + all_smiles.append(None) + + return valid, len(valid) / len(generated), np.array(num_components), all_smiles + + def compute_uniqueness(self, valid): + """ valid: list of SMILES strings.""" + return list(set(valid)), len(set(valid)) / len(valid) + + def compute_novelty(self, unique): + num_novel = 0 + novel = [] + if self.dataset_smiles_list is None: + print("Dataset smiles is None, novelty computation skipped") + return 1, 1 + for smiles in unique: + if smiles not in self.dataset_smiles_list: + novel.append(smiles) + num_novel += 1 + return novel, num_novel / len(unique) + + def compute_relaxed_validity(self, generated): + valid = [] + for graph in generated: + atom_types, edge_types = graph + mol = build_molecule_with_partial_charges(atom_types, edge_types, self.dataset_info.atom_decoder) + smiles = mol2smiles(mol) + if smiles is not None: + try: + mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True) + largest_mol = max(mol_frags, default=mol, key=lambda m: 
m.GetNumAtoms()) + smiles = mol2smiles(largest_mol) + valid.append(smiles) + except Chem.rdchem.AtomValenceException: + print("Valence error in GetmolFrags") + except Chem.rdchem.KekulizeException: + print("Can't kekulize molecule") + return valid, len(valid) / len(generated) + + def evaluate(self, generated): + """ generated: list of pairs (positions: n x 3, atom_types: n [int]) + the positions and atom types should already be masked. """ + valid, validity, num_components, all_smiles = self.compute_validity(generated) + nc_mu = num_components.mean() if len(num_components) > 0 else 0 + nc_min = num_components.min() if len(num_components) > 0 else 0 + nc_max = num_components.max() if len(num_components) > 0 else 0 + # print(f"Validity over {len(generated)} molecules: {validity * 100 :.2f}%") + # print(f"Number of connected components of {len(generated)} molecules: min:{nc_min:.2f} mean:{nc_mu:.2f} max:{nc_max:.2f}") + + relaxed_valid, relaxed_validity = self.compute_relaxed_validity(generated) + # print(f"Relaxed validity over {len(generated)} molecules: {relaxed_validity * 100 :.2f}%") + if relaxed_validity > 0: + unique, uniqueness = self.compute_uniqueness(relaxed_valid) + # print(f"Uniqueness over {len(relaxed_valid)} valid molecules: {uniqueness * 100 :.2f}%") + + if self.dataset_smiles_list is not None: + _, novelty = self.compute_novelty(unique) + # print(f"Novelty over {len(unique)} unique valid molecules: {novelty * 100 :.2f}%") + else: + novelty = -1.0 + else: + novelty = -1.0 + uniqueness = 0.0 + unique = [] + return ([validity, relaxed_validity, uniqueness, novelty], unique, + dict(nc_min=nc_min, nc_max=nc_max, nc_mu=nc_mu), all_smiles) + + +def mol2smiles(mol): + try: + Chem.SanitizeMol(mol) + except ValueError: + return None + return Chem.MolToSmiles(mol) + + +def build_molecule(atom_types, edge_types, atom_decoder, verbose=False): + if verbose: + print("building new molecule") + + mol = Chem.RWMol() + for atom in atom_types: + a = Chem.Atom(atom_decoder[atom.item()]) + mol.AddAtom(a) + if verbose: + print("Atom added: ", atom.item(), atom_decoder[atom.item()]) + + edge_types = torch.triu(edge_types) + all_bonds = torch.nonzero(edge_types) + for i, bond in enumerate(all_bonds): + if bond[0].item() != bond[1].item(): + mol.AddBond(bond[0].item(), bond[1].item(), bond_dict[edge_types[bond[0], bond[1]].item()]) + if verbose: + print("bond added:", bond[0].item(), bond[1].item(), edge_types[bond[0], bond[1]].item(), + bond_dict[edge_types[bond[0], bond[1]].item()] ) + return mol + + +def build_molecule_with_partial_charges(atom_types, edge_types, atom_decoder, verbose=False): + if verbose: + print("\nbuilding new molecule") + + mol = Chem.RWMol() + for atom in atom_types: + a = Chem.Atom(atom_decoder[atom.item()]) + mol.AddAtom(a) + if verbose: + print("Atom added: ", atom.item(), atom_decoder[atom.item()]) + edge_types = torch.triu(edge_types) + all_bonds = torch.nonzero(edge_types) + + for i, bond in enumerate(all_bonds): + if bond[0].item() != bond[1].item(): + mol.AddBond(bond[0].item(), bond[1].item(), bond_dict[edge_types[bond[0], bond[1]].item()]) + if verbose: + print("bond added:", bond[0].item(), bond[1].item(), edge_types[bond[0], bond[1]].item(), + bond_dict[edge_types[bond[0], bond[1]].item()]) + # add formal charge to atom: e.g. [O+], [N+], [S+] + # not support [O-], [N-], [S-], [NH+] etc. 
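+            # check_valency (defined below) attempts sanitization and, on failure,
+            # recovers the offending atom index and its observed valence from RDKit's
+            # error message; N/O/S atoms that exceed their allowed valency by exactly
+            # one then receive a +1 formal charge so that sanitization can succeed.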
+ flag, atomid_valence = check_valency(mol) + if verbose: + print("flag, valence", flag, atomid_valence) + if flag: + continue + else: + assert len(atomid_valence) == 2 + idx = atomid_valence[0] + v = atomid_valence[1] + an = mol.GetAtomWithIdx(idx).GetAtomicNum() + if verbose: + print("atomic num of atom with a large valence", an) + if an in (7, 8, 16) and (v - ATOM_VALENCY[an]) == 1: + mol.GetAtomWithIdx(idx).SetFormalCharge(1) + # print("Formal charge added") + return mol + + +# Functions from GDSS +def check_valency(mol): + try: + Chem.SanitizeMol(mol, sanitizeOps=Chem.SanitizeFlags.SANITIZE_PROPERTIES) + return True, None + except ValueError as e: + e = str(e) + p = e.find('#') + e_sub = e[p:] + atomid_valence = list(map(int, re.findall(r'\d+', e_sub))) + return False, atomid_valence + + +def correct_mol(m): + # xsm = Chem.MolToSmiles(x, isomericSmiles=True) + mol = m + + ##### + no_correct = False + flag, _ = check_valency(mol) + if flag: + no_correct = True + + while True: + flag, atomid_valence = check_valency(mol) + if flag: + break + else: + assert len(atomid_valence) == 2 + idx = atomid_valence[0] + v = atomid_valence[1] + queue = [] + check_idx = 0 + for b in mol.GetAtomWithIdx(idx).GetBonds(): + type = int(b.GetBondType()) + queue.append((b.GetIdx(), type, b.GetBeginAtomIdx(), b.GetEndAtomIdx())) + if type == 12: + check_idx += 1 + queue.sort(key=lambda tup: tup[1], reverse=True) + + if queue[-1][1] == 12: + return None, no_correct + elif len(queue) > 0: + start = queue[check_idx][2] + end = queue[check_idx][3] + t = queue[check_idx][1] - 1 + mol.RemoveBond(start, end) + if t >= 1: + mol.AddBond(start, end, bond_dict[t]) + return mol, no_correct + + +def valid_mol_can_with_seg(m, largest_connected_comp=True): + if m is None: + return None + sm = Chem.MolToSmiles(m, isomericSmiles=True) + if largest_connected_comp and '.' 
in sm: + vsm = [(s, len(s)) for s in sm.split('.')] # 'C.CC.CCc1ccc(N)cc1CCC=O'.split('.') + vsm.sort(key=lambda tup: tup[1], reverse=True) + mol = Chem.MolFromSmiles(vsm[0][0]) + else: + mol = Chem.MolFromSmiles(sm) + return mol + + +if __name__ == '__main__': + smiles_mol = 'C1CCC1' + print("Smiles mol %s" % smiles_mol) + chem_mol = Chem.MolFromSmiles(smiles_mol) + block_mol = Chem.MolToMolBlock(chem_mol) + print("Block mol:") + print(block_mol) + +use_rdkit = True + + +def check_stability(atom_types, edge_types, dataset_info, debug=False,atom_decoder=None): + if atom_decoder is None: + atom_decoder = dataset_info.atom_decoder + + n_bonds = np.zeros(len(atom_types), dtype='int') + + for i in range(len(atom_types)): + for j in range(i + 1, len(atom_types)): + n_bonds[i] += abs((edge_types[i, j] + edge_types[j, i])/2) + n_bonds[j] += abs((edge_types[i, j] + edge_types[j, i])/2) + n_stable_bonds = 0 + for atom_type, atom_n_bond in zip(atom_types, n_bonds): + possible_bonds = allowed_bonds[atom_decoder[atom_type]] + if type(possible_bonds) == int: + is_stable = possible_bonds == atom_n_bond + else: + is_stable = atom_n_bond in possible_bonds + if not is_stable and debug: + print("Invalid bonds for molecule %s with %d bonds" % (atom_decoder[atom_type], atom_n_bond)) + n_stable_bonds += int(is_stable) + + molecule_stable = n_stable_bonds == len(atom_types) + return molecule_stable, n_stable_bonds, len(atom_types) + + +def compute_molecular_metrics(molecule_list, train_smiles, dataset_info): + """ molecule_list: (dict) """ + + if not dataset_info.remove_h: + print(f'Analyzing molecule stability...') + + molecule_stable = 0 + nr_stable_bonds = 0 + n_atoms = 0 + n_molecules = len(molecule_list) + + for i, mol in enumerate(molecule_list): + atom_types, edge_types = mol + + validity_results = check_stability(atom_types, edge_types, dataset_info) + + molecule_stable += int(validity_results[0]) + nr_stable_bonds += int(validity_results[1]) + n_atoms += int(validity_results[2]) + + # Validity + fraction_mol_stable = molecule_stable / float(n_molecules) + fraction_atm_stable = nr_stable_bonds / float(n_atoms) + validity_dict = {'mol_stable': fraction_mol_stable, 'atm_stable': fraction_atm_stable} + # if wandb.run: + # wandb.log(validity_dict) + else: + validity_dict = {'mol_stable': -1, 'atm_stable': -1} + + metrics = BasicMolecularMetrics(dataset_info, train_smiles) + rdkit_metrics = metrics.evaluate(molecule_list) + all_smiles = rdkit_metrics[-1] + # if wandb.run: + # nc = rdkit_metrics[-2] + # dic = {'Validity': rdkit_metrics[0][0], 'Relaxed Validity': rdkit_metrics[0][1], + # 'Uniqueness': rdkit_metrics[0][2], 'Novelty': rdkit_metrics[0][3], + # 'nc_max': nc['nc_max'], 'nc_mu': nc['nc_mu']} + # wandb.log(dic) + + return validity_dict, rdkit_metrics, all_smiles diff --git a/openfl-tutorials/experimental/DiGress/digress/analysis/visualization.py b/openfl-tutorials/experimental/DiGress/digress/analysis/visualization.py new file mode 100644 index 0000000000..b9f1f9f236 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/analysis/visualization.py @@ -0,0 +1,221 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import os + +from rdkit import Chem +from rdkit.Chem import Draw, AllChem +from rdkit.Geometry import Point3D +from rdkit import RDLogger +import imageio +import networkx as nx +import numpy as np +import rdkit.Chem +# import wandb +import matplotlib.pyplot as plt + + +class MolecularVisualization: + def 
__init__(self, remove_h, dataset_infos): + self.remove_h = remove_h + self.dataset_infos = dataset_infos + + def mol_from_graphs(self, node_list, adjacency_matrix): + """ + Convert graphs to rdkit molecules + node_list: the nodes of a batch of nodes (bs x n) + adjacency_matrix: the adjacency_matrix of the molecule (bs x n x n) + """ + # dictionary to map integer value to the char of atom + atom_decoder = self.dataset_infos.atom_decoder + + # create empty editable mol object + mol = Chem.RWMol() + + # add atoms to mol and keep track of index + node_to_idx = {} + for i in range(len(node_list)): + if node_list[i] == -1: + continue + a = Chem.Atom(atom_decoder[int(node_list[i])]) + molIdx = mol.AddAtom(a) + node_to_idx[i] = molIdx + + for ix, row in enumerate(adjacency_matrix): + for iy, bond in enumerate(row): + # only traverse half the symmetric matrix + if iy <= ix: + continue + if bond == 1: + bond_type = Chem.rdchem.BondType.SINGLE + elif bond == 2: + bond_type = Chem.rdchem.BondType.DOUBLE + elif bond == 3: + bond_type = Chem.rdchem.BondType.TRIPLE + elif bond == 4: + bond_type = Chem.rdchem.BondType.AROMATIC + else: + continue + mol.AddBond(node_to_idx[ix], node_to_idx[iy], bond_type) + + try: + mol = mol.GetMol() + except rdkit.Chem.KekulizeException: + print("Can't kekulize molecule") + mol = None + return mol + + def visualize(self, path: str, molecules: list, num_molecules_to_visualize: int, log='graph'): + # define path to save figures + if not os.path.exists(path): + os.makedirs(path) + + # visualize the final molecules + print(f"Visualizing {num_molecules_to_visualize} of {len(molecules)}") + if num_molecules_to_visualize > len(molecules): + print(f"Shortening to {len(molecules)}") + num_molecules_to_visualize = len(molecules) + + for i in range(num_molecules_to_visualize): + file_path = os.path.join(path, 'molecule_{}.png'.format(i)) + mol = self.mol_from_graphs(molecules[i][0].numpy(), molecules[i][1].numpy()) + try: + Draw.MolToFile(mol, file_path) + # if wandb.run and log is not None: + # print(f"Saving {file_path} to wandb") + # wandb.log({log: wandb.Image(file_path)}, commit=True) + except rdkit.Chem.KekulizeException: + print("Can't kekulize molecule") + + + def visualize_chain(self, path, nodes_list, adjacency_matrix, trainer=None): + RDLogger.DisableLog('rdApp.*') + # convert graphs to the rdkit molecules + mols = [self.mol_from_graphs(nodes_list[i], adjacency_matrix[i]) for i in range(nodes_list.shape[0])] + + # find the coordinates of atoms in the final molecule + final_molecule = mols[-1] + AllChem.Compute2DCoords(final_molecule) + + coords = [] + for i, atom in enumerate(final_molecule.GetAtoms()): + positions = final_molecule.GetConformer().GetAtomPosition(i) + coords.append((positions.x, positions.y, positions.z)) + + # align all the molecules + for i, mol in enumerate(mols): + AllChem.Compute2DCoords(mol) + conf = mol.GetConformer() + for j, atom in enumerate(mol.GetAtoms()): + x, y, z = coords[j] + conf.SetAtomPosition(j, Point3D(x, y, z)) + + # draw gif + save_paths = [] + num_frams = nodes_list.shape[0] + + for frame in range(num_frams): + file_name = os.path.join(path, 'fram_{}.png'.format(frame)) + Draw.MolToFile(mols[frame], file_name, size=(300, 300), legend=f"Frame {frame}") + save_paths.append(file_name) + + imgs = [imageio.imread(fn) for fn in save_paths] + gif_path = os.path.join(os.path.dirname(path), '{}.gif'.format(path.split('/')[-1])) + imgs.extend([imgs[-1]] * 10) + imageio.mimsave(gif_path, imgs, subrectangles=True, duration=20) + + # if 
wandb.run: + # print(f"Saving {gif_path} to wandb") + # wandb.log({"chain": wandb.Video(gif_path, fps=5, format="gif")}, commit=True) + + # draw grid image + try: + img = Draw.MolsToGridImage(mols, molsPerRow=10, subImgSize=(200, 200)) + img.save(os.path.join(path, '{}_grid_image.png'.format(path.split('/')[-1]))) + except Chem.rdchem.KekulizeException: + print("Can't kekulize molecule") + return mols + + +class NonMolecularVisualization: + def to_networkx(self, node_list, adjacency_matrix): + """ + Convert graphs to networkx graphs + node_list: the nodes of a batch of nodes (bs x n) + adjacency_matrix: the adjacency_matrix of the molecule (bs x n x n) + """ + graph = nx.Graph() + + for i in range(len(node_list)): + if node_list[i] == -1: + continue + graph.add_node(i, number=i, symbol=node_list[i], color_val=node_list[i]) + + rows, cols = np.where(adjacency_matrix >= 1) + edges = zip(rows.tolist(), cols.tolist()) + for edge in edges: + edge_type = adjacency_matrix[edge[0]][edge[1]] + graph.add_edge(edge[0], edge[1], color=float(edge_type), weight=3 * edge_type) + + return graph + + def visualize_non_molecule(self, graph, pos, path, iterations=100, node_size=100, largest_component=False): + if largest_component: + CGs = [graph.subgraph(c) for c in nx.connected_components(graph)] + CGs = sorted(CGs, key=lambda x: x.number_of_nodes(), reverse=True) + graph = CGs[0] + + # Plot the graph structure with colors + if pos is None: + pos = nx.spring_layout(graph, iterations=iterations) + + # Set node colors based on the eigenvectors + w, U = np.linalg.eigh(nx.normalized_laplacian_matrix(graph).toarray()) + vmin, vmax = np.min(U[:, 1]), np.max(U[:, 1]) + m = max(np.abs(vmin), vmax) + vmin, vmax = -m, m + + plt.figure() + nx.draw(graph, pos, font_size=5, node_size=node_size, with_labels=False, node_color=U[:, 1], + cmap=plt.cm.coolwarm, vmin=vmin, vmax=vmax, edge_color='grey') + + plt.tight_layout() + plt.savefig(path) + plt.close("all") + + def visualize(self, path: str, graphs: list, num_graphs_to_visualize: int, log='graph'): + # define path to save figures + if not os.path.exists(path): + os.makedirs(path) + + # visualize the final molecules + for i in range(num_graphs_to_visualize): + file_path = os.path.join(path, 'graph_{}.png'.format(i)) + graph = self.to_networkx(graphs[i][0].numpy(), graphs[i][1].numpy()) + self.visualize_non_molecule(graph=graph, pos=None, path=file_path) + im = plt.imread(file_path) + # if wandb.run and log is not None: + # wandb.log({log: [wandb.Image(im, caption=file_path)]}) + + def visualize_chain(self, path, nodes_list, adjacency_matrix): + # convert graphs to networkx + graphs = [self.to_networkx(nodes_list[i], adjacency_matrix[i]) for i in range(nodes_list.shape[0])] + # find the coordinates of atoms in the final molecule + final_graph = graphs[-1] + final_pos = nx.spring_layout(final_graph, seed=0) + + # draw gif + save_paths = [] + num_frams = nodes_list.shape[0] + + for frame in range(num_frams): + file_name = os.path.join(path, 'fram_{}.png'.format(frame)) + self.visualize_non_molecule(graph=graphs[frame], pos=final_pos, path=file_name) + save_paths.append(file_name) + + imgs = [imageio.imread(fn) for fn in save_paths] + gif_path = os.path.join(os.path.dirname(path), '{}.gif'.format(path.split('/')[-1])) + imgs.extend([imgs[-1]] * 10) + imageio.mimsave(gif_path, imgs, subrectangles=True, duration=20) + # if wandb.run: + # wandb.log({'chain': [wandb.Video(gif_path, caption=gif_path, format="gif")]}) diff --git 
a/openfl-tutorials/experimental/DiGress/digress/configs/qm9_config_guidance.yaml b/openfl-tutorials/experimental/DiGress/digress/configs/qm9_config_guidance.yaml new file mode 100644 index 0000000000..8eb9843209 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/configs/qm9_config_guidance.yaml @@ -0,0 +1,48 @@ +general: + name: qm9 + guidance_target: both + sample_every_val: 1 + samples_to_generate: 10000 + samples_to_save: 0 + chains_to_save: 0 + number_chain_steps: 50 + +model: + type: discrete + transition: marginal + model: graph_tf + diffusion_steps: 500 + diffusion_noise_schedule: cosine + n_layers: 5 + extra_features: null + hidden_mlp_dims: + X: 256 + E: 128 + y: 128 + hidden_dims: + dx: 256 + de: 64 + dy: 64 + n_head: 8 + dim_ffX: 256 + dim_ffE: 128 + dim_ffy: 128 + lambda_train: + - 5 + - 0 + input_dims: {'X': 4, 'E': 5, 'y': 1} + output_dims: {'X': 4, 'E': 5, 'y': 0} + torch_compile: False +guidance: + use_guidance: True + lambda_guidance: 100.0 +train: + batch_size: 512 + lr: 0.0002 + num_workers: 0 + weight_decay: 1.0e-12 +dataset: + name: qm9 + datadir: data/qm9/qm9_pyg/ + remove_h: true + pin_memory: false diff --git a/openfl-tutorials/experimental/DiGress/digress/datasets/__init__.py b/openfl-tutorials/experimental/DiGress/digress/datasets/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-tutorials/experimental/DiGress/digress/datasets/abstract_dataset.py b/openfl-tutorials/experimental/DiGress/digress/datasets/abstract_dataset.py new file mode 100644 index 0000000000..3646b8d15a --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/datasets/abstract_dataset.py @@ -0,0 +1,147 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +from digress.diffusion.distributions import DistributionNodes +import digress.utils as utils +import torch +import pytorch_lightning as pl +from torch_geometric.loader import DataLoader +from torch_geometric.data.lightning import LightningDataset + + +class AbstractDataModule(LightningDataset): + def __init__(self, cfg, datasets): + super().__init__(train_dataset=datasets['train'], val_dataset=datasets['val'], test_dataset=datasets['test'], + batch_size=cfg.train.batch_size if 'debug' not in cfg.general.name else 2, + num_workers=cfg.train.num_workers, + pin_memory=getattr(cfg.dataset, "pin_memory", False)) + self.cfg = cfg + self.input_dims = None + self.output_dims = None + + def __getitem__(self, idx): + return self.train_dataset[idx] + + def node_counts(self, max_nodes_possible=300): + all_counts = torch.zeros(max_nodes_possible) + for loader in [self.train_dataloader(), self.val_dataloader()]: + for data in loader: + unique, counts = torch.unique(data.batch, return_counts=True) + for count in counts: + all_counts[count] += 1 + max_index = max(all_counts.nonzero()) + all_counts = all_counts[:max_index + 1] + all_counts = all_counts / all_counts.sum() + return all_counts + + def node_types(self): + num_classes = None + for data in self.train_dataloader(): + num_classes = data.x.shape[1] + break + + counts = torch.zeros(num_classes) + + for i, data in enumerate(self.train_dataloader()): + counts += data.x.sum(dim=0) + + counts = counts / counts.sum() + return counts + + def edge_counts(self): + num_classes = None + for data in self.train_dataloader(): + num_classes = data.edge_attr.shape[1] + break + + d = torch.zeros(num_classes, dtype=torch.float) + + for i, data in enumerate(self.train_dataloader()): + unique, 
counts = torch.unique(data.batch, return_counts=True)
+
+            all_pairs = 0
+            for count in counts:
+                all_pairs += count * (count - 1)
+
+            num_edges = data.edge_index.shape[1]
+            num_non_edges = all_pairs - num_edges
+
+            edge_types = data.edge_attr.sum(dim=0)
+            assert num_non_edges >= 0
+            d[0] += num_non_edges
+            d[1:] += edge_types[1:]
+
+        d = d / d.sum()
+        return d
+
+
+class MolecularDataModule(AbstractDataModule):
+    def valency_count(self, max_n_nodes):
+        valencies = torch.zeros(3 * max_n_nodes - 2)   # Max valency possible if everything is connected
+
+        # No bond, single bond, double bond, triple bond, aromatic bond
+        multiplier = torch.tensor([0, 1, 2, 3, 1.5])
+
+        for data in self.train_dataloader():
+            n = data.x.shape[0]
+
+            for atom in range(n):
+                edges = data.edge_attr[data.edge_index[0] == atom]
+                edges_total = edges.sum(dim=0)
+                valency = (edges_total * multiplier).sum()
+                valencies[valency.long().item()] += 1
+        valencies = valencies / valencies.sum()
+        return valencies
+
+
+class AbstractDatasetInfos:
+    def complete_infos(self, n_nodes, node_types):
+        self.input_dims = None
+        self.output_dims = None
+        self.num_classes = len(node_types)
+        self.max_n_nodes = len(n_nodes) - 1
+        self.nodes_dist = DistributionNodes(n_nodes)
+
+    def compute_input_output_dims(self, datamodule=None, extra_features=None, domain_features=None, cfg=None, regressor=False):
+        """
+        Compute the input and output dimensions for the model.
+        If a datamodule is provided, the dimensions are computed based on the data.
+        If no datamodule is provided, the dimensions are taken from the provided cfg.
+
+        Args:
+            datamodule: An instance that provides access to the data loaders. [optional]
+            extra_features: A callable that computes additional features for the input data. [optional]
+            domain_features: A callable that computes domain-specific features for the input data. [optional]
+            cfg: A configuration object that contains the model information. [optional]
+            regressor: A boolean flag indicating whether a regressor is being used (default False).
+
+        Returns:
+            None: Sets the input_dims and output_dims attributes of the instance.
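+
+        Example (illustrative; assumes a prepared datamodule and feature callables):
+            infos.compute_input_output_dims(datamodule=datamodule,
+                                            extra_features=extra_features,
+                                            domain_features=domain_features)
+            infos.input_dims   # e.g. {'X': 4, 'E': 5, 'y': 1} plus any extra-feature sizes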
+        """
+        if datamodule:
+            example_batch = next(iter(datamodule.train_dataloader()))
+            ex_dense, node_mask = utils.to_dense(example_batch.x, example_batch.edge_index, example_batch.edge_attr,
+                                                 example_batch.batch)
+            example_data = {'X_t': ex_dense.X, 'E_t': ex_dense.E, 'y_t': example_batch['y'], 'node_mask': node_mask}
+
+            self.input_dims = {'X': example_batch['x'].size(1),
+                               'E': example_batch['edge_attr'].size(1),
+                               'y': 1}      # + 1 due to time conditioning
+            ex_extra_feat = extra_features(example_data)
+            self.input_dims['X'] += ex_extra_feat.X.size(-1)
+            self.input_dims['E'] += ex_extra_feat.E.size(-1)
+            self.input_dims['y'] += ex_extra_feat.y.size(-1)
+
+            ex_extra_molecular_feat = domain_features(example_data)
+            self.input_dims['X'] += ex_extra_molecular_feat.X.size(-1)
+            self.input_dims['E'] += ex_extra_molecular_feat.E.size(-1)
+            self.input_dims['y'] += ex_extra_molecular_feat.y.size(-1)
+
+            self.output_dims = {'X': example_batch['x'].size(1),
+                                'E': example_batch['edge_attr'].size(1),
+                                'y': 0}
+        else:
+            if cfg is None:
+                raise ValueError('Datamodule is None, please provide cfg')
+            self.input_dims = cfg.model.input_dims
+            self.output_dims = cfg.model.output_dims
\ No newline at end of file
diff --git a/openfl-tutorials/experimental/DiGress/digress/datasets/qm9_dataset.py b/openfl-tutorials/experimental/DiGress/digress/datasets/qm9_dataset.py
new file mode 100644
index 0000000000..4d6ea92e50
--- /dev/null
+++ b/openfl-tutorials/experimental/DiGress/digress/datasets/qm9_dataset.py
@@ -0,0 +1,415 @@
+# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin
+# source: https://github.com/cvignac/DiGress/
+
+import hashlib
+import os
+import os.path as osp
+import pathlib
+from typing import Any, Sequence
+
+import torch
+import torch.nn.functional as F
+from rdkit import Chem, RDLogger
+from rdkit.Chem.rdchem import BondType as BT
+from tqdm import tqdm
+import numpy as np
+import pandas as pd
+from torch_geometric.data import Data, InMemoryDataset, download_url, extract_zip
+from torch_geometric.utils import subgraph
+
+import digress.utils as utils
+from digress.datasets.abstract_dataset import MolecularDataModule, AbstractDatasetInfos
+from digress.analysis.rdkit_functions import mol2smiles, build_molecule_with_partial_charges, compute_molecular_metrics
+
+
+HAR2EV = 27.211386246
+KCALMOL2EV = 0.04336414
+
+conversion = torch.tensor([1., 1., HAR2EV, HAR2EV, HAR2EV, 1., HAR2EV, HAR2EV, HAR2EV, HAR2EV, HAR2EV,
+                           1., KCALMOL2EV, KCALMOL2EV, KCALMOL2EV, KCALMOL2EV, 1., 1., 1.])
+
+
+def calculate_file_hash(file_path, hash_type='sha384'):
+    """Calculate the hash of a file."""
+    hash_obj = hashlib.new(hash_type)
+    with open(file_path, 'rb') as f:
+        for chunk in iter(lambda: f.read(4096), b''):
+            hash_obj.update(chunk)
+    return hash_obj.hexdigest()
+
+
+def verify_file_hash(file_path, expected_hash, hash_type='sha384'):
+    """Verify the hash of a file."""
+    calculated_hash = calculate_file_hash(file_path, hash_type)
+    if calculated_hash != expected_hash:
+        raise ValueError(f'Hash mismatch: {calculated_hash} != {expected_hash}')
+
+
+def files_exist(files) -> bool:
+    # NOTE: We return `False` in case `files` is empty, leading to a
+    # re-processing of files on every instantiation.
+ return len(files) != 0 and all([osp.exists(f) for f in files]) + + +def to_list(value: Any) -> Sequence: + if isinstance(value, Sequence) and not isinstance(value, str): + return value + else: + return [value] + + +class RemoveYTransform: + def __call__(self, data): + data.y = torch.zeros((1, 0), dtype=torch.float) + return data + + +class SelectMuTransform: + def __call__(self, data): + data.y = data.y[..., :1] + return data + + +class SelectHOMOTransform: + def __call__(self, data): + data.y = data.y[..., 1:] + return data + + +class QM9Dataset(InMemoryDataset): + URLS =['https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/molnet_publish/qm9.zip', + 'https://ndownloader.figshare.com/files/3195404'] + EXPECTED_HASHES = { + 'qm9.zip': '1b2f3a9bee4e8f10d8dda1696cc6f1512b3970066efa995ac9a7049ab0dcdf0ea46787e6cadafe131da8ea46b42857af', + 'uncharacterized.txt': '5bb2f845068ce15c5b4a3cbb9ac1be1ba8eb8022c825c2c65e3f5eb0347dc38cb3e06fd7dae0115c3161e063a215d61b' + } + + def __init__(self, stage, root, remove_h: bool, transform=None, pre_transform=None, pre_filter=None): + """ stage: train, val, test + root: data directory + remove_h: remove hydrogens + target_prop: property to predict (for guidance only). + """ + self.stage = stage + if self.stage == 'train': + self.file_idx = 0 + elif self.stage == 'val': + self.file_idx = 1 + else: + self.file_idx = 2 + self.remove_h = remove_h + super().__init__(root, transform, pre_transform, pre_filter) + self.data, self.slices = torch.load(self.processed_paths[self.file_idx]) + + @property + def raw_file_names(self): + return ['gdb9.sdf', 'gdb9.sdf.csv', 'uncharacterized.txt'] + + @property + def split_file_name(self): + return ['train.csv', 'val.csv', 'test.csv'] + + @property + def split_paths(self): + r"""The absolute filepaths that must be present in order to skip + splitting.""" + files = to_list(self.split_file_name) + return [osp.join(self.raw_dir, f) for f in files] + + @property + def processed_file_names(self): + if self.remove_h: + return ['proc_tr_no_h.pt', 'proc_val_no_h.pt', 'proc_test_no_h.pt'] + else: + return ['proc_tr_h.pt', 'proc_val_h.pt', 'proc_test_h.pt'] + + def download(self): + """ + Download raw qm9 files. 
Taken from PyG QM9 class + """ + import rdkit # noqa + file_path = download_url(self.URLS[0], self.raw_dir) + verify_file_hash(file_path, self.EXPECTED_HASHES['qm9.zip']) + extract_zip(file_path, self.raw_dir) + os.unlink(file_path) + + file_path = download_url(self.URLS[1], self.raw_dir) + os.rename(osp.join(self.raw_dir, '3195404'), + osp.join(self.raw_dir, 'uncharacterized.txt')) + verify_file_hash(osp.join(self.raw_dir, 'uncharacterized.txt'), self.EXPECTED_HASHES['uncharacterized.txt']) + + if files_exist(self.split_paths): + return + + dataset = pd.read_csv(self.raw_paths[1]) + + n_samples = len(dataset) + n_train = 100000 + n_test = int(0.1 * n_samples) + n_val = n_samples - (n_train + n_test) + + # Shuffle dataset with df.sample, then split + train, val, test = np.split(dataset.sample(frac=1, random_state=42), [n_train, n_val + n_train]) + + train.to_csv(os.path.join(self.raw_dir, 'train.csv')) + val.to_csv(os.path.join(self.raw_dir, 'val.csv')) + test.to_csv(os.path.join(self.raw_dir, 'test.csv')) + + def process(self): + RDLogger.DisableLog('rdApp.*') + + types = {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4} + bonds = {BT.SINGLE: 0, BT.DOUBLE: 1, BT.TRIPLE: 2, BT.AROMATIC: 3} + + target_df = pd.read_csv(self.split_paths[self.file_idx], index_col=0) + target_df.drop(columns=['mol_id'], inplace=True) + target = torch.tensor(target_df.values, dtype=torch.float) + target = torch.cat([target[:, 3:], target[:, :3]], dim=-1) + target = target * conversion.view(1, -1) + + with open(self.raw_paths[-1], 'r') as f: + skip = [int(x.split()[0]) - 1 for x in f.read().split('\n')[9:-2]] + + suppl = Chem.SDMolSupplier(self.raw_paths[0], removeHs=False, sanitize=False) + + data_list = [] + for i, mol in enumerate(tqdm(suppl)): + if i in skip or i not in target_df.index: + continue + + N = mol.GetNumAtoms() + + type_idx = [] + for atom in mol.GetAtoms(): + type_idx.append(types[atom.GetSymbol()]) + + row, col, edge_type = [], [], [] + for bond in mol.GetBonds(): + start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx() + row += [start, end] + col += [end, start] + edge_type += 2 * [bonds[bond.GetBondType()] + 1] + + edge_index = torch.tensor([row, col], dtype=torch.long) + edge_type = torch.tensor(edge_type, dtype=torch.long) + edge_attr = F.one_hot(edge_type, num_classes=len(bonds)+1).to(torch.float) + + perm = (edge_index[0] * N + edge_index[1]).argsort() + edge_index = edge_index[:, perm] + edge_attr = edge_attr[perm] + + x = F.one_hot(torch.tensor(type_idx), num_classes=len(types)).float() + + y = target[target_df.index.get_loc(i)].unsqueeze(0) + y = torch.hstack((y[..., :1], y[..., 2:3])) # mu, homo + + if self.remove_h: + type_idx = torch.Tensor(type_idx).long() + to_keep = type_idx > 0 + edge_index, edge_attr = subgraph(to_keep, edge_index, edge_attr, relabel_nodes=True, + num_nodes=len(to_keep)) + x = x[to_keep] + # Shift onehot encoding to match atom decoder + x = x[:, 1:] + + data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, idx=i) + + if self.pre_filter is not None and not self.pre_filter(data): + continue + if self.pre_transform is not None: + data = self.pre_transform(data) + + data_list.append(data) + + torch.save(self.collate(data_list), self.processed_paths[self.file_idx]) + + +class QM9DataModule(MolecularDataModule): + def __init__(self, cfg, shard=None, split="random", regressor: bool = False): + self.datadir = cfg.dataset.datadir + self.remove_h = cfg.dataset.remove_h + self.regressor = regressor + self.shard = shard + self.split = split + + target = 
getattr(cfg.general, 'guidance_target', None) + if self.regressor and target == 'mu': + transform = SelectMuTransform() + elif self.regressor and target == 'homo': + transform = SelectHOMOTransform() + elif self.regressor and target == 'both': + transform = None + else: + transform = RemoveYTransform() + + datasets = {'train': QM9Dataset(stage='train', root=self.datadir , remove_h=self.remove_h, + transform=transform if self.regressor else RemoveYTransform()), + 'val': QM9Dataset(stage='val', root=self.datadir , remove_h=self.remove_h, + transform=transform if self.regressor else RemoveYTransform()), + 'test': QM9Dataset(stage='test', root=self.datadir , remove_h=self.remove_h, + transform=transform)} + + if self.shard: + if self.split=="random": + if self.shard == 1: + datasets = {'train': datasets['train'][:len(datasets['train'])//2], + 'val': datasets['val'][:len(datasets['val'])//2], + 'test': datasets['test']} + elif self.shard == 2: + datasets = {'train': datasets['train'][len(datasets['train'])//2:], + 'val': datasets['val'][len(datasets['val'])//2:], + 'test': datasets['test']} + + else: + raise ValueError("Max. collabs = 2") + else: + raise ValueError("Only randomized split currently supported") + + super().__init__(cfg, datasets) + + +class QM9infos(AbstractDatasetInfos): + def __init__(self, datamodule, cfg, recompute_statistics=False): + self.remove_h = cfg.dataset.remove_h + self.need_to_strip = False # to indicate whether we need to ignore one output from the model + + self.name = 'qm9' + if self.remove_h: + self.atom_encoder = {'C': 0, 'N': 1, 'O': 2, 'F': 3} + self.atom_decoder = ['C', 'N', 'O', 'F'] + self.num_atom_types = 4 + self.valencies = [4, 3, 2, 1] + self.atom_weights = {0: 12, 1: 14, 2: 16, 3: 19} + self.max_n_nodes = 9 + self.max_weight = 150 + self.n_nodes = torch.tensor([0, 2.2930e-05, 3.8217e-05, 6.8791e-05, 2.3695e-04, 9.7072e-04, + 0.0046472, 0.023985, 0.13666, 0.83337]) + self.node_types = torch.tensor([0.7230, 0.1151, 0.1593, 0.0026]) + self.edge_types = torch.tensor([0.7261, 0.2384, 0.0274, 0.0081, 0.0]) + + super().complete_infos(n_nodes=self.n_nodes, node_types=self.node_types) + self.valency_distribution = torch.zeros(3 * self.max_n_nodes - 2) + self.valency_distribution[0: 6] = torch.tensor([2.6071e-06, 0.163, 0.352, 0.320, 0.16313, 0.00073]) + else: + self.atom_encoder = {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4} + self.atom_decoder = ['H', 'C', 'N', 'O', 'F'] + self.valencies = [1, 4, 3, 2, 1] + self.num_atom_types = 5 + self.max_n_nodes = 29 + self.max_weight = 390 + self.atom_weights = {0: 1, 1: 12, 2: 14, 3: 16, 4: 19} + self.n_nodes = torch.tensor([0, 0, 0, 1.5287e-05, 3.0574e-05, 3.8217e-05, + 9.1721e-05, 1.5287e-04, 4.9682e-04, 1.3147e-03, 3.6918e-03, 8.0486e-03, + 1.6732e-02, 3.0780e-02, 5.1654e-02, 7.8085e-02, 1.0566e-01, 1.2970e-01, + 1.3332e-01, 1.3870e-01, 9.4802e-02, 1.0063e-01, 3.3845e-02, 4.8628e-02, + 5.4421e-03, 1.4698e-02, 4.5096e-04, 2.7211e-03, 0.0000e+00, 2.6752e-04]) + + self.node_types = torch.tensor([0.5122, 0.3526, 0.0562, 0.0777, 0.0013]) + self.edge_types = torch.tensor([0.88162, 0.11062, 5.9875e-03, 1.7758e-03, 0]) + + super().complete_infos(n_nodes=self.n_nodes, node_types=self.node_types) + self.valency_distribution = torch.zeros(3 * self.max_n_nodes - 2) + self.valency_distribution[0:6] = torch.tensor([0, 0.5136, 0.0840, 0.0554, 0.3456, 0.0012]) + + if recompute_statistics: + np.set_printoptions(suppress=True, precision=5) + self.n_nodes = datamodule.node_counts() + print("Distribution of number of nodes", self.n_nodes) + 
np.savetxt('n_counts.txt', self.n_nodes.numpy()) + self.node_types = datamodule.node_types() # There are no node types + print("Distribution of node types", self.node_types) + np.savetxt('atom_types.txt', self.node_types.numpy()) + + self.edge_types = datamodule.edge_counts() + print("Distribution of edge types", self.edge_types) + np.savetxt('edge_types.txt', self.edge_types.numpy()) + + valencies = datamodule.valency_count(self.max_n_nodes) + print("Distribution of the valencies", valencies) + np.savetxt('valencies.txt', valencies.numpy()) + self.valency_distribution = valencies + assert False + + +def get_train_smiles(cfg, train_dataloader, dataset_infos, evaluate_dataset=False): + if evaluate_dataset: + assert dataset_infos is not None, "If wanting to evaluate dataset, need to pass dataset_infos" + datadir = cfg.dataset.datadir + remove_h = cfg.dataset.remove_h + atom_decoder = dataset_infos.atom_decoder + smiles_file_name = 'train_smiles_no_h.npy' if remove_h else 'train_smiles_h.npy' + smiles_path = os.path.join(datadir, smiles_file_name) + if os.path.exists(smiles_path): + print("Dataset smiles were found.") + train_smiles = np.load(smiles_path) + else: + print("Computing dataset smiles...") + train_smiles = compute_qm9_smiles(atom_decoder, train_dataloader, remove_h) + np.save(smiles_path, np.array(train_smiles)) + + if evaluate_dataset: + train_dataloader = train_dataloader + all_molecules = [] + for i, data in enumerate(train_dataloader): + dense_data, node_mask = utils.to_dense(data.x, data.edge_index, data.edge_attr, data.batch) + dense_data = dense_data.mask(node_mask, collapse=True) + X, E = dense_data.X, dense_data.E + + for k in range(X.size(0)): + n = int(torch.sum((X != -1)[k, :])) + atom_types = X[k, :n].cpu() + edge_types = E[k, :n, :n].cpu() + all_molecules.append([atom_types, edge_types]) + + print("Evaluating the dataset -- number of molecules to evaluate", len(all_molecules)) + metrics = compute_molecular_metrics(molecule_list=all_molecules, train_smiles=train_smiles, + dataset_info=dataset_infos) + print(metrics[0]) + + return train_smiles + + +def compute_qm9_smiles(atom_decoder, train_dataloader, remove_h): + ''' + + :param dataset_name: qm9 or qm9_second_half + :return: + ''' + print(f"\tConverting QM9 dataset to SMILES for remove_h={remove_h}...") + + mols_smiles = [] + len_train = len(train_dataloader) + invalid = 0 + disconnected = 0 + for i, data in enumerate(train_dataloader): + dense_data, node_mask = utils.to_dense(data.x, data.edge_index, data.edge_attr, data.batch) + dense_data = dense_data.mask(node_mask, collapse=True) + X, E = dense_data.X, dense_data.E + + n_nodes = [int(torch.sum((X != -1)[j, :])) for j in range(X.size(0))] + + molecule_list = [] + for k in range(X.size(0)): + n = n_nodes[k] + atom_types = X[k, :n].cpu() + edge_types = E[k, :n, :n].cpu() + molecule_list.append([atom_types, edge_types]) + + for l, molecule in enumerate(molecule_list): + mol = build_molecule_with_partial_charges(molecule[0], molecule[1], atom_decoder) + smile = mol2smiles(mol) + if smile is not None: + mols_smiles.append(smile) + mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=True) + if len(mol_frags) > 1: + print(f"Disconnected molecule {len(mol_frags)} fragments") + disconnected += 1 + else: + print("Invalid molecule obtained.") + invalid += 1 + + if i % 1000 == 0: + print("\tConverting QM9 dataset to SMILES {0:.2%}".format(float(i) / len_train)) + print("Number of invalid molecules", invalid) + print("Number of disconnected molecules", 
disconnected) + return mols_smiles diff --git a/openfl-tutorials/experimental/DiGress/digress/diffusion/__init__.py b/openfl-tutorials/experimental/DiGress/digress/diffusion/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-tutorials/experimental/DiGress/digress/diffusion/diffusion_utils.py b/openfl-tutorials/experimental/DiGress/digress/diffusion/diffusion_utils.py new file mode 100644 index 0000000000..df90ecc426 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/diffusion/diffusion_utils.py @@ -0,0 +1,399 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +from torch.nn import functional as F +import numpy as np +import math + +from digress.utils import PlaceHolder + + +def sum_except_batch(x): + return x.reshape(x.size(0), -1).sum(dim=-1) + + +def assert_correctly_masked(variable, node_mask): + assert (variable * (1 - node_mask.long())).abs().max().item() < 1e-4, \ + 'Variables not masked properly.' + + +def sample_gaussian(size): + x = torch.randn(size) + return x + + +def sample_gaussian_with_mask(size, node_mask): + x = torch.randn(size) + x = x.type_as(node_mask.float()) + x_masked = x * node_mask + return x_masked + + +def clip_noise_schedule(alphas2, clip_value=0.001): + """ + For a noise schedule given by alpha^2, this clips alpha_t / alpha_t-1. This may help improve stability during + sampling. + """ + alphas2 = np.concatenate([np.ones(1), alphas2], axis=0) + + alphas_step = (alphas2[1:] / alphas2[:-1]) + + alphas_step = np.clip(alphas_step, a_min=clip_value, a_max=1.) + alphas2 = np.cumprod(alphas_step, axis=0) + + return alphas2 + + +def cosine_beta_schedule(timesteps, s=0.008, raise_to_power: float = 1): + """ + cosine schedule + as proposed in https://openreview.net/forum?id=-NEXDKk8gZ + """ + steps = timesteps + 2 + x = np.linspace(0, steps, steps) + alphas_cumprod = np.cos(((x / steps) + s) / (1 + s) * np.pi * 0.5) ** 2 + alphas_cumprod = alphas_cumprod / alphas_cumprod[0] + betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1]) + betas = np.clip(betas, a_min=0, a_max=0.999) + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + + if raise_to_power != 1: + alphas_cumprod = np.power(alphas_cumprod, raise_to_power) + + return alphas_cumprod + + +def cosine_beta_schedule_discrete(timesteps, s=0.008): + """ Cosine schedule as proposed in https://openreview.net/forum?id=-NEXDKk8gZ. """ + steps = timesteps + 2 + x = np.linspace(0, steps, steps) + + alphas_cumprod = np.cos(0.5 * np.pi * ((x / steps) + s) / (1 + s)) ** 2 + alphas_cumprod = alphas_cumprod / alphas_cumprod[0] + alphas = (alphas_cumprod[1:] / alphas_cumprod[:-1]) + betas = 1 - alphas + return betas.squeeze() + + +def custom_beta_schedule_discrete(timesteps, average_num_nodes=50, s=0.008): + """ Cosine schedule as proposed in https://openreview.net/forum?id=-NEXDKk8gZ. 
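+
+    Unlike `cosine_beta_schedule_discrete`, the early betas are clamped from
+    below so that, in expectation, each graph receives roughly
+    `updates_per_graph` (= 1.2) edge flips per step at the start of the
+    process. Worked example (illustrative): with average_num_nodes=50 there
+    are 50 * 49 / 2 = 1225 candidate edges, so
+    beta_first = 1.2 / (0.8 * 1225) ~= 1.22e-3.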
""" + steps = timesteps + 2 + x = np.linspace(0, steps, steps) + + alphas_cumprod = np.cos(0.5 * np.pi * ((x / steps) + s) / (1 + s)) ** 2 + alphas_cumprod = alphas_cumprod / alphas_cumprod[0] + alphas = (alphas_cumprod[1:] / alphas_cumprod[:-1]) + betas = 1 - alphas + + assert timesteps >= 100 + + p = 4 / 5 # 1 - 1 / num_edge_classes + num_edges = average_num_nodes * (average_num_nodes - 1) / 2 + + # First 100 steps: only a few updates per graph + updates_per_graph = 1.2 + beta_first = updates_per_graph / (p * num_edges) + + betas[betas < beta_first] = beta_first + return np.array(betas) + + + +def gaussian_KL(q_mu, q_sigma): + """Computes the KL distance between a normal distribution and the standard normal. + Args: + q_mu: Mean of distribution q. + q_sigma: Standard deviation of distribution q. + p_mu: Mean of distribution p. + p_sigma: Standard deviation of distribution p. + Returns: + The KL distance, summed over all dimensions except the batch dim. + """ + return sum_except_batch((torch.log(1 / q_sigma) + 0.5 * (q_sigma ** 2 + q_mu ** 2) - 0.5)) + + +def cdf_std_gaussian(x): + return 0.5 * (1. + torch.erf(x / math.sqrt(2))) + + +def SNR(gamma): + """Computes signal to noise ratio (alpha^2/sigma^2) given gamma.""" + return torch.exp(-gamma) + + +def inflate_batch_array(array, target_shape): + """ + Inflates the batch array (array) with only a single axis (i.e. shape = (batch_size,), or possibly more empty + axes (i.e. shape (batch_size, 1, ..., 1)) to match the target shape. + """ + target_shape = (array.size(0),) + (1,) * (len(target_shape) - 1) + return array.view(target_shape) + + +def sigma(gamma, target_shape): + """Computes sigma given gamma.""" + return inflate_batch_array(torch.sqrt(torch.sigmoid(gamma)), target_shape) + + +def alpha(gamma, target_shape): + """Computes alpha given gamma.""" + return inflate_batch_array(torch.sqrt(torch.sigmoid(-gamma)), target_shape) + + +def check_mask_correct(variables, node_mask): + for i, variable in enumerate(variables): + if len(variable) > 0: + assert_correctly_masked(variable, node_mask) + + +def check_tensor_same_size(*args): + for i, arg in enumerate(args): + if i == 0: + continue + assert args[0].size() == arg.size() + + +def sigma_and_alpha_t_given_s(gamma_t: torch.Tensor, gamma_s: torch.Tensor, target_size: torch.Size): + """ + Computes sigma t given s, using gamma_t and gamma_s. Used during sampling. + + These are defined as: + alpha t given s = alpha t / alpha s, + sigma t given s = sqrt(1 - (alpha t given s) ^2 ). + """ + sigma2_t_given_s = inflate_batch_array( + -torch.expm1(F.softplus(gamma_s) - F.softplus(gamma_t)), target_size + ) + + # alpha_t_given_s = alpha_t / alpha_s + log_alpha2_t = F.logsigmoid(-gamma_t) + log_alpha2_s = F.logsigmoid(-gamma_s) + log_alpha2_t_given_s = log_alpha2_t - log_alpha2_s + + alpha_t_given_s = torch.exp(0.5 * log_alpha2_t_given_s) + alpha_t_given_s = inflate_batch_array(alpha_t_given_s, target_size) + + sigma_t_given_s = torch.sqrt(sigma2_t_given_s) + + return sigma2_t_given_s, sigma_t_given_s, alpha_t_given_s + + +def reverse_tensor(x): + return x[torch.arange(x.size(0) - 1, -1, -1)] + + +def sample_feature_noise(X_size, E_size, y_size, node_mask): + """Standard normal noise for all features. + Output size: X.size(), E.size(), y.size() """ + # TODO: How to change this for the multi-gpu case? 
+ epsX = sample_gaussian(X_size) + epsE = sample_gaussian(E_size) + epsy = sample_gaussian(y_size) + + float_mask = node_mask.float() + epsX = epsX.type_as(float_mask) + epsE = epsE.type_as(float_mask) + epsy = epsy.type_as(float_mask) + + # Get upper triangular part of edge noise, without main diagonal + upper_triangular_mask = torch.zeros_like(epsE) + indices = torch.triu_indices(row=epsE.size(1), col=epsE.size(2), offset=1) + upper_triangular_mask[:, indices[0], indices[1], :] = 1 + + epsE = epsE * upper_triangular_mask + epsE = (epsE + torch.transpose(epsE, 1, 2)) + + assert (epsE == torch.transpose(epsE, 1, 2)).all() + + return PlaceHolder(X=epsX, E=epsE, y=epsy).mask(node_mask) + + +def sample_normal(mu_X, mu_E, mu_y, sigma, node_mask): + """Samples from a Normal distribution.""" + # TODO: change for multi-gpu case + eps = sample_feature_noise(mu_X.size(), mu_E.size(), mu_y.size(), node_mask).type_as(mu_X) + X = mu_X + sigma * eps.X + E = mu_E + sigma.unsqueeze(1) * eps.E + y = mu_y + sigma.squeeze(1) * eps.y + return PlaceHolder(X=X, E=E, y=y) + + +def check_issues_norm_values(gamma, norm_val1, norm_val2, num_stdevs=8): + """ Check if 1 / norm_value is still larger than 10 * standard deviation. """ + zeros = torch.zeros((1, 1)) + gamma_0 = gamma(zeros) + sigma_0 = sigma(gamma_0, target_shape=zeros.size()).item() + max_norm_value = max(norm_val1, norm_val2) + if sigma_0 * num_stdevs > 1. / max_norm_value: + raise ValueError( + f'Value for normalization value {max_norm_value} probably too ' + f'large with sigma_0 {sigma_0:.5f} and ' + f'1 / norm_value = {1. / max_norm_value}') + + +def sample_discrete_features(probX, probE, node_mask): + ''' Sample features from multinomial distribution with given probabilities (probX, probE, proby) + :param probX: bs, n, dx_out node features + :param probE: bs, n, n, de_out edge features + :param proby: bs, dy_out global features. 
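+
+    Sampling is done by flattening each probability tensor to 2-D and calling
+    torch.multinomial once, e.g. probX.reshape(bs * n, -1).multinomial(1)
+    reshaped back to (bs, n); sampled edges are then symmetrized from the
+    strict upper triangle (see the body below).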
+ ''' + bs, n, _ = probX.shape + # Noise X + # The masked rows should define probability distributions as well + probX[~node_mask] = 1 / probX.shape[-1] + + # Flatten the probability tensor to sample with multinomial + probX = probX.reshape(bs * n, -1) # (bs * n, dx_out) + + # Sample X + X_t = probX.multinomial(1) # (bs * n, 1) + X_t = X_t.reshape(bs, n) # (bs, n) + + # Noise E + # The masked rows should define probability distributions as well + inverse_edge_mask = ~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2)) + diag_mask = torch.eye(n).unsqueeze(0).expand(bs, -1, -1) + + probE[inverse_edge_mask] = 1 / probE.shape[-1] + probE[diag_mask.bool()] = 1 / probE.shape[-1] + + probE = probE.reshape(bs * n * n, -1) # (bs * n * n, de_out) + + # Sample E + E_t = probE.multinomial(1).reshape(bs, n, n) # (bs, n, n) + E_t = torch.triu(E_t, diagonal=1) + E_t = (E_t + torch.transpose(E_t, 1, 2)) + + return PlaceHolder(X=X_t, E=E_t, y=torch.zeros(bs, 0).type_as(X_t)) + + +def compute_posterior_distribution(M, M_t, Qt_M, Qsb_M, Qtb_M): + ''' M: X or E + Compute xt @ Qt.T * x0 @ Qsb / x0 @ Qtb @ xt.T + ''' + # Flatten feature tensors + M = M.flatten(start_dim=1, end_dim=-2).to(torch.float32) # (bs, N, d) with N = n or n * n + M_t = M_t.flatten(start_dim=1, end_dim=-2).to(torch.float32) # same + + Qt_M_T = torch.transpose(Qt_M, -2, -1) # (bs, d, d) + + left_term = M_t @ Qt_M_T # (bs, N, d) + right_term = M @ Qsb_M # (bs, N, d) + product = left_term * right_term # (bs, N, d) + + denom = M @ Qtb_M # (bs, N, d) @ (bs, d, d) = (bs, N, d) + denom = (denom * M_t).sum(dim=-1) # (bs, N, d) * (bs, N, d) + sum = (bs, N) + # denom = product.sum(dim=-1) + # denom[denom == 0.] = 1 + + prob = product / denom.unsqueeze(-1) # (bs, N, d) + + return prob + + +def compute_batched_over0_posterior_distribution(X_t, Qt, Qsb, Qtb): + """ M: X or E + Compute xt @ Qt.T * x0 @ Qsb / x0 @ Qtb @ xt.T for each possible value of x0 + X_t: bs, n, dt or bs, n, n, dt + Qt: bs, d_t-1, dt + Qsb: bs, d0, d_t-1 + Qtb: bs, d0, dt. + """ + # Flatten feature tensors + # Careful with this line. It does nothing if X is a node feature. If X is an edge features it maps to + # bs x (n ** 2) x d + X_t = X_t.flatten(start_dim=1, end_dim=-2).to(torch.float32) # bs x N x dt + + Qt_T = Qt.transpose(-1, -2) # bs, dt, d_t-1 + left_term = X_t @ Qt_T # bs, N, d_t-1 + left_term = left_term.unsqueeze(dim=2) # bs, N, 1, d_t-1 + + right_term = Qsb.unsqueeze(1) # bs, 1, d0, d_t-1 + numerator = left_term * right_term # bs, N, d0, d_t-1 + + X_t_transposed = X_t.transpose(-1, -2) # bs, dt, N + + prod = Qtb @ X_t_transposed # bs, d0, N + prod = prod.transpose(-1, -2) # bs, N, d0 + denominator = prod.unsqueeze(-1) # bs, N, d0, 1 + denominator[denominator == 0] = 1e-6 + + out = numerator / denominator + return out + + +def mask_distributions(true_X, true_E, pred_X, pred_E, node_mask): + """ + Set masked rows to arbitrary distributions, so it doesn't contribute to loss + :param true_X: bs, n, dx_out + :param true_E: bs, n, n, de_out + :param pred_X: bs, n, dx_out + :param pred_E: bs, n, n, de_out + :param node_mask: bs, n + :return: same sizes as input + """ + + row_X = torch.zeros(true_X.size(-1), dtype=true_X.dtype, device=true_X.device) + row_X[0] = 1. + row_E = torch.zeros(true_E.size(-1), dtype=true_E.dtype, device=true_E.device) + row_E[0] = 1. 
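+    # Rationale: masked rows in both the true and the predicted tensors are
+    # overwritten with the same deterministic one-hot distribution, so their
+    # pointwise KL is exactly zero and padding cannot leak into the loss.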
+ + diag_mask = ~torch.eye(node_mask.size(1), device=node_mask.device, dtype=torch.bool).unsqueeze(0) + true_X[~node_mask] = row_X + pred_X[~node_mask] = row_X + true_E[~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2) * diag_mask), :] = row_E + pred_E[~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2) * diag_mask), :] = row_E + + true_X = true_X + 1e-7 + pred_X = pred_X + 1e-7 + true_E = true_E + 1e-7 + pred_E = pred_E + 1e-7 + + true_X = true_X / torch.sum(true_X, dim=-1, keepdim=True) + pred_X = pred_X / torch.sum(pred_X, dim=-1, keepdim=True) + true_E = true_E / torch.sum(true_E, dim=-1, keepdim=True) + pred_E = pred_E / torch.sum(pred_E, dim=-1, keepdim=True) + + return true_X, true_E, pred_X, pred_E + + +def posterior_distributions(X, E, y, X_t, E_t, y_t, Qt, Qsb, Qtb): + prob_X = compute_posterior_distribution(M=X, M_t=X_t, Qt_M=Qt.X, Qsb_M=Qsb.X, Qtb_M=Qtb.X) # (bs, n, dx) + prob_E = compute_posterior_distribution(M=E, M_t=E_t, Qt_M=Qt.E, Qsb_M=Qsb.E, Qtb_M=Qtb.E) # (bs, n * n, de) + + return PlaceHolder(X=prob_X, E=prob_E, y=y_t) + + +def sample_discrete_feature_noise(limit_dist, node_mask): + """ Sample from the limit distribution of the diffusion process""" + bs, n_max = node_mask.shape + x_limit = limit_dist.X[None, None, :].expand(bs, n_max, -1) + e_limit = limit_dist.E[None, None, None, :].expand(bs, n_max, n_max, -1) + y_limit = limit_dist.y[None, :].expand(bs, -1) + U_X = x_limit.flatten(end_dim=-2).multinomial(1).reshape(bs, n_max) + U_E = e_limit.flatten(end_dim=-2).multinomial(1).reshape(bs, n_max, n_max) + U_y = torch.empty((bs, 0)) + + long_mask = node_mask.long() + U_X = U_X.type_as(long_mask) + U_E = U_E.type_as(long_mask) + U_y = U_y.type_as(long_mask) + + U_X = F.one_hot(U_X, num_classes=x_limit.shape[-1]).float() + U_E = F.one_hot(U_E, num_classes=e_limit.shape[-1]).float() + + # Get upper triangular part of edge noise, without main diagonal + upper_triangular_mask = torch.zeros_like(U_E) + indices = torch.triu_indices(row=U_E.size(1), col=U_E.size(2), offset=1) + upper_triangular_mask[:, indices[0], indices[1], :] = 1 + + U_E = U_E * upper_triangular_mask + U_E = (U_E + torch.transpose(U_E, 1, 2)) + + assert (U_E == torch.transpose(U_E, 1, 2)).all() + + return PlaceHolder(X=U_X, E=U_E, y=U_y).mask(node_mask) + + diff --git a/openfl-tutorials/experimental/DiGress/digress/diffusion/distributions.py b/openfl-tutorials/experimental/DiGress/digress/diffusion/distributions.py new file mode 100644 index 0000000000..9db327eccf --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/diffusion/distributions.py @@ -0,0 +1,34 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch + + +class DistributionNodes: + def __init__(self, histogram): + """ Compute the distribution of the number of nodes in the dataset, and sample from this distribution. + historgram: dict. 
The keys are num_nodes, the values are counts + """ + + if type(histogram) == dict: + max_n_nodes = max(histogram.keys()) + prob = torch.zeros(max_n_nodes + 1) + for num_nodes, count in histogram.items(): + prob[num_nodes] = count + else: + prob = histogram + + self.prob = prob / prob.sum() + self.m = torch.distributions.Categorical(prob) + + def sample_n(self, n_samples, device): + idx = self.m.sample((n_samples,)) + return idx.to(device) + + def log_prob(self, batch_n_nodes): + assert len(batch_n_nodes.size()) == 1 + p = self.prob.to(batch_n_nodes.device) + + probas = p[batch_n_nodes] + log_p = torch.log(probas + 1e-30) + return log_p diff --git a/openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features.py b/openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features.py new file mode 100644 index 0000000000..ad069c52ea --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features.py @@ -0,0 +1,278 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +from digress import utils + + +class DummyExtraFeatures: + def __init__(self): + """ This class does not compute anything, just returns empty tensors.""" + + def __call__(self, noisy_data): + X = noisy_data['X_t'] + E = noisy_data['E_t'] + y = noisy_data['y_t'] + empty_x = X.new_zeros((*X.shape[:-1], 0)) + empty_e = E.new_zeros((*E.shape[:-1], 0)) + empty_y = y.new_zeros((y.shape[0], 0)) + return utils.PlaceHolder(X=empty_x, E=empty_e, y=empty_y) + + +class ExtraFeatures: + def __init__(self, extra_features_type, dataset_info): + self.max_n_nodes = dataset_info.max_n_nodes + self.ncycles = NodeCycleFeatures() + self.features_type = extra_features_type + if extra_features_type in ['eigenvalues', 'all']: + self.eigenfeatures = EigenFeatures(mode=extra_features_type) + + def __call__(self, noisy_data): + n = noisy_data['node_mask'].sum(dim=1).unsqueeze(1) / self.max_n_nodes + x_cycles, y_cycles = self.ncycles(noisy_data) # (bs, n_cycles) + + if self.features_type == 'cycles': + E = noisy_data['E_t'] + extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E) + return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles))) + + elif self.features_type == 'eigenvalues': + eigenfeatures = self.eigenfeatures(noisy_data) + E = noisy_data['E_t'] + extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E) + n_components, batched_eigenvalues = eigenfeatures # (bs, 1), (bs, 10) + return utils.PlaceHolder(X=x_cycles, E=extra_edge_attr, y=torch.hstack((n, y_cycles, n_components, + batched_eigenvalues))) + elif self.features_type == 'all': + eigenfeatures = self.eigenfeatures(noisy_data) + E = noisy_data['E_t'] + extra_edge_attr = torch.zeros((*E.shape[:-1], 0)).type_as(E) + n_components, batched_eigenvalues, nonlcc_indicator, k_lowest_eigvec = eigenfeatures # (bs, 1), (bs, 10), + # (bs, n, 1), (bs, n, 2) + + return utils.PlaceHolder(X=torch.cat((x_cycles, nonlcc_indicator, k_lowest_eigvec), dim=-1), + E=extra_edge_attr, + y=torch.hstack((n, y_cycles, n_components, batched_eigenvalues))) + else: + raise ValueError(f"Features type {self.features_type} not implemented") + + +class NodeCycleFeatures: + def __init__(self): + self.kcycles = KNodeCycles() + + def __call__(self, noisy_data): + adj_matrix = noisy_data['E_t'][..., 1:].sum(dim=-1).float() + + x_cycles, y_cycles = self.kcycles.k_cycles(adj_matrix=adj_matrix) # (bs, n_cycles) + x_cycles = x_cycles.type_as(adj_matrix) * 
noisy_data['node_mask'].unsqueeze(-1) + # Avoid large values when the graph is dense + x_cycles = x_cycles / 10 + y_cycles = y_cycles / 10 + x_cycles[x_cycles > 1] = 1 + y_cycles[y_cycles > 1] = 1 + return x_cycles, y_cycles + + +class EigenFeatures: + """ + Code taken from : https://github.com/Saro00/DGN/blob/master/models/pytorch/eigen_agg.py + """ + def __init__(self, mode): + """ mode: 'eigenvalues' or 'all' """ + self.mode = mode + + def __call__(self, noisy_data): + E_t = noisy_data['E_t'] + mask = noisy_data['node_mask'] + A = E_t[..., 1:].sum(dim=-1).float() * mask.unsqueeze(1) * mask.unsqueeze(2) + L = compute_laplacian(A, normalize=False) + mask_diag = 2 * L.shape[-1] * torch.eye(A.shape[-1]).type_as(L).unsqueeze(0) + mask_diag = mask_diag * (~mask.unsqueeze(1)) * (~mask.unsqueeze(2)) + L = L * mask.unsqueeze(1) * mask.unsqueeze(2) + mask_diag + + if self.mode == 'eigenvalues': + eigvals = torch.linalg.eigvalsh(L) # bs, n + eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True) + + n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals) + return n_connected_comp.type_as(A), batch_eigenvalues.type_as(A) + + elif self.mode == 'all': + eigvals, eigvectors = torch.linalg.eigh(L) + eigvals = eigvals.type_as(A) / torch.sum(mask, dim=1, keepdim=True) + eigvectors = eigvectors * mask.unsqueeze(2) * mask.unsqueeze(1) + # Retrieve eigenvalues features + n_connected_comp, batch_eigenvalues = get_eigenvalues_features(eigenvalues=eigvals) + + # Retrieve eigenvectors features + nonlcc_indicator, k_lowest_eigenvector = get_eigenvectors_features(vectors=eigvectors, + node_mask=noisy_data['node_mask'], + n_connected=n_connected_comp) + return n_connected_comp, batch_eigenvalues, nonlcc_indicator, k_lowest_eigenvector + else: + raise NotImplementedError(f"Mode {self.mode} is not implemented") + + +def compute_laplacian(adjacency, normalize: bool): + """ + adjacency : batched adjacency matrix (bs, n, n) + normalize: can be None, 'sym' or 'rw' for the combinatorial, symmetric normalized or random walk Laplacians + Return: + L (n x n ndarray): combinatorial or symmetric normalized Laplacian. 
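+
+    For reference: the combinatorial Laplacian is L = D - A and the symmetric
+    normalized one is L_sym = I - D^{-1/2} A D^{-1/2}. Note that in this
+    version `normalize` is a plain boolean selecting between the two (the
+    'rw' variant mentioned above is not implemented), and both results are
+    re-symmetrized as (L + L^T) / 2 to absorb numerical noise.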
+ """ + diag = torch.sum(adjacency, dim=-1) # (bs, n) + n = diag.shape[-1] + D = torch.diag_embed(diag) # Degree matrix # (bs, n, n) + combinatorial = D - adjacency # (bs, n, n) + + if not normalize: + return (combinatorial + combinatorial.transpose(1, 2)) / 2 + + diag0 = diag.clone() + diag[diag == 0] = 1e-12 + + diag_norm = 1 / torch.sqrt(diag) # (bs, n) + D_norm = torch.diag_embed(diag_norm) # (bs, n, n) + L = torch.eye(n).unsqueeze(0) - D_norm @ adjacency @ D_norm + L[diag0 == 0] = 0 + return (L + L.transpose(1, 2)) / 2 + + +def get_eigenvalues_features(eigenvalues, k=5): + """ + values : eigenvalues -- (bs, n) + node_mask: (bs, n) + k: num of non zero eigenvalues to keep + """ + ev = eigenvalues + bs, n = ev.shape + n_connected_components = (ev < 1e-5).sum(dim=-1) + assert (n_connected_components > 0).all(), (n_connected_components, ev) + + to_extend = max(n_connected_components) + k - n + if to_extend > 0: + eigenvalues = torch.hstack((eigenvalues, 2 * torch.ones(bs, to_extend).type_as(eigenvalues))) + indices = torch.arange(k).type_as(eigenvalues).long().unsqueeze(0) + n_connected_components.unsqueeze(1) + first_k_ev = torch.gather(eigenvalues, dim=1, index=indices) + return n_connected_components.unsqueeze(-1), first_k_ev + + +def get_eigenvectors_features(vectors, node_mask, n_connected, k=2): + """ + vectors (bs, n, n) : eigenvectors of Laplacian IN COLUMNS + returns: + not_lcc_indicator : indicator vectors of largest connected component (lcc) for each graph -- (bs, n, 1) + k_lowest_eigvec : k first eigenvectors for the largest connected component -- (bs, n, k) + """ + bs, n = vectors.size(0), vectors.size(1) + + # Create an indicator for the nodes outside the largest connected components + first_ev = torch.round(vectors[:, :, 0], decimals=3) * node_mask # bs, n + # Add random value to the mask to prevent 0 from becoming the mode + random = torch.randn(bs, n, device=node_mask.device) * (~node_mask) # bs, n + first_ev = first_ev + random + most_common = torch.mode(first_ev, dim=1).values # values: bs -- indices: bs + mask = ~ (first_ev == most_common.unsqueeze(1)) + not_lcc_indicator = (mask * node_mask).unsqueeze(-1).float() + + # Get the eigenvectors corresponding to the first nonzero eigenvalues + to_extend = max(n_connected) + k - n + if to_extend > 0: + vectors = torch.cat((vectors, torch.zeros(bs, n, to_extend).type_as(vectors)), dim=2) # bs, n , n + to_extend + indices = torch.arange(k).type_as(vectors).long().unsqueeze(0).unsqueeze(0) + n_connected.unsqueeze(2) # bs, 1, k + indices = indices.expand(-1, n, -1) # bs, n, k + first_k_ev = torch.gather(vectors, dim=2, index=indices) # bs, n, k + first_k_ev = first_k_ev * node_mask.unsqueeze(2) + + return not_lcc_indicator, first_k_ev + +def batch_trace(X): + """ + Expect a matrix of shape B N N, returns the trace in shape B + :param X: + :return: + """ + diag = torch.diagonal(X, dim1=-2, dim2=-1) + trace = diag.sum(dim=-1) + return trace + + +def batch_diagonal(X): + """ + Extracts the diagonal from the last two dims of a tensor + :param X: + :return: + """ + return torch.diagonal(X, dim1=-2, dim2=-1) + + +class KNodeCycles: + """ Builds cycle counts for each node in a graph. 
+ """ + + def __init__(self): + super().__init__() + + def calculate_kpowers(self): + self.k1_matrix = self.adj_matrix.float() + self.d = self.adj_matrix.sum(dim=-1) + self.k2_matrix = self.k1_matrix @ self.adj_matrix.float() + self.k3_matrix = self.k2_matrix @ self.adj_matrix.float() + self.k4_matrix = self.k3_matrix @ self.adj_matrix.float() + self.k5_matrix = self.k4_matrix @ self.adj_matrix.float() + self.k6_matrix = self.k5_matrix @ self.adj_matrix.float() + + def k3_cycle(self): + """ tr(A ** 3). """ + c3 = batch_diagonal(self.k3_matrix) + return (c3 / 2).unsqueeze(-1).float(), (torch.sum(c3, dim=-1) / 6).unsqueeze(-1).float() + + def k4_cycle(self): + diag_a4 = batch_diagonal(self.k4_matrix) + c4 = diag_a4 - self.d * (self.d - 1) - (self.adj_matrix @ self.d.unsqueeze(-1)).sum(dim=-1) + return (c4 / 2).unsqueeze(-1).float(), (torch.sum(c4, dim=-1) / 8).unsqueeze(-1).float() + + def k5_cycle(self): + diag_a5 = batch_diagonal(self.k5_matrix) + triangles = batch_diagonal(self.k3_matrix) + c5 = diag_a5 - 2 * triangles * self.d - (self.adj_matrix @ triangles.unsqueeze(-1)).sum(dim=-1) + triangles + return (c5 / 2).unsqueeze(-1).float(), (c5.sum(dim=-1) / 10).unsqueeze(-1).float() + + def k6_cycle(self): + term_1_t = batch_trace(self.k6_matrix) + term_2_t = batch_trace(self.k3_matrix ** 2) + term3_t = torch.sum(self.adj_matrix * self.k2_matrix.pow(2), dim=[-2, -1]) + d_t4 = batch_diagonal(self.k2_matrix) + a_4_t = batch_diagonal(self.k4_matrix) + term_4_t = (d_t4 * a_4_t).sum(dim=-1) + term_5_t = batch_trace(self.k4_matrix) + term_6_t = batch_trace(self.k3_matrix) + term_7_t = batch_diagonal(self.k2_matrix).pow(3).sum(-1) + term8_t = torch.sum(self.k3_matrix, dim=[-2, -1]) + term9_t = batch_diagonal(self.k2_matrix).pow(2).sum(-1) + term10_t = batch_trace(self.k2_matrix) + + c6_t = (term_1_t - 3 * term_2_t + 9 * term3_t - 6 * term_4_t + 6 * term_5_t - 4 * term_6_t + 4 * term_7_t + + 3 * term8_t - 12 * term9_t + 4 * term10_t) + return None, (c6_t / 12).unsqueeze(-1).float() + + def k_cycles(self, adj_matrix, verbose=False): + self.adj_matrix = adj_matrix + self.calculate_kpowers() + + k3x, k3y = self.k3_cycle() + # assert (k3x >= -0.1).all() + + k4x, k4y = self.k4_cycle() + # assert (k4x >= -0.1).all() + + k5x, k5y = self.k5_cycle() + # assert (k5x >= -0.1).all(), k5x + + _, k6y = self.k6_cycle() + # assert (k6y >= -0.1).all() + + kcyclesx = torch.cat([k3x, k4x, k5x], dim=-1) + kcyclesy = torch.cat([k3y, k4y, k5y, k6y], dim=-1) + return kcyclesx, kcyclesy \ No newline at end of file diff --git a/openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features_molecular.py b/openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features_molecular.py new file mode 100644 index 0000000000..be2b30431a --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/diffusion/extra_features_molecular.py @@ -0,0 +1,60 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +from digress import utils + + +class ExtraMolecularFeatures: + def __init__(self, dataset_infos): + self.charge = ChargeFeature(remove_h=dataset_infos.remove_h, valencies=dataset_infos.valencies) + self.valency = ValencyFeature() + self.weight = WeightFeature(max_weight=dataset_infos.max_weight, atom_weights=dataset_infos.atom_weights) + + def __call__(self, noisy_data): + charge = self.charge(noisy_data).unsqueeze(-1) # (bs, n, 1) + valency = self.valency(noisy_data).unsqueeze(-1) # (bs, n, 1) + weight = self.weight(noisy_data) 
# (bs, 1) + + extra_edge_attr = torch.zeros((*noisy_data['E_t'].shape[:-1], 0)).type_as(noisy_data['E_t']) + + return utils.PlaceHolder(X=torch.cat((charge, valency), dim=-1), E=extra_edge_attr, y=weight) + + +class ChargeFeature: + def __init__(self, remove_h, valencies): + self.remove_h = remove_h + self.valencies = valencies + + def __call__(self, noisy_data): + bond_orders = torch.tensor([0, 1, 2, 3, 1.5], device=noisy_data['E_t'].device).reshape(1, 1, 1, -1) + weighted_E = noisy_data['E_t'] * bond_orders # (bs, n, n, de) + current_valencies = weighted_E.argmax(dim=-1).sum(dim=-1) # (bs, n) + + valencies = torch.tensor(self.valencies, device=noisy_data['X_t'].device).reshape(1, 1, -1) + X = noisy_data['X_t'] * valencies # (bs, n, dx) + normal_valencies = torch.argmax(X, dim=-1) # (bs, n) + + return (normal_valencies - current_valencies).type_as(noisy_data['X_t']) + + +class ValencyFeature: + def __init__(self): + pass + + def __call__(self, noisy_data): + orders = torch.tensor([0, 1, 2, 3, 1.5], device=noisy_data['E_t'].device).reshape(1, 1, 1, -1) + E = noisy_data['E_t'] * orders # (bs, n, n, de) + valencies = E.argmax(dim=-1).sum(dim=-1) # (bs, n) + return valencies.type_as(noisy_data['X_t']) + + +class WeightFeature: + def __init__(self, max_weight, atom_weights): + self.max_weight = max_weight + self.atom_weight_list = torch.tensor(list(atom_weights.values())) + + def __call__(self, noisy_data): + X = torch.argmax(noisy_data['X_t'], dim=-1) # (bs, n) + X_weights = self.atom_weight_list.to(X.device)[X] # (bs, n) + return X_weights.sum(dim=-1).unsqueeze(-1).type_as(noisy_data['X_t']) / self.max_weight # (bs, 1) \ No newline at end of file diff --git a/openfl-tutorials/experimental/DiGress/digress/diffusion/noise_schedule.py b/openfl-tutorials/experimental/DiGress/digress/diffusion/noise_schedule.py new file mode 100644 index 0000000000..488c22f7c2 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/diffusion/noise_schedule.py @@ -0,0 +1,226 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import numpy as np +import torch +from digress import utils +from digress.diffusion import diffusion_utils + + +class PredefinedNoiseSchedule(torch.nn.Module): + """ + Predefined noise schedule. Essentially creates a lookup array for predefined (non-learned) noise schedules. + """ + + def __init__(self, noise_schedule, timesteps): + super(PredefinedNoiseSchedule, self).__init__() + self.timesteps = timesteps + + if noise_schedule == 'cosine': + alphas2 = diffusion_utils.cosine_beta_schedule(timesteps) + elif noise_schedule == 'custom': + raise NotImplementedError() + else: + raise ValueError(noise_schedule) + + # print('alphas2', alphas2) + + sigmas2 = 1 - alphas2 + + log_alphas2 = np.log(alphas2) + log_sigmas2 = np.log(sigmas2) + + log_alphas2_to_sigmas2 = log_alphas2 - log_sigmas2 # (timesteps + 1, ) + + # print('gamma', -log_alphas2_to_sigmas2) + + self.gamma = torch.nn.Parameter( + torch.from_numpy(-log_alphas2_to_sigmas2).float(), + requires_grad=False) + + def forward(self, t): + t_int = torch.round(t * self.timesteps).long() + return self.gamma[t_int] + + + +class PredefinedNoiseScheduleDiscrete(torch.nn.Module): + """ + Predefined noise schedule. Essentially creates a lookup array for predefined (non-learned) noise schedules. 
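+
+    Illustrative usage (a sketch, not part of the upstream code):
+        >>> schedule = PredefinedNoiseScheduleDiscrete('cosine', timesteps=500)
+        >>> beta_t = schedule(t_int=torch.tensor([10]))          # shape (1,)
+        >>> alpha_bar_t = schedule.get_alpha_bar(t_int=torch.tensor([10]))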
+ """ + + def __init__(self, noise_schedule, timesteps): + super(PredefinedNoiseScheduleDiscrete, self).__init__() + self.timesteps = timesteps + + if noise_schedule == 'cosine': + betas = diffusion_utils.cosine_beta_schedule_discrete(timesteps) + elif noise_schedule == 'custom': + betas = diffusion_utils.custom_beta_schedule_discrete(timesteps) + else: + raise NotImplementedError(noise_schedule) + + self.register_buffer('betas', torch.from_numpy(betas).float()) + + self.alphas = 1 - torch.clamp(self.betas, min=0, max=0.9999) + + log_alpha = torch.log(self.alphas) + log_alpha_bar = torch.cumsum(log_alpha, dim=0) + self.alphas_bar = torch.exp(log_alpha_bar) + # print(f"[Noise schedule: {noise_schedule}] alpha_bar:", self.alphas_bar) + + def forward(self, t_normalized=None, t_int=None): + assert int(t_normalized is None) + int(t_int is None) == 1 + if t_int is None: + t_int = torch.round(t_normalized * self.timesteps) + return self.betas[t_int.long()] + + def get_alpha_bar(self, t_normalized=None, t_int=None): + assert int(t_normalized is None) + int(t_int is None) == 1 + if t_int is None: + t_int = torch.round(t_normalized * self.timesteps) + return self.alphas_bar.to(t_int.device)[t_int.long()] + + +class DiscreteUniformTransition: + def __init__(self, x_classes: int, e_classes: int, y_classes: int): + self.X_classes = x_classes + self.E_classes = e_classes + self.y_classes = y_classes + self.u_x = torch.ones(1, self.X_classes, self.X_classes) + if self.X_classes > 0: + self.u_x = self.u_x / self.X_classes + + self.u_e = torch.ones(1, self.E_classes, self.E_classes) + if self.E_classes > 0: + self.u_e = self.u_e / self.E_classes + + self.u_y = torch.ones(1, self.y_classes, self.y_classes) + if self.y_classes > 0: + self.u_y = self.u_y / self.y_classes + + def get_Qt(self, beta_t, device): + """ Returns one-step transition matrices for X and E, from step t - 1 to step t. + Qt = (1 - beta_t) * I + beta_t / K + + beta_t: (bs) noise level between 0 and 1 + returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy). + """ + beta_t = beta_t.unsqueeze(1) + beta_t = beta_t.to(device) + self.u_x = self.u_x.to(device) + self.u_e = self.u_e.to(device) + self.u_y = self.u_y.to(device) + + q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes, device=device).unsqueeze(0) + q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes, device=device).unsqueeze(0) + q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes, device=device).unsqueeze(0) + + return utils.PlaceHolder(X=q_x, E=q_e, y=q_y) + + def get_Qt_bar(self, alpha_bar_t, device): + """ Returns t-step transition matrices for X and E, from step 0 to step t. + Qt = prod(1 - beta_t) * I + (1 - prod(1 - beta_t)) / K + + alpha_bar_t: (bs) Product of the (1 - beta_t) for each time step from 0 to t. + returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy). 
+ """ + alpha_bar_t = alpha_bar_t.unsqueeze(1) + alpha_bar_t = alpha_bar_t.to(device) + self.u_x = self.u_x.to(device) + self.u_e = self.u_e.to(device) + self.u_y = self.u_y.to(device) + + q_x = alpha_bar_t * torch.eye(self.X_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x + q_e = alpha_bar_t * torch.eye(self.E_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e + q_y = alpha_bar_t * torch.eye(self.y_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y + + return utils.PlaceHolder(X=q_x, E=q_e, y=q_y) + + +class MarginalUniformTransition: + def __init__(self, x_marginals, e_marginals, y_classes): + self.X_classes = len(x_marginals) + self.E_classes = len(e_marginals) + self.y_classes = y_classes + self.x_marginals = x_marginals + self.e_marginals = e_marginals + + self.u_x = x_marginals.unsqueeze(0).expand(self.X_classes, -1).unsqueeze(0) + self.u_e = e_marginals.unsqueeze(0).expand(self.E_classes, -1).unsqueeze(0) + self.u_y = torch.ones(1, self.y_classes, self.y_classes) + if self.y_classes > 0: + self.u_y = self.u_y / self.y_classes + + def get_Qt(self, beta_t, device): + """ Returns one-step transition matrices for X and E, from step t - 1 to step t. + Qt = (1 - beta_t) * I + beta_t / K + + beta_t: (bs) noise level between 0 and 1 + returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy). """ + beta_t = beta_t.unsqueeze(1) + beta_t = beta_t.to(device) + self.u_x = self.u_x.to(device) + self.u_e = self.u_e.to(device) + self.u_y = self.u_y.to(device) + + q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes, device=device).unsqueeze(0) + q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes, device=device).unsqueeze(0) + q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes, device=device).unsqueeze(0) + + return utils.PlaceHolder(X=q_x, E=q_e, y=q_y) + + def get_Qt_bar(self, alpha_bar_t, device): + """ Returns t-step transition matrices for X and E, from step 0 to step t. + Qt = prod(1 - beta_t) * I + (1 - prod(1 - beta_t)) * K + + alpha_bar_t: (bs) Product of the (1 - beta_t) for each time step from 0 to t. + returns: qx (bs, dx, dx), qe (bs, de, de), qy (bs, dy, dy). 
+ """ + alpha_bar_t = alpha_bar_t.unsqueeze(1) + alpha_bar_t = alpha_bar_t.to(device) + self.u_x = self.u_x.to(device) + self.u_e = self.u_e.to(device) + self.u_y = self.u_y.to(device) + + q_x = alpha_bar_t * torch.eye(self.X_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x + q_e = alpha_bar_t * torch.eye(self.E_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e + q_y = alpha_bar_t * torch.eye(self.y_classes, device=device).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y + + return utils.PlaceHolder(X=q_x, E=q_e, y=q_y) + + +class AbsorbingStateTransition: + def __init__(self, abs_state: int, x_classes: int, e_classes: int, y_classes: int): + self.X_classes = x_classes + self.E_classes = e_classes + self.y_classes = y_classes + + self.u_x = torch.zeros(1, self.X_classes, self.X_classes) + self.u_x[:, :, abs_state] = 1 + + self.u_e = torch.zeros(1, self.E_classes, self.E_classes) + self.u_e[:, :, abs_state] = 1 + + self.u_y = torch.zeros(1, self.y_classes, self.y_classes) + self.u_e[:, :, abs_state] = 1 + + def get_Qt(self, beta_t): + """ Returns two transition matrix for X and E""" + beta_t = beta_t.unsqueeze(1) + q_x = beta_t * self.u_x + (1 - beta_t) * torch.eye(self.X_classes).unsqueeze(0) + q_e = beta_t * self.u_e + (1 - beta_t) * torch.eye(self.E_classes).unsqueeze(0) + q_y = beta_t * self.u_y + (1 - beta_t) * torch.eye(self.y_classes).unsqueeze(0) + return q_x, q_e, q_y + + def get_Qt_bar(self, alpha_bar_t): + """ beta_t: (bs) + Returns transition matrices for X and E""" + + alpha_bar_t = alpha_bar_t.unsqueeze(1) + + q_x = alpha_bar_t * torch.eye(self.X_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_x + q_e = alpha_bar_t * torch.eye(self.E_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_e + q_y = alpha_bar_t * torch.eye(self.y_classes).unsqueeze(0) + (1 - alpha_bar_t) * self.u_y + + return q_x, q_e, q_y diff --git a/openfl-tutorials/experimental/DiGress/digress/diffusion_model_discrete.py b/openfl-tutorials/experimental/DiGress/digress/diffusion_model_discrete.py new file mode 100644 index 0000000000..77d2c6be47 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/diffusion_model_discrete.py @@ -0,0 +1,696 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +import torch.nn as nn +import torch.nn.functional as F +import pytorch_lightning as pl +import time +# import wandb +import os +import tqdm + +from digress.models.transformer_model import GraphTransformer +from digress.diffusion.noise_schedule import DiscreteUniformTransition, PredefinedNoiseScheduleDiscrete,\ + MarginalUniformTransition +from digress.diffusion import diffusion_utils +from digress.metrics.train_metrics import TrainLossDiscrete +from digress.metrics.abstract_metrics import SumExceptBatchMetric, SumExceptBatchKL, NLL +from digress import utils + + +class DiscreteDenoisingDiffusion(pl.LightningModule): + def __init__(self, cfg, dataset_infos, train_metrics, sampling_metrics, visualization_tools, extra_features, + domain_features): + super().__init__() + + input_dims = dataset_infos.input_dims + output_dims = dataset_infos.output_dims + nodes_dist = dataset_infos.nodes_dist + + self.cfg = cfg + self.name = cfg.general.name + self.model_dtype = torch.float32 + self.T = cfg.model.diffusion_steps + + self.Xdim = input_dims['X'] + self.Edim = input_dims['E'] + self.ydim = input_dims['y'] + self.Xdim_output = output_dims['X'] + self.Edim_output = output_dims['E'] + self.ydim_output = 
output_dims['y'] + self.node_dist = nodes_dist + + self.dataset_info = dataset_infos + + self.train_loss = TrainLossDiscrete(self.cfg.model.lambda_train) + + self.val_nll = NLL() + self.val_X_kl = SumExceptBatchKL() + self.val_E_kl = SumExceptBatchKL() + self.val_X_logp = SumExceptBatchMetric() + self.val_E_logp = SumExceptBatchMetric() + + self.test_nll = NLL() + self.test_X_kl = SumExceptBatchKL() + self.test_E_kl = SumExceptBatchKL() + self.test_X_logp = SumExceptBatchMetric() + self.test_E_logp = SumExceptBatchMetric() + + self.train_metrics = train_metrics + self.sampling_metrics = sampling_metrics + + self.visualization_tools = visualization_tools + self.extra_features = extra_features + self.domain_features = domain_features + + self.model = GraphTransformer(n_layers=cfg.model.n_layers, + input_dims=input_dims, + hidden_mlp_dims=cfg.model.hidden_mlp_dims, + hidden_dims=cfg.model.hidden_dims, + output_dims=output_dims, + act_fn_in=nn.ReLU(), + act_fn_out=nn.ReLU()) + + if getattr(self.cfg.model, 'torch_compile', False): + print("Compiling the model...") + self.model = torch.compile(self.model) + + self.noise_schedule = PredefinedNoiseScheduleDiscrete(cfg.model.diffusion_noise_schedule, + timesteps=cfg.model.diffusion_steps) + + if self.cfg.model.transition == 'uniform': + self.transition_model = DiscreteUniformTransition(x_classes=self.Xdim_output, e_classes=self.Edim_output, + y_classes=self.ydim_output) + x_limit = torch.ones(self.Xdim_output) / self.Xdim_output + e_limit = torch.ones(self.Edim_output) / self.Edim_output + y_limit = torch.ones(self.ydim_output) / self.ydim_output + self.limit_dist = utils.PlaceHolder(X=x_limit, E=e_limit, y=y_limit) + elif self.cfg.model.transition == 'marginal': + + node_types = self.dataset_info.node_types.float() + x_marginals = node_types / torch.sum(node_types) + + edge_types = self.dataset_info.edge_types.float() + e_marginals = edge_types / torch.sum(edge_types) + print(f"Marginal distribution of the classes: {x_marginals} for nodes, {e_marginals} for edges") + self.transition_model = MarginalUniformTransition(x_marginals=x_marginals, e_marginals=e_marginals, + y_classes=self.ydim_output) + self.limit_dist = utils.PlaceHolder(X=x_marginals, E=e_marginals, + y=torch.ones(self.ydim_output) / self.ydim_output) + else: + raise ValueError(f"Invalid transition type: {self.cfg.model.transition}. Expected 'uniform' or 'marginal'.") + + self.save_hyperparameters(ignore=['train_metrics', 'sampling_metrics']) + self.start_epoch_time = None + self.train_iterations = None + self.val_iterations = None + # self.log_every_steps = cfg.general.log_every_steps + self.number_chain_steps = cfg.general.number_chain_steps + self.best_val_nll = 1e8 + self.val_counter = 0 + + def training_step(self, data, i): + if data.edge_index.numel() == 0: + self.print("Found a batch with no edges. 
Skipping.") + return + dense_data, node_mask = utils.to_dense(data.x, data.edge_index, data.edge_attr, data.batch) + dense_data = dense_data.mask(node_mask) + X, E = dense_data.X, dense_data.E + noisy_data = self.apply_noise(X, E, data.y, node_mask) + extra_data = self.compute_extra_data(noisy_data) + pred = self.forward(noisy_data, extra_data, node_mask) + loss = self.train_loss(masked_pred_X=pred.X, masked_pred_E=pred.E, pred_y=pred.y, + true_X=X, true_E=E, true_y=data.y, + log=False) + # log=i % self.log_every_steps == 0) + + self.train_metrics(masked_pred_X=pred.X, masked_pred_E=pred.E, true_X=X, true_E=E, + log=False) + # log=i % self.log_every_steps == 0) + + self.log_dict({'train loss': loss}) + + return {'loss': loss} + + def configure_optimizers(self): + return torch.optim.AdamW(self.parameters(), lr=self.cfg.train.lr, amsgrad=True, + weight_decay=self.cfg.train.weight_decay) + + # def on_fit_start(self) -> None: + # self.train_iterations = len(self.trainer.datamodule.train_dataloader()) + # self.print("Size of the input features", self.Xdim, self.Edim, self.ydim) + # # if self.local_rank == 0: + # # utils.setup_wandb(self.cfg) + + def on_train_epoch_start(self) -> None: + self.print("Starting train epoch...") + self.start_epoch_time = time.time() + self.train_loss.reset() + self.train_metrics.reset() + + # def on_train_epoch_end(self) -> None: + # to_log = self.train_loss.log_epoch_metrics() + # self.print(f"Epoch {self.current_epoch}: X_CE: {to_log['train_epoch/x_CE'] :.3f}" + # f" -- E_CE: {to_log['train_epoch/E_CE'] :.3f} --" + # f" y_CE: {to_log['train_epoch/y_CE'] :.3f}" + # f" -- {time.time() - self.start_epoch_time:.1f}s ") + # epoch_at_metrics, epoch_bond_metrics = self.train_metrics.log_epoch_metrics() + # self.print(f"Epoch {self.current_epoch}: {epoch_at_metrics} -- {epoch_bond_metrics}") + # print(torch.cuda.memory_summary()) + + def on_validation_epoch_start(self) -> None: + self.val_nll.reset() + self.val_X_kl.reset() + self.val_E_kl.reset() + self.val_X_logp.reset() + self.val_E_logp.reset() + self.sampling_metrics.reset() + + def validation_step(self, data, i): + dense_data, node_mask = utils.to_dense(data.x, data.edge_index, data.edge_attr, data.batch) + dense_data = dense_data.mask(node_mask) + noisy_data = self.apply_noise(dense_data.X, dense_data.E, data.y, node_mask) + extra_data = self.compute_extra_data(noisy_data) + pred = self.forward(noisy_data, extra_data, node_mask) + nll = self.compute_val_loss(pred, noisy_data, dense_data.X, dense_data.E, data.y, node_mask, test=False) + + # self.log_dict({'val loss': nll}, on_epoch=True) + + return {'loss': nll} + + def on_validation_epoch_end(self) -> None: + metrics = [self.val_nll.compute(), self.val_X_kl.compute() * self.T, self.val_E_kl.compute() * self.T, + self.val_X_logp.compute(), self.val_E_logp.compute()] + # if wandb.run: + # wandb.log({"val/epoch_NLL": metrics[0], + # "val/X_kl": metrics[1], + # "val/E_kl": metrics[2], + # "val/X_logp": metrics[3], + # "val/E_logp": metrics[4]}, commit=False) + + self.print(f"Epoch {self.current_epoch}: Val NLL {metrics[0] :.2f} -- Val Atom type KL {metrics[1] :.2f} -- ", + f"Val Edge type KL: {metrics[2] :.2f}") + + # Log val nll with default Lightning logger, so it can be monitored by checkpoint callback + val_nll = metrics[0] + # self.log("val/epoch_NLL", val_nll, sync_dist=True) + + if val_nll < self.best_val_nll: + self.best_val_nll = val_nll + self.print('Val loss: %.4f \t Best val loss: %.4f\n' % (val_nll, self.best_val_nll)) + + self.val_counter += 1 + if 
getattr(getattr(self.cfg, 'guidance', None), 'use_guidance', False): + self.log_dict({'val_nll': val_nll}) + else: + # Do not perform sampling using just the diffusion model if guidance is used + if self.val_counter % self.cfg.general.sample_every_val == 0: + start = time.time() + samples_left_to_generate = self.cfg.general.samples_to_generate + samples_left_to_save = self.cfg.general.samples_to_save + chains_left_to_save = self.cfg.general.chains_to_save + + samples = [] + + ident = 0 + while samples_left_to_generate > 0: + bs = 2 * self.cfg.train.batch_size + to_generate = min(samples_left_to_generate, bs) + to_save = min(samples_left_to_save, bs) + chains_save = min(chains_left_to_save, bs) + samples.extend(self.sample_batch(batch_id=ident, batch_size=to_generate, num_nodes=None, + save_final=to_save, + keep_chain=chains_save, + number_chain_steps=self.number_chain_steps)) + ident += to_generate + + samples_left_to_save -= to_save + samples_left_to_generate -= to_generate + chains_left_to_save -= chains_save + self.print("Computing sampling metrics...") + self.sampling_metrics.forward(samples, self.name, self.current_epoch, val_counter=-1, test=False, + local_rank=self.local_rank, cfg=self.cfg) + self.print(f'Done. Sampling took {time.time() - start:.2f} seconds\n') + + self.log_dict({'val_nll': val_nll, + 'Validity': self.sampling_metrics.rdkit_metrics[0][0], + 'Uniqueness': self.sampling_metrics.rdkit_metrics[0][2]},) + + + def on_test_epoch_start(self) -> None: + self.print("Starting test...") + self.test_nll.reset() + self.test_X_kl.reset() + self.test_E_kl.reset() + self.test_X_logp.reset() + self.test_E_logp.reset() + # if self.local_rank == 0: + # utils.setup_wandb(self.cfg) + + def test_step(self, data, i): + dense_data, node_mask = utils.to_dense(data.x, data.edge_index, data.edge_attr, data.batch) + dense_data = dense_data.mask(node_mask) + noisy_data = self.apply_noise(dense_data.X, dense_data.E, data.y, node_mask) + extra_data = self.compute_extra_data(noisy_data) + pred = self.forward(noisy_data, extra_data, node_mask) + nll = self.compute_val_loss(pred, noisy_data, dense_data.X, dense_data.E, data.y, node_mask, test=True) + + self.log_dict({'test loss': nll}, on_epoch=True) + + return {'loss': nll} + + # def on_test_epoch_end(self) -> None: + # """ Measure likelihood on a test set and compute stability metrics. 
""" + # metrics = [self.test_nll.compute(), self.test_X_kl.compute(), self.test_E_kl.compute(), + # self.test_X_logp.compute(), self.test_E_logp.compute()] + # # if wandb.run: + # # wandb.log({"test/epoch_NLL": metrics[0], + # # "test/X_kl": metrics[1], + # # "test/E_kl": metrics[2], + # # "test/X_logp": metrics[3], + # # "test/E_logp": metrics[4]}, commit=False) + + # self.print(f"Epoch {self.current_epoch}: Test NLL {metrics[0] :.2f} -- Test Atom type KL {metrics[1] :.2f} -- ", + # f"Test Edge type KL: {metrics[2] :.2f}") + + # test_nll = metrics[0] + # # if wandb.run: + # # wandb.log({"test/epoch_NLL": test_nll}, commit=False) + + # self.print(f'Test loss: {test_nll :.4f}') + + # samples_left_to_generate = self.cfg.general.final_model_samples_to_generate + # samples_left_to_save = self.cfg.general.final_model_samples_to_save + # chains_left_to_save = self.cfg.general.final_model_chains_to_save + + # samples = [] + # id = 0 + # while samples_left_to_generate > 0: + # self.print(f'Samples left to generate: {samples_left_to_generate}/' + # f'{self.cfg.general.final_model_samples_to_generate}', end='', flush=True) + # bs = 2 * self.cfg.train.batch_size + # to_generate = min(samples_left_to_generate, bs) + # to_save = min(samples_left_to_save, bs) + # chains_save = min(chains_left_to_save, bs) + # samples.extend(self.sample_batch(id, to_generate, num_nodes=None, save_final=to_save, + # keep_chain=chains_save, number_chain_steps=self.number_chain_steps)) + # id += to_generate + # samples_left_to_save -= to_save + # samples_left_to_generate -= to_generate + # chains_left_to_save -= chains_save + # self.print("Saving the generated graphs") + # filename = f'generated_samples1.txt' + # for i in range(2, 10): + # if os.path.exists(filename): + # filename = f'generated_samples{i}.txt' + # else: + # break + # with open(filename, 'w') as f: + # for item in samples: + # f.write(f"N={item[0].shape[0]}\n") + # atoms = item[0].tolist() + # f.write("X: \n") + # for at in atoms: + # f.write(f"{at} ") + # f.write("\n") + # f.write("E: \n") + # for bond_list in item[1]: + # for bond in bond_list: + # f.write(f"{bond} ") + # f.write("\n") + # f.write("\n") + # self.print("Generated graphs Saved. Computing sampling metrics...") + # self.sampling_metrics(samples, self.name, self.current_epoch, self.val_counter, test=True, local_rank=self.local_rank) + # self.print("Done testing.") + + + def kl_prior(self, X, E, node_mask): + """Computes the KL between q(z1 | x) and the prior p(z1) = Normal(0, 1). + + This is essentially a lot of work for something that is in practice negligible in the loss. However, you + compute it so that you see it when you've made a mistake in your noise schedule. + """ + # Compute the last alpha value, alpha_T. 
+ ones = torch.ones((X.size(0), 1), device=X.device) + Ts = self.T * ones + alpha_t_bar = self.noise_schedule.get_alpha_bar(t_int=Ts) # (bs, 1) + + Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, self.device) + + # Compute transition probabilities + probX = X @ Qtb.X # (bs, n, dx_out) + probE = E @ Qtb.E.unsqueeze(1) # (bs, n, n, de_out) + assert probX.shape == X.shape + + bs, n, _ = probX.shape + + limit_X = self.limit_dist.X[None, None, :].expand(bs, n, -1).type_as(probX) + limit_E = self.limit_dist.E[None, None, None, :].expand(bs, n, n, -1).type_as(probE) + + # Make sure that masked rows do not contribute to the loss + limit_dist_X, limit_dist_E, probX, probE = diffusion_utils.mask_distributions(true_X=limit_X.clone(), + true_E=limit_E.clone(), + pred_X=probX, + pred_E=probE, + node_mask=node_mask) + + kl_distance_X = F.kl_div(input=probX.log(), target=limit_dist_X, reduction='none') + kl_distance_E = F.kl_div(input=probE.log(), target=limit_dist_E, reduction='none') + + return diffusion_utils.sum_except_batch(kl_distance_X) + \ + diffusion_utils.sum_except_batch(kl_distance_E) + + def compute_Lt(self, X, E, y, pred, noisy_data, node_mask, test): + pred_probs_X = F.softmax(pred.X, dim=-1) + pred_probs_E = F.softmax(pred.E, dim=-1) + pred_probs_y = F.softmax(pred.y, dim=-1) + + Qtb = self.transition_model.get_Qt_bar(noisy_data['alpha_t_bar'], self.device) + Qsb = self.transition_model.get_Qt_bar(noisy_data['alpha_s_bar'], self.device) + Qt = self.transition_model.get_Qt(noisy_data['beta_t'], self.device) + + # Compute distributions to compare with KL + bs, n, d = X.shape + prob_true = diffusion_utils.posterior_distributions(X=X, E=E, y=y, X_t=noisy_data['X_t'], E_t=noisy_data['E_t'], + y_t=noisy_data['y_t'], Qt=Qt, Qsb=Qsb, Qtb=Qtb) + prob_true.E = prob_true.E.reshape((bs, n, n, -1)) + prob_pred = diffusion_utils.posterior_distributions(X=pred_probs_X, E=pred_probs_E, y=pred_probs_y, + X_t=noisy_data['X_t'], E_t=noisy_data['E_t'], + y_t=noisy_data['y_t'], Qt=Qt, Qsb=Qsb, Qtb=Qtb) + prob_pred.E = prob_pred.E.reshape((bs, n, n, -1)) + + # Reshape and filter masked rows + prob_true_X, prob_true_E, prob_pred.X, prob_pred.E = diffusion_utils.mask_distributions(true_X=prob_true.X, + true_E=prob_true.E, + pred_X=prob_pred.X, + pred_E=prob_pred.E, + node_mask=node_mask) + kl_x = (self.test_X_kl if test else self.val_X_kl)(prob_true.X, torch.log(prob_pred.X)) + kl_e = (self.test_E_kl if test else self.val_E_kl)(prob_true.E, torch.log(prob_pred.E)) + return self.T * (kl_x + kl_e) + + def reconstruction_logp(self, t, X, E, node_mask): + # Compute noise values for t = 0. 
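+ # The L0 term is estimated by applying a single noising step (beta_0) to the clean graph,
+ # denoising the sampled z_0 with the network, and treating the softmaxed predictions as
+ # p(X, E | z_0); masked rows are overwritten below with dummy values so they contribute
+ # nothing to the reconstruction loss.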
+ t_zeros = torch.zeros_like(t) + beta_0 = self.noise_schedule(t_zeros) + Q0 = self.transition_model.get_Qt(beta_t=beta_0, device=self.device) + + probX0 = X @ Q0.X # (bs, n, dx_out) + probE0 = E @ Q0.E.unsqueeze(1) # (bs, n, n, de_out) + + sampled0 = diffusion_utils.sample_discrete_features(probX=probX0, probE=probE0, node_mask=node_mask) + + X0 = F.one_hot(sampled0.X, num_classes=self.Xdim_output).float() + E0 = F.one_hot(sampled0.E, num_classes=self.Edim_output).float() + y0 = sampled0.y + assert (X.shape == X0.shape) and (E.shape == E0.shape) + + sampled_0 = utils.PlaceHolder(X=X0, E=E0, y=y0).mask(node_mask) + + # Predictions + noisy_data = {'X_t': sampled_0.X, 'E_t': sampled_0.E, 'y_t': sampled_0.y, 'node_mask': node_mask, + 't': torch.zeros(X0.shape[0], 1).type_as(y0)} + extra_data = self.compute_extra_data(noisy_data) + pred0 = self.forward(noisy_data, extra_data, node_mask) + + # Normalize predictions + probX0 = F.softmax(pred0.X, dim=-1) + probE0 = F.softmax(pred0.E, dim=-1) + proby0 = F.softmax(pred0.y, dim=-1) + + # Set masked rows to arbitrary values that don't contribute to loss + probX0[~node_mask] = torch.ones(self.Xdim_output).type_as(probX0) + probE0[~(node_mask.unsqueeze(1) * node_mask.unsqueeze(2))] = torch.ones(self.Edim_output).type_as(probE0) + + diag_mask = torch.eye(probE0.size(1)).type_as(probE0).bool() + diag_mask = diag_mask.unsqueeze(0).expand(probE0.size(0), -1, -1) + probE0[diag_mask] = torch.ones(self.Edim_output).type_as(probE0) + + return utils.PlaceHolder(X=probX0, E=probE0, y=proby0) + + def apply_noise(self, X, E, y, node_mask): + """ Sample noise and apply it to the data. """ + + # Sample a timestep t. + # When evaluating, the loss for t=0 is computed separately + lowest_t = 0 if self.training else 1 + t_int = torch.randint(lowest_t, self.T + 1, size=(X.size(0), 1), device=X.device).float() # (bs, 1) + s_int = t_int - 1 + + t_float = t_int / self.T + s_float = s_int / self.T + + # beta_t and alpha_s_bar are used for denoising/loss computation + beta_t = self.noise_schedule(t_normalized=t_float) # (bs, 1) + alpha_s_bar = self.noise_schedule.get_alpha_bar(t_normalized=s_float) # (bs, 1) + alpha_t_bar = self.noise_schedule.get_alpha_bar(t_normalized=t_float) # (bs, 1) + + Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, device=self.device) # (bs, dx_in, dx_out), (bs, de_in, de_out) + assert (abs(Qtb.X.sum(dim=2) - 1.) < 1e-4).all(), Qtb.X.sum(dim=2) - 1 + assert (abs(Qtb.E.sum(dim=2) - 1.) < 1e-4).all() + + # Compute transition probabilities + probX = X @ Qtb.X # (bs, n, dx_out) + probE = E @ Qtb.E.unsqueeze(1) # (bs, n, n, de_out) + + sampled_t = diffusion_utils.sample_discrete_features(probX=probX, probE=probE, node_mask=node_mask) + + X_t = F.one_hot(sampled_t.X, num_classes=self.Xdim_output) + E_t = F.one_hot(sampled_t.E, num_classes=self.Edim_output) + assert (X.shape == X_t.shape) and (E.shape == E_t.shape) + + z_t = utils.PlaceHolder(X=X_t, E=E_t, y=y).type_as(X_t).mask(node_mask) + + noisy_data = {'t_int': t_int, 't': t_float, 'beta_t': beta_t, 'alpha_s_bar': alpha_s_bar, + 'alpha_t_bar': alpha_t_bar, 'X_t': z_t.X, 'E_t': z_t.E, 'y_t': z_t.y, 'node_mask': node_mask} + return noisy_data + + def compute_val_loss(self, pred, noisy_data, X, E, y, node_mask, test=False): + """Computes an estimator for the variational lower bound. + pred: (batch_size, n, total_features) + noisy_data: dict + X, E, y : (bs, n, dx), (bs, n, n, de), (bs, dy) + node_mask : (bs, n) + Output: nll (size 1) + """ + t = noisy_data['t'] + + # 1. 
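+ # (term 1 is the prior log-probability of the graph size, log p(N), under node_dist)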
+ N = node_mask.sum(1).long() + log_pN = self.node_dist.log_prob(N) + + # 2. The KL between q(z_T | x) and p(z_T) (the limit distribution). Should be close to zero. + kl_prior = self.kl_prior(X, E, node_mask) + + # 3. Diffusion loss + loss_all_t = self.compute_Lt(X, E, y, pred, noisy_data, node_mask, test) + + # 4. Reconstruction loss + # Compute L0 term : -log p (X, E, y | z_0) = reconstruction loss + prob0 = self.reconstruction_logp(t, X, E, node_mask) + + loss_term_0 = self.val_X_logp(X * prob0.X.log()) + self.val_E_logp(E * prob0.E.log()) + + # Combine terms + nlls = - log_pN + kl_prior + loss_all_t - loss_term_0 + assert len(nlls.shape) == 1, f'{nlls.shape} has more than only batch dim.' + + # Update NLL metric object and return batch nll + nll = (self.test_nll if test else self.val_nll)(nlls) # Average over the batch + + # if wandb.run: + # wandb.log({"kl prior": kl_prior.mean(), + # "Estimator loss terms": loss_all_t.mean(), + # "log_pn": log_pN.mean(), + # "loss_term_0": loss_term_0, + # 'batch_test_nll' if test else 'val_nll': nll}, commit=False) + return nll + + def forward(self, noisy_data, extra_data, node_mask): + X = torch.cat((noisy_data['X_t'], extra_data.X), dim=2).float() + E = torch.cat((noisy_data['E_t'], extra_data.E), dim=3).float() + y = torch.hstack((noisy_data['y_t'], extra_data.y)).float() + return self.model(X, E, y, node_mask) + + @torch.no_grad() + def sample_batch(self, batch_id: int, batch_size: int, keep_chain: int, number_chain_steps: int, + save_final: int, num_nodes=None): + """ + :param batch_id: int + :param batch_size: int + :param num_nodes: int, tensor (batch_size) (optional) for specifying number of nodes + :param save_final: int: number of predictions to save to file + :param keep_chain: int: number of chains to save to file + :param number_chain_steps: number of timesteps to save for each chain + :return: molecule_list. Each element of this list is a pair [atom_types, edge_types] + """ + if num_nodes is None: + n_nodes = self.node_dist.sample_n(batch_size, self.device) + elif isinstance(num_nodes, int): + n_nodes = num_nodes * torch.ones(batch_size, device=self.device, dtype=torch.int) + else: + assert isinstance(num_nodes, torch.Tensor) + n_nodes = num_nodes + n_max = torch.max(n_nodes).item() + # Build the masks + arange = torch.arange(n_max, device=self.device).unsqueeze(0).expand(batch_size, -1) + node_mask = arange < n_nodes.unsqueeze(1) + # Sample noise -- z has size (n_samples, n_nodes, n_features) + z_T = diffusion_utils.sample_discrete_feature_noise(limit_dist=self.limit_dist, node_mask=node_mask) + X, E, y = z_T.X, z_T.E, z_T.y + + assert (E == torch.transpose(E, 1, 2)).all() + assert number_chain_steps < self.T + chain_X_size = torch.Size((number_chain_steps, keep_chain, X.size(1))) + chain_E_size = torch.Size((number_chain_steps, keep_chain, E.size(1), E.size(2))) + + chain_X = torch.zeros(chain_X_size) + chain_E = torch.zeros(chain_E_size) + + # Iteratively sample p(z_s | z_t) for t = 1, ..., T, with s = t - 1. 
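+ # s_int runs from T-1 down to 0, so (s, t) = (s_int, s_int + 1) visits every reverse step;
+ # both are normalized by T before querying the noise schedule. write_index folds the T
+ # steps into number_chain_steps snapshot slots (e.g., with T=500 and 50 slots, each slot
+ # ends up holding one of every 10 intermediate states).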
+ for s_int in tqdm.tqdm(reversed(range(0, self.T)), desc="Generating samples", total=self.T): + s_array = s_int * torch.ones((batch_size, 1)).type_as(y) + t_array = s_array + 1 + s_norm = s_array / self.T + t_norm = t_array / self.T + + # Sample z_s + sampled_s, discrete_sampled_s = self.sample_p_zs_given_zt(s_norm, t_norm, X, E, y, node_mask) + X, E, y = sampled_s.X, sampled_s.E, sampled_s.y + + # Save the first keep_chain graphs + write_index = (s_int * number_chain_steps) // self.T + chain_X[write_index] = discrete_sampled_s.X[:keep_chain] + chain_E[write_index] = discrete_sampled_s.E[:keep_chain] + + # Sample + sampled_s = sampled_s.mask(node_mask, collapse=True) + X, E, y = sampled_s.X, sampled_s.E, sampled_s.y + + + + # Prepare the chain for saving + if keep_chain > 0: + final_X_chain = X[:keep_chain] + final_E_chain = E[:keep_chain] + + chain_X[0] = final_X_chain # Overwrite last frame with the resulting X, E + chain_E[0] = final_E_chain + + chain_X = diffusion_utils.reverse_tensor(chain_X) + chain_E = diffusion_utils.reverse_tensor(chain_E) + + # Repeat last frame to see final sample better + chain_X = torch.cat([chain_X, chain_X[-1:].repeat(10, 1, 1)], dim=0) + chain_E = torch.cat([chain_E, chain_E[-1:].repeat(10, 1, 1, 1)], dim=0) + assert chain_X.size(0) == (number_chain_steps + 10) + + molecule_list = [] + for i in range(batch_size): + n = n_nodes[i] + atom_types = X[i, :n].cpu() + edge_types = E[i, :n, :n].cpu() + molecule_list.append([atom_types, edge_types]) + + # # Visualize chains + # if self.visualization_tools is not None: + # self.print('Visualizing chains...') + # current_path = os.getcwd() + # num_molecules = chain_X.size(1) # number of molecules + # for i in range(num_molecules): + # result_path = os.path.join(current_path, f'chains/{self.cfg.general.name}/' + # f'epoch{self.current_epoch}/' + # f'chains/molecule_{batch_id + i}') + # if not os.path.exists(result_path): + # os.makedirs(result_path) + # _ = self.visualization_tools.visualize_chain(result_path, + # chain_X[:, i, :].numpy(), + # chain_E[:, i, :].numpy()) + # self.print('\r{}/{} complete'.format(i+1, num_molecules), end='', flush=True) + # self.print('\nVisualizing molecules...') + + # # Visualize the final molecules + # current_path = os.getcwd() + # result_path = os.path.join(current_path, + # f'graphs/{self.name}/epoch{self.current_epoch}_b{batch_id}/') + # self.visualization_tools.visualize(result_path, molecule_list, save_final) + # self.print("Done.") + + return molecule_list + + def sample_p_zs_given_zt(self, s, t, X_t, E_t, y_t, node_mask): + """Samples from zs ~ p(zs | zt). Only used during sampling. 
+ """ + bs, n, dxs = X_t.shape + beta_t = self.noise_schedule(t_normalized=t) # (bs, 1) + alpha_s_bar = self.noise_schedule.get_alpha_bar(t_normalized=s) + alpha_t_bar = self.noise_schedule.get_alpha_bar(t_normalized=t) + + # Retrieve the transition matrices + Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, self.device) + Qsb = self.transition_model.get_Qt_bar(alpha_s_bar, self.device) + Qt = self.transition_model.get_Qt(beta_t, self.device) + + # Neural net predictions + noisy_data = {'X_t': X_t, 'E_t': E_t, 'y_t': y_t, 't': t, 'node_mask': node_mask} + extra_data = self.compute_extra_data(noisy_data) + pred = self.forward(noisy_data, extra_data, node_mask) + + # Normalize predictions + pred_X = F.softmax(pred.X, dim=-1) # bs, n, d0 + pred_E = F.softmax(pred.E, dim=-1) # bs, n, n, d0 + + p_s_and_t_given_0_X = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=X_t, + Qt=Qt.X, + Qsb=Qsb.X, + Qtb=Qtb.X) + + p_s_and_t_given_0_E = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=E_t, + Qt=Qt.E, + Qsb=Qsb.E, + Qtb=Qtb.E) + # Dim of these two tensors: bs, N, d0, d_t-1 + weighted_X = pred_X.unsqueeze(-1) * p_s_and_t_given_0_X # bs, n, d0, d_t-1 + unnormalized_prob_X = weighted_X.sum(dim=2) # bs, n, d_t-1 + unnormalized_prob_X[torch.sum(unnormalized_prob_X, dim=-1) == 0] = 1e-5 + prob_X = unnormalized_prob_X / torch.sum(unnormalized_prob_X, dim=-1, keepdim=True) # bs, n, d_t-1 + + pred_E = pred_E.reshape((bs, -1, pred_E.shape[-1])) + weighted_E = pred_E.unsqueeze(-1) * p_s_and_t_given_0_E # bs, N, d0, d_t-1 + unnormalized_prob_E = weighted_E.sum(dim=-2) + unnormalized_prob_E[torch.sum(unnormalized_prob_E, dim=-1) == 0] = 1e-5 + prob_E = unnormalized_prob_E / torch.sum(unnormalized_prob_E, dim=-1, keepdim=True) + prob_E = prob_E.reshape(bs, n, n, pred_E.shape[-1]) + + assert ((prob_X.sum(dim=-1) - 1).abs() < 1e-4).all() + assert ((prob_E.sum(dim=-1) - 1).abs() < 1e-4).all() + + sampled_s = diffusion_utils.sample_discrete_features(prob_X, prob_E, node_mask=node_mask) + + X_s = F.one_hot(sampled_s.X, num_classes=self.Xdim_output).float() + E_s = F.one_hot(sampled_s.E, num_classes=self.Edim_output).float() + + assert (E_s == torch.transpose(E_s, 1, 2)).all() + assert (X_t.shape == X_s.shape) and (E_t.shape == E_s.shape) + + out_one_hot = utils.PlaceHolder(X=X_s, E=E_s, y=torch.zeros(y_t.shape[0], 0)) + out_discrete = utils.PlaceHolder(X=X_s, E=E_s, y=torch.zeros(y_t.shape[0], 0)) + + return out_one_hot.mask(node_mask).type_as(y_t), out_discrete.mask(node_mask, collapse=True).type_as(y_t) + + def compute_extra_data(self, noisy_data): + """ At every training step (after adding noise) and step in sampling, compute extra information and append to + the network input. 
""" + + extra_features = self.extra_features(noisy_data) + extra_molecular_features = self.domain_features(noisy_data) + + extra_X = torch.cat((extra_features.X, extra_molecular_features.X), dim=-1) + extra_E = torch.cat((extra_features.E, extra_molecular_features.E), dim=-1) + extra_y = torch.cat((extra_features.y, extra_molecular_features.y), dim=-1) + + t = noisy_data['t'] + extra_y = torch.cat((extra_y, t), dim=1) + + return utils.PlaceHolder(X=extra_X, E=extra_E, y=extra_y) \ No newline at end of file diff --git a/openfl-tutorials/experimental/DiGress/digress/guidance/__init__.py b/openfl-tutorials/experimental/DiGress/digress/guidance/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-tutorials/experimental/DiGress/digress/guidance/guidance_diffusion_model_discrete.py b/openfl-tutorials/experimental/DiGress/digress/guidance/guidance_diffusion_model_discrete.py new file mode 100644 index 0000000000..44f6f4793c --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/guidance/guidance_diffusion_model_discrete.py @@ -0,0 +1,698 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import numpy as np +import torch +import pytorch_lightning as pl +import time +# import wandb +import os +import torch.nn as nn +import torch.nn.functional as F +import tqdm +from omegaconf import OmegaConf, open_dict + +from digress.models.transformer_model import GraphTransformer +from digress.diffusion.noise_schedule import DiscreteUniformTransition, PredefinedNoiseScheduleDiscrete, MarginalUniformTransition +from digress.diffusion import diffusion_utils +import networkx as nx +from digress.metrics.abstract_metrics import NLL, SumExceptBatchKL, SumExceptBatchMetric +from digress.metrics.train_metrics import TrainLossDiscrete +import digress.utils as utils + +# packages for conditional generation with guidance +from torchmetrics import MeanSquaredError, MeanAbsoluteError +from rdkit.Chem.rdDistGeom import ETKDGv3, EmbedMolecule +from rdkit.Chem.rdForceFieldHelpers import MMFFHasAllMoleculeParams, MMFFOptimizeMolecule +from rdkit import Chem +import math +try: + import psi4 +except ModuleNotFoundError: + print("PSI4 not found") +from digress.analysis.rdkit_functions import build_molecule, mol2smiles, build_molecule_with_partial_charges +import pickle +import pandas as pd + + +class DiscreteDenoisingDiffusion(pl.LightningModule): + def __init__(self, cfg, dataset_infos, train_metrics, sampling_metrics, visualization_tools, extra_features, + domain_features, guidance_model=None, load_model=False): + super().__init__() + + # add for test + if load_model: + OmegaConf.set_struct(cfg, True) + with open_dict(cfg): + cfg.guidance = {'use_guidance': True, 'lambda_guidance': 0.5} + + input_dims = dataset_infos.input_dims + output_dims = dataset_infos.output_dims + nodes_dist = dataset_infos.nodes_dist + + self.cfg = cfg + self.name = cfg.general.name + self.model_dtype = torch.float32 + self.num_classes = dataset_infos.num_classes + self.T = cfg.model.diffusion_steps + + self.Xdim = input_dims['X'] + self.Edim = input_dims['E'] + self.ydim = input_dims['y'] + self.Xdim_output = output_dims['X'] + self.Edim_output = output_dims['E'] + self.ydim_output = output_dims['y'] + self.node_dist = nodes_dist + + self.dataset_info = dataset_infos + + self.train_loss = TrainLossDiscrete(self.cfg.model.lambda_train) + + self.val_nll = NLL() + self.val_X_kl = SumExceptBatchKL() + self.val_E_kl = SumExceptBatchKL() + 
self.val_y_kl = SumExceptBatchKL() + self.val_X_logp = SumExceptBatchMetric() + self.val_E_logp = SumExceptBatchMetric() + self.val_y_logp = SumExceptBatchMetric() + + self.test_nll = NLL() + self.test_X_kl = SumExceptBatchKL() + self.test_E_kl = SumExceptBatchKL() + self.test_y_kl = SumExceptBatchKL() + self.test_X_logp = SumExceptBatchMetric() + self.test_E_logp = SumExceptBatchMetric() + self.test_y_logp = SumExceptBatchMetric() + + self.train_metrics = train_metrics + self.sampling_metrics = sampling_metrics + + self.save_hyperparameters(ignore=[train_metrics, sampling_metrics]) + self.visualization_tools = visualization_tools + self.extra_features = extra_features + self.domain_features = domain_features + + self.model = GraphTransformer(n_layers=cfg.model.n_layers, + input_dims=input_dims, + hidden_mlp_dims=cfg.model.hidden_mlp_dims, + hidden_dims=cfg.model.hidden_dims, + output_dims=output_dims, + act_fn_in=nn.ReLU(), + act_fn_out=nn.ReLU()) + + if getattr(self.cfg.model, 'torch_compile', False): + print("Compiling the model...") + self.model = torch.compile(self.model) + + self.noise_schedule = PredefinedNoiseScheduleDiscrete(cfg.model.diffusion_noise_schedule, + timesteps=cfg.model.diffusion_steps) + # Marginal noise schedule + node_types = self.dataset_info.node_types.float() + x_marginals = node_types / torch.sum(node_types) + + edge_types = self.dataset_info.edge_types.float() + e_marginals = edge_types / torch.sum(edge_types) + print(f"Marginal distribution of the classes: {x_marginals} for nodes, {e_marginals} for edges") + self.transition_model = MarginalUniformTransition(x_marginals=x_marginals, e_marginals=e_marginals, + y_classes=self.ydim_output) + self.limit_dist = utils.PlaceHolder(X=x_marginals, E=e_marginals, + y=torch.ones(self.ydim_output) / self.ydim_output) + + self.save_hyperparameters(ignore=[train_metrics, sampling_metrics]) + + self.start_epoch_time = None + self.train_iterations = None + self.val_iterations = None + # self.log_every_steps = cfg.general.log_every_steps + self.number_chain_steps = cfg.general.number_chain_steps + self.best_val_nll = 1e8 + self.val_counter = 0 + + # specific properties to generate molecules + self.cond_val = MeanAbsoluteError() + self.num_valid_molecules = 0 + self.num_total = 0 + + self.guidance_model = guidance_model + self.validity = 0 + self.uniqueness = 0 + self.target_properties = [] + + def configure_optimizers(self): + return torch.optim.AdamW(self.parameters(), lr=self.cfg.train.lr, amsgrad=True, + weight_decay=self.cfg.train.weight_decay) + + def on_validation_epoch_start(self) -> None: + self.target_properties = [] + + def validation_step(self, data, i): + self.target_properties.append(data.y.clone()) + return + + @torch.enable_grad() + @torch.inference_mode(False) + def on_validation_epoch_end(self) -> None: + target_properties = torch.cat(self.target_properties, dim=0) + if self.val_counter % self.cfg.general.sample_every_val == 0: + start = time.time() + samples_left_to_generate = self.cfg.general.samples_to_generate + samples_left_to_save = self.cfg.general.samples_to_save + chains_left_to_save = self.cfg.general.chains_to_save + + samples = [] + + ident = 0 + self.print("Sampling...") + while samples_left_to_generate > 0: + bs = 2 * self.cfg.train.batch_size + to_generate = min(samples_left_to_generate, bs) + to_save = min(samples_left_to_save, bs) + chains_save = min(chains_left_to_save, bs) + # samples.extend(self.sample_batch(batch_id=ident, batch_size=to_generate, num_nodes=None, + # save_final=to_save, + # 
keep_chain=chains_save, + # number_chain_steps=self.number_chain_steps)) + + samples.extend(self.sample_batch(batch_id=ident, batch_size=to_generate, num_nodes=None, + save_final=to_save, + keep_chain=chains_save, + number_chain_steps=self.number_chain_steps, + input_properties=target_properties)) + + ident += to_generate + + samples_left_to_save -= to_save + samples_left_to_generate -= to_generate + chains_left_to_save -= chains_save + self.sampling_metrics.forward(samples, self.name, self.current_epoch, val_counter=-1, test=False, + local_rank=self.local_rank, cfg=self.cfg) + self.print(f'Done. Sampling took {time.time() - start:.2f} seconds\n') + + self.log_dict({'Validity': self.sampling_metrics.rdkit_metrics[0][0], + 'Uniqueness': self.sampling_metrics.rdkit_metrics[0][2]},) + + @torch.enable_grad() + @torch.inference_mode(False) + def test_step(self, data, i): + print(f'Selecting test molecule No. {i+1}') + # Extract properties + target_properties = data.y.clone() + + data.y = torch.zeros(data.y.shape[0], 0).type_as(data.y) + print("TARGET PROPERTIES", target_properties) + + start = time.time() + + ident = 0 + samples = self.sample_batch(batch_id=ident, batch_size=10, num_nodes=None, + save_final=10, + keep_chain=1, + number_chain_steps=self.number_chain_steps, + input_properties=target_properties) + print(f'Sampling took {time.time() - start:.2f} seconds\n') + + self.save_cond_samples(samples, target_properties, file_path=os.path.join(os.getcwd(), f'cond_smiles{i}.pkl')) + # save conditionally generated samples + mae = self.cond_sample_metric(samples, target_properties) + return {'mae': mae} + + def test_epoch_end(self, outs) -> None: + """ Aggregate the conditional-generation MAE and validity over the test set. """ + final_mae = self.cond_val.compute() + final_validity = self.num_valid_molecules / self.num_total + print("Final MAE", final_mae) + print("Final validity", final_validity * 100) + + # wandb logging is disabled in this tutorial (the import is commented out above) + # wandb.run.summary['final_MAE'] = final_mae + # wandb.run.summary['final_validity'] = final_validity + # wandb.log({'final mae': final_mae, + # 'final validity': final_validity}) + + def apply_noise(self, X, E, y, node_mask): + """ Sample noise and apply it to the data. """ + # Sample a timestep t. + # When evaluating, the loss for t=0 is computed separately + lowest_t = 0 if self.training else 1 + t_int = torch.randint(lowest_t, self.T + 1, size=(X.size(0), 1), device=X.device).float() # (bs, 1) + s_int = t_int - 1 + + t_float = t_int / self.T + s_float = s_int / self.T + + # beta_t and alpha_s_bar are used for denoising/loss computation + beta_t = self.noise_schedule(t_normalized=t_float) # (bs, 1) + alpha_s_bar = self.noise_schedule.get_alpha_bar(t_normalized=s_float) # (bs, 1) + alpha_t_bar = self.noise_schedule.get_alpha_bar(t_normalized=t_float) # (bs, 1) + + Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, device=self.device) # (bs, dx_in, dx_out), (bs, de_in, de_out) + assert (abs(Qtb.X.sum(dim=2) - 1.) < 1e-4).all(), Qtb.X.sum(dim=2) - 1 + assert (abs(Qtb.E.sum(dim=2) - 1.) 
< 1e-4).all() + + # Compute transition probabilities + probX = X @ Qtb.X # (bs, n, dx_out) + probE = E @ Qtb.E.unsqueeze(1) # (bs, n, n, de_out) + + sampled_t = diffusion_utils.sample_discrete_features(probX=probX, probE=probE, node_mask=node_mask) + + X_t = F.one_hot(sampled_t.X, num_classes=self.Xdim_output) + E_t = F.one_hot(sampled_t.E, num_classes=self.Edim_output) + assert (X.shape == X_t.shape) and (E.shape == E_t.shape) + + z_t = utils.PlaceHolder(X=X_t, E=E_t, y=y).type_as(X_t).mask(node_mask) + + noisy_data = {'t_int': t_int, 't': t_float, 'beta_t': beta_t, 'alpha_s_bar': alpha_s_bar, + 'alpha_t_bar': alpha_t_bar, 'X_t': z_t.X, 'E_t': z_t.E, 'y_t': z_t.y, 'node_mask': node_mask} + return noisy_data + + + def forward(self, noisy_data, extra_data, node_mask): + X = torch.cat((noisy_data['X_t'], extra_data.X), dim=2).float() + E = torch.cat((noisy_data['E_t'], extra_data.E), dim=3).float() + y = torch.hstack((noisy_data['y_t'], extra_data.y)).float() + return self.model(X, E, y, node_mask) + + @torch.no_grad() + def sample_batch(self, batch_id: int, batch_size: int, keep_chain: int, number_chain_steps: int, + save_final: int, num_nodes=None, input_properties=None): + """ + :param batch_id: int + :param batch_size: int + :param num_nodes: int, tensor (batch_size) (optional) for specifying number of nodes + :param save_final: int: number of predictions to save to file + :param keep_chain: int: number of chains to save to file + :param number_chain_steps: number of timesteps to save for each chain + :param input_properties: target property values used to steer the sampling + :return: molecule_list. Each element of this list is a pair [atom_types, edge_types] + """ + if num_nodes is None: + n_nodes = self.node_dist.sample_n(batch_size, self.device) + elif isinstance(num_nodes, int): + n_nodes = num_nodes * torch.ones(batch_size, device=self.device, dtype=torch.int) + else: + assert isinstance(num_nodes, torch.Tensor) + n_nodes = num_nodes + n_max = torch.max(n_nodes).item() + # Build the masks + arange = torch.arange(n_max, device=self.device).unsqueeze(0).expand(batch_size, -1) + node_mask = arange < n_nodes.unsqueeze(1) + # TODO: how to move node_mask on the right device in the multi-gpu case? + # TODO: everything else depends on its device + # Sample noise -- z has size (n_samples, n_nodes, n_features) + z_T = diffusion_utils.sample_discrete_feature_noise(limit_dist=self.limit_dist, node_mask=node_mask) + X, E, y = z_T.X, z_T.E, z_T.y + + assert (E == torch.transpose(E, 1, 2)).all() + assert number_chain_steps < self.T + chain_X_size = torch.Size((number_chain_steps, keep_chain, X.size(1))) + chain_E_size = torch.Size((number_chain_steps, keep_chain, E.size(1), E.size(2))) + + chain_X = torch.zeros(chain_X_size) + chain_E = torch.zeros(chain_E_size) + + # Iteratively sample p(z_s | z_t) for t = 1, ..., T, with s = t - 1. 
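+ # This is the same reverse loop as in the unconditional model, except that each call to
+ # sample_p_zs_given_zt below multiplies the model posterior by a guidance factor
+ # softmax(-lambda_guidance * grad) from the regressor before sampling (see cond_fn).
+ # Worked example (illustrative numbers): for one binary class with posterior (0.7, 0.3)
+ # and guidance weights softmax((-1, 1)) = (0.12, 0.88), the reweighted, renormalized
+ # distribution is (0.7*0.12, 0.3*0.88) / Z = (0.24, 0.76), pulling samples toward the target.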
+ for s_int in tqdm.tqdm(reversed(range(0, self.T)), desc="Generating samples", total=self.T): + s_array = s_int * torch.ones((batch_size, 1)).type_as(y) + t_array = s_array + 1 + t_norm = t_array / self.T + s_norm = s_array / self.T + + # Sample z_s + sampled_s, discrete_sampled_s = self.sample_p_zs_given_zt(s_norm, t_norm, X, E, y, node_mask, input_properties) + X, E, y = sampled_s.X, sampled_s.E, sampled_s.y + + # Save the first keep_chain graphs + write_index = (s_int * number_chain_steps) // self.T + chain_X[write_index] = discrete_sampled_s.X[:keep_chain] + chain_E[write_index] = discrete_sampled_s.E[:keep_chain] + + # Sample + sampled_s = sampled_s.mask(node_mask, collapse=True) + X, E, y = sampled_s.X, sampled_s.E, sampled_s.y + + # print("Examples of generated graphs:") + # for i in range(min(5, X.shape[0])): + # print("E: ", E[i]) + # print("X: ", X[i]) + + # Prepare the chain for saving + if keep_chain > 0: + final_X_chain = X[:keep_chain] + final_E_chain = E[:keep_chain] + + chain_X[0] = final_X_chain # Overwrite last frame with the resulting X, E + chain_E[0] = final_E_chain + + chain_X = diffusion_utils.reverse_tensor(chain_X) + chain_E = diffusion_utils.reverse_tensor(chain_E) + + # Repeat last frame to see final sample better + chain_X = torch.cat([chain_X, chain_X[-1:].repeat(10, 1, 1)], dim=0) + chain_E = torch.cat([chain_E, chain_E[-1:].repeat(10, 1, 1, 1)], dim=0) + assert chain_X.size(0) == (number_chain_steps + 10) + + molecule_list = [] + for i in range(batch_size): + n = n_nodes[i] + atom_types = X[i, :n].cpu() + edge_types = E[i, :n, :n].cpu() + molecule_list.append([atom_types, edge_types]) + + # Visualize chains + if self.visualization_tools is not None: + print('Visualizing chains starts!') + current_path = os.getcwd() + num_molecules = chain_X.size(1) # number of molecules + for i in range(num_molecules): + result_path = os.path.join(current_path, f'chains/{self.cfg.general.name}/' + f'epoch{self.current_epoch}/' + f'chains/molecule_{batch_id + i}') + if not os.path.exists(result_path): + os.makedirs(result_path) + _ = self.visualization_tools.visualize_chain(result_path, + chain_X[:, i, :].numpy(), + chain_E[:, i, :].numpy()) + print('\r{}/{} complete'.format(i+1, num_molecules), end='', flush=True) + print('\nVisualizing chains Ends!') + + # Visualize the final molecules + current_path = os.getcwd() + result_path = os.path.join(current_path, + f'graphs/{self.name}/epoch{self.current_epoch}_b{batch_id}/') + self.visualization_tools.visualize(result_path, molecule_list, save_final) + + return molecule_list + + + def cond_sample_metric(self, samples, input_properties): + mols_dipoles = [] + mols_homo = [] + + # Hardware side settings (CPU thread number and memory settings used for calculation) + psi4.set_num_threads(nthread=4) + psi4.set_memory("5GB") + psi4.core.set_output_file('psi4_output.dat', False) + + for sample in samples: + mol = build_molecule_with_partial_charges(sample[0], sample[1], self.dataset_info.atom_decoder) + + try: + Chem.SanitizeMol(mol) + except: + print('invalid chemistry') + continue + + # Coarse 3D structure optimization by generating 3D structure from SMILES + mol = Chem.AddHs(mol) + params = ETKDGv3() + params.randomSeed = 1 + try: + EmbedMolecule(mol, params) + except Chem.rdchem.AtomValenceException: + print('invalid chemistry') + continue + + # Structural optimization with MMFF (Merck Molecular Force Field) + try: + s = MMFFOptimizeMolecule(mol) + print(s) + except: + print('Bad conformer ID') + continue + + conf = 
mol.GetConformer() + + # Convert to a format that can be input to Psi4. + # Set the charge and spin multiplicity (the commented-out fallback below corresponds to charge 0, spin multiplicity 1) + + # Get the formal charge + fc = 'FormalCharge' + mol_FormalCharge = int(mol.GetProp(fc)) if mol.HasProp(fc) else Chem.GetFormalCharge(mol) + + sm = 'SpinMultiplicity' + if mol.HasProp(sm): + mol_spin_multiplicity = int(mol.GetProp(sm)) + else: + # Calculate spin multiplicity using Hund's rule of maximum multiplicity + NumRadicalElectrons = 0 + for Atom in mol.GetAtoms(): + NumRadicalElectrons += Atom.GetNumRadicalElectrons() + TotalElectronicSpin = NumRadicalElectrons / 2 + SpinMultiplicity = 2 * TotalElectronicSpin + 1 + mol_spin_multiplicity = int(SpinMultiplicity) + + mol_input = "%s %s" % (mol_FormalCharge, mol_spin_multiplicity) + print(mol_input) + #mol_input = "0 1" + + # Describe the coordinates of each atom in XYZ format + for atom in mol.GetAtoms(): + mol_input += "\n " + atom.GetSymbol() + " " + str(conf.GetAtomPosition(atom.GetIdx()).x) \ + " " + str(conf.GetAtomPosition(atom.GetIdx()).y) \ + " " + str(conf.GetAtomPosition(atom.GetIdx()).z) + + try: + molecule = psi4.geometry(mol_input) + except: + print('Cannot construct the Psi4 geometry') + continue + + # Set the calculation method (functional) and basis set for Psi4 + level = "b3lyp/6-31G*" + + # Other possible methods (functionals) and basis sets: + # theory = ['hf', 'b3lyp'] + # basis_set = ['sto-3g', '3-21G', '6-31G(d)', '6-31+G(d,p)', '6-311++G(2d,p)'] + + # Perform a single-point energy calculation (structural optimization is commented out) + print('Starting Psi4 calculation...') + #energy, wave_function = psi4.optimize(level, molecule=molecule, return_wfn=True) + try: + energy, wave_function = psi4.energy(level, molecule=molecule, return_wfn=True) + except psi4.driver.SCFConvergenceError: + print("Psi4 did not converge") + continue + + print('Extracting the requested properties...') + + if self.cfg.general.guidance_target in ['mu', 'both']: + dip_x, dip_y, dip_z = wave_function.variable('SCF DIPOLE')[0],\ + wave_function.variable('SCF DIPOLE')[1],\ + wave_function.variable('SCF DIPOLE')[2] + dipole_moment = math.sqrt(dip_x**2 + dip_y**2 + dip_z**2) * 2.5417464519 + print("Dipole moment", dipole_moment) + mols_dipoles.append(dipole_moment) + + if self.cfg.general.guidance_target in ['homo', 'both']: + # Compute the HOMO energy (unit: a.u. = Hartree) + LUMO_idx = wave_function.nalpha() + HOMO_idx = LUMO_idx - 1 + homo = wave_function.epsilon_a_subset("AO", "ALL").np[HOMO_idx] + + # convert unit from a.u. 
to ev + homo = homo * 27.211324570273 + print("HOMO", homo) + mols_homo.append(homo) + + num_valid_molecules = max(len(mols_dipoles), len(mols_homo)) + print("Number of valid samples", num_valid_molecules) + self.num_valid_molecules += num_valid_molecules + self.num_total += len(samples) + + mols_dipoles = torch.FloatTensor(mols_dipoles) + mols_homo = torch.FloatTensor(mols_homo) + + if self.cfg.general.guidance_target == 'mu': + mae = self.cond_val(mols_dipoles.unsqueeze(1), + input_properties.repeat(len(mols_dipoles), 1).cpu()) + + elif self.cfg.general.guidance_target == 'homo': + mae = self.cond_val(mols_homo.unsqueeze(1), + input_properties.repeat(len(mols_homo), 1).cpu()) + + elif self.cfg.general.guidance_target == 'both': + properties = torch.hstack((mols_dipoles.unsqueeze(1), mols_homo.unsqueeze(1))) + mae = self.cond_val(properties, + input_properties.repeat(len(mols_dipoles), 1).cpu()) + + print('Conditional generation metric:') + print(f'Epoch {self.current_epoch}: MAE: {mae}') + # wandb.log({"val_epoch/conditional generation mae": mae, + # 'Valid molecules': num_valid_molecules}) + return mae + + def cond_fn(self, noisy_data, node_mask, target=None): + #self.guidance_model.eval() + loss = nn.MSELoss() + + t = noisy_data['t'] + + X = noisy_data['X_t'] + E = noisy_data['E_t'] + y = noisy_data['t'] + + with torch.enable_grad(): + x_in = X.float().detach().requires_grad_(True) + e_in = E.float().detach().requires_grad_(True) + + pred = self.guidance_model.model(x_in, e_in, y, node_mask) + + # normalize target + target = target.type_as(x_in) + # mse = loss(pred.y, target.repeat(pred.y.size(0), 1)) + + if pred.y.size(0) > target.size(0): + diff = pred.y.size(0) - target.size(0) + repeats = pred.y.size(0) // target.size(0) + additional = pred.y.size(0) % target.size(0) + target_padded = torch.cat([target.repeat(repeats, 1), target[:additional]], dim=0) + mse = loss(pred.y, target_padded) + elif pred.y.size(0) < target.size(0): + indices = torch.randperm(target.size(0))[:pred.y.size(0)] + target_truncated = target[indices] + mse = loss(pred.y, target_truncated) + else: + # If batch sizes are equal, compute the loss directly + mse = loss(pred.y, target) + + # t_int = int(t[0].item() * 500) + # if t_int % 10 == 0: + # print(f'Regressor MSE at step {t_int}: {mse.item()}') + # wandb.log({'Guidance MSE': mse}) + + # calculate gradient of mse with respect to x and e + grad_x = torch.autograd.grad(mse, x_in, retain_graph=True)[0] + grad_e = torch.autograd.grad(mse, e_in)[0] + + x_mask = node_mask.unsqueeze(-1) # bs, n, 1 + bs, n = x_mask.shape[0], x_mask.shape[1] + + e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1 + e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1 + diag_mask = torch.eye(n) + diag_mask = ~diag_mask.type_as(e_mask1).bool() + diag_mask = diag_mask.unsqueeze(0).unsqueeze(-1).expand(bs, -1, -1, -1) + + mask_grad_x = grad_x * x_mask + mask_grad_e = grad_e * e_mask1 * e_mask2 * diag_mask + + mask_grad_e = 1 / 2 * (mask_grad_e + torch.transpose(mask_grad_e, 1, 2)) + return mask_grad_x, mask_grad_e + + def sample_p_zs_given_zt(self, s, t, X_t, E_t, y_t, node_mask, input_properties): + """Samples from zs ~ p(zs | zt). 
Only used during sampling.""" + bs, n, dxs = X_t.shape + beta_t = self.noise_schedule(t_normalized=t) # (bs, 1) + alpha_s_bar = self.noise_schedule.get_alpha_bar(t_normalized=s) + alpha_t_bar = self.noise_schedule.get_alpha_bar(t_normalized=t) + + # Retrieve transitions matrix + Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, self.device) + Qsb = self.transition_model.get_Qt_bar(alpha_s_bar, self.device) + Qt = self.transition_model.get_Qt(beta_t, self.device) + + # Neural net predictions + noisy_data = {'X_t': X_t, 'E_t': E_t, 'y_t': y_t, 't': t, 'node_mask': node_mask} + extra_data = self.compute_extra_data(noisy_data) + pred = self.forward(noisy_data, extra_data, node_mask) + + # Normalize predictions + pred_X = F.softmax(pred.X, dim=-1) # bs, n, d0 + pred_E = F.softmax(pred.E, dim=-1) # bs, n, n, d0 + + p_s_and_t_given_0_X = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=X_t, + Qt=Qt.X, + Qsb=Qsb.X, + Qtb=Qtb.X) + + p_s_and_t_given_0_E = diffusion_utils.compute_batched_over0_posterior_distribution(X_t=E_t, + Qt=Qt.E, + Qsb=Qsb.E, + Qtb=Qtb.E) + # Dim of these two tensors: bs, N, d0, d_t-1 + weighted_X = pred_X.unsqueeze(-1) * p_s_and_t_given_0_X # bs, n, d0, d_t-1 + unnormalized_prob_X = weighted_X.sum(dim=2) # bs, n, d_t-1 + unnormalized_prob_X[torch.sum(unnormalized_prob_X, dim=-1) == 0] = 1e-5 + prob_X = unnormalized_prob_X / torch.sum(unnormalized_prob_X, dim=-1, keepdim=True) # bs, n, d_t-1 + + pred_E = pred_E.reshape((bs, -1, pred_E.shape[-1])) + weighted_E = pred_E.unsqueeze(-1) * p_s_and_t_given_0_E # bs, N, d0, d_t-1 + unnormalized_prob_E = weighted_E.sum(dim=-2) + unnormalized_prob_E[torch.sum(unnormalized_prob_E, dim=-1) == 0] = 1e-5 + prob_E = unnormalized_prob_E / torch.sum(unnormalized_prob_E, dim=-1, keepdim=True) + prob_E = prob_E.reshape(bs, n, n, pred_E.shape[-1]) + + # # Guidance + lamb = self.cfg.guidance.lambda_guidance + + grad_x, grad_e = self.cond_fn(noisy_data, node_mask, input_properties) + + p_eta_x = torch.softmax(- lamb * grad_x, dim=-1) + p_eta_e = torch.softmax(- lamb * grad_e, dim=-1) + + prob_X_unnormalized = p_eta_x * prob_X + prob_X_unnormalized[torch.sum(prob_X_unnormalized, dim=-1) == 0] = 1e-7 + prob_X = prob_X_unnormalized / torch.sum(prob_X_unnormalized, dim=-1, keepdim=True) + + prob_E_unnormalized = p_eta_e * prob_E + prob_E_unnormalized[torch.sum(prob_E_unnormalized, dim=-1) == 0] = 1e-7 + prob_E = prob_E_unnormalized / torch.sum(prob_E_unnormalized, dim=-1, keepdim=True) + + assert ((prob_X.sum(dim=-1) - 1).abs() < 1e-4).all() + assert ((prob_E.sum(dim=-1) - 1).abs() < 1e-4).all() + + sampled_s = diffusion_utils.sample_discrete_features(prob_X, prob_E, node_mask=node_mask) + + X_s = F.one_hot(sampled_s.X, num_classes=self.Xdim_output).float() + E_s = F.one_hot(sampled_s.E, num_classes=self.Edim_output).float() + + assert (E_s == torch.transpose(E_s, 1, 2)).all() + assert (X_t.shape == X_s.shape) and (E_t.shape == E_s.shape) + + out_one_hot = utils.PlaceHolder(X=X_s, E=E_s, y=torch.zeros(y_t.shape[0], 0)) + out_discrete = utils.PlaceHolder(X=X_s, E=E_s, y=torch.zeros(y_t.shape[0], 0)) + + return out_one_hot.mask(node_mask).type_as(y_t), out_discrete.mask(node_mask, collapse=True).type_as(y_t) + + def compute_extra_data(self, noisy_data): + """ At every training step (after adding noise) and step in sampling, compute extra information and append to + the network input. 
""" + + extra_features = self.extra_features(noisy_data) + extra_molecular_features = self.domain_features(noisy_data) + + extra_X = torch.cat((extra_features.X, extra_molecular_features.X), dim=-1) + extra_E = torch.cat((extra_features.E, extra_molecular_features.E), dim=-1) + extra_y = torch.cat((extra_features.y, extra_molecular_features.y), dim=-1) + + t = noisy_data['t'] + extra_y = torch.cat((extra_y, t), dim=1) + + return utils.PlaceHolder(X=extra_X, E=extra_E, y=extra_y) + + def save_cond_samples(self, samples, target, file_path): + cond_results = {'smiles': [], 'input_targets': target} + invalid = 0 + disconnected = 0 + + print("\tConverting conditionally generated molecules to SMILES ...") + for sample in samples: + mol = build_molecule_with_partial_charges(sample[0], sample[1], self.dataset_info.atom_decoder) + smile = mol2smiles(mol) + if smile is not None: + cond_results['smiles'].append(smile) + mol_frags = Chem.rdmolops.GetMolFrags(mol, asMols=True, sanitizeFrags=False) + if len(mol_frags) > 1: + print("Disconnected molecule", mol, mol_frags) + disconnected += 1 + else: + print("Invalid molecule obtained.") + invalid += 1 + + print("Number of invalid molecules", invalid) + print("Number of disconnected molecules", disconnected) + + # save samples + with open(file_path, 'wb') as f: + pickle.dump(cond_results, f) + + return cond_results diff --git a/openfl-tutorials/experimental/DiGress/digress/guidance/qm9_regressor_discrete.py b/openfl-tutorials/experimental/DiGress/digress/guidance/qm9_regressor_discrete.py new file mode 100644 index 0000000000..30af259709 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/guidance/qm9_regressor_discrete.py @@ -0,0 +1,306 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ +import torch +import torch.nn as nn +import torch.nn.functional as F +import pytorch_lightning as pl +import time +# import wandb +from torchmetrics import MeanSquaredError, MeanAbsoluteError + +from digress.models.transformer_model import GraphTransformer +from digress.diffusion.noise_schedule import PredefinedNoiseScheduleDiscrete, MarginalUniformTransition +from digress.diffusion import diffusion_utils +from digress.metrics.abstract_metrics import NLL, SumExceptBatchKL, SumExceptBatchMetric +from digress.metrics.train_metrics import TrainLossDiscrete +import digress.utils as utils + + +def reset_metrics(metrics): + for metric in metrics: + metric.reset() + + +class Qm9RegressorDiscrete(pl.LightningModule): + def __init__(self, cfg, dataset_infos, train_metrics, sampling_metrics, visualization_tools, extra_features, + domain_features): + super().__init__() + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + input_dims = dataset_infos.input_dims + output_dims = dataset_infos.output_dims + nodes_dist = dataset_infos.nodes_dist + + self.args = cfg + self.name = cfg.general.name + self.model_dtype = torch.float32 + self.num_classes = dataset_infos.num_classes + self.T = cfg.model.diffusion_steps + + self.Xdim = input_dims['X'] + self.Edim = input_dims['E'] + self.ydim = input_dims['y'] + self.Xdim_output = output_dims['X'] + self.Edim_output = output_dims['E'] + self.ydim_output = output_dims['y'] + self.node_dist = nodes_dist + + self.dataset_info = dataset_infos + + self.val_nll = NLL() + self.val_X_kl = SumExceptBatchKL() + self.val_E_kl = SumExceptBatchKL() + self.val_y_kl = SumExceptBatchKL() + self.val_X_logp = SumExceptBatchMetric() + self.val_E_logp = 
SumExceptBatchMetric() + self.val_y_logp = SumExceptBatchMetric() + + self.test_nll = NLL() + self.test_X_kl = SumExceptBatchKL() + self.test_E_kl = SumExceptBatchKL() + self.test_y_kl = SumExceptBatchKL() + self.test_X_logp = SumExceptBatchMetric() + self.test_E_logp = SumExceptBatchMetric() + self.test_y_logp = SumExceptBatchMetric() + + self.train_metrics = train_metrics + self.sampling_metrics = sampling_metrics + + self.save_hyperparameters(ignore=[train_metrics, sampling_metrics]) + self.visualization_tools = visualization_tools + self.extra_features = extra_features + self.domain_features = domain_features + + self.model = GraphTransformer(n_layers=cfg.model.n_layers, + input_dims=input_dims, + hidden_mlp_dims=cfg.model.hidden_mlp_dims, + hidden_dims=cfg.model.hidden_dims, + output_dims=output_dims, + act_fn_in=nn.ReLU(), + act_fn_out=nn.ReLU()) + + if getattr(self.args.model, 'torch_compile', False): + print("Compiling the model...") + self.model = torch.compile(self.model) + + self.noise_schedule = PredefinedNoiseScheduleDiscrete(cfg.model.diffusion_noise_schedule, + timesteps=cfg.model.diffusion_steps) + + # Marginal transition model + node_types = self.dataset_info.node_types.float() + x_marginals = node_types / torch.sum(node_types) + + edge_types = self.dataset_info.edge_types.float() + e_marginals = edge_types / torch.sum(edge_types) + print(f"Marginal distribution of the classes: {x_marginals} for nodes, {e_marginals} for edges") + self.transition_model = MarginalUniformTransition(x_marginals=x_marginals, e_marginals=e_marginals, + y_classes=self.ydim_output) + + self.limit_dist = utils.PlaceHolder(X=x_marginals, E=e_marginals, + y=torch.ones(self.ydim_output) / self.ydim_output) + + self.save_hyperparameters(ignore=[train_metrics, sampling_metrics]) + + self.start_epoch_time = None + self.train_iterations = None + self.val_iterations = None + # self.log_every_steps = cfg.general.log_every_steps + self.number_chain_steps = cfg.general.number_chain_steps + self.best_val_nll = 1e8 + self.val_counter = 0 + + self.train_loss = MeanSquaredError(squared=True) + self.val_loss = MeanAbsoluteError() + self.test_loss = MeanAbsoluteError() + self.best_val_mae = 1e8 + + self.val_loss_each = [MeanAbsoluteError().to(device) for i in range(2)] + self.test_loss_each = [MeanAbsoluteError().to(device) for i in range(2)] + self.target_dict = {0: "mu", 1: "homo"} + + def training_step(self, data, i): + # input zero y to generate noised graphs + target = data.y.clone() + data.y = torch.zeros(data.y.shape[0], 0).type_as(data.y) + + dense_data, node_mask = utils.to_dense(data.x, data.edge_index, data.edge_attr, data.batch) + dense_data = dense_data.mask(node_mask) + X, E = dense_data.X, dense_data.E + noisy_data = self.apply_noise(X, E, data.y, node_mask) + extra_data = self.compute_extra_data(noisy_data) + pred = self.forward(noisy_data, extra_data, node_mask) + + mse = self.compute_train_loss(pred, target, log=False) #, log=i % self.log_every_steps == 0) + self.log_dict({'train loss': mse}) + return {'loss': mse} + + def configure_optimizers(self): + # optimizer = torch.optim.AdamW(self.parameters(), lr=self.args.train.lr, amsgrad=True, weight_decay=1e-12) + # scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99) + # return { + # "optimizer": optimizer, + # "lr_scheduler": { + # "scheduler": scheduler, + # "interval": "epoch", + # "frequency": 1, + # # "monitor": "val_loss", + # }, + # } + return torch.optim.AdamW(self.parameters(), lr=self.args.train.lr, amsgrad=True, 
weight_decay=1e-12) + + # def on_fit_start(self) -> None: + # self.train_iterations = len(self.trainer.datamodule.train_dataloader()) + # print("Size of the input features", self.Xdim, self.Edim, self.ydim) + + def on_train_epoch_start(self) -> None: + self.start_epoch_time = time.time() + self.train_loss.reset() + self.train_metrics.reset() + + # def on_train_epoch_end(self) -> None: + # train_mse = self.train_loss.compute() + + # to_log = {"train_epoch/mse": train_mse} + # print(f"Epoch {self.current_epoch}: train_mse: {train_mse :.3f} -- {time.time() - self.start_epoch_time:.1f}s ") + + # # wandb.log(to_log) + # # self.train_loss.reset() + + def on_validation_epoch_start(self) -> None: + self.val_loss.reset() + reset_metrics(self.val_loss_each) + + def validation_step(self, data, i): + # input zero y to generate noised graphs + target = data.y.clone() + data.y = torch.zeros(data.y.shape[0], 0).type_as(data.y) + + dense_data, node_mask = utils.to_dense(data.x, data.edge_index, data.edge_attr, data.batch) + dense_data = dense_data.mask(node_mask) + noisy_data = self.apply_noise(dense_data.X, dense_data.E, data.y, node_mask) + extra_data = self.compute_extra_data(noisy_data) + pred = self.forward(noisy_data, extra_data, node_mask) + mae = self.compute_val_loss(pred, target) + # self.log('val_loss', mae, prog_bar=True, on_step=False, on_epoch=True) + return {'val_loss': mae} + + def on_validation_epoch_end(self) -> None: + val_mae = self.val_loss.compute() + # to_log = {"val/epoch_mae": val_mae} + print(f"Epoch {self.current_epoch}: val_mae: {val_mae :.3f}") + # wandb.log(to_log) + # self.log('val/epoch_mae', val_mae, on_epoch=True, on_step=False) + + if val_mae < self.best_val_mae: + self.best_val_mae = val_mae + print('Val loss: %.4f \t Best val loss: %.4f\n' % (val_mae, self.best_val_mae)) + + if self.args.general.guidance_target == 'both': + print('Val loss each target:') + for i in range(2): + mae_each = self.val_loss_each[i].compute() + print(f"Target {self.target_dict[i]}: val_mae: {mae_each :.3f}") + # self.log_dict({f"{self.target_dict[i]}_mae": mae_each}) + # to_log_each = {f"val_epoch/{self.target_dict[i]}_mae": mae_each} + # wandb.log(to_log_each) + # else: + self.log_dict({'val_mae': val_mae}) + + self.val_loss.reset() + reset_metrics(self.val_loss_each) + + def on_test_epoch_start(self) -> None: + self.test_loss.reset() + reset_metrics(self.test_loss_each) + + def apply_noise(self, X, E, y, node_mask): + """ Sample noise and apply it to the data. """ + + # Sample a timestep t. + # When evaluating, the loss for t=0 is computed separately + lowest_t = 0 if self.training else 1 + t_int = torch.randint(lowest_t, self.T + 1, size=(X.size(0), 1), device=X.device).float() # (bs, 1) + s_int = t_int - 1 + + t_float = t_int / self.T + s_float = s_int / self.T + + # beta_t and alpha_s_bar are used for denoising/loss computation + beta_t = self.noise_schedule(t_normalized=t_float) # (bs, 1) + alpha_s_bar = self.noise_schedule.get_alpha_bar(t_normalized=s_float) # (bs, 1) + alpha_t_bar = self.noise_schedule.get_alpha_bar(t_normalized=t_float) # (bs, 1) + + Qtb = self.transition_model.get_Qt_bar(alpha_t_bar, device=self.device) # (bs, dx_in, dx_out), (bs, de_in, de_out) + assert (abs(Qtb.X.sum(dim=2) - 1.) < 1e-4).all(), Qtb.X.sum(dim=2) - 1 + assert (abs(Qtb.E.sum(dim=2) - 1.) 
< 1e-4).all() + + # Compute transition probabilities + probX = X @ Qtb.X # (bs, n, dx_out) + probE = E @ Qtb.E.unsqueeze(1) # (bs, n, n, de_out) + + sampled_t = diffusion_utils.sample_discrete_features(probX=probX, probE=probE, node_mask=node_mask) + + X_t = F.one_hot(sampled_t.X, num_classes=self.Xdim) + E_t = F.one_hot(sampled_t.E, num_classes=self.Edim) + assert (X.shape == X_t.shape) and (E.shape == E_t.shape) + + z_t = utils.PlaceHolder(X=X_t, E=E_t, y=y).type_as(X_t).mask(node_mask) + + noisy_data = {'t_int': t_int, 't': t_float, 'beta_t': beta_t, 'alpha_s_bar': alpha_s_bar, + 'alpha_t_bar': alpha_t_bar, 'X_t': z_t.X, 'E_t': z_t.E, 'y_t': z_t.y, 'node_mask': node_mask} + return noisy_data + + def compute_val_loss(self, pred, target): + """Computes MAE. + pred: (batch_size, n, total_features) + target: (batch_size, total_features) + noisy_data: dict + X, E, y : (bs, n, dx), (bs, n, n, de), (bs, dy) + node_mask : (bs, n) + Output: nll (size 1) + """ + + for i in range(pred.y.shape[1]): + mae_each = self.val_loss_each[i](pred.y[:, i], target[:, i]) + + mae = self.val_loss(pred.y, target) + return mae + + def forward(self, noisy_data, extra_data, node_mask): + X = torch.cat((noisy_data['X_t'], extra_data.X), dim=2).float() + E = torch.cat((noisy_data['E_t'], extra_data.E), dim=3).float() + y = torch.hstack((noisy_data['y_t'], extra_data.y)).float() + return self.model(X, E, y, node_mask) + + def compute_extra_data(self, noisy_data): + """ At every training step (after adding noise) and step in sampling, compute extra information and append to + the network input. """ + + extra_features = self.extra_features(noisy_data) + extra_molecular_features = self.domain_features(noisy_data) + + extra_X = torch.cat((extra_features.X, extra_molecular_features.X), dim=-1) + extra_E = torch.cat((extra_features.E, extra_molecular_features.E), dim=-1) + + t = noisy_data['t'] + + assert extra_X.shape[-1] == 0, 'The regressor model should not be used with extra features' + assert extra_E.shape[-1] == 0, 'The regressor model should not be used with extra features' + return utils.PlaceHolder(X=extra_X, E=extra_E, y=t) + + def compute_train_loss(self, pred, target, log: bool): + """ + pred: (batch_size, n, total_features) + pred_epsX: bs, n, dx + pred_epsy: bs, n, n, dy + pred_eps_z: bs, dz + data: dict + noisy_data: dict + Output: mse (size 1) + """ + mse = self.train_loss(pred.y, target) + + # if log: + # wandb.log({"train_loss/batch_mse": mse.item()}, commit=True) + return mse \ No newline at end of file diff --git a/openfl-tutorials/experimental/DiGress/digress/metrics/__init__.py b/openfl-tutorials/experimental/DiGress/digress/metrics/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-tutorials/experimental/DiGress/digress/metrics/abstract_metrics.py b/openfl-tutorials/experimental/DiGress/digress/metrics/abstract_metrics.py new file mode 100644 index 0000000000..e7d2b1abb2 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/metrics/abstract_metrics.py @@ -0,0 +1,137 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +from torch import Tensor +from torch.nn import functional as F +from torchmetrics import Metric, MeanSquaredError + + +class TrainAbstractMetricsDiscrete(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, masked_pred_X, masked_pred_E, true_X, true_E, log: bool): + pass + + def reset(self): + pass + + def 
log_epoch_metrics(self): + return None, None + + +class TrainAbstractMetrics(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, masked_pred_epsX, masked_pred_epsE, pred_y, true_epsX, true_epsE, true_y, log): + pass + + def reset(self): + pass + + def log_epoch_metrics(self): + return None, None + + +class SumExceptBatchMetric(Metric): + def __init__(self): + super().__init__() + self.add_state('total_value', default=torch.tensor(0.), dist_reduce_fx="sum") + self.add_state('total_samples', default=torch.tensor(0.), dist_reduce_fx="sum") + + def update(self, values) -> None: + self.total_value += torch.sum(values) + self.total_samples += values.shape[0] + + def compute(self): + return self.total_value / self.total_samples + + +class SumExceptBatchMSE(MeanSquaredError): + def update(self, preds: Tensor, target: Tensor) -> None: + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + assert preds.shape == target.shape + sum_squared_error, n_obs = self._mean_squared_error_update(preds, target) + + self.sum_squared_error += sum_squared_error + self.total += n_obs + + def _mean_squared_error_update(self, preds: Tensor, target: Tensor): + """ Updates and returns variables required to compute Mean Squared Error. Checks for same shape of input + tensors. + preds: Predicted tensor + target: Ground truth tensor + """ + diff = preds - target + sum_squared_error = torch.sum(diff * diff) + n_obs = preds.shape[0] + return sum_squared_error, n_obs + + +class SumExceptBatchKL(Metric): + def __init__(self): + super().__init__() + self.add_state('total_value', default=torch.tensor(0.), dist_reduce_fx="sum") + self.add_state('total_samples', default=torch.tensor(0.), dist_reduce_fx="sum") + + def update(self, p, q) -> None: + self.total_value += F.kl_div(q, p, reduction='sum') + self.total_samples += p.size(0) + + def compute(self): + return self.total_value / self.total_samples + + +class CrossEntropyMetric(Metric): + def __init__(self): + super().__init__() + self.add_state('total_ce', default=torch.tensor(0.), dist_reduce_fx="sum") + self.add_state('total_samples', default=torch.tensor(0.), dist_reduce_fx="sum") + + def update(self, preds: Tensor, target: Tensor) -> None: + """ Update state with predictions and targets. + preds: Predictions from model (bs * n, d) or (bs * n * n, d) + target: Ground truth values (bs * n, d) or (bs * n * n, d). """ + target = torch.argmax(target, dim=-1) + output = F.cross_entropy(preds, target, reduction='sum') + self.total_ce += output + self.total_samples += preds.size(0) + + def compute(self): + return self.total_ce / self.total_samples + + +class ProbabilityMetric(Metric): + def __init__(self): + """ This metric is used to track the marginal predicted probability of a class during training. 
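+        A minimal usage sketch (hypothetical values, not from the original source):
+            metric = ProbabilityMetric()
+            metric.update(torch.tensor([0.2, 0.8]))   # prob += 1.0, total += 2
+            metric.compute()                          # tensor(0.5): the mean predicted probability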
""" + super().__init__() + self.add_state('prob', default=torch.tensor(0.), dist_reduce_fx="sum") + self.add_state('total', default=torch.tensor(0.), dist_reduce_fx="sum") + + def update(self, preds: Tensor) -> None: + self.prob += preds.sum() + self.total += preds.numel() + + def compute(self): + return self.prob / self.total + + +class NLL(Metric): + def __init__(self): + super().__init__() + self.add_state('total_nll', default=torch.tensor(0.), dist_reduce_fx="sum") + self.add_state('total_samples', default=torch.tensor(0.), dist_reduce_fx="sum") + + def update(self, batch_nll) -> None: + self.total_nll += torch.sum(batch_nll) + self.total_samples += batch_nll.numel() + + def compute(self): + return self.total_nll / self.total_samples \ No newline at end of file diff --git a/openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics.py b/openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics.py new file mode 100644 index 0000000000..5a2ac7d522 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics.py @@ -0,0 +1,407 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +from rdkit import Chem +from torchmetrics import MeanSquaredError, MeanAbsoluteError + +### packages for visualization +from digress.analysis.rdkit_functions import compute_molecular_metrics +import torch +from torchmetrics import Metric, MetricCollection +from torch import Tensor +# import wandb +import torch.nn as nn +import os + + +class TrainMolecularMetrics(nn.Module): + def __init__(self, remove_h): + super().__init__() + self.train_atom_metrics = AtomMetrics(remove_h) + self.train_bond_metrics = BondMetrics() + + def forward(self, masked_pred_epsX, masked_pred_epsE, pred_y, true_epsX, true_epsE, true_y, log: bool): + self.train_atom_metrics(masked_pred_epsX, true_epsX) + self.train_bond_metrics(masked_pred_epsE, true_epsE) + if log: + to_log = {} + for key, val in self.train_atom_metrics.compute().items(): + to_log['train/' + key] = val.item() + for key, val in self.train_bond_metrics.compute().items(): + to_log['train/' + key] = val.item() + # if wandb.run: + # wandb.log(to_log, commit=False) + + def reset(self): + for metric in [self.train_atom_metrics, self.train_bond_metrics]: + metric.reset() + + def log_epoch_metrics(self): + epoch_atom_metrics = self.train_atom_metrics.compute() + epoch_bond_metrics = self.train_bond_metrics.compute() + + to_log = {} + for key, val in epoch_atom_metrics.items(): + to_log['train_epoch/epoch' + key] = val.item() + for key, val in epoch_bond_metrics.items(): + to_log['train_epoch/epoch' + key] = val.item() + + # if wandb.run: + # wandb.log(to_log, commit=False) + + for key, val in epoch_atom_metrics.items(): + epoch_atom_metrics[key] = f"{val.item() :.3f}" + for key, val in epoch_bond_metrics.items(): + epoch_bond_metrics[key] = f"{val.item() :.3f}" + + return epoch_atom_metrics, epoch_bond_metrics + + + +class SamplingMolecularMetrics(nn.Module): + def __init__(self, dataset_infos, train_smiles): + super().__init__() + di = dataset_infos + self.generated_n_dist = GeneratedNDistribution(di.max_n_nodes) + self.generated_node_dist = GeneratedNodesDistribution(di.output_dims['X']) + self.generated_edge_dist = GeneratedEdgesDistribution(di.output_dims['E']) + self.generated_valency_dist = ValencyDistribution(di.max_n_nodes) + + n_target_dist = di.n_nodes.type_as(self.generated_n_dist.n_dist) + n_target_dist = n_target_dist / 
torch.sum(n_target_dist) + self.register_buffer('n_target_dist', n_target_dist) + + node_target_dist = di.node_types.type_as(self.generated_node_dist.node_dist) + node_target_dist = node_target_dist / torch.sum(node_target_dist) + self.register_buffer('node_target_dist', node_target_dist) + + edge_target_dist = di.edge_types.type_as(self.generated_edge_dist.edge_dist) + edge_target_dist = edge_target_dist / torch.sum(edge_target_dist) + self.register_buffer('edge_target_dist', edge_target_dist) + + valency_target_dist = di.valency_distribution.type_as(self.generated_valency_dist.edgepernode_dist) + valency_target_dist = valency_target_dist / torch.sum(valency_target_dist) + self.register_buffer('valency_target_dist', valency_target_dist) + + self.n_dist_mae = HistogramsMAE(n_target_dist) + self.node_dist_mae = HistogramsMAE(node_target_dist) + self.edge_dist_mae = HistogramsMAE(edge_target_dist) + self.valency_dist_mae = HistogramsMAE(valency_target_dist) + + self.train_smiles = train_smiles + self.dataset_info = di + + self.rdkit_metrics = [] # validity, relaxed_validity, uniqueness, novelty + + def forward(self, molecules: list, name, current_epoch, val_counter, local_rank, cfg, test=False): + stability, rdkit_metrics, all_smiles = compute_molecular_metrics(molecules, self.train_smiles, self.dataset_info) + + self.rdkit_metrics=rdkit_metrics + + if test and local_rank == 0: + with open(r'final_smiles.txt', 'w') as fp: + for smiles in all_smiles: + # write each item on a new line + fp.write("%s\n" % smiles) + print('All smiles saved') + + print("Starting custom metrics") + self.generated_n_dist(molecules) + generated_n_dist = self.generated_n_dist.compute() + self.n_dist_mae(generated_n_dist) + + self.generated_node_dist(molecules) + generated_node_dist = self.generated_node_dist.compute() + self.node_dist_mae(generated_node_dist) + + self.generated_edge_dist(molecules) + generated_edge_dist = self.generated_edge_dist.compute() + self.edge_dist_mae(generated_edge_dist) + + self.generated_valency_dist(molecules) + generated_valency_dist = self.generated_valency_dist.compute() + self.valency_dist_mae(generated_valency_dist) + + to_log = {} + for i, atom_type in enumerate(self.dataset_info.atom_decoder): + generated_probability = generated_node_dist[i] + target_probability = self.node_target_dist[i] + to_log[f'molecular_metrics/{atom_type}_dist'] = (generated_probability - target_probability).item() + + for j, bond_type in enumerate(['No bond', 'Single', 'Double', 'Triple', 'Aromatic']): + generated_probability = generated_edge_dist[j] + target_probability = self.edge_target_dist[j] + to_log[f'molecular_metrics/bond_{bond_type}_dist'] = (generated_probability - target_probability).item() + + for valency in range(6): + generated_probability = generated_valency_dist[valency] + target_probability = self.valency_target_dist[valency] + to_log[f'molecular_metrics/valency_{valency}_dist'] = (generated_probability - target_probability).item() + + n_mae = self.n_dist_mae.compute() + node_mae = self.node_dist_mae.compute() + edge_mae = self.edge_dist_mae.compute() + valency_mae = self.valency_dist_mae.compute() + + # if wandb.run: + # wandb.log(to_log, commit=False) + # wandb.run.summary['Gen n distribution'] = generated_n_dist + # wandb.run.summary['Gen node distribution'] = generated_node_dist + # wandb.run.summary['Gen edge distribution'] = generated_edge_dist + # wandb.run.summary['Gen valency distribution'] = generated_valency_dist + + # wandb.log({'basic_metrics/n_mae': n_mae, + # 
'basic_metrics/node_mae': node_mae, + # 'basic_metrics/edge_mae': edge_mae, + # 'basic_metrics/valency_mae': valency_mae}, commit=False) + + if local_rank == 0: + print("Custom metrics computed.") + os.makedirs(os.path.join(cfg.dataset.datadir,'graphs',name), exist_ok = True) + valid_unique_molecules = rdkit_metrics[1] + textfile = open(f'{cfg.dataset.datadir}/graphs/{name}/valid_unique_molecules_e{current_epoch}_b{val_counter}.txt', "w") + textfile.writelines(valid_unique_molecules) + textfile.close() + # print("Stability metrics:", stability, "--", rdkit_metrics[0]) + + def reset(self): + for metric in [self.n_dist_mae, self.node_dist_mae, self.edge_dist_mae, self.valency_dist_mae]: + metric.reset() + + +class GeneratedNDistribution(Metric): + full_state_update = False + def __init__(self, max_n): + super().__init__() + self.add_state('n_dist', default=torch.zeros(max_n + 1, dtype=torch.float), dist_reduce_fx="sum") + + def update(self, molecules): + for molecule in molecules: + atom_types, _ = molecule + n = atom_types.shape[0] + self.n_dist[n] += 1 + + def compute(self): + return self.n_dist / torch.sum(self.n_dist) + + +class GeneratedNodesDistribution(Metric): + full_state_update = False + def __init__(self, num_atom_types): + super().__init__() + self.add_state('node_dist', default=torch.zeros(num_atom_types, dtype=torch.float), dist_reduce_fx="sum") + + def update(self, molecules): + for molecule in molecules: + atom_types, _ = molecule + + for atom_type in atom_types: + assert int(atom_type) != -1, "Mask error, the molecules should already be masked at the right shape" + self.node_dist[int(atom_type)] += 1 + + def compute(self): + return self.node_dist / torch.sum(self.node_dist) + + +class GeneratedEdgesDistribution(Metric): + full_state_update = False + def __init__(self, num_edge_types): + super().__init__() + self.add_state('edge_dist', default=torch.zeros(num_edge_types, dtype=torch.float), dist_reduce_fx="sum") + + def update(self, molecules): + for molecule in molecules: + _, edge_types = molecule + mask = torch.ones_like(edge_types) + mask = torch.triu(mask, diagonal=1).bool() + edge_types = edge_types[mask] + unique_edge_types, counts = torch.unique(edge_types, return_counts=True) + for type, count in zip(unique_edge_types, counts): + self.edge_dist[type] += count + + def compute(self): + return self.edge_dist / torch.sum(self.edge_dist) + + +class MeanNumberEdge(Metric): + full_state_update = False + def __init__(self): + super().__init__() + self.add_state('total_edge', default=torch.tensor(0.), dist_reduce_fx="sum") + self.add_state('total_samples', default=torch.tensor(0.), dist_reduce_fx="sum") + + def update(self, molecules, weight=1.0) -> None: + for molecule in molecules: + _, edge_types = molecule + triu_edge_types = torch.triu(edge_types, diagonal=1) + bonds = torch.nonzero(triu_edge_types) + self.total_edge += len(bonds) + self.total_samples += len(molecules) + + def compute(self): + return self.total_edge / self.total_samples + + +class ValencyDistribution(Metric): + full_state_update = False + def __init__(self, max_n): + super().__init__() + self.add_state('edgepernode_dist', default=torch.zeros(3 * max_n - 2, dtype=torch.float), dist_reduce_fx="sum") + + def update(self, molecules) -> None: + for molecule in molecules: + _, edge_types = molecule + edge_types[edge_types == 4] = 1.5 + valencies = torch.sum(edge_types, dim=0) + unique, counts = torch.unique(valencies, return_counts=True) + for valency, count in zip(unique, counts): + 
self.edgepernode_dist[valency] += count + + def compute(self): + return self.edgepernode_dist / torch.sum(self.edgepernode_dist) + + +class HistogramsMAE(MeanAbsoluteError): + def __init__(self, target_histogram, **kwargs): + """ Compute the distance between histograms. """ + super().__init__(**kwargs) + assert (target_histogram.sum() - 1).abs() < 1e-3 + self.target_histogram = target_histogram + + def update(self, pred): + pred = pred / pred.sum() + self.target_histogram = self.target_histogram.type_as(pred) + super().update(pred, self.target_histogram) + + +class MSEPerClass(MeanSquaredError): + full_state_update = False + def __init__(self, class_id): + super().__init__() + self.class_id = class_id + + def update(self, preds: Tensor, target: Tensor) -> None: + """Update state with predictions and targets. + + Args: + preds: Predictions from model + target: Ground truth values + """ + preds = preds[..., self.class_id] + target = target[..., self.class_id] + super().update(preds, target) + + +class HydroMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class CarbonMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class NitroMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class OxyMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class FluorMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class BoronMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class BrMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class ClMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class IodineMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class PhosphorusMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class SulfurMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class SeMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + +class SiMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + + + +# Bonds MSE + +class NoBondMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + + +class SingleMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + + +class DoubleMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + + +class TripleMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + + +class AromaticMSE(MSEPerClass): + def __init__(self, i): + super().__init__(i) + + +class AtomMetrics(MetricCollection): + def __init__(self, dataset_infos): + remove_h = dataset_infos.remove_h + self.atom_decoder = dataset_infos.atom_decoder + num_atom_types = len(self.atom_decoder) + + types = {'H': 0, 'C': 1, 'N': 2, 'O': 3, 'F': 4, 'B': 5, 'Br': 6, + 'Cl': 7, 'I': 8, 'P': 9, 'S': 10, 'Se': 11, 'Si': 12} + + class_dict = {'H': HydroMSE, 'C': CarbonMSE, 'N': NitroMSE, 'O': OxyMSE, 'F': FluorMSE, 'B': BoronMSE, + 'Br': BrMSE, 'Cl': ClMSE, 'I': IodineMSE, 'P': PhosphorusMSE, 'S': SulfurMSE, 'Se': SeMSE, + 'Si': SiMSE} + + metrics_list = [] + for i, atom_type in enumerate(self.atom_decoder): + metrics_list.append(class_dict[atom_type](i)) + + super().__init__(metrics_list) + + +class BondMetrics(MetricCollection): + def __init__(self): + mse_no_bond = NoBondMSE(0) + mse_SI = SingleMSE(1) + mse_DO = DoubleMSE(2) + mse_TR = TripleMSE(3) + mse_AR = AromaticMSE(4) + super().__init__([mse_no_bond, mse_SI, mse_DO, mse_TR, mse_AR]) + + +if __name__ == '__main__': + from torchmetrics.utilities import check_forward_full_state_property diff --git 
a/openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics_discrete.py b/openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics_discrete.py new file mode 100644 index 0000000000..9cbe73b6a5 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/metrics/molecular_metrics_discrete.py @@ -0,0 +1,198 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +from torchmetrics import Metric, MetricCollection +from torch import Tensor +# import wandb +import torch.nn as nn + + +class CEPerClass(Metric): + full_state_update = False + def __init__(self, class_id): + super().__init__() + self.class_id = class_id + self.add_state('total_ce', default=torch.tensor(0.), dist_reduce_fx="sum") + self.add_state('total_samples', default=torch.tensor(0.), dist_reduce_fx="sum") + self.softmax = torch.nn.Softmax(dim=-1) + self.binary_cross_entropy = torch.nn.BCELoss(reduction='sum') + + def update(self, preds: Tensor, target: Tensor) -> None: + """Update state with predictions and targets. + Args: + preds: Predictions from model (bs, n, d) or (bs, n, n, d) + target: Ground truth values (bs, n, d) or (bs, n, n, d) + """ + target = target.reshape(-1, target.shape[-1]) + mask = (target != 0.).any(dim=-1) + + prob = self.softmax(preds)[..., self.class_id] + prob = prob.flatten()[mask] + + target = target[:, self.class_id] + target = target[mask] + + output = self.binary_cross_entropy(prob, target) + self.total_ce += output + self.total_samples += prob.numel() + + def compute(self): + return self.total_ce / self.total_samples + + +class HydrogenCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class CarbonCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class NitroCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class OxyCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class FluorCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class BoronCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class BrCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class ClCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class IodineCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class PhosphorusCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class SulfurCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class SeCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class SiCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class NoBondCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class SingleCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class DoubleCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class TripleCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class AromaticCE(CEPerClass): + def __init__(self, i): + super().__init__(i) + + +class AtomMetricsCE(MetricCollection): + def __init__(self, dataset_infos): + atom_decoder = dataset_infos.atom_decoder + + class_dict = {'H': HydrogenCE, 'C': CarbonCE, 'N': NitroCE, 'O': OxyCE, 'F': FluorCE, 'B': BoronCE, + 'Br': BrCE, 'Cl': ClCE, 'I': IodineCE, 'P': PhosphorusCE, 'S': SulfurCE, 'Se': SeCE, + 'Si': SiCE} + + metrics_list = [] + for i, atom_type in enumerate(atom_decoder): + metrics_list.append(class_dict[atom_type](i)) + 
super().__init__(metrics_list) + + +class BondMetricsCE(MetricCollection): + def __init__(self): + ce_no_bond = NoBondCE(0) + ce_SI = SingleCE(1) + ce_DO = DoubleCE(2) + ce_TR = TripleCE(3) + ce_AR = AromaticCE(4) + super().__init__([ce_no_bond, ce_SI, ce_DO, ce_TR, ce_AR]) + + +class TrainMolecularMetricsDiscrete(nn.Module): + def __init__(self, dataset_infos): + super().__init__() + self.train_atom_metrics = AtomMetricsCE(dataset_infos=dataset_infos) + self.train_bond_metrics = BondMetricsCE() + + def forward(self, masked_pred_X, masked_pred_E, true_X, true_E, log: bool): + self.train_atom_metrics(masked_pred_X, true_X) + self.train_bond_metrics(masked_pred_E, true_E) + if log: + to_log = {} + for key, val in self.train_atom_metrics.compute().items(): + to_log['train/' + key] = val.item() + for key, val in self.train_bond_metrics.compute().items(): + to_log['train/' + key] = val.item() + # if wandb.run: + # wandb.log(to_log, commit=False) + + def reset(self): + for metric in [self.train_atom_metrics, self.train_bond_metrics]: + metric.reset() + + def log_epoch_metrics(self): + epoch_atom_metrics = self.train_atom_metrics.compute() + epoch_bond_metrics = self.train_bond_metrics.compute() + + to_log = {} + for key, val in epoch_atom_metrics.items(): + to_log['train_epoch/' + key] = val.item() + for key, val in epoch_bond_metrics.items(): + to_log['train_epoch/' + key] = val.item() + # if wandb.run: + # wandb.log(to_log, commit=False) + + for key, val in epoch_atom_metrics.items(): + epoch_atom_metrics[key] = val.item() + for key, val in epoch_bond_metrics.items(): + epoch_bond_metrics[key] = val.item() + + return epoch_atom_metrics, epoch_bond_metrics + diff --git a/openfl-tutorials/experimental/DiGress/digress/metrics/train_metrics.py b/openfl-tutorials/experimental/DiGress/digress/metrics/train_metrics.py new file mode 100644 index 0000000000..9076d0d020 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/metrics/train_metrics.py @@ -0,0 +1,129 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +from torch import Tensor +import torch.nn as nn +from torchmetrics import Metric, MeanSquaredError, MetricCollection +import time +# import wandb +from digress.metrics.abstract_metrics import SumExceptBatchMetric, SumExceptBatchMSE, SumExceptBatchKL, \ + CrossEntropyMetric, ProbabilityMetric, NLL + + +class NodeMSE(MeanSquaredError): + def __init__(self, *args): + super().__init__(*args) + + +class EdgeMSE(MeanSquaredError): + def __init__(self, *args): + super().__init__(*args) + + +class TrainLoss(nn.Module): + def __init__(self): + super(TrainLoss, self).__init__() + self.train_node_mse = NodeMSE() + self.train_edge_mse = EdgeMSE() + self.train_y_mse = MeanSquaredError() + + def forward(self, masked_pred_epsX, masked_pred_epsE, pred_y, true_epsX, true_epsE, true_y, log: bool): + mse_X = self.train_node_mse(masked_pred_epsX, true_epsX) if true_epsX.numel() > 0 else 0.0 + mse_E = self.train_edge_mse(masked_pred_epsE, true_epsE) if true_epsE.numel() > 0 else 0.0 + mse_y = self.train_y_mse(pred_y, true_y) if true_y.numel() > 0 else 0.0 + mse = mse_X + mse_E + mse_y + + if log: + to_log = {'train_loss/batch_mse': mse.detach(), + 'train_loss/node_MSE': self.train_node_mse.compute(), + 'train_loss/edge_MSE': self.train_edge_mse.compute(), + 'train_loss/y_mse': self.train_y_mse.compute()} + # if wandb.run: + # wandb.log(to_log, commit=True) + + return mse + + def reset(self): + for metric in 
(self.train_node_mse, self.train_edge_mse, self.train_y_mse): + metric.reset() + + def log_epoch_metrics(self): + epoch_node_mse = self.train_node_mse.compute() if self.train_node_mse.total > 0 else -1 + epoch_edge_mse = self.train_edge_mse.compute() if self.train_edge_mse.total > 0 else -1 + epoch_y_mse = self.train_y_mse.compute() if self.train_y_mse.total > 0 else -1 + + to_log = {"train_epoch/epoch_X_mse": epoch_node_mse, + "train_epoch/epoch_E_mse": epoch_edge_mse, + "train_epoch/epoch_y_mse": epoch_y_mse} + # if wandb.run: + # wandb.log(to_log) + return to_log + + + +class TrainLossDiscrete(nn.Module): + """ Train with Cross entropy""" + def __init__(self, lambda_train): + super().__init__() + self.node_loss = CrossEntropyMetric() + self.edge_loss = CrossEntropyMetric() + self.y_loss = CrossEntropyMetric() + self.lambda_train = lambda_train + + def forward(self, masked_pred_X, masked_pred_E, pred_y, true_X, true_E, true_y, log: bool): + """ Compute train metrics + masked_pred_X : tensor -- (bs, n, dx) + masked_pred_E : tensor -- (bs, n, n, de) + pred_y : tensor -- (bs, ) + true_X : tensor -- (bs, n, dx) + true_E : tensor -- (bs, n, n, de) + true_y : tensor -- (bs, ) + log : boolean. """ + true_X = torch.reshape(true_X, (-1, true_X.size(-1))) # (bs * n, dx) + true_E = torch.reshape(true_E, (-1, true_E.size(-1))) # (bs * n * n, de) + masked_pred_X = torch.reshape(masked_pred_X, (-1, masked_pred_X.size(-1))) # (bs * n, dx) + masked_pred_E = torch.reshape(masked_pred_E, (-1, masked_pred_E.size(-1))) # (bs * n * n, de) + + # Remove masked rows + mask_X = (true_X != 0.).any(dim=-1) + mask_E = (true_E != 0.).any(dim=-1) + + flat_true_X = true_X[mask_X, :] + flat_pred_X = masked_pred_X[mask_X, :] + + flat_true_E = true_E[mask_E, :] + flat_pred_E = masked_pred_E[mask_E, :] + + loss_X = self.node_loss(flat_pred_X, flat_true_X) if true_X.numel() > 0 else 0.0 + loss_E = self.edge_loss(flat_pred_E, flat_true_E) if true_E.numel() > 0 else 0.0 + loss_y = self.y_loss(pred_y, true_y) if true_y.numel() > 0 else 0.0 + + if log: + to_log = {"train_loss/batch_CE": (loss_X + loss_E + loss_y).detach(), + "train_loss/X_CE": self.node_loss.compute() if true_X.numel() > 0 else -1, + "train_loss/E_CE": self.edge_loss.compute() if true_E.numel() > 0 else -1, + "train_loss/y_CE": self.y_loss.compute() if true_y.numel() > 0 else -1} + # if wandb.run: + # wandb.log(to_log, commit=True) + return loss_X + self.lambda_train[0] * loss_E + self.lambda_train[1] * loss_y + + def reset(self): + for metric in [self.node_loss, self.edge_loss, self.y_loss]: + metric.reset() + + def log_epoch_metrics(self): + epoch_node_loss = self.node_loss.compute() if self.node_loss.total_samples > 0 else -1 + epoch_edge_loss = self.edge_loss.compute() if self.edge_loss.total_samples > 0 else -1 + epoch_y_loss = self.y_loss.compute() if self.y_loss.total_samples > 0 else -1 + + to_log = {"train_epoch/x_CE": epoch_node_loss, + "train_epoch/E_CE": epoch_edge_loss, + "train_epoch/y_CE": epoch_y_loss} + # if wandb.run: + # wandb.log(to_log, commit=False) + + return to_log + + + diff --git a/openfl-tutorials/experimental/DiGress/digress/models/__init__.py b/openfl-tutorials/experimental/DiGress/digress/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/openfl-tutorials/experimental/DiGress/digress/models/layers.py b/openfl-tutorials/experimental/DiGress/digress/models/layers.py new file mode 100644 index 0000000000..b41b057577 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/models/layers.py
@@ -0,0 +1,49 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import torch +import torch.nn as nn + + +class Xtoy(nn.Module): + def __init__(self, dx, dy): + """ Map node features to global features """ + super().__init__() + self.lin = nn.Linear(4 * dx, dy) + + def forward(self, X): + """ X: bs, n, dx. """ + m = X.mean(dim=1) + mi = X.min(dim=1)[0] + ma = X.max(dim=1)[0] + std = X.std(dim=1) + z = torch.hstack((m, mi, ma, std)) + out = self.lin(z) + return out + + +class Etoy(nn.Module): + def __init__(self, d, dy): + """ Map edge features to global features. """ + super().__init__() + self.lin = nn.Linear(4 * d, dy) + + def forward(self, E): + """ E: bs, n, n, de + Features relative to the diagonal of E could potentially be added. + """ + m = E.mean(dim=(1, 2)) + mi = E.min(dim=2)[0].min(dim=1)[0] + ma = E.max(dim=2)[0].max(dim=1)[0] + std = torch.std(E, dim=(1, 2)) + z = torch.hstack((m, mi, ma, std)) + out = self.lin(z) + return out + + +def masked_softmax(x, mask, **kwargs): + if mask.sum() == 0: + return x + x_masked = x.clone() + x_masked[mask == 0] = -float("inf") + return torch.softmax(x_masked, **kwargs) \ No newline at end of file diff --git a/openfl-tutorials/experimental/DiGress/digress/models/transformer_model.py b/openfl-tutorials/experimental/DiGress/digress/models/transformer_model.py new file mode 100644 index 0000000000..337df8451f --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/models/transformer_model.py @@ -0,0 +1,287 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import math + +import torch +import torch.nn as nn +from torch.nn.modules.dropout import Dropout +from torch.nn.modules.linear import Linear +from torch.nn.modules.normalization import LayerNorm +from torch.nn import functional as F +from torch import Tensor + +from digress import utils +from digress.diffusion import diffusion_utils +from digress.models.layers import Xtoy, Etoy, masked_softmax + + +class XEyTransformerLayer(nn.Module): + """ Transformer that updates node, edge and global features + d_x: node features + d_e: edge features + d_y: global features + n_head: the number of heads in the multi_head_attention + dim_feedforward: the dimension of the feedforward network model after self-attention + dropout: dropout probability. 0 to disable + layer_norm_eps: eps value in layer normalizations.
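+        A construction sketch (the dimensions below are illustrative assumptions, not values fixed by DiGress):
+            layer = XEyTransformerLayer(dx=256, de=64, dy=64, n_head=8)
+            X, E, y = layer(X, E, y, node_mask)   # X: (bs, n, 256), E: (bs, n, n, 64), y: (bs, 64), shapes preserved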
+ """ + def __init__(self, dx: int, de: int, dy: int, n_head: int, dim_ffX: int = 2048, + dim_ffE: int = 128, dim_ffy: int = 2048, dropout: float = 0.1, + layer_norm_eps: float = 1e-5, device=None, dtype=None) -> None: + kw = {'device': device, 'dtype': dtype} + super().__init__() + + self.self_attn = NodeEdgeBlock(dx, de, dy, n_head, **kw) + + self.linX1 = Linear(dx, dim_ffX, **kw) + self.linX2 = Linear(dim_ffX, dx, **kw) + self.normX1 = LayerNorm(dx, eps=layer_norm_eps, **kw) + self.normX2 = LayerNorm(dx, eps=layer_norm_eps, **kw) + self.dropoutX1 = Dropout(dropout) + self.dropoutX2 = Dropout(dropout) + self.dropoutX3 = Dropout(dropout) + + self.linE1 = Linear(de, dim_ffE, **kw) + self.linE2 = Linear(dim_ffE, de, **kw) + self.normE1 = LayerNorm(de, eps=layer_norm_eps, **kw) + self.normE2 = LayerNorm(de, eps=layer_norm_eps, **kw) + self.dropoutE1 = Dropout(dropout) + self.dropoutE2 = Dropout(dropout) + self.dropoutE3 = Dropout(dropout) + + self.lin_y1 = Linear(dy, dim_ffy, **kw) + self.lin_y2 = Linear(dim_ffy, dy, **kw) + self.norm_y1 = LayerNorm(dy, eps=layer_norm_eps, **kw) + self.norm_y2 = LayerNorm(dy, eps=layer_norm_eps, **kw) + self.dropout_y1 = Dropout(dropout) + self.dropout_y2 = Dropout(dropout) + self.dropout_y3 = Dropout(dropout) + + self.activation = F.relu + + def forward(self, X: Tensor, E: Tensor, y, node_mask: Tensor): + """ Pass the input through the encoder layer. + X: (bs, n, d) + E: (bs, n, n, d) + y: (bs, dy) + node_mask: (bs, n) Mask for the src keys per batch (optional) + Output: newX, newE, new_y with the same shape. + """ + + newX, newE, new_y = self.self_attn(X, E, y, node_mask=node_mask) + + newX_d = self.dropoutX1(newX) + X = self.normX1(X + newX_d) + + newE_d = self.dropoutE1(newE) + E = self.normE1(E + newE_d) + + new_y_d = self.dropout_y1(new_y) + y = self.norm_y1(y + new_y_d) + + ff_outputX = self.linX2(self.dropoutX2(self.activation(self.linX1(X)))) + ff_outputX = self.dropoutX3(ff_outputX) + X = self.normX2(X + ff_outputX) + + ff_outputE = self.linE2(self.dropoutE2(self.activation(self.linE1(E)))) + ff_outputE = self.dropoutE3(ff_outputE) + E = self.normE2(E + ff_outputE) + + ff_output_y = self.lin_y2(self.dropout_y2(self.activation(self.lin_y1(y)))) + ff_output_y = self.dropout_y3(ff_output_y) + y = self.norm_y2(y + ff_output_y) + + return X, E, y + + +class NodeEdgeBlock(nn.Module): + """ Self attention layer that also updates the representations on the edges. """ + def __init__(self, dx, de, dy, n_head, **kwargs): + super().__init__() + assert dx % n_head == 0, f"dx: {dx} -- nhead: {n_head}" + self.dx = dx + self.de = de + self.dy = dy + self.df = int(dx / n_head) + self.n_head = n_head + + # Attention + self.q = Linear(dx, dx) + self.k = Linear(dx, dx) + self.v = Linear(dx, dx) + + # FiLM E to X + self.e_add = Linear(de, dx) + self.e_mul = Linear(de, dx) + + # FiLM y to E + self.y_e_mul = Linear(dy, dx) # Warning: here it's dx and not de + self.y_e_add = Linear(dy, dx) + + # FiLM y to X + self.y_x_mul = Linear(dy, dx) + self.y_x_add = Linear(dy, dx) + + # Process y + self.y_y = Linear(dy, dy) + self.x_y = Xtoy(dx, dy) + self.e_y = Etoy(de, dy) + + # Output layers + self.x_out = Linear(dx, dx) + self.e_out = Linear(dx, de) + self.y_out = nn.Sequential(nn.Linear(dy, dy), nn.ReLU(), nn.Linear(dy, dy)) + + def forward(self, X, E, y, node_mask): + """ + :param X: bs, n, d node features + :param E: bs, n, n, d edge features + :param y: bs, dz global features + :param node_mask: bs, n + :return: newX, newE, new_y with the same shape. 
+ """ + bs, n, _ = X.shape + x_mask = node_mask.unsqueeze(-1) # bs, n, 1 + e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1 + e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1 + + # 1. Map X to keys and queries + Q = self.q(X) * x_mask # (bs, n, dx) + K = self.k(X) * x_mask # (bs, n, dx) + diffusion_utils.assert_correctly_masked(Q, x_mask) + # 2. Reshape to (bs, n, n_head, df) with dx = n_head * df + + Q = Q.reshape((Q.size(0), Q.size(1), self.n_head, self.df)) + K = K.reshape((K.size(0), K.size(1), self.n_head, self.df)) + + Q = Q.unsqueeze(2) # (bs, 1, n, n_head, df) + K = K.unsqueeze(1) # (bs, n, 1, n head, df) + + # Compute unnormalized attentions. Y is (bs, n, n, n_head, df) + Y = Q * K + Y = Y / math.sqrt(Y.size(-1)) + diffusion_utils.assert_correctly_masked(Y, (e_mask1 * e_mask2).unsqueeze(-1)) + + E1 = self.e_mul(E) * e_mask1 * e_mask2 # bs, n, n, dx + E1 = E1.reshape((E.size(0), E.size(1), E.size(2), self.n_head, self.df)) + + E2 = self.e_add(E) * e_mask1 * e_mask2 # bs, n, n, dx + E2 = E2.reshape((E.size(0), E.size(1), E.size(2), self.n_head, self.df)) + + # Incorporate edge features to the self attention scores. + Y = Y * (E1 + 1) + E2 # (bs, n, n, n_head, df) + + # Incorporate y to E + newE = Y.flatten(start_dim=3) # bs, n, n, dx + ye1 = self.y_e_add(y).unsqueeze(1).unsqueeze(1) # bs, 1, 1, de + ye2 = self.y_e_mul(y).unsqueeze(1).unsqueeze(1) + newE = ye1 + (ye2 + 1) * newE + + # Output E + newE = self.e_out(newE) * e_mask1 * e_mask2 # bs, n, n, de + diffusion_utils.assert_correctly_masked(newE, e_mask1 * e_mask2) + + # Compute attentions. attn is still (bs, n, n, n_head, df) + softmax_mask = e_mask2.expand(-1, n, -1, self.n_head) # bs, 1, n, 1 + attn = masked_softmax(Y, softmax_mask, dim=2) # bs, n, n, n_head + + V = self.v(X) * x_mask # bs, n, dx + V = V.reshape((V.size(0), V.size(1), self.n_head, self.df)) + V = V.unsqueeze(1) # (bs, 1, n, n_head, df) + + # Compute weighted values + weighted_V = attn * V + weighted_V = weighted_V.sum(dim=2) + + # Send output to input dim + weighted_V = weighted_V.flatten(start_dim=2) # bs, n, dx + + # Incorporate y to X + yx1 = self.y_x_add(y).unsqueeze(1) + yx2 = self.y_x_mul(y).unsqueeze(1) + newX = yx1 + (yx2 + 1) * weighted_V + + # Output X + newX = self.x_out(newX) * x_mask + diffusion_utils.assert_correctly_masked(newX, x_mask) + + # Process y based on X axnd E + y = self.y_y(y) + e_y = self.e_y(E) + x_y = self.x_y(X) + new_y = y + x_y + e_y + new_y = self.y_out(new_y) # bs, dy + + return newX, newE, new_y + + +class GraphTransformer(nn.Module): + """ + n_layers : int -- number of layers + dims : dict -- contains dimensions for each feature type + """ + def __init__(self, n_layers: int, input_dims: dict, hidden_mlp_dims: dict, hidden_dims: dict, + output_dims: dict, act_fn_in: nn.ReLU(), act_fn_out: nn.ReLU()): + super().__init__() + self.n_layers = n_layers + self.out_dim_X = output_dims['X'] + self.out_dim_E = output_dims['E'] + self.out_dim_y = output_dims['y'] + + self.mlp_in_X = nn.Sequential(nn.Linear(input_dims['X'], hidden_mlp_dims['X']), act_fn_in, + nn.Linear(hidden_mlp_dims['X'], hidden_dims['dx']), act_fn_in) + + self.mlp_in_E = nn.Sequential(nn.Linear(input_dims['E'], hidden_mlp_dims['E']), act_fn_in, + nn.Linear(hidden_mlp_dims['E'], hidden_dims['de']), act_fn_in) + + self.mlp_in_y = nn.Sequential(nn.Linear(input_dims['y'], hidden_mlp_dims['y']), act_fn_in, + nn.Linear(hidden_mlp_dims['y'], hidden_dims['dy']), act_fn_in) + + self.tf_layers = nn.ModuleList([XEyTransformerLayer(dx=hidden_dims['dx'], + de=hidden_dims['de'], + 
dy=hidden_dims['dy'], + n_head=hidden_dims['n_head'], + dim_ffX=hidden_dims['dim_ffX'], + dim_ffE=hidden_dims['dim_ffE']) + for i in range(n_layers)]) + + self.mlp_out_X = nn.Sequential(nn.Linear(hidden_dims['dx'], hidden_mlp_dims['X']), act_fn_out, + nn.Linear(hidden_mlp_dims['X'], output_dims['X'])) + + self.mlp_out_E = nn.Sequential(nn.Linear(hidden_dims['de'], hidden_mlp_dims['E']), act_fn_out, + nn.Linear(hidden_mlp_dims['E'], output_dims['E'])) + + self.mlp_out_y = nn.Sequential(nn.Linear(hidden_dims['dy'], hidden_mlp_dims['y']), act_fn_out, + nn.Linear(hidden_mlp_dims['y'], output_dims['y'])) + + def forward(self, X, E, y, node_mask): + bs, n = X.shape[0], X.shape[1] + + diag_mask = torch.eye(n) + diag_mask = ~diag_mask.type_as(E).bool() + diag_mask = diag_mask.unsqueeze(0).unsqueeze(-1).expand(bs, -1, -1, -1) + + X_to_out = X[..., :self.out_dim_X] + E_to_out = E[..., :self.out_dim_E] + y_to_out = y[..., :self.out_dim_y] + + new_E = self.mlp_in_E(E) + new_E = (new_E + new_E.transpose(1, 2)) / 2 + after_in = utils.PlaceHolder(X=self.mlp_in_X(X), E=new_E, y=self.mlp_in_y(y)).mask(node_mask) + X, E, y = after_in.X, after_in.E, after_in.y + + for layer in self.tf_layers: + X, E, y = layer(X, E, y, node_mask) + + X = self.mlp_out_X(X) + E = self.mlp_out_E(E) + y = self.mlp_out_y(y) + + X = (X + X_to_out) + E = (E + E_to_out) * diag_mask + y = y + y_to_out + + E = 1/2 * (E + torch.transpose(E, 1, 2)) + + return utils.PlaceHolder(X=X, E=E, y=y).mask(node_mask) diff --git a/openfl-tutorials/experimental/DiGress/digress/utils.py b/openfl-tutorials/experimental/DiGress/digress/utils.py new file mode 100644 index 0000000000..1fa296f1fe --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/digress/utils.py @@ -0,0 +1,142 @@ +# Copyright (c) 2012-2022 Clement Vignac, Igor Krawczuk, Antoine Siraudin +# source: https://github.com/cvignac/DiGress/ + +import os +import torch_geometric.utils +from omegaconf import OmegaConf, open_dict +from torch_geometric.utils import to_dense_adj, to_dense_batch +import torch +import omegaconf +# import wandb + + +def create_folders(args): + try: + # os.makedirs('checkpoints') + os.makedirs('graphs') + os.makedirs('chains') + except OSError: + pass + + try: + # os.makedirs('checkpoints/' + args.general.name) + os.makedirs('graphs/' + args.general.name) + os.makedirs('chains/' + args.general.name) + except OSError: + pass + + +def normalize(X, E, y, norm_values, norm_biases, node_mask): + X = (X - norm_biases[0]) / norm_values[0] + E = (E - norm_biases[1]) / norm_values[1] + y = (y - norm_biases[2]) / norm_values[2] + + diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1) + E[diag] = 0 + + return PlaceHolder(X=X, E=E, y=y).mask(node_mask) + + +def unnormalize(X, E, y, norm_values, norm_biases, node_mask, collapse=False): + """ + X : node features + E : edge features + y : global features + norm_values : [norm value X, norm value E, norm value y] + norm_biases : same order + node_mask + """ + X = (X * norm_values[0] + norm_biases[0]) + E = (E * norm_values[1] + norm_biases[1]) + y = y * norm_values[2] + norm_biases[2] + + return PlaceHolder(X=X, E=E, y=y).mask(node_mask, collapse) + + +def to_dense(x, edge_index, edge_attr, batch): + X, node_mask = to_dense_batch(x=x, batch=batch) + # node_mask = node_mask.float() + edge_index, edge_attr = torch_geometric.utils.remove_self_loops(edge_index, edge_attr) + # TODO: carefully check if setting node_mask as a bool breaks the continuous case + max_num_nodes = X.size(1) + E =
to_dense_adj(edge_index=edge_index, batch=batch, edge_attr=edge_attr, max_num_nodes=max_num_nodes) + E = encode_no_edge(E) + + return PlaceHolder(X=X, E=E, y=None), node_mask + + +def encode_no_edge(E): + assert len(E.shape) == 4 + if E.shape[-1] == 0: + return E + no_edge = torch.sum(E, dim=3) == 0 + first_elt = E[:, :, :, 0] + first_elt[no_edge] = 1 + E[:, :, :, 0] = first_elt + diag = torch.eye(E.shape[1], dtype=torch.bool).unsqueeze(0).expand(E.shape[0], -1, -1) + E[diag] = 0 + return E + + +def update_config_with_new_keys(cfg, saved_cfg): + saved_general = saved_cfg.general + saved_train = saved_cfg.train + saved_model = saved_cfg.model + + for key, val in saved_general.items(): + OmegaConf.set_struct(cfg.general, True) + with open_dict(cfg.general): + if key not in cfg.general.keys(): + setattr(cfg.general, key, val) + + OmegaConf.set_struct(cfg.train, True) + with open_dict(cfg.train): + for key, val in saved_train.items(): + if key not in cfg.train.keys(): + setattr(cfg.train, key, val) + + OmegaConf.set_struct(cfg.model, True) + with open_dict(cfg.model): + for key, val in saved_model.items(): + if key not in cfg.model.keys(): + setattr(cfg.model, key, val) + return cfg + + +class PlaceHolder: + def __init__(self, X, E, y): + self.X = X + self.E = E + self.y = y + + def type_as(self, x: torch.Tensor): + """ Changes the device and dtype of X, E, y. """ + self.X = self.X.type_as(x) + self.E = self.E.type_as(x) + self.y = self.y.type_as(x) + return self + + def mask(self, node_mask, collapse=False): + x_mask = node_mask.unsqueeze(-1) # bs, n, 1 + e_mask1 = x_mask.unsqueeze(2) # bs, n, 1, 1 + e_mask2 = x_mask.unsqueeze(1) # bs, 1, n, 1 + + if collapse: + self.X = torch.argmax(self.X, dim=-1) + self.E = torch.argmax(self.E, dim=-1) + + self.X[node_mask == 0] = - 1 + self.E[(e_mask1 * e_mask2).squeeze(-1) == 0] = - 1 + else: + self.X = self.X * x_mask + self.E = self.E * e_mask1 * e_mask2 + assert torch.allclose(self.E, torch.transpose(self.E, 1, 2)) + return self + + +# def setup_wandb(cfg): +# config_dict = omegaconf.OmegaConf.to_container(cfg, resolve=True, throw_on_missing=True) +# kwargs = {'name': cfg.general.name, 'project': f'graph_ddm_{cfg.dataset.name}', 'config': config_dict, +# 'settings': wandb.Settings(_disable_stats=True), 'reinit': True, 'mode': cfg.general.wandb} +# wandb.init(**kwargs) +# wandb.save('*.txt') \ No newline at end of file diff --git a/openfl-tutorials/experimental/DiGress/requirements.txt b/openfl-tutorials/experimental/DiGress/requirements.txt new file mode 100644 index 0000000000..4f7c306a53 --- /dev/null +++ b/openfl-tutorials/experimental/DiGress/requirements.txt @@ -0,0 +1,14 @@ +dill==0.3.6 +imageio==2.34.2 +matplotlib==3.9.0 +metaflow==2.7.15 +networkx==2.8.7 +omegaconf==2.3.0 +pandas==2.2.2 +pytorch_lightning==2.0.4 +ray==2.9.2 +rdkit==2024.3.5 +torch==2.3.1 +torch_geometric==2.3.1 +torchmetrics==0.11.4 +tqdm==4.65.0 \ No newline at end of file From 5e007d90776cdbaa308c971a6849742d44c3d610 Mon Sep 17 00:00:00 2001 From: kta-intel Date: Mon, 14 Oct 2024 22:16:27 +0000 Subject: [PATCH 2/2] load regressor weights when building guidance model Signed-off-by: kta-intel --- .../DiGress/Workflow_Interface_DiGress.ipynb | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb b/openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb index e8c1bb8d57..e4d95f5017 100644 --- a/openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb 
+++ b/openfl-tutorials/experimental/DiGress/Workflow_Interface_DiGress.ipynb @@ -217,24 +217,25 @@ " return loss, model.state_dict(), model.configure_optimizers()\n", "\n", "\n", - "def build_guidance(cfg, model_kwargs, model_kwargs_r, diff_model_state_dict=None):\n", + "def build_guidance(cfg, model_kwargs, model_kwargs_r, diff_model_state_dict=None, reg_model_state_dict=None):\n", " model_kwargs_copy = deepcopy(model_kwargs)\n", " model_kwargs_r_copy = deepcopy(model_kwargs_r)\n", "\n", " regressor = Qm9RegressorDiscrete(cfg=cfg, **model_kwargs_r_copy)\n", - " conditional_model = DiscreteDenoisingDiffusionGuidance(cfg=cfg, **model_kwargs_copy)\n", + " regressor.load_state_dict(reg_model_state_dict, strict=False)\n", " \n", + " conditional_model = DiscreteDenoisingDiffusionGuidance(cfg=cfg, **model_kwargs_copy)\n", " conditional_model.load_state_dict(diff_model_state_dict, strict=False)\n", " conditional_model.guidance_model = regressor\n", "\n", " return conditional_model\n", "\n", "\n", - "def run_guidance(model_args, dataloader, model_state_dict=None):\n", + "def run_guidance(model_args, dataloader, diff_model_state_dict=None, reg_model_state_dict=None):\n", "\n", " trainer = Trainer(accelerator='gpu', devices=[0], max_epochs=1, enable_checkpointing=False, logger=False)\n", "\n", - " model = build_guidance(model_args[0], model_args[2], model_args[1] , model_state_dict)\n", + " model = build_guidance(model_args[0], model_args[2], model_args[1] , diff_model_state_dict, reg_model_state_dict)\n", "\n", " trainer.validate(model, dataloader)\n", " \n", @@ -341,7 +342,8 @@ " print(f'Performing aggregated model validation for collaborator {self.input}')\n", " self.agg_validity, self.agg_uniqueness = run_guidance(self.model_args,\n", " self.val_loader,\n", - " model_state_dict=self.model_state_dict_diff)\n", + " diff_model_state_dict=self.model_state_dict_diff,\n", + " reg_model_state_dict=self.model_state_dict)\n", " \n", " self.agg_validation_score_diff, self.model_state_dict_diff, self.optimizer_diff = run_model(self.model_args,\n", " self.val_loader_diff,\n", @@ -380,7 +382,8 @@ " def local_model_validation(self): \n", " self.local_validity, self.local_uniqueness = run_guidance(self.model_args,\n", " self.val_loader,\n", - " model_state_dict=self.model_state_dict_diff)\n", + " diff_model_state_dict=self.model_state_dict_diff,\n", + " reg_model_state_dict=self.model_state_dict)\n", " \n", " self.local_validation_score_diff, self.model_state_dict_diff, self.optimizer_diff = run_model(self.model_args,\n", " self.val_loader_diff,\n", @@ -503,7 +506,7 @@ "\n", "flflow = FederatedFlow(model.state_dict(), model.configure_optimizers(), \\\n", " diff_model.state_dict(), diff_model.configure_optimizers(), \\\n", - " model_args, rounds=3)\n", + " model_args, rounds=1)\n", "flflow.runtime = local_runtime\n", "flflow.run()" ]