Commit 4df530a
Initial version as published in the paper
Showing 64 changed files with 4,203 additions and 0 deletions.

@@ -0,0 +1,9 @@
venv/*
data/*
runs/*
runs_*/*
.idea/*
*.gv*
*/__pycache__/*
*.pyc
wandb/*

@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Serhii Kostiuk

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@@ -0,0 +1,104 @@
# Learnable Extended Activation Function (LEAF) for Deep Neural Networks

Implementation of the experiments as published in the paper "Learnable Extended
Activation Function for Deep Neural Networks" by
Yevgeniy Bodyanskiy and Serhii Kostiuk.

## Running experiments

1. An NVIDIA GPU with at least 2 GiB of VRAM is recommended.
2. Install the requirements from `requirements.txt`.
3. Set `CUBLAS_WORKSPACE_CONFIG=:4096:8` in the environment variables.
4. Use the root of this repository as the current directory.
5. Add the current directory to `PYTHONPATH` so Python can find the project modules.

This repository contains a wrapper script, [run_experiment.sh](./run_experiment.sh),
that sets all the required environment variables. Use a bash shell to run an
experiment through the wrapper script.

Example:

```shell
user@host:~/repo_path$ ./run_experiment.sh experiments/train_new_base.py
```

## Reproducing the results from the paper

1. Training LeNet-5 and KerasNet networks with linear units from scratch:

```shell
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_lus base
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_lus ahaf
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_lus ahaf --dspu4
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_lus leaf --p24sl
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_lus leaf --p24sl --dspu4
```

2. Training LeNet-5 and KerasNet networks with bounded activation functions
   (`all_bfs`) from scratch:

```shell
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_bfs base
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_bfs ahaf
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_bfs ahaf --dspu4
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_bfs leaf --p24sl
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --opt adam --end_ep 100 --acts all_bfs leaf --p24sl --dspu4
```

3. On stability of LEAF-as-ReLU:

```shell
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --end_ep 100 --acts ReLU --net KerasNet --ds CIFAR-10 \
    --opt adam leaf
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --end_ep 100 --acts ReLU --net KerasNet --ds CIFAR-10 \
    --opt adam leaf --p24sl
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --end_ep 100 --acts ReLU --net KerasNet --ds CIFAR-10 \
    --opt rmsprop ahaf
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --end_ep 100 --acts ReLU --net KerasNet --ds CIFAR-10 \
    --opt rmsprop leaf
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --end_ep 100 --acts ReLU --net KerasNet --ds CIFAR-10 \
    --opt rmsprop leaf --p24sl
```

Add the `--wandb` parameter to log the training process to Weights and Biases,
which provides visualization of the parameter values and the gradient values
during training.

4. On the effect of synaptic weight initialization. Execute all commands below
   once for each of the seed values listed after the code block (replace
   `7823` with the seed under evaluation):

```shell
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --seed 7823 --opt adam --ds CIFAR-10 base
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --seed 7823 --opt adam --ds CIFAR-10 ahaf
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --seed 7823 --opt adam --ds CIFAR-10 ahaf --dspu4
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --seed 7823 --opt adam --ds CIFAR-10 leaf --p24sl
user@host:~/repo_path$ ./run_experiment.sh experiments/train_individual.py \
    --seed 7823 --opt adam --ds CIFAR-10 leaf --p24sl --dspu4
```

Seed values to evaluate: 42, 100, 128, 1999, 7823.

## Visualization of experiment results

Use the tools from the [post_experiment](./post_experiment) directory to
visualize the training process, create training result summary tables, and
plot the activation function form for LEAF/AHAF compared to the corresponding
base activations.

@@ -0,0 +1,3 @@
from .cont import AHAF, LEAF
from .fuzzy import FNeuronAct
from .af_builder import af_build, AfDefinition

@@ -0,0 +1,2 @@
from .af_definition import AfDefinition
from .af_build_m import af_build

@@ -0,0 +1,15 @@
from typing import Tuple

from ..fuzzy.f_neuron_act import FNeuronAct


def af_build_fuzzy(
        af_base: str, af_start: float, af_end: float, n_segments: int,
        in_dims: Tuple[int, ...] = (1,)
) -> FNeuronAct:
    init_f = FNeuronAct.get_init_f_by_name(af_base)

    return FNeuronAct(
        af_start, af_end, n_segments,
        init_f=init_f, input_dim=in_dims
    )

@@ -0,0 +1,42 @@
from typing import Union, Optional, Tuple

from .af_definition import AfDefinition
from ..cont import AHAF, LEAF
from ..fuzzy import FNeuronAct
from .af_build_fuzzy import af_build_fuzzy
from .af_build_traditional import af_build_traditional, AfTraditional

ActivationFunction = Union[
    AfTraditional, AHAF, LEAF, FNeuronAct
]


def af_build(
        d: AfDefinition, in_dims: Optional[Tuple[int, ...]] = None
) -> ActivationFunction:
    if in_dims is None:
        # Only makes sense for adaptive activations
        in_dims = (1,)

    if d.af_type == AfDefinition.AfType.TRAD:
        if d.interval is None:
            return af_build_traditional(d.af_base)
        else:
            return af_build_traditional(
                d.af_base,
                d.interval.start,
                d.interval.end
            )
    elif d.af_type == AfDefinition.AfType.ADA_AHAF:
        return AHAF(size=in_dims, init_as=d.af_base)
    elif d.af_type == AfDefinition.AfType.ADA_LEAF:
        return LEAF(size=in_dims, init_as=d.af_base)
    elif d.af_type == AfDefinition.AfType.ADA_FUZZ:
        return af_build_fuzzy(
            d.af_base,
            d.interval.start, d.interval.end,
            d.interval.n_segments,
            in_dims
        )
    else:
        raise NotImplementedError("The requested AF type is not supported")
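
For orientation, a minimal usage sketch of `af_build` follows. It is not part
of the commit; the `activations` import path, the tensor shapes, and the
per-channel parameter layout are assumptions made for illustration only.

```python
import torch

# Assumed import path; the actual top-level package name may differ.
from activations import af_build, AfDefinition

# Traditional ReLU: no interval needed.
relu_def = AfDefinition(af_base="ReLU", af_type=AfDefinition.AfType.TRAD)
relu_fn = af_build(relu_def)

# AHAF initialized to behave like ReLU, with one (beta, gamma) pair per
# channel; the (1, 16, 1, 1) layout is illustrative and broadcasts over an
# NCHW batch.
ahaf_def = AfDefinition(af_base="ReLU", af_type=AfDefinition.AfType.ADA_AHAF)
ahaf_act = af_build(ahaf_def, in_dims=(1, 16, 1, 1))

x = torch.randn(8, 16, 24, 24)
print(relu_fn(x).shape)   # torch.Size([8, 16, 24, 24])
print(ahaf_act(x).shape)  # torch.Size([8, 16, 24, 24])
```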

@@ -0,0 +1,38 @@
from typing import Callable, Optional

import torch
import torch.nn
import torch.nn.functional

from torch import Tensor

from ..trad import silu_manual


AfTraditional = Callable[[Tensor], Tensor]


def af_build_traditional(
        af_name: str,
        min_val: Optional[float] = None, max_val: Optional[float] = None
) -> AfTraditional:
    if af_name == "ReLU":
        return torch.relu
    elif af_name == "SiLU":
        # Using a custom SiLU implementation to exactly follow AAF alternatives
        return silu_manual
    elif af_name == "Tanh":
        return torch.tanh
    elif af_name == "HardTanh":
        if min_val is None or max_val is None:
            return torch.nn.Hardtanh()
        else:
            return torch.nn.Hardtanh(min_val, max_val)
    elif af_name == "Sigmoid":
        return torch.sigmoid
    elif af_name == "HardSigmoid":
        return torch.nn.functional.hardsigmoid
    else:
        raise NotImplementedError(
            "The requested traditional activation function is not supported"
        )
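
A quick illustration of the dispatch above; it assumes the helper is in scope,
and the bounds are arbitrary example values.

```python
import torch

# Bounded HardTanh on [-2, 2] versus an unbounded Tanh, mirroring how
# af_build() calls this helper with and without an interval.
hardtanh_fn = af_build_traditional("HardTanh", -2.0, 2.0)
tanh_fn = af_build_traditional("Tanh")

x = torch.linspace(-4.0, 4.0, steps=9)
print(hardtanh_fn(x))  # values clamped to [-2, 2]
print(tanh_fn(x))      # smooth saturation within (-1, 1)
```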

@@ -0,0 +1,24 @@
from enum import Enum
from typing import Optional, Tuple


class AfDefinition:
    class AfType(Enum):
        TRAD = 0
        ADA_AHAF = 1
        ADA_FUZZ = 2
        ADA_LEAF = 3

    class AfInterval:
        def __init__(self, start: float, end: float, n_segments: int = 0):
            self.start = start
            self.end = end
            self.n_segments = n_segments

    def __init__(
            self, af_base: str = "ReLU", af_type: AfType = AfType.TRAD,
            af_interval: Optional[AfInterval] = None
    ):
        self.af_base = af_base
        self.af_type = af_type
        self.interval = af_interval
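
The definition object is a plain container that `af_build` interprets. For
example (the base names, bounds, and segment count below are illustrative
values, not the settings used in the paper):

```python
# Bounded traditional activation: HardTanh clipped to [-1, 1].
hardtanh_def = AfDefinition(
    af_base="HardTanh",
    af_type=AfDefinition.AfType.TRAD,
    af_interval=AfDefinition.AfInterval(-1.0, 1.0),
)

# Fuzzy adaptive activation defined on [-4, 4] with 16 segments.
fuzzy_def = AfDefinition(
    af_base="Tanh",
    af_type=AfDefinition.AfType.ADA_FUZZ,
    af_interval=AfDefinition.AfInterval(-4.0, 4.0, n_segments=16),
)
```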

@@ -0,0 +1,2 @@
from .ahaf import AHAF
from .leaf import LEAF

@@ -0,0 +1,82 @@
from typing import Tuple, Any, Sequence

import torch
from torch.autograd.function import FunctionCtx


class _AHAF(torch.autograd.Function):
    @staticmethod
    def forward(u, beta, gamma) -> Any:
        # AHAF: f(u) = beta * u * sigmoid(gamma * u), a parameterized SiLU.
        y = (beta * u) * torch.sigmoid(gamma * u)
        return y

    @staticmethod
    def setup_context(ctx: FunctionCtx, inputs: Sequence[Any], output: Any) -> None:
        u, beta, gamma = inputs
        ctx.save_for_backward(u, beta, gamma)

    @staticmethod
    def backward(ctx: FunctionCtx, grad_output: Any) -> Any:
        u, beta, gamma = ctx.saved_tensors
        grad_u = grad_beta = grad_gamma = None

        gamma_u = gamma * u
        sig_gamma_u = torch.sigmoid(gamma_u)

        if ctx.needs_input_grad[0]:
            # df/du = beta * sig(gamma*u) * (1 + gamma*u * (1 - sig(gamma*u)))
            grad_u = grad_output.mul(
                (beta * sig_gamma_u)
                *
                (1 + u * gamma * (1 - sig_gamma_u))
            )
        if ctx.needs_input_grad[1]:
            # df/dbeta = u * sig(gamma*u)
            grad_beta = grad_output.mul(u * sig_gamma_u)
        if ctx.needs_input_grad[2]:
            # df/dgamma = beta * u^2 * sig(gamma*u) * sig(-gamma*u)
            grad_gamma = grad_output.mul(
                (beta * u)
                * sig_gamma_u
                * torch.sigmoid(-gamma_u)
                * u
            )

        return grad_u, grad_beta, grad_gamma


def _ahaf(u, beta, gamma):
    return _AHAF.apply(u, beta, gamma)


class AHAF(torch.nn.Module):
    def __init__(self, *, size: Tuple[int, ...] = (1,), init_as: str = 'ReLU'):
        super(AHAF, self).__init__()

        if init_as == 'ReLU':
            # Very large gamma saturates the sigmoid gate, approximating ReLU.
            self.gamma = torch.nn.Parameter(torch.ones(*size) * (2.0**16))
            self.beta = torch.nn.Parameter(torch.ones(*size))
        elif init_as == 'SiLU':
            # beta = gamma = 1 gives exactly u * sigmoid(u), i.e. SiLU.
            self.gamma = torch.nn.Parameter(torch.ones(*size))
            self.beta = torch.nn.Parameter(torch.ones(*size))
        elif init_as == 'CUSTOM':
            self.gamma = torch.nn.Parameter(torch.ones(*size) * 10)
            self.beta = torch.nn.Parameter(torch.ones(*size))
        else:
            raise ValueError("Invalid initialization mode [{}]".format(init_as))

    @staticmethod
    def _get_sample_value(t: torch.Tensor) -> float:
        # Returns the first element of the parameter tensor for __repr__.
        size = t.size()

        for _ in size:
            t = t[0]

        return t.item()

    def forward(self, inputs):
        return _ahaf(inputs, self.beta, self.gamma)

    def __repr__(self):
        return "AHAF(size={}, gamma={}, beta={})".format(
            tuple(self.gamma.size()),
            self._get_sample_value(self.gamma),
            self._get_sample_value(self.beta)
        )
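
A small sanity-check sketch for the module above (not part of the commit): with
the 'ReLU' initialization the very large gamma turns the sigmoid gate into an
almost-perfect step, so AHAF tracks ReLU closely; with the 'SiLU'
initialization (beta = gamma = 1) it computes u * sigmoid(u) exactly. The
hand-written backward can also be compared against numerical gradients.

```python
import torch

x = torch.randn(1000)

# 'ReLU' init: gamma = 2**16, so sigmoid(gamma * u) is close to a unit step.
relu_like = AHAF(size=(1,), init_as='ReLU')
assert torch.allclose(relu_like(x), torch.relu(x), atol=1e-3)

# 'SiLU' init: beta = gamma = 1 reproduces u * sigmoid(u) exactly.
silu_like = AHAF(size=(1,), init_as='SiLU')
assert torch.allclose(silu_like(x), torch.nn.functional.silu(x))

# Verify the manual backward against autograd's numerical check; the shapes
# are kept identical so no gradient reduction over broadcast dims is needed.
u = torch.randn(8, dtype=torch.double, requires_grad=True)
beta = torch.ones(8, dtype=torch.double, requires_grad=True)
gamma = torch.ones(8, dtype=torch.double, requires_grad=True)
assert torch.autograd.gradcheck(_ahaf, (u, beta, gamma))
```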