Skip to content

Commit

Permalink
Feature/setup experiments (#12)
Browse files Browse the repository at this point in the history
* Add simple reporting

* Fix formatting

* Add synthetic data experiment config

* Add classification support

* Fix configs

* Fix nonfailing check when pytest fails

* Revert changes

* Add experiments scripts

* Fix classification support

* Add early stopping to config

* Final preparations

* Minor config adjustments

* Minor fixes

* Hotfix network

* Fixes

* Fixes

* Fix classification

* refactor logger interface;
adjust experiments and tests to new interface;
minor fixes, remove obsolete code

* Fix new_classes experiment

* Delete results/01_test_data.pkl

---------

Co-authored-by: DawidPludowski <[email protected]>
Co-authored-by: DawidPludowski <[email protected]>
  • Loading branch information
3 people authored Oct 26, 2023
1 parent 02d6af4 commit 0433275
Show file tree
Hide file tree
Showing 19 changed files with 171 additions and 264 deletions.
4 changes: 2 additions & 2 deletions config/01_synthetic_data_experiment_config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: "synthetic"
num_epochs: 100000
learning_rate: 0.001
weight_decay: 0.0001
weight_decay: 0
batch_size: 256
gradient_clipping: False
early_stopping: True
Expand All @@ -10,7 +10,7 @@ support_size: 5
query_size: 27

hidden_representation_size: 32
n_hidden_layers: 3 # ?
n_hidden_layers: 3
hidden_size: 32
dropout_rate: 0.1
is_classifier: False
Expand Down
2 changes: 1 addition & 1 deletion config/02_openml_data_experiment_config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: "openml"
num_epochs: 100000
learning_rate: 0.001
weight_decay: 0.0001
weight_decay: 0.0
batch_size: 37
gradient_clipping: False
early_stopping: True
Expand Down
2 changes: 1 addition & 1 deletion config/03_openml_clf_data_experiment_config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: "openml_clf"
num_epochs: 100000
learning_rate: 0.001
weight_decay: 0.0001
weight_decay: 0
batch_size: 37
gradient_clipping: False
early_stopping: True
Expand Down
6 changes: 3 additions & 3 deletions config/05_new_classes_experiment_config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
name: "new_classes"
num_epochs: 100000
learning_rate: 0.0001
weight_decay: 0.0001
batch_size: 8
weight_decay: 0
batch_size: 16
gradient_clipping: False
early_stopping: True

support_size: 3
query_size: 29

hidden_representation_size: 16
n_hidden_layers: 1
n_hidden_layers: 3
hidden_size: 16
dropout_rate: 0.1
is_classifier: True
Expand Down
8 changes: 4 additions & 4 deletions config/06_big_data_experiment_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ name: "big_data"
classification: true
num_epochs: 100000
learning_rate: 0.001
weight_decay: 0.0001
weight_decay: 0
batch_size: 16
gradient_clipping: False
early_stopping: True

support_size: 3
query_size: 29

hidden_representation_size: 16
n_hidden_layers: 1
hidden_size: 16
hidden_representation_size: 32
n_hidden_layers: 3
hidden_size: 32
dropout_rate: 0.1
is_classifier: True

Expand Down
31 changes: 7 additions & 24 deletions experiments/01_synthetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from pathlib import Path


def main(
):
def main():
config_path = Path("config/01_synthetic_data_experiment_config.yaml")
logger_type = "both"
use_profiler = "no"
Expand Down Expand Up @@ -65,34 +64,18 @@ def main(
is_classifier=config["is_classifier"],
)

if logger_type == "tb":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = None
elif logger_type == "flat":
tb_logger = None
file_logger = FileLogger("results/flat", name=config["name"])
elif logger_type == "both":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = FileLogger("results/flat", name=config["name"])
else:
raise ValueError("logger_type must from [tb, flat, both]")
results_path = Path("results") / config["name"]

trainer = HeterogenousAttributesNetworkTrainer(
n_epochs=config["num_epochs"],
gradient_clipping=config["gradient_clipping"],
learning_rate=config["learning_rate"],
weight_decay=config["weight_decay"],
early_stopping=config["early_stopping"],
file_logger=file_logger,
tb_logger=tb_logger,
file_logger=True,
tb_logger=True,
model_checkpoints=True,
results_path=results_path,
)

logger.info("Training model")
Expand All @@ -105,4 +88,4 @@ def main(


if __name__ == "__main__":
main()
main()
36 changes: 10 additions & 26 deletions experiments/02_openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
from liltab.train.logger import TensorBoardLogger, FileLogger
from loguru import logger
from pathlib import Path
from torch import nn


def main(
):
def main():
config_path = Path("config/02_openml_data_experiment_config.yaml")
logger_type = "both"
use_profiler = "no"
Expand Down Expand Up @@ -57,44 +57,28 @@ def main(
)

logger.info("Creating model")
from torch import nn

model = HeterogenousAttributesNetwork(
hidden_representation_size=config["hidden_representation_size"],
n_hidden_layers=config["n_hidden_layers"],
hidden_size=config["hidden_size"],
dropout_rate=config["dropout_rate"],
is_classifier=config["is_classifier"],
inner_activation_function=nn.ELU()
inner_activation_function=nn.ReLU(),
)

if logger_type == "tb":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = None
elif logger_type == "flat":
tb_logger = None
file_logger = FileLogger("results/flat", name=config["name"])
elif logger_type == "both":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = FileLogger("results/flat", name=config["name"])
else:
raise ValueError("logger_type must from [tb, flat, both]")
results_path = Path("results") / config["name"]

trainer = HeterogenousAttributesNetworkTrainer(
n_epochs=config["num_epochs"],
gradient_clipping=config["gradient_clipping"],
learning_rate=config["learning_rate"],
weight_decay=config["weight_decay"],
early_stopping=config["early_stopping"],
file_logger=file_logger,
tb_logger=tb_logger,
file_logger=True,
tb_logger=True,
model_checkpoints=True,
results_path=results_path,
)

logger.info("Training model")
Expand All @@ -107,4 +91,4 @@ def main(


if __name__ == "__main__":
main()
main()
42 changes: 14 additions & 28 deletions experiments/03_openml_clf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import yaml
import pytorch_lightning as pl

from pytorch_lightning.callbacks import ModelCheckpoint
from datetime import datetime

from liltab.data.datasets import PandasDataset
from liltab.data.dataloaders import (
FewShotDataLoader,
Expand All @@ -16,8 +19,7 @@
from torch import nn


def main(
):
def main():
config_path = Path("config/03_openml_clf_data_experiment_config.yaml")
logger_type = "both"
use_profiler = "no"
Expand All @@ -32,7 +34,7 @@ def main(
train_loader = ComposedDataLoaderFactory.create_composed_dataloader_from_path(
Path(config["train_data_path"]),
PandasDataset,
{},
{"encode_categorical_target": True},
FewShotDataLoader,
{"support_size": config["support_size"], "query_size": config["query_size"]},
ComposedDataLoader,
Expand All @@ -41,7 +43,7 @@ def main(
val_loader = ComposedDataLoaderFactory.create_composed_dataloader_from_path(
Path(config["val_data_path"]),
PandasDataset,
{},
{"encode_categorical_target": True},
FewShotDataLoader,
{"support_size": config["support_size"], "query_size": config["query_size"]},
RepeatableOutputComposedDataLoader,
Expand All @@ -50,7 +52,7 @@ def main(
test_loader = ComposedDataLoaderFactory.create_composed_dataloader_from_path(
Path(config["test_data_path"]),
PandasDataset,
{},
{"encode_categorical_target": True},
FewShotDataLoader,
{"support_size": config["support_size"], "query_size": config["query_size"]},
RepeatableOutputComposedDataLoader,
Expand All @@ -66,34 +68,19 @@ def main(
is_classifier=config["is_classifier"],
)

if logger_type == "tb":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = None
elif logger_type == "flat":
tb_logger = None
file_logger = FileLogger("results/flat", name=config["name"])
elif logger_type == "both":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = FileLogger("results/flat", name=config["name"])
else:
raise ValueError("logger_type must from [tb, flat, both]")
results_path = Path("results") / config["name"]

trainer = HeterogenousAttributesNetworkTrainer(
n_epochs=config["num_epochs"],
gradient_clipping=config["gradient_clipping"],
learning_rate=config["learning_rate"],
weight_decay=config["weight_decay"],
early_stopping=config["early_stopping"],
file_logger=file_logger,
tb_logger=tb_logger,
loss=nn.CrossEntropyLoss(),
file_logger=True,
tb_logger=True,
model_checkpoints=True,
results_path=results_path,
)

logger.info("Training model")
Expand All @@ -102,9 +89,8 @@ def main(
train_loader=train_loader,
val_loader=val_loader,
test_loader=test_loader,
loss=nn.CrossEntropyLoss(),
)


if __name__ == "__main__":
main()
main()
31 changes: 7 additions & 24 deletions experiments/04_same_domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from pathlib import Path


def main(
):
def main():
config_path = Path("config/04_same_domain_experiment_config.yaml")
logger_type = "both"
use_profiler = "no"
Expand Down Expand Up @@ -65,34 +64,18 @@ def main(
is_classifier=config["is_classifier"],
)

if logger_type == "tb":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = None
elif logger_type == "flat":
tb_logger = None
file_logger = FileLogger("results/flat", name=config["name"])
elif logger_type == "both":
tb_logger = TensorBoardLogger(
"results/tensorboard",
name=config["name"],
use_profiler=True if use_profiler == "yes" else False
)
file_logger = FileLogger("results/flat", name=config["name"])
else:
raise ValueError("logger_type must from [tb, flat, both]")
results_path = Path("results") / config["name"]

trainer = HeterogenousAttributesNetworkTrainer(
n_epochs=config["num_epochs"],
gradient_clipping=config["gradient_clipping"],
learning_rate=config["learning_rate"],
weight_decay=config["weight_decay"],
early_stopping=config["early_stopping"],
file_logger=file_logger,
tb_logger=tb_logger,
file_logger=True,
tb_logger=True,
model_checkpoints=True,
results_path=results_path,
)

logger.info("Training model")
Expand All @@ -105,4 +88,4 @@ def main(


if __name__ == "__main__":
main()
main()
Loading

0 comments on commit 0433275

Please sign in to comment.