Skip to content

Commit

Permalink
Merge pull request #29 from Fraunhofer-IIS/28-update-case-study
Browse files Browse the repository at this point in the history
elaborate case study
  • Loading branch information
bknico-iis authored Sep 18, 2024
2 parents 247c20b + bc813cb commit c9934c1
Show file tree
Hide file tree
Showing 17 changed files with 810 additions and 285 deletions.
4 changes: 2 additions & 2 deletions examples/case_study/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ Additionally install *tqdm*: ``pip install tqdm``


## Data
Download monthly data *2024-01.csv* from
https://files.stlouisfed.org/files/htdocs/fred-md/monthly/2024-01.csv
Download monthly data *2024-07.csv* from
https://files.stlouisfed.org/files/htdocs/fred-md/monthly/2024-07.csv
and place it in the current directory.

## Files
Expand Down
34 changes: 34 additions & 0 deletions examples/case_study/analyse_losses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# %%
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

# %%
# Collect the per-area loss tables written by benchmark.py.
# `base_dir` instead of `dir` to avoid shadowing the builtin.
base_dir = Path(__file__).parent
areas = ["output_and_income", "consumption_and_orders", "prices"]
df_collection = []
for area in areas:
    df = pd.read_csv(base_dir / f"overall_losses_{area}.csv", index_col=[0, 1, 2])
    df.index.set_names(["var", "model", "rolling_origin"], inplace=True)
    # Forecast steps are 1-based: column 1 = one step ahead.
    df.columns = range(1, len(df.columns) + 1)
    df.columns.name = "Forecast Step"
    df_collection.append(df)

# %% Overall results: mean loss per rolling origin across all areas
dfs = pd.concat(df_collection, keys=areas, axis=0)
dfs.groupby(level=2).mean().round(3).to_latex(base_dir / "overall_results.tex")

# %% Results for first step over three variable groups
forecast_step = 1

dfs = pd.concat([df[forecast_step] for df in df_collection], keys=areas, axis=1)
mean_error_per_group = dfs.groupby(level=1).mean()

# Plot the rank of each model within every variable group (lower = better)
ax = mean_error_per_group.rank().plot(kind='bar', figsize=(10, 4))
ax.spines[['right', 'top']].set_visible(False)
plt.ylabel('Rank')
plt.xlabel('Model')
plt.legend(bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.savefig(base_dir / 'model_ranking_in_groups.pdf')
223 changes: 51 additions & 172 deletions examples/case_study/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@
import os

sys.path.append(os.path.abspath("../.."))
sys.path.append(os.path.abspath("../../.."))
sys.path.append(os.path.abspath("...."))
sys.path.append(os.path.abspath("..."))
sys.path.append(os.path.abspath(".."))
sys.path.append(os.path.abspath("."))


# %%
import torch
import torch.nn as nn
import pandas as pd
from tqdm import tqdm
from pathlib import Path

from prosper_nn.models.ecnn import ECNN
from prosper_nn.models.ensemble import Ensemble
from models import RNN_direct, RNN_recursive, RNN_S2S, Naive
from fredmd import Dataset_Fredmd
from training import EarlyStopping, Trainer
from evaluation import Evaluator
from models import init_models, Naive
from fredmd import init_datasets

from config import (
past_horizon,
Expand All @@ -29,199 +29,78 @@
n_evaluate_targets,
n_features_Y,
n_models,
area,
)

# %%
torch.manual_seed(0)


# %% Training
def train_model(
    model: nn.Module,
    dataloader: torch.utils.data.DataLoader,
    dataset_val: torch.utils.data.Dataset,
    n_epochs: int,
    patience: int,
) -> None:
    """Train ``model`` with Adam on MSE and restore the best-validation weights.

    Training stops early when the validation loss has not improved for
    ``patience`` consecutive epochs; in either case the weights of the
    epoch with the smallest validation loss are loaded back into ``model``.

    Parameters
    ----------
    model: ensemble model whose forecasts are produced via ``get_forecast``.
    dataloader: yields (features_past, target_past, target_future) batches
        in batch-first layout.
    dataset_val: validation set providing ``get_all_rolling_origins()``.
    n_epochs: maximum number of training epochs.
    patience: epochs without validation improvement before stopping.
    """
    optimizer = torch.optim.Adam(model.parameters())
    smallest_val_loss = torch.inf
    epoch_smallest_val = 0
    val_features_past, val_target_past, val_target_future = (
        dataset_val.get_all_rolling_origins()
    )
    epochs = tqdm(range(n_epochs))

    # Snapshot the initial weights so the final load_state_dict below is
    # well-defined even when n_epochs == 0. state_dict() returns references
    # to the live parameter tensors, so values must be cloned -- otherwise
    # the "best" snapshot would silently track the final weights.
    best_model_state = {k: v.detach().clone() for k, v in model.state_dict().items()}

    for epoch in epochs:
        train_loss = 0
        for features_past, target_past, target_future in dataloader:
            # Models expect time-first tensors: (seq_len, batch, features).
            target_past = target_past.transpose(1, 0)
            target_future = target_future.transpose(1, 0)
            features_past = features_past.transpose(1, 0)

            model.zero_grad()

            forecasts = get_forecast(model, features_past, target_past)

            assert forecasts.shape == target_future.shape
            loss = nn.functional.mse_loss(forecasts, target_future)
            loss.backward()
            train_loss += loss.detach()
            optimizer.step()

        # Validation loss on the first rolling origin; no_grad avoids
        # building an autograd graph that is never backpropagated.
        with torch.no_grad():
            forecasts_val = get_forecast(model, val_features_past, val_target_past)
        val_loss = nn.functional.mse_loss(forecasts_val[0], val_target_future[0]).item()
        epochs.set_postfix(
            {"val_loss": round(val_loss, 3), "train_loss": round(train_loss.item(), 3)}
        )

        # Save and later use model with best validation loss
        if val_loss < smallest_val_loss:
            print(f"Save model_state at epoch {epoch}")
            best_model_state = {
                k: v.detach().clone() for k, v in model.state_dict().items()
            }
            smallest_val_loss = val_loss
            epoch_smallest_val = epoch

        # Early Stopping
        if epoch >= epoch_smallest_val + patience:
            print(f"No validation improvement since {patience} epochs -> Stop Training")
            model.load_state_dict(best_model_state)
            return

    model.load_state_dict(best_model_state)


def get_forecast(
    model: nn.Module, features_past: torch.Tensor, target_past: torch.Tensor
) -> torch.Tensor:
    """Run the ensemble ``model`` and return only its forecast tensor.

    ECNN models take (features_past, target_past) and emit ``past_horizon``
    steps of past reconstruction before the forecast; all other models take
    only ``features_past`` and emit forecasts directly.
    """
    model_type = model.models[0]

    # Select input; `model_input` instead of `input` to avoid shadowing
    # the builtin.
    if isinstance(model_type, ECNN):
        model_input = (features_past, target_past)
    else:
        model_input = (features_past,)

    ensemble_output = model(*model_input)
    # The last entry of the Ensemble output is used as the aggregated
    # (mean) forecast -- presumably the ensemble mean; see Ensemble docs.
    mean = ensemble_output[-1]

    # Extract forecasts: strip the reconstructed past for ECNN outputs.
    if isinstance(model_type, ECNN):
        _, forecasts = torch.split(mean, past_horizon)
    else:
        forecasts = mean
    return forecasts


def evaluate_model(model: nn.Module, dataset: torch.utils.data.Dataset) -> pd.DataFrame:
    """Compute per-step MSE losses of ``model`` on each sample of ``dataset``.

    Returns a DataFrame with one row per rolling origin and one column per
    forecast step.
    """
    model.eval()
    per_origin_losses = []

    for features_past, target_past, target_future in dataset:
        # Add a batch dimension of size 1 (time-first layout).
        batched_features = features_past.unsqueeze(1)
        batched_target_past = target_past.unsqueeze(1)

        with torch.no_grad():
            forecasts = get_forecast(model, batched_features, batched_target_past)
        forecasts = forecasts.squeeze(1)
        assert forecasts.shape == target_future.shape

        step_losses = []
        for step in range(forecast_horizon):
            step_loss = nn.functional.mse_loss(forecasts[step], target_future[step])
            step_losses.append(step_loss.item())
        per_origin_losses.append(step_losses)

    return pd.DataFrame(per_origin_losses)


# %% Get Data

fredmd = Dataset_Fredmd(
past_horizon,
forecast_horizon,
split_date=train_test_split_period,
data_type="train",
)
fredmd_val = Dataset_Fredmd(
past_horizon,
forecast_horizon,
split_date=train_test_split_period,
data_type="val",
)
fredmd_test = Dataset_Fredmd(
past_horizon,
forecast_horizon,
split_date=train_test_split_period,
data_type="test",
fredmd_train, fredmd_val, fredmd_test = init_datasets(
past_horizon, forecast_horizon, train_test_split_period, area
)

# %% Run benchmark
n_features_U = len(fredmd.features)
n_state_neurons = n_features_U + n_features_Y
n_features_U = len(fredmd_train.features)
n_state_neurons = 2 * (n_features_U + n_features_Y)

overall_losses = {}

for target in fredmd.features[:n_evaluate_targets]:
fredmd.target = target
if n_evaluate_targets == None:
n_evaluate_targets = len(fredmd_train.features)

benchmark_models = {}
for index_target, target in enumerate(fredmd_train.features[:n_evaluate_targets]):
fredmd_train.target = target
fredmd_val.target = target
fredmd_test.target = target

# Error Correction Neural Network (ECNN)
ecnn = ECNN(
n_state_neurons=n_state_neurons,
n_features_U=n_features_U,
n_features_Y=n_features_Y,
past_horizon=past_horizon,
forecast_horizon=forecast_horizon,
)

# Define an Ensemble for better forecasts, heatmap visualization and sensitivity analysis
ecnn_ensemble = Ensemble(model=ecnn, n_models=n_models).double()
benchmark_models = {"ECNN": ecnn_ensemble}

# Compare to further Recurrent Neural Networks
for forecast_module in [RNN_direct, RNN_recursive, RNN_S2S]:
for recurrent_cell_type in ["elman", "gru", "lstm"]:
model = forecast_module(
n_features_U,
n_state_neurons,
n_features_Y,
forecast_horizon,
recurrent_cell_type,
)
ensemble = Ensemble(model=model, n_models=n_models).double()
benchmark_models[f"{recurrent_cell_type}_{model.forecast_method}"] = (
ensemble
)

# Train models
dataloader = torch.utils.data.DataLoader(
fredmd, batch_size=batch_size, shuffle=True
benchmark_models = init_models(
benchmark_models,
n_features_U,
n_state_neurons,
n_features_Y,
past_horizon,
forecast_horizon,
n_models,
)

for name, model in benchmark_models.items():
print(f"### Train {name} ###")
train_model(model, dataloader, fredmd_val, n_epochs, patience)
is_untrained_multivariate_model = model.multivariate and (index_target == 0)
not_naive_model = not isinstance(model, Naive)

needs_training = (
not model.multivariate or is_untrained_multivariate_model
) and not_naive_model
if needs_training:
fredmd_train.set_target_future_format(multivariate=model.multivariate)
fredmd_val.set_target_future_format(multivariate=model.multivariate)

dataloader = torch.utils.data.DataLoader(
fredmd_train, batch_size=batch_size, shuffle=True, drop_last=True
)
trainer = Trainer(model, EarlyStopping(patience), n_epochs)
print(f"### Train {name} ###")
trainer.train(dataloader, fredmd_val)

if target == "DNDGRG3M086SBEA":
torch.save(
benchmark_models["ECNN"], Path(__file__).parent / f"ECNN_{target}.pt"
)

# Test
# Additionally, compare with the naive no-change forecast
benchmark_models["Naive"] = Ensemble(
Naive(past_horizon, forecast_horizon, n_features_Y), n_models
)
losses_one_target = {}
for name, model in benchmark_models.items():
fredmd_test.set_target_future_format(multivariate=False)
evaluator = Evaluator(model, forecast_horizon)
loss_one_target_one_model = evaluator.evaluate(fredmd_test, index_target)
losses_one_target[name] = loss_one_target_one_model

all_losses = {
name: evaluate_model(model, fredmd_test)
for name, model in benchmark_models.items()
}
overall_losses[target] = pd.concat(all_losses)
overall_losses[target] = pd.concat(losses_one_target)

overall_losses = pd.concat(overall_losses)
overall_losses.to_csv(Path(__file__).parent / f"overall_losses.csv")
overall_losses.to_csv(Path(__file__).parent / f"overall_losses_{area}.csv")
mean_overall_losses = overall_losses.groupby(level=1).mean()
mean_overall_losses.to_csv(Path(__file__).parent / f"mean_overall_losses.csv")
mean_overall_losses.to_csv(Path(__file__).parent / f"mean_overall_losses_{area}.csv")
print(mean_overall_losses)
1 change: 1 addition & 0 deletions examples/case_study/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd

area = "output_and_income" # "consumption_and_orders", "prices", "output_and_income"
n_evaluate_targets = 19

past_horizon = 24
Expand Down
37 changes: 37 additions & 0 deletions examples/case_study/evaluation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from torch import nn
import torch
import pandas as pd


class Evaluator:
    """Collects per-step MSE losses of a model over a test dataset."""

    def __init__(self, model: nn.Module, forecast_horizon: int):
        # One row of per-step losses is appended per evaluated sample.
        self.losses = []
        self.model = model
        self.forecast_horizon = forecast_horizon
        self.loss_metric = nn.functional.mse_loss

    def evaluate(
        self, dataset: torch.utils.data.Dataset, index: int = None
    ) -> pd.DataFrame:
        """Evaluate ``self.model`` on every sample of ``dataset``.

        When the model emits more than one output variable, only the
        column selected by ``index`` is scored. Returns a DataFrame with
        one row per sample and one column per loss entry.
        """
        self.model.eval()

        for features_past, target_past, target_future in dataset:
            # Add a batch dimension of size 1.
            features_past = features_past.unsqueeze(1)
            target_past = target_past.unsqueeze(1)

            with torch.no_grad():
                model_input = self.model.get_input(features_past, target_past)
                raw_output = self.model(*model_input)
                forecasts = self.model.extract_forecasts(raw_output)

            forecasts = forecasts.squeeze(1)
            if forecasts.size(-1) > 1:
                # Multivariate output: keep only the evaluated variable.
                forecasts = forecasts[..., [index]]

            assert forecasts.shape == target_future.shape

            step_errors = self.loss_metric(forecasts, target_future, reduction="none")
            self.losses.append(step_errors.flatten().tolist())

        return pd.DataFrame(self.losses)
Loading

0 comments on commit c9934c1

Please sign in to comment.