Commit: Add performance gain plot

adrianstando committed Jun 14, 2023
1 parent 6abbeb3 commit 95fb2b6
Showing 8 changed files with 364 additions and 59 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.0.2] - 2023-06-14

### Added

- Performance gain analysis plot

## [1.0.1.2] - 2023-06-02

### Fixed
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -22,7 +22,7 @@
author = 'Adrian Stańdo'

# The full version, including alpha/beta/rc tags
release = '1.0.1.2'
release = '1.0.2'


# -- General configuration ---------------------------------------------------
25 changes: 21 additions & 4 deletions edgaro/explain/explainer.py
@@ -6,7 +6,8 @@
import os
import multiprocessing

from typing import List, Optional, Literal
from typing import List, Optional, Literal, Callable
from sklearn.metrics import balanced_accuracy_score

from edgaro.data.dataset import Dataset
from edgaro.model.model import Model
@@ -44,6 +45,10 @@ class Explainer:
Random state seed.
B : int, optional, default=10
Number of permutation rounds to perform on each variable - applicable only if explanation_type='VI'.
performance_metric_name : str, default='balanced_accuracy'
Name of the performance metric.
performance_metric : callable, default=balanced_accuracy_score
The performance metric function, taking true and predicted labels and returning a float.
Attributes
----------
@@ -67,6 +72,10 @@ class Explainer:
Random state seed
B : int, optional
Number of permutation rounds to perform on each variable - applicable only if explanation_type='VI'.
performance_metric_name : str
Name of the performance metric.
performance_metric : callable
The performance metric function.
References
----------
@@ -77,7 +86,8 @@

def __init__(self, model: Model, N: Optional[int] = None, explanation_type: Literal['PDP', 'ALE', 'VI'] = 'PDP',
verbose: bool = False, processes: int = 1, random_state: Optional[int] = None,
B: Optional[int] = 10) -> None:
B: Optional[int] = 10, performance_metric_name: str = 'balanced_accuracy',
performance_metric: Callable[[pd.Series, pd.Series], float] = balanced_accuracy_score) -> None:
self.model = model
self.explainer = None
self.name = model.name
@@ -87,6 +97,8 @@ def __init__(self, model: Model, N: Optional[int] = None, explanation_type: Lite
self.processes = processes
self.random_state = random_state
self.B = B
self.performance_metric_name = performance_metric_name
self.performance_metric = performance_metric

if self.processes == -1:
self.processes = multiprocessing.cpu_count()
@@ -134,6 +146,9 @@ def transform(self, variables: Optional[List[str]] = None) -> Explanation:
-------
Explanation
"""
performance = self.model.evaluate(metrics_output_class=[self.performance_metric])
performance = performance['value'].iloc[0]

if self.explainer is None:
raise Exception('Explainer was not fitted!')
category_colnames_base = self.model.get_category_colnames()
@@ -156,7 +171,8 @@ def transform(self, variables: Optional[List[str]] = None) -> Explanation:
print_unbuffered(f'{self.explanation_type} was calculated in {self.__repr__()} for '
f'{self.model.get_test_dataset().name}')

return ModelProfileExplanation(dict_output, self.name, self.model.get_category_colnames())
return ModelProfileExplanation(dict_output, self.name, self.model.get_category_colnames(),
performance, self.performance_metric_name)
elif self.explanation_type == 'VI':
explanation_type = 'variable_importance'
self.__transform_feature_importance(dict_output, variables, explanation_type)
@@ -165,7 +181,8 @@ def transform(self, variables: Optional[List[str]] = None) -> Explanation:
print_unbuffered(f'{self.explanation_type} was calculated in {self.__repr__()} for '
f'{self.model.get_test_dataset().name}')

return ModelPartsExplanation(dict_output, self.name)
return ModelPartsExplanation(dict_output, self.name,
performance, self.performance_metric_name)
else:
raise Exception('Wrong curve type!')

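For orientation, a minimal usage sketch of the new arguments. The trained `model` object and the choice of `f1_score` are illustrative assumptions; the `Explainer` constructor, `fit()`, `transform()`, and the performance fields on the returned explanation follow the code shown in this diff.

```python
from sklearn.metrics import f1_score

from edgaro.explain.explainer import Explainer

# `model` is assumed to be an already fitted edgaro Model (hypothetical object).
explainer = Explainer(
    model,
    explanation_type='PDP',
    performance_metric_name='f1',   # label stored next to the explanation
    performance_metric=f1_score,    # callable(y_true, y_pred) -> float
)
explainer.fit()

# transform() now evaluates the model with the chosen metric and attaches the
# resulting value to the returned explanation object.
explanation = explainer.transform()
print(explanation.performance_metric_name, explanation.performance_metric_value)
```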
26 changes: 22 additions & 4 deletions edgaro/explain/explainer_array.py
@@ -1,8 +1,10 @@
from __future__ import annotations

import multiprocessing
import pandas as pd

from typing import Union, Optional, Literal
from typing import Union, Optional, Literal, Callable
from sklearn.metrics import balanced_accuracy_score

from edgaro.model.model import Model
from edgaro.model.model_array import ModelArray
@@ -35,6 +37,10 @@ class ExplainerArray:
Random state seed.
B : int, optional, default=10
Number of permutation rounds to perform on each variable - applicable only if explanation_type='VI'.
performance_metric_name : str, default='balanced_accuracy'
Name of the performance metric.
performance_metric : callable, default=balanced_accuracy_score
The performance metric function, taking true and predicted labels and returning a float.
Attributes
----------
@@ -58,12 +64,18 @@ class ExplainerArray:
Random state seed
B : int, optional
Number of permutation rounds to perform on each variable - applicable only if explanation_type='VI'.
performance_metric_name : str
Name of the performance metric.
performance_metric : callable
The performance metric function.
"""

def __init__(self, models: Union[Model, ModelArray], N: Optional[int] = None,
explanation_type: Literal['PDP', 'ALE', 'VI'] = 'PDP', verbose: bool = False, processes: int = 1,
random_state: Optional[int] = None, B: Optional[int] = 10) -> None:
random_state: Optional[int] = None, B: Optional[int] = 10,
performance_metric_name: str = 'balanced_accuracy',
performance_metric: Callable[[pd.Series, pd.Series], float] = balanced_accuracy_score) -> None:
self.models = models
self.sub_calculators = None
self.name = models.name
@@ -75,6 +87,9 @@ def __init__(self, models: Union[Model, ModelArray], N: Optional[int] = None,
self.random_state = random_state
self.B = B

self.performance_metric_name = performance_metric_name
self.performance_metric = performance_metric

if self.processes == -1:
self.processes = multiprocessing.cpu_count()

@@ -86,11 +101,14 @@ def fit(self) -> None:
def create_sub_calculator(model: Union[Model, ModelArray]):
if isinstance(model, Model):
calc = Explainer(model=model, N=self.N, explanation_type=self.explanation_type, verbose=self.verbose,
processes=self.processes, random_state=self.random_state, B=self.B)
processes=self.processes, random_state=self.random_state, B=self.B,
performance_metric_name=self.performance_metric_name,
performance_metric=self.performance_metric)
else:
calc = ExplainerArray(models=model, N=self.N, explanation_type=self.explanation_type,
verbose=self.verbose, processes=self.processes, random_state=self.random_state,
B=self.B)
B=self.B, performance_metric_name=self.performance_metric_name,
performance_metric=self.performance_metric)

calc.fit()
return calc
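The array version simply forwards the two new arguments to every sub-calculator it builds in `fit()`, so a single metric choice covers the whole collection. A sketch under the assumption that `models` is an existing, fitted edgaro `ModelArray`; the `recall_score` choice is likewise illustrative.

```python
from sklearn.metrics import recall_score

from edgaro.explain.explainer_array import ExplainerArray

# `models` is assumed to be a fitted edgaro ModelArray (hypothetical object).
explainer_array = ExplainerArray(
    models,
    explanation_type='VI',
    performance_metric_name='recall',
    performance_metric=recall_score,
)
# Every nested Explainer/ExplainerArray receives the same metric settings.
explainer_array.fit()
```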
79 changes: 77 additions & 2 deletions edgaro/explain/explainer_result.py
@@ -52,7 +52,11 @@ def plot(self) -> None:
pass

@abstractmethod
def compare(self, other: List[Explanation]) -> List[Union[float, list]]:
def compare(self, other: List[Explanation]) -> List[Union[float, List]]:
pass

@abstractmethod
def compare_performance(self, other: List[Explanation], percent: bool = False) -> List[float]:
pass


@@ -70,6 +74,10 @@ class ModelProfileExplanation(Explanation):
List of categorical variables.
explanation_type : {'PDP', 'ALE'}, default='PDP'
A curve type.
performance_metric_value : float
Value of the performance metric.
performance_metric_name : str
Name of the performance metric.
Attributes
----------
@@ -81,15 +89,22 @@ class ModelProfileExplanation(Explanation):
List of categorical variables.
explanation_type : {'PDP', 'ALE'}
A curve type.
performance_metric_value : float, optional
Value of the performance metric.
performance_metric_name : str, optional
Name of the performance metric.
"""

def __init__(self, results: Dict[str, Curve], name: str, categorical_columns: List[str],
performance_metric_value: float, performance_metric_name: str,
explanation_type: Literal['PDP', 'ALE'] = 'PDP') -> None:
self.results = results
self.name = name
self.categorical_columns = categorical_columns
self.explanation_type = explanation_type
self.performance_metric_value = performance_metric_value
self.performance_metric_name = performance_metric_name

def __getitem__(self, key: str) -> Optional[Curve]:
if key in self.results.keys():
@@ -285,6 +300,30 @@ def __retrieve_explainer_results(inp, explain_results_in):
for inp_i in inp.results:
ModelProfileExplanation.__retrieve_explainer_results(inp_i, explain_results_in)

def compare_performance(self, other: List[ModelProfileExplanation], percent: bool = False) -> List[float]:
"""
The function returns the differences between performance metric values. This object's value is subtracted
from the value of each object in `other`.
Parameters
----------
other : list[ModelProfileExplanation]
List of ModelProfileExplanation objects to compare against.
percent : bool, default=False
If True, the percentage change will be returned instead of the difference.
Returns
----------
list[float]
"""

tab = [elem.performance_metric_value - self.performance_metric_value for elem in other]

if percent:
tab = [tab[i] / self.performance_metric_value for i in range(len(tab))]

return tab

def compare(self, other: List[ModelProfileExplanation], variable: Optional[Union[str, List[str]]] = None,
return_raw_per_variable: bool = False) -> List[Union[float, list]]:
"""
@@ -389,6 +428,10 @@ class ModelPartsExplanation(Explanation):
The name of ModelPartsExplanation. It is best if it is a Model name.
explanation_type : {'VI'}, default='VI'
An explanation type.
performance_metric_value : float
Value of the performance metric.
performance_metric_name : str
Name of the performance metric.
Attributes
----------
@@ -398,12 +441,20 @@ class ModelPartsExplanation(Explanation):
The name of ModelPartsExplanation. It is best if it is a Model name.
explanation_type : {'VI'}, default='VI'
An explanation type.
performance_metric_value : float
Value of the performance metric.
performance_metric_name : str
Name of the performance metric.
"""

def __init__(self, results: Dict[str, float], name: str, explanation_type: Literal['VI'] = 'VI') -> None:
def __init__(self, results: Dict[str, float], name: str, performance_metric_value: float,
performance_metric_name: str, explanation_type: Literal['VI'] = 'VI') -> None:
self.results = results
self.name = name
self.explanation_type = explanation_type
self.performance_metric_value = performance_metric_value
self.performance_metric_name = performance_metric_name

def __getitem__(self, key: str) -> Optional[float]:
if key in self.results.keys():
@@ -529,6 +580,30 @@ def extraction(a):
fontsize='medium'
)

def compare_performance(self, other: List[ModelPartsExplanation], percent: bool = False) -> List[float]:
"""
The function returns the differences between performance metric values. This object's value is subtracted
from the value of each object in `other`.
Parameters
----------
other : list[ModelPartsExplanation]
List of ModelPartsExplanation objects to compare against.
percent : bool, default=False
If True, the percentage change will be returned instead of the difference.
Returns
----------
list[float]
"""

tab = [elem.performance_metric_value - self.performance_metric_value for elem in other]

if percent:
tab = [tab[i] / self.performance_metric_value for i in range(len(tab))]

return tab

def compare(self, other: List[ModelPartsExplanation], variable: Optional[Union[str, List[str]]] = None,
max_variables: Optional[int] = None, return_raw: bool = True) -> List[Union[float, list]]:
"""
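To round off, a hedged sketch of how the new `compare_performance` method might be used: for each explanation in `other` it returns the difference (or, with `percent=True`, the relative change) of that explanation's stored metric value against this object's value. The two explanation objects and the example values 0.80 / 0.86 below are assumptions for illustration; only the method signature comes from this commit.

```python
# `explanation_base` and `explanation_balanced` are assumed to be explanation
# objects returned by Explainer.transform() for a baseline model and for a
# model trained on resampled data (both names are hypothetical).
gain = explanation_base.compare_performance([explanation_balanced])
gain_pct = explanation_base.compare_performance([explanation_balanced], percent=True)

# With stored metric values of 0.80 (base) and 0.86 (balanced), this yields
# roughly [0.06] and [0.075]: an absolute gain of 0.06 and a relative gain of 7.5 %.
print(gain, gain_pct)
```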