Commit
Merge pull request #81 from orchardbirds/80-rmspe
80 rmspe
orchardbirds authored Jun 30, 2021
2 parents 9dc707f + 5a84f6b commit 69f248a
Showing 6 changed files with 168 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -16,6 +16,7 @@ Main features:
- [Weighted Cross Entropy](https://orchardbirds.github.io/bokbokbok/tutorials/weighted_cross_entropy.html)
- [Weighted Focal Loss](https://orchardbirds.github.io/bokbokbok/tutorials/focal_loss.html)
- [Log Cosh Loss](https://orchardbirds.github.io/bokbokbok/tutorials/log_cosh_loss.html)
- [Root Mean Squared Percentage Error](https://orchardbirds.github.io/bokbokbok/tutorials/RMSPE.html)
- [F1 score](https://orchardbirds.github.io/bokbokbok/tutorials/F1_score.html)
- [Quadratic Weighted Kappa](https://orchardbirds.github.io/bokbokbok/tutorials/quadratic_weighted_kappa.html)

4 changes: 3 additions & 1 deletion bokbokbok/eval_metrics/regression/__init__.py
@@ -3,8 +3,10 @@

from .regression_eval_metrics import (
    LogCoshMetric,
+    RMSPEMetric,
)

__all__ = [
-    "LogCoshMetric"
+    "LogCoshMetric",
+    "RMSPEMetric",
]
31 changes: 31 additions & 0 deletions bokbokbok/eval_metrics/regression/regression_eval_metrics.py
@@ -28,3 +28,34 @@ def log_cosh_error(yhat, dtrain, XGBoost=XGBoost):
        return 'LogCosh', float(np.sum(elements) / len(y)), False

    return log_cosh_error


def RMSPEMetric(XGBoost=False):
    """
    Calculates the Root Mean Squared Percentage Error:
    https://www.kaggle.com/c/optiver-realized-volatility-prediction/overview/evaluation

    There is no loss function for this because the gradient is constant,
    meaning the Hessian is equal to 0.

    Args:
        XGBoost (Bool): Set to True if using XGBoost. LightGBM is assumed by default.
            Note that you should also set `maximize=False` in the XGBoost train function.
    """

    def RMSPE(yhat, dtrain, XGBoost=XGBoost):
        """
        Root Mean Squared Percentage Error.
        All input labels are required to be non-zero.

        yhat: Predictions
        dtrain: The XGBoost / LightGBM dataset
        XGBoost (Bool): If XGBoost is to be implemented
        """

        y = dtrain.get_label()
        elements = ((y - yhat) / y) ** 2
        if XGBoost:
            return 'RMSPE', float(np.sqrt(np.sum(elements) / len(y)))
        else:
            return 'RMSPE', float(np.sqrt(np.sum(elements) / len(y))), False

    return RMSPE
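
For reference, the metric implemented above is RMSPE = sqrt((1/N) * sum(((y_i - yhat_i) / y_i)^2)), which is why zero labels are disallowed. The following minimal sketch (not part of this commit) checks the returned callable against a plain NumPy computation; the _FakeDataset class is a hypothetical stand-in that only mimics the get_label() interface of LightGBM / XGBoost dataset objects.

import numpy as np

from bokbokbok.eval_metrics.regression import RMSPEMetric


class _FakeDataset:
    """Hypothetical stand-in exposing the get_label() interface."""

    def __init__(self, labels):
        self._labels = np.asarray(labels, dtype=float)

    def get_label(self):
        return self._labels


y_true = np.array([1.0, 2.0, 4.0])   # labels must be non-zero
y_pred = np.array([1.1, 1.8, 4.4])

# Plain NumPy reference: sqrt(mean(((y - yhat) / y) ** 2))
reference = float(np.sqrt(np.mean(((y_true - y_pred) / y_true) ** 2)))

name, value, is_higher_better = RMSPEMetric()(y_pred, _FakeDataset(y_true))
assert name == 'RMSPE' and is_higher_better is False
assert np.isclose(value, reference)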
131 changes: 131 additions & 0 deletions docs/tutorials/RMSPE.ipynb
@@ -0,0 +1,131 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### When to use Root Mean Squared Percentage Error?\n",
"\n",
"This function is defined according to [this Kaggle competition](https://www.kaggle.com/c/optiver-realized-volatility-prediction/overview/evaluation) for volatility calculation. \n",
"\n",
"This function cannot be used as a Loss function - the gradient is constant and hence the Hessian is 0. Nevertheless, it can still be used as an evaluation metric as the model trains."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import make_regression\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_absolute_error\n",
"from bokbokbok.eval_metrics.regression import RMSPEMetric\n",
"\n",
"X, y = make_regression(n_samples=1000, \n",
" n_features=10, \n",
" random_state=41114)\n",
"\n",
"X_train, X_valid, y_train, y_valid = train_test_split(X, \n",
" y/100, \n",
" test_size=0.25, \n",
" random_state=41114)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Usage in LightGBM"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import lightgbm as lgb\n",
"\n",
"train = lgb.Dataset(X_train, y_train)\n",
"valid = lgb.Dataset(X_valid, y_valid, reference=train)\n",
"params = {\n",
" 'n_estimators': 3000,\n",
" 'seed': 41114,\n",
" 'n_jobs': 8,\n",
" 'num_leaves': 10,\n",
" 'learning_rate': 0.1,\n",
" 'verbose': 10,\n",
" #'objective': 'RMSE',\n",
" }\n",
"\n",
"clf = lgb.train(params=params,\n",
" train_set=train,\n",
" valid_sets=[train, valid],\n",
" valid_names=['train','valid'],\n",
" feval=RMSPEMetric(),\n",
" early_stopping_rounds=3000,\n",
" verbose_eval=1)\n",
"\n",
"mean_absolute_error(y_valid, clf.predict(X_valid))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Usage in XGBoost"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import xgboost as xgb\n",
"\n",
"dtrain = xgb.DMatrix(X_train, y_train)\n",
"dvalid = xgb.DMatrix(X_valid, y_valid)\n",
"\n",
"params = {\n",
" 'seed': 41114,\n",
" 'learning_rate': 0.1,\n",
" 'disable_default_eval_metric': 1\n",
" }\n",
"\n",
"bst = xgb.train(params,\n",
" dtrain=dtrain,\n",
" num_boost_round=3000,\n",
" early_stopping_rounds=100,\n",
" verbose_eval=100,\n",
" maximize=False,\n",
" feval=RMSPEMetric(XGBoost=True),\n",
" evals=[(dtrain, 'dtrain'), (dvalid, 'dvalid')])\n",
"\n",
"mean_absolute_error(y_valid, bst.predict(dvalid))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:skorecard_py37] *",
"language": "python",
"name": "conda-env-skorecard_py37-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
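
A note for readers running this notebook on newer library versions (an assumption, not part of this commit): LightGBM 4.x removed the early_stopping_rounds and verbose_eval arguments from lgb.train, moving them into callbacks, and recent XGBoost releases deprecate feval in favour of a custom_metric argument. A sketch of the equivalent LightGBM call, assuming the params, train, and valid objects defined in the notebook above:

import lightgbm as lgb

# Equivalent of the lgb.train call above for LightGBM >= 4.0:
# early stopping and evaluation logging are supplied as callbacks
# instead of keyword arguments (params, train, and valid come from
# the notebook cells above).
clf = lgb.train(
    params=params,
    train_set=train,
    valid_sets=[train, valid],
    valid_names=['train', 'valid'],
    feval=RMSPEMetric(),
    callbacks=[
        lgb.early_stopping(stopping_rounds=3000),
        lgb.log_evaluation(period=1),
    ],
)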
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -16,6 +16,7 @@ nav:
    - Use Weighted Focal Loss: tutorials/focal_loss.ipynb
    - Use F1 Score: tutorials/F1_score.ipynb
    - Use Log Cosh Score: tutorials/log_cosh_loss.ipynb
    - Use Root Mean Squared Percentage Error: tutorials/RMSPE.ipynb
    - Use Quadratic Weighted Kappa: tutorials/quadratic_weighted_kappa.ipynb
  - Derivations:
    - A Note About Gradients in Classification Problems: derivations/note.md
2 changes: 1 addition & 1 deletion setup.py
@@ -35,7 +35,7 @@

setup(
    name="bokbokbok",
-    version="0.3",
+    version="0.4",
    description="Custom Losses and Metrics for XGBoost, LightGBM, CatBoost",
    long_description=long_description,
    long_description_content_type="text/markdown",