Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 118 catboost #119

Closed
wants to merge 16 commits into from
Closed
16 changes: 12 additions & 4 deletions src/fklearn/training/classification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import Any, Dict, List

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -240,7 +240,8 @@ def catboost_classification_learner(df: pd.DataFrame,
extra_params: LogType = None,
prediction_column: str = "prediction",
weight_column: str = None,
encode_extra_cols: bool = True) -> LearnerReturnType:
encode_extra_cols: bool = True,
fit_params: Dict[str, Any] = {'verbose': 0}) -> LearnerReturnType:
"""
Fits a CatBoost classifier to the dataset. It first generates a Pool
with the specified features and labels from `df`. Then, it fits a CatBoost
Expand Down Expand Up @@ -292,7 +293,14 @@ def catboost_classification_learner(df: pd.DataFrame,

encode_extra_cols : bool (default: True)
If True, treats all columns in `df` with name pattern `fklearn_feat__col==val` as feature columns.

fit_params : dict, optional
Dictionary in the format {"parameter_name" : parameter_value}.
Extra keyword arguments forwarded to the `fit` method of the CatBoost classifier
(these are fit-time options, not model hyperparameters). See the list in:
https://catboost.ai/docs/concepts/python-reference_catboostclassifier_fit.html
If not passed, defaults to {'verbose': 0}, which silences CatBoost's training output.
"""

from catboost import Pool, CatBoostClassifier
import catboost

Expand All @@ -309,7 +317,7 @@ def catboost_classification_learner(df: pd.DataFrame,
feature_names=list(map(str, features)), cat_features=cat_features)

cat_boost_classifier = CatBoostClassifier(iterations=num_estimators, **params)
cbr = cat_boost_classifier.fit(dtrain, verbose=0)
cbr = cat_boost_classifier.fit(dtrain, **fit_params)

def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame:

Expand Down Expand Up @@ -359,6 +367,7 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame:
'package': "catboost",
'package_version': catboost.__version__,
'parameters': assoc(params, "num_estimators", num_estimators),
'fit_params': fit_params,
'feature_importance': cbr.feature_importances_,
'training_samples': len(df)},
'object': cbr}
Expand Down Expand Up @@ -408,7 +417,6 @@ def nlp_logistic_classification_learner(df: pd.DataFrame,
prediction_column : str
The name of the column with the predictions from the model.
"""

# set default params
default_vect_params = {"strip_accents": "unicode", "min_df": 20}
merged_vect_params = default_vect_params if not vectorizer_params else merge(default_vect_params, vectorizer_params)
Expand Down
12 changes: 10 additions & 2 deletions src/fklearn/training/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ def catboost_regressor_learner(df: pd.DataFrame,
num_estimators: int = 100,
extra_params: Dict[str, Any] = None,
prediction_column: str = "prediction",
weight_column: str = None) -> LearnerReturnType:
weight_column: str = None,
fit_params: Dict[str, Any] = {'verbose': 0}) -> LearnerReturnType:
"""
Fits a CatBoost regressor to the dataset. It first generates a Pool
with the specified features and labels from `df`. Then it fits a CatBoost
Expand Down Expand Up @@ -256,6 +257,12 @@ def catboost_regressor_learner(df: pd.DataFrame,

weight_column : str, optional
The name of the column with scores to weight the data.

fit_params : dict, optional
Dictionary in the format {"parameter_name" : parameter_value}.
Extra keyword arguments forwarded to the `fit` method of the CatBoost regressor
(these are fit-time options, not model hyperparameters). See the list in:
https://catboost.ai/docs/concepts/python-reference_catboostregressor_fit.html#python-reference_catboostregressor_fit
If not passed, defaults to {'verbose': 0}, which silences CatBoost's training output.
"""
from catboost import Pool, CatBoostRegressor
import catboost
Expand All @@ -266,7 +273,7 @@ def catboost_regressor_learner(df: pd.DataFrame,

dtrain = Pool(df[features].values, df[target].values, weight=weights, feature_names=list(map(str, features)))
cat_boost_regressor = CatBoostRegressor(iterations=num_estimators, **params)
cbr = cat_boost_regressor.fit(dtrain, verbose=0)
cbr = cat_boost_regressor.fit(dtrain, **fit_params)

def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame:
dtest = Pool(new_df[features].values, feature_names=list(map(str, features)))
Expand Down Expand Up @@ -294,6 +301,7 @@ def p(new_df: pd.DataFrame, apply_shap: bool = False) -> pd.DataFrame:
'package': "catboost",
'package_version': catboost.__version__,
'parameters': assoc(params, "num_estimators", num_estimators),
'fit_params': fit_params,
'feature_importance': cbr.feature_importances_,
'training_samples': len(df)},
'object': cbr}
Expand Down
3 changes: 2 additions & 1 deletion tests/training/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ def test_catboost_regressor_learner():
num_estimators=20,
extra_params={"max_depth": 2, "random_seed": 42},
prediction_column="prediction",
weight_column="w")
weight_column="w",
fit_params={'verbose': 0})

predict_fn, pred_train, log = learner(df_train)

Expand Down