-
Notifications
You must be signed in to change notification settings - Fork 3
/
trainer.py
41 lines (33 loc) · 1.23 KB
/
trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import lightgbm as lgbm
# [MLRun] Import MLRun:
import mlrun
import pandas as pd
from mlrun.frameworks.lgbm import apply_mlrun
from sklearn.model_selection import train_test_split
@mlrun.handler()
def train(
train_set: pd.DataFrame,
label_column: str = "fare_amount",
model_name: str = "lgbm_ny_taxi",
boosting_type: str = "gbdt",
subsample: float = 0.8,
min_split_gain: float = 0.5,
min_child_samples: int = 10,
):
y = train_set[label_column]
train_df = train_set.drop(columns=[label_column])
x_train, x_test, y_train, y_test = train_test_split(
train_df, y, random_state=123, test_size=0.10
)
model = lgbm.LGBMRegressor(
boosting_type=boosting_type,
subsample=subsample,
min_split_gain=min_split_gain,
min_child_samples=min_child_samples,
)
# -------------------- The only line you need to add for MLOps -------------------------
# Wraps the model with MLOps (test set is provided for analysis & accuracy measurements)
apply_mlrun(model=model, model_name=model_name, x_test=x_test, y_test=y_test)
# --------------------------------------------------------------------------------------
model.fit(X=x_train, y=y_train)
return model