[python-package] Introduce refit_tree_manual to Booster class. #6617

Open
wants to merge 13 commits into base: master
Use tmp_path instead of leaving files
Atanas Dimitrov committed Aug 5, 2024
commit 14fcf216b10bad8ec2a74beec584b8914f19fe5b
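The pattern applied throughout this commit is pytest's built-in tmp_path fixture: requesting it as a test argument yields a fresh pathlib.Path pointing at a per-test temporary directory, so model files are no longer written into (and left behind in) the repository checkout. A minimal sketch of the idea, with a hypothetical train_and_save helper standing in for the LightGBM calls:

from pathlib import Path


def train_and_save(model_file: Path) -> None:
    # hypothetical stand-in for Booster.save_model(); writes wherever it is pointed
    model_file.write_text("tree model placeholder")


def test_model_round_trip(tmp_path):
    model_path = tmp_path / "model.txt"  # lives under pytest's temp dir, not the CWD
    train_and_save(model_path)
    assert model_path.read_text() == "tree model placeholder"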
7 changes: 4 additions & 3 deletions tests/c_api_test/test_.py
@@ -175,11 +175,12 @@ def test_dataset():
free_dataset(train)


-def test_booster():
+def test_booster(tmp_path):
binary_example_dir = Path(__file__).absolute().parents[2] / "examples" / "binary_classification"
train = load_from_mat(binary_example_dir / "binary.train", None)
test = load_from_mat(binary_example_dir / "binary.test", train)
booster = ctypes.c_void_p()
+model_path = tmp_path / "model.txt"
LIB.LGBM_BoosterCreate(train, c_str("app=binary metric=auc num_leaves=31 verbose=0"), ctypes.byref(booster))
LIB.LGBM_BoosterAddValidData(booster, test)
is_finished = ctypes.c_int(0)
@@ -192,13 +193,13 @@ def test_booster():
)
if i % 10 == 0:
print(f"{i} iteration test AUC {result[0]:.6f}")
-LIB.LGBM_BoosterSaveModel(booster, ctypes.c_int(0), ctypes.c_int(-1), ctypes.c_int(0), c_str("model.txt"))
+LIB.LGBM_BoosterSaveModel(booster, ctypes.c_int(0), ctypes.c_int(-1), ctypes.c_int(0), c_str(str(model_path)))
LIB.LGBM_BoosterFree(booster)
free_dataset(train)
free_dataset(test)
booster2 = ctypes.c_void_p()
num_total_model = ctypes.c_int(0)
LIB.LGBM_BoosterCreateFromModelfile(c_str("model.txt"), ctypes.byref(num_total_model), ctypes.byref(booster2))
LIB.LGBM_BoosterCreateFromModelfile(c_str(str(model_path)), ctypes.byref(num_total_model), ctypes.byref(booster2))
data = np.loadtxt(str(binary_example_dir / "binary.test"), dtype=np.float64)
mat = data[:, 1:]
preb = np.empty(mat.shape[0], dtype=np.float64)
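In the C-API test the path still has to go through str() before it reaches c_str, because that helper encodes a Python str to a UTF-8 C string for ctypes and a pathlib.Path has no encode method. Roughly (this is a sketch of such a helper, not the exact definition in test_.py, and the path below is only illustrative):

import ctypes
from pathlib import Path


def c_str(string: str) -> ctypes.c_char_p:
    # encode a Python str as a NUL-terminated UTF-8 buffer for the native library
    return ctypes.c_char_p(string.encode("utf-8"))


model_path = Path("/tmp/pytest-xyz/test_booster0") / "model.txt"  # illustrative only
c_path = c_str(str(model_path))  # Path -> str -> bytes -> c_char_p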
15 changes: 8 additions & 7 deletions tests/python_package_test/test_engine.py
@@ -1112,15 +1112,15 @@ def _early_stop_after_seventh_iteration(env):
assert bst.current_iteration() == 7


-def test_continue_train():
+def test_continue_train(tmp_path):
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = {"objective": "regression", "metric": "l1", "verbose": -1}
lgb_train = lgb.Dataset(X_train, y_train, free_raw_data=False)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, free_raw_data=False)
init_gbm = lgb.train(params, lgb_train, num_boost_round=20)
-model_name = "model.txt"
-init_gbm.save_model(model_name)
+model_path = tmp_path / "model.txt"
+init_gbm.save_model(model_path)
evals_result = {}
gbm = lgb.train(
params,
@@ -1130,7 +1130,7 @@ def test_continue_train():
# test custom eval metrics
feval=(lambda p, d: ("custom_mae", mean_absolute_error(p, d.get_label()), False)),
callbacks=[lgb.record_evaluation(evals_result)],
-init_model="model.txt",
+init_model=model_path,
)
ret = mean_absolute_error(y_test, gbm.predict(X_test))
assert ret < 13.6
@@ -1713,7 +1713,7 @@ def test_all_expected_params_are_written_out_to_model_text(tmp_path):

# why fixed seed?
# sometimes there is no difference how cols are treated (cat or not cat)
-def test_pandas_categorical(rng_fixed_seed):
+def test_pandas_categorical(rng_fixed_seed, tmp_path):
pd = pytest.importorskip("pandas")
X = pd.DataFrame(
{
@@ -1756,8 +1756,9 @@ def test_pandas_categorical(rng_fixed_seed):
gbm3 = lgb.train(params, lgb_train, num_boost_round=10, categorical_feature=["A", "B", "C", "D"])
pred3 = gbm3.predict(X_test)
assert lgb_train.categorical_feature == ["A", "B", "C", "D"]
-gbm3.save_model("categorical.model")
-gbm4 = lgb.Booster(model_file="categorical.model")
+categorical_model_path = tmp_path / "categorical.model"
+gbm3.save_model(categorical_model_path)
+gbm4 = lgb.Booster(model_file=categorical_model_path)
pred4 = gbm4.predict(X_test)
model_str = gbm4.model_to_string()
gbm4.model_from_string(model_str)
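The python-package tests, by contrast, pass the pathlib.Path object straight through: Booster.save_model, lgb.Booster(model_file=...), and the init_model argument of lgb.train all accept path-like values in current LightGBM, which is what these edits rely on. A self-contained sketch of the same save-and-continue round trip on synthetic data (test name and data are illustrative, not taken from the test suite):

import numpy as np

import lightgbm as lgb


def test_save_and_continue(tmp_path):
    rng = np.random.default_rng(42)
    X = rng.random((500, 5))
    y = X[:, 0] * 10 + rng.normal(scale=0.1, size=500)  # real signal so trees can split
    params = {"objective": "regression", "verbose": -1}
    train_set = lgb.Dataset(X, y, free_raw_data=False)
    booster = lgb.train(params, train_set, num_boost_round=5)
    model_path = tmp_path / "model.txt"
    booster.save_model(model_path)  # Path accepted directly
    reloaded = lgb.Booster(model_file=model_path)  # Path accepted directly
    continued = lgb.train(params, train_set, num_boost_round=5, init_model=model_path)
    assert reloaded.num_trees() == 5
    assert continued.num_trees() == 10  # 5 loaded trees plus 5 new boosting rounds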
7 changes: 4 additions & 3 deletions tests/python_package_test/test_sklearn.py
@@ -558,7 +558,7 @@ def test_feature_importances_type():

# why fixed seed?
# sometimes there is no difference how cols are treated (cat or not cat)
-def test_pandas_categorical(rng_fixed_seed):
+def test_pandas_categorical(rng_fixed_seed, tmp_path):
pd = pytest.importorskip("pandas")
X = pd.DataFrame(
{
@@ -593,8 +593,9 @@ def test_pandas_categorical(rng_fixed_seed):
pred2 = gbm2.predict(X_test, raw_score=True)
gbm3 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=["A", "B", "C", "D"])
pred3 = gbm3.predict(X_test, raw_score=True)
-gbm3.booster_.save_model("categorical.model")
-gbm4 = lgb.Booster(model_file="categorical.model")
+categorical_model_path = tmp_path / "categorical.model"
+gbm3.booster_.save_model(categorical_model_path)
+gbm4 = lgb.Booster(model_file=categorical_model_path)
pred4 = gbm4.predict(X_test)
gbm5 = lgb.sklearn.LGBMClassifier(n_estimators=10).fit(X, y, categorical_feature=["A", "B", "C", "D", "E"])
pred5 = gbm5.predict(X_test, raw_score=True)
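On the sklearn side the estimator has no save_model of its own, so the test reaches the underlying Booster through the fitted estimator's booster_ attribute, saves it under tmp_path, and reloads it as a plain lgb.Booster (which then returns probabilities or raw scores rather than class labels). A minimal sketch with synthetic data (names and data are illustrative):

import numpy as np

import lightgbm as lgb


def test_sklearn_booster_round_trip(tmp_path):
    rng = np.random.default_rng(0)
    X = rng.random((200, 4))
    y = (X[:, 0] > 0.5).astype(int)  # separable binary target so trees have splits
    clf = lgb.LGBMClassifier(n_estimators=5, verbose=-1).fit(X, y)
    model_path = tmp_path / "sklearn.model"
    clf.booster_.save_model(model_path)  # underlying Booster, written under tmp_path
    raw_booster = lgb.Booster(model_file=model_path)
    preds = raw_booster.predict(X)  # positive-class probabilities, not labels
    assert preds.shape == (200,)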