From 7399fe1b7349a57050d260a0e2e13e2946240cce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Fri, 12 Aug 2022 01:22:01 -0500
Subject: [PATCH 01/23] initial work to retrieve parameters from loaded booster

---
 include/LightGBM/boosting.h      |  2 ++
 include/LightGBM/c_api.h         |  6 ++++++
 python-package/lightgbm/basic.py | 22 ++++++++++++++++++++++
 src/boosting/gbdt.h              | 16 ++++++++++++++++
 src/c_api.cpp                    | 15 +++++++++++++++
 5 files changed, 61 insertions(+)
diff --git a/include/LightGBM/boosting.h b/include/LightGBM/boosting.h
index 7530495c0e17..fd2e6330869c 100644
--- a/include/LightGBM/boosting.h
+++ b/include/LightGBM/boosting.h
@@ -313,6 +313,8 @@ class LIGHTGBM_EXPORT Boosting {
   */
   static Boosting* CreateBoosting(const std::string& type, const char* filename);
 
+  virtual std::string GetParameters() const = 0;
+
   virtual bool IsLinear() const { return false; }
 
   virtual std::string ParserConfigStr() const = 0;
diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index 8e4d8d4d8602..f287eee7831b 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -497,6 +497,12 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterLoadModelFromString(const char* model_str,
                                                       int* out_num_iterations,
                                                       BoosterHandle* out);
 
+LIGHTGBM_C_EXPORT int LGBM_BoosterGetParameters(BoosterHandle handle,
+                                               int64_t buffer_len,
+                                               int64_t* out_len,
+                                               char* out_str);
+
+
 /*!
  * \brief Free space for booster.
  * \param handle Handle of booster to be freed
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index a5e1bfb0a41e..db518c502138 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2765,6 +2765,28 @@ def __setstate__(self, state):
             state['handle'] = handle
         self.__dict__.update(state)
 
+    def _get_params(self) -> Dict[str, Any]:
+        buffer_len = 1 << 20
+        tmp_out_len = ctypes.c_int64(0)
+        string_buffer = ctypes.create_string_buffer(buffer_len)
+        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+        _safe_call(_LIB.LGBM_BoosterGetParameters(
+            self.handle,
+            ctypes.c_int64(buffer_len),
+            ctypes.byref(tmp_out_len),
+            ptr_string_buffer))
+        actual_len = tmp_out_len.value
+        # if buffer length is not long enough, re-allocate a buffer
+        if actual_len > buffer_len:
+            string_buffer = ctypes.create_string_buffer(actual_len)
+            ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+            _safe_call(_LIB.LGBM_DumpParamAliases(
+                ctypes.c_int64(actual_len),
+                ctypes.byref(tmp_out_len),
+                ptr_string_buffer))
+        params = json.loads(string_buffer.value.decode('utf-8'))
+        return params
+
     def free_dataset(self) -> "Booster":
         """Free Booster's Datasets.
 
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index f699719b525e..515f0645ce33 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -157,6 +157,22 @@ class GBDT : public GBDTBase {
   */
   int GetCurrentIteration() const override { return static_cast<int>(models_.size()) / num_tree_per_iteration_; }
 
+  /*!
+  * \brief Get parameters as a JSON string
+  */
+  std::string GetParameters() const override {
+    std::stringstream str_buf;
+    auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
+    for (auto line : lines) {
+      auto pair = Common::Split(line.c_str(), "[:]");
+      if (pair[1] != " ") {
+        str_buf << pair[0] << "=" << Common::Trim(pair[1]) << "\n";
+      }
+    }
+    auto map = Config::Str2Map(str_buf.str().c_str());
+    return Json(map).dump();
+  }
+
   /*!
   * \brief Can use early stopping for prediction or not
   * \return True if cannot use early stopping for prediction
diff --git a/src/c_api.cpp b/src/c_api.cpp
index d86862060917..dcd69a72b9e8 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -1624,6 +1624,21 @@ int LGBM_BoosterLoadModelFromString(
   API_END();
 }
 
+int LGBM_BoosterGetParameters(
+  BoosterHandle handle,
+  int64_t buffer_len,
+  int64_t* out_len,
+  char* out_str) {
+  API_BEGIN();
+  Booster* ref_booster = reinterpret_cast<Booster*>(handle);
+  std::string params = ref_booster->GetBoosting()->GetParameters();
+  *out_len = static_cast<int64_t>(params.size()) + 1;
+  if (*out_len <= buffer_len) {
+    std::memcpy(out_str, params.c_str(), *out_len);
+  }
+  API_END();
+}
+
 #ifdef _MSC_VER
   #pragma warning(disable : 4702)
 #endif

From 02ca63a950af672190f7bd5df172086af1383c76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Sun, 14 Aug 2022 19:25:09 -0500
Subject: [PATCH 02/23] get parameter types and use to parse

---
 helpers/parameter_generator.py   |  25 ++++++
 include/LightGBM/c_api.h         |   5 ++
 include/LightGBM/config.h        |   1 +
 python-package/lightgbm/basic.py |  42 +++++++++-
 src/boosting/gbdt.h              |   3 +
 src/c_api.cpp                    |  12 +++
 src/io/config_auto.cpp           | 131 +++++++++++++++++++++++++++++++
 7 files changed, 216 insertions(+), 3 deletions(-)

diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py
index 9bc62b093a26..9e57ae7875a0 100644
--- a/helpers/parameter_generator.py
+++ b/helpers/parameter_generator.py
@@ -6,6 +6,7 @@
 along with parameters description in LightGBM/docs/Parameters.rst file
 from the information in LightGBM/include/LightGBM/config.h file.
 """
+import re
 from collections import defaultdict
 from pathlib import Path
 from typing import Dict, List, Tuple
@@ -373,6 +374,30 @@ def gen_parameter_code(
 }
 
 """
+    str_to_write += """const std::string Config::ParameterTypes() {
+  std::stringstream str_buf;
+  str_buf << "{";"""
+    int_t_pat = re.compile(r'int\d+_t')
+    first = True
+    for x in infos:
+        for y in x:
+            if "[doc-only]" in y:
+                continue
+            param_type = int_t_pat.sub('int', y["inner_type"][0]).replace('std::', '')
+            name = y["name"][0]
+            prefix = f'\n  str_buf << "'
+            if first:
+                first = False
+            else:
+                prefix += ','
+            str_to_write += f'{prefix}\\"{name}\\": \\"{param_type}\\"";'
+    str_to_write += """
+  str_buf << "}";
+  return str_buf.str();
+}
+
+"""
+
     str_to_write += "}  // namespace LightGBM\n"
     with open(config_out_cpp, "w") as config_out_cpp_file:
         config_out_cpp_file.write(str_to_write)
diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index f287eee7831b..130de5953355 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -63,6 +63,11 @@ LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len,
                                             int64_t* out_len,
                                             char* out_str);
 
+
+LIGHTGBM_C_EXPORT int LGBM_DumpParameterTypes(int64_t buffer_len,
+                                              int64_t* out_len,
+                                              char* out_str);
+
 /*!
  * \brief Register a callback function for log redirecting.
  * \param callback The callback function to register
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index e88c4d7b70b7..69b16d24ec58 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -1075,6 +1075,7 @@ struct Config {
   static const std::unordered_set<std::string>& parameter_set();
   std::vector<std::vector<double>> auc_mu_weights_matrix;
   std::vector<std::vector<int>> interaction_constraints_vector;
+  static const std::string ParameterTypes();
   static const std::string DumpAliases();
 
  private:
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index db518c502138..871f93d8f0dc 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2765,6 +2765,27 @@ def __setstate__(self, state):
             state['handle'] = handle
         self.__dict__.update(state)
 
+    def _get_param_types(self) -> Dict[str, Any]:
+        buffer_len = 1 << 20
+        tmp_out_len = ctypes.c_int64(0)
+        string_buffer = ctypes.create_string_buffer(buffer_len)
+        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+        _safe_call(_LIB.LGBM_DumpParameterTypes(
+            ctypes.c_int64(buffer_len),
+            ctypes.byref(tmp_out_len),
+            ptr_string_buffer))
+        actual_len = tmp_out_len.value
+        # if buffer length is not long enough, re-allocate a buffer
+        if actual_len > buffer_len:
+            string_buffer = ctypes.create_string_buffer(actual_len)
+            ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+            _safe_call(_LIB.LGBM_DumpParameterTypes(
+                ctypes.c_int64(actual_len),
+                ctypes.byref(tmp_out_len),
+                ptr_string_buffer))
+        return json.loads(ptr_string_buffer.value.decode('utf-8'))
+
+
     def _get_params(self) -> Dict[str, Any]:
         buffer_len = 1 << 20
         tmp_out_len = ctypes.c_int64(0)
@@ -2780,12 +2801,27 @@ def _get_params(self) -> Dict[str, Any]:
         if actual_len > buffer_len:
             string_buffer = ctypes.create_string_buffer(actual_len)
             ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-            _safe_call(_LIB.LGBM_DumpParamAliases(
+            _safe_call(_LIB.LGBM_BoosterGetParameters(
+                self.handle,
                 ctypes.c_int64(actual_len),
                 ctypes.byref(tmp_out_len),
                 ptr_string_buffer))
-        params = json.loads(string_buffer.value.decode('utf-8'))
-        return params
+        params = json.loads(ptr_string_buffer.value.decode('utf-8'))
+        ptypes = self._get_param_types()
+        types_dict = {'string': str, 'int': int, 'double': float, 'bool': bool}
+
+        def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
+            if 'vector' in type_name:
+                if not value:
+                    return []
+                eltype_name = type_name[type_name.find('<') + 1 : type_name.find('>')]
+                eltype = types_dict[eltype_name]
+                return [eltype(v) for v in value.split(',')]
+            eltype = types_dict[type_name]
+            return eltype(value)
+
+        return {param: parse_param(value, ptypes.get(param, 'string')) for param, value in params.items()}
+
 
     def free_dataset(self) -> "Booster":
         """Free Booster's Datasets.
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 515f0645ce33..f7710da640e9 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -161,6 +161,9 @@ class GBDT : public GBDTBase {
   * \brief Get parameters as a JSON string
   */
   std::string GetParameters() const override {
+    if (loaded_parameter_.empty()) {
+      return std::string("{}");
+    }
     std::stringstream str_buf;
     auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
     for (auto line : lines) {
diff --git a/src/c_api.cpp b/src/c_api.cpp
index dcd69a72b9e8..7f365cb861bc 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -900,6 +900,18 @@ int LGBM_DumpParamAliases(int64_t buffer_len,
   API_END();
 }
 
+int LGBM_DumpParameterTypes(int64_t buffer_len,
+                            int64_t* out_len,
+                            char* out_str) {
+  API_BEGIN();
+  std::string ptypes = Config::ParameterTypes();
+  *out_len = static_cast<int64_t>(ptypes.size()) + 1;
+  if (*out_len <= buffer_len) {
+    std::memcpy(out_str, ptypes.c_str(), *out_len);
+  }
+  API_END();
+}
+
 int LGBM_RegisterLogCallback(void (*callback)(const char*)) {
   API_BEGIN();
   Log::ResetCallBack(callback);
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 6c2e3cabad00..9ef6c11bd22a 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -894,4 +894,135 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::paramet
   return map;
 }
 
+const std::string Config::ParameterTypes() {
+  std::stringstream str_buf;
+  str_buf << "{";
+  str_buf << "\"data\": \"string\"";
+  str_buf << ",\"valid\": \"vector<string>\"";
+  str_buf << ",\"num_iterations\": \"int\"";
+  str_buf << ",\"learning_rate\": \"double\"";
+  str_buf << ",\"num_leaves\": \"int\"";
+  str_buf << ",\"num_threads\": \"int\"";
+  str_buf << ",\"deterministic\": \"bool\"";
+  str_buf << ",\"force_col_wise\": \"bool\"";
+  str_buf << ",\"force_row_wise\": \"bool\"";
+  str_buf << ",\"histogram_pool_size\": \"double\"";
+  str_buf << ",\"max_depth\": \"int\"";
+  str_buf << ",\"min_data_in_leaf\": \"int\"";
+  str_buf << ",\"min_sum_hessian_in_leaf\": \"double\"";
+  str_buf << ",\"bagging_fraction\": \"double\"";
+  str_buf << ",\"pos_bagging_fraction\": \"double\"";
+  str_buf << ",\"neg_bagging_fraction\": \"double\"";
+  str_buf << ",\"bagging_freq\": \"int\"";
+  str_buf << ",\"bagging_seed\": \"int\"";
+  str_buf << ",\"feature_fraction\": \"double\"";
+  str_buf << ",\"feature_fraction_bynode\": \"double\"";
+  str_buf << ",\"feature_fraction_seed\": \"int\"";
+  str_buf << ",\"extra_trees\": \"bool\"";
+  str_buf << ",\"extra_seed\": \"int\"";
+  str_buf << ",\"early_stopping_round\": \"int\"";
+  str_buf << ",\"first_metric_only\": \"bool\"";
+  str_buf << ",\"max_delta_step\": \"double\"";
+  str_buf << ",\"lambda_l1\": \"double\"";
+  str_buf << ",\"lambda_l2\": \"double\"";
+  str_buf << ",\"linear_lambda\": \"double\"";
+  str_buf << ",\"min_gain_to_split\": \"double\"";
+  str_buf << ",\"drop_rate\": \"double\"";
+  str_buf << ",\"max_drop\": \"int\"";
+  str_buf << ",\"skip_drop\": \"double\"";
+  str_buf << ",\"xgboost_dart_mode\": \"bool\"";
+  str_buf << ",\"uniform_drop\": \"bool\"";
+  str_buf << ",\"drop_seed\": \"int\"";
+  str_buf << ",\"top_rate\": \"double\"";
+  str_buf << ",\"other_rate\": \"double\"";
+  str_buf << ",\"min_data_per_group\": \"int\"";
+  str_buf << ",\"max_cat_threshold\": \"int\"";
+  str_buf << ",\"cat_l2\": \"double\"";
+  str_buf << ",\"cat_smooth\": \"double\"";
+  str_buf << ",\"max_cat_to_onehot\": \"int\"";
+  str_buf << ",\"top_k\": \"int\"";
+  str_buf << ",\"monotone_constraints\": \"vector<int>\"";
+  str_buf << ",\"monotone_constraints_method\": \"string\"";
+  str_buf << ",\"monotone_penalty\": \"double\"";
+  str_buf << ",\"feature_contri\": \"vector<double>\"";
+  str_buf << ",\"forcedsplits_filename\": \"string\"";
+  str_buf << ",\"refit_decay_rate\": \"double\"";
+  str_buf << ",\"cegb_tradeoff\": \"double\"";
+  str_buf << ",\"cegb_penalty_split\": \"double\"";
+  str_buf << ",\"cegb_penalty_feature_lazy\": \"vector<double>\"";
+  str_buf << ",\"cegb_penalty_feature_coupled\": \"vector<double>\"";
+  str_buf << ",\"path_smooth\": \"double\"";
+  str_buf << ",\"interaction_constraints\": \"string\"";
+  str_buf << ",\"verbosity\": \"int\"";
+  str_buf << ",\"input_model\": \"string\"";
+  str_buf << ",\"output_model\": \"string\"";
+  str_buf << ",\"saved_feature_importance_type\": \"int\"";
+  str_buf << ",\"snapshot_freq\": \"int\"";
+  str_buf << ",\"linear_tree\": \"bool\"";
+  str_buf << ",\"max_bin\": \"int\"";
+  str_buf << ",\"max_bin_by_feature\": \"vector<int>\"";
+  str_buf << ",\"min_data_in_bin\": \"int\"";
+  str_buf << ",\"bin_construct_sample_cnt\": \"int\"";
+  str_buf << ",\"data_random_seed\": \"int\"";
+  str_buf << ",\"is_enable_sparse\": \"bool\"";
+  str_buf << ",\"enable_bundle\": \"bool\"";
+  str_buf << ",\"use_missing\": \"bool\"";
+  str_buf << ",\"zero_as_missing\": \"bool\"";
+  str_buf << ",\"feature_pre_filter\": \"bool\"";
+  str_buf << ",\"pre_partition\": \"bool\"";
+  str_buf << ",\"two_round\": \"bool\"";
+  str_buf << ",\"header\": \"bool\"";
+  str_buf << ",\"label_column\": \"string\"";
+  str_buf << ",\"weight_column\": \"string\"";
+  str_buf << ",\"group_column\": \"string\"";
+  str_buf << ",\"ignore_column\": \"string\"";
+  str_buf << ",\"categorical_feature\": \"string\"";
+  str_buf << ",\"forcedbins_filename\": \"string\"";
+  str_buf << ",\"save_binary\": \"bool\"";
+  str_buf << ",\"precise_float_parser\": \"bool\"";
+  str_buf << ",\"parser_config_file\": \"string\"";
+  str_buf << ",\"start_iteration_predict\": \"int\"";
+  str_buf << ",\"num_iteration_predict\": \"int\"";
+  str_buf << ",\"predict_raw_score\": \"bool\"";
+  str_buf << ",\"predict_leaf_index\": \"bool\"";
+  str_buf << ",\"predict_contrib\": \"bool\"";
+  str_buf << ",\"predict_disable_shape_check\": \"bool\"";
+  str_buf << ",\"pred_early_stop\": \"bool\"";
+  str_buf << ",\"pred_early_stop_freq\": \"int\"";
+  str_buf << ",\"pred_early_stop_margin\": \"double\"";
+  str_buf << ",\"output_result\": \"string\"";
+  str_buf << ",\"convert_model_language\": \"string\"";
+  str_buf << ",\"convert_model\": \"string\"";
+  str_buf << ",\"objective_seed\": \"int\"";
+  str_buf << ",\"num_class\": \"int\"";
+  str_buf << ",\"is_unbalance\": \"bool\"";
+  str_buf << ",\"scale_pos_weight\": \"double\"";
+  str_buf << ",\"sigmoid\": \"double\"";
+  str_buf << ",\"boost_from_average\": \"bool\"";
+  str_buf << ",\"reg_sqrt\": \"bool\"";
+  str_buf << ",\"alpha\": \"double\"";
+  str_buf << ",\"fair_c\": \"double\"";
+  str_buf << ",\"poisson_max_delta_step\": \"double\"";
+  str_buf << ",\"tweedie_variance_power\": \"double\"";
+  str_buf << ",\"lambdarank_truncation_level\": \"int\"";
+  str_buf << ",\"lambdarank_norm\": \"bool\"";
+  str_buf << ",\"label_gain\": \"vector<double>\"";
+  str_buf << ",\"metric_freq\": \"int\"";
+  str_buf << ",\"is_provide_training_metric\": \"bool\"";
+  str_buf << ",\"eval_at\": \"vector<int>\"";
+  str_buf << ",\"multi_error_top_k\": \"int\"";
+  str_buf << ",\"auc_mu_weights\": \"vector<double>\"";
+  str_buf << ",\"num_machines\": \"int\"";
+  str_buf << ",\"local_listen_port\": \"int\"";
+  str_buf << ",\"time_out\": \"int\"";
+  str_buf << ",\"machine_list_filename\": \"string\"";
+  str_buf << ",\"machines\": \"string\"";
+  str_buf << ",\"gpu_platform_id\": \"int\"";
+  str_buf << ",\"gpu_device_id\": \"int\"";
+  str_buf << ",\"gpu_use_dp\": \"bool\"";
+  str_buf << ",\"num_gpu\": \"int\"";
+  str_buf << "}";
+  return str_buf.str();
+}
+
 }  // namespace LightGBM

From c81f7682aa39f65c38c86714118d54ccfbfe36ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Mon, 15 Aug 2022 21:05:11 -0500
Subject: [PATCH 03/23] add test

---
 python-package/lightgbm/basic.py         | 51 +++++++++++++-----------
 tests/python_package_test/test_engine.py | 19 +++++++++
 2 files changed, 47 insertions(+), 23 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 871f93d8f0dc..e49b9fdf4844 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -6,7 +6,7 @@
 import warnings
 from collections import OrderedDict
 from copy import deepcopy
-from functools import wraps
+from functools import lru_cache, wraps
 from os import SEEK_END, environ
 from os.path import getsize
 from pathlib import Path
@@ -444,6 +444,30 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
     return params
 
 
+@lru_cache
+def _get_param_types() -> Dict[str, str]:
+    buffer_len = 1 << 20
+    tmp_out_len = ctypes.c_int64(0)
+    string_buffer = ctypes.create_string_buffer(buffer_len)
+    ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+    _safe_call(_LIB.LGBM_DumpParameterTypes(
+        ctypes.c_int64(buffer_len),
+        ctypes.byref(tmp_out_len),
+        ptr_string_buffer))
+    actual_len = tmp_out_len.value
+    # if buffer length is not long enough, re-allocate a buffer
+    if actual_len > buffer_len:
+        string_buffer = ctypes.create_string_buffer(actual_len)
+        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+        _safe_call(_LIB.LGBM_DumpParameterTypes(
+            ctypes.c_int64(actual_len),
+            ctypes.byref(tmp_out_len),
+            ptr_string_buffer))
+    res = json.loads(ptr_string_buffer.value.decode('utf-8'))
+    res['categorical_feature'] = 'vector<int>'
+    return res
+
+
 MAX_INT32 = (1 << 31) - 1
 
 """Macro definition of data type in C API of LightGBM"""
@@ -2722,6 +2746,8 @@ def __init__(
         else:
             raise TypeError('Need at least one training dataset or model file or model string '
                             'to create Booster instance')
+        if model_file is not None or model_str is not None:
+            params = self._get_params()
         self.params = params
 
     def __del__(self) -> None:
@@ -2765,27 +2791,6 @@ def __setstate__(self, state):
             state['handle'] = handle
         self.__dict__.update(state)
 
-    def _get_param_types(self) -> Dict[str, Any]:
-        buffer_len = 1 << 20
-        tmp_out_len = ctypes.c_int64(0)
-        string_buffer = ctypes.create_string_buffer(buffer_len)
-        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-        _safe_call(_LIB.LGBM_DumpParameterTypes(
-            ctypes.c_int64(buffer_len),
-            ctypes.byref(tmp_out_len),
-            ptr_string_buffer))
-        actual_len = tmp_out_len.value
-        # if buffer length is not long enough, re-allocate a buffer
-        if actual_len > buffer_len:
-            string_buffer = ctypes.create_string_buffer(actual_len)
-            ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-            _safe_call(_LIB.LGBM_DumpParameterTypes(
-                ctypes.c_int64(actual_len),
-                ctypes.byref(tmp_out_len),
-                ptr_string_buffer))
-        return json.loads(ptr_string_buffer.value.decode('utf-8'))
-
-
     def _get_params(self) -> Dict[str, Any]:
         buffer_len = 1 << 20
         tmp_out_len = ctypes.c_int64(0)
@@ -2807,7 +2812,7 @@ def _get_params(self) -> Dict[str, Any]:
                 ctypes.byref(tmp_out_len),
                 ptr_string_buffer))
         params = json.loads(ptr_string_buffer.value.decode('utf-8'))
-        ptypes = self._get_param_types()
+        ptypes = _get_param_types()
         types_dict = {'string': str, 'int': int, 'double': float, 'bool': bool}
 
         def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index d4852ce4a95a..1c8662b27c86 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1102,6 +1102,25 @@ def test_feature_name_with_non_ascii():
     assert feature_names == gbm2.feature_name()
 
 
+def test_parameters_are_loaded_from_model_file(tmp_path):
+    X = np.hstack([np.random.rand(100, 1), np.random.randint(0, 5, (100, 2))])
+    y = np.random.rand(100)
+    ds = lgb.Dataset(X, y)
+    params = {
+        'num_leaves': 5,
+        'bagging_fraction': 0.8,
+        'bagging_freq': 2,
+        'feature_fraction': 0.7,
+        'force_col_wise': True,
+        'num_threads': 1,
+    }
+    model_file = tmp_path / 'model.txt'
+    lgb.train(params, ds, num_boost_round=1, categorical_feature=[1, 2]).save_model(model_file)
+    bst = lgb.Booster(model_file=model_file)
+    assert all(bst.params[k] == params[k] for k in params)  # bst.params has all parameters
+    assert bst.params['categorical_feature'] == [1, 2]
+
+
 def test_save_load_copy_pickle():
     def train_and_predict(init_model=None, return_model=False):
         X, y = make_synthetic_regression()

From b33d6a03d3beb2245164a1be2365b61caad591fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 16 Aug 2022 14:12:59 -0500
Subject: [PATCH 04/23] True for boolean field if it's equal to '1'

---
 python-package/lightgbm/basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index ec7fa05b81b7..7c77f651c5c7 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2829,7 +2829,7 @@ def _get_params(self) -> Dict[str, Any]:
                 ptr_string_buffer))
         params = json.loads(ptr_string_buffer.value.decode('utf-8'))
         ptypes = _get_param_types()
-        types_dict = {'string': str, 'int': int, 'double': float, 'bool': bool}
+        types_dict = {'string': str, 'int': int, 'double': float, 'bool': lambda x: x == '1'}
 
         def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
             if 'vector' in type_name:

From c7a6a229e448e0a0eaf82c248a320bfddd625d83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 16 Aug 2022 15:31:50 -0500
Subject: [PATCH 05/23] remove bound on cache

---
 python-package/lightgbm/basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 7c77f651c5c7..c58d65c2561e 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -455,7 +455,7 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
     return params
 
 
-@lru_cache
+@lru_cache(maxsize=None)
 def _get_param_types() -> Dict[str, str]:
     buffer_len = 1 << 20
     tmp_out_len = ctypes.c_int64(0)

From f43934e92511378ac0affec7dca8eebe5222946d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 16 Aug 2022 22:48:36 -0500
Subject: [PATCH 06/23] remove duplicated code

---
 python-package/lightgbm/basic.py | 92 +++++++++++---------------------
 1 file changed, 32 insertions(+), 60 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index ec7fa05b81b7..59f8f91da32d 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -156,6 +156,28 @@ def _safe_call(ret: int) -> None:
         raise LightGBMError(_LIB.LGBM_GetLastError().decode('utf-8'))
 
 
+def _get_string_from_c_api(func: Callable, booster_handle: Optional[ctypes.c_void_p] = None) -> str:
+    def c_api_call(buffer_len: int, out_len: ctypes.c_int64):
+        string_buffer = ctypes.create_string_buffer(buffer_len)
+        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+        args = (ctypes.c_int64(buffer_len), ctypes.byref(out_len), ptr_string_buffer)
+        if booster_handle is None:
+            f = func(*args)
+        else:
+            f = func(booster_handle, *args)
+        _safe_call(f)
+        return ptr_string_buffer.value.decode('utf-8')
+
+    buffer_len = 1 << 20
+    tmp_out_len = ctypes.c_int64(0)
+    res = c_api_call(buffer_len, tmp_out_len)
+    actual_len = tmp_out_len.value
+    # if buffer length is not long enough, re-allocate a buffer
+    if actual_len > buffer_len:
+        res = c_api_call(actual_len, tmp_out_len)
+    return res
+
+
 def _is_numeric(obj: Any) -> bool:
     """Check whether object is a number or not, include numpy number, etc."""
     try:
@@ -357,25 +379,9 @@ class _ConfigAliases:
 
     @staticmethod
     def _get_all_param_aliases() -> Dict[str, List[str]]:
-        buffer_len = 1 << 20
-        tmp_out_len = ctypes.c_int64(0)
-        string_buffer = ctypes.create_string_buffer(buffer_len)
-        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-        _safe_call(_LIB.LGBM_DumpParamAliases(
-            ctypes.c_int64(buffer_len),
-            ctypes.byref(tmp_out_len),
-            ptr_string_buffer))
-        actual_len = tmp_out_len.value
-        # if buffer length is not long enough, re-allocate a buffer
-        if actual_len > buffer_len:
-            string_buffer = ctypes.create_string_buffer(actual_len)
-            ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-            _safe_call(_LIB.LGBM_DumpParamAliases(
-                ctypes.c_int64(actual_len),
-                ctypes.byref(tmp_out_len),
-                ptr_string_buffer))
+        aliases_str = _get_string_from_c_api(_LIB.LGBM_DumpParamAliases)
         aliases = json.loads(
-            string_buffer.value.decode('utf-8'),
+            aliases_str,
             object_hook=lambda obj: {k: [k] + v for k, v in obj.items()}
         )
         return aliases
@@ -456,25 +462,9 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
 
 
 @lru_cache
-def _get_param_types() -> Dict[str, str]:
-    buffer_len = 1 << 20
-    tmp_out_len = ctypes.c_int64(0)
-    string_buffer = ctypes.create_string_buffer(buffer_len)
-    ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-    _safe_call(_LIB.LGBM_DumpParameterTypes(
-        ctypes.c_int64(buffer_len),
-        ctypes.byref(tmp_out_len),
-        ptr_string_buffer))
-    actual_len = tmp_out_len.value
-    # if buffer length is not long enough, re-allocate a buffer
-    if actual_len > buffer_len:
-        string_buffer = ctypes.create_string_buffer(actual_len)
-        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-        _safe_call(_LIB.LGBM_DumpParameterTypes(
-            ctypes.c_int64(actual_len),
-            ctypes.byref(tmp_out_len),
-            ptr_string_buffer))
-    res = json.loads(ptr_string_buffer.value.decode('utf-8'))
+def _get_parameter_types() -> Dict[str, str]:
+    types_str = _get_string_from_c_api(_LIB.LGBM_DumpParameterTypes)
+    res = json.loads(types_str)
     res['categorical_feature'] = 'vector<int>'
     return res
 
@@ -2763,7 +2753,7 @@ def __init__(
             raise TypeError('Need at least one training dataset or model file or model string '
                             'to create Booster instance')
         if model_file is not None or model_str is not None:
-            params = self._get_params()
+            params = self._get_parameters()
         self.params = params
 
     def __del__(self) -> None:
@@ -2807,28 +2797,10 @@ def __setstate__(self, state):
             state['handle'] = handle
         self.__dict__.update(state)
 
-    def _get_params(self) -> Dict[str, Any]:
-        buffer_len = 1 << 20
-        tmp_out_len = ctypes.c_int64(0)
-        string_buffer = ctypes.create_string_buffer(buffer_len)
-        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-        _safe_call(_LIB.LGBM_BoosterGetParameters(
-            self.handle,
-            ctypes.c_int64(buffer_len),
-            ctypes.byref(tmp_out_len),
-            ptr_string_buffer))
-        actual_len = tmp_out_len.value
-        # if buffer length is not long enough, re-allocate a buffer
-        if actual_len > buffer_len:
-            string_buffer = ctypes.create_string_buffer(actual_len)
-            ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-            _safe_call(_LIB.LGBM_BoosterGetParameters(
-                self.handle,
-                ctypes.c_int64(actual_len),
-                ctypes.byref(tmp_out_len),
-                ptr_string_buffer))
-        params = json.loads(ptr_string_buffer.value.decode('utf-8'))
-        ptypes = _get_param_types()
+    def _get_parameters(self) -> Dict[str, Any]:
+        params_str = _get_string_from_c_api(_LIB.LGBM_BoosterGetParameters, self.handle)
+        params = json.loads(params_str)
+        ptypes = _get_parameter_types()
         types_dict = {'string': str, 'int': int, 'double': float, 'bool': bool}
 
         def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:

From 7761124b822ad34f2545ed658845e033b499e60f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 16 Aug 2022 23:12:09 -0500
Subject: [PATCH 07/23] manually parse json string

---
 src/boosting/gbdt.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index f7710da640e9..aa1b10f8a7ed 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -173,7 +173,20 @@ class GBDT : public GBDTBase {
       }
     }
     auto map = Config::Str2Map(str_buf.str().c_str());
-    return Json(map).dump();
+    str_buf.str("");
+    str_buf << "{";
+    bool first = true;
+    for (auto it = map.cbegin(); it != map.cend(); ++it) {
+      if (first) {
+        first = false;
+        str_buf << "\"";
+      } else {
+        str_buf << ",\"";
+      }
+      str_buf << it->first << "\": \"" << it->second << "\"";
+    }
+    str_buf << "}";
+    return str_buf.str();
   }
 
   /*!

From 26ba91f2b45ceacdb2ffb3c9fe9db8ddf70cb321 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 16 Aug 2022 23:29:46 -0500
Subject: [PATCH 08/23] dont create temporary map. lint

---
 python-package/lightgbm/basic.py |  3 +--
 src/boosting/gbdt.h              | 23 +++++++++--------------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index ac952fdbff24..6dcab0085fd0 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2807,7 +2807,7 @@ def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
             if 'vector' in type_name:
                 if not value:
                     return []
-                eltype_name = type_name[type_name.find('<') + 1 : type_name.find('>')]
+                eltype_name = type_name[type_name.find('<') + 1:type_name.find('>')]
                 eltype = types_dict[eltype_name]
                 return [eltype(v) for v in value.split(',')]
             eltype = types_dict[type_name]
@@ -2815,7 +2815,6 @@ def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
 
         return {param: parse_param(value, ptypes.get(param, 'string')) for param, value in params.items()}
 
-
     def free_dataset(self) -> "Booster":
         """Free Booster's Datasets.
 
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index aa1b10f8a7ed..a6133a2ed047 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -165,25 +165,20 @@ class GBDT : public GBDTBase {
       return std::string("{}");
     }
     std::stringstream str_buf;
+    str_buf << "{";
     auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
+    bool first = true;
     for (auto line : lines) {
       auto pair = Common::Split(line.c_str(), "[:]");
       if (pair[1] != " ") {
-        str_buf << pair[0] << "=" << Common::Trim(pair[1]) << "\n";
-      }
-    }
-    auto map = Config::Str2Map(str_buf.str().c_str());
-    str_buf.str("");
-    str_buf << "{";
-    bool first = true;
-    for (auto it = map.cbegin(); it != map.cend(); ++it) {
-      if (first) {
-        first = false;
-        str_buf << "\"";
-      } else {
-        str_buf << ",\"";
+        if (first) {
+          first = false;
+          str_buf << "\"";
+        } else {
+          str_buf << ",\"";
+        }
+        str_buf << pair[0] << "\": \"" << Common::Trim(pair[1]) << "\"";
       }
-      str_buf << it->first << "\": \"" << it->second << "\"";
     }
     str_buf << "}";
     return str_buf.str();

From ec113c0c4c31485b56c6a175bc39dcfb0b60749c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 16 Aug 2022 23:32:19 -0500
Subject: [PATCH 09/23] add doc

---
 include/LightGBM/c_api.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index 2fe83731330c..302cd9fdbe94 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -600,6 +600,14 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterLoadModelFromString(const char* model_str,
                                                       int* out_num_iterations,
                                                       BoosterHandle* out);
 
+/*!
+ * \brief Get parameters as JSON string.
+ * \param handle Handle of booster.
+ * \param buffer_len Allocated space for string.
+ * \param[out] out_len Actual size of string.
+ * \param[out] out_str JSON string containing parameters.
+ * \return 0 when succeed, -1 when failure happens
+ */
 LIGHTGBM_C_EXPORT int LGBM_BoosterGetParameters(BoosterHandle handle,
                                                int64_t buffer_len,
                                                int64_t* out_len,

From 39c7a8ce795376dcfc2b8bc4641e47723e3ee4fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Sat, 27 Aug 2022 18:48:05 -0500
Subject: [PATCH 10/23] minor fixes

---
 include/LightGBM/c_api.h                 | 8 +++++++-
 src/boosting/gbdt.h                      | 6 +++---
 tests/python_package_test/test_engine.py | 1 +
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index 302cd9fdbe94..e51b2119c263 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -63,7 +63,13 @@ LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len,
                                             int64_t* out_len,
                                             char* out_str);
 
-
+/*!
+ * \brief Dump all parameter names with their types to JSON.
+ * \param buffer_len String buffer length, if ``buffer_len < out_len``, you should re-allocate buffer
+ * \param[out] out_len Actual output length
+ * \param[out] out_str JSON format string of parameters, should pre-allocate memory
+ * \return 0 when succeed, -1 when failure happens
+ */
 LIGHTGBM_C_EXPORT int LGBM_DumpParameterTypes(int64_t buffer_len,
                                               int64_t* out_len,
                                               char* out_str);
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index a6133a2ed047..200aa588c006 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -166,10 +166,10 @@ class GBDT : public GBDTBase {
     }
     std::stringstream str_buf;
     str_buf << "{";
-    auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
+    const auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
     bool first = true;
-    for (auto line : lines) {
-      auto pair = Common::Split(line.c_str(), "[:]");
+    for (const auto& line : lines) {
+      const auto pair = Common::Split(line.c_str(), "[:]");
       if (pair[1] != " ") {
         if (first) {
           first = false;
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 1c8662b27c86..4205173ab524 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1107,6 +1107,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path):
     y = np.random.rand(100)
     ds = lgb.Dataset(X, y)
     params = {
+        'boosting': 'rf',
         'num_leaves': 5,
         'bagging_fraction': 0.8,
         'bagging_freq': 2,

From 0e6591b974f151eb8fb90afcf63112b250cc13fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Sat, 27 Aug 2022 21:52:28 -0500
Subject: [PATCH 11/23] revert _get_string_from_c_api. rename parameter to
 param

---
 include/LightGBM/c_api.h         |  2 +-
 python-package/lightgbm/basic.py | 95 +++++++++++++++++++++-----------
 src/c_api.cpp                    |  2 +-
 3 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index e51b2119c263..cf38bc113414 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -70,7 +70,7 @@ LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len,
  * \param[out] out_str JSON format string of parameters, should pre-allocate memory
  * \return 0 when succeed, -1 when failure happens
  */
-LIGHTGBM_C_EXPORT int LGBM_DumpParameterTypes(int64_t buffer_len,
+LIGHTGBM_C_EXPORT int LGBM_DumpParamTypes(int64_t buffer_len,
                                               int64_t* out_len,
                                               char* out_str);
 
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 6dcab0085fd0..f4253ea09af6 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -156,28 +156,6 @@ def _safe_call(ret: int) -> None:
         raise LightGBMError(_LIB.LGBM_GetLastError().decode('utf-8'))
 
 
-def _get_string_from_c_api(func: Callable, booster_handle: Optional[ctypes.c_void_p] = None) -> str:
-    def c_api_call(buffer_len: int, out_len: ctypes.c_int64):
-        string_buffer = ctypes.create_string_buffer(buffer_len)
-        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-        args = (ctypes.c_int64(buffer_len), ctypes.byref(out_len), ptr_string_buffer)
-        if booster_handle is None:
-            f = func(*args)
-        else:
-            f = func(booster_handle, *args)
-        _safe_call(f)
-        return ptr_string_buffer.value.decode('utf-8')
-
-    buffer_len = 1 << 20
-    tmp_out_len = ctypes.c_int64(0)
-    res = c_api_call(buffer_len, tmp_out_len)
-    actual_len = tmp_out_len.value
-    # if buffer length is not long enough, re-allocate a buffer
-    if actual_len > buffer_len:
-        res = c_api_call(actual_len, tmp_out_len)
-    return res
-
-
 def _is_numeric(obj: Any) -> bool:
     """Check whether object is a number or not, include numpy number, etc."""
     try:
@@ -379,9 +357,25 @@ class _ConfigAliases:
 
     @staticmethod
     def _get_all_param_aliases() -> Dict[str, List[str]]:
-        aliases_str = _get_string_from_c_api(_LIB.LGBM_DumpParamAliases)
+        buffer_len = 1 << 20
+        tmp_out_len = ctypes.c_int64(0)
+        string_buffer = ctypes.create_string_buffer(buffer_len)
+        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+        _safe_call(_LIB.LGBM_DumpParamAliases(
+            ctypes.c_int64(buffer_len),
+            ctypes.byref(tmp_out_len),
+            ptr_string_buffer))
+        actual_len = tmp_out_len.value
+        # if buffer length is not long enough, re-allocate a buffer
+        if actual_len > buffer_len:
+            string_buffer = ctypes.create_string_buffer(actual_len)
+            ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+            _safe_call(_LIB.LGBM_DumpParamAliases(
+                ctypes.c_int64(actual_len),
+                ctypes.byref(tmp_out_len),
+                ptr_string_buffer))
         aliases = json.loads(
-            aliases_str,
+            string_buffer.value.decode('utf-8'),
             object_hook=lambda obj: {k: [k] + v for k, v in obj.items()}
         )
         return aliases
@@ -462,10 +456,28 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
 
 
 @lru_cache(maxsize=None)
-def _get_parameter_types() -> Dict[str, str]:
-    types_str = _get_string_from_c_api(_LIB.LGBM_DumpParameterTypes)
-    res = json.loads(types_str)
+def _get_param_types() -> Dict[str, str]:
+    buffer_len = 1 << 20
+    tmp_out_len = ctypes.c_int64(0)
+    string_buffer = ctypes.create_string_buffer(buffer_len)
+    ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+    _safe_call(_LIB.LGBM_DumpParamTypes(
+        ctypes.c_int64(buffer_len),
+        ctypes.byref(tmp_out_len),
+        ptr_string_buffer))
+    actual_len = tmp_out_len.value
+    # if buffer length is not long enough, re-allocate a buffer
+    if actual_len > buffer_len:
+        string_buffer = ctypes.create_string_buffer(actual_len)
+        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+        _safe_call(_LIB.LGBM_DumpParamTypes(
+            ctypes.c_int64(actual_len),
+            ctypes.byref(tmp_out_len),
+            ptr_string_buffer))
+    res = json.loads(string_buffer.value.decode('utf-8'))
     res['categorical_feature'] = 'vector<int>'
+    res['monotone_constraints'] = 'vector<int>'
+    res['max_bin_by_feature'] = 'vector<int>'
     return res
 
 
@@ -2747,13 +2759,12 @@ def __init__(
                 ctypes.byref(out_num_class)))
             self.__num_class = out_num_class.value
             self.pandas_categorical = _load_pandas_categorical(file_name=model_file)
+            params = self._get_params()
         elif model_str is not None:
             self.model_from_string(model_str)
         else:
             raise TypeError('Need at least one training dataset or model file or model string '
                             'to create Booster instance')
-        if model_file is not None or model_str is not None:
-            params = self._get_parameters()
         self.params = params
 
     def __del__(self) -> None:
@@ -2797,10 +2808,28 @@ def __setstate__(self, state):
             state['handle'] = handle
         self.__dict__.update(state)
 
-    def _get_parameters(self) -> Dict[str, Any]:
-        params_str = _get_string_from_c_api(_LIB.LGBM_BoosterGetParameters, self.handle)
-        params = json.loads(params_str)
-        ptypes = _get_parameter_types()
+    def _get_params(self) -> Dict[str, Any]:
+        buffer_len = 1 << 20
+        tmp_out_len = ctypes.c_int64(0)
+        string_buffer = ctypes.create_string_buffer(buffer_len)
+        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+        _safe_call(_LIB.LGBM_BoosterGetParameters(
+            self.handle,
+            ctypes.c_int64(buffer_len),
+            ctypes.byref(tmp_out_len),
+            ptr_string_buffer))
+        actual_len = tmp_out_len.value
+        # if buffer length is not long enough, re-allocate a buffer
+        if actual_len > buffer_len:
+            string_buffer = ctypes.create_string_buffer(actual_len)
+            ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
+            _safe_call(_LIB.LGBM_BoosterGetParameters(
+                self.handle,
+                ctypes.c_int64(actual_len),
+                ctypes.byref(tmp_out_len),
+                ptr_string_buffer))
+        params = json.loads(string_buffer.value.decode('utf-8'))
+        ptypes = _get_param_types()
         types_dict = {'string': str, 'int': int, 'double': float, 'bool': lambda x: x == '1'}
 
         def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
diff --git a/src/c_api.cpp b/src/c_api.cpp
index 3df5900f2f37..b51f385fb942 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -900,7 +900,7 @@ int LGBM_DumpParamAliases(int64_t buffer_len,
   API_END();
 }
 
-int LGBM_DumpParameterTypes(int64_t buffer_len,
+int LGBM_DumpParamTypes(int64_t buffer_len,
                             int64_t* out_len,
                             char* out_str) {
   API_BEGIN();

From d4e781b2a8883ba523bc7da64cf158addd14b8ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Sat, 27 Aug 2022 21:54:35 -0500
Subject: [PATCH 12/23] add R-package functions

---
 R-package/R/aliases.R                       | 21 ++++++++++
 R-package/R/lgb.Booster.R                   | 41 ++++++++++++++++++++
 R-package/src/lightgbm_R.cpp                | 43 +++++++++++++++++++++
 R-package/src/lightgbm_R.h                  | 15 +++++++
 R-package/tests/testthat/test_lgb.Booster.R | 21 +++++++---
 5 files changed, 135 insertions(+), 6 deletions(-)

diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R
index 0aa886ab90c2..0729fabf148a 100644
--- a/R-package/R/aliases.R
+++ b/R-package/R/aliases.R
@@ -63,6 +63,27 @@
     return(params_to_aliases)
 }
 
+# [description] List of parameter types. Wrapped in a function to take advantage of
+#               lazy evaluation (so it doesn't matter what order R sources files during installation).
+# [return] A named list, where each key is a main LightGBM parameter and each value is a character
+#          vector holding the name of that parameter's type in C++.
+.PARAMETER_TYPES <- function() {
+    json_str <- .Call(
+      LGBM_DumpParamTypes_R
+    )
+    param_types <- jsonlite::fromJSON(json_str)
+    param_types["categorical_feature"] <- "vector<int>"
+    param_types["monotone_constraints"] <- "vector<int>"
+    param_types["max_bin_by_feature"] <- "vector<int>"
+    # store in cache so the next call to `.PARAMETER_TYPES()` doesn't need to recompute this
+    assign(
+        x = "PARAMETER_TYPES"
+        , value = param_types
+        , envir = .lgb_session_cache_env
+    )
+    return(param_types)
+}
+
 # [description]
 #     Per https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst#metric,
 #     a few different strings can be used to indicate "no metrics".
diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R
index 5fd0ef02f229..876b6ce5b1a0 100644
--- a/R-package/R/lgb.Booster.R
+++ b/R-package/R/lgb.Booster.R
@@ -77,6 +77,7 @@ Booster <- R6::R6Class(
           LGBM_BoosterCreateFromModelfile_R
           , modelfile
         )
+        params <- private$get_params(handle)
 
       } else if (!is.null(model_str)) {
 
@@ -674,6 +675,46 @@ Booster <- R6::R6Class(
 
     },
 
+    get_params = function(handle) {
+      params_str <- .Call(
+        LGBM_BoosterGetParameters_R
+        , handle
+      )
+      params <- jsonlite::fromJSON(params_str)
+      param_types <- .PARAMETER_TYPES()
+
+      type_name_to_fn <- c(
+        "string" = as.character
+        , "int" = as.integer
+        , "double" = as.numeric
+        , "bool" = function(x) x == "1"
+      )
+
+      parse_param <- function(value, type_name) {
+        if (grepl("vector", type_name)) {
+          eltype_name <- sub("vector<(.*)>", "\\1", type_name)
+          parse_fn <- type_name_to_fn[[eltype_name]]
+          values <- strsplit(value, ",")
+          return(lapply(values, parse_fn))
+        }
+        parse_fn <- type_name_to_fn[[type_name]]
+        parse_fn(value)
+      }
+
+      res <- list()
+      for (param_name in names(params)) {
+        if (param_name %in% names(param_types)) {
+          type_name <- param_types[[param_name]]
+        } else {
+          type_name <- "string"
+        }
+        res[param_name] <- parse_param(params[[param_name]], type_name)
+      }
+
+      return(res)
+
+    },
+
     inner_eval = function(data_name, data_idx, feval = NULL) {
 
       # Check for unknown dataset (over the maximum provided range)
diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index 560622788422..aa18df8e61ad 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -1019,6 +1019,47 @@ SEXP LGBM_DumpParamAliases_R() {
   R_API_END();
 }
 
+SEXP LGBM_BoosterGetParameters_R(SEXP handle) {
+  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  R_API_BEGIN();
+  _AssertBoosterHandleNotNull(handle);
+  SEXP params_str;
+  int64_t out_len = 0;
+  int64_t buf_len = 1024 * 1024;
+  std::vector<char> inner_char_buf(buf_len);
+  CHECK_CALL(LGBM_BoosterGetParameters(R_ExternalPtrAddr(handle), buf_len, &out_len, inner_char_buf.data()));
+  // if aliases string was larger than the initial buffer, allocate a bigger buffer and try again
+  if (out_len > buf_len) {
+    inner_char_buf.resize(out_len);
+    CHECK_CALL(LGBM_BoosterGetParameters(R_ExternalPtrAddr(handle), out_len, &out_len, inner_char_buf.data()));
+  }
+  params_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
+  SET_STRING_ELT(params_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
+  UNPROTECT(2);
+  return params_str;
+  R_API_END();
+}
+
+SEXP LGBM_DumpParamTypes_R() {
+  SEXP cont_token = PROTECT(R_MakeUnwindCont());
+  R_API_BEGIN();
+  SEXP types_str;
+  int64_t out_len = 0;
+  int64_t buf_len = 1024 * 1024;
+  std::vector<char> inner_char_buf(buf_len);
+  CHECK_CALL(LGBM_DumpParamTypes(buf_len, &out_len, inner_char_buf.data()));
+  // if the parameter types string was larger than the initial buffer, allocate a bigger buffer and try again
+  if (out_len > buf_len) {
+    inner_char_buf.resize(out_len);
+    CHECK_CALL(LGBM_DumpParamTypes(out_len, &out_len, inner_char_buf.data()));
+  }
+  types_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
+  SET_STRING_ELT(types_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
+  UNPROTECT(2);
+  return types_str;
+  R_API_END();
+}
+
 // .Call() calls
 static const R_CallMethodDef CallEntries[] = {
   {"LGBM_HandleIsNull_R"              , (DL_FUNC) &LGBM_HandleIsNull_R              , 1},
@@ -1056,6 +1097,7 @@ static const R_CallMethodDef CallEntries[] = {
   {"LGBM_BoosterGetEvalNames_R"       , (DL_FUNC) &LGBM_BoosterGetEvalNames_R       , 1},
   {"LGBM_BoosterGetEval_R"            , (DL_FUNC) &LGBM_BoosterGetEval_R            , 3},
   {"LGBM_BoosterGetNumPredict_R"      , (DL_FUNC) &LGBM_BoosterGetNumPredict_R      , 3},
+  {"LGBM_BoosterGetParameters_R"      , (DL_FUNC) &LGBM_BoosterGetParameters_R      , 1},
   {"LGBM_BoosterGetPredict_R"         , (DL_FUNC) &LGBM_BoosterGetPredict_R         , 3},
   {"LGBM_BoosterPredictForFile_R"     , (DL_FUNC) &LGBM_BoosterPredictForFile_R     , 10},
   {"LGBM_BoosterCalcNumPredict_R"     , (DL_FUNC) &LGBM_BoosterCalcNumPredict_R     , 8},
@@ -1067,6 +1109,7 @@ static const R_CallMethodDef CallEntries[] = {
   {"LGBM_BoosterDumpModel_R"          , (DL_FUNC) &LGBM_BoosterDumpModel_R          , 3},
   {"LGBM_NullBoosterHandleError_R"    , (DL_FUNC) &LGBM_NullBoosterHandleError_R    , 0},
   {"LGBM_DumpParamAliases_R"          , (DL_FUNC) &LGBM_DumpParamAliases_R          , 0},
+  {"LGBM_DumpParamTypes_R"            , (DL_FUNC) &LGBM_DumpParamTypes_R            , 0},
   {NULL, NULL, 0}
 };
 
diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h
index 0f2a0949b61c..d3f4db40cd5a 100644
--- a/R-package/src/lightgbm_R.h
+++ b/R-package/src/lightgbm_R.h
@@ -266,6 +266,15 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterLoadModelFromString_R(
   SEXP model_str
 );
 
+/*!
+* \brief Get parameters as JSON string.
+* \param handle Booster handle
+* \return R character vector (length=1) with parameters in JSON format
+*/
+LIGHTGBM_C_EXPORT SEXP LGBM_BoosterGetParameters_R(
+  SEXP handle
+);
+
 /*!
 * \brief Merge model in two Boosters to first handle
 * \param handle handle primary Booster handle, will merge other handle to this
@@ -650,4 +659,10 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
 */
 LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R();
 
+/*!
+* \brief Dump parameter types to JSON
+* \return R character vector (length=1) with types JSON
+*/
+LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamTypes_R();
+
 #endif  // LIGHTGBM_R_H_
diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
index 8208ef416a65..1f4683ba5385 100644
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@@ -172,15 +172,21 @@ test_that("Loading a Booster from a text file works", {
     data(agaricus.test, package = "lightgbm")
     train <- agaricus.train
     test <- agaricus.test
+    params <- list(
+        num_leaves = 4L
+        , boosting = "rf"
+        , bagging_fraction = 0.8
+        , bagging_freq = 1L
+        , force_col_wise = TRUE
+        , categorical_feature = c(1L, 2L)
+        , learning_rate = 1.0
+        , objective = "binary"
+        , verbosity = VERBOSITY
+    )
     bst <- lightgbm(
         data = as.matrix(train$data)
         , label = train$label
-        , params = list(
-            num_leaves = 4L
-            , learning_rate = 1.0
-            , objective = "binary"
-            , verbose = VERBOSITY
-        )
+        , params = params
         , nrounds = 2L
     )
     expect_true(lgb.is.Booster(bst))
@@ -199,6 +205,9 @@ test_that("Loading a Booster from a text file works", {
     )
     pred2 <- predict(bst2, test$data)
     expect_identical(pred, pred2)
+
+    # check that the parameters are loaded correctly
+    expect_identical(bst2$params[names(params)], params)
 })
 
 test_that("boosters with linear models at leaves can be written to text file and re-loaded successfully", {

From 483a3f48e67d08899e07dced1a43e8e201a371cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Mon, 29 Aug 2022 00:11:39 -0500
Subject: [PATCH 13/23] rename functions to BoosterGetLoadedParam. override
 array parameters. check interaction constraints are properly loaded

---
 R-package/R/aliases.R                       |  3 --
 R-package/R/lgb.Booster.R                   | 39 ++++++++++++---------
 R-package/src/lightgbm_R.cpp                |  8 ++---
 R-package/src/lightgbm_R.h                  |  2 +-
 R-package/tests/testthat/test_lgb.Booster.R |  3 +-
 helpers/parameter_generator.py              | 15 ++++++--
 include/LightGBM/boosting.h                 |  2 +-
 include/LightGBM/c_api.h                    |  8 ++---
 python-package/lightgbm/basic.py            | 26 +++++++-------
 src/boosting/gbdt.h                         |  8 ++---
 src/c_api.cpp                               |  4 +--
 src/io/config_auto.cpp                      | 15 +++++---
 tests/python_package_test/test_engine.py    |  5 +--
 13 files changed, 80 insertions(+), 58 deletions(-)

diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R
index 0729fabf148a..4c4a593ca47d 100644
--- a/R-package/R/aliases.R
+++ b/R-package/R/aliases.R
@@ -72,9 +72,6 @@
       LGBM_DumpParamTypes_R
     )
     param_types <- jsonlite::fromJSON(json_str)
-    param_types["categorical_feature"] <- "vector<int>"
-    param_types["monotone_constraints"] <- "vector<int>"
-    param_types["max_bin_by_feature"] <- "vector<int>"
     # store in cache so the next call to `.PARAMETER_TYPES()` doesn't need to recompute this
     assign(
         x = "PARAMETER_TYPES"
diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R
index e6896ff7d82b..adc4fc071c2e 100644
--- a/R-package/R/lgb.Booster.R
+++ b/R-package/R/lgb.Booster.R
@@ -77,7 +77,7 @@ Booster <- R6::R6Class(
           LGBM_BoosterCreateFromModelfile_R
           , modelfile
         )
-        params <- private$get_params(handle)
+        params <- private$get_loaded_param(handle)
 
       } else if (!is.null(model_str)) {
 
@@ -728,9 +728,9 @@ Booster <- R6::R6Class(
 
     },
 
-    get_params = function(handle) {
+    get_loaded_param = function(handle) {
       params_str <- .Call(
-        LGBM_BoosterGetParameters_R
+        LGBM_BoosterGetLoadedParam_R
         , handle
       )
       params <- jsonlite::fromJSON(params_str)
@@ -744,24 +744,31 @@ Booster <- R6::R6Class(
       )
 
       parse_param <- function(value, type_name) {
-        if (grepl("vector", type_name)) {
-          eltype_name <- sub("vector<(.*)>", "\\1", type_name)
-          parse_fn <- type_name_to_fn[[eltype_name]]
-          values <- strsplit(value, ",")
-          return(lapply(values, parse_fn))
-        }
-        parse_fn <- type_name_to_fn[[type_name]]
-        parse_fn(value)
+          if (grepl("vector", type_name)) {
+            eltype_name <- sub("vector<(.*)>", "\\1", type_name)
+            if (grepl("vector", eltype_name)) {
+              arr_pat <- "\\[(.*?)\\]"
+              matches <- regmatches(value, gregexpr(arr_pat, value))[[1L]]
+              # the previous returns the matches with the square brackets
+              matches <- sapply(matches, function(x) gsub(arr_pat, "\\1", x))
+              values <- unname(sapply(matches, parse_param, eltype_name))
+            } else {
+              parse_fn <- type_name_to_fn[[eltype_name]]
+              values <- parse_fn(strsplit(value, ",")[[1L]])
+            }
+            return(values)
+          }
+          parse_fn <- type_name_to_fn[[type_name]]
+          parse_fn(value)
       }
 
       res <- list()
       for (param_name in names(params)) {
-        if (param_name %in% names(param_types)) {
-          type_name <- param_types[[param_name]]
-        } else {
-          type_name <- "string"
+        value <- parse_param(params[[param_name]], param_types[[param_name]])
+        if (param_name == "interaction_constraints") {
+          value <- lapply(value, function(x) x + 1L)
         }
-        res[param_name] <- parse_param(params[[param_name]], type_name)
+        res[[param_name]] <- value
       }
 
       return(res)
diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index 96658a236bdc..9755fb9d59ca 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -1183,7 +1183,7 @@ SEXP LGBM_DumpParamAliases_R() {
   R_API_END();
 }
 
-SEXP LGBM_BoosterGetParameters_R(SEXP handle) {
+SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
   SEXP cont_token = PROTECT(R_MakeUnwindCont());
   R_API_BEGIN();
   _AssertBoosterHandleNotNull(handle);
@@ -1191,11 +1191,11 @@ SEXP LGBM_BoosterGetParameters_R(SEXP handle) {
   int64_t out_len = 0;
   int64_t buf_len = 1024 * 1024;
   std::vector<char> inner_char_buf(buf_len);
-  CHECK_CALL(LGBM_BoosterGetParameters(R_ExternalPtrAddr(handle), buf_len, &out_len, inner_char_buf.data()));
+  CHECK_CALL(LGBM_BoosterGetLoadedParam(R_ExternalPtrAddr(handle), buf_len, &out_len, inner_char_buf.data()));
   // if aliases string was larger than the initial buffer, allocate a bigger buffer and try again
   if (out_len > buf_len) {
     inner_char_buf.resize(out_len);
-    CHECK_CALL(LGBM_BoosterGetParameters(R_ExternalPtrAddr(handle), out_len, &out_len, inner_char_buf.data()));
+    CHECK_CALL(LGBM_BoosterGetLoadedParam(R_ExternalPtrAddr(handle), out_len, &out_len, inner_char_buf.data()));
   }
   params_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
   SET_STRING_ELT(params_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
@@ -1252,7 +1252,7 @@ static const R_CallMethodDef CallEntries[] = {
   {"LGBM_BoosterResetParameter_R"                , (DL_FUNC) &LGBM_BoosterResetParameter_R                , 2},
   {"LGBM_BoosterGetNumClasses_R"                 , (DL_FUNC) &LGBM_BoosterGetNumClasses_R                 , 2},
   {"LGBM_BoosterGetNumFeature_R"                 , (DL_FUNC) &LGBM_BoosterGetNumFeature_R                 , 1},
-  {"LGBM_BoosterGetParameters_R"                 , (DL_FUNC) &LGBM_BoosterGetParameters_R                 , 1},
+  {"LGBM_BoosterGetLoadedParam_R"                , (DL_FUNC) &LGBM_BoosterGetLoadedParam_R                , 1},
   {"LGBM_BoosterUpdateOneIter_R"                 , (DL_FUNC) &LGBM_BoosterUpdateOneIter_R                 , 1},
   {"LGBM_BoosterUpdateOneIterCustom_R"           , (DL_FUNC) &LGBM_BoosterUpdateOneIterCustom_R           , 4},
   {"LGBM_BoosterRollbackOneIter_R"               , (DL_FUNC) &LGBM_BoosterRollbackOneIter_R               , 1},
diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h
index e3d606f40fff..7bbc1737372a 100644
--- a/R-package/src/lightgbm_R.h
+++ b/R-package/src/lightgbm_R.h
@@ -271,7 +271,7 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterLoadModelFromString_R(
 * \param handle Booster handle
 * \return R character vector (length=1) with parameters in JSON format
 */
-LIGHTGBM_C_EXPORT SEXP LGBM_BoosterGetParameters_R(
+LIGHTGBM_C_EXPORT SEXP LGBM_BoosterGetLoadedParam_R(
   SEXP handle
 );
 
diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
index 1f4683ba5385..93ccac3f2205 100644
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@@ -179,6 +179,7 @@ test_that("Loading a Booster from a text file works", {
         , bagging_freq = 1L
         , force_col_wise = TRUE
         , categorical_feature = c(1L, 2L)
+        , interaction_constraints = list(c(1L, 2L), 1L)
         , learning_rate = 1.0
         , objective = "binary"
         , verbosity = VERBOSITY
@@ -207,7 +208,7 @@ test_that("Loading a Booster from a text file works", {
     expect_identical(pred, pred2)
 
     # check that the parameters are loaded correctly
-    expect_identical(bst2$params[names(params)], params)
+    expect_equal(bst2$params[names(params)], params)
 })
 
 test_that("boosters with linear models at leaves can be written to text file and re-loaded successfully", {
diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py
index 9e57ae7875a0..814edc682aba 100644
--- a/helpers/parameter_generator.py
+++ b/helpers/parameter_generator.py
@@ -379,12 +379,21 @@ def gen_parameter_code(
   str_buf << "{";"""
     int_t_pat = re.compile(r'int\d+_t')
     first = True
+    # the following are stored as comma separated strings but are arrays in the wrappers
+    overrides = {
+        'categorical_feature': 'vector<int>',
+        'ignore_column': 'vector<int>',
+        'interaction_constraints': 'vector<vector<int>>',
+    }
     for x in infos:
         for y in x:
-            if "[doc-only]" in y:
-                continue
-            param_type = int_t_pat.sub('int', y["inner_type"][0]).replace('std::', '')
             name = y["name"][0]
+            if name == 'task':
+                continue
+            if name in overrides:
+                param_type = overrides[name]
+            else:
+                param_type = int_t_pat.sub('int', y["inner_type"][0]).replace('std::', '')
             prefix = f'\n  str_buf << "'
             if first:
                 first = False
diff --git a/include/LightGBM/boosting.h b/include/LightGBM/boosting.h
index fd2e6330869c..1bfc18b4470b 100644
--- a/include/LightGBM/boosting.h
+++ b/include/LightGBM/boosting.h
@@ -313,7 +313,7 @@ class LIGHTGBM_EXPORT Boosting {
   */
   static Boosting* CreateBoosting(const std::string& type, const char* filename);
 
-  virtual std::string GetParameters() const = 0;
+  virtual std::string GetLoadedParam() const = 0;
 
   virtual bool IsLinear() const { return false; }
 
diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index cf38bc113414..f777422a7402 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -614,10 +614,10 @@ LIGHTGBM_C_EXPORT int LGBM_BoosterLoadModelFromString(const char* model_str,
  * \param[out] out_str JSON string containing parameters.
  * \return 0 when succeed, -1 when failure happens
  */
-LIGHTGBM_C_EXPORT int LGBM_BoosterGetParameters(BoosterHandle handle,
-                                               int64_t buffer_len,
-                                               int64_t* out_len,
-                                               char* out_str);
+LIGHTGBM_C_EXPORT int LGBM_BoosterGetLoadedParam(BoosterHandle handle,
+                                                 int64_t buffer_len,
+                                                 int64_t* out_len,
+                                                 char* out_str);
 
 
 /*!
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 88a91fd6390d..968769a7c26d 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -3,6 +3,7 @@
 import abc
 import ctypes
 import json
+import re
 import warnings
 from collections import OrderedDict
 from copy import deepcopy
@@ -478,9 +479,6 @@ def _get_param_types() -> Dict[str, str]:
             ctypes.byref(tmp_out_len),
             ptr_string_buffer))
     res = json.loads(string_buffer.value.decode('utf-8'))
-    res['categorical_feature'] = 'vector<int>'
-    res['monotone_constraints'] = 'vector<int>'
-    res['max_bin_by_feature'] = 'vector<int>'
     return res
 
 
@@ -2790,7 +2788,7 @@ def __init__(
                 ctypes.byref(out_num_class)))
             self.__num_class = out_num_class.value
             self.pandas_categorical = _load_pandas_categorical(file_name=model_file)
-            params = self._get_params()
+            params = self._get_loaded_param()
         elif model_str is not None:
             self.model_from_string(model_str)
         else:
@@ -2839,12 +2837,12 @@ def __setstate__(self, state: Dict[str, Any]) -> None:
             state['handle'] = handle
         self.__dict__.update(state)
 
-    def _get_params(self) -> Dict[str, Any]:
+    def _get_loaded_param(self) -> Dict[str, Any]:
         buffer_len = 1 << 20
         tmp_out_len = ctypes.c_int64(0)
         string_buffer = ctypes.create_string_buffer(buffer_len)
         ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-        _safe_call(_LIB.LGBM_BoosterGetParameters(
+        _safe_call(_LIB.LGBM_BoosterGetLoadedParam(
             self.handle,
             ctypes.c_int64(buffer_len),
             ctypes.byref(tmp_out_len),
@@ -2854,7 +2852,7 @@ def _get_params(self) -> Dict[str, Any]:
         if actual_len > buffer_len:
             string_buffer = ctypes.create_string_buffer(actual_len)
             ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-            _safe_call(_LIB.LGBM_BoosterGetParameters(
+            _safe_call(_LIB.LGBM_BoosterGetLoadedParam(
                 self.handle,
                 ctypes.c_int64(actual_len),
                 ctypes.byref(tmp_out_len),
@@ -2865,15 +2863,17 @@ def _get_params(self) -> Dict[str, Any]:
 
         def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
             if 'vector' in type_name:
-                if not value:
-                    return []
-                eltype_name = type_name[type_name.find('<') + 1:type_name.find('>')]
-                eltype = types_dict[eltype_name]
-                return [eltype(v) for v in value.split(',')]
+                eltype_name = type_name[type_name.find('<') + 1:type_name.rfind('>')]
+                if 'vector' in eltype_name:
+                    values = [parse_param(v, eltype_name) for v in re.findall(r'\[(.*?)\]', value)]
+                else:
+                    eltype = types_dict[eltype_name]
+                    values = [eltype(v) for v in value.split(',')]
+                return values
             eltype = types_dict[type_name]
             return eltype(value)
 
-        return {param: parse_param(value, ptypes.get(param, 'string')) for param, value in params.items()}
+        return {param: parse_param(value, ptypes[param]) for param, value in params.items()}
 
     def free_dataset(self) -> "Booster":
         """Free Booster's Datasets.
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 200aa588c006..d3809a7e82a8 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -160,7 +160,7 @@ class GBDT : public GBDTBase {
   /*!
   * \brief Get parameters as a JSON string
   */
-  std::string GetParameters() const override {
+  std::string GetLoadedParam() const override {
     if (loaded_parameter_.empty()) {
       return std::string("{}");
     }
@@ -169,15 +169,15 @@ class GBDT : public GBDTBase {
     const auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
     bool first = true;
     for (const auto& line : lines) {
-      const auto pair = Common::Split(line.c_str(), "[:]");
-      if (pair[1] != " ") {
+      const auto pair = Common::Split(line.c_str(), ":");
+      if (pair[1] != " ]") {
         if (first) {
           first = false;
           str_buf << "\"";
         } else {
           str_buf << ",\"";
         }
-        str_buf << pair[0] << "\": \"" << Common::Trim(pair[1]) << "\"";
+        str_buf << pair[0].substr(1) << "\": \"" << pair[1].substr(1, pair[1].size() - 2) << "\"";
       }
     }
     str_buf << "}";
diff --git a/src/c_api.cpp b/src/c_api.cpp
index b51f385fb942..8a3d3dae33ac 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -1760,14 +1760,14 @@ int LGBM_BoosterLoadModelFromString(
   API_END();
 }
 
-int LGBM_BoosterGetParameters(
+int LGBM_BoosterGetLoadedParam(
   BoosterHandle handle,
   int64_t buffer_len,
   int64_t* out_len,
   char* out_str) {
   API_BEGIN();
   Booster* ref_booster = reinterpret_cast<Booster*>(handle);
-  std::string params = ref_booster->GetBoosting()->GetParameters();
+  std::string params = ref_booster->GetBoosting()->GetLoadedParam();
   *out_len = static_cast<int64_t>(params.size()) + 1;
   if (*out_len <= buffer_len) {
     std::memcpy(out_str, params.c_str(), *out_len);
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 9ef6c11bd22a..67cab2258902 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -897,12 +897,18 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::paramet
 const std::string Config::ParameterTypes() {
   std::stringstream str_buf;
   str_buf << "{";
-  str_buf << "\"data\": \"string\"";
+  str_buf << "\"config\": \"string\"";
+  str_buf << ",\"objective\": \"string\"";
+  str_buf << ",\"boosting\": \"string\"";
+  str_buf << ",\"data\": \"string\"";
   str_buf << ",\"valid\": \"vector<string>\"";
   str_buf << ",\"num_iterations\": \"int\"";
   str_buf << ",\"learning_rate\": \"double\"";
   str_buf << ",\"num_leaves\": \"int\"";
+  str_buf << ",\"tree_learner\": \"string\"";
   str_buf << ",\"num_threads\": \"int\"";
+  str_buf << ",\"device_type\": \"string\"";
+  str_buf << ",\"seed\": \"int\"";
   str_buf << ",\"deterministic\": \"bool\"";
   str_buf << ",\"force_col_wise\": \"bool\"";
   str_buf << ",\"force_row_wise\": \"bool\"";
@@ -952,7 +958,7 @@ const std::string Config::ParameterTypes() {
   str_buf << ",\"cegb_penalty_feature_lazy\": \"vector<double>\"";
   str_buf << ",\"cegb_penalty_feature_coupled\": \"vector<double>\"";
   str_buf << ",\"path_smooth\": \"double\"";
-  str_buf << ",\"interaction_constraints\": \"string\"";
+  str_buf << ",\"interaction_constraints\": \"vector<vector<int>>\"";
   str_buf << ",\"verbosity\": \"int\"";
   str_buf << ",\"input_model\": \"string\"";
   str_buf << ",\"output_model\": \"string\"";
@@ -975,8 +981,8 @@ const std::string Config::ParameterTypes() {
   str_buf << ",\"label_column\": \"string\"";
   str_buf << ",\"weight_column\": \"string\"";
   str_buf << ",\"group_column\": \"string\"";
-  str_buf << ",\"ignore_column\": \"string\"";
-  str_buf << ",\"categorical_feature\": \"string\"";
+  str_buf << ",\"ignore_column\": \"vector<int>\"";
+  str_buf << ",\"categorical_feature\": \"vector<int>\"";
   str_buf << ",\"forcedbins_filename\": \"string\"";
   str_buf << ",\"save_binary\": \"bool\"";
   str_buf << ",\"precise_float_parser\": \"bool\"";
@@ -1007,6 +1013,7 @@ const std::string Config::ParameterTypes() {
   str_buf << ",\"lambdarank_truncation_level\": \"int\"";
   str_buf << ",\"lambdarank_norm\": \"bool\"";
   str_buf << ",\"label_gain\": \"vector<double>\"";
+  str_buf << ",\"metric\": \"vector<string>\"";
   str_buf << ",\"metric_freq\": \"int\"";
   str_buf << ",\"is_provide_training_metric\": \"bool\"";
   str_buf << ",\"eval_at\": \"vector<int>\"";
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 83307d83f09f..3589992cef66 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1208,12 +1208,13 @@ def test_parameters_are_loaded_from_model_file(tmp_path):
     y = np.random.rand(100)
     ds = lgb.Dataset(X, y)
     params = {
-        'boosting': 'rf',
-        'num_leaves': 5,
         'bagging_fraction': 0.8,
         'bagging_freq': 2,
+        'boosting': 'rf',
         'feature_fraction': 0.7,
         'force_col_wise': True,
+        'interaction_constraints': [[0, 1], [0]],
+        'num_leaves': 5,
         'num_threads': 1,
     }
     model_file = tmp_path / 'model.txt'

From 4ab5dd42f2dff064fe43bf339901c8e529840708 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Mon, 29 Aug 2022 18:45:40 -0500
Subject: [PATCH 14/23] add missing types to tests

---
 R-package/tests/testthat/test_lgb.Booster.R | 2 ++
 tests/python_package_test/test_engine.py    | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
index 93ccac3f2205..20ddf47019be 100644
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@@ -180,6 +180,8 @@ test_that("Loading a Booster from a text file works", {
         , force_col_wise = TRUE
         , categorical_feature = c(1L, 2L)
         , interaction_constraints = list(c(1L, 2L), 1L)
+        , feature_contri = rep(0.5, ncol(train)),
+        , metric = c("map", "average_precision"),
         , learning_rate = 1.0
         , objective = "binary"
         , verbosity = VERBOSITY
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 3589992cef66..f0f4f28a0da2 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1211,9 +1211,11 @@ def test_parameters_are_loaded_from_model_file(tmp_path):
         'bagging_fraction': 0.8,
         'bagging_freq': 2,
         'boosting': 'rf',
+        'feature_contri': [0.5, 0.5, 0.5],
         'feature_fraction': 0.7,
         'force_col_wise': True,
         'interaction_constraints': [[0, 1], [0]],
+        'metric': ['l2', 'rmse'],
         'num_leaves': 5,
         'num_threads': 1,
     }

From bd4eec08f8903a456badbe7e97e7e0b7c4451177 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Mon, 29 Aug 2022 19:06:28 -0500
Subject: [PATCH 15/23] fix R params

---
 R-package/tests/testthat/test_lgb.Booster.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
index 20ddf47019be..2e9065c95698 100644
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@@ -180,8 +180,8 @@ test_that("Loading a Booster from a text file works", {
         , force_col_wise = TRUE
         , categorical_feature = c(1L, 2L)
         , interaction_constraints = list(c(1L, 2L), 1L)
-        , feature_contri = rep(0.5, ncol(train)),
-        , metric = c("map", "average_precision"),
+        , feature_contri = rep(0.5, ncol(train$data))
+        , metric = c("mape", "average_precision")
         , learning_rate = 1.0
         , objective = "binary"
         , verbosity = VERBOSITY

From 9a00fdec50317430611b26d77327be9a474c9c9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Mon, 29 Aug 2022 21:38:42 -0500
Subject: [PATCH 16/23] assert equal dicts

---
 tests/python_package_test/test_engine.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index f0f4f28a0da2..749c7af471c6 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1222,7 +1222,8 @@ def test_parameters_are_loaded_from_model_file(tmp_path):
     model_file = tmp_path / 'model.txt'
     lgb.train(params, ds, num_boost_round=1, categorical_feature=[1, 2]).save_model(model_file)
     bst = lgb.Booster(model_file=model_file)
-    assert all(bst.params[k] == params[k] for k in params)  # bst.params has all parameters
+    set_params = {k: bst.params[k] for k in params.keys()}
+    assert set_params == params
     assert bst.params['categorical_feature'] == [1, 2]
 
 

From de6ef8a42694833daea07fd26380111010065a6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 30 Aug 2022 10:15:17 -0500
Subject: [PATCH 17/23] use boost_from_average as boolean param

---
 R-package/tests/testthat/test_lgb.Booster.R | 2 +-
 tests/python_package_test/test_engine.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
index 2e9065c95698..0f619e0c1dc8 100644
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@@ -177,7 +177,7 @@ test_that("Loading a Booster from a text file works", {
         , boosting = "rf"
         , bagging_fraction = 0.8
         , bagging_freq = 1L
-        , force_col_wise = TRUE
+        , boost_from_average = TRUE
         , categorical_feature = c(1L, 2L)
         , interaction_constraints = list(c(1L, 2L), 1L)
         , feature_contri = rep(0.5, ncol(train$data))
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 749c7af471c6..a730f9b7d635 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1213,7 +1213,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path):
         'boosting': 'rf',
         'feature_contri': [0.5, 0.5, 0.5],
         'feature_fraction': 0.7,
-        'force_col_wise': True,
+        'boost_from_average': True,
         'interaction_constraints': [[0, 1], [0]],
         'metric': ['l2', 'rmse'],
         'num_leaves': 5,

From 2cec69272295f479c7157ca45ec813057290d4bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 30 Aug 2022 10:16:31 -0500
Subject: [PATCH 18/23] set boost_from_average to false

---
 R-package/tests/testthat/test_lgb.Booster.R | 2 +-
 tests/python_package_test/test_engine.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
index 0f619e0c1dc8..1bd565a07345 100644
--- a/R-package/tests/testthat/test_lgb.Booster.R
+++ b/R-package/tests/testthat/test_lgb.Booster.R
@@ -177,7 +177,7 @@ test_that("Loading a Booster from a text file works", {
         , boosting = "rf"
         , bagging_fraction = 0.8
         , bagging_freq = 1L
-        , boost_from_average = TRUE
+        , boost_from_average = FALSE
         , categorical_feature = c(1L, 2L)
         , interaction_constraints = list(c(1L, 2L), 1L)
         , feature_contri = rep(0.5, ncol(train$data))
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index a730f9b7d635..f42231c9074e 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1213,7 +1213,7 @@ def test_parameters_are_loaded_from_model_file(tmp_path):
         'boosting': 'rf',
         'feature_contri': [0.5, 0.5, 0.5],
         'feature_fraction': 0.7,
-        'boost_from_average': True,
+        'boost_from_average': False,
         'interaction_constraints': [[0, 1], [0]],
         'metric': ['l2', 'rmse'],
         'num_leaves': 5,

From f066dbae4593955863f863a85fade0368d1794ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 30 Aug 2022 17:27:03 -0500
Subject: [PATCH 19/23] simplify R's parse_param

---
 R-package/R/lgb.Booster.R        | 29 ++++++++++++++---------------
 python-package/lightgbm/basic.py |  1 +
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R
index adc4fc071c2e..36fe6813901a 100644
--- a/R-package/R/lgb.Booster.R
+++ b/R-package/R/lgb.Booster.R
@@ -744,22 +744,21 @@ Booster <- R6::R6Class(
       )
 
       parse_param <- function(value, type_name) {
-          if (grepl("vector", type_name)) {
-            eltype_name <- sub("vector<(.*)>", "\\1", type_name)
-            if (grepl("vector", eltype_name)) {
-              arr_pat <- "\\[(.*?)\\]"
-              matches <- regmatches(value, gregexpr(arr_pat, value))[[1L]]
-              # the previous returns the matches with the square brackets
-              matches <- sapply(matches, function(x) gsub(arr_pat, "\\1", x))
-              values <- unname(sapply(matches, parse_param, eltype_name))
-            } else {
-              parse_fn <- type_name_to_fn[[eltype_name]]
-              values <- parse_fn(strsplit(value, ",")[[1L]])
-            }
-            return(values)
+        if (grepl("vector", type_name)) {
+          eltype_name <- sub("vector<(.*)>", "\\1", type_name)
+          if (grepl("vector", eltype_name)) {
+            # value is like "[0,1],[0]", we make it a JSON array to parse it as a list
+            values <- jsonlite::fromJSON(paste0("[", value, "]"))
+          } else {
+            parse_fn <- type_name_to_fn[[eltype_name]]
+            values <- parse_fn(strsplit(value, ",")[[1L]])
           }
-          parse_fn <- type_name_to_fn[[type_name]]
-          parse_fn(value)
+          return(values)
+        }
+        parse_fn <- type_name_to_fn[[type_name]]
+        parsed_value <- parse_fn(value)
+
+        return(parsed_value)
       }
 
       res <- list()
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 968769a7c26d..250b273a1428 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2865,6 +2865,7 @@ def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
             if 'vector' in type_name:
                 eltype_name = type_name[type_name.find('<') + 1:type_name.rfind('>')]
                 if 'vector' in eltype_name:
+                    # value is like "[0,1],[0]"
                     values = [parse_param(v, eltype_name) for v in re.findall(r'\[(.*?)\]', value)]
                 else:
                     eltype = types_dict[eltype_name]

From db36cb94376a254bde01d23c7ed83327e1c44941 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Wed, 31 Aug 2022 03:25:43 -0500
Subject: [PATCH 20/23] parse types on cpp side

---
 R-package/R/aliases.R            |  18 --
 R-package/R/lgb.Booster.R        |  38 +----
 R-package/src/lightgbm_R.cpp     |  21 ---
 R-package/src/lightgbm_R.h       |   6 -
 helpers/parameter_generator.py   |  17 +-
 include/LightGBM/c_api.h         |  11 --
 include/LightGBM/config.h        |   2 +-
 python-package/lightgbm/basic.py |  45 +----
 src/boosting/gbdt.h              |  43 ++++-
 src/c_api.cpp                    |  12 --
 src/io/config_auto.cpp           | 271 +++++++++++++++----------------
 11 files changed, 181 insertions(+), 303 deletions(-)

diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R
index 4c4a593ca47d..0aa886ab90c2 100644
--- a/R-package/R/aliases.R
+++ b/R-package/R/aliases.R
@@ -63,24 +63,6 @@
     return(params_to_aliases)
 }
 
-# [description] List of parameter types. Wrapped in a function to take advantage of
-#               lazy evaluation (so it doesn't matter what order R sources files during installation).
-# [return] A named list, where each key is a main LightGBM parameter and each value is a character
-#          vector of corresponding of their type name in C++.
-.PARAMETER_TYPES <- function() {
-    json_str <- .Call(
-      LGBM_DumpParamTypes_R
-    )
-    param_types <- jsonlite::fromJSON(json_str)
-    # store in cache so the next call to `.PARAMETER_TYPES()` doesn't need to recompute this
-    assign(
-        x = "PARAMETER_TYPES"
-        , value = param_types
-        , envir = .lgb_session_cache_env
-    )
-    return(param_types)
-}
-
 # [description]
 #     Per https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst#metric,
 #     a few different strings can be used to indicate "no metrics".
diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R
index 36fe6813901a..3240c164f41b 100644
--- a/R-package/R/lgb.Booster.R
+++ b/R-package/R/lgb.Booster.R
@@ -734,43 +734,11 @@ Booster <- R6::R6Class(
         , handle
       )
       params <- jsonlite::fromJSON(params_str)
-      param_types <- .PARAMETER_TYPES()
-
-      type_name_to_fn <- c(
-        "string" = as.character
-        , "int" = as.integer
-        , "double" = as.numeric
-        , "bool" = function(x) x == "1"
-      )
-
-      parse_param <- function(value, type_name) {
-        if (grepl("vector", type_name)) {
-          eltype_name <- sub("vector<(.*)>", "\\1", type_name)
-          if (grepl("vector", eltype_name)) {
-            # value is like "[0,1],[0]", we make it a JSON array to parse it as a list
-            values <- jsonlite::fromJSON(paste0("[", value, "]"))
-          } else {
-            parse_fn <- type_name_to_fn[[eltype_name]]
-            values <- parse_fn(strsplit(value, ",")[[1L]])
-          }
-          return(values)
-        }
-        parse_fn <- type_name_to_fn[[type_name]]
-        parsed_value <- parse_fn(value)
-
-        return(parsed_value)
-      }
-
-      res <- list()
-      for (param_name in names(params)) {
-        value <- parse_param(params[[param_name]], param_types[[param_name]])
-        if (param_name == "interaction_constraints") {
-          value <- lapply(value, function(x) x + 1L)
-        }
-        res[[param_name]] <- value
+      if ("interaction_constraints" %in% names(params)) {
+        params[["interaction_constraints"]] <- lapply(params[["interaction_constraints"]], function(x) x + 1L)
       }
 
-      return(res)
+      return(params)
 
     },
 
diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp
index 9755fb9d59ca..82956daef4b9 100644
--- a/R-package/src/lightgbm_R.cpp
+++ b/R-package/src/lightgbm_R.cpp
@@ -1204,26 +1204,6 @@ SEXP LGBM_BoosterGetLoadedParam_R(SEXP handle) {
   R_API_END();
 }
 
-SEXP LGBM_DumpParamTypes_R() {
-  SEXP cont_token = PROTECT(R_MakeUnwindCont());
-  R_API_BEGIN();
-  SEXP types_str;
-  int64_t out_len = 0;
-  int64_t buf_len = 1024 * 1024;
-  std::vector<char> inner_char_buf(buf_len);
-  CHECK_CALL(LGBM_DumpParamTypes(buf_len, &out_len, inner_char_buf.data()));
-  // if aliases string was larger than the initial buffer, allocate a bigger buffer and try again
-  if (out_len > buf_len) {
-    inner_char_buf.resize(out_len);
-    CHECK_CALL(LGBM_DumpParamTypes(out_len, &out_len, inner_char_buf.data()));
-  }
-  types_str = PROTECT(safe_R_string(static_cast<R_xlen_t>(1), &cont_token));
-  SET_STRING_ELT(types_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token));
-  UNPROTECT(2);
-  return types_str;
-  R_API_END();
-}
-
 // .Call() calls
 static const R_CallMethodDef CallEntries[] = {
   {"LGBM_HandleIsNull_R"                         , (DL_FUNC) &LGBM_HandleIsNull_R                         , 1},
@@ -1280,7 +1260,6 @@ static const R_CallMethodDef CallEntries[] = {
   {"LGBM_BoosterDumpModel_R"                     , (DL_FUNC) &LGBM_BoosterDumpModel_R                     , 3},
   {"LGBM_NullBoosterHandleError_R"               , (DL_FUNC) &LGBM_NullBoosterHandleError_R               , 0},
   {"LGBM_DumpParamAliases_R"                     , (DL_FUNC) &LGBM_DumpParamAliases_R                     , 0},
-  {"LGBM_DumpParamTypes_R"                       , (DL_FUNC) &LGBM_DumpParamTypes_R                       , 0},
   {NULL, NULL, 0}
 };
 
diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h
index 7bbc1737372a..fbd2d7d6fd59 100644
--- a/R-package/src/lightgbm_R.h
+++ b/R-package/src/lightgbm_R.h
@@ -847,10 +847,4 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterDumpModel_R(
 */
 LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamAliases_R();
 
-/*!
-* \brief Dump parameter types to JSON
-* \return R character vector (length=1) with types JSON
-*/
-LIGHTGBM_C_EXPORT SEXP LGBM_DumpParamTypes_R();
-
 #endif  // LIGHTGBM_R_H_
diff --git a/helpers/parameter_generator.py b/helpers/parameter_generator.py
index 814edc682aba..407f2c73e1e3 100644
--- a/helpers/parameter_generator.py
+++ b/helpers/parameter_generator.py
@@ -374,11 +374,9 @@ def gen_parameter_code(
 }
 
 """
-    str_to_write += """const std::string Config::ParameterTypes() {
-  std::stringstream str_buf;
-  str_buf << "{";"""
+    str_to_write += """const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
+  static std::unordered_map<std::string, std::string> map({"""
     int_t_pat = re.compile(r'int\d+_t')
-    first = True
     # the following are stored as comma separated strings but are arrays in the wrappers
     overrides = {
         'categorical_feature': 'vector<int>',
@@ -394,15 +392,10 @@ def gen_parameter_code(
                 param_type = overrides[name]
             else:
                 param_type = int_t_pat.sub('int', y["inner_type"][0]).replace('std::', '')
-            prefix = f'\n  str_buf << "'
-            if first:
-                first = False
-            else:
-                prefix += ','
-            str_to_write += f'{prefix}\\"{name}\\": \\"{param_type}\\"";'
+            str_to_write += '\n    {"' + name + '", "' + param_type + '"},'
     str_to_write += """
-  str_buf << "}";
-  return str_buf.str();
+  });
+  return map;
 }
 
 """
diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h
index f777422a7402..287826ea182c 100644
--- a/include/LightGBM/c_api.h
+++ b/include/LightGBM/c_api.h
@@ -63,17 +63,6 @@ LIGHTGBM_C_EXPORT int LGBM_DumpParamAliases(int64_t buffer_len,
                                             int64_t* out_len,
                                             char* out_str);
 
-/*!
- * \brief Dump all parameter names with their types to JSON.
- * \param buffer_len String buffer length, if ``buffer_len < out_len``, you should re-allocate buffer
- * \param[out] out_len Actual output length
- * \param[out] out_str JSON format string of parameters, should pre-allocate memory
- * \return 0 when succeed, -1 when failure happens
- */
-LIGHTGBM_C_EXPORT int LGBM_DumpParamTypes(int64_t buffer_len,
-                                              int64_t* out_len,
-                                              char* out_str);
-
 /*!
  * \brief Register a callback function for log redirecting.
  * \param callback The callback function to register
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 69b16d24ec58..c924c6b17485 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -1075,7 +1075,7 @@ struct Config {
   static const std::unordered_set<std::string>& parameter_set();
   std::vector<std::vector<double>> auc_mu_weights_matrix;
   std::vector<std::vector<int>> interaction_constraints_vector;
-  static const std::string ParameterTypes();
+  static const std::unordered_map<std::string, std::string>& ParameterTypes();
   static const std::string DumpAliases();
 
  private:
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index 250b273a1428..f65232f9b914 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -3,12 +3,11 @@
 import abc
 import ctypes
 import json
-import re
 import warnings
 from collections import OrderedDict
 from copy import deepcopy
 from enum import Enum
-from functools import lru_cache, wraps
+from functools import wraps
 from os import SEEK_END, environ
 from os.path import getsize
 from pathlib import Path
@@ -459,29 +458,6 @@ def _choose_param_value(main_param_name: str, params: Dict[str, Any], default_va
     return params
 
 
-@lru_cache(maxsize=None)
-def _get_param_types() -> Dict[str, str]:
-    buffer_len = 1 << 20
-    tmp_out_len = ctypes.c_int64(0)
-    string_buffer = ctypes.create_string_buffer(buffer_len)
-    ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-    _safe_call(_LIB.LGBM_DumpParamTypes(
-        ctypes.c_int64(buffer_len),
-        ctypes.byref(tmp_out_len),
-        ptr_string_buffer))
-    actual_len = tmp_out_len.value
-    # if buffer length is not long enough, re-allocate a buffer
-    if actual_len > buffer_len:
-        string_buffer = ctypes.create_string_buffer(actual_len)
-        ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
-        _safe_call(_LIB.LGBM_DumpParamTypes(
-            ctypes.c_int64(actual_len),
-            ctypes.byref(tmp_out_len),
-            ptr_string_buffer))
-    res = json.loads(string_buffer.value.decode('utf-8'))
-    return res
-
-
 MAX_INT32 = (1 << 31) - 1
 
 """Macro definition of data type in C API of LightGBM"""
@@ -2857,24 +2833,7 @@ def _get_loaded_param(self) -> Dict[str, Any]:
                 ctypes.c_int64(actual_len),
                 ctypes.byref(tmp_out_len),
                 ptr_string_buffer))
-        params = json.loads(string_buffer.value.decode('utf-8'))
-        ptypes = _get_param_types()
-        types_dict = {'string': str, 'int': int, 'double': float, 'bool': lambda x: x == '1'}
-
-        def parse_param(value: str, type_name: str) -> Union[Any, List[Any]]:
-            if 'vector' in type_name:
-                eltype_name = type_name[type_name.find('<') + 1:type_name.rfind('>')]
-                if 'vector' in eltype_name:
-                    # value is like "[0,1],[0]"
-                    values = [parse_param(v, eltype_name) for v in re.findall(r'\[(.*?)\]', value)]
-                else:
-                    eltype = types_dict[eltype_name]
-                    values = [eltype(v) for v in value.split(',')]
-                return values
-            eltype = types_dict[type_name]
-            return eltype(value)
-
-        return {param: parse_param(value, ptypes[param]) for param, value in params.items()}
+        return json.loads(string_buffer.value.decode('utf-8'))
 
     def free_dataset(self) -> "Booster":
         """Free Booster's Datasets.
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index d3809a7e82a8..1883590ceffc 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -164,20 +164,47 @@ class GBDT : public GBDTBase {
     if (loaded_parameter_.empty()) {
       return std::string("{}");
     }
-    std::stringstream str_buf;
-    str_buf << "{";
+    const auto param_types = Config::ParameterTypes();
     const auto lines = Common::Split(loaded_parameter_.c_str(), "\n");
     bool first = true;
+    std::stringstream str_buf;
+    str_buf << "{";
     for (const auto& line : lines) {
       const auto pair = Common::Split(line.c_str(), ":");
-      if (pair[1] != " ]") {
-        if (first) {
-          first = false;
-          str_buf << "\"";
+      if (pair[1] == " ]")
+        continue;
+      if (first) {
+        first = false;
+        str_buf << "\"";
+      } else {
+        str_buf << ",\"";
+      }
+      const auto param = pair[0].substr(1);
+      const auto value_str = pair[1].substr(1, pair[1].size() - 2);
+      const auto param_type = param_types.at(param);
+      str_buf << param << "\": ";
+      if (param_type == "string") {
+        str_buf << "\"" << value_str << "\"";
+      } else if (param_type == "int") {
+        int value;
+        Common::Atoi(value_str.c_str(), &value);
+        str_buf << value;
+      } else if (param_type == "double") {
+        double value;
+        Common::Atof(value_str.c_str(), &value);
+        str_buf << value;
+      } else if (param_type == "bool") {
+        bool value = value_str == "1";
+        str_buf << std::boolalpha << value;
+      } else if (param_type.substr(0, 6) == "vector") {
+        str_buf << "[";
+        if (param_type.substr(7, 6) == "string") {
+          const auto parts = Common::Split(value_str.c_str(), ",");
+          str_buf << "\"" << Common::Join(parts, "\",\"") << "\"";
         } else {
-          str_buf << ",\"";
+          str_buf << value_str;
         }
-        str_buf << pair[0].substr(1) << "\": \"" << pair[1].substr(1, pair[1].size() - 2) << "\"";
+        str_buf << "]";
       }
     }
     str_buf << "}";
diff --git a/src/c_api.cpp b/src/c_api.cpp
index 8a3d3dae33ac..20633273134e 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -900,18 +900,6 @@ int LGBM_DumpParamAliases(int64_t buffer_len,
   API_END();
 }
 
-int LGBM_DumpParamTypes(int64_t buffer_len,
-                            int64_t* out_len,
-                            char* out_str) {
-  API_BEGIN();
-  std::string ptypes = Config::ParameterTypes();
-  *out_len = static_cast<int64_t>(ptypes.size()) + 1;
-  if (*out_len <= buffer_len) {
-    std::memcpy(out_str, ptypes.c_str(), *out_len);
-  }
-  API_END();
-}
-
 int LGBM_RegisterLogCallback(void (*callback)(const char*)) {
   API_BEGIN();
   Log::ResetCallBack(callback);
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index 67cab2258902..a86abd3a2c1d 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -894,142 +894,141 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::paramet
   return map;
 }
 
-const std::string Config::ParameterTypes() {
-  std::stringstream str_buf;
-  str_buf << "{";
-  str_buf << "\"config\": \"string\"";
-  str_buf << ",\"objective\": \"string\"";
-  str_buf << ",\"boosting\": \"string\"";
-  str_buf << ",\"data\": \"string\"";
-  str_buf << ",\"valid\": \"vector<string>\"";
-  str_buf << ",\"num_iterations\": \"int\"";
-  str_buf << ",\"learning_rate\": \"double\"";
-  str_buf << ",\"num_leaves\": \"int\"";
-  str_buf << ",\"tree_learner\": \"string\"";
-  str_buf << ",\"num_threads\": \"int\"";
-  str_buf << ",\"device_type\": \"string\"";
-  str_buf << ",\"seed\": \"int\"";
-  str_buf << ",\"deterministic\": \"bool\"";
-  str_buf << ",\"force_col_wise\": \"bool\"";
-  str_buf << ",\"force_row_wise\": \"bool\"";
-  str_buf << ",\"histogram_pool_size\": \"double\"";
-  str_buf << ",\"max_depth\": \"int\"";
-  str_buf << ",\"min_data_in_leaf\": \"int\"";
-  str_buf << ",\"min_sum_hessian_in_leaf\": \"double\"";
-  str_buf << ",\"bagging_fraction\": \"double\"";
-  str_buf << ",\"pos_bagging_fraction\": \"double\"";
-  str_buf << ",\"neg_bagging_fraction\": \"double\"";
-  str_buf << ",\"bagging_freq\": \"int\"";
-  str_buf << ",\"bagging_seed\": \"int\"";
-  str_buf << ",\"feature_fraction\": \"double\"";
-  str_buf << ",\"feature_fraction_bynode\": \"double\"";
-  str_buf << ",\"feature_fraction_seed\": \"int\"";
-  str_buf << ",\"extra_trees\": \"bool\"";
-  str_buf << ",\"extra_seed\": \"int\"";
-  str_buf << ",\"early_stopping_round\": \"int\"";
-  str_buf << ",\"first_metric_only\": \"bool\"";
-  str_buf << ",\"max_delta_step\": \"double\"";
-  str_buf << ",\"lambda_l1\": \"double\"";
-  str_buf << ",\"lambda_l2\": \"double\"";
-  str_buf << ",\"linear_lambda\": \"double\"";
-  str_buf << ",\"min_gain_to_split\": \"double\"";
-  str_buf << ",\"drop_rate\": \"double\"";
-  str_buf << ",\"max_drop\": \"int\"";
-  str_buf << ",\"skip_drop\": \"double\"";
-  str_buf << ",\"xgboost_dart_mode\": \"bool\"";
-  str_buf << ",\"uniform_drop\": \"bool\"";
-  str_buf << ",\"drop_seed\": \"int\"";
-  str_buf << ",\"top_rate\": \"double\"";
-  str_buf << ",\"other_rate\": \"double\"";
-  str_buf << ",\"min_data_per_group\": \"int\"";
-  str_buf << ",\"max_cat_threshold\": \"int\"";
-  str_buf << ",\"cat_l2\": \"double\"";
-  str_buf << ",\"cat_smooth\": \"double\"";
-  str_buf << ",\"max_cat_to_onehot\": \"int\"";
-  str_buf << ",\"top_k\": \"int\"";
-  str_buf << ",\"monotone_constraints\": \"vector<int>\"";
-  str_buf << ",\"monotone_constraints_method\": \"string\"";
-  str_buf << ",\"monotone_penalty\": \"double\"";
-  str_buf << ",\"feature_contri\": \"vector<double>\"";
-  str_buf << ",\"forcedsplits_filename\": \"string\"";
-  str_buf << ",\"refit_decay_rate\": \"double\"";
-  str_buf << ",\"cegb_tradeoff\": \"double\"";
-  str_buf << ",\"cegb_penalty_split\": \"double\"";
-  str_buf << ",\"cegb_penalty_feature_lazy\": \"vector<double>\"";
-  str_buf << ",\"cegb_penalty_feature_coupled\": \"vector<double>\"";
-  str_buf << ",\"path_smooth\": \"double\"";
-  str_buf << ",\"interaction_constraints\": \"vector<vector<int>>\"";
-  str_buf << ",\"verbosity\": \"int\"";
-  str_buf << ",\"input_model\": \"string\"";
-  str_buf << ",\"output_model\": \"string\"";
-  str_buf << ",\"saved_feature_importance_type\": \"int\"";
-  str_buf << ",\"snapshot_freq\": \"int\"";
-  str_buf << ",\"linear_tree\": \"bool\"";
-  str_buf << ",\"max_bin\": \"int\"";
-  str_buf << ",\"max_bin_by_feature\": \"vector<int>\"";
-  str_buf << ",\"min_data_in_bin\": \"int\"";
-  str_buf << ",\"bin_construct_sample_cnt\": \"int\"";
-  str_buf << ",\"data_random_seed\": \"int\"";
-  str_buf << ",\"is_enable_sparse\": \"bool\"";
-  str_buf << ",\"enable_bundle\": \"bool\"";
-  str_buf << ",\"use_missing\": \"bool\"";
-  str_buf << ",\"zero_as_missing\": \"bool\"";
-  str_buf << ",\"feature_pre_filter\": \"bool\"";
-  str_buf << ",\"pre_partition\": \"bool\"";
-  str_buf << ",\"two_round\": \"bool\"";
-  str_buf << ",\"header\": \"bool\"";
-  str_buf << ",\"label_column\": \"string\"";
-  str_buf << ",\"weight_column\": \"string\"";
-  str_buf << ",\"group_column\": \"string\"";
-  str_buf << ",\"ignore_column\": \"vector<int>\"";
-  str_buf << ",\"categorical_feature\": \"vector<int>\"";
-  str_buf << ",\"forcedbins_filename\": \"string\"";
-  str_buf << ",\"save_binary\": \"bool\"";
-  str_buf << ",\"precise_float_parser\": \"bool\"";
-  str_buf << ",\"parser_config_file\": \"string\"";
-  str_buf << ",\"start_iteration_predict\": \"int\"";
-  str_buf << ",\"num_iteration_predict\": \"int\"";
-  str_buf << ",\"predict_raw_score\": \"bool\"";
-  str_buf << ",\"predict_leaf_index\": \"bool\"";
-  str_buf << ",\"predict_contrib\": \"bool\"";
-  str_buf << ",\"predict_disable_shape_check\": \"bool\"";
-  str_buf << ",\"pred_early_stop\": \"bool\"";
-  str_buf << ",\"pred_early_stop_freq\": \"int\"";
-  str_buf << ",\"pred_early_stop_margin\": \"double\"";
-  str_buf << ",\"output_result\": \"string\"";
-  str_buf << ",\"convert_model_language\": \"string\"";
-  str_buf << ",\"convert_model\": \"string\"";
-  str_buf << ",\"objective_seed\": \"int\"";
-  str_buf << ",\"num_class\": \"int\"";
-  str_buf << ",\"is_unbalance\": \"bool\"";
-  str_buf << ",\"scale_pos_weight\": \"double\"";
-  str_buf << ",\"sigmoid\": \"double\"";
-  str_buf << ",\"boost_from_average\": \"bool\"";
-  str_buf << ",\"reg_sqrt\": \"bool\"";
-  str_buf << ",\"alpha\": \"double\"";
-  str_buf << ",\"fair_c\": \"double\"";
-  str_buf << ",\"poisson_max_delta_step\": \"double\"";
-  str_buf << ",\"tweedie_variance_power\": \"double\"";
-  str_buf << ",\"lambdarank_truncation_level\": \"int\"";
-  str_buf << ",\"lambdarank_norm\": \"bool\"";
-  str_buf << ",\"label_gain\": \"vector<double>\"";
-  str_buf << ",\"metric\": \"vector<string>\"";
-  str_buf << ",\"metric_freq\": \"int\"";
-  str_buf << ",\"is_provide_training_metric\": \"bool\"";
-  str_buf << ",\"eval_at\": \"vector<int>\"";
-  str_buf << ",\"multi_error_top_k\": \"int\"";
-  str_buf << ",\"auc_mu_weights\": \"vector<double>\"";
-  str_buf << ",\"num_machines\": \"int\"";
-  str_buf << ",\"local_listen_port\": \"int\"";
-  str_buf << ",\"time_out\": \"int\"";
-  str_buf << ",\"machine_list_filename\": \"string\"";
-  str_buf << ",\"machines\": \"string\"";
-  str_buf << ",\"gpu_platform_id\": \"int\"";
-  str_buf << ",\"gpu_device_id\": \"int\"";
-  str_buf << ",\"gpu_use_dp\": \"bool\"";
-  str_buf << ",\"num_gpu\": \"int\"";
-  str_buf << "}";
-  return str_buf.str();
+const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
+  static std::unordered_map<std::string, std::string> map({
+    {"config", "string"},
+    {"objective", "string"},
+    {"boosting", "string"},
+    {"data", "string"},
+    {"valid", "vector<string>"},
+    {"num_iterations", "int"},
+    {"learning_rate", "double"},
+    {"num_leaves", "int"},
+    {"tree_learner", "string"},
+    {"num_threads", "int"},
+    {"device_type", "string"},
+    {"seed", "int"},
+    {"deterministic", "bool"},
+    {"force_col_wise", "bool"},
+    {"force_row_wise", "bool"},
+    {"histogram_pool_size", "double"},
+    {"max_depth", "int"},
+    {"min_data_in_leaf", "int"},
+    {"min_sum_hessian_in_leaf", "double"},
+    {"bagging_fraction", "double"},
+    {"pos_bagging_fraction", "double"},
+    {"neg_bagging_fraction", "double"},
+    {"bagging_freq", "int"},
+    {"bagging_seed", "int"},
+    {"feature_fraction", "double"},
+    {"feature_fraction_bynode", "double"},
+    {"feature_fraction_seed", "int"},
+    {"extra_trees", "bool"},
+    {"extra_seed", "int"},
+    {"early_stopping_round", "int"},
+    {"first_metric_only", "bool"},
+    {"max_delta_step", "double"},
+    {"lambda_l1", "double"},
+    {"lambda_l2", "double"},
+    {"linear_lambda", "double"},
+    {"min_gain_to_split", "double"},
+    {"drop_rate", "double"},
+    {"max_drop", "int"},
+    {"skip_drop", "double"},
+    {"xgboost_dart_mode", "bool"},
+    {"uniform_drop", "bool"},
+    {"drop_seed", "int"},
+    {"top_rate", "double"},
+    {"other_rate", "double"},
+    {"min_data_per_group", "int"},
+    {"max_cat_threshold", "int"},
+    {"cat_l2", "double"},
+    {"cat_smooth", "double"},
+    {"max_cat_to_onehot", "int"},
+    {"top_k", "int"},
+    {"monotone_constraints", "vector<int>"},
+    {"monotone_constraints_method", "string"},
+    {"monotone_penalty", "double"},
+    {"feature_contri", "vector<double>"},
+    {"forcedsplits_filename", "string"},
+    {"refit_decay_rate", "double"},
+    {"cegb_tradeoff", "double"},
+    {"cegb_penalty_split", "double"},
+    {"cegb_penalty_feature_lazy", "vector<double>"},
+    {"cegb_penalty_feature_coupled", "vector<double>"},
+    {"path_smooth", "double"},
+    {"interaction_constraints", "vector<vector<int>>"},
+    {"verbosity", "int"},
+    {"input_model", "string"},
+    {"output_model", "string"},
+    {"saved_feature_importance_type", "int"},
+    {"snapshot_freq", "int"},
+    {"linear_tree", "bool"},
+    {"max_bin", "int"},
+    {"max_bin_by_feature", "vector<int>"},
+    {"min_data_in_bin", "int"},
+    {"bin_construct_sample_cnt", "int"},
+    {"data_random_seed", "int"},
+    {"is_enable_sparse", "bool"},
+    {"enable_bundle", "bool"},
+    {"use_missing", "bool"},
+    {"zero_as_missing", "bool"},
+    {"feature_pre_filter", "bool"},
+    {"pre_partition", "bool"},
+    {"two_round", "bool"},
+    {"header", "bool"},
+    {"label_column", "string"},
+    {"weight_column", "string"},
+    {"group_column", "string"},
+    {"ignore_column", "vector<int>"},
+    {"categorical_feature", "vector<int>"},
+    {"forcedbins_filename", "string"},
+    {"save_binary", "bool"},
+    {"precise_float_parser", "bool"},
+    {"parser_config_file", "string"},
+    {"start_iteration_predict", "int"},
+    {"num_iteration_predict", "int"},
+    {"predict_raw_score", "bool"},
+    {"predict_leaf_index", "bool"},
+    {"predict_contrib", "bool"},
+    {"predict_disable_shape_check", "bool"},
+    {"pred_early_stop", "bool"},
+    {"pred_early_stop_freq", "int"},
+    {"pred_early_stop_margin", "double"},
+    {"output_result", "string"},
+    {"convert_model_language", "string"},
+    {"convert_model", "string"},
+    {"objective_seed", "int"},
+    {"num_class", "int"},
+    {"is_unbalance", "bool"},
+    {"scale_pos_weight", "double"},
+    {"sigmoid", "double"},
+    {"boost_from_average", "bool"},
+    {"reg_sqrt", "bool"},
+    {"alpha", "double"},
+    {"fair_c", "double"},
+    {"poisson_max_delta_step", "double"},
+    {"tweedie_variance_power", "double"},
+    {"lambdarank_truncation_level", "int"},
+    {"lambdarank_norm", "bool"},
+    {"label_gain", "vector<double>"},
+    {"metric", "vector<string>"},
+    {"metric_freq", "int"},
+    {"is_provide_training_metric", "bool"},
+    {"eval_at", "vector<int>"},
+    {"multi_error_top_k", "int"},
+    {"auc_mu_weights", "vector<double>"},
+    {"num_machines", "int"},
+    {"local_listen_port", "int"},
+    {"time_out", "int"},
+    {"machine_list_filename", "string"},
+    {"machines", "string"},
+    {"gpu_platform_id", "int"},
+    {"gpu_device_id", "int"},
+    {"gpu_use_dp", "bool"},
+    {"num_gpu", "int"},
+  });
+  return map;
 }
 
 }  // namespace LightGBM

From 9467814e7dfd3d3e772a738e7346d0ef5f43daa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Wed, 21 Sep 2022 14:02:32 -0500
Subject: [PATCH 21/23] warn about ignoring parameters passed to constructor

---
 python-package/lightgbm/basic.py         | 2 ++
 tests/python_package_test/test_engine.py | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index f65232f9b914..ebfd6797215d 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -2764,6 +2764,8 @@ def __init__(
                 ctypes.byref(out_num_class)))
             self.__num_class = out_num_class.value
             self.pandas_categorical = _load_pandas_categorical(file_name=model_file)
+            if params:
+                _log_warning('Ignoring params argument, using parameters from model file.')
             params = self._get_loaded_param()
         elif model_str is not None:
             self.model_from_string(model_str)
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index f42231c9074e..f7fbdd777cbd 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -1226,6 +1226,11 @@ def test_parameters_are_loaded_from_model_file(tmp_path):
     assert set_params == params
     assert bst.params['categorical_feature'] == [1, 2]
 
+    # check that passing parameters to the constructor raises a warning and ignores them
+    with pytest.warns(UserWarning, match='Ignoring params argument'):
+        bst2 = lgb.Booster(params={'num_leaves': 7}, model_file=model_file)
+    assert bst.params == bst2.params
+
 
 def test_save_load_copy_pickle():
     def train_and_predict(init_model=None, return_model=False):

From 4cbf4771d4aa2e8efccdd194f87ced813d50a2f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Sat, 24 Sep 2022 13:19:24 -0500
Subject: [PATCH 22/23] trigger ci


From 17ad0c1974b0e07c901655ce00ab801fb7928aa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Morales?= <jmoralz92@gmail.com>
Date: Tue, 11 Oct 2022 09:18:05 -0500
Subject: [PATCH 23/23] trigger ci