Commit 1a2053f

Merge branch 'master' into ci/r-windows-ci

jameslamb committed Apr 13, 2020
2 parents 7cdfc62 + 5c0baf6
Showing 44 changed files with 1,126 additions and 884 deletions.
.ci/test.sh (4 changes: 2 additions & 2 deletions)

@@ -48,7 +48,7 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "lint" ]]; then
     conda install -q -y -n $CONDA_ENV \
         -c conda-forge \
         libxml2 \
-        r-lintr>=2.0
+        "r-lintr>=2.0"
     pip install --user cpplint
     echo "Linting Python code"
     pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1
@@ -74,7 +74,7 @@ if [[ $TASK == "r-package" ]]; then
     exit 0
 fi
 
-conda install -q -y -n $CONDA_ENV joblib matplotlib numpy pandas psutil pytest python-graphviz "scikit-learn<=0.21.3" scipy
+conda install -q -y -n $CONDA_ENV joblib matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy
 
 if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then
     # fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL)
.ci/test_r_package.sh (8 changes: 6 additions & 2 deletions)

@@ -35,11 +35,15 @@ fi
 # Installing R precompiled for Mac OS 10.11 or higher
 if [[ $OS_NAME == "macos" ]]; then
 
+    # temp fix for basictex
+    if [[ $AZURE == "true" ]]; then
+        brew update
+    fi
     brew install qpdf
     brew cask install basictex
     export PATH="/Library/TeX/texbin:$PATH"
-    sudo tlmgr update --self
-    sudo tlmgr install inconsolata helvetic
+    sudo tlmgr --verify-repo=none update --self
+    sudo tlmgr --verify-repo=none install inconsolata helvetic
 
     wget -q https://cran.r-project.org/bin/macosx/R-${R_MAC_VERSION}.pkg -O R.pkg
     sudo installer \
.ci/test_windows.ps1 (2 changes: 1 addition & 1 deletion)

@@ -22,7 +22,7 @@ conda init powershell
 conda activate
 conda config --set always_yes yes --set changeps1 no
 conda update -q -y conda
-conda create -q -y -n $env:CONDA_ENV python=$env:PYTHON_VERSION joblib matplotlib numpy pandas psutil pytest python-graphviz "scikit-learn<=0.21.3" scipy ; Check-Output $?
+conda create -q -y -n $env:CONDA_ENV python=$env:PYTHON_VERSION joblib matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy ; Check-Output $?
 conda activate $env:CONDA_ENV
 
 if ($env:TASK -eq "regular") {
.travis.yml (4 changes: 2 additions & 2 deletions)

@@ -12,7 +12,7 @@ osx_image: xcode11.3
 
 env:
   global: # default values
-    - PYTHON_VERSION=3.7
+    - PYTHON_VERSION=3.8
   matrix:
     - TASK=regular PYTHON_VERSION=3.6
     - TASK=sdist PYTHON_VERSION=2.7
@@ -21,7 +21,7 @@ env:
     - TASK=lint
     - TASK=check-docs
     - TASK=mpi METHOD=source
-    - TASK=mpi METHOD=pip
+    - TASK=mpi METHOD=pip PYTHON_VERSION=3.7
     - TASK=gpu METHOD=source PYTHON_VERSION=3.5
     - TASK=gpu METHOD=pip PYTHON_VERSION=3.6
     - TASK=r-package
.vsts-ci.yml (9 changes: 4 additions & 5 deletions)

@@ -6,7 +6,7 @@ trigger:
   include:
     - v*
 variables:
-  PYTHON_VERSION: 3.7
+  PYTHON_VERSION: 3.8
   CONDA_ENV: test-env
 resources:
   containers:
@@ -33,7 +33,7 @@ jobs:
         PYTHON_VERSION: 3.5
       bdist:
         TASK: bdist
-        PYTHON_VERSION: 3.6
+        PYTHON_VERSION: 3.7
      inference:
         TASK: if-else
       mpi_source:
@@ -82,7 +82,7 @@ jobs:
         TASK: r-package
       regular:
         TASK: regular
-        PYTHON_VERSION: 3.6
+        PYTHON_VERSION: 3.7
       sdist:
         TASK: sdist
         PYTHON_VERSION: 3.5
@@ -124,13 +124,12 @@ jobs:
         R_WINDOWS_VERSION: 3.6.3
       regular:
         TASK: regular
-        PYTHON_VERSION: 3.7
+        PYTHON_VERSION: 3.6
       sdist:
         TASK: sdist
         PYTHON_VERSION: 2.7
       bdist:
         TASK: bdist
-        PYTHON_VERSION: 3.5
   steps:
   - powershell: |
       Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
R-package/R/lgb.Booster.R (33 changes: 14 additions & 19 deletions)

@@ -795,32 +795,27 @@ predict.lgb.Booster <- function(object,
 #' @export
 lgb.load <- function(filename = NULL, model_str = NULL) {
 
-  if (is.null(filename) && is.null(model_str)) {
-    stop("lgb.load: either filename or model_str must be given")
-  }
-
-  # Load from filename
-  if (!is.null(filename) && !is.character(filename)) {
-    stop("lgb.load: filename should be character")
-  }
+  filename_provided <- !is.null(filename)
+  model_str_provided <- !is.null(model_str)
 
-  # Return new booster
-  if (!is.null(filename) && !file.exists(filename)) {
-    stop("lgb.load: file does not exist for supplied filename")
-  }
-  if (!is.null(filename)) {
+  if (filename_provided) {
+    if (!is.character(filename)) {
+      stop("lgb.load: filename should be character")
+    }
+    if (!file.exists(filename)) {
+      stop(sprintf("lgb.load: file '%s' passed to filename does not exist", filename))
+    }
     return(invisible(Booster$new(modelfile = filename)))
   }
 
-  # Load from model_str
-  if (!is.null(model_str) && !is.character(model_str)) {
-    stop("lgb.load: model_str should be character")
-  }
-  # Return new booster
-  if (!is.null(model_str)) {
+  if (model_str_provided) {
+    if (!is.character(model_str)) {
+      stop("lgb.load: model_str should be character")
+    }
     return(invisible(Booster$new(model_str = model_str)))
   }
 
+  stop("lgb.load: either filename or model_str must be given")
 }
 
 #' @name lgb.save
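For reference, a minimal usage sketch of the control flow introduced above (the file path and the bundled agaricus data are illustrative assumptions, not part of this diff):

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")

    # train a small booster so there is something to save (illustrative settings)
    bst <- lightgbm(
        data = as.matrix(agaricus.train$data)
        , label = agaricus.train$label
        , nrounds = 2L
        , objective = "binary"
    )

    # filename is validated and used first when both arguments are supplied
    lgb.save(bst, "lightgbm.model")
    bst_from_file <- lgb.load(filename = "lightgbm.model")

    # model_str is consulted only when filename is NULL
    bst_from_str <- lgb.load(model_str = bst$save_model_to_string())

    # with neither argument, execution falls through to the final stop()
    # lgb.load()  # Error: lgb.load: either filename or model_str must be given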
R-package/README.md (1 change: 0 additions & 1 deletion)

@@ -150,7 +150,6 @@ export CC=/usr/local/bin/gcc-8
 Rscript build_r.R
 
 # Get coverage
-rm -rf lightgbm_r/build
 Rscript -e " \
     coverage <- covr::package_coverage('./lightgbm_r', quiet=FALSE);
     print(coverage);
R-package/tests/testthat/test_basic.R (24 changes: 24 additions & 0 deletions)

@@ -571,3 +571,27 @@ test_that("lgb.train() works with early stopping for regression", {
         , early_stopping_rounds + 1L
     )
 })
+
+test_that("lgb.train() supports non-ASCII feature names", {
+    testthat::skip("UTF-8 feature names are not fully supported in the R package")
+    dtrain <- lgb.Dataset(
+        data = matrix(rnorm(400L), ncol = 4L)
+        , label = rnorm(100L)
+    )
+    feature_names <- c("F_零", "F_一", "F_二", "F_三")
+    bst <- lgb.train(
+        data = dtrain
+        , nrounds = 5L
+        , obj = "regression"
+        , params = list(
+            metric = "rmse"
+        )
+        , colnames = feature_names
+    )
+    expect_true(lgb.is.Booster(bst))
+    dumped_model <- jsonlite::fromJSON(bst$dump_model())
+    expect_identical(
+        dumped_model[["feature_names"]]
+        , feature_names
+    )
+})
R-package/tests/testthat/test_lgb.Booster.R (139 changes: 139 additions & 0 deletions)

@@ -88,3 +88,142 @@ test_that("lgb.get.eval.result() should throw an informative error for incorrect
         )
     }, regexp = "Only the following eval_names exist for dataset.*\\: \\[l2\\]", fixed = FALSE)
 })
+
+context("lgb.load()")
+
+test_that("lgb.load() gives the expected error messages given different incorrect inputs", {
+    set.seed(708L)
+    data(agaricus.train, package = "lightgbm")
+    data(agaricus.test, package = "lightgbm")
+    train <- agaricus.train
+    test <- agaricus.test
+    bst <- lightgbm(
+        data = as.matrix(train$data)
+        , label = train$label
+        , num_leaves = 4L
+        , learning_rate = 1.0
+        , nrounds = 2L
+        , objective = "binary"
+    )
+
+    # you have to give model_str or filename
+    expect_error({
+        lgb.load()
+    }, regexp = "either filename or model_str must be given")
+    expect_error({
+        lgb.load(filename = NULL, model_str = NULL)
+    }, regexp = "either filename or model_str must be given")
+
+    # if given, filename should be a string that points to an existing file
+    out_file <- "lightgbm.model"
+    expect_error({
+        lgb.load(filename = list(out_file))
+    }, regexp = "filename should be character")
+    file_to_check <- paste0("a.model")
+    while (file.exists(file_to_check)) {
+        file_to_check <- paste0("a", file_to_check)
+    }
+    expect_error({
+        lgb.load(filename = file_to_check)
+    }, regexp = "passed to filename does not exist")
+
+    # if given, model_str should be a string
+    expect_error({
+        lgb.load(model_str = c(4.0, 5.0, 6.0))
+    }, regexp = "model_str should be character")
+
+})
+
+test_that("Loading a Booster from a file works", {
+    set.seed(708L)
+    data(agaricus.train, package = "lightgbm")
+    data(agaricus.test, package = "lightgbm")
+    train <- agaricus.train
+    test <- agaricus.test
+    bst <- lightgbm(
+        data = as.matrix(train$data)
+        , label = train$label
+        , num_leaves = 4L
+        , learning_rate = 1.0
+        , nrounds = 2L
+        , objective = "binary"
+    )
+    expect_true(lgb.is.Booster(bst))
+
+    pred <- predict(bst, test$data)
+    lgb.save(bst, "lightgbm.model")
+
+    # finalize the booster and destroy it so you know we aren't cheating
+    bst$finalize()
+    expect_null(bst$.__enclos_env__$private$handle)
+    rm(bst)
+
+    bst2 <- lgb.load(
+        filename = "lightgbm.model"
+    )
+    pred2 <- predict(bst2, test$data)
+    expect_identical(pred, pred2)
+})
+
+test_that("Loading a Booster from a string works", {
+    set.seed(708L)
+    data(agaricus.train, package = "lightgbm")
+    data(agaricus.test, package = "lightgbm")
+    train <- agaricus.train
+    test <- agaricus.test
+    bst <- lightgbm(
+        data = as.matrix(train$data)
+        , label = train$label
+        , num_leaves = 4L
+        , learning_rate = 1.0
+        , nrounds = 2L
+        , objective = "binary"
+    )
+    expect_true(lgb.is.Booster(bst))
+
+    pred <- predict(bst, test$data)
+    model_string <- bst$save_model_to_string()
+
+    # finalize the booster and destroy it so you know we aren't cheating
+    bst$finalize()
+    expect_null(bst$.__enclos_env__$private$handle)
+    rm(bst)
+
+    bst2 <- lgb.load(
+        model_str = model_string
+    )
+    pred2 <- predict(bst2, test$data)
+    expect_identical(pred, pred2)
+})
+
+test_that("If a string and a file are both passed to lgb.load() the file is used and model_str is totally ignored", {
+    set.seed(708L)
+    data(agaricus.train, package = "lightgbm")
+    data(agaricus.test, package = "lightgbm")
+    train <- agaricus.train
+    test <- agaricus.test
+    bst <- lightgbm(
+        data = as.matrix(train$data)
+        , label = train$label
+        , num_leaves = 4L
+        , learning_rate = 1.0
+        , nrounds = 2L
+        , objective = "binary"
+    )
+    expect_true(lgb.is.Booster(bst))
+
+    pred <- predict(bst, test$data)
+    lgb.save(bst, "lightgbm.model")
+
+    # finalize the booster and destroy it so you know we aren't cheating
+    bst$finalize()
+    expect_null(bst$.__enclos_env__$private$handle)
+    rm(bst)
+
+    bst2 <- lgb.load(
+        filename = "lightgbm.model"
+        , model_str = 4.0
+    )
+    pred2 <- predict(bst2, test$data)
+    expect_identical(pred, pred2)
+})
docker/dockerfile-python (2 changes: 1 addition & 1 deletion)

@@ -18,7 +18,7 @@ RUN apt-get update && \
     export PATH="$CONDA_DIR/bin:$PATH" && \
    conda config --set always_yes yes --set changeps1 no && \
     # lightgbm
-    conda install -q -y numpy scipy "scikit-learn<=0.21.3" pandas && \
+    conda install -q -y numpy scipy scikit-learn pandas && \
     git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \
     cd LightGBM/python-package && python setup.py install && \
     # clean
docker/gpu/dockerfile.gpu (4 changes: 2 additions & 2 deletions)

@@ -75,8 +75,8 @@ RUN echo "export PATH=$CONDA_DIR/bin:"'$PATH' > /etc/profile.d/conda.sh && \
     rm ~/miniconda.sh
 
 RUN conda config --set always_yes yes --set changeps1 no && \
-    conda create -y -q -n py2 python=2.7 mkl numpy scipy "scikit-learn<=0.21.3" jupyter notebook ipython pandas matplotlib && \
-    conda create -y -q -n py3 python=3.6 mkl numpy scipy "scikit-learn<=0.21.3" jupyter notebook ipython pandas matplotlib
+    conda create -y -q -n py2 python=2.7 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib && \
+    conda create -y -q -n py3 python=3.6 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib
 
 #################################################################################################################
 # LightGBM
docs/GPU-Targets.rst (2 changes: 1 addition & 1 deletion)

@@ -157,7 +157,7 @@ Known issues:
 
 .. _Intel SDK for OpenCL: https://software.intel.com/en-us/articles/opencl-drivers
 
-.. _ROCm: https://rocm.github.io/
+.. _ROCm: https://rocm-documentation.readthedocs.io/en/latest/
 
 .. _our GitHub repo: https://github.com/microsoft/LightGBM/releases/download/v2.0.12/AMD-APP-SDKInstaller-v3.0.130.136-GA-linux64.tar.bz2
 
docs/GPU-Tutorial.rst (4 changes: 2 additions & 2 deletions)

@@ -1,4 +1,4 @@
-LightGBM GPU Tutorial 
+LightGBM GPU Tutorial
 =====================
 
 The purpose of this document is to give you a quick step-by-step tutorial on GPU training.
@@ -78,7 +78,7 @@ If you want to use the Python interface of LightGBM, you can install it now (alo
 ::
 
     sudo apt-get -y install python-pip
-    sudo -H pip install setuptools numpy scipy "scikit-learn<=0.21.3" -U
+    sudo -H pip install setuptools numpy scipy scikit-learn -U
     cd python-package/
     sudo python setup.py install --precompile
     cd ..
docs/Parameters.rst (8 changes: 8 additions & 0 deletions)

@@ -470,6 +470,14 @@ Learning Control Parameters
 
    -  ``intermediate``, a `more advanced method <https://github.com/microsoft/LightGBM/files/3457826/PR-monotone-constraints-report.pdf>`__, which may slow the library very slightly. However, this method is much less constraining than the basic method and should significantly improve the results
 
+-  ``monotone_penalty`` :raw-html:`<a id="monotone_penalty" title="Permalink to this parameter" href="#monotone_penalty">&#x1F517;&#xFE0E;</a>`, default = ``0.0``, type = double, aliases: ``monotone_splits_penalty``, ``ms_penalty``, ``mc_penalty``, constraints: ``monotone_penalty >= 0.0``
+
+   -  used only if ``monotone_constraints`` is set
+
+   -  `monotone penalty <https://github.com/microsoft/LightGBM/files/3457826/PR-monotone-constraints-report.pdf>`__: a penalization parameter X forbids any monotone splits on the first X (rounded down) level(s) of the tree. The penalty applied to monotone splits on a given depth is a continuous, increasing function of the penalization parameter
+
+   -  if ``0.0`` (the default), no penalization is applied
+
 -  ``feature_contri`` :raw-html:`<a id="feature_contri" title="Permalink to this parameter" href="#feature_contri">&#x1F517;&#xFE0E;</a>`, default = ``None``, type = multi-double, aliases: ``feature_contrib``, ``fc``, ``fp``, ``feature_penalty``
 
    -  used to control feature's split gain, will use ``gain[i] = max(0, feature_contri[i]) * gain[i]`` to replace the split gain of i-th feature
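To make the new parameter concrete, here is a minimal R sketch combining it with ``monotone_constraints`` (the synthetic data and all settings are illustrative assumptions, not taken from this diff):

    library(lightgbm)

    # toy regression data: the target increases with x1 and decreases with x2
    set.seed(42L)
    X <- matrix(runif(600L), ncol = 3L)
    y <- 2.0 * X[, 1L] - 1.5 * X[, 2L] + rnorm(200L, sd = 0.1)
    dtrain <- lgb.Dataset(data = X, label = y)

    bst <- lgb.train(
        data = dtrain
        , nrounds = 10L
        , obj = "regression"
        , params = list(
            metric = "rmse"
            # one entry per feature: 1 = increasing, -1 = decreasing, 0 = unconstrained
            , monotone_constraints = c(1L, -1L, 0L)
            # forbids monotone splits on the first floor(2.0) = 2 tree levels
            # and penalizes monotone splits just below them
            , monotone_penalty = 2.0
        )
    )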
(Diffs for the remaining changed files were not loaded in this view.)