From 4a77a8b89cd36b254059c56d2374be9e9be8f431 Mon Sep 17 00:00:00 2001 From: StrikerRUS Date: Mon, 13 May 2019 23:34:17 +0300 Subject: [PATCH 1/8] added R-package docs generation routines --- .appveyor.yml | 57 -------------- .readthedocs.yml | 11 +++ .travis.yml | 65 ---------------- .vsts-ci.yml | 164 ---------------------------------------- R-package/.Rbuildignore | 2 + R-package/_pkgdown.yml | 124 ++++++++++++++++++++++++++++++ build_r.R | 2 +- build_r_site.R | 23 ++++++ docs/R-API.rst | 4 + docs/conf.py | 50 +++++++++++- docs/index.rst | 1 + 11 files changed, 215 insertions(+), 288 deletions(-) delete mode 100644 .appveyor.yml create mode 100644 .readthedocs.yml delete mode 100644 .travis.yml delete mode 100644 .vsts-ci.yml create mode 100644 R-package/_pkgdown.yml create mode 100644 build_r_site.R create mode 100644 docs/R-API.rst diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index dac39ec25567..000000000000 --- a/.appveyor.yml +++ /dev/null @@ -1,57 +0,0 @@ -version: 2.2.4.{build} - -image: Visual Studio 2015 -platform: x64 -configuration: # a trick to construct a build matrix with multiple Python versions - - 3.6 - -environment: - matrix: - - COMPILER: MSVC - - COMPILER: MINGW - -clone_depth: 50 - -install: - - git submodule update --init --recursive # get `compute` folder - - set PATH=%PATH:C:\Program Files\Git\usr\bin;=% # delete sh.exe from PATH (mingw32-make fix) - - set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH% - - set PYTHON_VERSION=%CONFIGURATION% - - ps: >- - switch ($env:PYTHON_VERSION) { - "2.7" {$env:MINICONDA = """C:\Miniconda-x64"""} - "3.5" {$env:MINICONDA = """C:\Miniconda35-x64"""} - "3.6" {$env:MINICONDA = """C:\Miniconda36-x64"""} - "3.7" {$env:MINICONDA = """C:\Miniconda37-x64"""} - default {$env:MINICONDA = """C:\Miniconda37-x64"""} - } - - set PATH=%MINICONDA%;%MINICONDA%\Scripts;%PATH% - - ps: $env:LGB_VER = (Get-Content VERSION.txt).trim() - - conda config --set always_yes yes --set changeps1 no - - conda update -q -y conda - - conda create -q -y -n test-env python=%PYTHON_VERSION% matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy - - activate test-env - - set PATH=%CONDA_PREFIX%\Library\bin\graphviz;%PATH% # temp graphviz hotfix - -build_script: - - cd %APPVEYOR_BUILD_FOLDER%\python-package - - IF "%COMPILER%"=="MINGW" ( - python setup.py install --mingw) - ELSE ( - python setup.py install) - -test_script: - - pytest %APPVEYOR_BUILD_FOLDER%\tests\python_package_test - - cd %APPVEYOR_BUILD_FOLDER%\examples\python-guide - - ps: >- - @("import matplotlib", "matplotlib.use('Agg')") + (Get-Content "plot_example.py") | Set-Content "plot_example.py" # prevent interactive window mode - (Get-Content "plot_example.py").replace('graph.render(view=True)', 'graph.render(view=False)') | Set-Content "plot_example.py" - - ps: >- - foreach ($file in @(Get-ChildItem *.py)) { - @("import sys, warnings", "warnings.showwarning = lambda message, category, filename, lineno, file=None, line=None: sys.stdout.write(warnings.formatwarning(message, category, filename, lineno, line))") + (Get-Content $file) | Set-Content $file - python $file - if (!$?) 
{ $host.SetShouldExit(-1) } - } # run all examples - - cd %APPVEYOR_BUILD_FOLDER%\examples\python-guide\notebooks - - conda install -q -y -n test-env ipywidgets notebook - - jupyter nbconvert --ExecutePreprocessor.timeout=180 --to notebook --execute --inplace *.ipynb # run all notebooks diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 000000000000..c2e19847a4fc --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,11 @@ +version: 2 +formats: + - pdf +python: + version: 3 + install: + - requirements: docs/requirements.txt +sphinx: + builder: html + configuration: docs/conf.py + fail_on_warning: true diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index c2c2a1ef7b12..000000000000 --- a/.travis.yml +++ /dev/null @@ -1,65 +0,0 @@ -language: cpp - -git: - submodules: true - -os: - - linux - - osx -dist: trusty -osx_image: xcode10.2 - -env: - global: # default values - - PYTHON_VERSION=3.7 - matrix: - - TASK=regular PYTHON_VERSION=3.6 - - TASK=sdist PYTHON_VERSION=2.7 - - TASK=bdist - - TASK=pylint - - TASK=check-docs - - TASK=mpi METHOD=source - - TASK=mpi METHOD=pip - - TASK=gpu METHOD=source PYTHON_VERSION=3.5 - - TASK=gpu METHOD=pip PYTHON_VERSION=3.6 - -matrix: - exclude: - - os: osx - env: TASK=gpu METHOD=source PYTHON_VERSION=3.5 - - os: osx - env: TASK=gpu METHOD=pip PYTHON_VERSION=3.6 - - os: osx - env: TASK=pylint - - os: osx - env: TASK=check-docs - -before_install: - - test -n $CC && unset CC - - test -n $CXX && unset CXX - - export HOME_DIRECTORY="$HOME" - - export BUILD_DIRECTORY="$TRAVIS_BUILD_DIR" - - if [[ $TRAVIS_OS_NAME == "osx" ]]; then - export OS_NAME="macos"; - export COMPILER="gcc"; - else - export OS_NAME="linux"; - export COMPILER="clang"; - fi - - export CONDA="$HOME/miniconda" - - export PATH="$CONDA/bin:$PATH" - - export CONDA_ENV="test-env" - - export LGB_VER=$(head -n 1 VERSION.txt) - - export AMDAPPSDK_PATH=$HOME/AMDAPPSDK - - export LD_LIBRARY_PATH="$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH" - - export LD_LIBRARY_PATH="/usr/local/clang/lib:$LD_LIBRARY_PATH" # fix error "libomp.so: cannot open shared object file: No such file or directory" on Linux with Clang - - export OPENCL_VENDOR_PATH=$AMDAPPSDK_PATH/etc/OpenCL/vendors - -install: - - bash .ci/setup.sh - -script: - - bash .ci/test.sh - -notifications: - email: false diff --git a/.vsts-ci.yml b/.vsts-ci.yml deleted file mode 100644 index 1f52e0ec8bcb..000000000000 --- a/.vsts-ci.yml +++ /dev/null @@ -1,164 +0,0 @@ -variables: - PYTHON_VERSION: 3.7 - CONDA_ENV: test-env -resources: - containers: - - container: ubuntu1404 - image: lightgbm/vsts-agent:ubuntu-14.04 -jobs: -########################################### -- job: Linux -########################################### - variables: - COMPILER: gcc - pool: - vmImage: 'ubuntu-16.04' - container: ubuntu1404 - strategy: - maxParallel: 7 - matrix: - regular: - TASK: regular - sdist: - TASK: sdist - PYTHON_VERSION: 3.5 - bdist: - TASK: bdist - PYTHON_VERSION: 3.6 - swig: - TASK: swig - inference: - TASK: if-else - mpi_source: - TASK: mpi - METHOD: source - PYTHON_VERSION: 2.7 - gpu_source: - TASK: gpu - METHOD: source - PYTHON_VERSION: 3.6 - steps: - - script: | - echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY" - echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" - echo "##vso[task.setvariable variable=OS_NAME]linux" - echo "##vso[task.setvariable variable=AZURE]true" - echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" - echo 
"##vso[task.prependpath]$CONDA/bin" - AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK - echo "##vso[task.setvariable variable=AMDAPPSDK_PATH]$AMDAPPSDK_PATH" - LD_LIBRARY_PATH=$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH - echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$LD_LIBRARY_PATH" - echo "##vso[task.setvariable variable=OPENCL_VENDOR_PATH]$AMDAPPSDK_PATH/etc/OpenCL/vendors" - displayName: 'Set variables' - - bash: $(Build.SourcesDirectory)/.ci/setup.sh - displayName: Setup - - bash: $(Build.SourcesDirectory)/.ci/test.sh - displayName: Test - - task: PublishBuildArtifacts@1 - condition: and(succeeded(), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) - inputs: - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - artifactName: PackageAssets - artifactType: container -########################################### -- job: MacOS -########################################### - variables: - COMPILER: clang - pool: - vmImage: 'macOS-10.13' - strategy: - maxParallel: 3 - matrix: - regular: - TASK: regular - PYTHON_VERSION: 3.6 - sdist: - TASK: sdist - PYTHON_VERSION: 3.5 - bdist: - TASK: bdist - steps: - - script: | - echo "##vso[task.setvariable variable=HOME_DIRECTORY]$AGENT_HOMEDIRECTORY" - echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" - echo "##vso[task.setvariable variable=OS_NAME]macos" - echo "##vso[task.setvariable variable=AZURE]true" - echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" - CONDA=$AGENT_HOMEDIRECTORY/miniconda - echo "##vso[task.setvariable variable=CONDA]$CONDA" - echo "##vso[task.prependpath]$CONDA/bin" - displayName: 'Set variables' - - bash: $(Build.SourcesDirectory)/.ci/setup.sh - displayName: Setup - - bash: $(Build.SourcesDirectory)/.ci/test.sh - displayName: Test - - task: PublishBuildArtifacts@1 - condition: and(succeeded(), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) - inputs: - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - artifactName: PackageAssets - artifactType: container -########################################### -- job: Windows -########################################### - pool: - vmImage: 'vs2017-win2016' - strategy: - maxParallel: 3 - matrix: - regular: - TASK: regular - PYTHON_VERSION: 3.5 - sdist: - TASK: sdist - PYTHON_VERSION: 2.7 - bdist: - TASK: bdist - PYTHON_VERSION: 3.6 - steps: - - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" - displayName: Enable conda - - script: | - conda update -q -y conda - conda create -q -y -n %CONDA_ENV% python=%PYTHON_VERSION% matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy - cmd /c "activate %CONDA_ENV% & powershell -ExecutionPolicy Bypass -File %BUILD_SOURCESDIRECTORY%/.ci/test_windows.ps1" - displayName: Test - - task: PublishBuildArtifacts@1 - condition: and(succeeded(), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) - inputs: - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - artifactName: PackageAssets - artifactType: container - -########################################### -- job: Package -########################################### - dependsOn: - - Linux - - MacOS - - Windows - condition: and(succeeded(), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) - pool: - vmImage: 'vs2017-win2016' - steps: - # Download all agent packages from all previous phases - - task: DownloadBuildArtifacts@0 - displayName: Download package assets - inputs: - artifactName: PackageAssets - downloadPath: $(Build.SourcesDirectory)/binaries - - 
script: | - python %BUILD_SOURCESDIRECTORY%/.nuget/create_nuget.py %BUILD_SOURCESDIRECTORY%/binaries/PackageAssets - displayName: 'Create NuGet configuration files' - - task: NuGetCommand@2 - inputs: - command: pack - packagesToPack: '$(Build.SourcesDirectory)/.nuget/*.nuspec' - packDestination: '$(Build.ArtifactStagingDirectory)' - - task: PublishBuildArtifacts@1 - inputs: - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - artifactName: NuGet - artifactType: container diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore index 193677ab5a01..1fb7a359298e 100644 --- a/R-package/.Rbuildignore +++ b/R-package/.Rbuildignore @@ -1,5 +1,7 @@ ^build_package.R$ \.gitkeep$ +^docs$ +^_pkgdown\.yml$ # Objects created by compilation \.o$ diff --git a/R-package/_pkgdown.yml b/R-package/_pkgdown.yml new file mode 100644 index 000000000000..9cf6f6d8ad66 --- /dev/null +++ b/R-package/_pkgdown.yml @@ -0,0 +1,124 @@ +template: + params: + bootswatch: united + +authors: + Guolin Ke: + href: https://github.com/guolinke + html: Guolin Ke + Damien Soukhavong: + href: https://github.com/Laurae2 + html: Damien Soukhavong + Yachen Yan: + href: https://github.com/yanyachen + html: Yachen Yan + James Lamb: + href: https://github.com/jameslamb + html: James Lamb + + +site: + root: '' + title: LightGBM, Light Gradient Boosting Machine + +reference: + - title: Dataset + desc: Datasets included with the R package + contents: + - '`agaricus.test`' + - '`agaricus.train`' + - '`bank`' + - title: Data Input / Output + desc: Data I/O required for LightGBM + contents: + - '`dim.lgb.Dataset`' + - '`dimnames.lgb.Dataset`' + - '`getinfo`' + - '`setinfo`' + - '`slice`' + - '`lgb.Dataset.construct`' + - '`lgb.Dataset.create.valid`' + - '`lgb.Dataset`' + - '`lgb.Dataset.save`' + - '`lgb.Dataset.set.categorical`' + - '`lgb.Dataset.set.reference`' + - title: Machine Learning + desc: Train models with LightGBM + contents: + - '`lgb.prepare`' + - '`lgb.prepare2`' + - '`lgb.prepare_rules`' + - '`lgb.prepare_rules2`' + - '`lgb.cv`' + - '`lgb.train`' + - title: Saving / Loading Models + desc: Save and Load LightGBM models + contents: + - '`lgb.dump`' + - '`lgb.load`' + - '`lgb.model.dt.tree`' + - '`lgb.save`' + - '`predict.lgb.Booster`' + - '`readRDS.lgb.Booster`' + - '`saveRDS.lgb.Booster`' + - title: Predictive Analysis + desc: Analyze your predictions + contents: + - '`lgb.get.eval.result`' + - '`lgb.importance`' + - '`lgb.interprete`' + - '`lgb.plot.importance`' + - '`lgb.plot.interpretation`' + - title: Miscellaneous + desc: Ungroupable functions to troubleshoot LightGBM + contents: + - '`lgb.unloader`' + +navbar: + title: LightGBM + type: default + left: + - icon: fa-home fa-lg + href: index.html + - text: Reference + href: reference/index.html + - text: Vignettes + menu: + - text: Basic Walkthrough + href: articles/basic_walkthrough.html + - text: Boosting from existing prediction + href: articles/boost_from_prediction.html + - text: Categorical Feature Preparation + href: articles/categorical_features_prepare.html + - text: Categorical Feature Preparation with Rule + href: articles/categorical_features_rules.html + - text: Cross Validation + href: articles/cross_validation.html + - text: Early Stop in training + href: articles/early_stopping.html + - text: Efficiency for Many Model Trainings + href: articles/efficient_many_training.html + - text: Leaf (in)Stability example + href: articles/leaf_stability.html + - text: Multiclass training/prediction + href: articles/multiclass.html + - text: Weight-Parameter 
adjustment relationship + href: articles/weight_param.html + right: + - icon: fa-github fa-lg + href: https://github.com/Microsoft/LightGBM + +articles: +- title: Vignettes + desc: ~ + contents: + - '`basic_walkthrough`' + - '`boost_from_prediction`' + - '`categorical_features_prepare`' + - '`categorical_features_rules`' + - '`cross_validation`' + - '`early_stopping`' + - '`efficient_many_training`' + - '`leaf_stability`' + - '`multiclass`' + - '`weight_param`' diff --git a/build_r.R b/build_r.R index d6d2d0e17d01..f69f34c042ce 100644 --- a/build_r.R +++ b/build_r.R @@ -56,7 +56,7 @@ result <- file.copy(from = "CMakeLists.txt", overwrite = TRUE) .handle_result(result) -# Build the package +# Build the package (do not touch this line!) # NOTE: --keep-empty-dirs is necessary to keep the deep paths expected # by CMake while also meeting the CRAN req to create object files # on demand diff --git a/build_r_site.R b/build_r_site.R new file mode 100644 index 000000000000..3538daee8f54 --- /dev/null +++ b/build_r_site.R @@ -0,0 +1,23 @@ +setwd("/home/docs/checkouts/readthedocs.org/user_builds/lightgbm/checkouts/docs/lightgbm_r") + +if (!dir.exists("./docs")) { + dir.create("./docs") +} + +print("========================building pkgdown site====================================") +# options(pkgdown.internet=FALSE) +library(pkgdown) + +clean_site() +init_site() +build_home(quiet = FALSE, preview = FALSE) +build_reference(document = TRUE, preview = FALSE) +# # to-do +# build_articles(preview = FALSE) +# build_tutorials(preview = FALSE) +# build_news(preview = FALSE) + +# # don't work +# pkgdown::build_site(pkg = ".", examples = FALSE, document = TRUE, +# run_dont_run = TRUE, seed = 1014, lazy = FALSE, +# override = list(), preview = NA, new_process = FALSE) diff --git a/docs/R-API.rst b/docs/R-API.rst new file mode 100644 index 000000000000..38486ea62251 --- /dev/null +++ b/docs/R-API.rst @@ -0,0 +1,4 @@ +R API +===== + +Refer to https://lightgbm.readthedocs.io/en/docs/R/reference. diff --git a/docs/conf.py b/docs/conf.py index 99b5d1dc4894..689f30e4ed85 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,6 +22,7 @@ import sys import sphinx +from distutils.dir_util import copy_tree from docutils.parsers.rst import Directive from sphinx.errors import VersionRequirementError from subprocess import PIPE, Popen @@ -56,6 +57,8 @@ def run(self): os.environ['LIGHTGBM_BUILD_DOC'] = '1' C_API = os.environ.get('C_API', '').lower().strip() != 'no' +RTD = bool(os.environ.get('READTHEDOCS', '')) +R_API = RTD # TODO: allow users to build R API locally # If your documentation needs a minimal Sphinx version, state it here. needs_sphinx = '1.3' # Due to sphinx.ext.napoleon @@ -208,6 +211,44 @@ def generate_doxygen_xml(app): raise Exception("An error has occurred while executing Doxygen\n" + str(e)) +def generate_r_docs(app): + """Generate documentation for R-package. + + Parameters + ---------- + app : object + The application object representing the Sphinx process. 
+ """ + commands = """ + export PATH="/home/docs/.conda/bin:$PATH" + echo 'options(repos = "https://cran.rstudio.com")' > $HOME/.Rprofile + conda create -q -y -n r_env r-base r-devtools r-data.table r-jsonlite r-magrittr r-matrix r-testthat cmake + conda install -q -y -n r_env -c conda-forge r-pkgdown + source activate r_env + export TAR=/bin/tar + cd /home/docs/checkouts/readthedocs.org/user_builds/lightgbm/checkouts/docs + sed -i'.bak' '/# Build the package (do not touch this line!)/q' build_r.R + Rscript build_r.R + Rscript build_r_site.R + """ + try: + # Warning! The following code can cause buffer overflows on RTD. + # Consider suppressing output completely if RTD project silently fails. + # Refer to https://github.com/svenevs/exhale + # /blob/fe7644829057af622e467bb529db6c03a830da99/exhale/deploy.py#L99-L111 + process = Popen(['/bin/bash'], + stdin=PIPE, stdout=PIPE, stderr=PIPE, + universal_newlines=True) + stdout, stderr = process.communicate(commands) + output = '\n'.join([i for i in (stdout, stderr) if i is not None]) + if process.returncode != 0: + raise RuntimeError(output) + else: + print(output) + except BaseException as e: + raise Exception("An error has occurred while generating documentation for R-package\n" + str(e)) + + def setup(app): """Add new elements at Sphinx initialization time. @@ -216,8 +257,15 @@ def setup(app): app : object The application object representing the Sphinx process. """ - if C_API: + first_run = not os.path.exists(os.path.join(CURR_PATH, '_FIRST_RUN.flag')) + if first_run and RTD: + open(os.path.join(CURR_PATH, '_FIRST_RUN.flag'), 'w').close() + if C_API and (not RTD or first_run): app.connect("builder-inited", generate_doxygen_xml) else: app.add_directive('doxygenfile', IgnoredDirective) + if R_API: + if not RTD or first_run: + app.connect("builder-inited", generate_r_docs) + app.connect("build-finished", lambda app, exception: copy_tree(app.confdir + '/../lightgbm_r/docs', app.outdir + '/R', verbose=0)) app.add_javascript("js/script.js") diff --git a/docs/index.rst b/docs/index.rst index 5ae58fc2b9f1..3a2574039bbd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -29,6 +29,7 @@ For more details, please refer to `Features <./Features.rst>`__. 
Parameters Tuning C API Python API + R API Parallel Learning Guide GPU Tutorial Advanced Topics From 026a7815adf19417a2bc4051a2977ad35a76b12f Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 18 May 2019 21:02:22 +0300 Subject: [PATCH 2/8] change theme to be more consistent with sphinx_rtd_theme on main site in terms of color scheme --- R-package/_pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/_pkgdown.yml b/R-package/_pkgdown.yml index a5c9c5ae6593..37318c01113f 100644 --- a/R-package/_pkgdown.yml +++ b/R-package/_pkgdown.yml @@ -1,6 +1,6 @@ template: params: - bootswatch: united + bootswatch: cerulean site: root: '' From 072092366c294ef4a28125700cb086eb3842135b Mon Sep 17 00:00:00 2001 From: StrikerRUS Date: Sun, 19 May 2019 00:38:10 +0300 Subject: [PATCH 3/8] placed man folder with old Rd files back --- R-package/man/agaricus.test.Rd | 31 ++++++ R-package/man/agaricus.train.Rd | 31 ++++++ R-package/man/bank.Rd | 25 +++++ R-package/man/dim.Rd | 34 +++++++ R-package/man/dimnames.lgb.Dataset.Rd | 37 ++++++++ R-package/man/getinfo.Rd | 48 ++++++++++ R-package/man/lgb.Dataset.Rd | 44 +++++++++ R-package/man/lgb.Dataset.construct.Rd | 22 +++++ R-package/man/lgb.Dataset.create.valid.Rd | 33 +++++++ R-package/man/lgb.Dataset.save.Rd | 28 ++++++ R-package/man/lgb.Dataset.set.categorical.Rd | 29 ++++++ R-package/man/lgb.Dataset.set.reference.Rd | 30 ++++++ R-package/man/lgb.cv.Rd | 99 ++++++++++++++++++++ R-package/man/lgb.dump.Rd | 39 ++++++++ R-package/man/lgb.get.eval.result.Rd | 46 +++++++++ R-package/man/lgb.importance.Rd | 41 ++++++++ R-package/man/lgb.interprete.Rd | 51 ++++++++++ R-package/man/lgb.load.Rd | 44 +++++++++ R-package/man/lgb.model.dt.tree.Rd | 55 +++++++++++ R-package/man/lgb.plot.importance.Rd | 50 ++++++++++ R-package/man/lgb.plot.interpretation.Rd | 50 ++++++++++ R-package/man/lgb.prepare.Rd | 49 ++++++++++ R-package/man/lgb.prepare2.Rd | 50 ++++++++++ R-package/man/lgb.prepare_rules.Rd | 78 +++++++++++++++ R-package/man/lgb.prepare_rules2.Rd | 78 +++++++++++++++ R-package/man/lgb.save.Rd | 41 ++++++++ R-package/man/lgb.train.Rd | 89 ++++++++++++++++++ R-package/man/lgb.unloader.Rd | 46 +++++++++ R-package/man/lgb_shared_params.Rd | 31 ++++++ R-package/man/lightgbm.Rd | 64 +++++++++++++ R-package/man/predict.lgb.Booster.Rd | 65 +++++++++++++ R-package/man/readRDS.lgb.Booster.Rd | 40 ++++++++ R-package/man/saveRDS.lgb.Booster.Rd | 51 ++++++++++ R-package/man/setinfo.Rd | 50 ++++++++++ R-package/man/slice.Rd | 36 +++++++ 35 files changed, 1635 insertions(+) create mode 100644 R-package/man/agaricus.test.Rd create mode 100644 R-package/man/agaricus.train.Rd create mode 100644 R-package/man/bank.Rd create mode 100644 R-package/man/dim.Rd create mode 100644 R-package/man/dimnames.lgb.Dataset.Rd create mode 100644 R-package/man/getinfo.Rd create mode 100644 R-package/man/lgb.Dataset.Rd create mode 100644 R-package/man/lgb.Dataset.construct.Rd create mode 100644 R-package/man/lgb.Dataset.create.valid.Rd create mode 100644 R-package/man/lgb.Dataset.save.Rd create mode 100644 R-package/man/lgb.Dataset.set.categorical.Rd create mode 100644 R-package/man/lgb.Dataset.set.reference.Rd create mode 100644 R-package/man/lgb.cv.Rd create mode 100644 R-package/man/lgb.dump.Rd create mode 100644 R-package/man/lgb.get.eval.result.Rd create mode 100644 R-package/man/lgb.importance.Rd create mode 100644 R-package/man/lgb.interprete.Rd create mode 100644 R-package/man/lgb.load.Rd create mode 100644 R-package/man/lgb.model.dt.tree.Rd create mode 
100644 R-package/man/lgb.plot.importance.Rd create mode 100644 R-package/man/lgb.plot.interpretation.Rd create mode 100644 R-package/man/lgb.prepare.Rd create mode 100644 R-package/man/lgb.prepare2.Rd create mode 100644 R-package/man/lgb.prepare_rules.Rd create mode 100644 R-package/man/lgb.prepare_rules2.Rd create mode 100644 R-package/man/lgb.save.Rd create mode 100644 R-package/man/lgb.train.Rd create mode 100644 R-package/man/lgb.unloader.Rd create mode 100644 R-package/man/lgb_shared_params.Rd create mode 100644 R-package/man/lightgbm.Rd create mode 100644 R-package/man/predict.lgb.Booster.Rd create mode 100644 R-package/man/readRDS.lgb.Booster.Rd create mode 100644 R-package/man/saveRDS.lgb.Booster.Rd create mode 100644 R-package/man/setinfo.Rd create mode 100644 R-package/man/slice.Rd diff --git a/R-package/man/agaricus.test.Rd b/R-package/man/agaricus.test.Rd new file mode 100644 index 000000000000..dcff0241b7c5 --- /dev/null +++ b/R-package/man/agaricus.test.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lightgbm.R +\docType{data} +\name{agaricus.test} +\alias{agaricus.test} +\title{Test part from Mushroom Data Set} +\format{A list containing a label vector, and a dgCMatrix object with 1611 +rows and 126 variables} +\usage{ +data(agaricus.test) +} +\description{ +This data set is originally from the Mushroom data set, +UCI Machine Learning Repository. +} +\details{ +This data set includes the following fields: + +\itemize{ + \item \code{label} the label for each record + \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. +} +} +\references{ +https://archive.ics.uci.edu/ml/datasets/Mushroom + +Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository +[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +School of Information and Computer Science. +} +\keyword{datasets} diff --git a/R-package/man/agaricus.train.Rd b/R-package/man/agaricus.train.Rd new file mode 100644 index 000000000000..539f8a5ba2d0 --- /dev/null +++ b/R-package/man/agaricus.train.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lightgbm.R +\docType{data} +\name{agaricus.train} +\alias{agaricus.train} +\title{Training part from Mushroom Data Set} +\format{A list containing a label vector, and a dgCMatrix object with 6513 +rows and 127 variables} +\usage{ +data(agaricus.train) +} +\description{ +This data set is originally from the Mushroom data set, +UCI Machine Learning Repository. +} +\details{ +This data set includes the following fields: + +\itemize{ + \item \code{label} the label for each record + \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns. +} +} +\references{ +https://archive.ics.uci.edu/ml/datasets/Mushroom + +Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository +[http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, +School of Information and Computer Science. 
+}
+\keyword{datasets}
diff --git a/R-package/man/bank.Rd b/R-package/man/bank.Rd
new file mode 100644
index 000000000000..dff44d63dd00
--- /dev/null
+++ b/R-package/man/bank.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lightgbm.R
+\docType{data}
+\name{bank}
+\alias{bank}
+\title{Bank Marketing Data Set}
+\format{A data.table with 4521 rows and 17 variables}
+\usage{
+data(bank)
+}
+\description{
+This data set is originally from the Bank Marketing data set,
+UCI Machine Learning Repository.
+}
+\details{
+It contains only the following: bank.csv with 10% of the examples and 17 inputs,
+randomly selected from the full dataset (an older version of this dataset with fewer inputs).
+}
+\references{
+http://archive.ics.uci.edu/ml/datasets/Bank+Marketing
+
+S. Moro, P. Cortez and P. Rita. (2014)
+A Data-Driven Approach to Predict the Success of Bank Telemarketing. Decision Support Systems
+}
+\keyword{datasets}
diff --git a/R-package/man/dim.Rd b/R-package/man/dim.Rd
new file mode 100644
index 000000000000..a8a567c9b85c
--- /dev/null
+++ b/R-package/man/dim.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{dim.lgb.Dataset}
+\alias{dim.lgb.Dataset}
+\title{Dimensions of an lgb.Dataset}
+\usage{
+\method{dim}{lgb.Dataset}(x, ...)
+}
+\arguments{
+\item{x}{Object of class \code{lgb.Dataset}}
+
+\item{...}{other parameters}
+}
+\value{
+a vector of numbers of rows and of columns
+}
+\description{
+Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}.
+}
+\details{
+Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
+be directly used with an \code{lgb.Dataset} object.
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+
+stopifnot(nrow(dtrain) == nrow(train$data))
+stopifnot(ncol(dtrain) == ncol(train$data))
+stopifnot(all(dim(dtrain) == dim(train$data)))
+
+}
diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd
new file mode 100644
index 000000000000..54563ac5c00c
--- /dev/null
+++ b/R-package/man/dimnames.lgb.Dataset.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{dimnames.lgb.Dataset}
+\alias{dimnames.lgb.Dataset}
+\alias{dimnames<-.lgb.Dataset}
+\title{Handling of column names of \code{lgb.Dataset}}
+\usage{
+\method{dimnames}{lgb.Dataset}(x)
+
+\method{dimnames}{lgb.Dataset}(x) <- value
+}
+\arguments{
+\item{x}{object of class \code{lgb.Dataset}}
+
+\item{value}{a list of two elements: the first one is ignored
+and the second one is column names}
+}
+\description{
+Only column names are supported for \code{lgb.Dataset}, thus setting of
+row names would have no effect and returned row names would be NULL.
+}
+\details{
+Generic \code{dimnames} methods are used by \code{colnames}.
+Since row names are irrelevant, it is recommended to use \code{colnames} directly.
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+lgb.Dataset.construct(dtrain)
+dimnames(dtrain)
+colnames(dtrain)
+colnames(dtrain) <- make.names(1:ncol(train$data))
+print(dtrain, verbose = TRUE)
+
+}
diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd
new file mode 100644
index 000000000000..04116a46b474
--- /dev/null
+++ b/R-package/man/getinfo.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{getinfo}
+\alias{getinfo}
+\alias{getinfo.lgb.Dataset}
+\title{Get information of an lgb.Dataset object}
+\usage{
+getinfo(dataset, ...)
+
+\method{getinfo}{lgb.Dataset}(dataset, name, ...)
+}
+\arguments{
+\item{dataset}{Object of class \code{lgb.Dataset}}
+
+\item{...}{other parameters}
+
+\item{name}{the name of the information field to get (see details)}
+}
+\value{
+info data
+}
+\description{
+Get information of an lgb.Dataset object
+}
+\details{
+The \code{name} field can be one of the following:
+
+\itemize{
+  \item \code{label}: the label LightGBM learns from;
+  \item \code{weight}: weights used to perform a weight rescale;
+  \item \code{group}: group size;
+  \item \code{init_score}: the initial score, i.e. the base prediction LightGBM will boost from.
+}
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+lgb.Dataset.construct(dtrain)
+
+labels <- lightgbm::getinfo(dtrain, "label")
+lightgbm::setinfo(dtrain, "label", 1 - labels)
+
+labels2 <- lightgbm::getinfo(dtrain, "label")
+stopifnot(all(labels2 == 1 - labels))
+
+}
diff --git a/R-package/man/lgb.Dataset.Rd b/R-package/man/lgb.Dataset.Rd
new file mode 100644
index 000000000000..e0d6b64aa98b
--- /dev/null
+++ b/R-package/man/lgb.Dataset.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{lgb.Dataset}
+\alias{lgb.Dataset}
+\title{Construct lgb.Dataset object}
+\usage{
+lgb.Dataset(data, params = list(), reference = NULL, colnames = NULL,
+  categorical_feature = NULL, free_raw_data = TRUE, info = list(),
+  ...)
+}
+\arguments{
+\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
+
+\item{params}{a list of parameters}
+
+\item{reference}{reference dataset}
+
+\item{colnames}{names of columns}
+
+\item{categorical_feature}{categorical features}
+
+\item{free_raw_data}{TRUE to free the raw data after constructing the Dataset}
+
+\item{info}{a list of information of the lgb.Dataset object}
+
+\item{...}{other information to pass to \code{info} or parameters to pass to \code{params}}
+}
+\value{
+constructed dataset
+}
+\description{
+Construct lgb.Dataset object from dense matrix, sparse matrix
+or local file (that was created previously by saving an \code{lgb.Dataset}).
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+lgb.Dataset.save(dtrain, "lgb.Dataset.data")
+dtrain <- lgb.Dataset("lgb.Dataset.data")
+lgb.Dataset.construct(dtrain)
+
+}
diff --git a/R-package/man/lgb.Dataset.construct.Rd b/R-package/man/lgb.Dataset.construct.Rd
new file mode 100644
index 000000000000..23dfc0e9f67b
--- /dev/null
+++ b/R-package/man/lgb.Dataset.construct.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{lgb.Dataset.construct}
+\alias{lgb.Dataset.construct}
+\title{Construct Dataset explicitly}
+\usage{
+lgb.Dataset.construct(dataset)
+}
+\arguments{
+\item{dataset}{Object of class \code{lgb.Dataset}}
+}
+\description{
+Construct Dataset explicitly
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+lgb.Dataset.construct(dtrain)
+
+}
diff --git a/R-package/man/lgb.Dataset.create.valid.Rd b/R-package/man/lgb.Dataset.create.valid.Rd
new file mode 100644
index 000000000000..0d0f3454a8e2
--- /dev/null
+++ b/R-package/man/lgb.Dataset.create.valid.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{lgb.Dataset.create.valid}
+\alias{lgb.Dataset.create.valid}
+\title{Construct validation data}
+\usage{
+lgb.Dataset.create.valid(dataset, data, info = list(), ...)
+}
+\arguments{
+\item{dataset}{\code{lgb.Dataset} object, training data}
+
+\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
+
+\item{info}{a list of information of the lgb.Dataset object}
+
+\item{...}{other information to pass to \code{info}.}
+}
+\value{
+constructed dataset
+}
+\description{
+Construct validation data according to training data
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+
+}
diff --git a/R-package/man/lgb.Dataset.save.Rd b/R-package/man/lgb.Dataset.save.Rd
new file mode 100644
index 000000000000..f5664a9841a4
--- /dev/null
+++ b/R-package/man/lgb.Dataset.save.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{lgb.Dataset.save}
+\alias{lgb.Dataset.save}
+\title{Save \code{lgb.Dataset} to a binary file}
+\usage{
+lgb.Dataset.save(dataset, fname)
+}
+\arguments{
+\item{dataset}{object of class \code{lgb.Dataset}}
+
+\item{fname}{filename of the output file}
+}
+\value{
+passed dataset
+}
+\description{
+Save \code{lgb.Dataset} to a binary file
+}
+\examples{
+
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+lgb.Dataset.save(dtrain, "data.bin")
+
+}
diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd b/R-package/man/lgb.Dataset.set.categorical.Rd
new file mode 100644
index 000000000000..1cec77c13d85
--- /dev/null
+++ b/R-package/man/lgb.Dataset.set.categorical.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{lgb.Dataset.set.categorical}
+\alias{lgb.Dataset.set.categorical}
+\title{Set categorical feature of \code{lgb.Dataset}}
+\usage{
+lgb.Dataset.set.categorical(dataset, categorical_feature)
+}
+\arguments{
+\item{dataset}{object of class \code{lgb.Dataset}}
+
+\item{categorical_feature}{categorical features}
+}
+\value{
+passed dataset
+}
+\description{
+Set categorical feature of \code{lgb.Dataset}
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+lgb.Dataset.save(dtrain, "lgb.Dataset.data")
+dtrain <- lgb.Dataset("lgb.Dataset.data")
+lgb.Dataset.set.categorical(dtrain, 1:2)
+
+}
diff --git a/R-package/man/lgb.Dataset.set.reference.Rd b/R-package/man/lgb.Dataset.set.reference.Rd
new file mode 100644
index 000000000000..fabe7c03e6fd
--- /dev/null
+++ b/R-package/man/lgb.Dataset.set.reference.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{lgb.Dataset.set.reference}
+\alias{lgb.Dataset.set.reference}
+\title{Set reference of \code{lgb.Dataset}}
+\usage{
+lgb.Dataset.set.reference(dataset, reference)
+}
+\arguments{
+\item{dataset}{object of class \code{lgb.Dataset}}
+
+\item{reference}{object of class \code{lgb.Dataset}}
+}
+\value{
+passed dataset
+}
+\description{
+If you want to use validation data, you should set reference to training data
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset(test$data, label = test$label)
+lgb.Dataset.set.reference(dtest, dtrain)
+
+}
diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd
new file mode 100644
index 000000000000..28652ba1141e
--- /dev/null
+++ b/R-package/man/lgb.cv.Rd
@@ -0,0 +1,99 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.cv.R
+\name{lgb.cv}
+\alias{lgb.cv}
+\title{Main CV logic for LightGBM}
+\usage{
+lgb.cv(params = list(), data, nrounds = 10, nfold = 3,
+  label = NULL, weight = NULL, obj = NULL, eval = NULL,
+  verbose = 1, record = TRUE, eval_freq = 1L, showsd = TRUE,
+  stratified = TRUE, folds = NULL, init_model = NULL,
+  colnames = NULL, categorical_feature = NULL,
+  early_stopping_rounds = NULL, callbacks = list(),
+  reset_data = FALSE, ...)
+}
+\arguments{
+\item{params}{List of parameters}
+
+\item{data}{a \code{lgb.Dataset} object, used for training}
+
+\item{nrounds}{number of training rounds}
+
+\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
+
+\item{label}{vector of response values. Should be provided only when data is an R matrix.}
+
+\item{weight}{vector of weights. If not NULL, it will be set on the dataset}
+
+\item{obj}{objective function, can be character or custom objective function. Examples include
+\code{regression}, \code{regression_l1}, \code{huber},
+\code{binary}, \code{lambdarank}, \code{multiclass}}
+
+\item{eval}{evaluation function, can be (a list of) character or custom eval function}
+
+\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
+
+\item{record}{Boolean, TRUE will record iteration messages to \code{booster$record_evals}}
+
+\item{eval_freq}{evaluation output frequency, only has an effect when verbose > 0}
+
+\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
+
+\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
+by the values of outcome labels.}
+
+\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds
+(each element must be a vector of test fold's indices). When folds are supplied,
+the \code{nfold} and \code{stratified} parameters are ignored.}
+
+\item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model}
+
+\item{colnames}{feature names, if not null, will use this to overwrite the names in dataset}
+
+\item{categorical_feature}{list of str or int;
+type int represents index,
+type str represents feature names}
+
+\item{early_stopping_rounds}{int
+Activates early stopping.
+Requires at least one validation dataset and one metric.
+If there's more than one, will check all of them except the training data.
+Returns the model with (best_iter + early_stopping_rounds).
+If early stopping occurs, the model will have a 'best_iter' field}
+
+\item{callbacks}{list of callback functions
+that are applied at each iteration}
+
+\item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model, which frees up memory and the original datasets}
+
+\item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
+\itemize{
+  \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
+  \item{num_leaves}{number of leaves in one tree. Defaults to 127}
+  \item{max_depth}{Limit the max depth of the tree model. This is used to deal with
+    overfitting when #data is small. Trees still grow leaf-wise.}
+  \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
+    the number of real CPU cores, not the number of threads (most
+    CPUs use hyper-threading to generate 2 threads per CPU core).}
+}}
+}
+\value{
+a trained model \code{lgb.CVBooster}.
+}
+\description{
+Cross validation logic used by LightGBM
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+params <- list(objective = "regression", metric = "l2")
+model <- lgb.cv(params,
+                dtrain,
+                10,
+                nfold = 5,
+                min_data = 1,
+                learning_rate = 1,
+                early_stopping_rounds = 10)
+}
diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd
new file mode 100644
index 000000000000..a18483e3dad9
--- /dev/null
+++ b/R-package/man/lgb.dump.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Booster.R
+\name{lgb.dump}
+\alias{lgb.dump}
+\title{Dump LightGBM model to JSON}
+\usage{
+lgb.dump(booster, num_iteration = NULL)
+}
+\arguments{
+\item{booster}{Object of class \code{lgb.Booster}}
+
+\item{num_iteration}{number of iterations to predict with; NULL or <= 0 means use best iteration}
+}
+\value{
+JSON format of model
+}
+\description{
+Dump LightGBM model to JSON
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+json_model <- lgb.dump(model)
+
+}
diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd
new file mode 100644
index 000000000000..52dd2b1dd18d
--- /dev/null
+++ b/R-package/man/lgb.get.eval.result.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Booster.R
+\name{lgb.get.eval.result}
+\alias{lgb.get.eval.result}
+\title{Get record evaluation result from booster}
+\usage{
+lgb.get.eval.result(booster, data_name, eval_name, iters = NULL,
+  is_err = FALSE)
+}
+\arguments{
+\item{booster}{Object of class \code{lgb.Booster}}
+
+\item{data_name}{name of dataset}
+
+\item{eval_name}{name of evaluation}
+
+\item{iters}{iterations; NULL will return all}
+
+\item{is_err}{TRUE will return evaluation error instead}
+}
+\value{
+vector of evaluation result
+}
+\description{
+Get record evaluation result from booster
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+lgb.get.eval.result(model, "test", "l2")
+
+}
diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd
new file mode 100644
index 000000000000..8e7fe941644e
--- /dev/null
+++ b/R-package/man/lgb.importance.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.importance.R
+\name{lgb.importance}
+\alias{lgb.importance}
+\title{Compute feature importance in a model}
+\usage{
+lgb.importance(model, percentage = TRUE)
+}
+\arguments{
+\item{model}{object of class \code{lgb.Booster}.}
+
+\item{percentage}{whether to show importance in relative percentage.}
+}
+\value{
+For a tree model, a \code{data.table} with the following columns:
+\itemize{
+  \item \code{Feature} Feature names in the model.
+  \item \code{Gain} The total gain of this feature's splits.
+  \item \code{Cover} The number of observations related to this feature.
+  \item \code{Frequency} The number of times a feature is used in splits.
+}
+}
+\description{
+Creates a \code{data.table} of feature importances in a model.
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+
+params <- list(objective = "binary",
+               learning_rate = 0.01, num_leaves = 63, max_depth = -1,
+               min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
+model <- lgb.train(params, dtrain, 20)
+
+tree_imp1 <- lgb.importance(model, percentage = TRUE)
+tree_imp2 <- lgb.importance(model, percentage = FALSE)
+
+}
diff --git a/R-package/man/lgb.interprete.Rd b/R-package/man/lgb.interprete.Rd
new file mode 100644
index 000000000000..da28a542a852
--- /dev/null
+++ b/R-package/man/lgb.interprete.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.interprete.R
+\name{lgb.interprete}
+\alias{lgb.interprete}
+\title{Compute feature contribution of prediction}
+\usage{
+lgb.interprete(model, data, idxset, num_iteration = NULL)
+}
+\arguments{
+\item{model}{object of class \code{lgb.Booster}.}
+
+\item{data}{a matrix object or a dgCMatrix object.}
+
+\item{idxset}{an integer vector of indices of rows needed.}
+
+\item{num_iteration}{number of iterations to predict with; NULL or <= 0 means use best iteration.}
+}
+\value{
+For regression, binary classification and lambdarank models, a \code{list} of \code{data.table} with the following columns:
+\itemize{
+  \item \code{Feature} Feature names in the model.
+  \item \code{Contribution} The total contribution of this feature's splits.
+}
+For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns for each class.
+}
+\description{
+Computes feature contribution components of raw score prediction.
+}
+\examples{
+Sigmoid <- function(x) 1 / (1 + exp(-x))
+Logit <- function(x) log(x / (1 - x))
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)))
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+
+params <- list(
+    objective = "binary"
+    , learning_rate = 0.01
+    , num_leaves = 63
+    , max_depth = -1
+    , min_data_in_leaf = 1
+    , min_sum_hessian_in_leaf = 1
+)
+model <- lgb.train(params, dtrain, 20)
+
+tree_interpretation <- lgb.interprete(model, test$data, 1:5)
+
+}
diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd
new file mode 100644
index 000000000000..bf298920e75d
--- /dev/null
+++ b/R-package/man/lgb.load.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Booster.R
+\name{lgb.load}
+\alias{lgb.load}
+\title{Load LightGBM model}
+\usage{
+lgb.load(filename = NULL, model_str = NULL)
+}
+\arguments{
+\item{filename}{path of model file}
+
+\item{model_str}{a string containing the model}
+}
+\value{
+lgb.Booster
+}
+\description{
+Load a LightGBM model from a saved model file or string.
+lgb.load takes either a file path or a model string;
+if both are provided, it will default to loading from file.
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+lgb.save(model, "model.txt")
+load_booster <- lgb.load(filename = "model.txt")
+model_string <- model$save_model_to_string(NULL) # saves best iteration
+load_booster_from_str <- lgb.load(model_str = model_string)
+
+}
diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd
new file mode 100644
index 000000000000..be622f428e43
--- /dev/null
+++ b/R-package/man/lgb.model.dt.tree.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.model.dt.tree.R
+\name{lgb.model.dt.tree}
+\alias{lgb.model.dt.tree}
+\title{Parse a LightGBM model JSON dump}
+\usage{
+lgb.model.dt.tree(model, num_iteration = NULL)
+}
+\arguments{
+\item{model}{object of class \code{lgb.Booster}}
+
+\item{num_iteration}{number of iterations you want to predict with. NULL or
+<= 0 means use best iteration}
+}
+\value{
+A \code{data.table} with detailed information about the model trees' nodes and leaves.
+
+The columns of the \code{data.table} are:
+
+\itemize{
+  \item \code{tree_index}: ID of a tree in a model (integer)
+  \item \code{split_index}: ID of a node in a tree (integer)
+  \item \code{split_feature}: for a node, it's a feature name (character);
+    for a leaf, it simply labels it as \code{"NA"}
+  \item \code{node_parent}: ID of the parent node for current node (integer)
+  \item \code{leaf_index}: ID of a leaf in a tree (integer)
+  \item \code{leaf_parent}: ID of the parent node for current leaf (integer)
+  \item \code{split_gain}: Split gain of a node
+  \item \code{threshold}: Splitting threshold value of a node
+  \item \code{decision_type}: Decision type of a node
+  \item \code{default_left}: Determines how to handle NA values: TRUE -> Left, FALSE -> Right
+  \item \code{internal_value}: Node value
+  \item \code{internal_count}: The number of observations collected by a node
+  \item \code{leaf_value}: Leaf value
+  \item \code{leaf_count}: The number of observations collected by a leaf
+}
+}
+\description{
+Parse a LightGBM model JSON dump into a \code{data.table} structure.
+}
+\examples{
+
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+
+params <- list(objective = "binary",
+               learning_rate = 0.01, num_leaves = 63, max_depth = -1,
+               min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
+model <- lgb.train(params, dtrain, 20)
+
+tree_dt <- lgb.model.dt.tree(model)
+
+}
diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd
new file mode 100644
index 000000000000..9e7e688d2bd9
--- /dev/null
+++ b/R-package/man/lgb.plot.importance.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.plot.importance.R
+\name{lgb.plot.importance}
+\alias{lgb.plot.importance}
+\title{Plot feature importance as a bar graph}
+\usage{
+lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain",
+  left_margin = 10, cex = NULL)
+}
+\arguments{
+\item{tree_imp}{a \code{data.table} returned by \code{\link{lgb.importance}}.}
+
+\item{top_n}{maximal number of top features to include in the plot.}
+
+\item{measure}{the name of the importance measure to plot, can be "Gain", "Cover" or "Frequency".}
+
+\item{left_margin}{(base R barplot) allows adjusting the left margin size to fit feature names.}
+
+\item{cex}{(base R barplot) passed as \code{cex.names} parameter to \code{barplot}.}
+}
+\value{
+The \code{lgb.plot.importance} function creates a \code{barplot}
+and silently returns a processed data.table with \code{top_n} features sorted by defined importance.
+}
+\description{
+Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph.
+}
+\details{
+The graph represents each feature as a horizontal bar of length proportional to the defined importance of a feature.
+Features are shown ranked in decreasing importance order.
+}
+\examples{
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+
+params <- list(
+    objective = "binary"
+    , learning_rate = 0.01
+    , num_leaves = 63
+    , max_depth = -1
+    , min_data_in_leaf = 1
+    , min_sum_hessian_in_leaf = 1
+)
+
+model <- lgb.train(params, dtrain, 20)
+
+tree_imp <- lgb.importance(model, percentage = TRUE)
+lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain")
+}
diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd
new file mode 100644
index 000000000000..c69b8f3354e6
--- /dev/null
+++ b/R-package/man/lgb.plot.interpretation.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.plot.interpretation.R
+\name{lgb.plot.interpretation}
+\alias{lgb.plot.interpretation}
+\title{Plot feature contribution as a bar graph}
+\usage{
+lgb.plot.interpretation(tree_interpretation_dt, top_n = 10, cols = 1,
+  left_margin = 10, cex = NULL)
+}
+\arguments{
+\item{tree_interpretation_dt}{a \code{data.table} returned by \code{\link{lgb.interprete}}.}
+
+\item{top_n}{maximal number of top features to include in the plot.}
+
+\item{cols}{the number of columns in the plot layout, used only for multiclass classification feature contribution.}
+
+\item{left_margin}{(base R barplot) allows adjusting the left margin size to fit feature names.}
+
+\item{cex}{(base R barplot) passed as \code{cex.names} parameter to \code{barplot}.}
+}
+\value{
+The \code{lgb.plot.interpretation} function creates a \code{barplot}.
+}
+\description{
+Plot previously calculated feature contribution as a bar graph.
+}
+\details{
+The graph represents each feature as a horizontal bar of length proportional to the defined contribution of a feature.
+Features are shown ranked in decreasing contribution order.
+}
+\examples{
+library(lightgbm)
+Sigmoid <- function(x) {1 / (1 + exp(-x))}
+Logit <- function(x) {log(x / (1 - x))}
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)))
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+
+params <- list(objective = "binary",
+               learning_rate = 0.01, num_leaves = 63, max_depth = -1,
+               min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1)
+model <- lgb.train(params, dtrain, 20)
+
+tree_interpretation <- lgb.interprete(model, test$data, 1:5)
+lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10)
+}
diff --git a/R-package/man/lgb.prepare.Rd b/R-package/man/lgb.prepare.Rd
new file mode 100644
index 000000000000..625cb5a8e2db
--- /dev/null
+++ b/R-package/man/lgb.prepare.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.prepare.R
+\name{lgb.prepare}
+\alias{lgb.prepare}
+\title{Data preparator for LightGBM datasets (numeric)}
+\usage{
+lgb.prepare(data)
+}
+\arguments{
+\item{data}{A data.frame or data.table to prepare.}
+}
+\value{
+The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset.
+}
+\description{
+Attempts to prepare a clean dataset for use in an lgb.Dataset. Factors and characters are converted to numeric (not integer). Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets.
+} +\examples{ +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... + +# When lightgbm package is installed, and you do not want to load it +# You can still use the function! +lgb.unloader() +str(lightgbm::lgb.prepare(data = iris)) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... + +} diff --git a/R-package/man/lgb.prepare2.Rd b/R-package/man/lgb.prepare2.Rd new file mode 100644 index 000000000000..5739b03363d5 --- /dev/null +++ b/R-package/man/lgb.prepare2.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.prepare2.R +\name{lgb.prepare2} +\alias{lgb.prepare2} +\title{Data preparator for LightGBM datasets (integer)} +\usage{ +lgb.prepare2(data) +} +\arguments{ +\item{data}{A data.frame or data.table to prepare.} +} +\value{ +The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. +} +\description{ +Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. +} +\examples{ +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +# Convert all factors/chars to integer +str(lgb.prepare2(data = iris)) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... + +# When lightgbm package is installed, and you do not want to load it +# You can still use the function! +lgb.unloader() +str(lightgbm::lgb.prepare2(data = iris)) +# 'data.frame': 150 obs. 
+# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
+
+}
diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd
new file mode 100644
index 000000000000..ac8a8d43ef45
--- /dev/null
+++ b/R-package/man/lgb.prepare_rules.Rd
@@ -0,0 +1,78 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.prepare_rules.R
+\name{lgb.prepare_rules}
+\alias{lgb.prepare_rules}
+\title{Data preparator for LightGBM datasets with rules (numeric)}
+\usage{
+lgb.prepare_rules(data, rules = NULL)
+}
+\arguments{
+\item{data}{A data.frame or data.table to prepare.}
+
+\item{rules}{A set of rules from the data preparator, if already used.}
+}
+\value{
+A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input to lgb.Dataset.
+}
+\description{
+Attempts to prepare a clean dataset for use in a lgb.Dataset. Factors and characters are converted to numeric. In addition, the rules created are kept, so you can convert other datasets with this converter.
+}
+\examples{
+library(lightgbm)
+data(iris)
+
+str(iris)
+# 'data.frame': 150 obs. of 5 variables:
+# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
+
+new_iris <- lgb.prepare_rules(data = iris) # Autoconverter
+str(new_iris$data)
+# 'data.frame': 150 obs. of 5 variables:
+# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species : num 1 1 1 1 1 1 1 1 1 1 ...
+
+data(iris) # Erase iris dataset
+iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
+# Warning message:
+# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, :
+# invalid factor level, NA generated
+
+# Use conversion using known rules
+# Unknown factors become 0, excellent for sparse datasets
+newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules)
+
+# Unknown factor is now zero, perfect for sparse datasets
+newer_iris$data[1, ] # Species became 0 as it is an unknown factor
+# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+# 1 5.1 3.5 1.4 0.2 0
+
+newer_iris$data[1, 5] <- 1 # Put back real initial value
+
+# Is the newly created dataset equal? YES!
+all.equal(new_iris$data, newer_iris$data)
+# [1] TRUE
+
+# Can we test our own rules?
+data(iris) # Erase iris dataset
+
+# We remapped values differently
+personal_rules <- list(Species = c("setosa" = 3,
+                                   "versicolor" = 2,
+                                   "virginica" = 1))
+newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules)
+str(newest_iris$data) # SUCCESS!
+# 'data.frame': 150 obs. of 5 variables:
+# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species : num 3 3 3 3 3 3 3 3 3 3 ...
+
+}
diff --git a/R-package/man/lgb.prepare_rules2.Rd b/R-package/man/lgb.prepare_rules2.Rd
new file mode 100644
index 000000000000..2422cbc073d0
--- /dev/null
+++ b/R-package/man/lgb.prepare_rules2.Rd
@@ -0,0 +1,78 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.prepare_rules2.R
+\name{lgb.prepare_rules2}
+\alias{lgb.prepare_rules2}
+\title{Data preparator for LightGBM datasets with rules (integer)}
+\usage{
+lgb.prepare_rules2(data, rules = NULL)
+}
+\arguments{
+\item{data}{A data.frame or data.table to prepare.}
+
+\item{rules}{A set of rules from the data preparator, if already used.}
+}
+\value{
+A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input to lgb.Dataset.
+}
+\description{
+Attempts to prepare a clean dataset for use in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). In addition, the rules created are kept, so you can convert other datasets with this converter. This is useful if you have a specific need for an integer dataset instead of a numeric one. Note that some programs do not support integer-only input. Consider this a half-memory technique which can be dangerous, especially for LightGBM.
+}
+\examples{
+library(lightgbm)
+data(iris)
+
+str(iris)
+# 'data.frame': 150 obs. of 5 variables:
+# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ...
+
+new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter
+str(new_iris$data)
+# 'data.frame': 150 obs. of 5 variables:
+# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species : int 1 1 1 1 1 1 1 1 1 1 ...
+
+data(iris) # Erase iris dataset
+iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA)
+# Warning message:
+# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, :
+# invalid factor level, NA generated
+
+# Use conversion using known rules
+# Unknown factors become 0, excellent for sparse datasets
+newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules)
+
+# Unknown factor is now zero, perfect for sparse datasets
+newer_iris$data[1, ] # Species became 0 as it is an unknown factor
+# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
+# 1 5.1 3.5 1.4 0.2 0
+
+newer_iris$data[1, 5] <- 1 # Put back real initial value
+
+# Is the newly created dataset equal? YES!
+all.equal(new_iris$data, newer_iris$data)
+# [1] TRUE
+
+# Can we test our own rules?
+data(iris) # Erase iris dataset
+
+# We remapped values differently
+personal_rules <- list(Species = c("setosa" = 3L,
+                                   "versicolor" = 2L,
+                                   "virginica" = 1L))
+newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules)
+str(newest_iris$data) # SUCCESS!
+# 'data.frame': 150 obs. of 5 variables:
+# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
+# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
+# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
+# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
+# $ Species : int 3 3 3 3 3 3 3 3 3 3 ...
+
+}
diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd
new file mode 100644
index 000000000000..bad52ad603ad
--- /dev/null
+++ b/R-package/man/lgb.save.Rd
@@ -0,0 +1,41 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Booster.R
+\name{lgb.save}
+\alias{lgb.save}
+\title{Save LightGBM model}
+\usage{
+lgb.save(booster, filename, num_iteration = NULL)
+}
+\arguments{
+\item{booster}{Object of class \code{lgb.Booster}}
+
+\item{filename}{name of the file where the model will be saved}
+
+\item{num_iteration}{number of iterations to save; NULL or <= 0 means use the best iteration}
+}
+\value{
+lgb.Booster
+}
+\description{
+Save LightGBM model
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+lgb.save(model, "model.txt")
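+
+# Hedged addition (not part of the original example): the saved text
+# model can be loaded back with lgb.load()
+loaded_model <- lgb.load("model.txt")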
+
+}
diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd
new file mode 100644
index 000000000000..7f1e9f957893
--- /dev/null
+++ b/R-package/man/lgb.train.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.train.R
+\name{lgb.train}
+\alias{lgb.train}
+\title{Main training logic for LightGBM}
+\usage{
+lgb.train(params = list(), data, nrounds = 10, valids = list(),
+  obj = NULL, eval = NULL, verbose = 1, record = TRUE,
+  eval_freq = 1L, init_model = NULL, colnames = NULL,
+  categorical_feature = NULL, early_stopping_rounds = NULL,
+  callbacks = list(), reset_data = FALSE, ...)
+}
+\arguments{
+\item{params}{List of parameters}
+
+\item{data}{a \code{lgb.Dataset} object, used for training}
+
+\item{nrounds}{number of training rounds}
+
+\item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
+
+\item{obj}{objective function, can be character or custom objective function. Examples include
+\code{regression}, \code{regression_l1}, \code{huber},
+\code{binary}, \code{lambdarank}, \code{multiclass}}
+
+\item{eval}{evaluation function, can be (a list of) character or custom eval function}
+
+\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
+
+\item{record}{Boolean, TRUE will record iteration messages to \code{booster$record_evals}}
+
+\item{eval_freq}{evaluation output frequency, only takes effect when verbose > 0}
+
+\item{init_model}{path of a model file or a \code{lgb.Booster} object; training will continue from this model}
+
+\item{colnames}{feature names; if not NULL, will be used to overwrite the names in the dataset}
+
+\item{categorical_feature}{list of str or int.
+Type int represents indices,
+type str represents feature names.}
+
+\item{early_stopping_rounds}{int.
+Activates early stopping.
+Requires at least one validation dataset and one metric.
+If there's more than one, all of them except the training data will be checked.
+Returns the model with (best_iter + early_stopping_rounds) iterations.
+If early stopping occurs, the model will have a 'best_iter' field.}
+
+\item{callbacks}{List of callback functions that are applied at each iteration.}
+
+\item{reset_data}{Boolean; setting it to TRUE (not the default value) will transform the booster model into a predictor model, which frees up memory and the original datasets}
+
+\item{...}{other parameters, see Parameters.rst for more information. A few key parameters:
+\itemize{
+  \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
+  \item{num_leaves}{number of leaves in one tree. Defaults to 127.}
+  \item{max_depth}{Limit the max depth of the tree model. This is used to deal with
+                   overfitting when #data is small. Trees still grow leaf-wise.}
+  \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
+                     the number of real CPU cores, not the number of threads (most
+                     CPUs use hyper-threading to generate 2 threads per CPU core).}
+}}
+}
+\value{
+a trained booster model \code{lgb.Booster}.
+}
+\description{
+Logic to train with LightGBM
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+
+}
diff --git a/R-package/man/lgb.unloader.Rd b/R-package/man/lgb.unloader.Rd
new file mode 100644
index 000000000000..9ea57f54a195
--- /dev/null
+++ b/R-package/man/lgb.unloader.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.unloader.R
+\name{lgb.unloader}
+\alias{lgb.unloader}
+\title{LightGBM unloading error fix}
+\usage{
+lgb.unloader(restore = TRUE, wipe = FALSE, envir = .GlobalEnv)
+}
+\arguments{
+\item{restore}{Whether to reload \code{LightGBM} immediately after detaching it from R. Defaults to \code{TRUE}, which means \code{LightGBM} is automatically reloaded once unloading is performed.}
+
+\item{wipe}{Whether to wipe all \code{lgb.Dataset} and \code{lgb.Booster} objects from the global environment. Defaults to \code{FALSE}, which means they are not removed.}
+
+\item{envir}{The environment to perform wiping on if \code{wipe == TRUE}. Defaults to \code{.GlobalEnv}, which is the global environment.}
+}
+\value{
+NULL invisibly.
+}
+\description{
+Attempts to unload the LightGBM package so you can remove objects cleanly without having to restart R. This is useful, for instance, if an object becomes stuck for no apparent reason and you do not want to restart R to fix it.
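+
+Conceptually, the unloading step is similar to the following base R sketch
+(an illustrative approximation, not the exact implementation):
+\preformatted{
+# hypothetical sketch: detach the package namespace and unload it
+detach("package:lightgbm", unload = TRUE)
+}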
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+lgb.unloader(restore = FALSE, wipe = FALSE, envir = .GlobalEnv)
+rm(model, dtrain, dtest) # Not needed if wipe = TRUE
+gc() # Not needed if wipe = TRUE
+
+library(lightgbm)
+# Do whatever you want again with LightGBM without object clashing
+
+}
diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd
new file mode 100644
index 000000000000..0a40c2ae31ed
--- /dev/null
+++ b/R-package/man/lgb_shared_params.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lightgbm.R
+\name{lgb_shared_params}
+\alias{lgb_shared_params}
+\title{Shared parameter docs}
+\arguments{
+\item{callbacks}{List of callback functions that are applied at each iteration.}
+
+\item{data}{a \code{lgb.Dataset} object, used for training}
+
+\item{early_stopping_rounds}{int.
+Activates early stopping.
+Requires at least one validation dataset and one metric.
+If there's more than one, all of them except the training data will be checked.
+Returns the model with (best_iter + early_stopping_rounds) iterations.
+If early stopping occurs, the model will have a 'best_iter' field.}
+
+\item{eval_freq}{evaluation output frequency, only takes effect when verbose > 0}
+
+\item{init_model}{path of a model file or a \code{lgb.Booster} object; training will continue from this model}
+
+\item{nrounds}{number of training rounds}
+
+\item{params}{List of parameters}
+
+\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
+}
+\description{
+Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm}
+}
diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd
new file mode 100644
index 000000000000..d0582110a156
--- /dev/null
+++ b/R-package/man/lightgbm.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lightgbm.R
+\name{lightgbm}
+\alias{lightgbm}
+\title{Train a LightGBM model}
+\usage{
+lightgbm(data, label = NULL, weight = NULL, params = list(),
+  nrounds = 10, verbose = 1, eval_freq = 1L,
+  early_stopping_rounds = NULL, save_name = "lightgbm.model",
+  init_model = NULL, callbacks = list(), ...)
+}
+\arguments{
+\item{data}{a \code{lgb.Dataset} object, used for training}
+
+\item{label}{Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}}
+
+\item{weight}{vector of instance weights. If not NULL, it will be set in the dataset}
+
+\item{params}{List of parameters}
+
+\item{nrounds}{number of training rounds}
+
+\item{verbose}{verbosity for output; if <= 0, printing of evaluation during training is also disabled}
+
+\item{eval_freq}{evaluation output frequency, only takes effect when verbose > 0}
+
+\item{early_stopping_rounds}{int.
+Activates early stopping.
+Requires at least one validation dataset and one metric.
+If there's more than one, all of them except the training data will be checked.
+Returns the model with (best_iter + early_stopping_rounds) iterations.
+If early stopping occurs, the model will have a 'best_iter' field.}
+
+\item{save_name}{File name to use when writing the trained model to disk. Should end in ".model".}
+
+\item{init_model}{path of a model file or a \code{lgb.Booster} object; training will continue from this model}
+
+\item{callbacks}{List of callback functions that are applied at each iteration.}
+
+\item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example
+\itemize{
+  \item{valids}{a list of \code{lgb.Dataset} objects, used for validation}
+  \item{obj}{objective function, can be character or custom objective function. Examples include
+             \code{regression}, \code{regression_l1}, \code{huber},
+             \code{binary}, \code{lambdarank}, \code{multiclass}}
+  \item{eval}{evaluation function, can be (a list of) character or custom eval function}
+  \item{record}{Boolean, TRUE will record iteration messages to \code{booster$record_evals}}
+  \item{colnames}{feature names; if not NULL, will be used to overwrite the names in the dataset}
+  \item{categorical_feature}{list of str or int. Type int represents indices, type str represents feature names.}
+  \item{reset_data}{Boolean; setting it to TRUE (not the default value) will transform the booster model
+                    into a predictor model, which frees up memory and the original datasets}
+  \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
+  \item{num_leaves}{number of leaves in one tree. Defaults to 127.}
+  \item{max_depth}{Limit the max depth of the tree model. This is used to deal with
+                   overfitting when #data is small. Trees still grow leaf-wise.}
+  \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
+                     the number of real CPU cores, not the number of threads (most
+                     CPUs use hyper-threading to generate 2 threads per CPU core).}
+}}
+}
+\description{
+Simple interface for training a LightGBM model.
+}
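+\examples{
+# Hedged sketch added for illustration (not generated from the package
+# sources): per the usage above, the simple interface also accepts a raw
+# feature matrix plus a label vector in place of a constructed lgb.Dataset
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+model <- lightgbm(data = agaricus.train$data,
+                  label = agaricus.train$label,
+                  params = list(objective = "binary"),
+                  nrounds = 10)
+}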
diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd
new file mode 100644
index 000000000000..310375ba8742
--- /dev/null
+++ b/R-package/man/predict.lgb.Booster.Rd
@@ -0,0 +1,65 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Booster.R
+\name{predict.lgb.Booster}
+\alias{predict.lgb.Booster}
+\title{Predict method for LightGBM model}
+\usage{
+\method{predict}{lgb.Booster}(object, data, num_iteration = NULL,
+  rawscore = FALSE, predleaf = FALSE, predcontrib = FALSE,
+  header = FALSE, reshape = FALSE, ...)
+}
+\arguments{
+\item{object}{Object of class \code{lgb.Booster}}
+
+\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character string representing a filename}
+
+\item{num_iteration}{number of iterations to predict with; NULL or <= 0 means use the best iteration}
+
+\item{rawscore}{whether the prediction should be returned in the form of the original untransformed
+sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE} for
+logistic regression would result in predictions for log-odds instead of probabilities.}
+
+\item{predleaf}{whether to predict leaf indices instead.}
+
+\item{predcontrib}{whether to return per-feature contributions for each record.}
+
+\item{header}{only used for prediction from a text file. True if the text file has a header}
+
+\item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
+prediction outputs per case.}
+
+\item{...}{Additional named arguments passed to the \code{predict()} method of
+the \code{lgb.Booster} object passed to \code{object}.}
+}
+\value{
+For regression or binary classification, it returns a vector of length \code{nrow(data)}.
+For multiclass classification, either a \code{num_class * nrow(data)} vector or
+a \code{(nrow(data), num_class)} dimension matrix is returned, depending on
+the \code{reshape} value.
+
+When \code{predleaf = TRUE}, the output is a matrix object with the
+number of columns corresponding to the number of trees.
+}
+\description{
+Predicted values based on class \code{lgb.Booster}
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+preds <- predict(model, test$data)
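+
+# Hedged addition (not part of the original example): per the Value
+# section above, leaf-index prediction returns a matrix with one
+# column per tree
+pred_leaves <- predict(model, test$data, predleaf = TRUE)
+dim(pred_leaves)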
+
+}
diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd
new file mode 100644
index 000000000000..05f4eedb104e
--- /dev/null
+++ b/R-package/man/readRDS.lgb.Booster.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/readRDS.lgb.Booster.R
+\name{readRDS.lgb.Booster}
+\alias{readRDS.lgb.Booster}
+\title{readRDS for lgb.Booster models}
+\usage{
+readRDS.lgb.Booster(file = "", refhook = NULL)
+}
+\arguments{
+\item{file}{a connection or the name of the file where the R object is saved to or read from.}
+
+\item{refhook}{a hook function for handling reference objects.}
+}
+\value{
+lgb.Booster.
+}
+\description{
+Attempts to load a model using RDS.
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(params,
+                   dtrain,
+                   100,
+                   valids,
+                   min_data = 1,
+                   learning_rate = 1,
+                   early_stopping_rounds = 10)
+saveRDS.lgb.Booster(model, "model.rds")
+new_model <- readRDS.lgb.Booster("model.rds")
+
+}
diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd
new file mode 100644
index 000000000000..b302b2c3b6c0
--- /dev/null
+++ b/R-package/man/saveRDS.lgb.Booster.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/saveRDS.lgb.Booster.R
+\name{saveRDS.lgb.Booster}
+\alias{saveRDS.lgb.Booster}
+\title{saveRDS for lgb.Booster models}
+\usage{
+saveRDS.lgb.Booster(object, file = "", ascii = FALSE, version = NULL,
+  compress = TRUE, refhook = NULL, raw = TRUE)
+}
+\arguments{
+\item{object}{R object to serialize.}
+
+\item{file}{a connection or the name of the file where the R object is saved to or read from.}
+
+\item{ascii}{a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), a binary one is used. See the comments in the help for \code{save}.}
+
+\item{version}{the workspace format version to use. \code{NULL} specifies the current default version (2). Versions prior to 2 are not supported, so this will only be relevant when there are later versions.}
+
+\item{compress}{a logical specifying whether saving to a named file is to use "gzip" compression, or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of compression to be used. Ignored if file is a connection.}
+
+\item{refhook}{a hook function for handling reference objects.}
+
+\item{raw}{whether to save the model in a raw variable or not; it is recommended to leave this set to \code{TRUE}.}
+}
+\value{
+NULL invisibly.
+}
+\description{
+Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not.
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+params <- list(objective = "regression", metric = "l2")
+valids <- list(test = dtest)
+model <- lgb.train(
+    params
+    , dtrain
+    , 100
+    , valids
+    , min_data = 1
+    , learning_rate = 1
+    , early_stopping_rounds = 10
+)
+saveRDS.lgb.Booster(model, "model.rds")
+}
diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd
new file mode 100644
index 000000000000..92da5034659a
--- /dev/null
+++ b/R-package/man/setinfo.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{setinfo}
+\alias{setinfo}
+\alias{setinfo.lgb.Dataset}
+\title{Set information of an lgb.Dataset object}
+\usage{
+setinfo(dataset, ...)
+
+\method{setinfo}{lgb.Dataset}(dataset, name, info, ...)
+}
+\arguments{
+\item{dataset}{Object of class "lgb.Dataset"}
+
+\item{...}{other parameters}
+
+\item{name}{the name of the field to set}
+
+\item{info}{the specific field of information to set}
+}
+\value{
+the passed object
+}
+\description{
+Set information of an lgb.Dataset object
+}
+\details{
+The \code{name} field can be one of the following:
+
+\itemize{
+  \item \code{label}: the label that LightGBM learns from;
+  \item \code{weight}: weights used to rescale each instance;
+  \item \code{init_score}: the initial score, i.e. the base prediction LightGBM will boost from;
+  \item \code{group}.
+}
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+lgb.Dataset.construct(dtrain)
+
+labels <- lightgbm::getinfo(dtrain, "label")
+lightgbm::setinfo(dtrain, "label", 1 - labels)
+
+labels2 <- lightgbm::getinfo(dtrain, "label")
+stopifnot(all.equal(labels2, 1 - labels))
+
+}
diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd
new file mode 100644
index 000000000000..e126b89a837a
--- /dev/null
+++ b/R-package/man/slice.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.Dataset.R
+\name{slice}
+\alias{slice}
+\alias{slice.lgb.Dataset}
+\title{Slice a dataset}
+\usage{
+slice(dataset, ...)
+
+\method{slice}{lgb.Dataset}(dataset, idxset, ...)
+}
+\arguments{
+\item{dataset}{Object of class "lgb.Dataset"}
+
+\item{...}{other parameters (currently not used)}
+
+\item{idxset}{an integer vector of indices of the rows needed}
+}
+\value{
+the constructed sub-dataset
+}
+\description{
+Get a new \code{lgb.Dataset} containing the specified rows of the
+original lgb.Dataset object
+}
+\examples{
+library(lightgbm)
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+
+dsub <- lightgbm::slice(dtrain, 1:42)
+lgb.Dataset.construct(dsub)
+labels <- lightgbm::getinfo(dsub, "label")
+
+}

From 606cde52a97cc1917286b3fea8d31519cffaf29c Mon Sep 17 00:00:00 2001
From: StrikerRUS
Date: Sun, 19 May 2019 19:01:41 +0300
Subject: [PATCH 4/8] specify full path to conda and make script more readable
 by one line - one pkg

---
 docs/conf.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 713eacdad57f..a212553bb451 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -219,11 +219,19 @@ def generate_r_docs(app):
         The application object representing the Sphinx process.
     """
     commands = """
-    export PATH="/home/docs/.conda/bin:$PATH"
     echo 'options(repos = "https://cran.rstudio.com")' > $HOME/.Rprofile
-    conda create -q -y -n r_env r-base r-devtools r-data.table r-jsonlite r-magrittr r-matrix r-testthat cmake
-    conda install -q -y -n r_env -c conda-forge r-pkgdown
-    source activate r_env
+    /home/docs/.conda/bin/conda create -q -y -n r_env \
+        r-base \
+        r-devtools \
+        r-data.table \
+        r-jsonlite \
+        r-magrittr \
+        r-matrix \
+        r-testthat \
+        cmake
+    /home/docs/.conda/bin/conda install -q -y -n r_env -c conda-forge \
+        r-pkgdown
+    source /home/docs/.conda/bin/activate r_env
    export TAR=/bin/tar
    cd {0}
    sed -i'.bak' '/# Build the package (do not touch this line!)/q' build_r.R

From 9ed2216e54171225e97c4404355a7365a21aebab Mon Sep 17 00:00:00 2001
From: StrikerRUS
Date: Sun, 19 May 2019 19:04:28 +0300
Subject: [PATCH 5/8] removed commented lines from build_r_site script

---
 build_r_site.R | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/build_r_site.R b/build_r_site.R
index db287503d363..0a284ba033c5 100644
--- a/build_r_site.R
+++ b/build_r_site.R
@@ -12,12 +12,3 @@ build_home(preview = FALSE, quiet = FALSE)
 build_reference(lazy = FALSE, document = FALSE,
                 examples = TRUE, run_dont_run = FALSE,
                 seed = 42, preview = FALSE)
-# # to-do
-# build_articles(preview = FALSE)
-# build_tutorials(preview = FALSE)
-# build_news(preview = FALSE)
-
-# # doesn't work
-# pkgdown::build_site(pkg = ".", examples = FALSE, document = TRUE,
-#                     run_dont_run = TRUE, seed = 1014, lazy = FALSE,
-#                     override = list(), preview = NA, new_process = FALSE)

From 05404c022f73459924b9a7d43788a1d2c4854270 Mon Sep 17 00:00:00 2001
From: StrikerRUS
Date: Sun, 19 May 2019 19:06:48 +0300
Subject: [PATCH 6/8] made one line - one argument in build_reference() call

---
 build_r_site.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/build_r_site.R b/build_r_site.R
index 0a284ba033c5..fd2f95bc7c7d 100644
--- a/build_r_site.R
+++ b/build_r_site.R
@@ -9,6 +9,9 @@ devtools::document()
 clean_site()
 init_site()
 build_home(preview = FALSE, quiet = FALSE)
-build_reference(lazy = FALSE, document = FALSE,
-                examples = TRUE, run_dont_run = FALSE,
-                seed = 42, preview = FALSE)
+build_reference(lazy = FALSE,
+                document = FALSE,
+                examples = TRUE,
+                run_dont_run = FALSE,
+                seed = 42,
+                preview = FALSE)

From 014bee124102f4a1762e4be5316fab52e852f143 Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Wed, 5 Jun 2019 15:07:08 +0300
Subject: [PATCH 7/8] pin R package versions

---
 docs/conf.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index a212553bb451..725baabd4db9 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -221,16 +221,16 @@ def generate_r_docs(app):
     commands = """
     echo 'options(repos = "https://cran.rstudio.com")' > $HOME/.Rprofile
     /home/docs/.conda/bin/conda create -q -y -n r_env \
-        r-base \
-        r-devtools \
-        r-data.table \
-        r-jsonlite \
-        r-magrittr \
-        r-matrix \
-        r-testthat \
-        cmake
+        r-base=3.5.1=h1e0a451_2 \
+        r-devtools=1.13.6=r351h6115d3f_0 \
+        r-data.table=1.11.4=r351h96ca727_0 \
+        r-jsonlite=1.5=r351h96ca727_0 \
+        r-magrittr=1.5=r351h6115d3f_4 \
+        r-matrix=1.2_14=r351h96ca727_0 \
+        r-testthat=2.0.0=r351h29659fb_0 \
+        cmake=3.14.0=h52cb24c_0
     /home/docs/.conda/bin/conda install -q -y -n r_env -c conda-forge \
-        r-pkgdown
+        r-pkgdown=1.3.0=r351h6115d3f_1000
     source /home/docs/.conda/bin/activate r_env
    export TAR=/bin/tar
    cd {0}

From da2a9a32e9c1de7c2353c0c53ef017a7551554b1 Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Mon, 5 Aug 2019 00:20:56 +0300
Subject: [PATCH 8/8] fixed conflict

---
 docs/README.rst | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/docs/README.rst b/docs/README.rst
index 87a41fb854b6..b4a3e3f9e464 100644
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -20,15 +20,10 @@ You can build the documentation locally. Just install Doxygen and run in ``docs`` folder:
   pip install -r requirements.txt
   make html

-<<<<<<< HEAD
 Unfortunately, documentation for R code is built only on our site,
 and commands above will not build it for you locally.
 Consider using common R utilities for documentation generation, if you need it.

-If you faced any problems with Doxygen installation or you simply do not need documentation for C code,
-it is possible to build the documentation without it:
-=======
 If you faced any problems with Doxygen installation or you simply do not need documentation for C code, it is possible to build the documentation without it:
->>>>>>> master

 .. code:: sh