From 518f60feb1f378ec46ede3197a296a4dc5ed0426 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 18 Mar 2024 15:07:11 -0500 Subject: [PATCH 1/6] try to fix r-sanitizers jobs --- .appveyor.yml | 43 --- .github/workflows/cuda.yml | 115 -------- .github/workflows/python_package.yml | 113 -------- .github/workflows/r_package.yml | 2 +- .vsts-ci.yml | 400 --------------------------- 5 files changed, 1 insertion(+), 672 deletions(-) delete mode 100644 .appveyor.yml delete mode 100644 .github/workflows/cuda.yml delete mode 100644 .github/workflows/python_package.yml delete mode 100644 .vsts-ci.yml diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index bb15958136c3..000000000000 --- a/.appveyor.yml +++ /dev/null @@ -1,43 +0,0 @@ -version: 4.3.0.99.{build} - -image: Visual Studio 2015 -platform: x64 -configuration: # a trick to construct a build matrix with multiple Python versions - - '3.8' - -# only build pull requests and -# commits to 'master' or any branch starting with 'release' -branches: - only: - - master - - /^release/ - -environment: - matrix: - - COMPILER: MSVC - TASK: python - - COMPILER: MINGW - TASK: python - -clone_depth: 5 - -install: - - git submodule update --init --recursive # get `external_libs` folder - - set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH% - - set PYTHON_VERSION=%CONFIGURATION% - - set CONDA_ENV="test-env" - - ps: | - $env:MINICONDA = "C:\Miniconda3-x64" - $env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH" - $env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER" - $env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim() - -build: false - -test_script: - - conda config --remove channels defaults - - conda config --add channels nodefaults - - conda config --add channels conda-forge - - conda config --set channel_priority strict - - conda init powershell - - powershell.exe -ExecutionPolicy Bypass -File %APPVEYOR_BUILD_FOLDER%\.ci\test_windows.ps1 diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml deleted file mode 100644 index b7f825b245b8..000000000000 --- a/.github/workflows/cuda.yml +++ /dev/null @@ -1,115 +0,0 @@ -name: CUDA Version - -on: - push: - branches: - - master - pull_request: - branches: - - master - - release/* - -# automatically cancel in-progress builds if another commit is pushed -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - github_actions: 'true' - os_name: linux - conda_env: test-env - -jobs: - test: - name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }}) - runs-on: [self-hosted, linux] - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - include: - - method: wheel - compiler: gcc - python_version: "3.11" - cuda_version: "11.8.0" - task: cuda - - method: source - compiler: gcc - python_version: "3.9" - cuda_version: "12.2.0" - task: cuda - - method: pip - compiler: clang - python_version: "3.10" - cuda_version: "11.8.0" - task: cuda - steps: - - name: Setup or update software on host machine - run: | - sudo apt-get update - sudo apt-get install --no-install-recommends -y \ - apt-transport-https \ - ca-certificates \ - curl \ - git \ - gnupg-agent \ - lsb-release \ - software-properties-common - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y - curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -sL https://nvidia.github.io/nvidia-docker/$(. /etc/os-release;echo $ID$VERSION_ID)/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - sudo apt-get update - sudo apt-get install --no-install-recommends -y \ - containerd.io \ - docker-ce \ - docker-ce-cli \ - nvidia-docker2 - sudo chmod a+rw /var/run/docker.sock - sudo systemctl restart docker - - name: Remove old folder with repository - run: sudo rm -rf $GITHUB_WORKSPACE - - name: Checkout repository - uses: actions/checkout@v1 - with: - fetch-depth: 5 - submodules: true - - name: Setup and run tests - run: | - export ROOT_DOCKER_FOLDER=/LightGBM - cat > docker.env < docker-script.sh < '$(Build.ArtifactStagingDirectory)/commit.txt' - displayName: 'Add commit hash to artifacts archive' - - bash: $(Build.SourcesDirectory)/.ci/setup.sh - displayName: Setup - - bash: $(Build.SourcesDirectory)/.ci/test.sh - displayName: Test - - task: PublishBuildArtifacts@1 - condition: and(succeeded(), in(variables['TASK'], 'regular', 'sdist', 'bdist', 'swig'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) - inputs: - pathtoPublish: '$(Build.ArtifactStagingDirectory)' - artifactName: PackageAssets - artifactType: container -########################################### -- job: Linux_latest -########################################### - variables: - COMPILER: clang-17 - DEBIAN_FRONTEND: 'noninteractive' - IN_UBUNTU_BASE_CONTAINER: 'true' - OS_NAME: 'linux' - SETUP_CONDA: 'true' - pool: sh-mariner - container: ubuntu-latest - strategy: - matrix: - regular: - TASK: regular - sdist: - TASK: sdist - bdist: - TASK: bdist - PYTHON_VERSION: '3.9' - inference: - TASK: if-else - mpi_source: - TASK: mpi - METHOD: source - mpi_pip: - TASK: mpi - METHOD: pip - PYTHON_VERSION: '3.10' - mpi_wheel: - TASK: mpi - METHOD: wheel - PYTHON_VERSION: '3.8' - gpu_source: - TASK: gpu - METHOD: source - PYTHON_VERSION: '3.10' - gpu_pip: - TASK: gpu - METHOD: pip - PYTHON_VERSION: '3.9' - gpu_wheel: - TASK: gpu - METHOD: wheel - PYTHON_VERSION: '3.8' - cpp_tests: - TASK: cpp-tests - METHOD: with-sanitizers - steps: - - script: | - echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" - echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" - CONDA=$HOME/miniforge - echo "##vso[task.setvariable variable=CONDA]$CONDA" - echo "##vso[task.prependpath]$CONDA/bin" - displayName: 'Set variables' - # https://github.com/microsoft/azure-pipelines-agent/issues/2043#issuecomment-687983301 - - script: | - /tmp/docker exec -t -u 0 ci-container \ - sh -c "apt-get update && apt-get -o Dpkg::Options::="--force-confold" -y install sudo" - displayName: 'Install sudo' - - bash: $(Build.SourcesDirectory)/.ci/setup.sh - displayName: Setup - - bash: $(Build.SourcesDirectory)/.ci/test.sh - displayName: Test -########################################### -- job: QEMU_multiarch -########################################### - variables: - COMPILER: gcc - OS_NAME: 'linux' - PRODUCES_ARTIFACTS: 'true' - pool: - vmImage: ubuntu-22.04 - timeoutInMinutes: 180 - strategy: - matrix: - bdist: - TASK: bdist - ARCH: aarch64 - steps: - - script: | - sudo apt-get update - sudo apt-get install --no-install-recommends -y \ - binfmt-support \ - qemu \ - qemu-user \ - qemu-user-static - displayName: 'Install QEMU' - - script: | - docker run --rm --privileged multiarch/qemu-user-static --reset -p yes - displayName: 'Enable Docker multi-architecture support' - - script: | - export ROOT_DOCKER_FOLDER=/LightGBM - cat > docker.env < docker-script.sh < Date: Mon, 18 Mar 2024 15:48:59 -0500 Subject: [PATCH 2/6] try compiling without sanitizers --- .github/workflows/r_package.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 43dc9ed59c03..dd1655673f35 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -253,9 +253,9 @@ jobs: - name: Install packages shell: bash run: | - RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.r-project.org')" - sh build-cran-package.sh --r-executable=RD${{ matrix.r_customization }} - RD${{ matrix.r_customization }} CMD INSTALL lightgbm_*.tar.gz || exit 1 + RDscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.r-project.org')" + sh build-cran-package.sh --r-executable=RD + RD CMD INSTALL lightgbm_*.tar.gz || exit 1 - name: Run tests with sanitizers shell: bash run: | From 6582aae48cd60bd3eeca121f7448ae16f1fb8ef2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 18 Mar 2024 16:35:52 -0500 Subject: [PATCH 3/6] check docker resources --- .github/workflows/r_package.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index dd1655673f35..0f22e57d03d4 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -232,7 +232,7 @@ jobs: name: r-sanitizers (ubuntu-latest, R-devel, ${{ matrix.compiler }} ASAN/UBSAN) timeout-minutes: 60 runs-on: ubuntu-latest - container: wch1/r-debug + #container: wch1/r-debug strategy: fail-fast: false matrix: @@ -242,6 +242,10 @@ jobs: - r_customization: csan compiler: clang steps: + - name: get docker info + run: | + docker info + exit 1 - name: Trust git cloning LightGBM run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" From 15f75cc8e8ccea4be5c3784819a98ba6f26bde14 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 18 Mar 2024 23:39:50 -0500 Subject: [PATCH 4/6] try explicitly giving it more memory --- .github/workflows/r_package.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 0f22e57d03d4..5f0d80476c7c 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -232,7 +232,11 @@ jobs: name: r-sanitizers (ubuntu-latest, R-devel, ${{ matrix.compiler }} ASAN/UBSAN) timeout-minutes: 60 runs-on: ubuntu-latest - #container: wch1/r-debug + # ref: https://docs.github.com/en/actions/using-jobs/running-jobs-in-a-container#setting-container-resource-options + # ref: https://docs.docker.com/config/containers/resource_constraints/#limit-a-containers-access-to-memory + container: + image: wch1/r-debug + options: --cpus 2 --memory 8g strategy: fail-fast: false matrix: @@ -242,10 +246,6 @@ jobs: - r_customization: csan compiler: clang steps: - - name: get docker info - run: | - docker info - exit 1 - name: Trust git cloning LightGBM run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" @@ -257,9 +257,9 @@ jobs: - name: Install packages shell: bash run: | - RDscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.r-project.org')" - sh build-cran-package.sh --r-executable=RD - RD CMD INSTALL lightgbm_*.tar.gz || exit 1 + RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + sh build-cran-package.sh --r-executable=RD${{ matrix.r_customization }} + RD${{ matrix.r_customization }} CMD INSTALL lightgbm_*.tar.gz || exit 1 - name: Run tests with sanitizers shell: bash run: | From fc7f740a1aa1ee500e97ded804d3ef94058f778a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 20 Mar 2024 15:56:44 -0500 Subject: [PATCH 5/6] I hope the new images fixed the problem --- .github/workflows/r_package.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 5f0d80476c7c..7d79cdab1d21 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -232,11 +232,7 @@ jobs: name: r-sanitizers (ubuntu-latest, R-devel, ${{ matrix.compiler }} ASAN/UBSAN) timeout-minutes: 60 runs-on: ubuntu-latest - # ref: https://docs.github.com/en/actions/using-jobs/running-jobs-in-a-container#setting-container-resource-options - # ref: https://docs.docker.com/config/containers/resource_constraints/#limit-a-containers-access-to-memory - container: - image: wch1/r-debug - options: --cpus 2 --memory 8g + container: wch1/r-debug strategy: fail-fast: false matrix: @@ -319,7 +315,7 @@ jobs: all-r-package-jobs-successful: if: always() runs-on: ubuntu-latest - needs: [test, test-r-debian-clang] + needs: [test, test-r-sanitizers, test-r-debian-clang] steps: - name: Note that all tests succeeded uses: re-actors/alls-green@v1.2.2 From becdd54eb796c5dd98f9896d3990060f63a0c66b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 20 Mar 2024 16:21:18 -0500 Subject: [PATCH 6/6] restore all the other CI jobs --- .appveyor.yml | 43 +++ .github/workflows/cuda.yml | 115 ++++++++ .github/workflows/python_package.yml | 113 ++++++++ .vsts-ci.yml | 400 +++++++++++++++++++++++++++ 4 files changed, 671 insertions(+) create mode 100644 .appveyor.yml create mode 100644 .github/workflows/cuda.yml create mode 100644 .github/workflows/python_package.yml create mode 100644 .vsts-ci.yml diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 000000000000..bb15958136c3 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,43 @@ +version: 4.3.0.99.{build} + +image: Visual Studio 2015 +platform: x64 +configuration: # a trick to construct a build matrix with multiple Python versions + - '3.8' + +# only build pull requests and +# commits to 'master' or any branch starting with 'release' +branches: + only: + - master + - /^release/ + +environment: + matrix: + - COMPILER: MSVC + TASK: python + - COMPILER: MINGW + TASK: python + +clone_depth: 5 + +install: + - git submodule update --init --recursive # get `external_libs` folder + - set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH% + - set PYTHON_VERSION=%CONFIGURATION% + - set CONDA_ENV="test-env" + - ps: | + $env:MINICONDA = "C:\Miniconda3-x64" + $env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH" + $env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER" + $env:LGB_VER = (Get-Content $env:APPVEYOR_BUILD_FOLDER\VERSION.txt).trim() + +build: false + +test_script: + - conda config --remove channels defaults + - conda config --add channels nodefaults + - conda config --add channels conda-forge + - conda config --set channel_priority strict + - conda init powershell + - powershell.exe -ExecutionPolicy Bypass -File %APPVEYOR_BUILD_FOLDER%\.ci\test_windows.ps1 diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml new file mode 100644 index 000000000000..b7f825b245b8 --- /dev/null +++ b/.github/workflows/cuda.yml @@ -0,0 +1,115 @@ +name: CUDA Version + +on: + push: + branches: + - master + pull_request: + branches: + - master + - release/* + +# automatically cancel in-progress builds if another commit is pushed +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + github_actions: 'true' + os_name: linux + conda_env: test-env + +jobs: + test: + name: ${{ matrix.task }} ${{ matrix.cuda_version }} ${{ matrix.method }} (linux, ${{ matrix.compiler }}, Python ${{ matrix.python_version }}) + runs-on: [self-hosted, linux] + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - method: wheel + compiler: gcc + python_version: "3.11" + cuda_version: "11.8.0" + task: cuda + - method: source + compiler: gcc + python_version: "3.9" + cuda_version: "12.2.0" + task: cuda + - method: pip + compiler: clang + python_version: "3.10" + cuda_version: "11.8.0" + task: cuda + steps: + - name: Setup or update software on host machine + run: | + sudo apt-get update + sudo apt-get install --no-install-recommends -y \ + apt-transport-https \ + ca-certificates \ + curl \ + git \ + gnupg-agent \ + lsb-release \ + software-properties-common + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" -y + curl -sL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - + curl -sL https://nvidia.github.io/nvidia-docker/$(. /etc/os-release;echo $ID$VERSION_ID)/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list + sudo apt-get update + sudo apt-get install --no-install-recommends -y \ + containerd.io \ + docker-ce \ + docker-ce-cli \ + nvidia-docker2 + sudo chmod a+rw /var/run/docker.sock + sudo systemctl restart docker + - name: Remove old folder with repository + run: sudo rm -rf $GITHUB_WORKSPACE + - name: Checkout repository + uses: actions/checkout@v1 + with: + fetch-depth: 5 + submodules: true + - name: Setup and run tests + run: | + export ROOT_DOCKER_FOLDER=/LightGBM + cat > docker.env < docker-script.sh < '$(Build.ArtifactStagingDirectory)/commit.txt' + displayName: 'Add commit hash to artifacts archive' + - bash: $(Build.SourcesDirectory)/.ci/setup.sh + displayName: Setup + - bash: $(Build.SourcesDirectory)/.ci/test.sh + displayName: Test + - task: PublishBuildArtifacts@1 + condition: and(succeeded(), in(variables['TASK'], 'regular', 'sdist', 'bdist', 'swig'), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) + inputs: + pathtoPublish: '$(Build.ArtifactStagingDirectory)' + artifactName: PackageAssets + artifactType: container +########################################### +- job: Linux_latest +########################################### + variables: + COMPILER: clang-17 + DEBIAN_FRONTEND: 'noninteractive' + IN_UBUNTU_BASE_CONTAINER: 'true' + OS_NAME: 'linux' + SETUP_CONDA: 'true' + pool: sh-mariner + container: ubuntu-latest + strategy: + matrix: + regular: + TASK: regular + sdist: + TASK: sdist + bdist: + TASK: bdist + PYTHON_VERSION: '3.9' + inference: + TASK: if-else + mpi_source: + TASK: mpi + METHOD: source + mpi_pip: + TASK: mpi + METHOD: pip + PYTHON_VERSION: '3.10' + mpi_wheel: + TASK: mpi + METHOD: wheel + PYTHON_VERSION: '3.8' + gpu_source: + TASK: gpu + METHOD: source + PYTHON_VERSION: '3.10' + gpu_pip: + TASK: gpu + METHOD: pip + PYTHON_VERSION: '3.9' + gpu_wheel: + TASK: gpu + METHOD: wheel + PYTHON_VERSION: '3.8' + cpp_tests: + TASK: cpp-tests + METHOD: with-sanitizers + steps: + - script: | + echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" + echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" + CONDA=$HOME/miniforge + echo "##vso[task.setvariable variable=CONDA]$CONDA" + echo "##vso[task.prependpath]$CONDA/bin" + displayName: 'Set variables' + # https://github.com/microsoft/azure-pipelines-agent/issues/2043#issuecomment-687983301 + - script: | + /tmp/docker exec -t -u 0 ci-container \ + sh -c "apt-get update && apt-get -o Dpkg::Options::="--force-confold" -y install sudo" + displayName: 'Install sudo' + - bash: $(Build.SourcesDirectory)/.ci/setup.sh + displayName: Setup + - bash: $(Build.SourcesDirectory)/.ci/test.sh + displayName: Test +########################################### +- job: QEMU_multiarch +########################################### + variables: + COMPILER: gcc + OS_NAME: 'linux' + PRODUCES_ARTIFACTS: 'true' + pool: + vmImage: ubuntu-22.04 + timeoutInMinutes: 180 + strategy: + matrix: + bdist: + TASK: bdist + ARCH: aarch64 + steps: + - script: | + sudo apt-get update + sudo apt-get install --no-install-recommends -y \ + binfmt-support \ + qemu \ + qemu-user \ + qemu-user-static + displayName: 'Install QEMU' + - script: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + displayName: 'Enable Docker multi-architecture support' + - script: | + export ROOT_DOCKER_FOLDER=/LightGBM + cat > docker.env < docker-script.sh <