# Captured from the GitHub Actions web UI: run "Build win_openwebui #212"
# (workflow "Build win_openwebui").
# Workflow file for this run:

# Manually triggered workflow: builds a conda environment from
# <build_folder>/environment.yml on a Windows runner, optionally fetches
# Spark NLP jars and spaCy models, and publishes the result as split 7z volumes.
name: build_win_env
run-name: Build ${{ github.event.inputs.build_folder }}
on:
  workflow_dispatch:
    inputs:
      build_folder:
        description: "From which folder's environment.yml, the conda environment will be built. Don't use any whitespace."
        type: string
        required: false
        default: 'win_cuda_3.4'
      target_folder:
        description: 'Envs path (must be on D drive)'
        type: string
        required: false
        default: 'D:\conda_envs_jianlins'
      # The on/off switches below are strings; steps compare them with 'true'.
      download_jars:
        description: "whether download jars for pyspark"
        type: string
        required: false
        default: 'false'
      sparknlp_version:
        description: "sparknlp fat jar version"
        type: string
        required: false
        default: '5.3.3'
      sparknlp_gpu:
        description: "sparknlp fat jar version, if wanting gpu, use '-gpu', otherwise leave it blank"
        type: string
        required: false
        default: ''
      download_spacy_model:
        description: "whether download sm spacy models"
        type: string
        required: false
        default: 'true'
      zip_vol_size:
        description: 'Max 7zip volumn size'
        type: string
        required: false
        default: '800m'
      zip_jar_only:
        description: "whether zip jars file only"
        type: string
        required: false
        default: 'false'
      retention_days:
        description: 'Days to keep the artifacts'
        # `number` is the documented numeric input type; `int` is not one of
        # the supported types (boolean, choice, number, environment, string).
        type: number
        required: false
        default: 7
      remove_cache:
        description: "whether remove the ivy cache, some setups might require it in air-gap environment"
        type: string
        required: false
        default: 'false'
# Write access to repository contents is needed because the job creates a
# release and uploads assets with the GITHUB_TOKEN.
permissions:
  contents: write
jobs:
  # Single job: build the environment, package it, and publish it.
  create_env:
    runs-on: windows-latest
    # Every `- ...` step entry of this workflow belongs to this list.
    steps:
# Full history (fetch-depth: 0) is fetched because the tag-name step counts
# commits with `git rev-list --count HEAD`.
- uses: actions/checkout@v4
  with:
    fetch-depth: 0
- name: Set up Python
  uses: actions/setup-python@v5
  with:
    # Quoted so YAML keeps the version as the string "3.10", not the float 3.1.
    python-version: '3.10'
# Sanity check: print the working directory and list the selected build
# folder so a wrong folder name shows up early in the log.
- name: Check Setting Folder
  shell: bash
  run: |
    pwd
    ls ${{ github.event.inputs.build_folder }}
# Computes the release tag and exposes it as `steps.tag.outputs.name`:
#   master:         <build_folder>-b<commit count>
#   other branches: <build_folder>-b<commit count>-<7-char commit hash>
- name: Determine tag name
  id: tag
  shell: bash
  env:
    # BRANCH_NAME is not defined anywhere else in the workflow, so it has to
    # be provided here for the `master` comparison to ever match.
    BRANCH_NAME: ${{ github.ref_name }}
    # Passed through the environment instead of being pasted into the script,
    # so shell metacharacters in the input cannot alter the commands.
    BUILD_FOLDER: ${{ github.event.inputs.build_folder }}
  run: |
    BUILD_NUMBER="$(git rev-list --count HEAD)"
    SHORT_HASH="$(git rev-parse --short=7 HEAD)"
    if [[ "${BRANCH_NAME}" == "master" ]]; then
      echo "name=${BUILD_FOLDER}-b${BUILD_NUMBER}" >> "$GITHUB_OUTPUT"
    else
      echo "name=${BUILD_FOLDER}-b${BUILD_NUMBER}-${SHORT_HASH}" >> "$GITHUB_OUTPUT"
    fi
# Diagnostic only: shows how the GitHub CLI is authenticated with the
# workflow token and that the repository is readable with it.
- name: Debug Permissions
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    echo "Permissions for this GITHUB_TOKEN:"
    gh auth status
    gh repo view
# Caches the built environment directory between runs.
- name: Cache Miniforge Environment
  uses: actions/cache@v4
  with:
    path: ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
    # `${{ }}` is not expanded inside a string literal of an expression, so the
    # file pattern is assembled with format(); otherwise hashFiles() receives a
    # pattern that matches no file and the key never changes with
    # environment.yml.
    key: ${{ runner.os }}-${{ github.event.inputs.build_folder }}-${{ hashFiles(format('{0}/environment.yml', github.event.inputs.build_folder)) }}
    # Fall back to the latest cache of the same build folder; the create/update
    # step then only has to apply the differences.
    restore-keys: |
      ${{ runner.os }}-${{ github.event.inputs.build_folder }}-
# Optional hook: runs <build_folder>/pre_build.ps1 when the build folder
# provides one, handing it the environment path as its only argument.
- name: run pre build script
  shell: pwsh {0}
  run: |
    cd ${{ github.event.inputs.build_folder }}
    pwd
    if (Test-Path -Path ".\pre_build.ps1") {
      # Run the hook in a child pwsh process.
      pwsh .\pre_build.ps1 ${{ github.event.inputs.target_folder }}\${{ github.event.inputs.build_folder }}
    } else {
      # No hook in this build folder: show what is there and carry on.
      pwd
      ls
      Write-Output ".\pre_build.ps1 doesn't exist. Skip executing it."
    }
# Provides the `conda` command used by the following steps.
- name: Install miniforge
  uses: conda-incubator/setup-miniconda@v3
  with:
    miniforge-version: latest
    auto-activate-base: true
# Creates the environment at <target_folder>/<build_folder>, or updates it in
# place when that directory already exists, then drops package caches.
- name: Create Conda Environment
  shell: pwsh
  run: |
    cd ${{ github.event.inputs.build_folder }}
    $envPath = "${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}"
    if (Test-Path -Path $envPath) {
      # The prefix directory is already present: update instead of recreating.
      Write-Host "Environment ${{ github.event.inputs.build_folder }} already exists under ${{ github.event.inputs.target_folder }}. Will try update..."
      conda env update -f environment.yml -p $envPath
    } else {
      # Nothing at the prefix yet: create the environment from scratch.
      Write-Host "Creating new environment ${{ github.event.inputs.build_folder }} under ${{ github.event.inputs.target_folder }}."
      conda env create -f environment.yml -p $envPath
    }
    echo "clean up cache..."
    conda clean --all -f -y
    pip cache purge
# Exports the fully resolved environment to zipped/detailed_environment.yml
# (the release-upload step publishes everything in zipped/) and prints it.
- name: check final environment settings
  run: |
    conda activate ${{ github.event.inputs.target_folder }}\${{ github.event.inputs.build_folder }}
    mkdir -p zipped
    conda env export > zipped/detailed_environment.yml
    cat zipped/detailed_environment.yml
# Only when jars are requested: create C:\hadoop\bin and expose it to all
# later steps through HADOOP_HOME and PATH.
- name: Set HADOOP_HOME environment variable
  if: ${{ github.event.inputs.download_jars == 'true' }}
  run: |
    New-Item -ItemType Directory -Force -Path "C:\hadoop\bin"
    # Appending to GITHUB_ENV / GITHUB_PATH makes the values visible to subsequent steps.
    echo "HADOOP_HOME=C:\hadoop" | Out-File -FilePath $env:GITHUB_ENV -Append
    echo "C:\hadoop\bin" | Out-File -FilePath $env:GITHUB_PATH -Append
# Fetches the Windows Hadoop helper binaries and prepares the Ivy directory.
- name: Download Hadoop DLLs for Windows
  if: ${{ github.event.inputs.download_jars == 'true' }}
  env:
    PYSPARK_JARS_IVY: ${{ github.event.inputs.target_folder }}/ivy
  run: |
    Invoke-WebRequest -Uri "https://github.com/steveloughran/winutils/raw/master/hadoop-3.0.0/bin/winutils.exe" -OutFile "C:\hadoop\bin\winutils.exe"
    # hadoop.dll comes from the same winutils repository folder as winutils.exe.
    Invoke-WebRequest -Uri "https://github.com/steveloughran/winutils/raw/master/hadoop-3.0.0/bin/hadoop.dll" -OutFile "C:\hadoop\bin\hadoop.dll"
    # The Ivy directory is placed under the target folder (instead of the
    # default .ivy location) so that it can be zipped together easily.
    echo "Create .ivy folder"
    New-Item -ItemType Directory -Force -Path ${{ env.PYSPARK_JARS_IVY }}/jars
# Caches the downloaded Spark NLP jars between runs.
- name: Cache Ivy jars
  if: ${{ github.event.inputs.download_jars == 'true' }}
  uses: actions/cache@v4
  with:
    # Must be a plain path: a PowerShell assignment pasted here is taken as a
    # literal path that matches nothing, so no jar would ever be cached.
    path: ${{ github.event.inputs.target_folder }}/ivy/jars
    # hashFiles() only sees files inside the workspace, while the jars live
    # under the target folder, so the key is derived from the inputs that
    # decide which jars get downloaded.
    key: ${{ runner.os }}-ivy-${{ github.event.inputs.sparknlp_version }}${{ github.event.inputs.sparknlp_gpu }}
    restore-keys: |
      ${{ runner.os }}-ivy-
# Starts and immediately stops a Spark NLP session with `spark.jars.ivy`
# pointing at <target_folder>/ivy.
# NOTE(review): presumably the session start is what pulls the jars into that
# directory; confirm against the Spark NLP documentation.
- name: download sparknlp jars
  if: ${{ github.event.inputs.download_jars == 'true' }}
  env:
    PYSPARK_JARS_IVY: ${{ github.event.inputs.target_folder }}/ivy
  run: |
    echo "PYSPARK_JARS_IVY=${{ env.PYSPARK_JARS_IVY }}"
    conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
    # Earlier direct-download variant, kept for reference:
    # if (!(Test-Path -Path "${{ env.PYSPARK_JARS_IVY }}/jars/spark-nlp-gpu-assembly-${{ github.event.inputs.sparknlp_version }}.jar")) {
    #   curl -Lo "${{ env.PYSPARK_JARS_IVY }}/jars/spark-nlp${{ github.event.inputs.sparknlp_gpu }}-assembly-${{ github.event.inputs.sparknlp_version }}.jar" https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/jars/spark-nlp${{ github.event.inputs.sparknlp_gpu }}-assembly-${{ github.event.inputs.sparknlp_version }}.jar
    # } else {
    #   echo "JAR already exists in ${{ env.PYSPARK_JARS_IVY }}/jars"
    # }
    # An empty sparknlp_gpu input selects the CPU session, anything else the GPU one.
    if ( [string]::IsNullOrEmpty('${{ github.event.inputs.sparknlp_gpu }}')){
      python -c "import sparknlp; spark = sparknlp.start(params={'spark.jars.ivy':'${{ github.event.inputs.target_folder }}/ivy'}); spark.stop();"
    }else{
      python -c "import sparknlp; spark = sparknlp.start(gpu=True, params={'spark.jars.ivy':'${{ github.event.inputs.target_folder }}/ivy'}); spark.stop();"
    }
# Installs the small and medium English spaCy models into the built environment.
- name: download spacy models
  if: ${{ github.event.inputs.download_spacy_model == 'true' }}
  run: |
    conda activate ${{ github.event.inputs.target_folder }}\${{ github.event.inputs.build_folder }}
    python -m spacy download en_core_web_sm
    python -m spacy download en_core_web_md
# - name: copy cached jars
# run: |
# $sourceDir = Join-Path -Path $env:USERPROFILE -ChildPath ".ivy"
# Get-ChildItem -Path $sourceDir -Recurse -Filter *.jar | Copy-Item -Destination ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}/lib/site-packages/pyspark/jars -Force
# Diagnostic listing of the target folder and of the environment inside it.
- name: check folder
  run: |
    ls ${{ github.event.inputs.target_folder }}
    ls ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
# Diagnostic listing of the Ivy directory and its jars (jar builds only).
- name: check jar folder
  if: ${{ github.event.inputs.download_jars == 'true' }}
  run: |
    ls ${{ github.event.inputs.target_folder }}/ivy
    ls ${{ github.event.inputs.target_folder }}/ivy/jars
# Empties <target_folder>/ivy/cache before packaging; runs only when both
# download_jars and remove_cache are 'true'.
- name: clear jar cache folder
  if: ${{ github.event.inputs.download_jars == 'true' && github.event.inputs.remove_cache == 'true' }}
  run: |
    Remove-Item -Path ${{ github.event.inputs.target_folder }}/ivy/cache/* -Recurse -Force
# Optional hook: runs <build_folder>/post_build.ps1 when the build folder
# provides one, after activating the freshly built environment.
- name: run post build script
  shell: pwsh {0}
  run: |
    cd ${{ github.event.inputs.build_folder }}
    pwd
    conda activate ${{ github.event.inputs.target_folder }}\${{ github.event.inputs.build_folder }}
    if (Test-Path -Path ".\post_build.ps1") {
      # The activation above does not always work for the script below, so
      # the environment path is also passed to it as an argument.
      pwsh .\post_build.ps1 ${{ github.event.inputs.target_folder }}\${{ github.event.inputs.build_folder }}
    } else {
      # No hook in this build folder: show what is there and carry on.
      pwd
      ls
      Write-Output ".\post_build.ps1 doesn't exist. Skip executing it."
    }
# Makes sure 7-Zip is available for the compression steps.
- name: Check for 7-Zip installation
  run: |
    if (!(Test-Path "C:\Program Files\7-Zip\7z.exe")) {
      # -y answers the confirmation prompt; nobody can type on a CI runner.
      choco install 7zip -y
    }
# Full package: archives everything matching <target_folder>* into 7z volumes
# of at most zip_vol_size each (skipped when only the jars are wanted).
- name: Compress and split folder
  if: ${{ github.event.inputs.zip_jar_only == 'false' }}
  run: |
    pwd
    7z a -t7z -v${{ github.event.inputs.zip_vol_size }} zipped/${{ github.event.inputs.build_folder }}.7z ${{ github.event.inputs.target_folder }}*
    ls zipped
# Jars-only package: archives just <target_folder>/ivy* into 7z volumes of at
# most zip_vol_size each.
- name: Compress and split ivy folder
  if: ${{ github.event.inputs.zip_jar_only == 'true' }}
  run: |
    pwd
    7z a -t7z -v${{ github.event.inputs.zip_vol_size }} zipped/${{ github.event.inputs.build_folder }}_ivy.7z ${{ github.event.inputs.target_folder }}/ivy*
    ls zipped
# Publishes the 7z volumes as a workflow artifact named after the build folder.
- name: Upload compressed parts as artifacts
  uses: actions/upload-artifact@v4
  with:
    name: ${{ github.event.inputs.build_folder }}
    # Matches only the split volumes (*.7z.001, ...), not the other files in zipped/.
    path: zipped/*.7z.*
    retention-days: ${{ github.event.inputs.retention_days }}
# Creates a published (non-draft, non-prerelease) release whose tag and title
# are both the name computed by the `tag` step. The upload step reads this
# step's `id` and `html_url` outputs.
# NOTE(review): actions/create-release appears to be archived upstream;
# consider a maintained alternative.
- name: Create Release
  id: create_release
  uses: actions/create-release@v1
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  with:
    tag_name: ${{ steps.tag.outputs.name }}
    release_name: ${{ steps.tag.outputs.name }}
    draft: false
    prerelease: false
# Fills in the release description (build folder README plus the workflow
# settings used) and uploads every file found in zipped/ as a release asset.
- name: Upload release
  id: upload_release
  # v5 and later expose the REST methods under `github.rest.*`.
  uses: actions/github-script@v7
  env:
    # Workflow values reach the script through the environment. Pasting them
    # into JavaScript template literals would treat backslashes as escapes
    # (the `\c` in D:\conda_envs_jianlins collapses to `c`) and would let
    # backticks or `${` in an input break the script.
    RELEASE_ID: ${{ steps.create_release.outputs.id }}
    TAG_NAME: ${{ steps.tag.outputs.name }}
    BUILD_FOLDER: ${{ github.event.inputs.build_folder }}
    TARGET_FOLDER: ${{ github.event.inputs.target_folder }}
    DOWNLOAD_JARS: ${{ github.event.inputs.download_jars }}
    SPARKNLP_VERSION: ${{ github.event.inputs.sparknlp_version }}
    SPARKNLP_GPU: ${{ github.event.inputs.sparknlp_gpu }}
    DOWNLOAD_SPACY_MODEL: ${{ github.event.inputs.download_spacy_model }}
    ZIP_JAR_ONLY: ${{ github.event.inputs.zip_jar_only }}
  with:
    github-token: ${{ secrets.GITHUB_TOKEN }}
    script: |
      const path = require('path');
      const fs = require('fs');

      // An input left blank may not be present in the environment at all,
      // so every lookup falls back to an empty string.
      const setting = (key) => process.env[key] ?? '';

      const release_id = Number(setting('RELEASE_ID'));
      const tagName = setting('TAG_NAME');
      const buildFolder = setting('BUILD_FOLDER');
      const { owner, repo } = context.repo;

      // Start the description from the build folder's README when there is one.
      const descriptionFilePath = `${buildFolder}/README.MD`;
      let releaseDescription = '';
      if (fs.existsSync(descriptionFilePath)) {
        releaseDescription = fs.readFileSync(descriptionFilePath, 'utf8');
        console.log("Successfully read release description from 'README.MD'");
      } else {
        console.warn("README.MD not found in the specified build folder. Proceeding without a detailed description.");
      }

      // Link to the environment.yml as it exists at the release tag.
      const filePath = `${buildFolder}/environment.yml`;
      const fileUrl = `https://github.com/${owner}/${repo}/blob/${tagName}/${filePath}`;
      console.log(`Constructed file URL: ${fileUrl}`);

      releaseDescription += `\n\nView the corresponding [environment.yml](${fileUrl}) file used for this release.`;
      releaseDescription += `\n\nTo use it, unzip the folders inside and put them under ${setting('TARGET_FOLDER')}`;
      releaseDescription += `\n\nThis environment was built with following workflow settings: `;
      releaseDescription += `\ndownload_jars: ${setting('DOWNLOAD_JARS')}`;
      releaseDescription += `\nsparknlp_version: ${setting('SPARKNLP_VERSION')}`;
      releaseDescription += `\nsparknlp_gpu: ${setting('SPARKNLP_GPU')}`;
      releaseDescription += `\ndownload_spacy_model: ${setting('DOWNLOAD_SPACY_MODEL')}`;
      releaseDescription += `\nzip_jar_only: ${setting('ZIP_JAR_ONLY')}`;

      // Update the release with the description
      await github.rest.repos.updateRelease({
        owner,
        repo,
        release_id,
        body: releaseDescription,
      });

      // Attach every packaged file; uploads run one after another.
      for (const file of fs.readdirSync('./zipped')) {
        console.log('uploadReleaseAsset', file);
        await github.rest.repos.uploadReleaseAsset({
          owner,
          repo,
          release_id,
          name: file,
          data: fs.readFileSync(path.join('zipped', file)),
        });
      }