name: build_linux_env
run-name: Create&Zip ${{ github.event.inputs.build_folder }}
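# Builds the conda environment defined by <build_folder>/environment.yml on a hosted
# runner, optionally fetches Spark NLP jars and spaCy models into it, then splits
# everything into 7z volumes published as workflow artifacts and a GitHub release.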
on:
  workflow_dispatch:
    inputs:
      build_folder:
        description: "Folder whose environment.yml the conda environment will be built from"
        type: string
        required: false
        default: 'vlinux_cuda'
      target_folder:
        description: 'Envs path (its parent folder is bind-mounted onto /mnt for extra space)'
        type: string
        required: false
        default: '/home/vhaslcshij/workspace'
      download_jars:
        description: "Whether to download the jars for pyspark"
        type: string
        required: false
        default: 'false'
      sparknlp_version:
        description: "sparknlp fat jar version"
        type: string
        required: false
        default: '5.3.3'
      sparknlp_gpu:
        description: "sparknlp fat jar variant suffix; use '-gpu' for the GPU build, otherwise leave it blank"
        type: string
        required: false
        default: ''
      download_spacy_model:
        description: "Whether to download the sm/md spacy models"
        type: string
        required: false
        default: 'true'
      zip_vol_size:
        description: 'Max 7zip volume size'
        type: string
        required: false
        default: '800m'
      zip_jar_only:
        description: "Whether to zip the jar files only"
        type: string
        required: false
        default: 'false'
      retention_days:
        description: 'Days to keep the artifacts'
        type: number
        required: false
        default: 7
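# A minimal manual-trigger sketch via the GitHub CLI (values are illustrative;
# omitted inputs fall back to the defaults above):
#   gh workflow run build_linux_env \
#     -f build_folder=vlinux_cuda \
#     -f download_jars=true \
#     -f sparknlp_gpu=-gpu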
jobs:
  run_docker:
    runs-on: ubuntu-latest
    timeout-minutes: 40
    steps:
      - uses: actions/checkout@v4
      - name: check & mount space
        id: id_device
        shell: bash
        run: |
          pwd
          df -h
          device=$(df -h | grep '/mnt' | awk '{print $1}')
          echo "mnt device: $device"
          echo "DEVICE=${device}" >> $GITHUB_OUTPUT
          FOLDER_PATH=${{ github.event.inputs.target_folder }}
          echo "FOLDER_PATH $FOLDER_PATH"
          PARENT_FOLDER=$(dirname "$FOLDER_PATH")
          echo "PARENT_FOLDER $PARENT_FOLDER"
          sudo mkdir -p "$PARENT_FOLDER"
          sudo mount --bind /mnt "$PARENT_FOLDER"
          sudo chmod -R 777 "$PARENT_FOLDER"
          echo "Remove unused pkgs"
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          echo "Now recheck the spaces"
          df -h
          df -h "$PARENT_FOLDER"
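      # On GitHub-hosted Ubuntu runners the root filesystem is small while /mnt sits on
      # a larger temporary disk (exact sizes vary by runner image), so bind-mounting the
      # env's parent folder onto /mnt gives the conda environment room to build.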
      # https://github.com/easimon/maximize-build-space/issues/48  This action won't work reliably anymore
      # - name: Maximize build space
      #   uses: easimon/maximize-build-space@master
      #   with:
      #     remove-dotnet: 'true'
      #     remove-android: 'true'
      #     remove-codeql: 'true'
      #     temp-reserve-mb: '2048'
      #     root-reserve-mb: '2048'
      #     # instead of using the default value as the mount/build path, ${{ github.event.inputs.target_folder }}/ is really the place where we need more space.
      #     build-mount-path: '/home'
      # - name: create softlink to mnt
      #   shell: bash
      #   run: |
      #     mkdir -p ${{ github.event.inputs.target_folder }}
      #     mkdir -p /mnt/${{ github.event.inputs.build_folder }}
      #     ln -s /mnt/${{ github.event.inputs.build_folder }} ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
      - name: Determine tag name
        id: tag
        shell: bash
        env:
          # branch name used to decide the tag format below
          BRANCH_NAME: ${{ github.ref_name }}
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=${{ github.event.inputs.build_folder }}-b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${{ github.event.inputs.build_folder }}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi
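      # With the step above, tags look like vlinux_cuda-b321 on master and
      # vlinux_cuda-b321-a1b2c3d on other branches (numbers and hash are illustrative).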
      - name: Debug Permissions
        run: |
          echo "Permissions for this GITHUB_TOKEN:"
          gh auth status
          gh repo view
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # - name: Cache Miniforge Environment
      #   uses: actions/cache@v4
      #   with:
      #     path: ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
      #     key: ${{ runner.os }}-${{ github.event.inputs.build_folder }}-${{ hashFiles('${{ github.event.inputs.build_folder }}/environment.yml') }}
      #     restore-keys: |
      #       ${{ runner.os }}-${{ github.event.inputs.build_folder }}-/${{ github.event.inputs.target_folder }}
      - name: mkdir
        run: |
          pwd
          sudo mkdir -p ${{ github.event.inputs.target_folder }}
          sudo mkdir -p ${{ github.event.inputs.target_folder }}/ivy
          sudo chmod -R 777 ${{ github.event.inputs.target_folder }}
          ls -l ${{ github.event.inputs.target_folder }}
      - name: install miniforge
        uses: conda-incubator/setup-miniconda@v3
        with:
          # activate-environment: ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          miniforge-version: latest
          # environment-file: ${{ github.event.inputs.build_folder }}/environment.yml
      - name: Create Conda Environment
        run: |
          cd ${{ github.event.inputs.build_folder }}
          conda config --set verbosity 2
          conda env create -f environment.yml -p ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
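      # `-p` creates a prefix environment at that exact path instead of under conda's
      # default envs directory, which is why later steps activate it by full path.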
      - name: clean up
        run: |
          echo "clean up cache..."
          conda clean --all -f -y
          pip cache purge
      - name: recheck space
        shell: bash
        run: |
          df -h
          df -h /mnt
          df -h ${{ steps.id_device.outputs.DEVICE }}
      - name: check final environment settings
        run: |
          conda init
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          conda env export
      # - name: Cache Ivy jars
      #   if: ${{ github.event.inputs.download_jars == 'true' }}
      #   uses: actions/cache@v4
      #   with:
      #     path: |
      #       ${{ github.event.inputs.target_folder }}/ivy/jars
      #     key: ${{ runner.os }}-ivy-${{ hashFiles('**/*.jar') }}
      #     restore-keys: |
      #       ${{ runner.os }}-ivy-
      - name: download sparknlp jars
        if: ${{ github.event.inputs.download_jars == 'true' }}
        env:
          PYSPARK_JARS_IVY: ${{ github.event.inputs.target_folder }}/ivy
        run: |
          echo "PYSPARK_JARS_IVY=${{ env.PYSPARK_JARS_IVY }}"
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          mkdir -p "${{ env.PYSPARK_JARS_IVY }}/jars"
          JAR_PATH="${{ env.PYSPARK_JARS_IVY }}/jars/spark-nlp${{ github.event.inputs.sparknlp_gpu }}-assembly-${{ github.event.inputs.sparknlp_version }}.jar"
          if [ ! -f "$JAR_PATH" ]; then
            curl -Lo "$JAR_PATH" https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/jars/spark-nlp${{ github.event.inputs.sparknlp_gpu }}-assembly-${{ github.event.inputs.sparknlp_version }}.jar
          else
            echo "JAR already exists in ${{ env.PYSPARK_JARS_IVY }}/jars"
          fi
          python -c "from pyspark.sql import SparkSession; spark = SparkSession.builder.appName('Spark NLP').master('local[*]').config('spark.jars.ivy', '${{ github.event.inputs.target_folder }}/ivy').config('spark.jars.packages', 'com.johnsnowlabs.nlp:spark-nlp${{ github.event.inputs.sparknlp_gpu }}_2.12:${{ github.event.inputs.sparknlp_version }}').getOrCreate(); spark.stop();"
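      # Starting the throwaway SparkSession above makes Ivy resolve spark-nlp and its
      # transitive dependencies into the spark.jars.ivy directory, so the jars can be
      # zipped up for offline use later.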
      - name: download spacy models
        if: ${{ github.event.inputs.download_spacy_model == 'true' }}
        run: |
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          python -m spacy download en_core_web_sm
          python -m spacy download en_core_web_md
          # python -m spacy download en_core_web_trf  # this results in transformers version conflicts
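      # A quick sanity check for the downloaded models would be something like:
      #   python -c "import spacy; spacy.load('en_core_web_sm')"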
      - name: check folder
        run: |
          ls ${{ github.event.inputs.target_folder }}
          pwd
          ls ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
      - name: check jar folder
        if: ${{ github.event.inputs.download_jars == 'true' }}
        run: |
          ls ${{ github.event.inputs.target_folder }}/ivy
          ls ${{ github.event.inputs.target_folder }}/ivy/jars
          rm -rf ${{ github.event.inputs.target_folder }}/ivy/cache
      - name: Compress and split folder
        if: ${{ github.event.inputs.zip_jar_only == 'false' }}
        run: |
          pwd
          7z a -t7z -v${{ github.event.inputs.zip_vol_size }} zipped/${{ github.event.inputs.build_folder }}.7z ${{ github.event.inputs.target_folder }}
          ls zipped
      - name: Compress and split ivy folder
        if: ${{ github.event.inputs.zip_jar_only == 'true' }}
        run: |
          pwd
          7z a -t7z -v${{ github.event.inputs.zip_vol_size }} zipped/${{ github.event.inputs.build_folder }}_ivy.7z ${{ github.event.inputs.target_folder }}/ivy
          ls zipped
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          conda env export > zipped/detailed_environment.yml
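      # To reassemble on the target machine, extracting the first volume is enough;
      # 7z picks up the remaining volumes automatically (file name illustrative):
      #   7z x vlinux_cuda.7z.001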
      - name: Upload compressed parts as artifacts
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.event.inputs.build_folder }}
          path: zipped/*.7z.*
          retention-days: ${{ github.event.inputs.retention_days }}
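      # Before they expire, the split archives can also be fetched with the GitHub CLI,
      # e.g. (with the default build_folder input):
      #   gh run download <run-id> -n vlinux_cuda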
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          release_name: ${{ steps.tag.outputs.name }}
          draft: false
          prerelease: false
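      # Note: creating a release with the default GITHUB_TOKEN requires `contents: write`
      # permission; if the repository defaults to a read-only token, a top-level
      # `permissions: contents: write` block is needed. actions/create-release is archived
      # upstream, so softprops/action-gh-release is a commonly used replacement.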
      - name: Upload release
        id: upload_release
        uses: actions/github-script@v3
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            // Retrieve necessary information
            const release_id = `${{ steps.create_release.outputs.id }}`;
            const release_url = `${{ steps.create_release.outputs.html_url }}`;
            const buildFolder = `${{ github.event.inputs.build_folder }}`;
            const descriptionFilePath = `${buildFolder}/README.MD`;
            let releaseDescription = '';
            // Check for the README file and load its content
            if (fs.existsSync(descriptionFilePath)) {
              releaseDescription = fs.readFileSync(descriptionFilePath, 'utf8');
              console.log("Successfully read release description from 'README.MD'");
            } else {
              console.warn("README.MD not found in the specified build folder. Proceeding without a detailed description.");
            }
            // Construct the environment.yml GitHub file URL manually
            const repoOwner = context.repo.owner;
            const repoName = context.repo.repo;
            const filePath = `${buildFolder}/environment.yml`;
            const fileUrl = `https://github.com/${repoOwner}/${repoName}/blob/${{ steps.tag.outputs.name }}/${filePath}`;
            console.log(`Constructed file URL: ${fileUrl}`);
            // Append the file URL to the release description
            releaseDescription += `\n\nView the corresponding [environment.yml](${fileUrl}) file used for this release.`;
            releaseDescription += `\n\nTo use it, unzip the folders inside and put them under ${{ github.event.inputs.target_folder }}`;
            // Update the release with the description
            await github.repos.updateRelease({
              owner: context.repo.owner,
              repo: context.repo.repo,
              release_id,
              body: releaseDescription,
            });
            // Upload every split archive in ./zipped as a release asset
            for (const file of fs.readdirSync('./zipped')) {
              console.log('uploadReleaseAsset', file);
              await github.repos.uploadReleaseAsset({
                owner: context.repo.owner,
                repo: context.repo.repo,
                release_id: release_id,
                name: file,
                data: fs.readFileSync(`zipped/${file}`)
              });
            }