Create&Zip vlinux_cuda #51
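# Builds a conda environment from <build_folder>/environment.yml on a Linux runner,
# optionally pre-downloads Spark NLP jars and spaCy models into it, then 7-zips the
# result into fixed-size volumes published as workflow artifacts and a GitHub release.
#
# A sketch of a manual trigger via the gh CLI (assuming the file is saved as
# .github/workflows/build_linux_env.yml):
#   gh workflow run build_linux_env.yml -f build_folder=vlinux_cuda -f download_jars=true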
name: build_linux_env
run-name: Create&Zip ${{ github.event.inputs.build_folder }}
on:
  workflow_dispatch:
    inputs:
      build_folder:
description: "From which folder's environment.yml, the conda environment will be built" | |
type: string | |
required: false | |
default: 'vlinux_cuda' | |
target_folder: | |
description: 'Envs path (must be on D drive)' | |
type: string | |
required: false | |
default: '/home/vhaslcshij/workspace' | |
download_jars: | |
description: "whether download jars for pyspark" | |
type: string | |
required: false | |
default: 'false' | |
sparknlp_version: | |
description: "sparknlp fat jar version" | |
type: string | |
required: false | |
default: '5.3.3' | |
sparknlp_gpu: | |
description: "sparknlp fat jar version, if wanting gpu, use '-gpu', otherwise leave it blank" | |
type: string | |
required: false | |
default: '' | |
download_spacy_model: | |
description: "whether download sm spacy models" | |
type: string | |
required: false | |
default: 'true' | |
zip_vol_size: | |
description: 'Max 7zip volumn size' | |
type: string | |
required: false | |
default: '800m' | |
zip_jar_only: | |
description: "whether zip jars file only" | |
type: string | |
required: false | |
default: 'false' | |
retention_days: | |
description: 'Days to keep the artifacts' | |
type: int | |
required: false | |
default: 7 | |
jobs:
  run_docker:
    runs-on: ubuntu-latest
    timeout-minutes: 40
    steps:
      - uses: actions/checkout@v4
      - name: check&mount space
        id: id_device
        shell: bash
        run: |
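          # GitHub-hosted runners typically have far more free disk under /mnt than
          # under /, so bind-mount /mnt over the target folder's parent and build the
          # large conda environment there.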
          pwd
          df -h
          device=$(df -h | grep '/mnt' | awk '{print $1}')
          echo "mnt device: $device"
          echo "DEVICE=${device}" >> $GITHUB_OUTPUT
          FOLDER_PATH=${{ github.event.inputs.target_folder }}
          echo "FOLDER_PATH $FOLDER_PATH"
          PARENT_FOLDER=$(dirname "$FOLDER_PATH")
          echo "PARENT_FOLDER $PARENT_FOLDER"
          sudo mkdir -p $PARENT_FOLDER
          sudo mount --bind /mnt $PARENT_FOLDER
          sudo chmod -R 777 $PARENT_FOLDER
          echo "Remove unused pkgs"
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          echo "Now recheck the spaces"
          df -h
          df -h $PARENT_FOLDER
      # https://github.com/easimon/maximize-build-space/issues/48: this action no longer works reliably
      # - name: Maximize build space
      #   uses: easimon/maximize-build-space@master
      #   with:
      #     remove-dotnet: 'true'
      #     remove-android: 'true'
      #     remove-codeql: 'true'
      #     temp-reserve-mb: '2048'
      #     root-reserve-mb: '2048'
      #     # instead of the default build path, ${{ github.event.inputs.target_folder }}/ is where the extra space is actually needed.
      #     build-mount-path: '/home'
      # - name: create softlink to mnt
      #   shell: bash
      #   run: |
      #     mkdir -p ${{ github.event.inputs.target_folder }}
      #     mkdir -p /mnt/${{ github.event.inputs.build_folder }}
      #     ln -s /mnt/${{ github.event.inputs.build_folder }} ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
      - name: Determine tag name
        id: tag
        shell: bash
        run: |
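          # Tag format: <build_folder>-b<commit count> on master; other branches also
          # get the sanitized branch name and short commit hash appended.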
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ github.ref_name }}" == "master" ]]; then
            echo "name=${{ github.event.inputs.build_folder }}-b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ github.ref_name }}" | tr '/' '-')
            echo "name=${{ github.event.inputs.build_folder }}-${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi
      - name: Debug Permissions
        run: |
          echo "Permissions for this GITHUB_TOKEN:"
          gh auth status
          gh repo view
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # - name: Cache Miniforge Environment
      #   uses: actions/cache@v4
      #   with:
      #     path: ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
      #     key: ${{ runner.os }}-${{ github.event.inputs.build_folder }}-${{ hashFiles('${{ github.event.inputs.build_folder }}/environment.yml') }}
      #     restore-keys: |
      #       ${{ runner.os }}-${{ github.event.inputs.build_folder }}-/${{ github.event.inputs.target_folder }}
      - name: mkdir
        run: |
          pwd
          sudo mkdir -p ${{ github.event.inputs.target_folder }}
          sudo mkdir -p ${{ github.event.inputs.target_folder }}/ivy
          sudo chmod -R 777 ${{ github.event.inputs.target_folder }}
          ls -l ${{ github.event.inputs.target_folder }}
      - name: install miniforge
        uses: conda-incubator/setup-miniconda@v3
        with:
          # activate-environment: ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          miniforge-version: latest
          # environment-file: ${{ github.event.inputs.build_folder }}/environment.yml
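      # The environment is created manually with an explicit prefix (-p) rather than
      # through setup-miniconda's environment-file input, so it lands on the
      # bind-mounted target path instead of the runner's default envs directory.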
      - name: Create Conda Environment
        run: |
          cd ${{ github.event.inputs.build_folder }}
          conda config --set verbosity 2
          conda env create -f environment.yml -p ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
      - name: clean up
        run: |
          echo "clean up cache..."
          conda clean --all -f -y
          pip cache purge
      - name: recheck space
        shell: bash
        run: |
          df -h
          df -h /mnt
          df -h ${{ steps.id_device.outputs.DEVICE }}
      - name: check final environment settings
        run: |
          conda init
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          conda env export
      # - name: Cache Ivy jars
      #   if: ${{ github.event.inputs.download_jars == 'true' }}
      #   uses: actions/cache@v4
      #   with:
      #     path: |
      #       ${{ github.event.inputs.target_folder }}/ivy/jars
      #     key: ${{ runner.os }}-ivy-${{ hashFiles('**/*.jar') }}
      #     restore-keys: |
      #       ${{ runner.os }}-ivy-
      - name: download sparknlp jars
        if: ${{ github.event.inputs.download_jars == 'true' }}
        env:
          PYSPARK_JARS_IVY: ${{ github.event.inputs.target_folder }}/ivy
        run: |
          echo "PYSPARK_JARS_IVY=${{ env.PYSPARK_JARS_IVY }}"
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          JAR_NAME="spark-nlp${{ github.event.inputs.sparknlp_gpu }}-assembly-${{ github.event.inputs.sparknlp_version }}.jar"
          mkdir -p "${{ env.PYSPARK_JARS_IVY }}/jars"
          if [ ! -f "${{ env.PYSPARK_JARS_IVY }}/jars/${JAR_NAME}" ]; then
            curl -Lo "${{ env.PYSPARK_JARS_IVY }}/jars/${JAR_NAME}" "https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/jars/${JAR_NAME}"
          else
            echo "JAR already exists in ${{ env.PYSPARK_JARS_IVY }}/jars"
          fi
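          # Starting and stopping a local SparkSession with spark.jars.packages set makes
          # Ivy resolve the Spark NLP package and its dependencies into the spark.jars.ivy
          # directory that ships inside the zip.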
          python -c "from pyspark.sql import SparkSession; spark = SparkSession.builder.appName('Spark NLP').master('local[*]').config('spark.jars.ivy', '${{ github.event.inputs.target_folder }}/ivy').config('spark.jars.packages', 'com.johnsnowlabs.nlp:spark-nlp${{ github.event.inputs.sparknlp_gpu }}_2.12:${{ github.event.inputs.sparknlp_version }}').getOrCreate(); spark.stop();"
      - name: download spacy models
        if: ${{ github.event.inputs.download_spacy_model == 'true' }}
        run: |
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          python -m spacy download en_core_web_sm
          python -m spacy download en_core_web_md
          # python -m spacy download en_core_web_trf  # skipped: it causes transformers version conflicts
      - name: check folder
        run: |
          ls ${{ github.event.inputs.target_folder }}
          pwd
          ls ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
      - name: check jar folder
        if: ${{ github.event.inputs.download_jars == 'true' }}
        run: |
          ls ${{ github.event.inputs.target_folder }}/ivy
          ls ${{ github.event.inputs.target_folder }}/ivy/jars
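          # Keep only the resolved jars; Ivy's resolution cache can be rebuilt on demand.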
          rm -R ${{ github.event.inputs.target_folder }}/ivy/cache
      - name: Compress and split folder
        if: ${{ github.event.inputs.zip_jar_only == 'false' }}
        run: |
          pwd
          7z a -t7z -v${{ github.event.inputs.zip_vol_size }} zipped/${{ github.event.inputs.build_folder }}.7z ${{ github.event.inputs.target_folder }}
          ls zipped
      - name: Compress and split ivy folder
        if: ${{ github.event.inputs.zip_jar_only == 'true' }}
        run: |
          pwd
          7z a -t7z -v${{ github.event.inputs.zip_vol_size }} zipped/${{ github.event.inputs.build_folder }}_ivy.7z ${{ github.event.inputs.target_folder }}/ivy
          ls zipped
          source /home/runner/.bashrc
          conda activate ${{ github.event.inputs.target_folder }}/${{ github.event.inputs.build_folder }}
          conda env export > zipped/detailed_environment.yml
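      # To restore, download all volumes into one directory and extract from the first
      # part (7z picks up the remaining volumes automatically), then place the contents
      # under the target folder, e.g.:
      #   7z x vlinux_cuda.7z.001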
      - name: Upload compressed parts as artifacts
        uses: actions/upload-artifact@v4
        with:
          name: ${{ github.event.inputs.build_folder }}
          path: zipped/*.7z.*
          retention-days: ${{ github.event.inputs.retention_days }}
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          release_name: ${{ steps.tag.outputs.name }}
          draft: false
          prerelease: false
      - name: Upload release
        id: upload_release
        uses: actions/github-script@v3
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const fs = require('fs');
            // Retrieve necessary information
            const release_id = `${{ steps.create_release.outputs.id }}`;
            const buildFolder = `${{ github.event.inputs.build_folder }}`;
            const descriptionFilePath = `${buildFolder}/README.MD`;
            let releaseDescription = '';
            // Check for README file and load its content
            if (fs.existsSync(descriptionFilePath)) {
              releaseDescription = fs.readFileSync(descriptionFilePath, 'utf8');
              console.log("Successfully read release description from 'README.MD'");
            } else {
              console.warn("README.MD not found in the specified build folder. Proceeding without a detailed description.");
            }
            // Construct the environment.yml GitHub file URL manually
            const repoOwner = context.repo.owner;
            const repoName = context.repo.repo;
            const filePath = `${buildFolder}/environment.yml`;
            const fileUrl = `https://github.com/${repoOwner}/${repoName}/blob/${{ steps.tag.outputs.name }}/${filePath}`;
            console.log(`Constructed file URL: ${fileUrl}`);
            // Append the file URL to the release description
            releaseDescription += `\n\nView the corresponding [environment.yml](${fileUrl}) file used for this release.`;
            releaseDescription += `\n\nTo use it, unzip the folders inside and put them under ${{ github.event.inputs.target_folder }}`;
            // Update the release with the description
            await github.repos.updateRelease({
              owner: context.repo.owner,
              repo: context.repo.repo,
              release_id,
              body: releaseDescription,
            });
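            // Upload every file in ./zipped (the 7z volumes, plus the exported
            // environment file when present) as a release asset.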
            for (const file of fs.readdirSync('./zipped')) {
              console.log('uploadReleaseAsset', file);
              await github.repos.uploadReleaseAsset({
                owner: context.repo.owner,
                repo: context.repo.repo,
                release_id: release_id,
                name: file,
                data: fs.readFileSync(`zipped/${file}`)
              });
            }