Skip to content

Commit

Permalink
added automation for deployment
Browse files Browse the repository at this point in the history
  • Loading branch information
vighnesh-wednesday committed Jan 2, 2024
1 parent f048ab3 commit a668567
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 80 deletions.
13 changes: 6 additions & 7 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ jobs:
with:
python-version: 3.9

- run: |
- name: Build App Wheel
run: |
pip install setuptools wheel
python3 setup.py bdist_wheel
# Step 1: Copy script to S3 bucket
- name: Copy script to S3 bucket
- name: Setup AWS cli & upload App Wheel to S3
uses: jakejarvis/[email protected]
with:
args: --follow-symlinks
Expand All @@ -36,11 +36,10 @@ jobs:
DEST_DIR: $S3_SCRIPTS_PATH
AWS_S3_BUCKET: $S3_BUCKET_NAME

- name: Upload Scripts to S3
run: aws s3 cp jobs "s3://$S3_BUCKET_NAME/$S3_SCRIPTS_PATH/" --recursive --region ap-south-1

- name: Upload Script file to S3
run: aws s3 cp ./main.py "s3://$S3_BUCKET_NAME/$S3_SCRIPTS_PATH/" --region ap-south-1

- name: Update parameters for the job
- name: Deploy Jobs on Glue
run: |
scripts/update-parameters.sh
scripts/update-job.sh
32 changes: 32 additions & 0 deletions automation/create_glue_job.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"Name": "samplename",
"Description": "",
"LogUri": "",
"Role": "samplerole",
"ExecutionProperty": {
"MaxConcurrentRuns": 1
},
"Command": {
"Name": "glueetl",
"ScriptLocation": "sample-location",
"PythonVersion": "3"
},
"DefaultArguments": {
"--enable-glue-datacatalog": "true",
"--job-bookmark-option": "job-bookmark-disable",
"--TempDir": "sample-bucket/Logs/temp/",
"--enable-metrics": "true",
"--extra-py-files": "sample-bucket/scripts/sample-wheel",
"--spark-event-logs-path": "sample-bucket/Logs/UILogs/",
"--enable-job-insights": "false",
"--additional-python-modules": "python-dotenv,kaggle",
"--enable-observability-metrics": "true",
"--enable-continuous-cloudwatch-log": "true",
"--job-language": "python"
},
"MaxRetries": 0,
"Timeout": 10,
"WorkerType": "G.1X",
"NumberOfWorkers": 2,
"GlueVersion": "4.0"
}
73 changes: 73 additions & 0 deletions automation/deploy_glue_job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/bin/bash
#
# Create or update one AWS Glue job for every Python script in jobs/.
#
# Usage:
#   automation/deploy_glue_job.sh <s3_bucket> <role> [kaggle_key] [kaggle_username]
#
# Run from the repository root: the script uses the relative paths
# ./app/.custom-env, jobs/*.py and automation/*.json.
#
# Environment (read after sourcing ./app/.custom-env):
#   GLUE_READ_PATH, GLUE_WRITE_PATH, KAGGLE_PATH  - forwarded as job arguments
#   GLUE_WHEEL_NAME - optional; wheel file under s3://<bucket>/scripts/
#                     (default: app-0.9-py3-none-any.whl)
#
# Requires: aws CLI, jq.
#
# NOTE: the rendered automation/output_<job>.json files contain the Kaggle
# credentials in clear text; do not commit or publish them.

# Abort on the first failing command or pipeline stage so a broken AWS/jq call
# cannot silently lead to a half-deployed set of jobs.
set -eo pipefail

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

usage='usage: deploy_glue_job.sh <s3_bucket> <role> [kaggle_key] [kaggle_username]'
s3_bucket="${1:?$usage}"
role="${2:?$usage}"
kaggle_key="${3:-}"
kaggle_username="${4:-}"

env_file=./app/.custom-env
[[ -r "$env_file" ]] || die "cannot read $env_file (run from the repository root)"
# shellcheck source=/dev/null
source "$env_file"

# Enabled only after sourcing, so the env file itself is not subject to the
# unset-variable check.
set -u

wheel_name="${GLUE_WHEEL_NAME:-app-0.9-py3-none-any.whl}"

# jq helper shared by both templates: fills in every per-job setting on the
# object it is applied to. The $NAMES are jq variables supplied via --arg,
# not shell variables, hence the single quotes.
# shellcheck disable=SC2016
readonly JOB_SETTINGS_FILTER='
def fill:
  .Command.ScriptLocation = $SCRIPT_LOCATION
  | .Role = $ROLE
  | .DefaultArguments["--TempDir"] = $TEMP_DIR
  | .DefaultArguments["--spark-event-logs-path"] = $EVENT_LOG
  | .DefaultArguments["--extra-py-files"] = $WHEEL
  | .DefaultArguments["--KAGGLE_KEY"] = $KAGGLE_KEY
  | .DefaultArguments["--KAGGLE_USERNAME"] = $KAGGLE_USERNAME
  | .DefaultArguments["--GLUE_READ_PATH"] = $GLUE_READ_PATH
  | .DefaultArguments["--GLUE_WRITE_PATH"] = $GLUE_WRITE_PATH
  | .DefaultArguments["--KAGGLE_PATH"] = $KAGGLE_PATH;
'

#######################################
# Render the Glue CLI input JSON for one job.
# Globals:   s3_bucket, role, kaggle_key, kaggle_username, wheel_name,
#            GLUE_READ_PATH, GLUE_WRITE_PATH, KAGGLE_PATH (all read)
# Arguments: $1 - "create" or "update" (selects template and JSON layout)
#            $2 - job name (script base name without .py)
# Outputs:   rendered JSON on stdout
# Returns:   non-zero if jq fails or the mode is unknown
#######################################
render_job_definition() {
  local mode=$1
  local name=$2
  local template filter

  # shellcheck disable=SC2016
  case "$mode" in
    create)
      template=automation/create_glue_job.json
      filter='.Name = $NAME | fill'
      ;;
    update)
      # NOTE(review): the update template does not carry
      # "--additional-python-modules", which the create template sets.
      # If update-job replaces the whole definition, updated jobs lose
      # those modules - confirm and align the templates.
      template=automation/update_glue_job.json
      filter='.JobName = $NAME | .JobUpdate |= fill'
      ;;
    *)
      die "render_job_definition: unknown mode '$mode'"
      ;;
  esac

  jq \
    --arg NAME "$name" \
    --arg SCRIPT_LOCATION "s3://${s3_bucket}/scripts/${name}.py" \
    --arg ROLE "$role" \
    --arg TEMP_DIR "s3://${s3_bucket}/Logs/temp/" \
    --arg EVENT_LOG "s3://${s3_bucket}/Logs/UILogs/" \
    --arg WHEEL "s3://${s3_bucket}/scripts/${wheel_name}" \
    --arg KAGGLE_KEY "$kaggle_key" \
    --arg KAGGLE_USERNAME "$kaggle_username" \
    --arg GLUE_READ_PATH "${GLUE_READ_PATH:-}" \
    --arg GLUE_WRITE_PATH "${GLUE_WRITE_PATH:-}" \
    --arg KAGGLE_PATH "${KAGGLE_PATH:-}" \
    "${JOB_SETTINGS_FILTER} ${filter}" \
    "$template"
}

#######################################
# Tell whether a Glue job with exactly this name already exists.
# Globals:   job_names (read) - newline-separated list of existing jobs
# Arguments: $1 - job name
# Returns:   0 if the job exists, 1 otherwise
#######################################
job_exists() {
  # -F -x: literal whole-line match. A substring match would treat "demo" as
  # existing whenever e.g. "demo_v2" exists and wrongly take the update path.
  grep -Fxq -- "$1" <<<"$job_names"
}

# One job name per line. With pipefail + errexit a failing AWS call stops the
# script here instead of yielding an empty list (which would send every job
# down the create path).
job_names=$(aws glue get-jobs | jq -r '.Jobs[].Name')

# Without nullglob an empty jobs/ directory would iterate once over the
# literal pattern "jobs/*.py".
shopt -s nullglob

for file in jobs/*.py; do
  filename=$(basename -- "$file" .py)

  # Package marker, not a Glue job.
  if [[ "$filename" == "__init__" ]]; then
    continue
  fi

  output="automation/output_${filename}.json"

  if job_exists "$filename"; then
    render_job_definition update "$filename" >"$output"
    aws glue update-job --cli-input-json "file://${output}"
  else
    render_job_definition create "$filename" >"$output"
    aws glue create-job --cli-input-json "file://${output}"
  fi
done
31 changes: 31 additions & 0 deletions automation/update_glue_job.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"JobName": "sample-name",
"JobUpdate": {
"Description": "",
"Role": "sample-role",
"ExecutionProperty": {
"MaxConcurrentRuns": 1
},
"Command": {
"Name": "glueetl",
"ScriptLocation": "sample-location",
"PythonVersion": "3"
},
"DefaultArguments": {
"--enable-glue-datacatalog": "true",
"--job-bookmark-option": "job-bookmark-enable",
"--TempDir": "s3://sample-bucket/scripts/temp/",
"--enable-metrics": "true",
"--enable-spark-ui": "true",
"--spark-event-logs-path": "s3://sample-bucket/Logs/UILogs/",
"--enable-job-insights": "true",
"--enable-continuous-cloudwatch-log": "true",
"--job-language": "python"
},
"MaxRetries": 0,
"Timeout": 10,
"WorkerType": "G.1X",
"NumberOfWorkers": 2,
"GlueVersion": "4.0"
}
}
Empty file added jobs/__init__.py
Empty file.
Empty file added jobs/demo.py
Empty file.
57 changes: 0 additions & 57 deletions scripts/update-job.sh

This file was deleted.

16 changes: 0 additions & 16 deletions scripts/update-parameters.sh

This file was deleted.

0 comments on commit a668567

Please sign in to comment.