generated from jupyter-naas/data-product-framework
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
311 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import yaml, os | ||
import pydash as _ | ||
|
||
template_str = """ | ||
name: CI/CD Workflow | ||
on: {} | ||
jobs: | ||
scheduler: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v4 | ||
- name: Log in to GitHub Container Registry | ||
uses: docker/login-action@v3 | ||
with: | ||
registry: ghcr.io | ||
username: ${{ github.actor }} | ||
password: ${{ secrets.GITHUB_TOKEN }} | ||
- name: Pull Docker image | ||
run: docker pull ghcr.io/jupyter-naas/abi/abi:latest | ||
# - name: Run Papermill | ||
# run: | | ||
# docker run --name abi-execution -i --platform linux/amd64 ghcr.io/jupyter-naas/abi/abi:latest ls | ||
# mkdir output | ||
# docker cp abi-execution:/app/__pipeline__.ipynb ./output/__pipeline__.ipynb | ||
# - name: Upload output artifacts | ||
# uses: actions/upload-artifact@v4 | ||
# with: | ||
# name: output-files | ||
# path: ./output | ||
""" | ||
|
||
def generate_schedulers(config: dict, template: str):
    """Write one GitHub Actions workflow file per enabled scheduler.

    For every entry of ``config["schedulers"]`` whose ``enabled`` flag is not
    ``False``, the YAML ``template`` is parsed, given a name and a cron
    trigger, extended with a step that runs the scheduler inside the Docker
    image and a step that uploads the produced outputs, and finally written
    to ``.github/workflows/scheduler__<name>.yaml``.

    Args:
        config: Parsed configuration; must contain a ``schedulers`` list whose
            items provide at least ``name`` and ``cron``.
        template: YAML text of the base workflow. It must define
            ``jobs.scheduler.steps`` as a list.
    """
    # Local imports: only this function needs them.
    import re
    import shlex

    workflows_dir = '.github/workflows'
    # Make sure the target directory exists before writing any workflow.
    os.makedirs(workflows_dir, exist_ok=True)

    for scheduler in config["schedulers"]:
        # Skip disabled schedulers
        if scheduler.get("enabled", False) is False:
            continue

        # Parse a fresh copy of the template for every scheduler so that the
        # steps appended below never leak from one workflow into the next.
        cicd = yaml.safe_load(template)

        # PyYAML resolves the bare key `on` to the boolean True (YAML 1.1).
        # Drop that entry if present; the real "on" trigger is set below.
        cicd.pop(True, None)

        cicd["name"] = f"Scheduler - {scheduler['name']}"
        cicd["on"] = {
            "schedule": [{"cron": scheduler["cron"]}],
            "workflow_dispatch": {},
        }

        # Quote the name for the shell so that quotes, `$` or backticks in a
        # scheduler name cannot break (or inject into) the generated command.
        quoted_name = shlex.quote(scheduler["name"])
        run_script = "\n".join([
            "# Generate unique id",
            'export SCHEDULER_ID=$(python -c "import uuid; print(uuid.uuid4())")',
            "# Execute the Scheduler script",
            "docker run --name $SCHEDULER_ID -i --platform linux/amd64 "
            "ghcr.io/jupyter-naas/abi/abi:latest "
            f"python .github/scripts/run_scheduler.py {quoted_name}",
            "# Create the output directory that will be used to store the "
            "output files and save them as artifacts.",
            "mkdir -p outputs/",
            "# Copy the output files from the container to the host.",
            "docker cp $SCHEDULER_ID:/app/outputs ./outputs/",
        ]) + "\n"

        steps = cicd["jobs"]["scheduler"]["steps"]

        # Step that executes the scheduler inside the container.
        steps.append({
            'name': scheduler['name'],
            'run': run_script,
        })

        # Step that publishes whatever the scheduler wrote to ./outputs.
        steps.append({
            'name': 'Upload output artifacts',
            'uses': 'actions/upload-artifact@v4',
            'with': {
                'name': 'output-files',
                'path': './outputs',
            },
        })

        # Make sure scheduler name is a valid filename: drop characters that
        # are invalid in file names (including path separators), then
        # normalise spaces and case.
        scheduler_name = re.sub(r'[\\/*?:"<>|]', '', scheduler["name"])
        scheduler_name = scheduler_name.strip().replace(" ", "_").lower()

        # Write to file; the context manager guarantees the handle is
        # flushed and closed.
        workflow_path = os.path.join(
            workflows_dir, f'scheduler__{scheduler_name}.yaml'
        )
        with open(workflow_path, "w") as workflow_file:
            yaml.dump(cicd, workflow_file)
|
||
if __name__ == "__main__":
    # Read the project configuration, then emit the scheduler workflows.
    with open("config.yml", "r") as config_file:
        loaded_config = yaml.safe_load(config_file)

    generate_schedulers(loaded_config, template_str)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#!/usr/bin/env python | ||
|
||
# This script is used to run a scheduler defined in config.yml | ||
|
||
import os | ||
import sys | ||
|
||
import papermill | ||
import yaml | ||
import re | ||
|
||
class SchedulerNotFoundError(Exception):
    """Raised when the requested scheduler name is absent from config.yml."""
|
||
|
||
class UnknownStepTypeError(Exception):
    """Raised when a scheduler step declares a type that is not supported."""
|
||
# Backing up environment variables. | ||
environment_vars_backup: dict[str, str] = os.environ.copy() | ||
|
||
|
||
def sanitize_string_to_filename(filename):
    """Turn an arbitrary string into a lowercase, filesystem-friendly name.

    Args:
        filename: The string to sanitize.

    Returns:
        The input with the characters ``\\ / * ? : " < > |`` removed,
        surrounding whitespace stripped, inner spaces replaced by
        underscores, truncated to 255 characters and lowercased.
    """
    # Remove invalid characters
    filename = re.sub(r'[\\/*?:"<>|]', '', filename)

    # Remove leading/trailing whitespace. This must happen BEFORE spaces are
    # turned into underscores, otherwise surrounding spaces survive as
    # leading/trailing underscores.
    filename = filename.strip()

    # Replace spaces with underscores
    filename = filename.replace(' ', '_')

    # Ensure filename doesn't exceed the max length
    max_filename_length = 255
    filename = filename[:max_filename_length]

    return filename.lower()
|
||
def get_scheduler(scheduler_name: str):
    """Look up a scheduler definition by name in ``config.yml``.

    Args:
        scheduler_name: Exact value of the scheduler's ``name`` field.

    Returns:
        The first matching entry of the ``schedulers`` list.

    Raises:
        SchedulerNotFoundError: If no scheduler carries that name.
    """
    with open("config.yml", "r") as config_file:
        config = yaml.safe_load(config_file)

    # First entry whose name matches, or None when there is no match.
    found = next(
        (
            candidate
            for candidate in config["schedulers"]
            if candidate["name"] == scheduler_name
        ),
        None,
    )
    if found is None:
        raise SchedulerNotFoundError(
            f"Scheduler '{scheduler_name}' not found in config.yml"
        )
    return found
|
||
|
||
def reset_environment_vars():
    """Restore ``os.environ`` to the snapshot in ``environment_vars_backup``."""
    environ = os.environ
    # Wipe everything first so variables added since the backup disappear too.
    environ.clear()
    environ.update(environment_vars_backup)
|
||
|
||
def run_notebook_step(scheduler_name: str, step: dict):
    """Execute one notebook step of a scheduler with papermill.

    The environment is first reset to its backed-up state, then the step's
    ``environment_variables`` (if any) are applied. The executed notebook is
    written under
    ``outputs/scheduler_executions/<scheduler>/<step>/<entrypoint dir>/``.

    Args:
        scheduler_name: Name of the scheduler the step belongs to; used to
            build the output directory.
        step: Step definition. Reads ``entrypoint`` and ``name``, and
            optionally ``environment_variables`` and ``inputs``.
    """
    reset_environment_vars()

    # `or {}` tolerates an `environment_variables:` key left empty in YAML.
    for key, value in (step.get("environment_variables") or {}).items():
        # os.environ only accepts strings, whereas YAML scalars may be
        # ints, floats or booleans; convert instead of raising TypeError.
        os.environ[str(key)] = str(value)

    entrypoint = step["entrypoint"]
    # Split "dir/sub/notebook.ipynb" into its directory and file name.
    entrypoint_path, _, notebook_name = entrypoint.rpartition('/')

    output_path = os.path.join(
        f"outputs/scheduler_executions/"
        f"{sanitize_string_to_filename(scheduler_name)}/"
        f"{sanitize_string_to_filename(step['name'])}",
        entrypoint_path,
    )
    os.makedirs(output_path, exist_ok=True)

    papermill.execute_notebook(
        input_path=entrypoint,
        output_path=os.path.join(output_path, notebook_name),
        parameters=step.get("inputs", {}),
    )
|
||
def run_scheduler(scheduler_name: str):
    """Run every enabled step of the named scheduler, in order.

    Args:
        scheduler_name: Name of the scheduler to look up and execute.

    Raises:
        UnknownStepTypeError: If an enabled step has a type other than
            ``"notebook"``.
    """
    scheduler = get_scheduler(scheduler_name)

    for step in scheduler["steps"]:
        # Steps are opt-in: a missing or False `enabled` flag skips the step.
        enabled = step.get("enabled", False)
        if enabled is False:
            continue

        # Guard clause: "notebook" is the only supported step type.
        if step.get("type") != "notebook":
            raise UnknownStepTypeError(f"Unknown step type: {step.get('type')}")

        run_notebook_step(scheduler_name, step)
|
||
|
||
if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError when the
    # scheduler name is missing. Extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit("Usage: python run_scheduler.py <scheduler_name>")

    scheduler_name = sys.argv[1]
    run_scheduler(scheduler_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import json | ||
import yaml | ||
from jsonschema import validate, ValidationError | ||
|
||
# Exactly two positional arguments are expected: schema path, then data path.
if len(sys.argv) != 3:
    print("Usage: python validate_jsonschema_yaml.py <schemafile> <datafile>")
    sys.exit(1)

schema_file, data_file = sys.argv[1:]

# Load JSON schema
with open(schema_file) as schema_handle:
    schema = json.load(schema_handle)

# Load YAML data
with open(data_file) as data_handle:
    data = yaml.safe_load(data_handle)

# Validate: exit 1 with the validation error, or exit 0 on success.
try:
    validate(instance=data, schema=schema)
except ValidationError as error:
    print(error)
    sys.exit(1)
else:
    print("YAML data is valid")
    sys.exit(0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
{ | ||
"python.defaultInterpreterPath": ".abi-config/bin/python" | ||
"python.defaultInterpreterPath": ".abi-config/bin/python", | ||
"conventionalCommits.scopes": [ | ||
"CI/CD" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters