Skip to content

feat(datasets): add synthetic data generation pipeline #378

feat(datasets): add synthetic data generation pipeline

feat(datasets): add synthetic data generation pipeline #378

Workflow file for this run

name: Continuous Integration
on:
push:
branches: [main, master]
pull_request:
jobs:
lints:
name: Run linters
runs-on: ubuntu-latest
timeout-minutes: 15
permissions:
checks: write
pull-requests: write
contents: read
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
version: "0.4.20"
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Cache pre-commit
uses: actions/cache@v3
with:
path: ~/.cache/pre-commit
key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
- name: Install Dependencies
run: source ./setup_dev_env.sh
- name: Run pre-commit checks
run: |
source .venv/bin/activate
pre-commit run --all-files --show-diff-on-failure --color always
- name: Run ruff formatter
run: uvx ruff format --check
- name: Run ruff linter
run: uvx ruff check
- name: Run mypy
run: uv run mypy .
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
env:
TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2,public.ecr.aws/aquasecurity/trivy-db:2
with:
scan-type: "fs"
ignore-unfixed: true
exit-code: 0 # change if you want to fail build on vulnerabilities
severity: "CRITICAL,HIGH,MEDIUM"
format: "table"
output: "trivy-scanning-results.txt"
- name: Format trivy message
run: |
echo "Trivy scanning results." >> trivy.txt
cat trivy-scanning-results.txt >> trivy.txt
- name: Add trivy report to PR
uses: thollander/actions-comment-pull-request@v2
continue-on-error: true
if: ${{ github.event_name == 'pull_request' }}
with:
filePath: trivy.txt
reactions: ""
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
comment_tag: trivy
- name: Check licenses
run: |
source .venv/bin/activate
./check_licenses.sh
- name: Generate pip freeze
run: |
source .venv/bin/activate
pip freeze > requirements-freeze.txt
- name: Publish Artefacts
uses: actions/upload-artifact@v3
if: always()
continue-on-error: true
with:
name: results
path: |
requirements-freeze.txt
licenses.txt
trivy-scanning-results.txt
retention-days: 30
- name: Publish Test Report
uses: actions/upload-artifact@v3
if: always()
continue-on-error: true
with:
name: test-report
path: report.xml
retention-days: 10
- name: Validate package build
run: |
source .venv/bin/activate
for dir in packages/*/; do uv build "$dir" --out-dir dist; done
- name: Publish Package
uses: actions/upload-artifact@v3
continue-on-error: true
if: success()
with:
name: packages
path: dist/**
retention-days: 3
tests:
name: Run tests
runs-on: ubuntu-latest
timeout-minutes: 15
permissions:
checks: write
pull-requests: write
contents: write # required for advanced coverage reporting (to keep branch)
strategy:
fail-fast: false # do not stop all jobs if one fails
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
version: "0.4.20"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Cache Dependencies
uses: actions/cache@v3
with:
path: ~/.cache/uv
key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
restore-keys: |
${{ runner.os }}-pip-
- name: Install Dependencies
run: source ./setup_dev_env.sh
- name: Run Tests With Coverage
run: |
# run with coverage to not execute tests twice
source .venv/bin/activate
coverage run -m pytest -v -p no:warnings --junitxml=report.xml
coverage report
coverage xml
- name: Test Report
uses: mikepenz/action-junit-report@v4
continue-on-error: true
if: always()
with:
report_paths: 'report.xml'
- name: Publish Test Report
uses: actions/upload-artifact@v3
continue-on-error: true
if: always()
with:
name: test-report
path: report.xml
retention-days: 10
# simpler version for code coverage reporting
# - name: Produce Coverage report
# uses: 5monkeys/cobertura-action@v13
# continue-on-error: true
# with:
# path: coverage.xml
# minimum_coverage: 70
# fail_below_threshold: false
# more complex version for better coverage reporting
- name: Produce the coverage report
uses: insightsengineering/coverage-action@v2
continue-on-error: true
with:
# Path to the Cobertura XML report.
path: coverage.xml
# Minimum total coverage, if you want to the
# workflow to enforce it as a standard.
# This has no effect if the `fail` arg is set to `false`.
threshold: 60
# Fail the workflow if the minimum code coverage
# reuqirements are not satisfied.
fail: false
# Publish the rendered output as a PR comment
publish: true
# Create a coverage diff report.
diff: true
# Branch to diff against.
# Compare the current coverage to the coverage
# determined on this branch.
diff-branch: ${{ github.event.repository.default_branch }}
# make report togglable
togglable-report: true
# This is where the coverage reports for the
# `diff-branch` are stored.
# Branch is created if it doesn't already exist'.
diff-storage: _xml_coverage_reports
# A custom title that can be added to the code
# coverage summary in the PR comment.
coverage-summary-title: "Code Coverage Summary"