diff --git a/.github/actions/setup_environment/action.yml b/.github/actions/setup_environment/action.yml index d5b70818292..29bd5f7bc98 100644 --- a/.github/actions/setup_environment/action.yml +++ b/.github/actions/setup_environment/action.yml @@ -30,7 +30,7 @@ runs: using: composite steps: - name: Set up Python - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: ${{ inputs.python-version }} - name: Delete error-causing bash diff --git a/.github/workflows/ci-fast.yml b/.github/workflows/ci-fast.yml index 40eae8107ce..e0c26841e9e 100644 --- a/.github/workflows/ci-fast.yml +++ b/.github/workflows/ci-fast.yml @@ -16,12 +16,12 @@ concurrency: jobs: docstring-check: if: github.event.pull_request.draft == false - runs-on: arc-runner-set + runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: Set up Python - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.10' - name: Install darglint using uv @@ -32,7 +32,7 @@ jobs: - name: Check docstrings run: bash scripts/docstring.sh sqlite-db-migration-testing-random: - runs-on: arc-runner-set + runs-on: ubuntu-latest env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true @@ -40,11 +40,11 @@ jobs: if: github.event.pull_request.draft == false || github.event_name == 'workflow_dispatch' steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Test migrations across versions @@ -54,9 +54,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: Spelling checker - uses: crate-ci/typos@v1.17.0 + uses: crate-ci/typos@v1.27.0 with: files: . config: ./.typos.toml @@ -65,9 +65,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: Set up Python 3.11 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.11' - name: Test API docs buildable @@ -82,7 +82,20 @@ jobs: python-version: '3.9' os: ubuntu-latest secrets: inherit - custom-ubuntu-setup-and-unit-test: + linting: + if: github.event.pull_request.draft == false + strategy: + matrix: + os: [ubuntu-latest] + python-version: ['3.11'] + fail-fast: false + uses: ./.github/workflows/linting.yml + with: + python-version: ${{ matrix.python-version }} + os: ${{ matrix.os }} + secrets: inherit + ubuntu-setup-and-unit-test: + needs: linting if: github.event.pull_request.draft == false strategy: matrix: @@ -90,25 +103,25 @@ jobs: # and `3.10` in our `ci-fast` workflow, this combination has been # excluded from the `ci-slow` workflow. If you change the configuration # here, please adjust the configuration of `ci-slow` accordingly. 
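+ # NOTE: `ci-fast` now uses `ubuntu-latest` with Python 3.11, and `ci-slow` + # covers Python 3.9, 3.10 and 3.12 (see the matrices in ci-slow.yml below).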
- os: [arc-runner-set] - python-version: ['3.10'] + os: [ubuntu-latest] + python-version: ['3.11'] fail-fast: false uses: ./.github/workflows/unit-test.yml with: python-version: ${{ matrix.python-version }} os: ${{ matrix.os }} secrets: inherit - custom-arc-runner-set-integration-test: + ubuntu-latest-integration-test: + needs: [linting] if: github.event.pull_request.draft == false strategy: matrix: - # IMPORTANT: Since we are using the combinations of `arc-runner-set` - # `3.10` and two different test environments in our `ci-fast` workflow, - # these combination have been excluded from the `ci-slow` workflow. - # If you change the configuration here, please adjust the configuration - # of `ci-slow` accordingly. - os: [arc-runner-set] - python-version: ['3.10'] + # IMPORTANT: Since we are using the combination of `ubuntu-latest` + # and `3.11` in our `ci-fast` workflow, this combination has been + # excluded from the `ci-slow` workflow. If you change the configuration + # here, please adjust the configuration of `ci-slow` accordingly. + os: [ubuntu-latest] + python-version: ['3.11'] test_environment: [default, docker-server-docker-orchestrator-mysql] fail-fast: false uses: ./.github/workflows/integration-test-fast.yml diff --git a/.github/workflows/ci-slow.yml b/.github/workflows/ci-slow.yml index 7bca266e494..59a367ad559 100644 --- a/.github/workflows/ci-slow.yml +++ b/.github/workflows/ci-slow.yml @@ -23,7 +23,7 @@ jobs: # With dynamic approach dev can set label and rerun this flow to make it running. - name: Get PR labels id: pr-labels - uses: actions/github-script@v5 + uses: actions/github-script@v7.0.1 with: script: | const prNumber = ${{ github.event.pull_request.number }}; @@ -45,13 +45,13 @@ env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true - runs-on: arc-runner-set + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Login to Docker Hub @@ -68,13 +68,13 @@ env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true - runs-on: arc-runner-set + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Login to Docker Hub @@ -87,18 +87,18 @@ run: bash scripts/test-migrations.sh mysql random sqlite-db-migration-testing-full: needs: run-slow-ci-label-is-set - runs-on: arc-runner-set + runs-on: ubuntu-latest env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true if: github.event.pull_request.draft == false steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Test migrations across versions @@ -109,13 +109,13 @@ env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true - runs-on: arc-runner-set + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Login to Docker Hub @@ -131,9 +131,9 @@ needs: run-slow-ci-label-is-set runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.1 + - uses:
actions/checkout@v4.2.2 - name: Set up Python - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.11' - name: Install uv @@ -171,28 +171,48 @@ jobs: bash scripts/check-alembic-branches.sh - name: Install latest dashboard (test gitignore) run: bash scripts/install-dashboard.sh - custom-ubuntu-unit-test: + ubuntu-linting: + needs: run-slow-ci-label-is-set + if: github.event.pull_request.draft == false + strategy: + matrix: + os: [ubuntu-latest] + python-version: ['3.9', '3.10', '3.12'] + fail-fast: false + uses: ./.github/workflows/linting.yml + with: + python-version: ${{ matrix.python-version }} + os: ${{ matrix.os }} + secrets: inherit + ubuntu-unit-test: if: github.event.pull_request.draft == false + needs: [run-slow-ci-label-is-set, ubuntu-linting] + strategy: + matrix: + os: [ubuntu-latest] + python-version: ['3.9', '3.10', '3.12'] + fail-fast: false + uses: ./.github/workflows/unit-test.yml + with: + python-version: ${{ matrix.python-version }} + os: ${{ matrix.os }} + secrets: inherit + windows-linting: needs: run-slow-ci-label-is-set + if: github.event.pull_request.draft == false strategy: matrix: - os: [arc-runner-set] + os: [windows-latest] python-version: ['3.9', '3.10', '3.11', '3.12'] - # IMPORTANT: Since we are using the following combination - # in our `ci-fast` workflow, this combination has been - # excluded from the `ci-slow` workflow. - exclude: - - os: arc-runner-set - python-version: '3.10' fail-fast: false - uses: ./.github/workflows/unit-test.yml + uses: ./.github/workflows/linting.yml with: python-version: ${{ matrix.python-version }} os: ${{ matrix.os }} secrets: inherit windows-unit-test: if: github.event.pull_request.draft == false - needs: run-slow-ci-label-is-set + needs: [run-slow-ci-label-is-set, windows-linting] strategy: matrix: os: [windows-latest] @@ -203,9 +223,22 @@ jobs: python-version: ${{ matrix.python-version }} os: ${{ matrix.os }} secrets: inherit + macos-linting: + needs: run-slow-ci-label-is-set + if: github.event.pull_request.draft == false + strategy: + matrix: + os: [macos-latest] + python-version: ['3.9', '3.10', '3.11', '3.12'] + fail-fast: false + uses: ./.github/workflows/linting.yml + with: + python-version: ${{ matrix.python-version }} + os: ${{ matrix.os }} + secrets: inherit macos-unit-test: if: github.event.pull_request.draft == false - needs: run-slow-ci-label-is-set + needs: [run-slow-ci-label-is-set, macos-linting] strategy: matrix: os: [macos-latest] @@ -224,7 +257,7 @@ jobs: secrets: inherit windows-integration-test: if: github.event.pull_request.draft == false - needs: run-slow-ci-label-is-set + needs: [run-slow-ci-label-is-set, windows-unit-test] strategy: matrix: os: [windows-latest] @@ -239,7 +272,7 @@ jobs: secrets: inherit macos-integration-test: if: github.event.pull_request.draft == false - needs: run-slow-ci-label-is-set + needs: [run-slow-ci-label-is-set, macos-unit-test] strategy: matrix: os: [macos-13] @@ -258,13 +291,13 @@ jobs: python-version: ${{ matrix.python-version }} test_environment: ${{ matrix.test_environment }} secrets: inherit - custom-ubuntu-integration-test: + ubuntu-latest-integration-test: if: github.event.pull_request.draft == false - needs: run-slow-ci-label-is-set + needs: [run-slow-ci-label-is-set, ubuntu-unit-test] strategy: matrix: - os: [arc-runner-set] - python-version: ['3.9', '3.10', '3.11', '3.12'] + os: [ubuntu-latest] + python-version: ['3.9', '3.10', '3.12'] test_environment: - default - docker-server-docker-orchestrator-mysql @@ -272,26 
+305,17 @@ jobs: exclude: # docker is time-consuming to run, so we only run it on 3.9 - test_environment: docker-server-docker-orchestrator-mysql - python-version: '3.10' + python-version: '3.9' - test_environment: docker-server-docker-orchestrator-mysql - python-version: '3.11' + python-version: '3.10' - test_environment: docker-server-docker-orchestrator-mysql python-version: '3.12' - test_environment: docker-server-docker-orchestrator-mariadb - python-version: '3.10' + python-version: '3.9' - test_environment: docker-server-docker-orchestrator-mariadb - python-version: '3.11' + python-version: '3.10' - test_environment: docker-server-docker-orchestrator-mariadb python-version: '3.12' - # IMPORTANT: Since we are using the following combinations - # in our `ci-fast` workflow, this combination has been - # excluded from the `ci-slow` workflow. - - os: arc-runner-set - test_environment: default - python-version: '3.10' - - os: arc-runner-set - test_environment: docker-server-docker-orchestrator-mysql - python-version: '3.10' fail-fast: false uses: ./.github/workflows/integration-test-slow.yml with: diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 1decfe4f6cf..c52b8fb71cd 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -23,7 +23,7 @@ jobs: language: [python] steps: - name: Checkout repository - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: Initialize CodeQL uses: github/codeql-action/init@v3 with: diff --git a/.github/workflows/generate-test-duration.yml b/.github/workflows/generate-test-duration.yml index a4c441c1bf9..36d220836ab 100644 --- a/.github/workflows/generate-test-duration.yml +++ b/.github/workflows/generate-test-duration.yml @@ -7,7 +7,7 @@ on: jobs: generate-test-duration-file: name: Generate test duration file - runs-on: arc-runner-set + runs-on: ubuntu-latest strategy: fail-fast: false env: @@ -23,7 +23,7 @@ jobs: run: shell: bash steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: ref: develop - name: Setup environment @@ -31,7 +31,7 @@ jobs: with: cache_version: ${{ secrets.GH_ACTIONS_CACHE_KEY }} python-version: '3.10' - os: arc-runner-set + os: ubuntu-latest runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }} runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }} discord_webhook: ${{ secrets.DISCORD_WEBHOOK }} diff --git a/.github/workflows/image-optimiser.yml b/.github/workflows/image-optimiser.yml index c64ca19ddc3..a7a5c560d8e 100644 --- a/.github/workflows/image-optimiser.yml +++ b/.github/workflows/image-optimiser.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repo - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: Compress Images uses: calibreapp/image-actions@1.1.0 with: diff --git a/.github/workflows/integration-test-fast-services.yml b/.github/workflows/integration-test-fast-services.yml new file mode 100644 index 00000000000..63a0b7a3b99 --- /dev/null +++ b/.github/workflows/integration-test-fast-services.yml @@ -0,0 +1,271 @@ +--- +name: Integration Tests (Fast CI) +on: + workflow_call: + inputs: + os: + description: OS + type: string + required: true + python-version: + description: Python version + type: string + required: true + test_environment: + description: The test environment + type: string + required: true + enable_tmate: + description: Enable tmate session for debugging + type: string + required: false + default: never + tmate_timeout: + 
description: Timeout for tmate session (minutes) + type: number + required: false + default: 30 + workflow_dispatch: + inputs: + os: + description: OS + type: choice + options: [ubuntu-latest, macos-13, windows-latest] + required: false + default: ubuntu-latest + python-version: + description: Python version + type: choice + options: ['3.9', '3.10', '3.11', '3.12'] + required: false + default: '3.11' + test_environment: + description: The test environment + type: choice + options: + # Default ZenML deployments + - default + - default-docker-orchestrator + - default-airflow-orchestrator + # Local ZenML server deployments + - local-server + - local-server-docker-orchestrator + - local-server-airflow-orchestrator + # Local ZenML docker-compose server deployments + - docker-server-mysql + - docker-server-mariadb + - docker-server-docker-orchestrator-mysql + - docker-server-docker-orchestrator-mariadb + - docker-server-airflow-orchestrator-mysql + - docker-server-airflow-orchestrator-mariadb + - github-actions-server-docker-orchestrator + required: false + default: default + enable_tmate: + description: Enable tmate session for debugging + type: choice + options: [no, on-failure, always, before-tests] + required: false + default: 'no' + tmate_timeout: + description: Timeout for tmate session (minutes) + type: number + required: false + default: 30 +jobs: + integration-tests-fast: + name: integration-tests-fast + runs-on: ${{ inputs.os }} + strategy: + fail-fast: false + matrix: + shard: [1, 2, 3, 4, 5, 6] + env: + ZENML_DEBUG: 1 + ZENML_ANALYTICS_OPT_IN: false + PYTHONIOENCODING: utf-8 + UV_HTTP_TIMEOUT: 600 + # on MAC OS, we need to set this environment variable + # to fix problems with the fork() calls (see this thread + # for more information: http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html) + OBJC_DISABLE_INITIALIZE_FORK_SAFETY: 'YES' + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_US_EAST_1_ENV_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_US_EAST_1_ENV_SECRET_ACCESS_KEY }} + AWS_US_EAST_1_SERVER_URL: ${{ secrets.AWS_US_EAST_1_SERVER_URL }} + AWS_US_EAST_1_SERVER_USERNAME: ${{ secrets.AWS_US_EAST_1_SERVER_USERNAME }} + AWS_US_EAST_1_SERVER_PASSWORD: ${{ secrets.AWS_US_EAST_1_SERVER_PASSWORD }} + GCP_US_EAST4_SERVER_URL: ${{ secrets.GCP_US_EAST4_SERVER_URL }} + GCP_US_EAST4_SERVER_USERNAME: ${{ secrets.GCP_US_EAST4_SERVER_USERNAME }} + GCP_US_EAST4_SERVER_PASSWORD: ${{ secrets.GCP_US_EAST4_SERVER_PASSWORD }} + if: ${{ ! 
startsWith(github.event.head_commit.message, 'GitBook:') }} + defaults: + run: + shell: bash + steps: + - uses: actions/checkout@v4.2.2 + - name: Restore uv cache + uses: actions/cache@v4 + with: + path: ~/.cache/uv + key: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} + restore-keys: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: ${{ secrets.AWS_US_EAST_1_ENV_ROLE_ARN }} + aws-region: us-east-1 + if: contains(inputs.test_environment, 'aws') + - name: Configure GCP credentials + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_US_EAST4_ENV_CREDENTIALS }} + if: contains(inputs.test_environment, 'gcp') + - name: Set up gcloud SDK + uses: google-github-actions/setup-gcloud@v1 + with: + install_components: gke-gcloud-auth-plugin + if: contains(inputs.test_environment, 'gcp') + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + if: github.event.pull_request.head.repo.fork == false && (contains(inputs.test_environment, + 'docker') || contains(inputs.test_environment, 'kubeflow') || contains(inputs.test_environment, + 'airflow') || contains(inputs.test_environment, 'kubernetes')) + - name: Setup environment + uses: ./.github/actions/setup_environment + with: + cache_version: ${{ secrets.GH_ACTIONS_CACHE_KEY }} + python-version: ${{ inputs.python-version }} + os: ${{ inputs.os }} + runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }} + runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }} + discord_webhook: ${{ secrets.DISCORD_WEBHOOK }} + - name: Install docker-compose for non-default environments + if: inputs.test_environment != 'default' + run: | + pip install uv + # see https://github.com/docker/docker-py/issues/3256 for why we need to pin requests + # docker-compose is deprecated and doesn't work with newer versions of docker + uv pip install --system "pyyaml==5.3.1" "requests<2.32.0" "docker==6.1.3" docker-compose + - name: Install Linux System Dependencies + if: (inputs.os == 'ubuntu-latest' || inputs.os == 'arc-runner-set') + run: sudo apt install graphviz + - name: Install MacOS System Dependencies + if: runner.os=='macOS' + run: brew install graphviz + - name: Install Windows System Dependencies + if: runner.os=='Windows' + run: choco install graphviz + - name: Unbreak python in github actions + if: runner.os=='macOS' + # github actions overwrites brew's python. Force it to reassert itself, by + # running in a separate step. + # Workaround GitHub Actions Python issues + # see https://github.com/Homebrew/homebrew-core/issues/165793#issuecomment-1989441193 + run: | + find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete + sudo rm -rf /Library/Frameworks/Python.framework/ + brew install --force python3 && brew unlink python3 && brew unlink python3 && brew link --overwrite python3 + - name: Install Docker and Colima on MacOS + if: runner.os=='macOS' + run: | + export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 + brew update + brew install docker colima + brew reinstall --force qemu + + # We need to mount the /private/tmp/zenml-test/ folder because + # this folder is also mounted in the Docker containers that are + # started by local ZenML orchestrators. 
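+ # The ":w" suffix makes the mount writable inside the Colima VM + # (Colima mounts are read-only by default).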
+ colima start --mount /private/tmp/zenml-test/:w + + # This is required for the Docker Python SDK to work + sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock + - name: Install kubectl on Linux + run: | + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + sudo install -o root -g 0 -m 0755 kubectl /usr/local/bin/kubectl + if: (inputs.os == 'ubuntu-latest' || inputs.os == 'arc-runner-set') + - name: Install kubectl on MacOS + run: | + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/darwin/amd64/kubectl" + sudo install -o root -g 0 -m 0755 kubectl /usr/local/bin/kubectl + if: runner.os=='macOS' + - name: Install K3D + run: | + curl -s https://raw.githubusercontent.com/rancher/k3d/main/install.sh | bash + if: runner.os!='Windows' && contains(inputs.test_environment, 'kubeflow') + - name: Login to Amazon ECR + id: login-ecr + run: | + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 715803424590.dkr.ecr.us-east-1.amazonaws.com + if: contains(inputs.test_environment, 'aws') + - name: Login to Amazon EKS + id: login-eks + run: | + aws eks --region us-east-1 update-kubeconfig --name zenml-ci-cluster --alias zenml-ci-aws-us-east-1 + if: contains(inputs.test_environment, 'aws') + - name: Login to Google ECR + run: | + gcloud auth configure-docker --project zenml-ci + if: contains(inputs.test_environment, 'gcp') + - name: Login to Google GKE + uses: google-github-actions/get-gke-credentials@v2 + with: + cluster_name: zenml-ci-cluster + location: us-east4 + project_id: zenml-ci + if: contains(inputs.test_environment, 'gcp') + - name: Setup tmate session before tests + if: ${{ inputs.enable_tmate == 'before-tests' }} + uses: mxschmitt/action-tmate@v3.17 + timeout-minutes: ${{ inputs.tmate_timeout }} + - name: Sharded Integration Tests (Ubuntu) - Fast CI + # Ubuntu integration tests run as 6 shards + if: runner.os != 'macOS' && runner.os != 'Windows' + run: | + bash scripts/test-coverage-xml.sh integration ${{ inputs.test_environment }} 6 ${{ matrix.shard }} + - name: Setup tmate session after tests + if: ${{ inputs.enable_tmate == 'always' || (inputs.enable_tmate == 'on-failure' && failure()) }} + uses: mxschmitt/action-tmate@v3.17 + timeout-minutes: ${{ inputs.tmate_timeout }} + - name: Verify Python Env unaffected + run: |- + zenml integration list + uv pip list + uv pip check || true + services: + mysql: + image: mysql:5.7 + env: + MYSQL_ROOT_PASSWORD: zenml + MYSQL_DATABASE: zenml + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + zenml-server: + image: ghcr.io/${{ github.repository_owner }}/zenml-server-github-actions:${{ + github.sha }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.github_token }} + env: + ZENML_STORE_URL: mysql://root:zenml@mysql:3306/zenml + ZENML_SERVER_DEPLOYMENT_TYPE: docker + ZENML_SERVER_AUTO_ACTIVATE: 'True' + ZENML_SERVER_AUTO_CREATE_DEFAULT_USER: 'True' + ports: + - 8080:8080 + options: >- + --health-cmd="curl -f http://127.0.0.1:8080/health" + --health-interval=10s + --health-timeout=5s + --health-retries=3 diff --git a/.github/workflows/integration-test-fast.yml b/.github/workflows/integration-test-fast.yml index cc1e97eaaa9..cc2c661d18c 100644 --- a/.github/workflows/integration-test-fast.yml +++ b/.github/workflows/integration-test-fast.yml @@ -30,7 +30,7 @@ on: os: description: OS type: 
choice - options: [ubuntu-latest, macos-13, windows-latest, arc-runner-set] + options: [ubuntu-latest, macos-13, windows-latest] required: false default: ubuntu-latest python-version: @@ -38,7 +38,7 @@ on: type: choice options: ['3.9', '3.10', '3.11', '3.12'] required: false - default: '3.9' + default: '3.11' test_environment: description: The test environment type: choice @@ -58,6 +58,7 @@ on: - docker-server-docker-orchestrator-mariadb - docker-server-airflow-orchestrator-mysql - docker-server-airflow-orchestrator-mariadb + - github-actions-server-docker-orchestrator required: false default: default enable_tmate: @@ -78,7 +79,7 @@ jobs: strategy: fail-fast: false matrix: - shard: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + shard: [1, 2, 3, 4, 5, 6] env: ZENML_DEBUG: 1 ZENML_ANALYTICS_OPT_IN: false @@ -118,7 +119,15 @@ jobs: if: inputs.os == 'ubuntu-latest' && (contains(inputs.test_environment, 'docker') || contains(inputs.test_environment, 'kubeflow') || contains(inputs.test_environment, 'airflow') || contains(inputs.test_environment, 'kubernetes')) - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 + - name: Restore uv cache + uses: actions/cache@v4 + with: + path: ~/.cache/uv + key: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} + restore-keys: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v1 with: @@ -236,7 +245,7 @@ jobs: # Ubuntu integration tests run as 6 shards if: runner.os != 'macOS' && runner.os != 'Windows' run: | - bash scripts/test-coverage-xml.sh integration ${{ inputs.test_environment }} 12 ${{ matrix.shard }} + bash scripts/test-coverage-xml.sh integration ${{ inputs.test_environment }} 6 ${{ matrix.shard }} - name: Setup tmate session after tests if: ${{ inputs.enable_tmate == 'always' || (inputs.enable_tmate == 'on-failure' && failure()) }} uses: mxschmitt/action-tmate@v3.17 diff --git a/.github/workflows/integration-test-slow-services.yml b/.github/workflows/integration-test-slow-services.yml new file mode 100644 index 00000000000..f6ae3725bce --- /dev/null +++ b/.github/workflows/integration-test-slow-services.yml @@ -0,0 +1,260 @@ +--- +name: Integration Tests (Slow CI) +on: + workflow_call: + inputs: + os: + description: OS + type: string + required: true + python-version: + description: Python version + type: string + required: true + test_environment: + description: The test environment + type: string + required: true + enable_tmate: + description: Enable tmate session for debugging + type: string + required: false + default: never + tmate_timeout: + description: Timeout for tmate session (minutes) + type: number + required: false + default: 30 + workflow_dispatch: + inputs: + os: + description: OS + type: choice + options: [ubuntu-latest, macos-13, windows-latest] + required: false + default: ubuntu-latest + python-version: + description: Python version + type: choice + options: ['3.9', '3.10', '3.11', '3.12'] + required: false + default: '3.11' + test_environment: + description: The test environment + type: choice + options: + # Default ZenML deployments + - default + - default-docker-orchestrator + - default-airflow-orchestrator + # Local ZenML server deployments + - local-server + - local-server-docker-orchestrator + - local-server-airflow-orchestrator + # Local ZenML docker-compose server deployments + - docker-server-mysql + - 
docker-server-mariadb + - docker-server-docker-orchestrator-mysql + - docker-server-docker-orchestrator-mariadb + - docker-server-airflow-orchestrator-mysql + - docker-server-airflow-orchestrator-mariadb + required: false + default: default + enable_tmate: + description: Enable tmate session for debugging + type: choice + options: [no, on-failure, always, before-tests] + required: false + default: 'no' + tmate_timeout: + description: Timeout for tmate session (minutes) + type: number + required: false + default: 30 +jobs: + integration-tests-slow: + name: integration-tests-slow + runs-on: ${{ inputs.os }} + strategy: + fail-fast: false + env: + ZENML_DEBUG: 1 + ZENML_ANALYTICS_OPT_IN: false + PYTHONIOENCODING: utf-8 + UV_HTTP_TIMEOUT: 600 + # on MAC OS, we need to set this environment variable + # to fix problems with the fork() calls (see this thread + # for more information: http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html) + OBJC_DISABLE_INITIALIZE_FORK_SAFETY: 'YES' + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_US_EAST_1_ENV_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_US_EAST_1_ENV_SECRET_ACCESS_KEY }} + AWS_US_EAST_1_SERVER_URL: ${{ secrets.AWS_US_EAST_1_SERVER_URL }} + AWS_US_EAST_1_SERVER_USERNAME: ${{ secrets.AWS_US_EAST_1_SERVER_USERNAME }} + AWS_US_EAST_1_SERVER_PASSWORD: ${{ secrets.AWS_US_EAST_1_SERVER_PASSWORD }} + GCP_US_EAST4_SERVER_URL: ${{ secrets.GCP_US_EAST4_SERVER_URL }} + GCP_US_EAST4_SERVER_USERNAME: ${{ secrets.GCP_US_EAST4_SERVER_USERNAME }} + GCP_US_EAST4_SERVER_PASSWORD: ${{ secrets.GCP_US_EAST4_SERVER_PASSWORD }} + # TODO: add Windows testing for Python 3.11 and 3.12 back in + # TODO: add macos testing back in + if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.11') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.12') && ! 
(inputs.os == 'macos-13' || inputs.os == 'macos-latest') }} + defaults: + run: + shell: bash + steps: + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + if: github.event.pull_request.head.repo.fork == false && (contains(inputs.test_environment, + 'docker') || contains(inputs.test_environment, 'kubeflow') || contains(inputs.test_environment, + 'airflow') || contains(inputs.test_environment, 'kubernetes')) + - uses: actions/checkout@v4.2.2 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: ${{ secrets.AWS_US_EAST_1_ENV_ROLE_ARN }} + aws-region: us-east-1 + if: contains(inputs.test_environment, 'aws') + - name: Configure GCP credentials + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_US_EAST4_ENV_CREDENTIALS }} + if: contains(inputs.test_environment, 'gcp') + - name: Set up gcloud SDK + uses: google-github-actions/setup-gcloud@v1 + with: + install_components: gke-gcloud-auth-plugin + if: contains(inputs.test_environment, 'gcp') + - name: Setup environment + uses: ./.github/actions/setup_environment + with: + cache_version: ${{ secrets.GH_ACTIONS_CACHE_KEY }} + python-version: ${{ inputs.python-version }} + os: ${{ inputs.os }} + runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }} + runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }} + discord_webhook: ${{ secrets.DISCORD_WEBHOOK }} + - name: Install docker-compose for non-default environments + if: inputs.test_environment != 'default' + run: | + pip install uv + # see https://github.com/docker/docker-py/issues/3256 for why we need to pin requests + # docker-compose is deprecated and doesn't work with newer versions of docker + uv pip install --system "pyyaml==5.3.1" "requests<2.32.0" "docker==6.1.3" docker-compose + - name: Install MacOS System Dependencies + if: runner.os=='macOS' + run: brew install libomp + - name: Unbreak Python in GHA for 3.9-3.10 + if: runner.os=='macOS' && inputs.python-version != '3.11' + # github actions overwrites brew's python. Force it to reassert itself, by + # running in a separate step. + # Workaround GitHub Actions Python issues + # see https://github.com/Homebrew/homebrew-core/issues/165793#issuecomment-1989441193 + run: | + find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete + sudo rm -rf /Library/Frameworks/Python.framework/ + brew install --force python3 && brew unlink python3 && brew unlink python3 && brew link --overwrite python3 + - name: Unbreak Python in GHA for 3.11 + if: runner.os=='macOS' && inputs.python-version == '3.11' + run: | + # Unlink and re-link to prevent errors when github mac runner images + # https://github.com/actions/setup-python/issues/577 + brew list -1 | grep python | while read formula; do brew unlink $formula; brew link --overwrite $formula; done + - name: Install Docker and Colima on MacOS + if: runner.os=='macOS' + run: | + export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 + brew update + brew install docker colima + brew reinstall --force qemu + + # We need to mount the /private/tmp/zenml-test/ folder because + # this folder is also mounted in the Docker containers that are + # started by local ZenML orchestrators. 
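+ # The ":w" suffix makes the mount writable inside the Colima VM.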
+ colima start --mount /private/tmp/zenml-test/:w + + # This is required for the Docker Python SDK to work + sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock + - name: Install kubectl on Linux + run: | + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + sudo install -o root -g 0 -m 0755 kubectl /usr/local/bin/kubectl + if: (inputs.os == 'ubuntu-latest' || inputs.os == 'arc-runner-set') + - name: Install kubectl on MacOS + run: | + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/darwin/amd64/kubectl" + sudo install -o root -g 0 -m 0755 kubectl /usr/local/bin/kubectl + if: runner.os=='macOS' + - name: Install K3D + run: | + curl -s https://raw.githubusercontent.com/rancher/k3d/main/install.sh | bash + if: runner.os!='Windows' && contains(inputs.test_environment, 'kubeflow') + - name: Login to Amazon ECR + id: login-ecr + run: | + aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 715803424590.dkr.ecr.us-east-1.amazonaws.com + if: contains(inputs.test_environment, 'aws') + - name: Login to Amazon EKS + id: login-eks + run: | + aws eks --region us-east-1 update-kubeconfig --name zenml-ci-cluster --alias zenml-ci-aws-us-east-1 + if: contains(inputs.test_environment, 'aws') + - name: Login to Google ECR + run: | + gcloud auth configure-docker --project zenml-ci + if: contains(inputs.test_environment, 'gcp') + - name: Login to Google GKE + uses: google-github-actions/get-gke-credentials@v2 + with: + cluster_name: zenml-ci-cluster + location: us-east4 + project_id: zenml-ci + if: contains(inputs.test_environment, 'gcp') + - name: Setup tmate session before tests + if: ${{ inputs.enable_tmate == 'before-tests' }} + uses: mxschmitt/action-tmate@v3.17 + timeout-minutes: ${{ inputs.tmate_timeout }} + - name: Integration Tests - Slow CI + run: | + bash scripts/test-coverage-xml.sh integration ${{ inputs.test_environment }} + - name: Setup tmate session after tests + if: ${{ inputs.enable_tmate == 'always' || (inputs.enable_tmate == 'on-failure' && failure()) }} + uses: mxschmitt/action-tmate@v3.17 + timeout-minutes: ${{ inputs.tmate_timeout }} + - name: Verify Python Env unaffected + run: |- + zenml integration list + uv pip list + uv pip check || true + services: + mysql: + image: mysql:5.7 + env: + MYSQL_ROOT_PASSWORD: zenml + MYSQL_DATABASE: zenml + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping" + --health-interval=10s + --health-timeout=5s + --health-retries=3 + zenml-server: + image: ghcr.io/${{ github.repository_owner }}/zenml-server-github-actions:${{ + github.sha }} + credentials: + username: ${{ github.actor }} + password: ${{ secrets.github_token }} + env: + ZENML_STORE_URL: mysql://root:zenml@mysql:3306/zenml + ZENML_SERVER_DEPLOYMENT_TYPE: docker + ZENML_SERVER_AUTO_ACTIVATE: 'True' + ZENML_SERVER_AUTO_CREATE_DEFAULT_USER: 'True' + ports: + - 8080:8080 + options: >- + --health-cmd="curl -f http://127.0.0.1:8080/health" + --health-interval=10s + --health-timeout=5s + --health-retries=3 diff --git a/.github/workflows/integration-test-slow.yml b/.github/workflows/integration-test-slow.yml index 368037de9f0..94d5f2e2fb5 100644 --- a/.github/workflows/integration-test-slow.yml +++ b/.github/workflows/integration-test-slow.yml @@ -30,7 +30,7 @@ on: os: description: OS type: choice - options: [ubuntu-latest, macos-13, windows-latest, arc-runner-set] + options: [ubuntu-latest, macos-13, windows-latest] required: 
false default: ubuntu-latest python-version: @@ -38,7 +38,7 @@ on: type: choice options: ['3.9', '3.10', '3.11', '3.12'] required: false - default: '3.9' + default: '3.11' test_environment: description: The test environment type: choice @@ -126,7 +126,7 @@ jobs: if: github.event.pull_request.head.repo.fork == false && (contains(inputs.test_environment, 'docker') || contains(inputs.test_environment, 'kubeflow') || contains(inputs.test_environment, 'airflow') || contains(inputs.test_environment, 'kubernetes')) - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v1 with: diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 00000000000..b702c37fcad --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,120 @@ +--- +name: Setup Python Environment, Lint and Unit Test +on: + workflow_call: + inputs: + os: + description: OS + type: string + required: true + python-version: + description: Python version + type: string + required: true + enable_tmate: + description: Enable tmate session for debugging + type: string + required: false + default: never + install_integrations: + description: Install ZenML integrations + type: string + required: false + default: 'yes' + git-ref: + description: Git branch or ref + type: string + required: false + default: '' + workflow_dispatch: + inputs: + os: + description: OS + type: choice + options: [ubuntu-latest, macos-13, windows-latest] + required: false + default: ubuntu-latest + python-version: + description: Python version + type: choice + options: ['3.9', '3.10', '3.11', '3.12'] + required: false + default: '3.11' + enable_tmate: + description: Enable tmate session for debugging + type: choice + options: [no, on-failure, always, before-tests] + required: false + default: 'no' + git-ref: + description: Git branch or ref + type: string + required: false + default: '' +jobs: + unit-test: + name: unit-test + runs-on: ${{ inputs.os }} + env: + ZENML_DEBUG: 1 + ZENML_ANALYTICS_OPT_IN: false + PYTHONIOENCODING: utf-8 + UV_HTTP_TIMEOUT: 600 + OBJC_DISABLE_INITIALIZE_FORK_SAFETY: 'YES' + if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.11') && ! 
(inputs.os == 'windows-latest' && inputs.python-version == '3.12') }} + defaults: + run: + shell: bash + steps: + - name: Checkout code + uses: actions/checkout@v4.2.2 + with: + repository: ${{ github.repository }} + ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 # Fetch all history for all branches and tags + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v42 + with: + files: | + src/** + tests/** + examples/** + - name: Restore uv cache + uses: actions/cache@v4 + with: + path: ~/.cache/uv + key: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} + restore-keys: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} + - name: Install libomp + if: ${{ inputs.os == 'macos-latest' || inputs.os == 'macos-13' }} + run: brew install libomp + - name: Setup environment + uses: ./.github/actions/setup_environment + with: + cache_version: ${{ secrets.GH_ACTIONS_CACHE_KEY }} + python-version: ${{ inputs.python-version }} + os: ${{ inputs.os }} + install_integrations: ${{ inputs.install_integrations }} + runners_cache_access_key_id: ${{ secrets.RUNNERS_CACHE_ACCESS_KEY_ID }} + runners_cache_secret_access_key: ${{ secrets.RUNNERS_CACHE_SECRET_ACCESS_KEY }} + discord_webhook: ${{ secrets.DISCORD_WEBHOOK }} + - name: Setup tmate session before tests + if: ${{ inputs.enable_tmate == 'before-tests' }} + uses: mxschmitt/action-tmate@v3.17 + - name: Lint check + env: + OS: ${{ inputs.os }} + CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} + run: | + bash scripts/lint.sh + - name: Setup tmate session after tests + if: ${{ inputs.enable_tmate == 'always' || (inputs.enable_tmate == 'on-failure' && failure()) }} + uses: mxschmitt/action-tmate@v3.17 + - name: Verify Python Env unaffected + run: |- + zenml integration list + uv pip list + uv pip check || true diff --git a/.github/workflows/publish_api_docs.yml b/.github/workflows/publish_api_docs.yml index 5eca8442997..ae18d2a3bb7 100644 --- a/.github/workflows/publish_api_docs.yml +++ b/.github/workflows/publish_api_docs.yml @@ -13,7 +13,7 @@ jobs: ZENML_ANALYTICS_OPT_IN: false PYTHONIOENCODING: utf-8 steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: fetch-depth: 0 # fetch all commits/branches including gh-pages - name: Get the version from the github branch name @@ -27,7 +27,7 @@ jobs: - run: npm install - run: npm install html-minifier -g - name: Set up Python 3.11 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.11' - name: Setup git user diff --git a/.github/workflows/publish_docker_image.yml b/.github/workflows/publish_docker_image.yml index ef88763135f..81ef19a2ba7 100644 --- a/.github/workflows/publish_docker_image.yml +++ b/.github/workflows/publish_docker_image.yml @@ -25,7 +25,7 @@ jobs: ZENML_ANALYTICS_OPT_IN: false PYTHONIOENCODING: utf-8 steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: ref: ${{ inputs.zenml_nightly && 'develop' || github.ref }} - name: Determine version diff --git a/.github/workflows/publish_helm_chart.yml b/.github/workflows/publish_helm_chart.yml index 5d7a17d2e6f..c7a2fed50e4 100644 --- a/.github/workflows/publish_helm_chart.yml +++ b/.github/workflows/publish_helm_chart.yml @@ -17,7 +17,7 @@ jobs: PYTHONIOENCODING: utf-8 steps: - name: Checkout repo - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 # The following sed command 
replaces the version number in Chart.yaml with the tag version. # It replaces the line that starts with "version: " with "version: <tag version>" diff --git a/.github/workflows/publish_stack_templates.yml b/.github/workflows/publish_stack_templates.yml index 6068f8c91ec..804e6e74a8a 100644 --- a/.github/workflows/publish_stack_templates.yml +++ b/.github/workflows/publish_stack_templates.yml @@ -12,7 +12,7 @@ jobs: id-token: write steps: - name: Checkout repo - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 # Setup AWS CLI - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v2 diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index b04864fdb60..6bf1f02c090 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -15,12 +15,12 @@ jobs: ZENML_ANALYTICS_OPT_IN: false PYTHONIOENCODING: utf-8 steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 - name: Get the version from the github tag ref id: get_version run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//} - name: Set up Python - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Install Poetry diff --git a/.github/workflows/publish_to_pypi_nightly.yml b/.github/workflows/publish_to_pypi_nightly.yml index d61f8835fba..4487b61879d 100644 --- a/.github/workflows/publish_to_pypi_nightly.yml +++ b/.github/workflows/publish_to_pypi_nightly.yml @@ -17,11 +17,11 @@ jobs: ZENML_ANALYTICS_OPT_IN: false PYTHONIOENCODING: utf-8 steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: ref: develop - name: Set up Python - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Install Poetry diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a04394bc03f..84fce611f94 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,21 +7,21 @@ jobs: setup-and-test: uses: ./.github/workflows/unit-test.yml with: - os: arc-runner-set + os: ubuntu-latest python-version: '3.9' secrets: inherit mysql-db-migration-testing: - runs-on: arc-runner-set + runs-on: ubuntu-latest env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Login to Docker Hub @@ -32,29 +32,29 @@ jobs: - name: Test migrations across versions run: bash scripts/test-migrations.sh mysql sqlite-db-migration-testing: - runs-on: arc-runner-set + runs-on: ubuntu-latest env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.9' - name: Test migrations across versions run: bash scripts/test-migrations.sh sqlite mariadb-db-migration-testing: - runs-on: arc-runner-set + runs-on: ubuntu-latest env: ZENML_ANALYTICS_OPT_IN: false ZENML_DEBUG: true steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Set up Python 3.9 @@ -78,7 +78,7 @@ jobs: uses: ./.github/workflows/publish_to_pypi.yml secrets: inherit wait-for-package-release: - runs-on:
arc-runner-set + runs-on: ubuntu-latest needs: publish-python-package steps: - name: Sleep for 4 minutes @@ -95,7 +95,7 @@ jobs: uses: ./.github/workflows/publish_helm_chart.yml secrets: inherit wait-for-package-release-again: - runs-on: arc-runner-set + runs-on: ubuntu-latest needs: publish-helm-chart steps: - name: Sleep for 4 minutes diff --git a/.github/workflows/release_finalize.yml b/.github/workflows/release_finalize.yml index 9f486782a7a..c76b0226f7c 100644 --- a/.github/workflows/release_finalize.yml +++ b/.github/workflows/release_finalize.yml @@ -4,11 +4,11 @@ on: workflow_dispatch: inputs: latest_version: - description: "The latest version of ZenML" + description: The latest version of ZenML required: true type: string new_version: - description: "The new version of ZenML" + description: The new version of ZenML required: true type: string env: @@ -26,7 +26,7 @@ jobs: git config --global user.name "ZenML GmbH" # Check out develop - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: ref: develop # Create the release branch @@ -46,7 +46,7 @@ jobs: git config --global user.name "ZenML GmbH" # Check out the previous release branch - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: ref: release/${{ github.event.inputs.latest_version }} # Create the docs update PR @@ -66,7 +66,7 @@ jobs: git config --global user.name "ZenML GmbH" # Check out develop - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: ref: develop # Create the migration test version if necessary @@ -80,7 +80,7 @@ jobs: steps: # Check out develop - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: ref: develop # Setting up the Python @@ -115,7 +115,7 @@ jobs: git config --global user.name "ZenML GmbH" # Check out legacy docs branch - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: ref: docs/legacy-docs-page # Append new version to the legacy docs table diff --git a/.github/workflows/release_prepare.yml b/.github/workflows/release_prepare.yml index 1d60f6c3b0c..b77a1b136e7 100644 --- a/.github/workflows/release_prepare.yml +++ b/.github/workflows/release_prepare.yml @@ -25,7 +25,7 @@ jobs: # Check out main to get the old version - name: Checkout code id: checkout-code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: ref: main # Configure Git @@ -47,7 +47,7 @@ jobs: steps: # Check out the code - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 # Configure Git - name: Configure git shell: bash @@ -61,7 +61,7 @@ jobs: scripts/validate-new-version.sh ${{ needs.fetch-versions.outputs.new_version }} # Set up Python - name: Set up Python - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v5.3.0 with: python-version: '3.12' # Install ZenML @@ -142,7 +142,7 @@ jobs: steps: # Check out the prepare-release branch - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 # Sign in to Google - uses: google-github-actions/setup-gcloud@v0 with: @@ -183,7 +183,7 @@ jobs: steps: # Check out the code - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 # Setting up Python - name: Set up Python uses: actions/setup-python@v2 @@ -221,7 +221,7 @@ jobs: steps: # Check out the code - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 # Setting up Python - name: Set up Python uses: 
actions/setup-python@v2 diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml index 55f481d1ced..c0ddb508744 100644 --- a/.github/workflows/spellcheck.yml +++ b/.github/workflows/spellcheck.yml @@ -16,10 +16,10 @@ concurrency: jobs: spellcheck: if: github.event.pull_request.draft == false && github.repository == 'zenml-io/zenml' - runs-on: arc-runner-set + runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: Spelling checker uses: crate-ci/typos@v1.17.0 with: diff --git a/.github/workflows/templates-test.yml b/.github/workflows/templates-test.yml index e2fb1d7ba79..d76a85a731d 100644 --- a/.github/workflows/templates-test.yml +++ b/.github/workflows/templates-test.yml @@ -24,7 +24,7 @@ on: type: choice options: ['3.9', '3.10', '3.11', '3.12'] required: false - default: '3.9' + default: '3.11' jobs: all-template-tests: name: all-template-tests diff --git a/.github/workflows/trivy-zenml-core.yml b/.github/workflows/trivy-zenml-core.yml index 0df6b004151..f64fa0c80bd 100644 --- a/.github/workflows/trivy-zenml-core.yml +++ b/.github/workflows/trivy-zenml-core.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: zenml-Trivy vulnerability scanner uses: aquasecurity/trivy-action@0.19.0 with: diff --git a/.github/workflows/trivy-zenserver.yml b/.github/workflows/trivy-zenserver.yml index 16054b36e51..0601e5c3413 100644 --- a/.github/workflows/trivy-zenserver.yml +++ b/.github/workflows/trivy-zenserver.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 - name: zenserver-Trivy vulnerability scanner uses: aquasecurity/trivy-action@0.19.0 with: diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 9737c8f2ce2..9ec7fbfc305 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -31,7 +31,7 @@ on: os: description: OS type: choice - options: [ubuntu-latest, macos-13, windows-latest, arc-runner-set] + options: [ubuntu-latest, macos-13, windows-latest] required: false default: ubuntu-latest python-version: @@ -39,7 +39,7 @@ on: type: choice options: ['3.9', '3.10', '3.11', '3.12'] required: false - default: '3.9' + default: '3.11' enable_tmate: description: Enable tmate session for debugging type: choice @@ -60,23 +60,34 @@ jobs: ZENML_ANALYTICS_OPT_IN: false PYTHONIOENCODING: utf-8 UV_HTTP_TIMEOUT: 600 - - # on MAC OS, we need to set this environment variable - # to fix problems with the fork() calls (see this thread - # for more information: http://sealiesoftware.com/blog/archive/2017/6/5/Objective-C_and_fork_in_macOS_1013.html) OBJC_DISABLE_INITIALIZE_FORK_SAFETY: 'YES' - # TODO: add Windows testing for Python 3.11 and 3.12 back in - # Exit if it's a commit from Gitbook if: ${{ ! startsWith(github.event.head_commit.message, 'GitBook:') && ! (inputs.os == 'windows-latest' && inputs.python-version == '3.11') && ! 
(inputs.os == 'windows-latest' && inputs.python-version == '3.12') }} defaults: run: shell: bash steps: - name: Checkout code - uses: actions/checkout@v4.1.1 + uses: actions/checkout@v4.2.2 with: repository: ${{ github.repository }} ref: ${{ github.event.pull_request.head.sha }} + fetch-depth: 0 # Fetch all history for all branches and tags + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v42 + with: + files: | + src/** + tests/** + examples/** + - name: Restore uv cache + uses: actions/cache@v4 + with: + path: ~/.cache/uv + key: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} + restore-keys: | + uv-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('src/zenml/integrations/*/__init__.py') }} - name: Install libomp if: ${{ inputs.os == 'macos-latest' || inputs.os == 'macos-13' }} run: brew install libomp @@ -93,11 +104,6 @@ jobs: - name: Setup tmate session before tests if: ${{ inputs.enable_tmate == 'before-tests' }} uses: mxschmitt/action-tmate@v3.17 - - name: Lint check - env: - OS: ${{ inputs.os }} - run: | - bash scripts/lint.sh - name: Run unit tests run: | bash scripts/test-coverage-xml.sh unit diff --git a/.github/workflows/update-templates-to-examples.yml b/.github/workflows/update-templates-to-examples.yml index 9e203d98e70..3166d983220 100644 --- a/.github/workflows/update-templates-to-examples.yml +++ b/.github/workflows/update-templates-to-examples.yml @@ -16,7 +16,7 @@ on: os: description: OS type: choice - options: [ubuntu-latest, macos-13, windows-latest, arc-runner-set] + options: [ubuntu-latest, macos-13, windows-latest] required: false default: ubuntu-latest python-version: @@ -24,7 +24,7 @@ on: type: choice options: ['3.9', '3.10', '3.11', '3.12'] required: false - default: '3.9' + default: '3.11' jobs: update-e2e-batch-template-to-examples: name: update-e2e-batch-template-to-examples @@ -46,7 +46,7 @@ jobs: python-version: ${{ inputs.python-version }} stack-name: local ref-zenml: ${{ github.ref }} - ref-template: 2024.10.10 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py + ref-template: 2024.10.30 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py - name: Clean-up run: | rm -rf ./local_checkout @@ -57,7 +57,7 @@ jobs: Breaking changes affecting templates have been introduced. To mitigate this issue,\ please make the code in zenml-io/template-e2e-batch compatible with new version of\ ZenML core, release it and update release tag in zenml.cli.base.ZENML_PROJECT_TEMPLATES" - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: ref: ${{ github.event.pull_request.head.ref }} - name: Check-out fresh E2E template @@ -118,7 +118,7 @@ jobs: python-version: ${{ inputs.python-version }} stack-name: local ref-zenml: ${{ github.ref }} - ref-template: 2024.09.23 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py + ref-template: 2024.10.30 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py - name: Clean-up run: | rm -rf ./local_checkout @@ -129,7 +129,7 @@ jobs: Breaking changes affecting templates have been introduced. 
To mitigate this issue,\ please make the code in zenml-io/template-nlp compatible with new version of\ ZenML core, release it and update release tag in zenml.cli.base.ZENML_PROJECT_TEMPLATES" - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: ref: ${{ github.event.pull_request.head.ref }} - name: Check-out fresh NLP template @@ -189,7 +189,7 @@ jobs: python-version: ${{ inputs.python-version }} stack-name: local ref-zenml: ${{ github.ref }} - ref-template: 2024.09.24 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py + ref-template: 2024.10.30 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py - name: Clean-up run: | rm -rf ./local_checkout @@ -201,7 +201,7 @@ jobs: Breaking changes affecting templates have been introduced. To mitigate this issue,\ please make the code in zenml-io/template-starter compatible with new version of\ ZenML core, release it and update release tag in zenml.cli.base.ZENML_PROJECT_TEMPLATES" - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: ref: ${{ github.event.pull_request.head.ref }} - name: Check-out fresh Starter template @@ -261,7 +261,7 @@ jobs: with: python-version: ${{ inputs.python-version }} ref-zenml: ${{ github.ref }} - ref-template: 2024.10.10 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py + ref-template: 2024.10.30 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py - name: Clean-up run: | rm -rf ./local_checkout @@ -272,7 +272,7 @@ jobs: Breaking changes affecting templates have been introduced. To mitigate this issue,\ please make the code in zenml-io/template-llm-finetuning compatible with new version of\ ZenML core, release it and update release tag in zenml.cli.base.ZENML_PROJECT_TEMPLATES" - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v4.2.2 with: ref: ${{ github.event.pull_request.head.ref }} - name: Check-out fresh LLM Finetuning template diff --git a/docs/book/component-guide/data-validators/data-validators.md b/docs/book/component-guide/data-validators/data-validators.md index 70ba1de73b0..80192f4ae72 100644 --- a/docs/book/component-guide/data-validators/data-validators.md +++ b/docs/book/component-guide/data-validators/data-validators.md @@ -23,7 +23,7 @@ Related concepts: * early on, even if it's just to keep a log of the quality state of your data and the performance of your models at different stages of development. * if you have pipelines that regularly ingest new data, you should use data validation to run regular data integrity checks to signal problems before they are propagated downstream. * in continuous training pipelines, you should use data validation techniques to compare new training data against a data reference and to compare the performance of newly trained models against previous ones. -* when you have pipelines that automate batch inference or if you regularly collect data used as input in online inference, you should use data validation to run data drift analyses and detect training-serving skew, data drift and model drift. +* when you have pipelines that automate batch inference or if you regularly collect data used as input in online inference, you should use data validation to run data drift analyses and detect training-serving skew, data drift and model drift.
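+
+ As a minimal sketch of the idea (a hypothetical custom step with an arbitrary threshold; the Data Validator flavors below provide ready-made, far more thorough checks), such a drift check could be wired into a pipeline like this:
+
+ ```python
+ import pandas as pd
+ from zenml import step
+
+
+ @step
+ def simple_drift_check(reference: pd.DataFrame, current: pd.DataFrame) -> bool:
+     """Flag drift when any numeric column mean shifts by more than 10%."""
+     ref_means = reference.mean(numeric_only=True)
+     cur_means = current.mean(numeric_only=True)
+     drifted = (ref_means - cur_means).abs() > 0.1 * ref_means.abs()
+     return bool(drifted.any())
+ ```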
#### Data Validator Flavors diff --git a/docs/book/component-guide/data-validators/deepchecks.md b/docs/book/component-guide/data-validators/deepchecks.md index 2fd2914c6e6..b24d827f0b5 100644 --- a/docs/book/component-guide/data-validators/deepchecks.md +++ b/docs/book/component-guide/data-validators/deepchecks.md @@ -10,7 +10,7 @@ The Deepchecks [Data Validator](./data-validators.md) flavor provided with the Z ### When would you want to use it? -[Deepchecks](https://deepchecks.com/) is an open-source library that you can use to run a variety of data and model validation tests, from data integrity tests that work with a single dataset to model evaluation tests to data drift analyses and model performance comparison tests. All this can be done with minimal configuration input from the user, or customized with specialized conditions that the validation tests should perform. +[Deepchecks](https://deepchecks.com/) is an open-source library that you can use to run a variety of data and model validation tests, from data integrity tests that work with a single dataset to model evaluation tests to data drift analyses and model performance comparison tests. All this can be done with minimal configuration input from the user, or customized with specialized conditions that the validation tests should perform. Deepchecks works with both tabular data and computer vision data. For tabular, the supported dataset format is `pandas.DataFrame` and the supported model format is `sklearn.base.ClassifierMixin`. For computer vision, the supported dataset format is `torch.utils.data.dataloader.DataLoader` and supported model format is `torch.nn.Module`. diff --git a/docs/book/component-guide/data-validators/evidently.md b/docs/book/component-guide/data-validators/evidently.md index c80e048ef11..f48f70edb50 100644 --- a/docs/book/component-guide/data-validators/evidently.md +++ b/docs/book/component-guide/data-validators/evidently.md @@ -6,7 +6,7 @@ description: >- # Evidently -The Evidently [Data Validator](./data-validators.md) flavor provided with the ZenML integration uses [Evidently](https://evidentlyai.com/) to perform data quality, data drift, model drift and model performance analyses, to generate reports and run checks. The reports and check results can be used to implement automated corrective actions in your pipelines or to render interactive representations for further visual interpretation, evaluation and documentation. +The Evidently [Data Validator](./data-validators.md) flavor provided with the ZenML integration uses [Evidently](https://evidentlyai.com/) to perform data quality, data drift, model drift and model performance analyses, to generate reports and run checks. The reports and check results can be used to implement automated corrective actions in your pipelines or to render interactive representations for further visual interpretation, evaluation and documentation. ### When would you want to use it? @@ -47,7 +47,7 @@ zenml stack register custom_stack -dv evidently_data_validator ... --set Evidently's profiling functions take in a `pandas.DataFrame` dataset or a pair of datasets and generate results in the form of a `Report` object. -One of Evidently's notable characteristics is that it only requires datasets as input. Even when running model performance comparison analyses, no model needs to be present.
However, that does mean that the input data needs to include additional `target` and `prediction` columns for some profiling reports and, you have to include additional information about the dataset columns in the form of [column mappings](https://docs.evidentlyai.com/user-guide/tests-and-reports/column-mapping). Depending on how your data is structured, you may also need to include additional steps in your pipeline before the data validation step to insert the additional `target` and `prediction` columns into your data. This may also require interacting with one or more models. +One of Evidently's notable characteristics is that it only requires datasets as input. Even when running model performance comparison analyses, no model needs to be present. However, that does mean that the input data needs to include additional `target` and `prediction` columns for some profiling reports, and you have to include additional information about the dataset columns in the form of [column mappings](https://docs.evidentlyai.com/user-guide/tests-and-reports/column-mapping). Depending on how your data is structured, you may also need to include additional steps in your pipeline before the data validation step to insert the additional `target` and `prediction` columns into your data. This may also require interacting with one or more models. There are three ways you can use Evidently to generate data reports in your ZenML pipelines that allow different levels of flexibility: diff --git a/docs/book/component-guide/orchestrators/kubeflow.md b/docs/book/component-guide/orchestrators/kubeflow.md index 0094bd85166..174cb56e82e 100644 --- a/docs/book/component-guide/orchestrators/kubeflow.md +++ b/docs/book/component-guide/orchestrators/kubeflow.md @@ -198,7 +198,7 @@ Kubeflow comes with its own UI that you can use to find further details about yo from zenml.client import Client pipeline_run = Client().get_pipeline_run("") -orchestrator_url = pipeline_run.run_metadata["orchestrator_url"].value +orchestrator_url = pipeline_run.run_metadata["orchestrator_url"] ``` #### Additional configuration diff --git a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md b/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md index 964c8d3bfc9..ae2dbcbaec6 100644 --- a/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md +++ b/docs/book/how-to/advanced-topics/manage-zenml-server/migration-guide/migration-zero-twenty.md @@ -221,7 +221,7 @@ Running with active project: 'default' (global) ┃ ACTIVE │ STACK NAME │ STACK ID │ SHARED │ OWNER │ ORCHESTRATOR │ ARTIFACT_STORE │ CONTAINER_REGISTRY │ SECRETS_MANAGER │ MODEL_DEPLOYER │ EXPERIMENT_TRACKER ┃ ┠────────┼──────────────────────┼──────────────────────┼────────┼─────────┼───────────────────────┼───────────────────┼──────────────────────┼───────────────────────┼─────────────────────┼──────────────────────┨ ┃ │ zenbytes_aws_kubeflo │ 9fe90f0b-2a79-47d9-8 │ │ default │ zenbytes_eks_orchestr │ zenbytes_s3_store │ zenbytes_ecr_registr │ zenbytes_aws_secret_m │ zenbytes_eks_seldon │ ┃ -┃ │ w_stack │ f80-04e45ff02cdb │ │ │ ator │ │ y │ anager │ │ ┃ +┃ │ w_stack │ f80-04e45ff02cdb │ │ │ ator │ │ y │ manager │ │ ┃ ┠────────┼──────────────────────┼──────────────────────┼────────┼─────────┼───────────────────────┼───────────────────┼──────────────────────┼───────────────────────┼─────────────────────┼──────────────────────┨ ┃ 👉 │ default │ 7a587e0c-30fd-402f-a │ │ default
│ default │ default │ │ │ │ ┃ ┃ │ │ 3a8-03651fe1458f │ │ │ │ │ │ │ │ ┃ diff --git a/docs/book/user-guide/llmops-guide/finetuning-llms/deploying-finetuned-models.md b/docs/book/user-guide/llmops-guide/finetuning-llms/deploying-finetuned-models.md index 6afa88d46c4..c16b515cfd7 100644 --- a/docs/book/user-guide/llmops-guide/finetuning-llms/deploying-finetuned-models.md +++ b/docs/book/user-guide/llmops-guide/finetuning-llms/deploying-finetuned-models.md @@ -14,23 +14,23 @@ various factors that influence the deployment process. One of the primary considerations is the memory and machine requirements for your finetuned model. LLMs are typically resource-intensive, requiring substantial RAM, processing power -and specialised hardware. This choice of hardware can significantly impact both +and specialized hardware. This choice of hardware can significantly impact both performance and cost, so it's crucial to strike the right balance based on your specific use case. Real-time considerations play a vital role in deployment planning, especially for applications that require immediate responses. This includes preparing for potential failover scenarios if your finetuned model encounters issues, -conducting thorough benchmarks and load testing, and modelling expected user +conducting thorough benchmarks and load testing, and modeling expected user load and usage patterns. Additionally, you'll need to decide between streaming and non-streaming approaches, each with its own set of trade-offs in terms of -latency and resource utilisation. +latency and resource utilization. -Optimisation techniques, such as quantisation, can help reduce the resource -footprint of your model. However, these optimisations often come with additional +Optimization techniques, such as quantization, can help reduce the resource +footprint of your model. However, these optimizations often come with additional steps in your workflow and require careful evaluation to ensure they don't negatively impact model performance. [Rigorous evaluation](./evaluation-for-finetuning.md) -becomes crucial in quantifying the extent to which you can optimise without +becomes crucial in quantifying the extent to which you can optimize without compromising accuracy or functionality. ## Deployment Options and Trade-offs @@ -39,7 +39,7 @@ When it comes to deploying your finetuned LLM, several options are available, each with its own set of advantages and challenges: 1. **Roll Your Own**: This approach involves setting up and managing your own - infrastructure. While it offers the most control and customisation, it also + infrastructure. While it offers the most control and customization, it also requires expertise and resources to maintain. For this, you'd usually create some kind of Docker-based service (a FastAPI endpoint, for example) and deploy this on your infrastructure, with you taking care of all @@ -49,7 +49,7 @@ each with its own set of advantages and challenges: be aware of the "cold start" phenomenon, which can introduce latency for infrequently accessed models. 3. **Always-On Options**: These deployments keep your model constantly running - and ready to serve requests. While this approach minimises latency, it can be + and ready to serve requests. While this approach minimizes latency, it can be more costly as you're paying for resources even during idle periods. 4. **Fully Managed Solutions**: Many cloud providers and AI platforms offer managed services for deploying LLMs. These solutions can simplify the @@ -177,14 +177,14 @@ crucial.
Key areas to watch include: 2. **Latency Metrics**: Monitor response times to ensure they meet your application's requirements. 3. **Load and Usage Patterns**: Keep an eye on how users interact with your model - to inform scaling decisions and potential optimisations. -4. **Data Analysis**: Regularly analyse the inputs and outputs of your model to + to inform scaling decisions and potential optimizations. +4. **Data Analysis**: Regularly analyze the inputs and outputs of your model to identify trends, potential biases, or areas for improvement. It's also important to consider privacy and security when capturing and logging responses. Ensure that your logging practices comply with relevant data -protection regulations and your organisation's privacy policies. +protection regulations and your organization's privacy policies. By carefully considering these deployment options and maintaining vigilant monitoring practices, you can ensure that your finetuned LLM performs optimally -and continues to meet the needs of your users and organisation. +and continues to meet the needs of your users and organization. diff --git a/docs/book/user-guide/llmops-guide/finetuning-llms/evaluation-for-finetuning.md b/docs/book/user-guide/llmops-guide/finetuning-llms/evaluation-for-finetuning.md index e3c33dd1c82..c2fc7753b82 100644 --- a/docs/book/user-guide/llmops-guide/finetuning-llms/evaluation-for-finetuning.md +++ b/docs/book/user-guide/llmops-guide/finetuning-llms/evaluation-for-finetuning.md @@ -12,9 +12,9 @@ The motivation for implementing thorough evals is similar to that of unit tests 1. **Prevent Regressions**: Ensure that new iterations or changes don't negatively impact existing functionality. -2. **Track Improvements**: Quantify and visualise how your model improves with each iteration or finetuning session. +2. **Track Improvements**: Quantify and visualize how your model improves with each iteration or finetuning session. -3. **Ensure Safety and Robustness**: Given the complex nature of LLMs, comprehensive evals help identify and mitigate potential risks, biases, or unexpected behaviours. +3. **Ensure Safety and Robustness**: Given the complex nature of LLMs, comprehensive evals help identify and mitigate potential risks, biases, or unexpected behaviors. By implementing a robust evaluation strategy, you can develop more reliable, performant, and safe finetuned LLMs while maintaining a clear picture of your model's capabilities and limitations throughout the development process. @@ -38,12 +38,12 @@ finetuning use case. The main distinction here is that we are not looking to evaluate retrieval, but rather the performance of the finetuned model (i.e. [the generation part](../evaluation/generation.md)). -Custom evals are tailored to your specific use case and can be categorised into two main types: +Custom evals are tailored to your specific use case and can be categorized into two main types: 1. **Success Modes**: These evals focus on things you want to see in your model's output, such as: - Correct formatting - Appropriate responses to specific prompts - - Desired behaviour in edge cases + - Desired behavior in edge cases 2.
**Failure Modes**: These evals target things you don't want to see, including: - Hallucinations (generating false or nonsensical information) @@ -59,7 +59,7 @@ from my_library import query_llm good_responses = { "what are the best salads available at the food court?": ["caesar", "italian"], - "how late is the shopping centre open until?": ["10pm", "22:00", "ten"] + "how late is the shopping center open until?": ["10pm", "22:00", "ten"] } for question, answers in good_responses.items(): @@ -67,7 +67,7 @@ for question, answers in good_responses.items(): assert any(answer in llm_response for answer in answers), f"Response does not contain any of the expected answers: {answers}" bad_responses = { - "who is the manager of the shopping centre?": ["tom hanks", "spiderman"] + "who is the manager of the shopping center?": ["tom hanks", "spiderman"] } for question, answers in bad_responses.items(): @@ -77,15 +77,15 @@ for question, answers in bad_responses.items(): You can see how you might want to expand this out to cover more examples and more failure modes, but this is a good start. As you continue in the work of iterating on your model and performing more tests, you can update these cases with known failure modes (and/or with obvious success modes that your use case must always work for). -### Generalised Evals and Frameworks +### Generalized Evals and Frameworks -Generalised evals and frameworks provide a structured approach to evaluating your finetuned LLM. They offer: +Generalized evals and frameworks provide a structured approach to evaluating your finetuned LLM. They offer: -- Assistance in organising and structuring your evals -- Standardised evaluation metrics for common tasks +- Assistance in organizing and structuring your evals +- Standardized evaluation metrics for common tasks - Insights into the model's overall performance -When using generalised evals, it's important to consider their limitations and caveats. While they provide valuable insights, they should be complemented with custom evals tailored to your specific use case. Some possible options for you to check out include: +When using generalized evals, it's important to consider their limitations and caveats. While they provide valuable insights, they should be complemented with custom evals tailored to your specific use case. Some possible options for you to check out include: - [prodigy-evaluate](https://github.com/explosion/prodigy-evaluate?tab=readme-ov-file) - [ragas](https://docs.ragas.io/en/stable/getstarted/monitoring.html) @@ -112,7 +112,7 @@ As part of this, implementing comprehensive logging from the early stages of dev Alongside collecting the raw data and viewing it periodically, creating simple dashboards that display core metrics reflecting your model's performance is an -effective way to visualise and monitor progress. These metrics should align with +effective way to visualize and monitor progress. These metrics should align with your iteration goals and capture improvements over time, allowing you to quickly assess the impact of changes and identify areas that require attention.
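As a minimal sketch of what capturing such core metrics over time can look like (the file name and metric fields here are illustrative assumptions, not part of any framework):

```python
# Illustrative only: append one eval-run summary per line to a JSONL file
# that a notebook or lightweight dashboard can later plot over time.
import json
from datetime import datetime, timezone
from pathlib import Path

LOG_PATH = Path("eval_metrics.jsonl")  # assumed location


def log_eval_run(run_name: str, pass_rate: float, failures: dict) -> None:
    """Record the outcome of one evaluation run."""
    record = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "run_name": run_name,
        "pass_rate": pass_rate,  # fraction of success-mode evals that passed
        "failures": failures,  # e.g. {"hallucination": 2, "formatting": 0}
    }
    with LOG_PATH.open("a") as f:
        f.write(json.dumps(record) + "\n")
```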
Again, as with everything else, don't let perfect be the enemy of the good; a simple diff --git a/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md b/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md index 3ad07632ffb..6f995f7439d 100644 --- a/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md +++ b/docs/book/user-guide/llmops-guide/finetuning-llms/finetuning-with-accelerate.md @@ -46,7 +46,7 @@ smaller size (e.g. one of the Llama 3.1 family at the ~8B parameter mark) and then iterate on that. This will allow you to quickly run through a number of experiments and see how the model performs on your use case. -In this early stage, experimentation is important. Accordingly, any way you can maximise the number of experiments you can run will help increase the amount you can learn. So we want to minimise the amount of time it takes to iterate to a new experiment. Depending on the precise details of what you do, you might iterate on your data, on some hyperparameters of the finetuning process, or you might even try out different use case options. +In this early stage, experimentation is important. Accordingly, any way you can maximize the number of experiments you can run will help increase the amount you can learn. So we want to minimize the amount of time it takes to iterate to a new experiment. Depending on the precise details of what you do, you might iterate on your data, on some hyperparameters of the finetuning process, or you might even try out different use case options. ## Implementation details @@ -190,15 +190,15 @@ components for distributed training. For more details, see the [Accelerate docum ## Dataset iteration -While these stages offer lots of surface area for intervention and customisation, the most significant thing to be careful with is the data that you input into the model. If you find that your finetuned model offers worse performance than the base, or if you get garbled output post-fine tuning, this would be a strong indicator that you have not correctly formatted your input data, or something is mismatched with the tokeniser and so on. To combat this, be sure to inspect your data at all stages of the process! +While these stages offer lots of surface area for intervention and customization, the most significant thing to be careful with is the data that you input into the model. If you find that your finetuned model offers worse performance than the base, or if you get garbled output post-finetuning, this would be a strong indicator that you have not correctly formatted your input data, or something is mismatched with the tokenizer and so on. To combat this, be sure to inspect your data at all stages of the process! -The main behaviour and activity while using this notebook should be around being +The main behavior and activity while using this notebook should be around being more serious about your data. If you are finding that you're on the low end of the spectrum, consider ways to either supplement that data or to synthetically generate data that could be substituted in. You should also start to think about evaluations at this stage (see [the next guide](./evaluation-for-finetuning.md) for more) since you will likely want to measure how well your model is doing, -especially when you make changes and customisations. Once you have some basic +especially when you make changes and customizations.
Once you have some basic evaluations up and running, you can then start thinking through all the optimal parameters and measuring whether these updates are actually doing what you think they will. diff --git a/docs/book/user-guide/llmops-guide/finetuning-llms/starter-choices-for-finetuning-llms.md b/docs/book/user-guide/llmops-guide/finetuning-llms/starter-choices-for-finetuning-llms.md index d23b3798cf2..b0e5de4ebc2 100644 --- a/docs/book/user-guide/llmops-guide/finetuning-llms/starter-choices-for-finetuning-llms.md +++ b/docs/book/user-guide/llmops-guide/finetuning-llms/starter-choices-for-finetuning-llms.md @@ -42,11 +42,11 @@ In general, try to pick something that is small and self-contained, ideally the For example, a general use case of "answer all customer support emails" is almost certainly too vague, whereas something like "triage incoming customer support queries and extract relevant information as per some pre-defined checklist or schema" is much more realistic. -It's also worth picking something where you can reach some sort of answer as to whether this the right approach in a short amount of time. If your use case depends on the generation or annotation of lots of data, or organisation and sorting of pre-existing data, this is less of an ideal starter project than if you have data that already exists within your organisation and that you can repurpose here. +It's also worth picking something where you can reach some sort of answer as to whether this is the right approach in a short amount of time. If your use case depends on the generation or annotation of lots of data, or organization and sorting of pre-existing data, this is less of an ideal starter project than if you have data that already exists within your organization and that you can repurpose here. ## Picking data for your use case -The data needed for your use case will follow directly from the specific use case you're choosing, but ideally it should be something that is already *mostly* in the direction of what you need. It will take time to annotate and manually transform data if it is too distinct from the specific use case you want to use, so try to minimise this as much as you possibly can. +The data needed for your use case will follow directly from the specific use case you're choosing, but ideally it should be something that is already *mostly* in the direction of what you need. It will take time to annotate and manually transform data if it is too distinct from the specific use case you want to use, so try to minimize this as much as you possibly can.
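As an illustrative sketch of this kind of light-touch repurposing (the record fields are hypothetical), pre-existing support tickets can be reshaped into instruction-tuning JSONL in a few lines:

```python
# Illustrative only: reshape records your organization already has into
# instruction-style finetuning data with minimal manual transformation.
import json

tickets = [  # stand-in for existing data, e.g. exported support tickets
    {
        "question": "How do I reset my password?",
        "resolution": "Use the 'Forgot password' link on the login page.",
    },
]

with open("finetune_data.jsonl", "w") as f:
    for ticket in tickets:
        example = {
            "instruction": "Answer the customer support query.",
            "input": ticket["question"],
            "output": ticket["resolution"],
        }
        f.write(json.dumps(example) + "\n")
```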
A couple of examples of where you might be able to reuse pre-existing data: diff --git a/examples/e2e/.copier-answers.yml b/examples/e2e/.copier-answers.yml index b008b2c1e99..cd687be59df 100644 --- a/examples/e2e/.copier-answers.yml +++ b/examples/e2e/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.10.21 +_commit: 2024.10.30 _src_path: gh:zenml-io/template-e2e-batch data_quality_checks: true email: info@zenml.io diff --git a/examples/e2e/README.md b/examples/e2e/README.md index 38006122eb7..1c63986d645 100644 --- a/examples/e2e/README.md +++ b/examples/e2e/README.md @@ -51,9 +51,8 @@ source .venv/bin/activate make setup # Optionally, provision default local stack make install-stack-local -# Start the ZenML UI locally (recommended, but optional); -# the default username is "admin" with an empty password -zenml up +# Start the ZenML UI locally (recommended, but optional) +zenml login --local # Run the pipeline included in the project python run.py ``` diff --git a/examples/e2e_nlp/.copier-answers.yml b/examples/e2e_nlp/.copier-answers.yml index e509aae2760..e13858e7da1 100644 --- a/examples/e2e_nlp/.copier-answers.yml +++ b/examples/e2e_nlp/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.10.21 +_commit: 2024.10.30 _src_path: gh:zenml-io/template-nlp accelerator: cpu cloud_of_choice: aws diff --git a/examples/e2e_nlp/Makefile b/examples/e2e_nlp/Makefile index 92396ffc4b5..598a6e11e0b 100644 --- a/examples/e2e_nlp/Makefile +++ b/examples/e2e_nlp/Makefile @@ -11,4 +11,4 @@ install-local-stack: zenml stack register -a default -o default -r mlflow_local_$${stack_name} \ -e mlflow_local_$${stack_name} $${stack_name} && \ zenml stack set $${stack_name} && \ - zenml up + zenml login --local diff --git a/examples/e2e_nlp/README.md b/examples/e2e_nlp/README.md index cd6e71f299e..f8a6bc50d89 100644 --- a/examples/e2e_nlp/README.md +++ b/examples/e2e_nlp/README.md @@ -48,9 +48,8 @@ source .venv/bin/activate make setup # Optionally, provision default local stack make install-local-stack -# Start the ZenML UI locally (recommended, but optional); -# the default username is "admin" with an empty password -zenml up +# Start the ZenML UI locally (recommended, but optional) +zenml login --local # Run the pipeline included in the project python run.py ``` diff --git a/examples/llm_finetuning/.copier-answers.yml b/examples/llm_finetuning/.copier-answers.yml index 386863f54e8..4004897928b 100644 --- a/examples/llm_finetuning/.copier-answers.yml +++ b/examples/llm_finetuning/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.08.29-1-g7af7693 +_commit: 2024.10.30 _src_path: gh:zenml-io/template-llm-finetuning bf16: true cuda_version: cuda11.8 diff --git a/examples/llm_finetuning/steps/log_metadata.py b/examples/llm_finetuning/steps/log_metadata.py index 645f98cc8ea..14371b78b6e 100644 --- a/examples/llm_finetuning/steps/log_metadata.py +++ b/examples/llm_finetuning/steps/log_metadata.py @@ -34,7 +34,7 @@ def log_metadata_from_step_artifact( context = get_step_context() metadata_dict: Dict[str, Any] = ( - context.pipeline_run.steps[step_name].outputs[artifact_name][0].load() + context.pipeline_run.steps[step_name].outputs[artifact_name].load() ) metadata = {artifact_name: metadata_dict} diff --git a/examples/mlops_starter/.copier-answers.yml b/examples/mlops_starter/.copier-answers.yml index e17f27ee551..fd6b937c7c9 100644 --- a/examples/mlops_starter/.copier-answers.yml +++ 
b/examples/mlops_starter/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier -_commit: 2024.10.21 +_commit: 2024.10.30 _src_path: gh:zenml-io/template-starter email: info@zenml.io full_name: ZenML GmbH diff --git a/examples/mlops_starter/README.md b/examples/mlops_starter/README.md index b25258f78bd..d756f05d51a 100644 --- a/examples/mlops_starter/README.md +++ b/examples/mlops_starter/README.md @@ -57,7 +57,7 @@ zenml integration install sklearn pandas -y zenml init # Start the ZenServer to enable dashboard access -zenml up +zenml login --local # Run the feature engineering pipeline python run.py --feature-pipeline diff --git a/examples/mlops_starter/quickstart.ipynb b/examples/mlops_starter/quickstart.ipynb index 6fba7a0e8cc..98a94f1e3e2 100644 --- a/examples/mlops_starter/quickstart.ipynb +++ b/examples/mlops_starter/quickstart.ipynb @@ -122,7 +122,7 @@ "source": [ "zenml_server_url = \"PLEASE_UPDATE_ME\" # in the form \"https://URL_TO_SERVER\"\n", "\n", - "!zenml connect --url $zenml_server_url" + "!zenml login $zenml_server_url" ] }, { @@ -403,9 +403,9 @@ " # Only spin up a local Dashboard in case you aren't already connected to a remote server\n", " if Environment.in_google_colab():\n", " # run ZenML through a cloudflare tunnel to get a public endpoint\n", - " !zenml up --port 8237 & cloudflared tunnel --url http://localhost:8237\n", + " !zenml login --local --port 8237 & cloudflared tunnel --url http://localhost:8237\n", " else:\n", - " !zenml up" + " !zenml login --local" ] }, { @@ -1140,7 +1140,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/scripts/format.sh b/scripts/format.sh index 409b969dbc7..d3dd117f80b 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -64,5 +64,5 @@ ruff format $SRC # standardizes / formats CI yaml files if [ "$SKIP_YAMLFIX" = false ]; then - yamlfix .github tests -e "dependabot.yml" -e "workflows/release_prepare.yml" -e "workflows/release_finalize.yml" -fi\ No newline at end of file + yamlfix .github tests -e "dependabot.yml" -e "workflows/release_prepare.yml" -e "workflows/release_finalize.yml" -e "workflows/integration-test-fast-services.yml" -e "workflows/integration-test-slow-services.yml" +fi diff --git a/scripts/lint.sh b/scripts/lint.sh index 914190eba5c..c4c8b24a823 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -30,7 +30,7 @@ fi # checks for yaml formatting errors if [ "$SKIP_YAMLFIX" = false ]; then - yamlfix --check .github tests -e "dependabot.yml" -e "workflows/release_prepare.yml" -e "workflows/release_finalize.yml" + yamlfix --check .github tests -e "dependabot.yml" -e "workflows/release_prepare.yml" -e "workflows/release_finalize.yml" -e "workflows/integration-test-fast-services.yml" -e "workflows/integration-test-slow-services.yml" fi # autoflake replacement: checks for unused imports and variables @@ -39,4 +39,4 @@ ruff check $SRC --select F401,F841 --exclude "__init__.py" --exclude "*.ipynb" - ruff format $SRC --check # check type annotations -mypy $SRC_NO_TESTS +mypy $SRC_NO_TESTS \ No newline at end of file diff --git a/src/zenml/artifacts/utils.py b/src/zenml/artifacts/utils.py index 8230cefbb20..83b7693eee6 100644 --- a/src/zenml/artifacts/utils.py +++ b/src/zenml/artifacts/utils.py @@ -387,17 +387,6 @@ def load_artifact( The loaded artifact.
""" artifact = Client().get_artifact_version(name_or_id, version) - try: - step_run = get_step_context().step_run - client = Client() - client.zen_store.update_run_step( - step_run_id=step_run.id, - step_run_update=StepRunUpdate( - loaded_artifact_versions={artifact.name: artifact.id} - ), - ) - except RuntimeError: - pass # Cannot link to step run if called outside of a step return load_artifact_from_response(artifact) diff --git a/src/zenml/cli/base.py b/src/zenml/cli/base.py index f814222bb05..bc9bed65897 100644 --- a/src/zenml/cli/base.py +++ b/src/zenml/cli/base.py @@ -79,19 +79,19 @@ def copier_github_url(self) -> str: ZENML_PROJECT_TEMPLATES = dict( e2e_batch=ZenMLProjectTemplateLocation( github_url="zenml-io/template-e2e-batch", - github_tag="2024.10.10", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml + github_tag="2024.10.30", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml ), starter=ZenMLProjectTemplateLocation( github_url="zenml-io/template-starter", - github_tag="2024.09.24", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml + github_tag="2024.10.30", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml ), nlp=ZenMLProjectTemplateLocation( github_url="zenml-io/template-nlp", - github_tag="2024.09.23", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml + github_tag="2024.10.30", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml ), llm_finetuning=ZenMLProjectTemplateLocation( github_url="zenml-io/template-llm-finetuning", - github_tag="2024.10.10", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml + github_tag="2024.10.30", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml ), ) diff --git a/src/zenml/cli/model.py b/src/zenml/cli/model.py index ab9355abe76..403d4a7b42a 100644 --- a/src/zenml/cli/model.py +++ b/src/zenml/cli/model.py @@ -59,11 +59,6 @@ def _model_to_print(model: ModelResponse) -> Dict[str, Any]: def _model_version_to_print( model_version: ModelVersionResponse, ) -> Dict[str, Any]: - run_metadata = None - if model_version.run_metadata: - run_metadata = { - k: v.value for k, v in model_version.run_metadata.items() - } return { "id": model_version.id, "model": model_version.model.name, @@ -71,7 +66,7 @@ def _model_version_to_print( "number": model_version.number, "description": model_version.description, "stage": model_version.stage, - "run_metadata": run_metadata, + "run_metadata": model_version.run_metadata, "tags": [t.name for t in model_version.tags], "data_artifacts_count": len(model_version.data_artifact_ids), "model_artifacts_count": len(model_version.model_artifact_ids), diff --git a/src/zenml/cli/stack.py b/src/zenml/cli/stack.py index 1781a759134..68d723e9bbd 100644 --- a/src/zenml/cli/stack.py +++ b/src/zenml/cli/stack.py @@ -408,6 +408,7 @@ def register_stack( component_type, preset_name ) component_info = component_response.id + component_name = component_response.name else: if isinstance(service_connector, UUID): # find existing components under same connector diff --git a/src/zenml/client.py b/src/zenml/client.py index 2a16cab63e6..b32b8cce04c 100644 --- a/src/zenml/client.py +++ b/src/zenml/client.py @@ -136,9 +136,7 @@ PipelineResponse, PipelineRunFilter, PipelineRunResponse, - RunMetadataFilter, RunMetadataRequest, - RunMetadataResponse, RunTemplateFilter, RunTemplateRequest, RunTemplateResponse, 
@@ -190,6 +188,7 @@ WorkspaceResponse, WorkspaceUpdate, ) +from zenml.models.v2.core.step_run import StepRunUpdate from zenml.services.service import ServiceConfig from zenml.services.service_status import ServiceState from zenml.services.service_type import ServiceType @@ -4166,6 +4165,8 @@ def get_artifact_version( Returns: The artifact version. """ + from zenml import get_step_context + if cll := client_lazy_loader( method_name="get_artifact_version", name_id_or_prefix=name_id_or_prefix, @@ -4173,13 +4174,26 @@ def get_artifact_version( hydrate=hydrate, ): return cll # type: ignore[return-value] - return self._get_entity_version_by_id_or_name_or_prefix( + + artifact = self._get_entity_version_by_id_or_name_or_prefix( get_method=self.zen_store.get_artifact_version, list_method=self.list_artifact_versions, name_id_or_prefix=name_id_or_prefix, version=version, hydrate=hydrate, ) + try: + step_run = get_step_context().step_run + client = Client() + client.zen_store.update_run_step( + step_run_id=step_run.id, + step_run_update=StepRunUpdate( + loaded_artifact_versions={artifact.name: artifact.id} + ), + ) + except RuntimeError: + pass # Cannot link to step run if called outside of a step + return artifact def list_artifact_versions( self, @@ -4417,7 +4431,7 @@ def create_run_metadata( resource_id: UUID, resource_type: MetadataResourceTypes, stack_component_id: Optional[UUID] = None, - ) -> List[RunMetadataResponse]: + ) -> None: """Create run metadata. Args: @@ -4430,7 +4444,7 @@ def create_run_metadata( the metadata. Returns: - The created metadata, as string to model dictionary. + None """ from zenml.metadata.metadata_types import get_metadata_type @@ -4465,74 +4479,8 @@ def create_run_metadata( values=values, types=types, ) - return self.zen_store.create_run_metadata(run_metadata) - - def list_run_metadata( - self, - sort_by: str = "created", - page: int = PAGINATION_STARTING_PAGE, - size: int = PAGE_SIZE_DEFAULT, - logical_operator: LogicalOperators = LogicalOperators.AND, - id: Optional[Union[UUID, str]] = None, - created: Optional[Union[datetime, str]] = None, - updated: Optional[Union[datetime, str]] = None, - workspace_id: Optional[UUID] = None, - user_id: Optional[UUID] = None, - resource_id: Optional[UUID] = None, - resource_type: Optional[MetadataResourceTypes] = None, - stack_component_id: Optional[UUID] = None, - key: Optional[str] = None, - value: Optional["MetadataType"] = None, - type: Optional[str] = None, - hydrate: bool = False, - ) -> Page[RunMetadataResponse]: - """List run metadata. - - Args: - sort_by: The field to sort the results by. - page: The page number to return. - size: The number of results to return per page. - logical_operator: The logical operator to use for filtering. - id: The ID of the metadata. - created: The creation time of the metadata. - updated: The last update time of the metadata. - workspace_id: The ID of the workspace the metadata belongs to. - user_id: The ID of the user that created the metadata. - resource_id: The ID of the resource the metadata belongs to. - resource_type: The type of the resource the metadata belongs to. - stack_component_id: The ID of the stack component that produced - the metadata. - key: The key of the metadata. - value: The value of the metadata. - type: The type of the metadata. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata. 
- """ - metadata_filter_model = RunMetadataFilter( - sort_by=sort_by, - page=page, - size=size, - logical_operator=logical_operator, - id=id, - created=created, - updated=updated, - workspace_id=workspace_id, - user_id=user_id, - resource_id=resource_id, - resource_type=resource_type, - stack_component_id=stack_component_id, - key=key, - value=value, - type=type, - ) - metadata_filter_model.set_scope_workspace(self.active_workspace.id) - return self.zen_store.list_run_metadata( - metadata_filter_model, - hydrate=hydrate, - ) + self.zen_store.create_run_metadata(run_metadata) + return None # -------------------------------- Secrets --------------------------------- diff --git a/src/zenml/data_validators/base_data_validator.py b/src/zenml/data_validators/base_data_validator.py index 53a4ac669c4..0a62b2ed142 100644 --- a/src/zenml/data_validators/base_data_validator.py +++ b/src/zenml/data_validators/base_data_validator.py @@ -175,7 +175,7 @@ def model_validation( This method should be implemented by data validators that support running model validation checks (e.g. confusion matrix validation, - performance reports, model error analyses, etc). + performance reports, model error analyzes, etc). Unlike `data_validation`, model validation checks require that a model be present as an active component during the validation process. @@ -184,7 +184,7 @@ def model_validation( accommodate different categories of data validation tests, e.g.: * single dataset tests: confusion matrix validation, - performance reports, model error analyses, etc + performance reports, model error analyzes, etc * model comparison tests: tests that identify changes in a model behavior by comparing how it performs on two different datasets. diff --git a/src/zenml/enums.py b/src/zenml/enums.py index cb718cfb99f..c39b39c43ea 100644 --- a/src/zenml/enums.py +++ b/src/zenml/enums.py @@ -34,8 +34,12 @@ class ArtifactType(StrEnum): class StepRunInputArtifactType(StrEnum): """All possible types of a step run input artifact.""" - DEFAULT = "default" # input argument that is the output of a previous step + STEP_OUTPUT = ( + "step_output" # input argument that is the output of a previous step + ) MANUAL = "manual" # manually loaded via `zenml.load_artifact()` + EXTERNAL = "external" # loaded via `ExternalArtifact(value=...)` + LAZY_LOADED = "lazy" # loaded via various lazy methods class ArtifactSaveType(StrEnum): diff --git a/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py b/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py index c49bf5bed54..1c67bfc5741 100644 --- a/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py +++ b/src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py @@ -566,7 +566,7 @@ def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus: # Fetch the status of the _PipelineExecution if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata: - run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value + run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID] elif run.orchestrator_run_id is not None: run_id = run.orchestrator_run_id else: diff --git a/src/zenml/integrations/azure/orchestrators/azureml_orchestrator.py b/src/zenml/integrations/azure/orchestrators/azureml_orchestrator.py index 1e0f68143ff..d0e2058ca1d 100644 --- a/src/zenml/integrations/azure/orchestrators/azureml_orchestrator.py +++ b/src/zenml/integrations/azure/orchestrators/azureml_orchestrator.py @@ -482,7 +482,7 @@ def fetch_status(self, run: "PipelineRunResponse") -> 
ExecutionStatus: # Fetch the status of the PipelineJob if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata: - run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value + run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID] elif run.orchestrator_run_id is not None: run_id = run.orchestrator_run_id else: diff --git a/src/zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py b/src/zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py index 94aa67cf65f..60dfccc8d51 100644 --- a/src/zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py +++ b/src/zenml/integrations/deepchecks/data_validators/deepchecks_data_validator.py @@ -430,7 +430,7 @@ def model_validation( """Run one or more Deepchecks model validation checks. Call this method to perform model validation checks (e.g. confusion - matrix validation, performance reports, model error analyses, etc). + matrix validation, performance reports, model error analyses, etc). A second dataset is required for model performance comparison tests (i.e. tests that identify changes in a model behavior by comparing how it performs on two different datasets). diff --git a/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py b/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py index 2c02bb71b8a..bb218febb84 100644 --- a/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +++ b/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py @@ -835,7 +835,7 @@ def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus: # Fetch the status of the PipelineJob if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata: - run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value + run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID] elif run.orchestrator_run_id is not None: run_id = run.orchestrator_run_id else: diff --git a/src/zenml/metadata/lazy_load.py b/src/zenml/metadata/lazy_load.py index 4064450142a..7ce2cc0d30a 100644 --- a/src/zenml/metadata/lazy_load.py +++ b/src/zenml/metadata/lazy_load.py @@ -13,10 +13,25 @@ # permissions and limitations under the License. """Run Metadata Lazy Loader definition.""" -from typing import TYPE_CHECKING, Optional +from typing import Optional -if TYPE_CHECKING: - from zenml.models import RunMetadataResponse +from pydantic import BaseModel + +from zenml.metadata.metadata_types import MetadataType + + +class LazyRunMetadataResponse(BaseModel): + """Lazy run metadata response. + + Used if the run metadata is accessed from the model in + a pipeline context available only during pipeline compilation. + """ + + lazy_load_artifact_name: Optional[str] = None + lazy_load_artifact_version: Optional[str] = None + lazy_load_metadata_name: Optional[str] = None + lazy_load_model_name: str + lazy_load_model_version: Optional[str] = None class RunMetadataLazyGetter: @@ -47,7 +62,7 @@ def __init__( self._lazy_load_artifact_name = _lazy_load_artifact_name self._lazy_load_artifact_version = _lazy_load_artifact_version - def __getitem__(self, key: str) -> "RunMetadataResponse": + def __getitem__(self, key: str) -> MetadataType: """Get the metadata for the given key. Args: @@ -56,9 +71,7 @@ def __getitem__(self, key: str) -> "RunMetadataResponse": Returns: The metadata lazy loader wrapper for the given key.
""" - from zenml.models.v2.core.run_metadata import LazyRunMetadataResponse - - return LazyRunMetadataResponse( + return LazyRunMetadataResponse( # type: ignore[return-value] lazy_load_model_name=self._lazy_load_model_name, lazy_load_model_version=self._lazy_load_model_version, lazy_load_artifact_name=self._lazy_load_artifact_name, diff --git a/src/zenml/model/model.py b/src/zenml/model/model.py index 7d37686266e..05c0045ca66 100644 --- a/src/zenml/model/model.py +++ b/src/zenml/model/model.py @@ -41,7 +41,6 @@ ModelResponse, ModelVersionResponse, PipelineRunResponse, - RunMetadataResponse, StepRunResponse, ) @@ -347,7 +346,7 @@ def log_metadata( ) @property - def run_metadata(self) -> Dict[str, "RunMetadataResponse"]: + def run_metadata(self) -> Dict[str, "MetadataType"]: """Get model version run metadata. Returns: diff --git a/src/zenml/models/__init__.py b/src/zenml/models/__init__.py index af0b016b4a2..5d6cc0c9125 100644 --- a/src/zenml/models/__init__.py +++ b/src/zenml/models/__init__.py @@ -239,12 +239,7 @@ ) from zenml.models.v2.base.base_plugin_flavor import BasePluginFlavorResponse from zenml.models.v2.core.run_metadata import ( - LazyRunMetadataResponse, RunMetadataRequest, - RunMetadataFilter, - RunMetadataResponse, - RunMetadataResponseBody, - RunMetadataResponseMetadata, ) from zenml.models.v2.core.schedule import ( ScheduleRequest, @@ -418,7 +413,6 @@ FlavorResponseBody.model_rebuild() FlavorResponseMetadata.model_rebuild() LazyArtifactVersionResponse.model_rebuild() -LazyRunMetadataResponse.model_rebuild() ModelResponseBody.model_rebuild() ModelResponseMetadata.model_rebuild() ModelVersionResponseBody.model_rebuild() @@ -444,8 +438,6 @@ RunTemplateResponseMetadata.model_rebuild() RunTemplateResponseResources.model_rebuild() RunTemplateResponseBody.model_rebuild() -RunMetadataResponseBody.model_rebuild() -RunMetadataResponseMetadata.model_rebuild() ScheduleResponseBody.model_rebuild() ScheduleResponseMetadata.model_rebuild() SecretResponseBody.model_rebuild() @@ -637,10 +629,6 @@ "RunTemplateResponseResources", "RunTemplateFilter", "RunMetadataRequest", - "RunMetadataFilter", - "RunMetadataResponse", - "RunMetadataResponseBody", - "RunMetadataResponseMetadata", "ScheduleRequest", "ScheduleUpdate", "ScheduleFilter", diff --git a/src/zenml/models/v2/core/artifact_version.py b/src/zenml/models/v2/core/artifact_version.py index 69c9fca45ec..b5d80e32990 100644 --- a/src/zenml/models/v2/core/artifact_version.py +++ b/src/zenml/models/v2/core/artifact_version.py @@ -57,9 +57,6 @@ ArtifactVisualizationResponse, ) from zenml.models.v2.core.pipeline_run import PipelineRunResponse - from zenml.models.v2.core.run_metadata import ( - RunMetadataResponse, - ) from zenml.models.v2.core.step_run import StepRunResponse logger = get_logger(__name__) @@ -236,7 +233,7 @@ class ArtifactVersionResponseMetadata(WorkspaceScopedResponseMetadata): visualizations: Optional[List["ArtifactVisualizationResponse"]] = Field( default=None, title="Visualizations of the artifact." ) - run_metadata: Dict[str, "RunMetadataResponse"] = Field( + run_metadata: Dict[str, MetadataType] = Field( default={}, title="Metadata of the artifact." ) @@ -358,7 +355,7 @@ def visualizations( return self.get_metadata().visualizations @property - def run_metadata(self) -> Dict[str, "RunMetadataResponse"]: + def run_metadata(self) -> Dict[str, MetadataType]: """The `metadata` property. 
Returns: @@ -686,7 +683,7 @@ def get_metadata(self) -> None: # type: ignore[override] ) @property - def run_metadata(self) -> Dict[str, "RunMetadataResponse"]: + def run_metadata(self) -> Dict[str, MetadataType]: """The `metadata` property in lazy loading mode. Returns: diff --git a/src/zenml/models/v2/core/model_version.py b/src/zenml/models/v2/core/model_version.py index 48080a556e7..dbc0a0f214f 100644 --- a/src/zenml/models/v2/core/model_version.py +++ b/src/zenml/models/v2/core/model_version.py @@ -29,6 +29,7 @@ from zenml.constants import STR_FIELD_MAX_LENGTH, TEXT_FIELD_MAX_LENGTH from zenml.enums import ModelStages +from zenml.metadata.metadata_types import MetadataType from zenml.models.v2.base.filter import AnyQuery from zenml.models.v2.base.page import Page from zenml.models.v2.base.scoped import ( @@ -49,9 +50,6 @@ from zenml.models.v2.core.artifact_version import ArtifactVersionResponse from zenml.models.v2.core.model import ModelResponse from zenml.models.v2.core.pipeline_run import PipelineRunResponse - from zenml.models.v2.core.run_metadata import ( - RunMetadataResponse, - ) from zenml.zen_stores.schemas import BaseSchema AnySchema = TypeVar("AnySchema", bound=BaseSchema) @@ -193,7 +191,7 @@ class ModelVersionResponseMetadata(WorkspaceScopedResponseMetadata): max_length=TEXT_FIELD_MAX_LENGTH, default=None, ) - run_metadata: Dict[str, "RunMetadataResponse"] = Field( + run_metadata: Dict[str, MetadataType] = Field( description="Metadata linked to the model version", default={}, ) @@ -304,7 +302,7 @@ def description(self) -> Optional[str]: return self.get_metadata().description @property - def run_metadata(self) -> Optional[Dict[str, "RunMetadataResponse"]]: + def run_metadata(self) -> Dict[str, MetadataType]: """The `run_metadata` property. Returns: diff --git a/src/zenml/models/v2/core/pipeline_run.py b/src/zenml/models/v2/core/pipeline_run.py index 2dd4f463d09..26f517acdd3 100644 --- a/src/zenml/models/v2/core/pipeline_run.py +++ b/src/zenml/models/v2/core/pipeline_run.py @@ -30,6 +30,7 @@ from zenml.config.pipeline_configurations import PipelineConfiguration from zenml.constants import STR_FIELD_MAX_LENGTH from zenml.enums import ExecutionStatus +from zenml.metadata.metadata_types import MetadataType from zenml.models.v2.base.scoped import ( WorkspaceScopedRequest, WorkspaceScopedResponse, @@ -51,9 +52,6 @@ from zenml.models.v2.core.pipeline_build import ( PipelineBuildResponse, ) - from zenml.models.v2.core.run_metadata import ( - RunMetadataResponse, - ) from zenml.models.v2.core.schedule import ScheduleResponse from zenml.models.v2.core.stack import StackResponse from zenml.models.v2.core.step_run import StepRunResponse @@ -190,7 +188,7 @@ class PipelineRunResponseBody(WorkspaceScopedResponseBody): class PipelineRunResponseMetadata(WorkspaceScopedResponseMetadata): """Response metadata for pipeline runs.""" - run_metadata: Dict[str, "RunMetadataResponse"] = Field( + run_metadata: Dict[str, MetadataType] = Field( default={}, title="Metadata associated with this pipeline run.", ) @@ -450,7 +448,7 @@ def model_version_id(self) -> Optional[UUID]: return self.get_body().model_version_id @property - def run_metadata(self) -> Dict[str, "RunMetadataResponse"]: + def run_metadata(self) -> Dict[str, MetadataType]: """The `run_metadata` property. 
Returns: diff --git a/src/zenml/models/v2/core/run_metadata.py b/src/zenml/models/v2/core/run_metadata.py index 99b706a529b..c4a2ef8e678 100644 --- a/src/zenml/models/v2/core/run_metadata.py +++ b/src/zenml/models/v2/core/run_metadata.py @@ -13,21 +13,15 @@ # permissions and limitations under the License. """Models representing run metadata.""" -from typing import Any, Dict, Optional, Union +from typing import Dict, Optional from uuid import UUID -from pydantic import Field, field_validator +from pydantic import Field -from zenml.constants import STR_FIELD_MAX_LENGTH, TEXT_FIELD_MAX_LENGTH from zenml.enums import MetadataResourceTypes from zenml.metadata.metadata_types import MetadataType, MetadataTypeEnum from zenml.models.v2.base.scoped import ( - WorkspaceScopedFilter, WorkspaceScopedRequest, - WorkspaceScopedResponse, - WorkspaceScopedResponseBody, - WorkspaceScopedResponseMetadata, - WorkspaceScopedResponseResources, ) # ------------------ Request Model ------------------ @@ -51,212 +45,3 @@ class RunMetadataRequest(WorkspaceScopedRequest): types: Dict[str, "MetadataTypeEnum"] = Field( title="The types of the metadata to be created.", ) - - -# ------------------ Update Model ------------------ - -# There is no update model for run metadata. - -# ------------------ Response Model ------------------ - - -class RunMetadataResponseBody(WorkspaceScopedResponseBody): - """Response body for run metadata.""" - - key: str = Field(title="The key of the metadata.") - value: MetadataType = Field( - title="The value of the metadata.", union_mode="smart" - ) - type: MetadataTypeEnum = Field(title="The type of the metadata.") - - @field_validator("key", "type") - @classmethod - def str_field_max_length_check(cls, value: Any) -> Any: - """Checks if the length of the value exceeds the maximum str length. - - Args: - value: the value set in the field - - Returns: - the value itself. - - Raises: - AssertionError: if the length of the field is longer than the - maximum threshold. - """ - assert len(str(value)) < STR_FIELD_MAX_LENGTH, ( - "The length of the value for this field can not " - f"exceed {STR_FIELD_MAX_LENGTH}" - ) - return value - - @field_validator("value") - @classmethod - def text_field_max_length_check(cls, value: Any) -> Any: - """Checks if the length of the value exceeds the maximum text length. - - Args: - value: the value set in the field - - Returns: - the value itself. - - Raises: - AssertionError: if the length of the field is longer than the - maximum threshold. - """ - assert len(str(value)) < TEXT_FIELD_MAX_LENGTH, ( - "The length of the value for this field can not " - f"exceed {TEXT_FIELD_MAX_LENGTH}" - ) - return value - - -class RunMetadataResponseMetadata(WorkspaceScopedResponseMetadata): - """Response metadata for run metadata.""" - - resource_id: UUID = Field( - title="The ID of the resource that this metadata belongs to.", - ) - resource_type: MetadataResourceTypes = Field( - title="The type of the resource that this metadata belongs to.", - ) - stack_component_id: Optional[UUID] = Field( - title="The ID of the stack component that this metadata belongs to." 
- ) - - -class RunMetadataResponseResources(WorkspaceScopedResponseResources): - """Class for all resource models associated with the run metadata entity.""" - - -class RunMetadataResponse( - WorkspaceScopedResponse[ - RunMetadataResponseBody, - RunMetadataResponseMetadata, - RunMetadataResponseResources, - ] -): - """Response model for run metadata.""" - - def get_hydrated_version(self) -> "RunMetadataResponse": - """Get the hydrated version of this run metadata. - - Returns: - an instance of the same entity with the metadata field attached. - """ - from zenml.client import Client - - return Client().zen_store.get_run_metadata(self.id) - - # Body and metadata properties - @property - def key(self) -> str: - """The `key` property. - - Returns: - the value of the property. - """ - return self.get_body().key - - @property - def value(self) -> MetadataType: - """The `value` property. - - Returns: - the value of the property. - """ - return self.get_body().value - - @property - def type(self) -> MetadataTypeEnum: - """The `type` property. - - Returns: - the value of the property. - """ - return self.get_body().type - - @property - def resource_id(self) -> UUID: - """The `resource_id` property. - - Returns: - the value of the property. - """ - return self.get_metadata().resource_id - - @property - def resource_type(self) -> MetadataResourceTypes: - """The `resource_type` property. - - Returns: - the value of the property. - """ - return MetadataResourceTypes(self.get_metadata().resource_type) - - @property - def stack_component_id(self) -> Optional[UUID]: - """The `stack_component_id` property. - - Returns: - the value of the property. - """ - return self.get_metadata().stack_component_id - - -# ------------------ Filter Model ------------------ - - -class RunMetadataFilter(WorkspaceScopedFilter): - """Model to enable advanced filtering of run metadata.""" - - resource_id: Optional[Union[str, UUID]] = Field( - default=None, union_mode="left_to_right" - ) - resource_type: Optional[MetadataResourceTypes] = None - stack_component_id: Optional[Union[str, UUID]] = Field( - default=None, union_mode="left_to_right" - ) - key: Optional[str] = None - type: Optional[Union[str, MetadataTypeEnum]] = Field( - default=None, union_mode="left_to_right" - ) - - -# -------------------- Lazy Loader -------------------- - - -class LazyRunMetadataResponse(RunMetadataResponse): - """Lazy run metadata response. - - Used if the run metadata is accessed from the model in - a pipeline context available only during pipeline compilation. - """ - - id: Optional[UUID] = None # type: ignore[assignment] - lazy_load_artifact_name: Optional[str] = None - lazy_load_artifact_version: Optional[str] = None - lazy_load_metadata_name: Optional[str] = None - lazy_load_model_name: str - lazy_load_model_version: Optional[str] = None - - def get_body(self) -> None: # type: ignore[override] - """Protects from misuse of the lazy loader. - - Raises: - RuntimeError: always - """ - raise RuntimeError( - "Cannot access run metadata body before pipeline runs." - ) - - def get_metadata(self) -> None: # type: ignore[override] - """Protects from misuse of the lazy loader. - - Raises: - RuntimeError: always - """ - raise RuntimeError( - "Cannot access run metadata metadata before pipeline runs." 
- ) diff --git a/src/zenml/models/v2/core/step_run.py b/src/zenml/models/v2/core/step_run.py index f7fbd5dcbe9..f4103f433d3 100644 --- a/src/zenml/models/v2/core/step_run.py +++ b/src/zenml/models/v2/core/step_run.py @@ -21,7 +21,8 @@ from zenml.config.step_configurations import StepConfiguration, StepSpec from zenml.constants import STR_FIELD_MAX_LENGTH, TEXT_FIELD_MAX_LENGTH -from zenml.enums import ExecutionStatus +from zenml.enums import ExecutionStatus, StepRunInputArtifactType +from zenml.metadata.metadata_types import MetadataType from zenml.models.v2.base.scoped import ( WorkspaceScopedFilter, WorkspaceScopedRequest, @@ -30,19 +31,35 @@ WorkspaceScopedResponseMetadata, WorkspaceScopedResponseResources, ) +from zenml.models.v2.core.artifact_version import ArtifactVersionResponse from zenml.models.v2.core.model_version import ModelVersionResponse if TYPE_CHECKING: from sqlalchemy.sql.elements import ColumnElement - from zenml.models.v2.core.artifact_version import ArtifactVersionResponse from zenml.models.v2.core.logs import ( LogsRequest, LogsResponse, ) - from zenml.models.v2.core.run_metadata import ( - RunMetadataResponse, - ) + + +class StepRunInputResponse(ArtifactVersionResponse): + """Response model for step run inputs.""" + + input_type: StepRunInputArtifactType + + def get_hydrated_version(self) -> "StepRunInputResponse": + """Get the hydrated version of this step run input. + + Returns: + an instance of the same entity with the metadata field attached. + """ + from zenml.client import Client + + return StepRunInputResponse( + input_type=self.input_type, + **Client().zen_store.get_artifact_version(self.id).model_dump(), + ) # ------------------ Request Model ------------------ @@ -162,11 +179,11 @@ class StepRunResponseBody(WorkspaceScopedResponseBody): title="The end time of the step run.", default=None, ) - inputs: Dict[str, "ArtifactVersionResponse"] = Field( + inputs: Dict[str, StepRunInputResponse] = Field( title="The input artifact versions of the step run.", default_factory=dict, ) - outputs: Dict[str, List["ArtifactVersionResponse"]] = Field( + outputs: Dict[str, List[ArtifactVersionResponse]] = Field( title="The output artifact versions of the step run.", default_factory=dict, ) @@ -226,7 +243,7 @@ class StepRunResponseMetadata(WorkspaceScopedResponseMetadata): title="The IDs of the parent steps of this step run.", default_factory=list, ) - run_metadata: Dict[str, "RunMetadataResponse"] = Field( + run_metadata: Dict[str, MetadataType] = Field( title="Metadata associated with this step run.", default={}, ) @@ -270,7 +287,7 @@ def get_hydrated_version(self) -> "StepRunResponse": # Helper properties @property - def input(self) -> "ArtifactVersionResponse": + def input(self) -> ArtifactVersionResponse: """Returns the input artifact that was used to run this step. Returns: @@ -289,7 +306,7 @@ def input(self) -> "ArtifactVersionResponse": return next(iter(self.inputs.values())) @property - def output(self) -> "ArtifactVersionResponse": + def output(self) -> ArtifactVersionResponse: """Returns the output artifact that was written by this step. Returns: @@ -321,7 +338,7 @@ def status(self) -> ExecutionStatus: return self.get_body().status @property - def inputs(self) -> Dict[str, "ArtifactVersionResponse"]: + def inputs(self) -> Dict[str, StepRunInputResponse]: """The `inputs` property. 
Returns: @@ -330,7 +347,7 @@ def inputs(self) -> Dict[str, "ArtifactVersionResponse"]: return self.get_body().inputs @property - def outputs(self) -> Dict[str, List["ArtifactVersionResponse"]]: + def outputs(self) -> Dict[str, List[ArtifactVersionResponse]]: """The `outputs` property. Returns: @@ -465,7 +482,7 @@ def parent_step_ids(self) -> List[UUID]: return self.get_metadata().parent_step_ids @property - def run_metadata(self) -> Dict[str, "RunMetadataResponse"]: + def run_metadata(self) -> Dict[str, MetadataType]: """The `run_metadata` property. Returns: diff --git a/src/zenml/orchestrators/input_utils.py b/src/zenml/orchestrators/input_utils.py index fdabb0d1fd1..0094bdf84f1 100644 --- a/src/zenml/orchestrators/input_utils.py +++ b/src/zenml/orchestrators/input_utils.py @@ -18,18 +18,19 @@ from zenml.client import Client from zenml.config.step_configurations import Step -from zenml.enums import ArtifactSaveType +from zenml.enums import ArtifactSaveType, StepRunInputArtifactType from zenml.exceptions import InputResolutionError from zenml.utils import pagination_utils if TYPE_CHECKING: - from zenml.models import ArtifactVersionResponse, PipelineRunResponse + from zenml.models import PipelineRunResponse + from zenml.models.v2.core.step_run import StepRunInputResponse def resolve_step_inputs( step: "Step", pipeline_run: "PipelineRunResponse", -) -> Tuple[Dict[str, "ArtifactVersionResponse"], List[UUID]]: +) -> Tuple[Dict[str, "StepRunInputResponse"], List[UUID]]: """Resolves inputs for the current step. Args: @@ -46,7 +47,8 @@ def resolve_step_inputs( The IDs of the input artifact versions and the IDs of parent steps of the current step. """ - from zenml.models import ArtifactVersionResponse, RunMetadataResponse + from zenml.models import ArtifactVersionResponse + from zenml.models.v2.core.step_run import StepRunInputResponse current_run_steps = { run_step.name: run_step @@ -55,7 +57,7 @@ def resolve_step_inputs( ) } - input_artifacts: Dict[str, "ArtifactVersionResponse"] = {} + input_artifacts: Dict[str, StepRunInputResponse] = {} for name, input_ in step.spec.inputs.items(): try: step_run = current_run_steps[input_.step_name] @@ -90,15 +92,19 @@ def resolve_step_inputs( f"`{input_.step_name}`." 
) - input_artifacts[name] = step_outputs[0] + input_artifacts[name] = StepRunInputResponse( + input_type=StepRunInputArtifactType.STEP_OUTPUT, + **step_outputs[0].model_dump(), + ) for ( name, external_artifact, ) in step.config.external_input_artifacts.items(): artifact_version_id = external_artifact.get_artifact_version_id() - input_artifacts[name] = Client().get_artifact_version( - artifact_version_id + input_artifacts[name] = StepRunInputResponse( + input_type=StepRunInputArtifactType.EXTERNAL, + **Client().get_artifact_version(artifact_version_id).model_dump(), ) for name, config_ in step.config.model_artifacts_or_metadata.items(): @@ -117,9 +123,7 @@ def resolve_step_inputs( ): # metadata values should go directly in parameters, as primitive types step.config.parameters[name] = ( - context_model_version.run_metadata[ - config_.metadata_name - ].value + context_model_version.run_metadata[config_.metadata_name] ) elif config_.artifact_name is None: err_msg = ( @@ -131,14 +135,15 @@ def resolve_step_inputs( config_.artifact_name, config_.artifact_version ): if config_.metadata_name is None: - input_artifacts[name] = artifact_ + input_artifacts[name] = StepRunInputResponse( + input_type=StepRunInputArtifactType.LAZY_LOADED, + **artifact_.model_dump(), + ) elif config_.metadata_name: # metadata values should go directly in parameters, as primitive types try: step.config.parameters[name] = ( - artifact_.run_metadata[ - config_.metadata_name - ].value + artifact_.run_metadata[config_.metadata_name] ) except KeyError: err_msg = ( @@ -160,9 +165,10 @@ def resolve_step_inputs( for name, cll_ in step.config.client_lazy_loaders.items(): value_ = cll_.evaluate() if isinstance(value_, ArtifactVersionResponse): - input_artifacts[name] = value_ - elif isinstance(value_, RunMetadataResponse): - step.config.parameters[name] = value_.value + input_artifacts[name] = StepRunInputResponse( + input_type=StepRunInputArtifactType.LAZY_LOADED, + **value_.model_dump(), + ) else: step.config.parameters[name] = value_ diff --git a/src/zenml/orchestrators/step_launcher.py b/src/zenml/orchestrators/step_launcher.py index 28523a7f325..84b0450672b 100644 --- a/src/zenml/orchestrators/step_launcher.py +++ b/src/zenml/orchestrators/step_launcher.py @@ -33,13 +33,13 @@ from zenml.logger import get_logger from zenml.logging import step_logging from zenml.models import ( - ArtifactVersionResponse, LogsRequest, PipelineDeploymentResponse, PipelineRunRequest, PipelineRunResponse, StepRunResponse, ) +from zenml.models.v2.core.step_run import StepRunInputResponse from zenml.orchestrators import output_utils, publish_utils, step_run_utils from zenml.orchestrators import utils as orchestrator_utils from zenml.orchestrators.step_runner import StepRunner @@ -442,7 +442,7 @@ def _run_step_without_step_operator( pipeline_run: PipelineRunResponse, step_run: StepRunResponse, step_run_info: StepRunInfo, - input_artifacts: Dict[str, ArtifactVersionResponse], + input_artifacts: Dict[str, StepRunInputResponse], output_artifact_uris: Dict[str, str], last_retry: bool, ) -> None: diff --git a/src/zenml/orchestrators/step_run_utils.py b/src/zenml/orchestrators/step_run_utils.py index 7d3f3cb6bbf..b5b63693916 100644 --- a/src/zenml/orchestrators/step_run_utils.py +++ b/src/zenml/orchestrators/step_run_utils.py @@ -104,6 +104,7 @@ def populate_request(self, request: StepRunRequest) -> None: input_name: artifact.id for input_name, artifact in input_artifacts.items() } + request.inputs = input_artifact_ids request.parent_step_ids = 
parent_step_ids diff --git a/src/zenml/orchestrators/step_runner.py b/src/zenml/orchestrators/step_runner.py index 7588dd6ef5c..c11c79c878f 100644 --- a/src/zenml/orchestrators/step_runner.py +++ b/src/zenml/orchestrators/step_runner.py @@ -42,6 +42,7 @@ from zenml.logger import get_logger from zenml.logging.step_logging import StepLogsStorageContext, redirected from zenml.materializers.base_materializer import BaseMaterializer +from zenml.models.v2.core.step_run import StepRunInputResponse from zenml.orchestrators.publish_utils import ( publish_step_run_metadata, publish_successful_step_run, @@ -100,7 +101,7 @@ def run( self, pipeline_run: "PipelineRunResponse", step_run: "StepRunResponse", - input_artifacts: Dict[str, "ArtifactVersionResponse"], + input_artifacts: Dict[str, StepRunInputResponse], output_artifact_uris: Dict[str, str], step_run_info: StepRunInfo, ) -> None: @@ -306,7 +307,7 @@ def _parse_inputs( self, args: List[str], annotations: Dict[str, Any], - input_artifacts: Dict[str, "ArtifactVersionResponse"], + input_artifacts: Dict[str, StepRunInputResponse], ) -> Dict[str, Any]: """Parses the inputs for a step entrypoint function. diff --git a/src/zenml/steps/base_step.py b/src/zenml/steps/base_step.py index 982b16e2529..b8ba79315ea 100644 --- a/src/zenml/steps/base_step.py +++ b/src/zenml/steps/base_step.py @@ -327,12 +327,12 @@ def _parse_call_args( The artifacts, external artifacts, model version artifacts/metadata and parameters for the step. """ from zenml.artifacts.external_artifact import ExternalArtifact + from zenml.metadata.lazy_load import LazyRunMetadataResponse from zenml.model.lazy_load import ModelVersionDataLazyLoader from zenml.models.v2.core.artifact_version import ( ArtifactVersionResponse, LazyArtifactVersionResponse, ) - from zenml.models.v2.core.run_metadata import LazyRunMetadataResponse signature = inspect.signature(self.entrypoint, follow_wrapped=True) diff --git a/src/zenml/steps/entrypoint_function_utils.py b/src/zenml/steps/entrypoint_function_utils.py index a91f87131a7..9f87ea826b7 100644 --- a/src/zenml/steps/entrypoint_function_utils.py +++ b/src/zenml/steps/entrypoint_function_utils.py @@ -32,6 +32,7 @@ from zenml.exceptions import StepInterfaceError from zenml.logger import get_logger from zenml.materializers.base_materializer import BaseMaterializer +from zenml.metadata.lazy_load import LazyRunMetadataResponse from zenml.steps.utils import ( OutputSignature, parse_return_type_annotations, @@ -136,10 +137,7 @@ def validate_input(self, key: str, value: Any) -> None: UnmaterializedArtifact, ) from zenml.client_lazy_loader import ClientLazyLoader - from zenml.models import ( - ArtifactVersionResponse, - RunMetadataResponse, - ) + from zenml.models import ArtifactVersionResponse if key not in self.inputs: raise KeyError( @@ -154,8 +152,8 @@ def validate_input(self, key: str, value: Any) -> None: StepArtifact, ExternalArtifact, ArtifactVersionResponse, - RunMetadataResponse, ClientLazyLoader, + LazyRunMetadataResponse, ), ): # If we were to do any type validation for artifacts here, we diff --git a/src/zenml/steps/step_context.py b/src/zenml/steps/step_context.py index 038ae3558fd..aefccff8521 100644 --- a/src/zenml/steps/step_context.py +++ b/src/zenml/steps/step_context.py @@ -35,11 +35,12 @@ from zenml.metadata.metadata_types import MetadataType from zenml.model.model import Model from zenml.models import ( - ArtifactVersionResponse, PipelineResponse, PipelineRunResponse, StepRunResponse, ) + from zenml.models.v2.core.step_run import 
StepRunInputResponse + logger = get_logger(__name__) @@ -191,7 +192,7 @@ def model(self) -> "Model": return self.model_version.to_model_class() @property - def inputs(self) -> Dict[str, "ArtifactVersionResponse"]: + def inputs(self) -> Dict[str, "StepRunInputResponse"]: """Returns the input artifacts of the current step. Returns: diff --git a/src/zenml/zen_server/rbac/utils.py b/src/zenml/zen_server/rbac/utils.py index 2dd0b2ef339..9e00e1a740e 100644 --- a/src/zenml/zen_server/rbac/utils.py +++ b/src/zenml/zen_server/rbac/utils.py @@ -404,7 +404,6 @@ def get_resource_type_for_model( PipelineDeploymentResponse, PipelineResponse, PipelineRunResponse, - RunMetadataResponse, RunTemplateResponse, SecretResponse, ServiceAccountResponse, @@ -437,7 +436,6 @@ def get_resource_type_for_model( ArtifactVersionResponse: ResourceType.ARTIFACT_VERSION, WorkspaceResponse: ResourceType.WORKSPACE, UserResponse: ResourceType.USER, - RunMetadataResponse: ResourceType.RUN_METADATA, PipelineDeploymentResponse: ResourceType.PIPELINE_DEPLOYMENT, PipelineBuildResponse: ResourceType.PIPELINE_BUILD, PipelineRunResponse: ResourceType.PIPELINE_RUN, diff --git a/src/zenml/zen_server/routers/run_metadata_endpoints.py b/src/zenml/zen_server/routers/run_metadata_endpoints.py deleted file mode 100644 index c3d97da7a0b..00000000000 --- a/src/zenml/zen_server/routers/run_metadata_endpoints.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) ZenML GmbH 2022. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. -"""Endpoint definitions for run metadata.""" - -from uuid import UUID - -from fastapi import APIRouter, Depends, Security - -from zenml.constants import API, RUN_METADATA, VERSION_1 -from zenml.models import Page, RunMetadataFilter, RunMetadataResponse -from zenml.zen_server.auth import AuthContext, authorize -from zenml.zen_server.exceptions import error_response -from zenml.zen_server.rbac.endpoint_utils import ( - verify_permissions_and_list_entities, -) -from zenml.zen_server.rbac.models import ResourceType -from zenml.zen_server.utils import ( - handle_exceptions, - make_dependable, - zen_store, -) - -router = APIRouter( - prefix=API + VERSION_1 + RUN_METADATA, - tags=["run_metadata"], - responses={401: error_response, 403: error_response}, -) - - -@router.get( - "", - response_model=Page[RunMetadataResponse], - responses={401: error_response, 404: error_response, 422: error_response}, -) -@handle_exceptions -def list_run_metadata( - run_metadata_filter_model: RunMetadataFilter = Depends( - make_dependable(RunMetadataFilter) - ), - hydrate: bool = False, - _: AuthContext = Security(authorize), -) -> Page[RunMetadataResponse]: - """Get run metadata according to query filters. - - Args: - run_metadata_filter_model: Filter model used for pagination, sorting, - filtering. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The pipeline runs according to query filters. 
- """ - return verify_permissions_and_list_entities( - filter_model=run_metadata_filter_model, - resource_type=ResourceType.RUN_METADATA, - list_method=zen_store().list_run_metadata, - hydrate=hydrate, - ) - - -@router.get( - "/{run_metadata_id}", - response_model=RunMetadataResponse, - responses={401: error_response, 404: error_response, 422: error_response}, -) -@handle_exceptions -def get_run_metadata( - run_metadata_id: UUID, - hydrate: bool = False, - _: AuthContext = Security(authorize), -) -> RunMetadataResponse: - """Get run metadata by ID. - - Args: - run_metadata_id: The ID of run metadata. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata response. - """ - return zen_store().get_run_metadata( - run_metadata_id=run_metadata_id, hydrate=hydrate - ) diff --git a/src/zenml/zen_server/routers/workspaces_endpoints.py b/src/zenml/zen_server/routers/workspaces_endpoints.py index 7b9782e87c3..dd33ebee869 100644 --- a/src/zenml/zen_server/routers/workspaces_endpoints.py +++ b/src/zenml/zen_server/routers/workspaces_endpoints.py @@ -74,7 +74,6 @@ PipelineRunRequest, PipelineRunResponse, RunMetadataRequest, - RunMetadataResponse, RunTemplateFilter, RunTemplateRequest, RunTemplateResponse, @@ -977,7 +976,6 @@ def get_or_create_pipeline_run( @router.post( WORKSPACES + "/{workspace_name_or_id}" + RUN_METADATA, - response_model=List[RunMetadataResponse], responses={401: error_response, 409: error_response, 422: error_response}, ) @handle_exceptions @@ -985,7 +983,7 @@ def create_run_metadata( workspace_name_or_id: Union[str, UUID], run_metadata: RunMetadataRequest, auth_context: AuthContext = Security(authorize), -) -> List[RunMetadataResponse]: +) -> None: """Creates run metadata. 
Args: @@ -1039,7 +1037,8 @@ def create_run_metadata( resource_type=ResourceType.RUN_METADATA, action=Action.CREATE ) - return zen_store().create_run_metadata(run_metadata) + zen_store().create_run_metadata(run_metadata) + return None @router.post( diff --git a/src/zenml/zen_server/zen_server_api.py b/src/zenml/zen_server/zen_server_api.py index 12c7cf63c8d..7ae81846c9c 100644 --- a/src/zenml/zen_server/zen_server_api.py +++ b/src/zenml/zen_server/zen_server_api.py @@ -70,7 +70,6 @@ pipeline_deployments_endpoints, pipelines_endpoints, plugin_endpoints, - run_metadata_endpoints, run_templates_endpoints, runs_endpoints, schedule_endpoints, @@ -423,7 +422,6 @@ async def dashboard(request: Request) -> Any: app.include_router(pipeline_builds_endpoints.router) app.include_router(pipeline_deployments_endpoints.router) app.include_router(runs_endpoints.router) -app.include_router(run_metadata_endpoints.router) app.include_router(run_templates_endpoints.router) app.include_router(schedule_endpoints.router) app.include_router(secrets_endpoints.router) diff --git a/src/zenml/zen_stores/migrations/versions/1cb6477f72d6_move_artifact_save_type.py b/src/zenml/zen_stores/migrations/versions/1cb6477f72d6_move_artifact_save_type.py index aa86ad6f841..ff14523cdb5 100644 --- a/src/zenml/zen_stores/migrations/versions/1cb6477f72d6_move_artifact_save_type.py +++ b/src/zenml/zen_stores/migrations/versions/1cb6477f72d6_move_artifact_save_type.py @@ -26,9 +26,10 @@ def upgrade() -> None: op.execute(""" UPDATE artifact_version SET save_type = ( - SELECT step_run_output_artifact.type + SELECT max(step_run_output_artifact.type) FROM step_run_output_artifact WHERE step_run_output_artifact.artifact_id = artifact_version.id + GROUP BY artifact_id ) """) op.execute(""" @@ -71,9 +72,10 @@ def downgrade() -> None: op.execute(""" UPDATE step_run_output_artifact SET type = ( - SELECT artifact_version.save_type + SELECT max(artifact_version.save_type) FROM artifact_version WHERE step_run_output_artifact.artifact_id = artifact_version.id + GROUP BY artifact_id ) """) op.execute(""" diff --git a/src/zenml/zen_stores/migrations/versions/b557b2871693_update_step_run_input_types.py b/src/zenml/zen_stores/migrations/versions/b557b2871693_update_step_run_input_types.py new file mode 100644 index 00000000000..cf397f57d91 --- /dev/null +++ b/src/zenml/zen_stores/migrations/versions/b557b2871693_update_step_run_input_types.py @@ -0,0 +1,33 @@ +"""Update step run input types [b557b2871693]. + +Revision ID: b557b2871693 +Revises: 1cb6477f72d6 +Create Date: 2024-10-30 13:06:55.147202 + +""" + +from alembic import op + +# revision identifiers, used by Alembic. 
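+# `revision` names this data migration; `down_revision` chains it after
+# 1cb6477f72d6, so the `save_type` backfill runs first and the
+# `step_run_input_artifact.type` rename from 'default' to 'step_output'
+# runs second.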
+revision = "b557b2871693" +down_revision = "1cb6477f72d6" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Upgrade database schema and/or data, creating a new revision.""" + op.execute(""" + UPDATE step_run_input_artifact + SET type = 'step_output' + WHERE type = 'default' + """) + + +def downgrade() -> None: + """Downgrade database schema and/or data back to the previous revision.""" + op.execute(""" + UPDATE step_run_input_artifact + SET type = 'default' + WHERE type = 'step_output' + """) diff --git a/src/zenml/zen_stores/rest_zen_store.py b/src/zenml/zen_stores/rest_zen_store.py index 0b4836c3f7c..b8288343256 100644 --- a/src/zenml/zen_stores/rest_zen_store.py +++ b/src/zenml/zen_stores/rest_zen_store.py @@ -201,9 +201,7 @@ PipelineRunResponse, PipelineRunUpdate, PipelineUpdate, - RunMetadataFilter, RunMetadataRequest, - RunMetadataResponse, RunTemplateFilter, RunTemplateRequest, RunTemplateResponse, @@ -2014,9 +2012,7 @@ def get_or_create_run( # ----------------------------- Run Metadata ----------------------------- - def create_run_metadata( - self, run_metadata: RunMetadataRequest - ) -> List[RunMetadataResponse]: + def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None: """Creates run metadata. Args: @@ -2026,55 +2022,8 @@ def create_run_metadata( The created run metadata. """ route = f"{WORKSPACES}/{str(run_metadata.workspace)}{RUN_METADATA}" - response_body = self.post(f"{route}", body=run_metadata) - result: List[RunMetadataResponse] = [] - if isinstance(response_body, list): - for metadata in response_body or []: - result.append(RunMetadataResponse.model_validate(metadata)) - return result - - def get_run_metadata( - self, run_metadata_id: UUID, hydrate: bool = True - ) -> RunMetadataResponse: - """Gets run metadata with the given ID. - - Args: - run_metadata_id: The ID of the run metadata to get. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata. - """ - return self._get_resource( - resource_id=run_metadata_id, - route=RUN_METADATA, - response_model=RunMetadataResponse, - params={"hydrate": hydrate}, - ) - - def list_run_metadata( - self, - run_metadata_filter_model: RunMetadataFilter, - hydrate: bool = False, - ) -> Page[RunMetadataResponse]: - """List run metadata. - - Args: - run_metadata_filter_model: All filter parameters including - pagination params. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata. - """ - return self._list_paginated_resources( - route=RUN_METADATA, - response_model=RunMetadataResponse, - filter_model=run_metadata_filter_model, - params={"hydrate": hydrate}, - ) + self.post(f"{route}", body=run_metadata) + return None # ----------------------------- Schedules ----------------------------- diff --git a/src/zenml/zen_stores/schemas/artifact_schemas.py b/src/zenml/zen_stores/schemas/artifact_schemas.py index b8ac03b2f17..8b08e51b562 100644 --- a/src/zenml/zen_stores/schemas/artifact_schemas.py +++ b/src/zenml/zen_stores/schemas/artifact_schemas.py @@ -13,6 +13,7 @@ # permissions and limitations under the License. 
"""SQLModel implementation of artifact table.""" +import json from datetime import datetime from typing import TYPE_CHECKING, Any, List, Optional from uuid import UUID @@ -374,7 +375,9 @@ def to_model( workspace=self.workspace.to_model(), producer_step_run_id=producer_step_run_id, visualizations=[v.to_model() for v in self.visualizations], - run_metadata={m.key: m.to_model() for m in self.run_metadata}, + run_metadata={ + m.key: json.loads(m.value) for m in self.run_metadata + }, ) resources = None diff --git a/src/zenml/zen_stores/schemas/model_schemas.py b/src/zenml/zen_stores/schemas/model_schemas.py index 96727ed1f47..37cec2c5513 100644 --- a/src/zenml/zen_stores/schemas/model_schemas.py +++ b/src/zenml/zen_stores/schemas/model_schemas.py @@ -13,6 +13,7 @@ # permissions and limitations under the License. """SQLModel implementation of model tables.""" +import json from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast from uuid import UUID @@ -403,8 +404,7 @@ def to_model( workspace=self.workspace.to_model(), description=self.description, run_metadata={ - rm.key: rm.to_model(include_metadata=True) - for rm in self.run_metadata + rm.key: json.loads(rm.value) for rm in self.run_metadata }, ) diff --git a/src/zenml/zen_stores/schemas/pipeline_run_schemas.py b/src/zenml/zen_stores/schemas/pipeline_run_schemas.py index 389700fb9ac..6028451acf2 100644 --- a/src/zenml/zen_stores/schemas/pipeline_run_schemas.py +++ b/src/zenml/zen_stores/schemas/pipeline_run_schemas.py @@ -276,7 +276,7 @@ def to_model( ) run_metadata = { - metadata_schema.key: metadata_schema.to_model() + metadata_schema.key: json.loads(metadata_schema.value) for metadata_schema in self.run_metadata } diff --git a/src/zenml/zen_stores/schemas/run_metadata_schemas.py b/src/zenml/zen_stores/schemas/run_metadata_schemas.py index f84e210d97d..18d203111c7 100644 --- a/src/zenml/zen_stores/schemas/run_metadata_schemas.py +++ b/src/zenml/zen_stores/schemas/run_metadata_schemas.py @@ -13,20 +13,13 @@ # permissions and limitations under the License. """SQLModel implementation of pipeline run metadata tables.""" -import json -from typing import TYPE_CHECKING, Any, List, Optional +from typing import TYPE_CHECKING, List, Optional from uuid import UUID from sqlalchemy import TEXT, VARCHAR, Column from sqlmodel import Field, Relationship from zenml.enums import MetadataResourceTypes -from zenml.metadata.metadata_types import MetadataTypeEnum -from zenml.models import ( - RunMetadataResponse, - RunMetadataResponseBody, - RunMetadataResponseMetadata, -) from zenml.zen_stores.schemas.base_schemas import BaseSchema from zenml.zen_stores.schemas.component_schemas import StackComponentSchema from zenml.zen_stores.schemas.schema_utils import build_foreign_key_field @@ -110,43 +103,3 @@ class RunMetadataSchema(BaseSchema, table=True): key: str value: str = Field(sa_column=Column(TEXT, nullable=False)) type: str - - def to_model( - self, - include_metadata: bool = False, - include_resources: bool = False, - **kwargs: Any, - ) -> "RunMetadataResponse": - """Convert a `RunMetadataSchema` to a `RunMetadataResponse`. - - Args: - include_metadata: Whether the metadata will be filled. - include_resources: Whether the resources will be filled. - **kwargs: Keyword arguments to allow schema specific logic - - - Returns: - The created `RunMetadataResponse`. 
- """ - body = RunMetadataResponseBody( - user=self.user.to_model() if self.user else None, - key=self.key, - created=self.created, - updated=self.updated, - value=json.loads(self.value), - type=MetadataTypeEnum(self.type), - ) - metadata = None - if include_metadata: - metadata = RunMetadataResponseMetadata( - workspace=self.workspace.to_model(), - resource_id=self.resource_id, - resource_type=MetadataResourceTypes(self.resource_type), - stack_component_id=self.stack_component_id, - ) - - return RunMetadataResponse( - id=self.id, - body=body, - metadata=metadata, - ) diff --git a/src/zenml/zen_stores/schemas/step_run_schemas.py b/src/zenml/zen_stores/schemas/step_run_schemas.py index 79628cdc34b..8500db9715d 100644 --- a/src/zenml/zen_stores/schemas/step_run_schemas.py +++ b/src/zenml/zen_stores/schemas/step_run_schemas.py @@ -28,6 +28,7 @@ from zenml.enums import ( ExecutionStatus, MetadataResourceTypes, + StepRunInputArtifactType, ) from zenml.models import ( StepRunRequest, @@ -37,7 +38,10 @@ StepRunUpdate, ) from zenml.models.v2.core.artifact_version import ArtifactVersionResponse -from zenml.models.v2.core.step_run import StepRunResponseResources +from zenml.models.v2.core.step_run import ( + StepRunInputResponse, + StepRunResponseResources, +) from zenml.zen_stores.schemas.base_schemas import NamedSchema from zenml.zen_stores.schemas.constants import MODEL_VERSION_TABLENAME from zenml.zen_stores.schemas.pipeline_deployment_schemas import ( @@ -215,12 +219,15 @@ def to_model( or a step_configuration. """ run_metadata = { - metadata_schema.key: metadata_schema.to_model() + metadata_schema.key: json.loads(metadata_schema.value) for metadata_schema in self.run_metadata } input_artifacts = { - artifact.name: artifact.artifact_version.to_model() + artifact.name: StepRunInputResponse( + input_type=StepRunInputArtifactType(artifact.type), + **artifact.artifact_version.to_model().model_dump(), + ) for artifact in self.input_artifacts } diff --git a/src/zenml/zen_stores/sql_zen_store.py b/src/zenml/zen_stores/sql_zen_store.py index 778dd634278..f9c314f774b 100644 --- a/src/zenml/zen_stores/sql_zen_store.py +++ b/src/zenml/zen_stores/sql_zen_store.py @@ -87,6 +87,7 @@ from zenml.config.pipeline_run_configuration import PipelineRunConfiguration from zenml.config.secrets_store_config import SecretsStoreConfiguration from zenml.config.server_config import ServerConfiguration +from zenml.config.step_configurations import StepConfiguration, StepSpec from zenml.config.store_config import StoreConfiguration from zenml.constants import ( DEFAULT_PASSWORD, @@ -217,9 +218,7 @@ PipelineRunResponse, PipelineRunUpdate, PipelineUpdate, - RunMetadataFilter, RunMetadataRequest, - RunMetadataResponse, RunTemplateFilter, RunTemplateRequest, RunTemplateResponse, @@ -5507,9 +5506,7 @@ def count_runs(self, filter_model: Optional[PipelineRunFilter]) -> int: # ----------------------------- Run Metadata ----------------------------- - def create_run_metadata( - self, run_metadata: RunMetadataRequest - ) -> List[RunMetadataResponse]: + def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None: """Creates run metadata. Args: @@ -5518,7 +5515,6 @@ def create_run_metadata( Returns: The created run metadata. 
""" - return_value: List[RunMetadataResponse] = [] with Session(self.engine) as session: for key, value in run_metadata.values.items(): type_ = run_metadata.types[key] @@ -5534,70 +5530,7 @@ def create_run_metadata( ) session.add(run_metadata_schema) session.commit() - return_value.append( - run_metadata_schema.to_model( - include_metadata=True, include_resources=True - ) - ) - return return_value - - def get_run_metadata( - self, run_metadata_id: UUID, hydrate: bool = True - ) -> RunMetadataResponse: - """Gets run metadata with the given ID. - - Args: - run_metadata_id: The ID of the run metadata to get. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata. - - Raises: - KeyError: if the run metadata doesn't exist. - """ - with Session(self.engine) as session: - run_metadata = session.exec( - select(RunMetadataSchema).where( - RunMetadataSchema.id == run_metadata_id - ) - ).first() - if run_metadata is None: - raise KeyError( - f"Unable to get run metadata with ID " - f"{run_metadata_id}: " - f"No run metadata with this ID found." - ) - return run_metadata.to_model( - include_metadata=hydrate, include_resources=True - ) - - def list_run_metadata( - self, - run_metadata_filter_model: RunMetadataFilter, - hydrate: bool = False, - ) -> Page[RunMetadataResponse]: - """List run metadata. - - Args: - run_metadata_filter_model: All filter parameters including - pagination params. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata. - """ - with Session(self.engine) as session: - query = select(RunMetadataSchema) - return self.filter_and_paginate( - session=session, - query=query, - table=RunMetadataSchema, - filter_model=run_metadata_filter_model, - hydrate=hydrate, - ) + return None # ----------------------------- Schedules ----------------------------- @@ -8215,13 +8148,23 @@ def create_run_step(self, step_run: StepRunRequest) -> StepRunResponse: session=session, ) + session.commit() + session.refresh(step_schema) + + step_model = step_schema.to_model(include_metadata=True) + # Save input artifact IDs into the database. for input_name, artifact_version_id in step_run.inputs.items(): + input_type = self._get_step_run_input_type( + input_name=input_name, + step_config=step_model.config, + step_spec=step_model.spec, + ) self._set_run_step_input_artifact( run_step_id=step_schema.id, artifact_version_id=artifact_version_id, name=input_name, - input_type=StepRunInputArtifactType.DEFAULT, + input_type=input_type, session=session, ) @@ -8241,6 +8184,7 @@ def create_run_step(self, step_run: StepRunRequest) -> StepRunResponse: ) session.commit() + session.refresh(step_schema) return step_schema.to_model( include_metadata=True, include_resources=True @@ -8367,6 +8311,34 @@ def update_run_step( include_metadata=True, include_resources=True ) + def _get_step_run_input_type( + self, + input_name: str, + step_config: StepConfiguration, + step_spec: StepSpec, + ) -> StepRunInputArtifactType: + """Get the input type of an artifact. + + Args: + input_name: The name of the input artifact. + step_config: The step config. + step_spec: The step spec. + + Returns: + The input type of the artifact. 
+ """ + if input_name in step_spec.inputs: + return StepRunInputArtifactType.STEP_OUTPUT + if input_name in step_config.external_input_artifacts: + return StepRunInputArtifactType.EXTERNAL + elif ( + input_name in step_config.model_artifacts_or_metadata + or input_name in step_config.client_lazy_loaders + ): + return StepRunInputArtifactType.LAZY_LOADED + else: + return StepRunInputArtifactType.MANUAL + @staticmethod def _set_run_step_parent_step( child_id: UUID, parent_id: UUID, session: Session diff --git a/src/zenml/zen_stores/zen_store_interface.py b/src/zenml/zen_stores/zen_store_interface.py index ea2e53a06ca..7d2b4bd60b8 100644 --- a/src/zenml/zen_stores/zen_store_interface.py +++ b/src/zenml/zen_stores/zen_store_interface.py @@ -90,9 +90,7 @@ PipelineRunResponse, PipelineRunUpdate, PipelineUpdate, - RunMetadataFilter, RunMetadataRequest, - RunMetadataResponse, RunTemplateFilter, RunTemplateRequest, RunTemplateResponse, @@ -1633,52 +1631,14 @@ def get_or_create_run( # -------------------- Run metadata -------------------- @abstractmethod - def create_run_metadata( - self, run_metadata: RunMetadataRequest - ) -> List[RunMetadataResponse]: + def create_run_metadata(self, run_metadata: RunMetadataRequest) -> None: """Creates run metadata. Args: run_metadata: The run metadata to create. Returns: - The created run metadata. - """ - - @abstractmethod - def get_run_metadata( - self, run_metadata_id: UUID, hydrate: bool = True - ) -> RunMetadataResponse: - """Get run metadata by its unique ID. - - Args: - run_metadata_id: The ID of the run metadata to get. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata with the given ID. - - Raises: - KeyError: if the run metadata doesn't exist. - """ - - @abstractmethod - def list_run_metadata( - self, - run_metadata_filter_model: RunMetadataFilter, - hydrate: bool = False, - ) -> Page[RunMetadataResponse]: - """List run metadata. - - Args: - run_metadata_filter_model: All filter parameters including - pagination params. - hydrate: Flag deciding whether to hydrate the output model(s) - by including metadata fields in the response. - - Returns: - The run metadata. + None """ # -------------------- Schedules -------------------- diff --git a/tests/harness/cfg/deployments.yaml b/tests/harness/cfg/deployments.yaml index d0f64b302ff..db631674fa1 100644 --- a/tests/harness/cfg/deployments.yaml +++ b/tests/harness/cfg/deployments.yaml @@ -111,3 +111,14 @@ deployments: database: mariadb capabilities: server: true + - name: github-actions-server + description: >- + Local ZenML server and MariaDB both running in docker with docker-compose. + server: external + database: external + capabilities: + server: true + config: + url: http://127.0.0.1:8080/ + username: default + password: '' diff --git a/tests/harness/cfg/environments.yaml b/tests/harness/cfg/environments.yaml index 6451953c5f9..a323871587b 100644 --- a/tests/harness/cfg/environments.yaml +++ b/tests/harness/cfg/environments.yaml @@ -188,3 +188,14 @@ environments: - mlflow-local-registry - mlflow-local-deployer mandatory_requirements: [airflow-local] + - name: github-actions-server-docker-orchestrator + description: >- + Server using GitHub Actions services with docker orchestrator and all + local components. 
+ deployment: github-actions-server + requirements: + - data-validators + - mlflow-local-tracker + - mlflow-local-registry + - mlflow-local-deployer + mandatory_requirements: [docker-local] diff --git a/tests/integration/functional/artifacts/test_artifacts_linage.py b/tests/integration/functional/artifacts/test_artifacts_linage.py index 3944814b86b..02b33d1df03 100644 --- a/tests/integration/functional/artifacts/test_artifacts_linage.py +++ b/tests/integration/functional/artifacts/test_artifacts_linage.py @@ -19,9 +19,15 @@ import pytest from typing_extensions import Annotated -from zenml import pipeline, save_artifact, step +from zenml import ( + ExternalArtifact, + load_artifact, + pipeline, + save_artifact, + step, +) from zenml.client import Client -from zenml.enums import ModelStages +from zenml.enums import ModelStages, StepRunInputArtifactType from zenml.model.model import Model from zenml.models.v2.core.pipeline_run import PipelineRunResponse @@ -271,6 +277,71 @@ def test_that_cached_artifact_versions_are_created_properly_for_multiple_version ) # cached show up only once +@step +def producer_step() -> Annotated[int, "shared_name"]: + save_artifact(41, "shared_name") + return 42 + + +@step +def consumer_step(shared_name: int, expected: int): + assert shared_name == expected + + +@step +def manual_consumer_step_load(): + assert load_artifact("shared_name", 1) == 41 + + +@step +def manual_consumer_step_client(): + assert Client().get_artifact_version("shared_name", 1).load() == 41 + + +def test_input_artifacts_typing(clean_client: Client): + """Test that input artifacts are correctly typed.""" + + @pipeline + def my_pipeline(): + a = producer_step() + consumer_step(a, 42, id="cs1", after=["producer_step"]) + consumer_step(ExternalArtifact(value=42), 42, id="cs2", after=["cs1"]) + consumer_step( + clean_client.get_artifact_version("shared_name", 1), + 41, + after=["producer_step", "cs2"], + id="cs3", + ) + manual_consumer_step_load(id="mcsl", after=["cs3"]) + manual_consumer_step_client(id="mcsc", after=["mcsl"]) + + for cache in [False, True]: + prr: PipelineRunResponse = my_pipeline.with_options( + enable_cache=cache + )() + assert len(prr.steps["producer_step"].inputs) == 0 + assert ( + prr.steps["cs1"].inputs["shared_name"].input_type + == StepRunInputArtifactType.STEP_OUTPUT + ) + assert ( + prr.steps["cs2"].inputs["shared_name"].input_type + == StepRunInputArtifactType.EXTERNAL + ) + assert ( + prr.steps["cs3"].inputs["shared_name"].input_type + == StepRunInputArtifactType.LAZY_LOADED + ) + assert ( + prr.steps["mcsl"].inputs["shared_name"].input_type + == StepRunInputArtifactType.MANUAL + ) + assert ( + prr.steps["mcsc"].inputs["shared_name"].input_type + == StepRunInputArtifactType.MANUAL + ) + + # TODO: Enable this test after fixing the issue with `is_model_artifact` and `is_deployment_artifact` flags @pytest.mark.skip( "Enable this test after fixing the issue with `is_model_artifact` and `is_deployment_artifact` flags" diff --git a/tests/integration/functional/artifacts/test_utils.py b/tests/integration/functional/artifacts/test_utils.py index 2ccb5ab9b28..2421f55fe38 100644 --- a/tests/integration/functional/artifacts/test_utils.py +++ b/tests/integration/functional/artifacts/test_utils.py @@ -151,22 +151,19 @@ def test_log_artifact_metadata_existing(clean_client): "meaning_of_life", version="1" ) assert "description" in artifact_1.run_metadata - assert artifact_1.run_metadata["description"].value == "Aria is great!" 
+ assert artifact_1.run_metadata["description"] == "Aria is great!" assert "description_3" in artifact_1.run_metadata - assert artifact_1.run_metadata["description_3"].value == "Axl is great!" + assert artifact_1.run_metadata["description_3"] == "Axl is great!" assert "float" in artifact_1.run_metadata - assert artifact_1.run_metadata["float"].value - 1.0 < 10e-6 + assert artifact_1.run_metadata["float"] - 1.0 < 10e-6 assert "int" in artifact_1.run_metadata - assert artifact_1.run_metadata["int"].value == 1 + assert artifact_1.run_metadata["int"] == 1 assert "str" in artifact_1.run_metadata - assert artifact_1.run_metadata["str"].value == "1.0" + assert artifact_1.run_metadata["str"] == "1.0" assert "list_str" in artifact_1.run_metadata - assert ( - len(set(artifact_1.run_metadata["list_str"].value) - {"1.0", "2.0"}) - == 0 - ) + assert len(set(artifact_1.run_metadata["list_str"]) - {"1.0", "2.0"}) == 0 assert "list_floats" in artifact_1.run_metadata - for each in artifact_1.run_metadata["list_floats"].value: + for each in artifact_1.run_metadata["list_floats"]: if 0.99 < each < 1.01: assert each - 1.0 < 10e-6 else: @@ -176,7 +173,7 @@ def test_log_artifact_metadata_existing(clean_client): "meaning_of_life", version="43" ) assert "description_2" in artifact_2.run_metadata - assert artifact_2.run_metadata["description_2"].value == "Blupus is great!" + assert artifact_2.run_metadata["description_2"] == "Blupus is great!" @step @@ -201,9 +198,9 @@ def artifact_metadata_logging_pipeline(): run_ = artifact_metadata_logging_pipeline.model.last_run output = run_.steps["artifact_metadata_logging_step"].output assert "description" in output.run_metadata - assert output.run_metadata["description"].value == "Aria is great!" + assert output.run_metadata["description"] == "Aria is great!" assert "metrics" in output.run_metadata - assert output.run_metadata["metrics"].value == {"accuracy": 0.9} + assert output.run_metadata["metrics"] == {"accuracy": 0.9} @step @@ -234,9 +231,9 @@ def artifact_metadata_logging_pipeline(): assert "metrics" not in str_output.run_metadata int_output = step_.outputs["int_output"][0] assert "description" in int_output.run_metadata - assert int_output.run_metadata["description"].value == "Blupus is great!" + assert int_output.run_metadata["description"] == "Blupus is great!" 
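+    # Metadata values are plain entries (str, dict, ...) keyed by name,
+    # not response objects, so they compare directly against literals.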
assert "metrics" in int_output.run_metadata - assert int_output.run_metadata["metrics"].value == {"accuracy": 0.9} + assert int_output.run_metadata["metrics"] == {"accuracy": 0.9} @step diff --git a/tests/integration/functional/model/test_model_version.py b/tests/integration/functional/model/test_model_version.py index e434afb998b..b8cbf95e738 100644 --- a/tests/integration/functional/model/test_model_version.py +++ b/tests/integration/functional/model/test_model_version.py @@ -108,7 +108,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback): def step_metadata_logging_functional(mdl_name: str): """Functional logging using implicit Model from context.""" log_model_metadata({"foo": "bar"}) - assert get_step_context().model.run_metadata["foo"].value == "bar" + assert get_step_context().model.run_metadata["foo"] == "bar" log_model_metadata( {"foo": "bar"}, model_name=mdl_name, model_version="other" ) @@ -392,13 +392,13 @@ def test_metadata_logging(self): mv.log_metadata({"foo": "bar"}) assert len(mv.run_metadata) == 1 - assert mv.run_metadata["foo"].value == "bar" + assert mv.run_metadata["foo"] == "bar" mv.log_metadata({"bar": "foo"}) assert len(mv.run_metadata) == 2 - assert mv.run_metadata["foo"].value == "bar" - assert mv.run_metadata["bar"].value == "foo" + assert mv.run_metadata["foo"] == "bar" + assert mv.run_metadata["bar"] == "foo" def test_metadata_logging_functional(self): """Test that model version can be used to track metadata from function.""" @@ -414,7 +414,7 @@ def test_metadata_logging_functional(self): ) assert len(mv.run_metadata) == 1 - assert mv.run_metadata["foo"].value == "bar" + assert mv.run_metadata["foo"] == "bar" with pytest.raises(ValueError): log_model_metadata({"foo": "bar"}) @@ -424,8 +424,8 @@ def test_metadata_logging_functional(self): ) assert len(mv.run_metadata) == 2 - assert mv.run_metadata["foo"].value == "bar" - assert mv.run_metadata["bar"].value == "foo" + assert mv.run_metadata["foo"] == "bar" + assert mv.run_metadata["bar"] == "foo" def test_metadata_logging_in_steps(self): """Test that model version can be used to track metadata from function in steps.""" @@ -448,11 +448,11 @@ def my_pipeline(): mv = Model(name=mdl_name, version="context") assert len(mv.run_metadata) == 1 - assert mv.run_metadata["foo"].value == "bar" + assert mv.run_metadata["foo"] == "bar" mv = Model(name=mdl_name, version="other") assert len(mv.run_metadata) == 1 - assert mv.run_metadata["foo"].value == "bar" + assert mv.run_metadata["foo"] == "bar" @pytest.mark.parametrize("delete_artifacts", [False, True]) def test_deletion_of_links(self, delete_artifacts: bool): diff --git a/tests/integration/functional/models/test_artifact.py b/tests/integration/functional/models/test_artifact.py index b06322ad246..db602efa13a 100644 --- a/tests/integration/functional/models/test_artifact.py +++ b/tests/integration/functional/models/test_artifact.py @@ -27,10 +27,10 @@ from zenml.artifacts.utils import load_artifact_visualization from zenml.enums import ExecutionStatus from zenml.exceptions import EntityExistsError +from zenml.metadata.metadata_types import MetadataType from zenml.models import ( ArtifactVersionResponse, ArtifactVisualizationResponse, - RunMetadataResponse, ) if TYPE_CHECKING: @@ -341,7 +341,7 @@ def _get_visualizations_of_last_run( def _get_metadata_of_last_run( clean_client: "Client", -) -> Dict[str, "RunMetadataResponse"]: +) -> Dict[str, MetadataType]: """Get the artifact metadata of the last run.""" return _get_output_of_last_run(clean_client).run_metadata diff 
--git a/tests/integration/functional/steps/test_step_context.py b/tests/integration/functional/steps/test_step_context.py index 31570aea020..4442f84b08e 100644 --- a/tests/integration/functional/steps/test_step_context.py +++ b/tests/integration/functional/steps/test_step_context.py @@ -100,7 +100,7 @@ def output_metadata_logging_step() -> Annotated[int, "my_output"]: def step_context_metadata_reader_step(my_input: int) -> None: step_context = get_step_context() my_input_metadata = step_context.inputs["my_input"].run_metadata - assert my_input_metadata["some_key"].value == "some_value" + assert my_input_metadata["some_key"] == "some_value" def test_input_artifacts_property(): @@ -204,10 +204,10 @@ def _pipeline(): av = clean_client.get_artifact_version(full_name) for k, v in metadata.items(): assert k in av.run_metadata - assert av.run_metadata[k].value == v + assert av.run_metadata[k] == v if full_name == "custom_name": - assert av.run_metadata["config_metadata"].value == "bar" + assert av.run_metadata["config_metadata"] == "bar" assert {t.name for t in av.tags} == set(tags).union({"config_tags"}) else: assert set(tags) == {t.name for t in av.tags} diff --git a/tests/integration/functional/steps/test_utils.py b/tests/integration/functional/steps/test_utils.py index 539ff75520e..7bdff4867e9 100644 --- a/tests/integration/functional/steps/test_utils.py +++ b/tests/integration/functional/steps/test_utils.py @@ -49,9 +49,9 @@ def step_metadata_logging_pipeline(): "step_metadata_logging_step_inside_run" ].run_metadata assert "description" in run_metadata - assert run_metadata["description"].value == "Aria is great!" + assert run_metadata["description"] == "Aria is great!" assert "metrics" in run_metadata - assert run_metadata["metrics"].value == {"accuracy": 0.9} + assert run_metadata["metrics"] == {"accuracy": 0.9} def test_log_step_metadata_using_latest_run(clean_client): @@ -84,9 +84,9 @@ def step_metadata_logging_pipeline(): "step_metadata_logging_step" ].run_metadata assert "description" in run_metadata_after_log - assert run_metadata_after_log["description"].value == "Axl is great!" + assert run_metadata_after_log["description"] == "Axl is great!" assert "metrics" in run_metadata_after_log - assert run_metadata_after_log["metrics"].value == {"accuracy": 0.9} + assert run_metadata_after_log["metrics"] == {"accuracy": 0.9} def test_log_step_metadata_using_specific_params(clean_client): @@ -124,6 +124,6 @@ def step_metadata_logging_pipeline(): "step_metadata_logging_step" ].run_metadata assert "description" in run_metadata_after_log - assert run_metadata_after_log["description"].value == "Blupus is great!" + assert run_metadata_after_log["description"] == "Blupus is great!" 
assert "metrics" in run_metadata_after_log - assert run_metadata_after_log["metrics"].value == {"accuracy": 0.9} + assert run_metadata_after_log["metrics"] == {"accuracy": 0.9} diff --git a/tests/integration/functional/test_client.py b/tests/integration/functional/test_client.py index 7c8c212757d..9daa823a1a7 100644 --- a/tests/integration/functional/test_client.py +++ b/tests/integration/functional/test_client.py @@ -30,7 +30,10 @@ from tests.integration.functional.utils import sample_name from zenml import ( ExternalArtifact, + get_pipeline_context, + get_step_context, log_artifact_metadata, + log_model_metadata, pipeline, save_artifact, step, @@ -53,7 +56,6 @@ StackExistsError, ) from zenml.io import fileio -from zenml.metadata.metadata_types import MetadataTypeEnum from zenml.model.model import Model from zenml.models import ( ComponentResponse, @@ -480,190 +482,53 @@ def test_listing_pipelines(clean_client): def test_create_run_metadata_for_pipeline_run(clean_client_with_run: Client): """Test creating run metadata linked only to a pipeline run.""" pipeline_run = clean_client_with_run.list_pipeline_runs()[0] - existing_metadata = clean_client_with_run.list_run_metadata( - resource_id=pipeline_run.id, - resource_type=MetadataResourceTypes.PIPELINE_RUN, - ) - # Assert that the created metadata is correct - new_metadata = clean_client_with_run.create_run_metadata( + clean_client_with_run.create_run_metadata( metadata={"axel": "is awesome"}, resource_id=pipeline_run.id, resource_type=MetadataResourceTypes.PIPELINE_RUN, ) - assert isinstance(new_metadata, list) - assert len(new_metadata) == 1 - assert new_metadata[0].key == "axel" - assert new_metadata[0].value == "is awesome" - assert new_metadata[0].type == MetadataTypeEnum.STRING - assert new_metadata[0].resource_id == pipeline_run.id - assert new_metadata[0].resource_type == MetadataResourceTypes.PIPELINE_RUN - assert new_metadata[0].stack_component_id is None - - # Assert new metadata is linked to the pipeline run - all_metadata = clean_client_with_run.list_run_metadata( - resource_id=pipeline_run.id, - resource_type=MetadataResourceTypes.PIPELINE_RUN, - ) - assert len(all_metadata) == len(existing_metadata) + 1 - - -def test_create_run_metadata_for_pipeline_run_and_component( - clean_client_with_run: Client, -): - """Test creating metadata linked to a pipeline run and a stack component""" - pipeline_run = clean_client_with_run.list_pipeline_runs()[0] - orchestrator_id = clean_client_with_run.active_stack_model.components[ - "orchestrator" - ][0].id - existing_metadata = clean_client_with_run.list_run_metadata( - resource_id=pipeline_run.id, - resource_type=MetadataResourceTypes.PIPELINE_RUN, - ) - existing_component_metadata = clean_client_with_run.list_run_metadata( - stack_component_id=orchestrator_id - ) - - # Assert that the created metadata is correct - new_metadata = clean_client_with_run.create_run_metadata( - metadata={"aria": "is awesome too"}, - resource_id=pipeline_run.id, - resource_type=MetadataResourceTypes.PIPELINE_RUN, - stack_component_id=orchestrator_id, - ) - assert isinstance(new_metadata, list) - assert len(new_metadata) == 1 - assert new_metadata[0].key == "aria" - assert new_metadata[0].value == "is awesome too" - assert new_metadata[0].type == MetadataTypeEnum.STRING - assert new_metadata[0].resource_id == pipeline_run.id - assert new_metadata[0].resource_type == MetadataResourceTypes.PIPELINE_RUN - assert new_metadata[0].stack_component_id == orchestrator_id - - # Assert new metadata is linked to the pipeline 
run - registered_metadata = clean_client_with_run.list_run_metadata( - resource_id=pipeline_run.id, - resource_type=MetadataResourceTypes.PIPELINE_RUN, - ) - assert len(registered_metadata) == len(existing_metadata) + 1 + rm = clean_client_with_run.get_pipeline_run(pipeline_run.id).run_metadata - # Assert new metadata is linked to the stack component - registered_component_metadata = clean_client_with_run.list_run_metadata( - stack_component_id=orchestrator_id - ) - assert ( - len(registered_component_metadata) - == len(existing_component_metadata) + 1 - ) + assert isinstance(rm, dict) + assert len(rm.values()) == 1 + assert rm["axel"] == "is awesome" def test_create_run_metadata_for_step_run(clean_client_with_run: Client): """Test creating run metadata linked only to a step run.""" step_run = clean_client_with_run.list_run_steps()[0] - existing_metadata = clean_client_with_run.list_run_metadata( - resource_id=step_run.id, resource_type=MetadataResourceTypes.STEP_RUN - ) # Assert that the created metadata is correct - new_metadata = clean_client_with_run.create_run_metadata( + clean_client_with_run.create_run_metadata( metadata={"axel": "is awesome"}, resource_id=step_run.id, resource_type=MetadataResourceTypes.STEP_RUN, ) - assert isinstance(new_metadata, list) - assert len(new_metadata) == 1 - assert new_metadata[0].key == "axel" - assert new_metadata[0].value == "is awesome" - assert new_metadata[0].type == MetadataTypeEnum.STRING - assert new_metadata[0].resource_id == step_run.id - assert new_metadata[0].resource_type == MetadataResourceTypes.STEP_RUN - assert new_metadata[0].stack_component_id is None - - # Assert new metadata is linked to the step run - registered_metadata = clean_client_with_run.list_run_metadata( - resource_id=step_run.id, resource_type=MetadataResourceTypes.STEP_RUN - ) - assert len(registered_metadata) == len(existing_metadata) + 1 + rm = clean_client_with_run.get_run_step(step_run.id).run_metadata - -def test_create_run_metadata_for_step_run_and_component( - clean_client_with_run: Client, -): - """Test creating metadata linked to a step run and a stack component""" - step_run = clean_client_with_run.list_run_steps()[0] - orchestrator_id = clean_client_with_run.active_stack_model.components[ - "orchestrator" - ][0].id - existing_metadata = clean_client_with_run.list_run_metadata( - resource_id=step_run.id, resource_type=MetadataResourceTypes.STEP_RUN - ) - existing_component_metadata = clean_client_with_run.list_run_metadata( - stack_component_id=orchestrator_id - ) - - # Assert that the created metadata is correct - new_metadata = clean_client_with_run.create_run_metadata( - metadata={"aria": "is awesome too"}, - resource_id=step_run.id, - resource_type=MetadataResourceTypes.STEP_RUN, - stack_component_id=orchestrator_id, - ) - assert isinstance(new_metadata, list) - assert len(new_metadata) == 1 - assert new_metadata[0].key == "aria" - assert new_metadata[0].value == "is awesome too" - assert new_metadata[0].type == MetadataTypeEnum.STRING - assert new_metadata[0].resource_id == step_run.id - assert new_metadata[0].resource_type == MetadataResourceTypes.STEP_RUN - assert new_metadata[0].stack_component_id == orchestrator_id - - # Assert new metadata is linked to the step run - registered_metadata = clean_client_with_run.list_run_metadata( - resource_id=step_run.id, resource_type=MetadataResourceTypes.STEP_RUN - ) - assert len(registered_metadata) == len(existing_metadata) + 1 - - # Assert new metadata is linked to the stack component - 
registered_component_metadata = clean_client_with_run.list_run_metadata( - stack_component_id=orchestrator_id - ) - assert ( - len(registered_component_metadata) - == len(existing_component_metadata) + 1 - ) + assert isinstance(rm, dict) + assert len(rm.values()) == 1 + assert rm["axel"] == "is awesome" def test_create_run_metadata_for_artifact(clean_client_with_run: Client): """Test creating run metadata linked to an artifact.""" artifact_version = clean_client_with_run.list_artifact_versions()[0] - existing_metadata = clean_client_with_run.list_run_metadata( - resource_id=artifact_version.id, - resource_type=MetadataResourceTypes.ARTIFACT_VERSION, - ) # Assert that the created metadata is correct - new_metadata = clean_client_with_run.create_run_metadata( + clean_client_with_run.create_run_metadata( metadata={"axel": "is awesome"}, resource_id=artifact_version.id, resource_type=MetadataResourceTypes.ARTIFACT_VERSION, ) - assert isinstance(new_metadata, list) - assert len(new_metadata) == 1 - assert new_metadata[0].key == "axel" - assert new_metadata[0].value == "is awesome" - assert new_metadata[0].type == MetadataTypeEnum.STRING - assert new_metadata[0].resource_id == artifact_version.id - assert ( - new_metadata[0].resource_type == MetadataResourceTypes.ARTIFACT_VERSION - ) - assert new_metadata[0].stack_component_id is None - # Assert new metadata is linked to the artifact - registered_metadata = clean_client_with_run.list_run_metadata( - resource_id=artifact_version.id, - resource_type=MetadataResourceTypes.ARTIFACT_VERSION, - ) - assert len(registered_metadata) == len(existing_metadata) + 1 + rm = clean_client_with_run.get_artifact_version( + artifact_version.id + ).run_metadata + + assert isinstance(rm, dict) + assert rm["axel"] == "is awesome" # .---------. @@ -1106,19 +971,20 @@ def lazy_producer_test_artifact() -> Annotated[str, "new_one"]: log_artifact_metadata(metadata={"some_meta": "meta_new_one"}) client = Client() - model = client.create_model(name="model_name", description="model_desc") - client.create_model_version( - model_name_or_id=model.id, - name="model_version", - description="mv_desc_1", + + log_model_metadata( + metadata={"some_meta": "meta_new_one"}, ) + + model = get_step_context().model + mv = client.create_model_version( - model_name_or_id=model.id, + model_name_or_id=model.name, name="model_version2", description="mv_desc_2", ) client.update_model_version( - model_name_or_id=model.id, version_name_or_id=mv.id, stage="staging" + model_name_or_id=model.name, version_name_or_id=mv.id, stage="staging" ) return "body_new_one" @@ -1132,6 +998,7 @@ def lazy_asserter_test_artifact( model: ModelResponse, model_version_by_version: ModelVersionResponse, model_version_by_stage: ModelVersionResponse, + model_version_run_metadata: str, ): """Assert that passed in values are loaded in lazy mode. They do not exists before actual run of the pipeline. 
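The hunks above and below exercise the same behavioral change: `run_metadata` lookups now resolve to plain values rather than response objects. A minimal sketch of that read path, assuming a hypothetical model `my_model` with version `my_version`, using the `log_model_metadata` and `Model` APIs that appear elsewhere in this diff:

from zenml import log_model_metadata
from zenml.model.model import Model

# Attach a metadata value; it is stored server-side as a JSON string.
log_model_metadata(
    metadata={"some_meta": "meta_value"},
    model_name="my_model",
    model_version="my_version",
)

# Read it back: the dict yields the plain value, no `.value` indirection.
mv = Model(name="my_model", version="my_version")
assert mv.run_metadata["some_meta"] == "meta_value"
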
@@ -1141,12 +1008,13 @@ def lazy_asserter_test_artifact( assert artifact_new == "body_new_one" assert artifact_metadata_new == "meta_new_one" - assert model.name == "model_name" - assert model.description == "model_desc" + assert model.name == "aria" + # assert model.description == "model_description" assert model_version_by_version.name == "model_version" - assert model_version_by_version.description == "mv_desc_1" + # assert model_version_by_version.description == "mv_desc_1" assert model_version_by_stage.name == "model_version2" assert model_version_by_stage.description == "mv_desc_2" + assert model_version_run_metadata == "meta_new_one" class TestArtifact: @@ -1208,7 +1076,12 @@ def test_pipeline_can_load_in_lazy_mode( ): """Tests that user can load model artifact versions, metadata and models (versions) in lazy mode in pipeline codes.""" - @pipeline(enable_cache=False) + @pipeline( + enable_cache=False, + model=Model( + name="aria", version="model_version", description="mv_desc_1" + ), + ) def dummy(): artifact_existing = clean_client.get_artifact_version( name_id_or_prefix="preexisting" @@ -1222,7 +1095,11 @@ def dummy(): ) artifact_metadata_new = artifact_new.run_metadata["some_meta"] - model = clean_client.get_model(model_name_or_id="model_name") + model = clean_client.get_model(model_name_or_id="aria") + + model_version_run_metadata = ( + get_pipeline_context().model.run_metadata["some_meta"] + ) lazy_producer_test_artifact() lazy_asserter_test_artifact( @@ -1233,7 +1110,7 @@ def dummy(): # pass as artifact response artifact_new, # read value of metadata directly - artifact_metadata_new.value, + artifact_metadata_new, # load model model, # load model version by version @@ -1245,9 +1122,10 @@ def dummy(): # load model version by stage clean_client.get_model_version( # this can be lazy loaders too - model.id, + model_name_or_id=model.id, model_version_name_or_number_or_id="staging", ), + model_version_run_metadata, after=["lazy_producer_test_artifact"], ) @@ -1259,6 +1137,11 @@ def dummy(): artifact_name="preexisting", artifact_version="1.2.3", ) + log_model_metadata( + metadata={"some_meta": "meta_preexisting"}, + model_name="aria", + model_version="model_version", + ) with pytest.raises(KeyError): clean_client.get_artifact_version("new_one") dummy() diff --git a/tests/integration/functional/zen_stores/test_zen_store.py b/tests/integration/functional/zen_stores/test_zen_store.py index 19889087f5f..57a477dea7b 100644 --- a/tests/integration/functional/zen_stores/test_zen_store.py +++ b/tests/integration/functional/zen_stores/test_zen_store.py @@ -5436,7 +5436,7 @@ def test_metadata_full_cycle_with_cascade_deletion( pr if type_ == MetadataResourceTypes.PIPELINE_RUN else sr ) - rm = client.zen_store.create_run_metadata( + client.zen_store.create_run_metadata( RunMetadataRequest( user=client.active_user.id, workspace=client.active_workspace.id, @@ -5450,12 +5450,13 @@ def test_metadata_full_cycle_with_cascade_deletion( else None, ) ) - rm = client.zen_store.get_run_metadata(rm[0].id, True) - assert rm.key == "foo" - assert rm.value == "bar" - assert rm.resource_id == resource.id - assert rm.resource_type == type_ - assert rm.type == MetadataTypeEnum.STRING + if type_ == MetadataResourceTypes.PIPELINE_RUN: + rm = client.zen_store.get_run(resource.id, True).run_metadata + assert rm["foo"] == "bar" + + elif type_ == MetadataResourceTypes.STEP_RUN: + rm = client.zen_store.get_run_step(resource.id, True).run_metadata + assert rm["foo"] == "bar" if type_ == 
MetadataResourceTypes.ARTIFACT_VERSION: client.zen_store.delete_artifact_version(resource.id) @@ -5469,9 +5470,6 @@ def test_metadata_full_cycle_with_cascade_deletion( client.zen_store.delete_run(pr.id) client.zen_store.delete_deployment(deployment.id) - with pytest.raises(KeyError): - client.zen_store.get_run_metadata(rm.id) - client.zen_store.delete_stack_component(sc.id) diff --git a/tests/unit/orchestrators/test_input_utils.py b/tests/unit/orchestrators/test_input_utils.py index 6d981f90963..8c97e41feb6 100644 --- a/tests/unit/orchestrators/test_input_utils.py +++ b/tests/unit/orchestrators/test_input_utils.py @@ -17,13 +17,18 @@ import pytest from zenml.config.step_configurations import Step +from zenml.enums import StepRunInputArtifactType from zenml.exceptions import InputResolutionError from zenml.models import Page, PipelineRunResponse +from zenml.models.v2.core.artifact_version import ArtifactVersionResponse +from zenml.models.v2.core.step_run import StepRunInputResponse from zenml.orchestrators import input_utils def test_input_resolution( - mocker, sample_artifact_version_model, create_step_run + mocker, + sample_artifact_version_model: ArtifactVersionResponse, + create_step_run, ): """Tests that input resolution works if the correct models exist in the zen store.""" @@ -57,7 +62,12 @@ def test_input_resolution( input_artifacts, parent_ids = input_utils.resolve_step_inputs( step=step, pipeline_run=PipelineRunResponse(id=uuid4(), name="foo") ) - assert input_artifacts == {"input_name": sample_artifact_version_model} + assert input_artifacts == { + "input_name": StepRunInputResponse( + input_type=StepRunInputArtifactType.STEP_OUTPUT, + **sample_artifact_version_model.model_dump(), + ) + } assert parent_ids == [step_run.id] diff --git a/tests/unit/utils/test_deprecation_utils.py b/tests/unit/utils/test_deprecation_utils.py index 97dd809bb47..c44ddaa4180 100644 --- a/tests/unit/utils/test_deprecation_utils.py +++ b/tests/unit/utils/test_deprecation_utils.py @@ -30,7 +30,7 @@ class Model(BaseModel): old: Optional[str] = None new: Optional[str] = None - _deprecatation_validator = ( + _deprecation_validator = ( deprecation_utils.deprecate_pydantic_attributes( "deprecated", ("old", "new") ) @@ -56,7 +56,7 @@ class Model(BaseModel): class InvalidAttributeNameModel(BaseModel): deprecated: Optional[str] = None - _deprecatation_validator = ( + _deprecation_validator = ( deprecation_utils.deprecate_pydantic_attributes("not_an_attribute") ) @@ -66,7 +66,7 @@ class InvalidAttributeNameModel(BaseModel): class DeprecateRequiredAttributeModel(BaseModel): deprecated: str - _deprecatation_validator = ( + _deprecation_validator = ( deprecation_utils.deprecate_pydantic_attributes("deprecated") )
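
Taken together, these changes pivot on one classification rule, implemented server-side in `_get_step_run_input_type` and asserted in `test_input_artifacts_typing`. A condensed sketch of that precedence, simplifying the `StepSpec`/`StepConfiguration` lookups to plain set membership for illustration:

from typing import Set

from zenml.enums import StepRunInputArtifactType


def classify_input(
    name: str,
    spec_inputs: Set[str],
    external_inputs: Set[str],
    lazy_inputs: Set[str],
) -> StepRunInputArtifactType:
    """Mirrors the precedence: upstream step outputs win, then external
    artifacts, then lazy-loaded model/client values; anything else
    (e.g. `save_artifact`/`load_artifact` usage) counts as manual."""
    if name in spec_inputs:
        return StepRunInputArtifactType.STEP_OUTPUT
    if name in external_inputs:
        return StepRunInputArtifactType.EXTERNAL
    if name in lazy_inputs:
        return StepRunInputArtifactType.LAZY_LOADED
    return StepRunInputArtifactType.MANUAL

Under this rule an artifact passed as `ExternalArtifact(value=42)` classifies as EXTERNAL, one fetched via `clean_client.get_artifact_version(...)` during pipeline composition as LAZY_LOADED, and one read with `load_artifact` inside a step body as MANUAL — exactly what the `cs2`, `cs3`, and `mcsl` assertions check.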