diff --git a/.gitattributes b/.gitattributes index 083e1747cba4a..5f8117153f511 100644 --- a/.gitattributes +++ b/.gitattributes @@ -16,7 +16,6 @@ tests export-ignore Dockerfile.ci export-ignore ISSUE_TRIAGE_PROCESS.rst export-ignore -PULL_REQUEST_WORKFLOW.rst export-ignore STATIC_CODE_CHECKS.rst export-ignore TESTING.rst export-ignore LOCAL_VIRTUALENV.rst export-ignore diff --git a/.github/actions/checks-action b/.github/actions/checks-action deleted file mode 160000 index 9f02872da71b6..0000000000000 --- a/.github/actions/checks-action +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9f02872da71b6f558c6a6f190f925dde5e4d8798 diff --git a/.github/actions/label-when-approved-action b/.github/actions/label-when-approved-action deleted file mode 160000 index 0058d0094da27..0000000000000 --- a/.github/actions/label-when-approved-action +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0058d0094da27e116fad6e0da516ebe1107f26de diff --git a/.github/workflows/label_when_reviewed.yml b/.github/workflows/label_when_reviewed.yml deleted file mode 100644 index 189a2d7343b9b..0000000000000 --- a/.github/workflows/label_when_reviewed.yml +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ---- -name: Label when reviewed -on: pull_request_review # yamllint disable-line rule:truthy -jobs: - - label-when-reviewed: - name: "Label PRs when reviewed" - runs-on: ubuntu-20.04 - steps: - - name: "Do nothing. Only trigger corresponding workflow_run event" - run: echo diff --git a/.github/workflows/label_when_reviewed_workflow_run.yml b/.github/workflows/label_when_reviewed_workflow_run.yml deleted file mode 100644 index b84ab34a79ea5..0000000000000 --- a/.github/workflows/label_when_reviewed_workflow_run.yml +++ /dev/null @@ -1,177 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# ---- -name: Label when reviewed workflow run -on: # yamllint disable-line rule:truthy - workflow_run: - workflows: ["Label when reviewed"] - types: ['requested'] -permissions: - # All other permissions are set to none - checks: write - contents: read - pull-requests: write -jobs: - - label-when-reviewed: - name: "Label PRs when reviewed workflow run" - runs-on: ubuntu-20.04 - outputs: - labelSet: ${{ steps.label-when-reviewed.outputs.labelSet }} - steps: - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 - with: - persist-credentials: false - submodules: recursive - - name: "Get information about the original trigger of the run" - uses: ./.github/actions/get-workflow-origin - id: source-run-info - with: - token: ${{ secrets.GITHUB_TOKEN }} - sourceRunId: ${{ github.event.workflow_run.id }} - - name: Initiate Selective Build check - uses: ./.github/actions/checks-action - id: selective-build-check - with: - token: ${{ secrets.GITHUB_TOKEN }} - name: "Selective build check" - status: "in_progress" - sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - output: > - {"summary": - "Checking selective status of the build in - [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) - "} - - name: > - Event: ${{ steps.source-run-info.outputs.sourceEvent }} - Repo: ${{ steps.source-run-info.outputs.sourceHeadRepo }} - Branch: ${{ steps.source-run-info.outputs.sourceHeadBranch }} - Run id: ${{ github.run_id }} - Source Run id: ${{ github.event.workflow_run.id }} - Sha: ${{ github.sha }} - Source Sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - Merge commit Sha: ${{ steps.source-run-info.outputs.mergeCommitSha }} - Target commit Sha: ${{ steps.source-run-info.outputs.targetCommitSha }} - run: printenv - - name: > - Fetch incoming commit ${{ steps.source-run-info.outputs.targetCommitSha }} with its parent - uses: actions/checkout@v2 - with: - ref: ${{ steps.source-run-info.outputs.targetCommitSha }} - fetch-depth: 2 - persist-credentials: false - # checkout the main branch again, to use the right script in main workflow - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 - with: - persist-credentials: false - submodules: recursive - - name: "Setup python" - uses: actions/setup-python@v2 - with: - # We do not have output from selective checks yet, so we need to hardcode python - python-version: 3.7 - cache: 'pip' - cache-dependency-path: ./dev/breeze/setup* - - run: ./scripts/ci/install_breeze.sh - - name: Selective checks - id: selective-checks - env: - PR_LABELS: "${{ steps.source-run-info.outputs.pullRequestLabels }}" - COMMIT_REF: "${{ steps.source-run-info.outputs.targetCommitSha }}" - run: breeze selective-check - - name: "Label when approved by committers for PRs that require full tests" - uses: ./.github/actions/label-when-approved-action - id: label-full-test-prs-when-approved-by-commiters - if: > - steps.selective-checks.outputs.run-tests == 'true' && - contains(steps.selective-checks.outputs.test-types, 'Core') - with: - token: ${{ secrets.GITHUB_TOKEN }} - label: 'full tests needed' - require_committers_approval: 'true' - remove_label_when_approval_missing: 'false' - pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} - comment: > - The PR most likely needs to run full matrix of tests because it modifies parts of the core - of Airflow. However, committers might decide to merge it quickly and take the risk. - If they don't merge it quickly - please rebase it to the latest main at your convenience, - or amend the last commit of the PR, and push it with --force-with-lease. - - name: "Initiate GitHub Check forcing rerun of SH ${{ github.event.pull_request.head.sha }}" - uses: ./.github/actions/checks-action - id: full-test-check - if: steps.label-full-test-prs-when-approved-by-commiters.outputs.labelSet == 'true' - with: - token: ${{ secrets.GITHUB_TOKEN }} - name: "Please rebase or amend, and force push the PR to run full tests" - status: "in_progress" - sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - output: > - {"summary": - "The PR likely needs to run all tests! This was determined via selective check in - [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) - "} - - name: "Label when approved by committers for PRs that do not require full tests" - uses: ./.github/actions/label-when-approved-action - id: label-simple-test-prs-when-approved-by-commiters - if: > - steps.selective-checks.outputs.run-tests == 'true' && - ! contains(steps.selective-checks.outputs.test-types, 'Core') - with: - token: ${{ secrets.GITHUB_TOKEN }} - label: 'okay to merge' - require_committers_approval: 'true' - pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} - comment: > - The PR is likely OK to be merged with just subset of tests for default Python and Database - versions without running the full matrix of tests, because it does not modify the core of - Airflow. If the committers decide that the full tests matrix is needed, they will add the label - 'full tests needed'. Then you should rebase to the latest main or amend the last commit - of the PR, and push it with --force-with-lease. - - name: "Label when approved by committers for PRs that do not require tests at all" - uses: ./.github/actions/label-when-approved-action - id: label-no-test-prs-when-approved-by-commiters - if: steps.selective-checks.outputs.run-tests != 'true' - with: - token: ${{ secrets.GITHUB_TOKEN }} - label: 'okay to merge' - pullRequestNumber: ${{ steps.source-run-info.outputs.pullRequestNumber }} - require_committers_approval: 'true' - comment: > - The PR is likely ready to be merged. No tests are needed as no important environment files, - nor python files were modified by it. However, committers might decide that full test matrix is - needed and add the 'full tests needed' label. Then you should rebase it to the latest main - or amend the last commit of the PR, and push it with --force-with-lease. - - name: Update Selective Build check - uses: ./.github/actions/checks-action - if: always() - with: - token: ${{ secrets.GITHUB_TOKEN }} - check_id: ${{ steps.selective-build-check.outputs.check_id }} - status: "completed" - sha: ${{ steps.source-run-info.outputs.sourceHeadSha }} - conclusion: ${{ job.status }} - details_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} - output: > - {"summary": - "Checking selective status of the build completed in - [the run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}) - "} diff --git a/.gitmodules b/.gitmodules index e03978e263653..aa1358f88496d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule ".github/actions/get-workflow-origin"] path = .github/actions/get-workflow-origin url = https://github.com/potiuk/get-workflow-origin -[submodule ".github/actions/checks-action"] - path = .github/actions/checks-action - url = https://github.com/LouisBrunner/checks-action [submodule ".github/actions/configure-aws-credentials"] path = .github/actions/configure-aws-credentials url = https://github.com/aws-actions/configure-aws-credentials @@ -13,6 +10,3 @@ [submodule ".github/actions/github-push-action"] path = .github/actions/github-push-action url = https://github.com/ad-m/github-push-action -[submodule ".github/actions/label-when-approved-action"] - path = .github/actions/label-when-approved-action - url = https://github.com/TobKed/label-when-approved-action diff --git a/CI.rst b/CI.rst index 7798b077acf0c..c058598449299 100644 --- a/CI.rst +++ b/CI.rst @@ -426,12 +426,6 @@ CI, Production Images as well as base Python images that are also cached in the Also for those builds we only execute Python tests if important files changed (so for example if it is "no-code" change, no tests will be executed. -The workflow involved in Pull Requests review and approval is a bit more complex than simple workflows -in most of other projects because we've implemented some optimizations related to efficient use -of queue slots we share with other Apache Software Foundation projects. More details about it -can be found in `PULL_REQUEST_WORKFLOW.rst `_. - - Direct Push/Merge Run --------------------- diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index f862d33b1963a..1904c15abe6e7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -360,33 +360,6 @@ Step 4: Prepare PR PR guidelines described in `pull request guidelines <#pull-request-guidelines>`_. Create Pull Request! Make yourself ready for the discussion! -5. Depending on "scope" of your changes, your Pull Request might go through one of few paths after approval. - We run some non-standard workflow with high degree of automation that allows us to optimize the usage - of queue slots in GitHub Actions. Our automated workflows determine the "scope" of changes in your PR - and send it through the right path: - - * In case of a "no-code" change, approval will generate a comment that the PR can be merged and no - tests are needed. This is usually when the change modifies some non-documentation related RST - files (such as this file). No python tests are run and no CI images are built for such PR. Usually - it can be approved and merged few minutes after it is submitted (unless there is a big queue of jobs). - - * In case of change involving python code changes or documentation changes, a subset of full test matrix - will be executed. This subset of tests perform relevant tests for single combination of python, backend - version and only builds one CI image and one PROD image. Here the scope of tests depends on the - scope of your changes: - - * when your change does not change "core" of Airflow (Providers, CLI, WWW, Helm Chart) you will get the - comment that PR is likely ok to be merged without running "full matrix" of tests. However decision - for that is left to committer who approves your change. The committer might set a "full tests needed" - label for your PR and ask you to rebase your request or re-run all jobs. PRs with "full tests needed" - run full matrix of tests. - - * when your change changes the "core" of Airflow you will get the comment that PR needs full tests and - the "full tests needed" label is set for your PR. Additional check is set that prevents from - accidental merging of the request until full matrix of tests succeeds for the PR. - - More details about the PR workflow be found in `PULL_REQUEST_WORKFLOW.rst `_. - Step 5: Pass PR Review ---------------------- diff --git a/PULL_REQUEST_WORKFLOW.rst b/PULL_REQUEST_WORKFLOW.rst deleted file mode 100644 index 7d7a7860fdcd4..0000000000000 --- a/PULL_REQUEST_WORKFLOW.rst +++ /dev/null @@ -1,151 +0,0 @@ - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -.. contents:: :local: - -Why non-standard pull request workflow? ---------------------------------------- - -This document describes the Pull Request Workflow we've implemented in Airflow. The workflow is slightly -more complex than regular workflow you might encounter in most of the projects because after experiencing -some huge delays in processing queues in October 2020 with GitHub Actions, we've decided to optimize the -workflow to minimize the use of GitHub Actions build time by utilising selective approach on which tests -and checks in the CI system are run depending on analysis of which files changed in the incoming PR and -allowing the Committers to control the scope of the tests during the approval/review process. - -Just to give a bit of context, we started off with the approach that we always run all tests for all the -incoming PRs, however due to our matrix of tests growing, this approach did not scale with the increasing -number of PRs and when we had to compete with other Apache Software Foundation projects for the 180 -slots that are available for the whole organization. More Apache Software Foundation projects started -to use GitHub Actions and we've started to experience long queues when our jobs waited for free slots. - -We approached the problem by: - -1) Improving mechanism of cancelling duplicate workflow runs more efficiently in case of queue conditions - (duplicate workflow runs are generated when someone pushes a fixup quickly - leading to running both - out-dated and current run to completion, taking precious slots. This has been implemented by improving - `cancel-workflow-run `_ action we are using. In version - 4.1 it got a new feature of cancelling all duplicates even if there is a long queue of builds. - -2) Heavily decreasing strain on the GitHub Actions jobs by introducing selective checks - mechanism - to control which parts of the tests are run during the tests. This is implemented by the - ``breeze selective-check`` command. It selectively chooses which tests should be run in the PR based on - type of the PR and its content. More about it can be found in - `Selective checks `_ - -3) Even more optimisation came from limiting the scope of tests to only "default" matrix parameters. So far - in Airflow we always run all tests for all matrix combinations. The primary matrix components are: - - * Python versions (currently 3.7, 3.8, 3.9, 3.10) - * Backend types (currently MySQL/Postgres) - * Backed version (currently MySQL 5.7, MySQL 8, Postgres 13 - - We've decided that instead of running all the combinations of parameters for all matrix component we will - only run default values (Python 3.7, Mysql 5.7, Postgres 13) for all PRs which are not approved yet by - the committers. This has a nice effect, that full set of tests (though with limited combinations of - the matrix) are still run in the CI for every Pull Request that needs tests at all - allowing the - contributors to make sure that their PR is "good enough" to be reviewed. - - Even after approval, the automated workflows we've implemented, check if the PR seems to need - "full test matrix" and provide helpful information to both contributors and committers in the form of - explanatory comments and labels set automatically showing the status of the PR. Committers have still - control whether they want to merge such requests automatically or ask for rebase or re-run the tests - and run "full tests" by applying the "full tests needed" label and re-running such request. - The "full tests needed" label is also applied automatically after approval when the change touches - the "core" of Airflow - also a separate check is added to the PR so that the "merge" button status - will indicate to the committer that full tests are still needed. The committer might still decide, - whether to merge such PR without the "full matrix". The "escape hatch" we have - i.e. running the full - matrix of tests in the "merge push" will enable committers to catch and fix such problems quickly. - More about it can be found in `Approval workflow and Matrix tests <#approval-workflow-and-matrix-tests>`_ - chapter. - -4) We've also applied (and received) funds to run self-hosted runners. They are used for ``main`` runs - and whenever the PRs are done by one of the maintainers. Maintainers can force using Public GitHub runners - by applying "use public runners" label to the PR before submitting it. - - -Approval Workflow and Matrix tests ----------------------------------- - -As explained above the approval and matrix tests workflow works according to the algorithm below: - -1) In case of "no-code" changes - so changes that do not change any of the code or environment of - the application, no test are run (this is done via selective checks). Also no CI/PROD images are - build saving extra minutes. Such build takes less than 2 minutes currently and only few jobs are run - which is a very small fraction of the "full build" time. - -2) When new PR is created, only a "default set" of matrix test are running. Only default - values for each of the parameters are used effectively limiting it to running matrix builds for only - one python version and one version of each of the backends. In this case only one CI and one PROD - image is built, saving precious job slots. This build takes around 50% less time than the "full matrix" - build. - -3) When such PR gets approved, the system further analyses the files changed in this PR and further - decision is made that should be communicated to both Committer and Reviewer. - -3a) In case of "no-code" builds, a message is communicated that the PR is ready to be merged and - no tests are needed. - -.. image:: images/pr/pr-no-tests-needed-comment.png - :align: center - :alt: No tests needed for "no-code" builds - -3b) In case of "non-core" builds a message is communicated that such PR is likely OK to be merged as is with - limited set of tests, but that the committer might decide to re-run the PR after applying - "full tests needed" label, which will trigger full matrix build for tests for this PR. The committer - might make further decision on what to do with this PR. - -.. image:: images/pr/pr-likely-ok-to-merge.png - :align: center - :alt: Likely ok to merge the PR with only small set of tests - -3c) In case of "core" builds (i. e. when the PR touches some "core" part of Airflow) a message is - communicated that this PR needs "full test matrix", the "full tests needed" label is applied - automatically and either the contributor might rebase the request to trigger full test build or the - committer might re-run the build manually to trigger such full test rebuild. Also a check "in-progress" - is added, so that the committer realises that the PR is not yet "green to merge". Pull requests with - "full tests needed" label always trigger the full matrix build when rebased or re-run so if the - PR gets rebased, it will continue triggering full matrix build. - -.. image:: images/pr/pr-full-tests-needed.png - :align: center - :alt: Full tests are needed for the PR - -4) If this or another committer "request changes" in a previously approved PR with "full tests needed" - label, the bot automatically removes the label, moving it back to "run only default set of parameters" - mode. For PRs touching core of airflow once the PR gets approved back, the label will be restored. - If it was manually set by the committer, it has to be restored manually. - -.. note:: Note that setting the labels and adding comments might be delayed, due to limitation of GitHub Actions, - in case of queues, processing of Pull Request reviews might take some time, so it is advised not to merge - PR immediately after approval. Luckily, the comments describing the status of the PR trigger notifications - for the PRs and they provide good "notification" for the committer to act on a PR that was recently - approved. - -The PR approval workflow is possible thanks to two custom GitHub Actions we've developed: - -* `Get workflow origin `_ -* `Label when approved `_ - - -Next steps ----------- - -We are planning to also propose the approach to other projects from Apache Software Foundation to -make it a common approach, so that our effort is not limited only to one project. - -Discussion about it in `this discussion `_ diff --git a/TESTING.rst b/TESTING.rst index 2271e73ecfd8c..4f7de58b76766 100644 --- a/TESTING.rst +++ b/TESTING.rst @@ -487,8 +487,6 @@ This is done for three reasons: 1. in order to selectively run only subset of the test types for some PRs 2. in order to allow parallel execution of the tests on Self-Hosted runners -For case 1. see `Pull Request Workflow `_ for details. - For case 2. We can utilise memory and CPUs available on both CI and local development machines to run test in parallel. This way we can decrease the time of running all tests in self-hosted runners from 60 minutes to ~15 minutes. diff --git a/dev/airflow-github b/dev/airflow-github index 5dc5f5bd9d4d9..fcda3355d914d 100755 --- a/dev/airflow-github +++ b/dev/airflow-github @@ -126,7 +126,6 @@ def is_core_commit(files: List[str]) -> bool: "CONTRIBUTORS_QUICK_START.rst", "IMAGES.rst", "LOCAL_VIRTUALENV.rst", - "PULL_REQUEST_WORKFLOW.rst", "INTHEWILD.md", "INSTALL", "README.md", diff --git a/dev/breeze/src/airflow_breeze/commands/ci_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_commands.py index c8260698d71aa..c65753e1a65e5 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_commands.py @@ -205,7 +205,7 @@ def selective_check( from airflow_breeze.utils.selective_checks import SelectiveChecks github_event = GithubEvents(github_event_name) - if github_event == GithubEvents.PULL_REQUEST: + if commit_ref is not None: changed_files = get_changed_files(commit_ref=commit_ref, dry_run=dry_run, verbose=verbose) else: changed_files = () diff --git a/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py b/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py index 546f52ba5acdd..7ef0a11a2074b 100644 --- a/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/configuration_and_maintenance_commands.py @@ -319,7 +319,6 @@ def version(verbose: bool): @click.option('-C/-c', '--cheatsheet/--no-cheatsheet', help="Enable/disable cheatsheet.", default=None) @click.option('-A/-a', '--asciiart/--no-asciiart', help="Enable/disable ASCIIart.", default=None) @click.option( - '-B/-b', '--colour/--no-colour', help="Enable/disable Colour mode (useful for colour blind-friendly communication).", default=None, @@ -404,7 +403,10 @@ def command_hash_export(verbose: bool, output: IO): the_context_dict = ctx.to_info_dict() if verbose: get_console().print(the_context_dict) - output.write(dict_hash(the_context_dict) + "\n") + output.write(f"main:{dict_hash(the_context_dict['command']['params'])}\n") + commands_dict = the_context_dict['command']['commands'] + for command in sorted(commands_dict.keys()): + output.write(f"{command}:{dict_hash(commands_dict[command])}\n") def write_to_shell(command_to_execute: str, dry_run: bool, script_path: str, force_setup: bool) -> bool: diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands.py b/dev/breeze/src/airflow_breeze/commands/developer_commands.py index a81ae92745684..cbb2b10d06872 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands.py @@ -53,6 +53,7 @@ option_mount_sources, option_mssql_version, option_mysql_version, + option_platform_single, option_postgres_version, option_python, option_use_airflow_version, @@ -226,6 +227,7 @@ @option_verbose @option_dry_run @option_python +@option_platform_single @option_backend @option_debian_version @option_github_repository @@ -267,6 +269,7 @@ def shell( db_reset: bool, answer: Optional[str], image_tag: Optional[str], + platform: Optional[str], extra_args: Tuple, ): """Enter breeze.py environment. this is the default command use when no other is selected.""" @@ -296,6 +299,7 @@ def shell( answer=answer, debian_version=debian_version, image_tag=image_tag, + platform=platform, ) @@ -303,6 +307,7 @@ def shell( @main.command(name='start-airflow') @option_dry_run @option_python +@option_platform_single @option_github_repository @option_backend @option_postgres_version @@ -346,6 +351,7 @@ def start_airflow( image_tag: Optional[str], db_reset: bool, answer: Optional[str], + platform: Optional[str], extra_args: Tuple, ): """Enter breeze.py environment and starts all Airflow components in the tmux session.""" @@ -372,6 +378,7 @@ def start_airflow( db_reset=db_reset, start_airflow=True, image_tag=image_tag, + platform=platform, extra_args=extra_args, answer=answer, ) diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py index 07bd42cab90c9..d08e1d89b583a 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py @@ -37,6 +37,7 @@ option_airflow_constraints_mode_prod, option_airflow_constraints_reference_build, option_answer, + option_builder, option_debian_version, option_dev_apt_command, option_dev_apt_deps, @@ -52,7 +53,7 @@ option_image_tag_for_verifying, option_install_providers_from_sources, option_parallelism, - option_platform, + option_platform_multiple, option_prepare_buildx_cache, option_pull_image, option_push_image, @@ -242,7 +243,7 @@ def run_build_in_parallel( @option_parallelism @option_python_versions @option_upgrade_to_newer_dependencies -@option_platform +@option_platform_multiple @option_debian_version @option_github_repository @option_github_token @@ -303,6 +304,7 @@ def run_build_in_parallel( @option_additional_dev_apt_env @option_additional_runtime_apt_env @option_additional_runtime_apt_command +@option_builder @option_dev_apt_command @option_dev_apt_deps @option_python_image diff --git a/dev/breeze/src/airflow_breeze/global_constants.py b/dev/breeze/src/airflow_breeze/global_constants.py index 8dedc191f7db6..5ba825e71f67a 100644 --- a/dev/breeze/src/airflow_breeze/global_constants.py +++ b/dev/breeze/src/airflow_breeze/global_constants.py @@ -19,7 +19,7 @@ """ from __future__ import annotations -import os +import platform from enum import Enum from functools import lru_cache @@ -106,7 +106,8 @@ class SelectiveUnitTestTypes(Enum): ALLOWED_DEBIAN_VERSIONS = ['bullseye', 'buster'] ALLOWED_BUILD_CACHE = ["registry", "local", "disabled"] MULTI_PLATFORM = "linux/amd64,linux/arm64" -ALLOWED_PLATFORMS = ["linux/amd64", "linux/arm64", MULTI_PLATFORM] +SINGLE_PLATFORMS = ["linux/amd64", "linux/arm64"] +ALLOWED_PLATFORMS = [*SINGLE_PLATFORMS, MULTI_PLATFORM] ALLOWED_USE_AIRFLOW_VERSIONS = ['none', 'wheel', 'sdist'] PARAM_NAME_DESCRIPTION = { @@ -144,8 +145,15 @@ def get_available_packages(short_version=False) -> list[str]: return package_list +def get_default_platform_machine() -> str: + machine = platform.uname().machine + # Some additional conversion for various platforms... + machine = {"AMD64": "x86_64"}.get(machine, machine) + return machine + + # Initialise base variables -DOCKER_DEFAULT_PLATFORM = f"linux/{os.uname().machine}" +DOCKER_DEFAULT_PLATFORM = f"linux/{get_default_platform_machine()}" DOCKER_BUILDKIT = 1 SSH_PORT = "12322" @@ -296,6 +304,7 @@ class GithubEvents(Enum): PULL_REQUEST = "pull_request" PULL_REQUEST_REVIEW = "pull_request_review" PULL_REQUEST_TARGET = "pull_request_target" + PULL_REQUEST_WORKFLOW = "pull_request_workflow" PUSH = "push" SCHEDULE = "schedule" WORKFLOW_RUN = "workflow_run" diff --git a/dev/breeze/src/airflow_breeze/params/common_build_params.py b/dev/breeze/src/airflow_breeze/params/common_build_params.py index 68947186d68e8..e6c2c70029de4 100644 --- a/dev/breeze/src/airflow_breeze/params/common_build_params.py +++ b/dev/breeze/src/airflow_breeze/params/common_build_params.py @@ -22,6 +22,7 @@ from typing import List, Optional from airflow_breeze.branch_defaults import AIRFLOW_BRANCH +from airflow_breeze.global_constants import DOCKER_DEFAULT_PLATFORM from airflow_breeze.utils.console import get_console from airflow_breeze.utils.platforms import get_real_platform @@ -44,6 +45,7 @@ class CommonBuildParams: airflow_constraints_location: str = "" answer: Optional[str] = None build_id: int = 0 + builder: str = "default" constraints_github_repository: str = "apache/airflow" debian_version: str = "bullseye" dev_apt_command: str = "" @@ -56,7 +58,7 @@ class CommonBuildParams: github_username: str = "" image_tag: Optional[str] = None install_providers_from_sources: bool = False - platform: str = f"linux/{os.uname().machine}" + platform: str = DOCKER_DEFAULT_PLATFORM prepare_buildx_cache: bool = False python_image: Optional[str] = None push_image: bool = False diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index b67d362186653..b4c1e73f5a827 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -31,6 +31,7 @@ ALLOWED_POSTGRES_VERSIONS, ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS, AVAILABLE_INTEGRATIONS, + DOCKER_DEFAULT_PLATFORM, MOUNT_ALL, MOUNT_REMOVE, MOUNT_SELECTED, @@ -78,6 +79,7 @@ class ShellParams: mysql_version: str = ALLOWED_MYSQL_VERSIONS[0] num_runs: str = "" package_format: str = ALLOWED_INSTALLATION_PACKAGE_FORMATS[0] + platform: str = DOCKER_DEFAULT_PLATFORM postgres_version: str = ALLOWED_POSTGRES_VERSIONS[0] python: str = ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS[0] skip_environment_initialization: bool = False @@ -189,6 +191,8 @@ def compose_files(self): backend_files = [] for backend in ALLOWED_BACKENDS: backend_files.extend(self.get_backend_compose_files(backend)) + compose_ci_file.append(f"{str(SCRIPTS_CI_DIR)}/docker-compose/backend-mssql-bind-volume.yml") + compose_ci_file.append(f"{str(SCRIPTS_CI_DIR)}/docker-compose/backend-mssql-docker-volume.yml") local_docker_compose_file = f"{str(SCRIPTS_CI_DIR)}/docker-compose/local.yml" local_all_sources_docker_compose_file = f"{str(SCRIPTS_CI_DIR)}/docker-compose/local-all-sources.yml" files_docker_compose_file = f"{str(SCRIPTS_CI_DIR)}/docker-compose/files.yml" @@ -239,7 +243,7 @@ def compose_files(self): if len(integrations) > 0: for integration in integrations: compose_ci_file.append(f"{str(SCRIPTS_CI_DIR)}/docker-compose/integration-{integration}.yml") - return ':'.join(compose_ci_file) + return os.pathsep.join(compose_ci_file) @property def command_passed(self): diff --git a/dev/breeze/src/airflow_breeze/pre_commit_ids.py b/dev/breeze/src/airflow_breeze/pre_commit_ids.py index bcddfa0ec9533..db839f0712685 100644 --- a/dev/breeze/src/airflow_breeze/pre_commit_ids.py +++ b/dev/breeze/src/airflow_breeze/pre_commit_ids.py @@ -25,10 +25,11 @@ 'all', 'black', 'blacken-docs', - 'check-airflow-2-1-compatibility', + 'check-airflow-2-2-compatibility', 'check-airflow-config-yaml-consistent', 'check-airflow-providers-have-extras', 'check-apache-license-rat', + 'check-base-operator-partial-arguments', 'check-base-operator-usage', 'check-boring-cyborg-configuration', 'check-breeze-top-dependencies-limited', @@ -36,6 +37,7 @@ 'check-changelog-has-no-duplicates', 'check-daysago-import-from-utils', 'check-docstring-param-types', + 'check-example-dags-urls', 'check-executables-have-shebangs', 'check-extra-packages-references', 'check-extras-order', @@ -59,8 +61,10 @@ 'check-setup-order', 'check-start-date-not-used-in-defaults', 'check-system-tests-present', + 'check-system-tests-tocs', 'check-xml', 'codespell', + 'create-missing-init-py-files-tests', 'debug-statements', 'detect-private-key', 'doctoc', diff --git a/dev/breeze/src/airflow_breeze/utils/common_options.py b/dev/breeze/src/airflow_breeze/utils/common_options.py index af2b6c75e023d..778b34ad1c9ce 100644 --- a/dev/breeze/src/airflow_breeze/utils/common_options.py +++ b/dev/breeze/src/airflow_breeze/utils/common_options.py @@ -37,6 +37,7 @@ ALLOWED_POSTGRES_VERSIONS, ALLOWED_PYTHON_MAJOR_MINOR_VERSIONS, ALLOWED_USE_AIRFLOW_VERSIONS, + SINGLE_PLATFORMS, get_available_packages, ) from airflow_breeze.utils.custom_param_types import ( @@ -214,12 +215,18 @@ option_image_name = click.option( '-n', '--image-name', help='Name of the image to verify (overrides --python and --image-tag).' ) -option_platform = click.option( +option_platform_multiple = click.option( '--platform', help='Platform for Airflow image.', envvar='PLATFORM', type=BetterChoice(ALLOWED_PLATFORMS), ) +option_platform_single = click.option( + '--platform', + help='Platform for Airflow image.', + envvar='PLATFORM', + type=BetterChoice(SINGLE_PLATFORMS), +) option_debian_version = click.option( '--debian-version', help='Debian version used for the image.', @@ -452,10 +459,15 @@ is_flag=True, envvar='PULL_IMAGE', ) - option_python_image = click.option( '--python-image', help="If specified this is the base python image used to build the image. " "Should be something like: python:VERSION-slim-bullseye", envvar='PYTHON_IMAGE', ) +option_builder = click.option( + '--builder', + help="Buildx builder used to perform `docker buildx build` commands", + envvar='BUILDER', + default='default', +) diff --git a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py index 8af6010c40403..cdb1ceb258725 100644 --- a/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/docker_command_utils.py @@ -28,7 +28,7 @@ from airflow_breeze.params.common_build_params import CommonBuildParams from airflow_breeze.params.shell_params import ShellParams from airflow_breeze.utils.host_info_utils import get_host_group_id, get_host_os, get_host_user_id -from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT, MSSQL_DATA_VOLUME try: from packaging import version @@ -352,7 +352,7 @@ def prepare_docker_build_cache_command( build_flags = image_params.extra_docker_build_flags final_command = [] final_command.extend(["docker"]) - final_command.extend(["buildx", "build", "--builder", "airflow_cache", "--progress=tty"]) + final_command.extend(["buildx", "build", "--builder", image_params.builder, "--progress=tty"]) final_command.extend(build_flags) final_command.extend(["--pull"]) final_command.extend(arguments) @@ -388,7 +388,7 @@ def prepare_base_build_command(image_params: CommonBuildParams, verbose: bool) - "buildx", "build", "--builder", - "default", + image_params.builder, "--progress=tty", "--push" if image_params.push_image else "--load", ] @@ -525,6 +525,7 @@ def update_expected_environment_variables(env: Dict[str, str]) -> None: set_value_to_default_if_not_set(env, 'LIST_OF_INTEGRATION_TESTS_TO_RUN', "") set_value_to_default_if_not_set(env, 'LOAD_DEFAULT_CONNECTIONS', "false") set_value_to_default_if_not_set(env, 'LOAD_EXAMPLES', "false") + set_value_to_default_if_not_set(env, 'MSSQL_DATA_VOLUME', str(MSSQL_DATA_VOLUME)) set_value_to_default_if_not_set(env, 'PACKAGE_FORMAT', ALLOWED_PACKAGE_FORMATS[0]) set_value_to_default_if_not_set(env, 'PRINT_INFO_FROM_SCRIPTS', "true") set_value_to_default_if_not_set(env, 'PYTHONDONTWRITEBYTECODE', "true") diff --git a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py index 0900793a48e1f..e572c5c373424 100644 --- a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py +++ b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py @@ -86,7 +86,7 @@ def calculate_md5_checksum_for_files( return modified_files, not_modified_files -def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path, verbose: bool) -> bool: +def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path) -> bool: """ Checks if build is needed based on whether important files were modified. diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py index 413401c0e8332..c54b54e5c7f34 100644 --- a/dev/breeze/src/airflow_breeze/utils/path_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py @@ -238,7 +238,9 @@ def find_airflow_sources_root_to_operate_on() -> Path: AIRFLOW_SOURCES_ROOT = find_airflow_sources_root_to_operate_on().resolve() BUILD_CACHE_DIR = AIRFLOW_SOURCES_ROOT / '.build' +DAGS_DIR = AIRFLOW_SOURCES_ROOT / 'dags' FILES_DIR = AIRFLOW_SOURCES_ROOT / 'files' +HOOKS_DIR = AIRFLOW_SOURCES_ROOT / 'hooks' MSSQL_DATA_VOLUME = AIRFLOW_SOURCES_ROOT / 'tmp_mssql_volume' KUBE_DIR = AIRFLOW_SOURCES_ROOT / ".kube" LOGS_DIR = AIRFLOW_SOURCES_ROOT / 'logs' @@ -260,12 +262,17 @@ def create_volume_if_missing(volume_name: str): check=False, ) if res_inspect.returncode != 0: - run_command( + result = run_command( cmd=["docker", "volume", "create", volume_name], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=True, + check=False, + capture_output=True, ) + if result.returncode != 0: + get_console().print( + "[warning]\nMypy Cache volume could not be created. Continuing, but you " + "should make sure your docker works.\n\n" + f"Error: {result.stdout}\n" + ) def create_static_check_volumes(): @@ -278,7 +285,9 @@ def create_directories_and_files() -> None: Checks if setup has been updates since last time and proposes to upgrade if so. """ BUILD_CACHE_DIR.mkdir(parents=True, exist_ok=True) + DAGS_DIR.mkdir(parents=True, exist_ok=True) FILES_DIR.mkdir(parents=True, exist_ok=True) + HOOKS_DIR.mkdir(parents=True, exist_ok=True) MSSQL_DATA_VOLUME.mkdir(parents=True, exist_ok=True) KUBE_DIR.mkdir(parents=True, exist_ok=True) LOGS_DIR.mkdir(parents=True, exist_ok=True) diff --git a/dev/breeze/src/airflow_breeze/utils/run_utils.py b/dev/breeze/src/airflow_breeze/utils/run_utils.py index f0a44b425f4bc..9754605f49be4 100644 --- a/dev/breeze/src/airflow_breeze/utils/run_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/run_utils.py @@ -96,6 +96,7 @@ def run_command( return subprocess.CompletedProcess(cmd, returncode=0) try: cmd_env = os.environ.copy() + cmd_env.setdefault("HOME", str(Path.home())) if env: cmd_env.update(env) with ci_group(title=f"Output of {title}", enabled=enabled_output_group): diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 492135ebd38b0..2d7e8fe83d88b 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -298,7 +298,7 @@ def test_expected_output_full_tests_needed( "upgrade-to-newer-dependencies": "false", "test-types": "", }, - id="Everything should run when full tests are needed even if no files are changed", + id="Nothing should run if only non-important files changed", ), pytest.param( ( @@ -371,6 +371,76 @@ def test_expected_output_pull_request_v2_3( assert_outputs_are_printed(expected_outputs, str(sc)) +@pytest.mark.parametrize( + "files, expected_outputs,", + [ + pytest.param( + ("INTHEWILD.md",), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "false", + "needs-helm-tests": "false", + "run-tests": "false", + "docs-build": "false", + "upgrade-to-newer-dependencies": "false", + "test-types": "", + }, + id="Nothing should run if only non-important files changed", + ), + pytest.param( + ( + "airflow/cli/test.py", + "chart/aaaa.txt", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "true", + "run-tests": "true", + "docs-build": "true", + "run-kubernetes-tests": "true", + "upgrade-to-newer-dependencies": "false", + "test-types": "Always CLI", + }, + id="CLI tests and Kubernetes tests should run if cli/chart files changed", + ), + pytest.param( + ( + "airflow/file.py", + "tests/providers/google/file.py", + ), + { + "all-python-versions": "['3.7']", + "all-python-versions-list-as-string": "3.7", + "image-build": "true", + "needs-helm-tests": "false", + "run-tests": "true", + "docs-build": "true", + "run-kubernetes-tests": "false", + "upgrade-to-newer-dependencies": "false", + "test-types": "API Always CLI Core Integration Other Providers WWW", + }, + id="All tests except should run if core file changed", + ), + ], +) +def test_expected_output_pull_request_target( + files: Tuple[str, ...], + expected_outputs: Dict[str, str], +): + sc = SelectiveChecks( + files=files, + commit_ref="HEAD", + github_event=GithubEvents.PULL_REQUEST_TARGET, + pr_labels=(), + default_branch="main", + ) + assert_outputs_are_printed(expected_outputs, str(sc)) + + @pytest.mark.parametrize( "files, pr_labels, default_branch, expected_outputs,", [ @@ -441,11 +511,21 @@ def test_expected_output_push( assert_outputs_are_printed(expected_outputs, str(sc)) -def test_no_commit_provided(): +@pytest.mark.parametrize( + "github_event", + [ + GithubEvents.PUSH, + GithubEvents.PULL_REQUEST, + GithubEvents.PULL_REQUEST_TARGET, + GithubEvents.PULL_REQUEST_WORKFLOW, + GithubEvents.SCHEDULE, + ], +) +def test_no_commit_provided_trigger_full_build_for_any_event_type(github_event): sc = SelectiveChecks( files=(), commit_ref="", - github_event=GithubEvents.PULL_REQUEST, + github_event=github_event, pr_labels=(), default_branch="main", ) @@ -457,7 +537,9 @@ def test_no_commit_provided(): "needs-helm-tests": "true", "run-tests": "true", "docs-build": "true", - "upgrade-to-newer-dependencies": "false", + "upgrade-to-newer-dependencies": "true" + if github_event in [GithubEvents.PUSH, GithubEvents.SCHEDULE] + else "false", "test-types": "API Always CLI Core Integration Other Providers WWW", }, str(sc), diff --git a/images/pr/pr-full-tests-needed.png b/images/pr/pr-full-tests-needed.png deleted file mode 100644 index c863153d0699f..0000000000000 Binary files a/images/pr/pr-full-tests-needed.png and /dev/null differ diff --git a/images/pr/pr-likely-ok-to-merge.png b/images/pr/pr-likely-ok-to-merge.png deleted file mode 100644 index 9c04dee42211b..0000000000000 Binary files a/images/pr/pr-likely-ok-to-merge.png and /dev/null differ diff --git a/images/pr/pr-no-tests-needed-comment.png b/images/pr/pr-no-tests-needed-comment.png deleted file mode 100644 index 78a118186875a..0000000000000 Binary files a/images/pr/pr-no-tests-needed-comment.png and /dev/null differ