chore: add flaky retry in weekly CI

RomanBredehoft committed Apr 30, 2024
1 parent 4e7ef50 commit e25ceff

Showing 4 changed files with 136 additions and 32 deletions.
112 changes: 84 additions & 28 deletions .github/workflows/continuous-integration.yaml
@@ -724,7 +724,8 @@ jobs:
poetry run python ./script/actions_utils/pytest_failed_test_report.py \
--pytest-input-report "pytest_report.json" \
--failed-tests-report "failed_tests_report.json" \
--failed-tests-comment "failed_tests_comment.txt"
--failed-tests-comment "failed_tests_comment_${{ matrix.python_version }}.txt"
--failed-tests-list "failed_tests_slack_list_${{ matrix.python_version }}.txt"
# Check if all failed tests are known flaky tests
FAILED_TESTS_ARE_FLAKY=$(jq .all_failed_tests_are_flaky "failed_tests_report.json")
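
For context, this check reads a single boolean out of the formatted report written by pytest_failed_test_report.py (see the script changes below). A minimal sketch with hypothetical report content (the keys mirror the script; the values and test name are invented):

    # Hypothetical report content, for illustration only
    echo '{"tests_failed": true, "all_failed_tests_are_flaky": true, "flaky": ["tests/test_a.py::test_sometimes_fails"], "non_flaky": []}' > failed_tests_report.json

    # Same extraction as in the step above: prints "true" or "false"
    FAILED_TESTS_ARE_FLAKY=$(jq .all_failed_tests_are_flaky "failed_tests_report.json")
    echo "${FAILED_TESTS_ARE_FLAKY}"   # prints: true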
@@ -749,7 +750,7 @@ jobs:
with:
header: flaky-test
recreate: true
path: failed_tests_comment.txt
path: failed_tests_comment_${{ matrix.python_version }}.txt

# If the regular pytest step has been skipped but some changes have been detected in test files,
# meaning there was no other change impacting our testing suite, we only need to run these
@@ -774,22 +775,37 @@
id: pytest_weekly
if: ${{ fromJSON(env.IS_WEEKLY) && steps.conformance.outcome == 'success' && !cancelled() }}
run: |
make pytest PYTEST_OPTIONS=--weekly
make pytest_and_report PYTEST_OPTIONS=--weekly
# Run Pytest on all of our tests on a weekly basis using PyPI's local wheel
- name: PyTest with PyPI local wheel of Concrete ML (weekly)
if: |
fromJSON(env.IS_WEEKLY)
&& steps.conformance.outcome == 'success'
&& !cancelled()
run: |
make pytest_pypi_wheel_cml
# If weekly tests failed, check for flaky tests
if [ $? -ne 0 ]; then
# Convert pytest report to formatted report with only information about failed tests
poetry run python ./script/actions_utils/pytest_failed_test_report.py \
--pytest-input-report "pytest_report.json" \
--failed-tests-report "failed_tests_report.json" \
--failed-tests-list "failed_tests_slack_list_${{ matrix.python_version }}.txt"
# Check if all failed tests are known flaky tests
FAILED_TESTS_ARE_FLAKY=$(jq .all_failed_tests_are_flaky "failed_tests_report.json")
echo "FAILED_TESTS_ARE_FLAKY=${FAILED_TESTS_ARE_FLAKY}" >> "$GITHUB_ENV"
# If all failed tests are known flaky tests, try to rerun them
if [[ "${FAILED_TESTS_ARE_FLAKY}" == "true" ]]; then
make pytest_run_last_failed
# Else, return exit status 1 in order to make this step fail
else
exit 1
fi
fi
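
The retry goes through make pytest_run_last_failed, whose Makefile recipe is not part of this diff; the sketch below only illustrates the assumed mechanism, namely that it wraps pytest's built-in last-failed cache:

    # Sketch only: pytest records failing test IDs in .pytest_cache/v/cache/lastfailed,
    # so a follow-up run can be restricted to exactly those tests.
    poetry run pytest --last-failed --last-failed-no-failures none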
# Run Pytest on all of our tests (except flaky ones) using PyPI's local wheel during the
# release process
- name: PyTest (no flaky) with PyPI local wheel of Concrete ML (release)
# Run Pytest on all of our tests (except flaky ones) using PyPI's local wheel during the
# weekly run or the release process
- name: PyTest (no flaky) with PyPI local wheel of Concrete ML (weekly, release)
if: |
fromJSON(env.IS_RELEASE)
(fromJSON(env.IS_WEEKLY) || fromJSON(env.IS_RELEASE))
&& steps.conformance.outcome == 'success'
&& !cancelled()
run: |
@@ -936,7 +952,7 @@ jobs:
needs: [build-linux]
runs-on: ubuntu-20.04
timeout-minutes: 2
if: ${{ always() }}
if: success() || failure()
steps:
- name: Fail on unsuccessful Linux build
shell: bash
@@ -1080,12 +1096,25 @@ jobs:
# macOS builds are already long, so we decided not to use --weekly on them; this could be
# changed. Note also that, for macOS, due to unexpected issues with GitHub, we have a
# slightly different way to launch pytest
# Add support for re-running flaky tests on macOS (intel) CI
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4428
- name: PyTest Source Code
run: |
make pytest_macOS_for_GitHub
# Only send a report for the following CI runs:
# - when pushing to main
# - when pushing to a release branch
# - when running weekly tests
send-report:
if: ${{ always() }}
if: |
(success() || failure())
&& (
fromJSON(env.IS_WEEKLY)
|| fromJSON(env.IS_PUSH_TO_MAIN)
|| fromJSON(env.IS_PUSH_TO_RELEASE)
)
timeout-minutes: 2
needs:
[
@@ -1095,15 +1124,15 @@ jobs:
stop-runner-linux,
build-macos,
]
env:
MATRIX: ${{ fromJSON(format('{{"include":{0}}}', needs.start-runner-linux.outputs.matrix)) }}

name: Send Slack notification
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633

- name: Prepare whole job status
if: ${{ always() }}
continue-on-error: true
env:
NEEDS_JSON: ${{ toJSON(needs) }}
run: |
@@ -1112,20 +1141,47 @@ jobs:
--needs_context_json /tmp/needs_context.json)
echo "JOB_STATUS=${JOB_STATUS}" >> "$GITHUB_ENV"
- name: Set message title
run: |
if [[ "${{ fromJSON(env.IS_WEEKLY) }}" == "true" ]]; then
TITLES_START="Weekly Tests"
elif [[ "${{ fromJSON(env.IS_PUSH_TO_MAIN) || fromJSON(env.IS_PUSH_TO_RELEASE) }}" == "true" ]]; then
TITLES_START="Push to '${{ github.ref_name }}'"
fi
echo "SLACK_TITLE=${TITLES_START}: ${{ env.JOB_STATUS || 'failure' }}" >> "$GITHUB_ENV"
# Add support for re-running flaky tests on macOS (intel) CI
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4428
- name: Set message body
run: |
SLACK_BODY="[Action URL](${{ env.ACTION_RUN_URL }})\n\
* build-linux: ${{ needs.build-linux.result || 'Did not run.' }}\n\n\
* build-macos: ${{ needs.build-macos.result || 'Did not run.' }}\n\n"
PYTHON_VERSIONS=("3.8" "3.9" "3.10")
for python_version in "${PYTHON_VERSIONS[@]}"; do
file_name="failed_tests_slack_list_${python_version}.txt"
if [ -f "${file_name}" ]; then
SLACK_BODY+="Linux failed tests (Python ${python_version}):\n"
SLACK_BODY+="$(cat "${file_name}")"
SLACK_BODY+="\n\n"
fi
done
echo "SLACK_BODY=${SLACK_BODY}" >> "$GITHUB_ENV"
- name: Slack Notification
if: ${{ always() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_COLOR: ${{ env.JOB_STATUS || 'failure' }}
SLACK_MESSAGE: "Full run finished with status ${{ env.JOB_STATUS || 'failure' }} \
(${{ env.ACTION_RUN_URL }})\n\
- matrix-preparation: ${{ needs.matrix-preparation.result || 'Did not run.'}}\n\n\
- start-runner-linux: ${{ needs.start-runner-linux.result || 'Did not run.'}}\n\n\
- build-linux: ${{ needs.build-linux.result || 'Did not run.' }}\n\n\
- stop-runner-linux: ${{ needs.stop-runner-linux.result || 'Did not run.'}}\n\n\
- build-macos: ${{ needs.build-macos.result || 'Did not run.' }}"
SLACK_TITLE: ${{ env.SLACK_TITLE || 'Unexpected CI' }}
SLACK_MESSAGE: ${{ env.SLACK_BODY || 'Unexpected CI' }}
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true

2 changes: 1 addition & 1 deletion Makefile
@@ -252,7 +252,7 @@ pytest:
${PYTEST_OPTIONS}"

# Coverage options are not included since they look to fail on macOS
# (see https://github.com/zama-ai/concrete-ml-internal/issues/1554)
# (see https://github.com/zama-ai/concrete-ml-internal/issues/4428)
.PHONY: pytest_macOS_for_GitHub # Run pytest without coverage options
pytest_macOS_for_GitHub: pytest_internal_parallel

2 changes: 2 additions & 0 deletions script/actions_utils/generate_test_matrix.py
@@ -30,6 +30,8 @@ class OS(enum.Enum):
MACOS = "macos"


# Add mac silicon
# FIXME: https://github.com/zama-ai/concrete-ml-internal/issues/4010
OS_VERSIONS = {
OS.LINUX: "ubuntu-20.04",
OS.MACOS: "macos-12",
52 changes: 49 additions & 3 deletions script/actions_utils/pytest_failed_test_report.py
@@ -6,6 +6,33 @@
from typing import Dict, Optional


def write_failed_tests_list(failed_tests_list_path: Path, failed_tests_report: Dict):
"""Write the list of failed tests as a text file.
Args:
failed_tests_list_path (Path): Path where to write the list.
failed_tests_report (Dict): Formatted report of failed tests.
"""
with open(failed_tests_list_path, "w", encoding="utf-8") as f:

# If at least one test failed, write the list
if failed_tests_report["tests_failed"]:

# Write all flaky tests that initially failed as a list
f.write("- Known flaky tests that initially failed: \n")
for flaky_name in failed_tests_report["flaky"]:
f.write(" - " + flaky_name)

# Write all other tests that initially failed as a list if they were not all known
# flaky tests
if not failed_tests_report["all_failed_tests_are_flaky"]:
f.write("\n")

f.write("- Other tests that initially failed: \n")
for non_flaky_name in failed_tests_report["non_flaky"]:
f.write(" - " + non_flaky_name)


def write_failed_tests_comment(failed_tests_comment_path: Path, failed_tests_report: Dict):
"""Write a formatted PR comment for failed tests as a text file.
@@ -56,10 +83,11 @@ def write_failed_tests_comment(failed_tests_comment_path: Path, failed_tests_rep
f.write("\n\n</p>\n</details>\n\n")


def write_failed_tests_report(
def write_failed_tests_reports(
failed_tests_report_path: Path,
input_report: Dict,
failed_tests_comment_path: Optional[Path] = None,
failed_tests_list_path: Optional[Path] = None,
):
"""Write a formatted report of failed tests as a JSON file.
@@ -70,6 +98,8 @@ def write_failed_tests_report(
input_report (Dict): Pytest overall report.
failed_tests_comment_path (Optional[Path]): Path where to write the formatted PR comment.
If None, no file is written. Default to None.
failed_tests_list_path (Optional[Path]): Path where to write the list of failed tests.
If None, no file is written. Default to None.
"""
# Safest default parameters
failed_tests_report = {
@@ -120,6 +150,10 @@ def write_failed_tests_report(
if failed_tests_comment_path is not None:
write_failed_tests_comment(failed_tests_comment_path, failed_tests_report)

# Write list of failed tests if a path is given
if failed_tests_list_path is not None:
write_failed_tests_list(failed_tests_list_path, failed_tests_report)


def main(args):
"""Entry point
@@ -140,8 +174,15 @@ def main(args):
if failed_tests_comment_path is not None:
failed_tests_comment_path = Path(failed_tests_comment_path).resolve()

write_failed_tests_report(
failed_tests_report_path, input_report, failed_tests_comment_path=failed_tests_comment_path
failed_tests_list_path = args.failed_tests_list
if failed_tests_list_path is not None:
failed_tests_list_path = Path(failed_tests_list_path).resolve()

write_failed_tests_reports(
failed_tests_report_path,
input_report,
failed_tests_comment_path=failed_tests_comment_path,
failed_tests_list_path=failed_tests_list_path,
)


@@ -165,6 +206,11 @@ def main(args):
type=str,
help="Path where to write the warning comment for failed tests as a txt file",
)
parser.add_argument(
"--failed-tests-list",
type=str,
help="Path where to write the list of failed tests as a txt file",
)

cli_args = parser.parse_args()
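
For reference, a local invocation mirroring the CI steps above, assuming pytest_report.json was produced beforehand by make pytest_and_report and using the Python 3.8 file names from the workflow:

    poetry run python ./script/actions_utils/pytest_failed_test_report.py \
      --pytest-input-report "pytest_report.json" \
      --failed-tests-report "failed_tests_report.json" \
      --failed-tests-comment "failed_tests_comment_3.8.txt" \
      --failed-tests-list "failed_tests_slack_list_3.8.txt"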

