flakey tests rerunner (#3395)

## About The Pull Request If only one test fails it will be rerun once, if that one passes it creates a bug report. tgstation/tgstation#71519 tgstation/tgstation#80817 tgstation/tgstation#82790  ## Why It's Good For The Game Its so fucking annyoing to deal with ship placment failing cause of auxmos this will also probably document some bugs for us.  ## Changelog :cl: FalloutFalcon, Mothblocks, Cyberboss add: retry failed unit tests. /:cl:   --------- Signed-off-by: FalloutFalcon <[email protected]>
shiptest-ss13 · Oct 7, 2024 · 09ffd17 · 09ffd17
1 parent 8ca8141
commit 09ffd17
Show file tree

Hide file tree

Showing 10 changed files with 10,102 additions and 1 deletion.
diff --git a/.github/workflows/rerun_flaky_tests.yml b/.github/workflows/rerun_flaky_tests.yml
@@ -0,0 +1,37 @@
+name: Rerun/Report Flaky Tests
+on:
+  workflow_run:
+    workflows: [Checks]
+    types:
+    - completed
+
+permissions:
+  actions: write
+  contents: write
+  issues: write
+
+jobs:
+  rerun_flaky_tests:
+    runs-on: ubuntu-latest
+    if: ${{ github.event.workflow_run.conclusion == 'failure' && github.event.workflow_run.run_attempt == 1 }}
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+    - name: Rerun flaky tests
+      uses: actions/github-script@v6
+      with:
+        script: |
+          const { rerunFlakyTests } = await import('${{ github.workspace }}/tools/pull_request_hooks/rerunFlakyTests.js')
+          await rerunFlakyTests({ github, context })
+  report_flaky_tests:
+    runs-on: ubuntu-latest
+    if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.run_attempt == 2 }}
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+    - name: Report flaky tests
+      uses: actions/github-script@v6
+      with:
+        script: |
+          const { reportFlakyTests } = await import('${{ github.workspace }}/tools/pull_request_hooks/rerunFlakyTests.js')
+          await reportFlakyTests({ github, context })
diff --git a/code/modules/unit_tests/unit_test.dm b/code/modules/unit_tests/unit_test.dm
@@ -111,7 +111,7 @@ GLOBAL_VAR(test_log)
 		log_world("::error file=[file],line=[line],title=[test_path]::[text]")
 
 		// Normal log message
-		log_entry += "\tREASON #[reasonID]: [text] at [file]:[line]"
+		log_entry += "\tFAILURE #[reasonID]: [text] at [file]:[line]"
 
 	var/message = log_entry.Join("\n")
 	log_test(message)

diff --git a/tools/pull_request_hooks/flakyTestPayloads/chat_client.txt b/tools/pull_request_hooks/flakyTestPayloads/chat_client.txt
diff --git a/tools/pull_request_hooks/flakyTestPayloads/invalid_timer.txt b/tools/pull_request_hooks/flakyTestPayloads/invalid_timer.txt
diff --git a/tools/pull_request_hooks/flakyTestPayloads/monkey_business.txt b/tools/pull_request_hooks/flakyTestPayloads/monkey_business.txt
diff --git a/tools/pull_request_hooks/flakyTestPayloads/multiple_failures.txt b/tools/pull_request_hooks/flakyTestPayloads/multiple_failures.txt
@@ -0,0 +1,8 @@
+2022-11-22T05:59:45.2618397Z ##[group]/datum/unit_test/shapeshift_spell
+2022-11-22T05:59:45.4118582Z ##[error]Shapeshift spell: Wild Shapeshift failed to transform the dummy into the shape Juggernaut. (Pablo Pfeifer was located within the floor, which is a /turf/open/floor/iron).
+2022-11-22T05:59:45.4119786Z 	FAILURE #1: Shapeshift spell: Dragon Form failed to transform the dummy into the shape . (Pablo Pfeifer was located within the floor, which is a /turf/open/floor/iron). at code/modules/unit_tests/spell_shapeshift.dm:65
+2022-11-22T05:59:45.2618397Z ##[endgroup]
+2022-11-22T05:59:45.2618397Z ##[group]/datum/unit_test/more_shapeshift_spell
+2022-11-22T05:59:45.4118582Z ##[error]Shapeshift spell: Wild Shapeshift failed to transform the dummy into the shape Juggernaut. (Pablo Pfeifer was located within the floor, which is a /turf/open/floor/iron).
+2022-11-22T05:59:45.4119786Z 	FAILURE #1: Shapeshift spell: Dragon Form failed to transform the dummy into the shape . (Pablo Pfeifer was located within the floor, which is a /turf/open/floor/iron). at code/modules/unit_tests/spell_shapeshift.dm:65
+2022-11-22T05:59:45.2618397Z ##[endgroup]
diff --git a/tools/pull_request_hooks/flakyTestPayloads/shapeshift.txt b/tools/pull_request_hooks/flakyTestPayloads/shapeshift.txt
diff --git a/tools/pull_request_hooks/package.json b/tools/pull_request_hooks/package.json
@@ -0,0 +1,3 @@
+{
+	"type": "module"
+}
diff --git a/tools/pull_request_hooks/rerunFlakyTests.js b/tools/pull_request_hooks/rerunFlakyTests.js
@@ -0,0 +1,298 @@
+const LABEL = "🤖 Flaky Test Report";
+const TITLE_BOT_HEADER = "title: ";
+
+// Only check jobs that start with these.
+// Helps make sure we don't restart something like screenshot tests or linters, which are not known to be flaky.
+const CONSIDERED_JOBS = [
+	"Integration Tests",
+];
+
+async function getFailedJobsForRun(github, context, workflowRunId, runAttempt) {
+	const jobs = await github.paginate(
+		github.rest.actions.listJobsForWorkflowRunAttempt,
+		{
+			owner: context.repo.owner,
+			repo: context.repo.repo,
+			run_id: workflowRunId,
+			attempt_number: runAttempt
+		},
+		response => {
+			return response.data;
+		});
+
+	return jobs
+		.filter((job) => job.conclusion === "failure");
+}
+
+export async function rerunFlakyTests({ github, context }) {
+	const failingJobs = await getFailedJobsForRun(
+		github,
+		context,
+		context.payload.workflow_run.id,
+		context.payload.workflow_run.run_attempt
+	);
+
+	const filteredFailingJobs = failingJobs.filter((job) => {
+		console.log(`Failing job: ${job.name}`)
+		return CONSIDERED_JOBS.some((title) => job.name.startsWith(title));
+	});
+	if (filteredFailingJobs.length === 0) {
+		console.log("Failing jobs are NOT designated flaky. Not rerunning.");
+		return;
+	}
+
+	console.log(`Rerunning job: ${filteredFailingJobs[0].name}`);
+
+	github.rest.actions.reRunWorkflowFailedJobs({
+		owner: context.repo.owner,
+		repo: context.repo.repo,
+		run_id: context.payload.workflow_run.id,
+	});
+}
+
+// Tries its best to extract a useful error title and message for the given log
+export function extractDetails(log) {
+	// Strip off timestamp
+	const lines = log.split(/^[0-9.:T\-]*?Z /gm);
+
+	const failureRegex = /^\t?FAILURE #(?<number>[0-9]+): (?<headline>.+)/;
+	const groupRegex = /^##\[group\](?<group>.+)/;
+
+	const failures = [];
+	let lastGroup = "root";
+	let loggingFailure;
+
+	const newFailure = (failureMatch) => {
+		const { headline } = failureMatch.groups;
+
+		loggingFailure = {
+			headline,
+			group: lastGroup.replace("/datum/unit_test/", ""),
+			details: [],
+		};
+	};
+
+	for (const line of lines) {
+		const groupMatch = line.match(groupRegex);
+		if (groupMatch) {
+			lastGroup = groupMatch.groups.group.trim();
+			continue;
+		}
+
+		const failureMatch = line.match(failureRegex);
+
+		if (loggingFailure === undefined) {
+			if (!failureMatch) {
+				continue;
+			}
+
+			newFailure(failureMatch);
+		} else if (failureMatch || line.startsWith("##")) {
+			failures.push(loggingFailure);
+			loggingFailure = undefined;
+
+			if (failureMatch) {
+				newFailure(failureMatch);
+			}
+		} else {
+			loggingFailure.details.push(line.trim());
+		}
+	}
+
+	// We had no logged failures, there's not really anything we can do here
+	if (failures.length === 0) {
+		return {
+			title: "Flaky test failure with no obvious source",
+			failures,
+		};
+	}
+
+	// We *could* create multiple failures for multiple groups.
+	// This would be important if we had multiple flaky tests at the same time.
+	// I'm choosing not to because it complicates this logic a bit, has the ability to go terribly wrong,
+	// and also because there's something funny to me about that increasing the urgency of fixing
+	// flaky tests. If it becomes a serious issue though, I would not mind this being fixed.
+	const uniqueGroups = new Set(failures.map((failure) => failure.group));
+
+	if (uniqueGroups.size > 1) {
+		return {
+			title: `Multiple flaky test failures in ${Array.from(uniqueGroups)
+				.sort()
+				.join(", ")}`,
+			failures,
+		};
+	}
+
+	const failGroup = failures[0].group;
+
+	if (failures.length > 1) {
+		return {
+			title: `Multiple errors in flaky test ${failGroup}`,
+			failures,
+		};
+	}
+
+	const failure = failures[0];
+
+	// Common patterns where we can always get a detailed title
+	const runtimeMatch = failure.headline.match(/Runtime in .+?: (?<error>.+)/);
+	if (runtimeMatch) {
+		const runtime = runtimeMatch.groups.error.trim();
+
+		const invalidTimerMatch = runtime.match(/^Invalid timer:.+object:(?<object>[^[]+).*delegate:(?<proc>.+?), source:/);
+		if (invalidTimerMatch) {
+			return {
+				title: `Flaky test ${failGroup}: Invalid timer: ${invalidTimerMatch.groups.proc.trim()} on ${invalidTimerMatch.groups.object.trim()}`,
+				failures,
+			};
+		}
+
+		return {
+			title: `Flaky test ${failGroup}: ${runtime}`,
+			failures,
+		};
+	}
+
+	const hardDelMatch = failure.headline.match(/^(?<object>\/[\w/]+) hard deleted .* times out of a total del count of/);
+	if (hardDelMatch) {
+		return {
+			title: `Flaky hard delete: ${hardDelMatch.groups.object}`,
+			failures,
+		};
+	}
+
+	// Try to normalize the title and remove anything that might be variable
+	const normalizedError = failure.headline.replace(/\s*at .+?:[0-9]+.*/g, ""); // "<message> at code.dm:123"
+
+	return {
+		title: `Flaky test ${failGroup}: ${normalizedError}`,
+		failures,
+	};
+}
+
+async function getExistingIssueId(graphql, context, title) {
+	// Hope you never have more than 100 of these open!
+	const {
+		repository: {
+			issues: { nodes: openFlakyTestIssues },
+		},
+	} = await graphql(
+		`
+			query ($owner: String!, $repo: String!, $label: String!) {
+				repository(owner: $owner, name: $repo) {
+					issues(
+						labels: [$label]
+						first: 100
+						orderBy: { field: CREATED_AT, direction: DESC }
+						states: [OPEN]
+					) {
+						nodes {
+							number
+							title
+							body
+						}
+					}
+				}
+			}
+		`,
+		{
+			owner: context.repo.owner,
+			repo: context.repo.repo,
+			label: LABEL,
+		}
+	);
+
+	const exactTitle = openFlakyTestIssues.find((issue) => issue.title === title);
+	if (exactTitle !== undefined) {
+		return exactTitle.number;
+	}
+
+	const foundInBody = openFlakyTestIssues.find((issue) =>
+		issue.body.includes(`<!-- ${TITLE_BOT_HEADER}${exactTitle} -->`)
+	);
+	if (foundInBody !== undefined) {
+		return foundInBody.number;
+	}
+
+	return undefined;
+}
+
+function createBody({ title, failures }, runUrl) {
+	return `
+	<!-- This issue can be renamed, but do not change the next comment! -->
+	<!-- title: ${title} -->
+
+	Flaky tests were detected in [this test run](${runUrl}). This means that there was a failure that was cleared when the tests were simply restarted.
+
+	Failures:
+	\`\`\`
+	${failures
+		.map(
+			(failure) =>
+				`${failure.group}: ${failure.headline}\n\t${failure.details.join("\n")}`
+		)
+		.join("\n")}
+	\`\`\`
+	`.replace(/^\s*/gm, "");
+}
+
+export async function reportFlakyTests({ github, context }) {
+	const failedJobsFromLastRun = await getFailedJobsForRun(
+		github,
+		context,
+		context.payload.workflow_run.id,
+		context.payload.workflow_run.run_attempt - 1
+	);
+
+	const filteredFailingJobs = failedJobsFromLastRun.filter((job) => {
+		console.log(`Failing job: ${job.name}`)
+		return CONSIDERED_JOBS.some((title) => job.name.startsWith(title));
+	});
+
+	// This could one day be relaxed if we face serious enough flaky test problems, so we're going to loop anyway
+	if (filteredFailingJobs.length !== 1) {
+		console.log(
+			"Multiple jobs failing after retry, assuming maintainer rerun."
+		);
+
+		return;
+	}
+
+	for (const job of filteredFailingJobs) {
+		const { data: log } =
+			await github.rest.actions.downloadJobLogsForWorkflowRun({
+				owner: context.repo.owner,
+				repo: context.repo.repo,
+				job_id: job.id,
+			});
+
+		const details = extractDetails(log);
+
+		const existingIssueId = await getExistingIssueId(
+			github.graphql,
+			context,
+			details.title
+		);
+
+		if (existingIssueId !== undefined) {
+			// Maybe in the future, if it's helpful, update the existing issue with new links
+			console.log(`Existing issue found: #${existingIssueId}`);
+			return;
+		}
+
+		await github.rest.issues.create({
+			owner: context.repo.owner,
+			repo: context.repo.repo,
+			title: details.title,
+			labels: [LABEL],
+			body: createBody(
+				details,
+				`https://github.com/${context.repo.owner}/${
+					context.repo.repo
+				}/actions/runs/${context.payload.workflow_run.id}/attempts/${
+					context.payload.workflow_run.run_attempt - 1
+				}`
+			),
+		});
+	}
+}