From 732fa19fad801ae3be5600645fb5899d81aa11a0 Mon Sep 17 00:00:00 2001 From: Jan Starzak Date: Wed, 14 Aug 2024 09:33:40 +0200 Subject: [PATCH 1/9] feat: track failures in Workers in a time period (SOFIE-3355) --- apps/worker/packages/generic/src/index.ts | 6 +- shared/packages/api/src/config.ts | 9 +++ shared/packages/worker/src/workerAgent.ts | 67 +++++++++++++++++++---- 3 files changed, 67 insertions(+), 15 deletions(-) diff --git a/apps/worker/packages/generic/src/index.ts b/apps/worker/packages/generic/src/index.ts index 9052fe2d..ea70da71 100644 --- a/apps/worker/packages/generic/src/index.ts +++ b/apps/worker/packages/generic/src/index.ts @@ -14,12 +14,12 @@ export async function startProcess(): Promise { const processHandler = new ProcessHandler(logger) processHandler.init(config.process) - const workforce = new WorkerAgent(logger, config) + const workerAgent = new WorkerAgent(logger, config) process.on('exit', (code) => { logger.info(`Worker: Closing with exitCode: ${code}`) - workforce.terminate() + workerAgent.terminate() }) - workforce.init().catch(logger.error) + workerAgent.init().catch(logger.error) } diff --git a/shared/packages/api/src/config.ts b/shared/packages/api/src/config.ts index 8652dbf0..de613b6f 100644 --- a/shared/packages/api/src/config.ts +++ b/shared/packages/api/src/config.ts @@ -169,6 +169,12 @@ const workerArguments = defineArguments({ default: process.env.WORKER_PICK_UP_CRITICAL_EXPECTATIONS_ONLY === '1' || false, describe: 'If set to 1, the worker will only pick up expectations that are marked as critical for playout.', }, + failureLimit: { + type: 'number', + default: process.env.WORKER_FAILURE_LIMIT || 0, + describe: + 'If set, the worker will count the number of failures it encounters while working and will restart once this number is exceeded in a period of 60 seconds.', + }, }) /** CLI-argument-definitions for the AppContainer process */ const appContainerArguments = defineArguments({ @@ -426,6 +432,7 @@ export interface 
WorkerConfig { costMultiplier: number considerCPULoad: number | null pickUpCriticalExpectationsOnly: boolean + failureLimit: number } & WorkerAgentConfig } export async function getWorkerConfig(): Promise { @@ -452,6 +459,8 @@ export async function getWorkerConfig(): Promise { (typeof argv.considerCPULoad === 'string' ? parseFloat(argv.considerCPULoad) : argv.considerCPULoad) || null, pickUpCriticalExpectationsOnly: argv.pickUpCriticalExpectationsOnly, + failureLimit: + (typeof argv.failureLimit === 'string' ? parseInt(argv.failureLimit) : argv.failureLimit) || 0, }, } } diff --git a/shared/packages/worker/src/workerAgent.ts b/shared/packages/worker/src/workerAgent.ts index faf0d254..ad97cc88 100644 --- a/shared/packages/worker/src/workerAgent.ts +++ b/shared/packages/worker/src/workerAgent.ts @@ -79,6 +79,8 @@ export class WorkerAgent { private spinDownTime = 0 private intervalCheckTimer: NodeJS.Timeout | null = null private lastWorkTime = 0 + private failureCounter = 0 + private intervalFailureTimer: NodeJS.Timeout | null = null private activeMonitors: Map> = new Map() private initWorkForceAPIPromise?: { resolve: () => void; reject: (reason?: any) => void } private initAppContainerAPIPromise?: { resolve: () => void; reject: (reason?: any) => void } @@ -253,6 +255,8 @@ export class WorkerAgent { // Wait for this.workforceAPI to be ready before continuing: await pWorkForce + this.setupIntervalErrorCheck() + this.IDidSomeWork() } terminate(): void { @@ -261,6 +265,8 @@ export class WorkerAgent { this.terminated = true this.workforceAPI.terminate() + if (this.intervalFailureTimer) clearInterval(this.intervalFailureTimer) + for (const expectationManager of this.expectationManagers.values()) { expectationManager.api.terminate() } @@ -545,6 +551,7 @@ export class WorkerAgent { this.IDidSomeWork() if (job.cancelled) return // Don't send updates on cancelled work job.lastUpdated = Date.now() + this.IFailed() this.removeJob(job) this.logger.warn( `Worker "${this.id}" 
stopped job ${job.wipId}, (${exp.id}), due to error: (${ @@ -764,7 +771,7 @@ export class WorkerAgent { // Wrap the methods, so that we can cut off communication upon termination: (this is used in tests) for (const key of Object.keys(methods) as Array>) { const fcn = methods[key] as any - methods[key] = ((...args: any[]) => { + methods[key] = (async (...args: any[]) => { if (this.terminated) return new Promise((_resolve, reject) => { // Simulate a timed out message: @@ -772,7 +779,7 @@ export class WorkerAgent { reject('Timeout') }, 200) }) - return fcn(...args) + return this.trackException(fcn(...args)) }) as any } // Connect to the ExpectationManager: @@ -793,6 +800,14 @@ export class WorkerAgent { await expectationManager.api.init(connectionOptions, methods) } + private async trackException(fnc: Promise): Promise { + fnc.catch((reason) => { + this.IFailed() + throw reason + }) + return fnc + } + private async updateListOfExpectationManagers(newExpectationManagers: { id: ExpectationManagerId; url: string }[]) { const ids = new Set() for (const newEm of newExpectationManagers) { @@ -854,16 +869,7 @@ export class WorkerAgent { if (!this.activeMonitors.size) { this.logger.debug(`Worker: is idle, requesting spinning down`) - if (this.appContainerAPI.connected) { - this.appContainerAPI.requestSpinDown().catch((err) => { - this.logger.error(`Worker: appContainerAPI.requestSpinDown failed: ${stringifyError(err)}`) - }) - } else { - // Huh, we're not connected to the appContainer. - // Well, we want to spin down anyway, so we'll do it: - // eslint-disable-next-line no-process-exit - process.exit(54) - } + this.requestShutDown() } } } @@ -875,6 +881,35 @@ export class WorkerAgent { }) } } + private requestShutDown() { + if (this.appContainerAPI.connected) { + this.appContainerAPI.requestSpinDown().catch((err) => { + this.logger.error(`Worker: appContainerAPI.requestSpinDown failed: ${stringifyError(err)}`) + }) + } else { + // Huh, we're not connected to the appContainer. 
+ // Well, we want to spin down anyway, so we'll do it: + // eslint-disable-next-line no-process-exit + process.exit(54) + } + } + private setupIntervalErrorCheck() { + if (this.config.worker.failureLimit <= 0) return + if (this.intervalFailureTimer) clearInterval(this.intervalFailureTimer) + this.intervalFailureTimer = setInterval(() => this.intervalErrorCheck(), FAILURE_CHECK_INTERVAL) + } + private intervalErrorCheck() { + if (this.config.worker.failureLimit >= 0 && this.failureCounter < this.config.worker.failureLimit) { + // reset the failureCounter when the interval elapses and it doesn't cross the threshold + this.failureCounter = 0 + return + } else { + this.logger.error( + `Worker: Failed failureLimit check: ${this.failureCounter} errors in a ${FAILURE_CHECK_INTERVAL}ms window. Requesting spin down.` + ) + this.requestShutDown() + } + } /** * To be called when some actual work has been done. * If this is not called for a certain amount of time, the worker will be considered idle and will be spun down @@ -882,6 +917,12 @@ export class WorkerAgent { private IDidSomeWork() { this.lastWorkTime = Date.now() } + /** + * To be called when some work has failed + */ + private IFailed() { + this.failureCounter++ + } private getNextWipId(): WorkInProgressLocalId { return protectString(`${this._wipI++}`) } @@ -895,3 +936,5 @@ interface CurrentJob { wipId: WorkInProgressLocalId workInProgress: IWorkInProgress | null } + +const FAILURE_CHECK_INTERVAL = 60 * 1000 From f03434abfc909a0cdc04d8bea753d805163a6bad Mon Sep 17 00:00:00 2001 From: Jan Starzak Date: Wed, 14 Aug 2024 13:29:40 +0200 Subject: [PATCH 2/9] fix: make failureLimit to be passed through into appContainer --- .../packages/generic/src/appContainer.ts | 3 +++ shared/packages/api/src/appContainer.ts | 1 + shared/packages/api/src/config.ts | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/apps/appcontainer-node/packages/generic/src/appContainer.ts 
b/apps/appcontainer-node/packages/generic/src/appContainer.ts index 243acb70..55304d84 100644 --- a/apps/appcontainer-node/packages/generic/src/appContainer.ts +++ b/apps/appcontainer-node/packages/generic/src/appContainer.ts @@ -298,6 +298,9 @@ export class AppContainer { this.config.appContainer.worker.networkIds.length ? `--networkIds=${this.config.appContainer.worker.networkIds.join(';')}` : '', + this.config.appContainer.worker.failureLimit + ? `--failureLimit=${this.config.appContainer.worker.failureLimit}` + : '', ] } if ( diff --git a/shared/packages/api/src/appContainer.ts b/shared/packages/api/src/appContainer.ts index 1a5e3f84..64bcfccf 100644 --- a/shared/packages/api/src/appContainer.ts +++ b/shared/packages/api/src/appContainer.ts @@ -24,6 +24,7 @@ export interface AppContainerConfig { windowsDriveLetters: WorkerAgentConfig['windowsDriveLetters'] costMultiplier: number considerCPULoad: number | null + failureLimit: number } } diff --git a/shared/packages/api/src/config.ts b/shared/packages/api/src/config.ts index de613b6f..857aafee 100644 --- a/shared/packages/api/src/config.ts +++ b/shared/packages/api/src/config.ts @@ -246,6 +246,12 @@ const appContainerArguments = defineArguments({ describe: 'If set, the worker will consider the CPU load of the system it runs on before it accepts jobs. 
Set to a value between 0 and 1, the worker will accept jobs if the CPU load is below the configured value.', }, + failureLimit: { + type: 'number', + default: process.env.WORKER_FAILURE_LIMIT || 0, + describe: + 'If set, the worker will count the number of failures it encounters while working and will restart once this number is exceeded in a period of 60 seconds.', + }, }) /** CLI-argument-definitions for the "Single" process */ const singleAppArguments = defineArguments({ @@ -500,6 +506,8 @@ export async function getAppContainerConfig(): Promise Date: Thu, 15 Aug 2024 14:41:36 +0200 Subject: [PATCH 3/9] chore: update test env --- tests/internal-tests/src/__tests__/lib/setupEnv.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/internal-tests/src/__tests__/lib/setupEnv.ts b/tests/internal-tests/src/__tests__/lib/setupEnv.ts index 35f9185e..474f372c 100644 --- a/tests/internal-tests/src/__tests__/lib/setupEnv.ts +++ b/tests/internal-tests/src/__tests__/lib/setupEnv.ts @@ -83,6 +83,7 @@ const defaultTestConfig: SingleAppConfig = { costMultiplier: 1, considerCPULoad: null, pickUpCriticalExpectationsOnly: false, + failureLimit: 0, }, quantelHTTPTransformerProxy: { port: 0, @@ -103,6 +104,7 @@ const defaultTestConfig: SingleAppConfig = { windowsDriveLetters: ['X', 'Y', 'Z'], costMultiplier: 1, considerCPULoad: null, + failureLimit: 0, }, }, } From 8331ef4c9b89fca091ccaf0f5176fedc21f0bcbf Mon Sep 17 00:00:00 2001 From: Jan Starzak Date: Thu, 15 Aug 2024 15:11:54 +0200 Subject: [PATCH 4/9] fix: no need to re-throw in the catch --- shared/packages/worker/src/workerAgent.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shared/packages/worker/src/workerAgent.ts b/shared/packages/worker/src/workerAgent.ts index ad97cc88..1bb9d0da 100644 --- a/shared/packages/worker/src/workerAgent.ts +++ b/shared/packages/worker/src/workerAgent.ts @@ -801,9 +801,8 @@ export class WorkerAgent { } private async trackException(fnc: Promise): Promise { - 
fnc.catch((reason) => { + fnc.catch(() => { this.IFailed() - throw reason }) return fnc } From d33f973fb5d7c67f442365dd29778e24766b0466 Mon Sep 17 00:00:00 2001 From: Jan Starzak Date: Fri, 23 Aug 2024 14:59:39 +0200 Subject: [PATCH 5/9] feat: refactor the failure tracking to track periods of time with failures within them --- .../packages/generic/src/appContainer.ts | 7 +++- shared/packages/api/src/appContainer.ts | 3 +- shared/packages/api/src/config.ts | 41 ++++++++++++++----- shared/packages/worker/src/workerAgent.ts | 25 +++++++---- 4 files changed, 53 insertions(+), 23 deletions(-) diff --git a/apps/appcontainer-node/packages/generic/src/appContainer.ts b/apps/appcontainer-node/packages/generic/src/appContainer.ts index 55304d84..a17932f6 100644 --- a/apps/appcontainer-node/packages/generic/src/appContainer.ts +++ b/apps/appcontainer-node/packages/generic/src/appContainer.ts @@ -298,8 +298,11 @@ export class AppContainer { this.config.appContainer.worker.networkIds.length ? `--networkIds=${this.config.appContainer.worker.networkIds.join(';')}` : '', - this.config.appContainer.worker.failureLimit - ? `--failureLimit=${this.config.appContainer.worker.failureLimit}` + this.config.appContainer.worker.failurePeriodLimit + ? `--failurePeriodLimit=${this.config.appContainer.worker.failurePeriodLimit}` + : '', + this.config.appContainer.worker.failurePeriod + ? 
`--failurePeriod=${this.config.appContainer.worker.failurePeriod}` : '', ] } diff --git a/shared/packages/api/src/appContainer.ts b/shared/packages/api/src/appContainer.ts index 64bcfccf..9eae871a 100644 --- a/shared/packages/api/src/appContainer.ts +++ b/shared/packages/api/src/appContainer.ts @@ -24,7 +24,8 @@ export interface AppContainerConfig { windowsDriveLetters: WorkerAgentConfig['windowsDriveLetters'] costMultiplier: number considerCPULoad: number | null - failureLimit: number + failurePeriodLimit: number + failurePeriod: number } } diff --git a/shared/packages/api/src/config.ts b/shared/packages/api/src/config.ts index 857aafee..38e1f45b 100644 --- a/shared/packages/api/src/config.ts +++ b/shared/packages/api/src/config.ts @@ -169,11 +169,16 @@ const workerArguments = defineArguments({ default: process.env.WORKER_PICK_UP_CRITICAL_EXPECTATIONS_ONLY === '1' || false, describe: 'If set to 1, the worker will only pick up expectations that are marked as critical for playout.', }, - failureLimit: { + failurePeriodLimit: { type: 'number', - default: process.env.WORKER_FAILURE_LIMIT || 0, + default: parseInt(process.env.WORKER_FAILURE_PERIOD_LIMIT || '', 10) || 0, describe: - 'If set, the worker will count the number of failures it encounters while working and will restart once this number is exceeded in a period of 60 seconds.', + 'If set, the worker will count the number of periods of time where it encounters errors while working and will restart once the number of consequent periods of time is exceeded.', + }, + failurePeriod: { + type: 'number', + default: parseInt(process.env.WORKER_FAILURE_PERIOD || '', 10) || 5 * 60 * 1000, + describe: 'This is the period of time used by "failurePeriodLimit"', }, }) /** CLI-argument-definitions for the AppContainer process */ @@ -246,11 +251,16 @@ const appContainerArguments = defineArguments({ describe: 'If set, the worker will consider the CPU load of the system it runs on before it accepts jobs. 
Set to a value between 0 and 1, the worker will accept jobs if the CPU load is below the configured value.', }, - failureLimit: { + failurePeriodLimit: { type: 'number', - default: process.env.WORKER_FAILURE_LIMIT || 0, + default: parseInt(process.env.WORKER_FAILURE_PERIOD_LIMIT || '', 10) || 0, describe: - 'If set, the worker will count the number of failures it encounters while working and will restart once this number is exceeded in a period of 60 seconds.', + 'If set, the worker will count the number of periods of time where it encounters errors while working and will restart once the number of consequent periods of time is exceeded.', + }, + failurePeriod: { + type: 'number', + default: parseInt(process.env.WORKER_FAILURE_PERIOD || '', 10) || 5 * 60 * 1000, + describe: 'This is the period of time used by "failurePeriodLimit"', }, }) /** CLI-argument-definitions for the "Single" process */ @@ -438,7 +448,8 @@ export interface WorkerConfig { costMultiplier: number considerCPULoad: number | null pickUpCriticalExpectationsOnly: boolean - failureLimit: number + failurePeriodLimit: number + failurePeriod: number } & WorkerAgentConfig } export async function getWorkerConfig(): Promise { @@ -465,8 +476,12 @@ export async function getWorkerConfig(): Promise { (typeof argv.considerCPULoad === 'string' ? parseFloat(argv.considerCPULoad) : argv.considerCPULoad) || null, pickUpCriticalExpectationsOnly: argv.pickUpCriticalExpectationsOnly, - failureLimit: - (typeof argv.failureLimit === 'string' ? parseInt(argv.failureLimit) : argv.failureLimit) || 0, + failurePeriodLimit: + (typeof argv.failurePeriodLimit === 'string' + ? parseInt(argv.failurePeriodLimit) + : argv.failurePeriodLimit) || 0, + failurePeriod: + (typeof argv.failurePeriod === 'string' ? 
parseInt(argv.failurePeriod) : argv.failurePeriod) || 0, }, } } @@ -506,8 +521,12 @@ export async function getAppContainerConfig(): Promise> = new Map() private initWorkForceAPIPromise?: { resolve: () => void; reject: (reason?: any) => void } @@ -893,18 +894,26 @@ export class WorkerAgent { } } private setupIntervalErrorCheck() { - if (this.config.worker.failureLimit <= 0) return + if (this.config.worker.failurePeriodLimit <= 0) return if (this.intervalFailureTimer) clearInterval(this.intervalFailureTimer) - this.intervalFailureTimer = setInterval(() => this.intervalErrorCheck(), FAILURE_CHECK_INTERVAL) + this.intervalFailureTimer = setInterval(() => this.intervalErrorCheck(), this.config.worker.failurePeriod) } private intervalErrorCheck() { - if (this.config.worker.failureLimit >= 0 && this.failureCounter < this.config.worker.failureLimit) { - // reset the failureCounter when the interval elapses and it doesn't cross the threshold - this.failureCounter = 0 + if (this.failureCounter === 0) { + // reset the failurePeriodCounter when there were no exceptions in the period + this.failurePeriodCounter = 0 + // everything seems fine return - } else { + } + + if (this.failureCounter > 0) { + this.failurePeriodCounter++ + this.failureCounter = 0 + } + + if (this.failurePeriodCounter >= this.config.worker.failurePeriodLimit) { this.logger.error( - `Worker: Failed failureLimit check: ${this.failureCounter} errors in a ${FAILURE_CHECK_INTERVAL}ms window. Requesting spin down.` + `Worker: Failed failurePeriodLimit check: ${this.failurePeriodCounter} periods with errors. 
Requesting spin down.` ) this.requestShutDown() } @@ -935,5 +944,3 @@ interface CurrentJob { wipId: WorkInProgressLocalId workInProgress: IWorkInProgress | null } - -const FAILURE_CHECK_INTERVAL = 60 * 1000 From 65b6d035c9d6bbc3be668d1f0002edab7c072b48 Mon Sep 17 00:00:00 2001 From: Jan Starzak Date: Fri, 23 Aug 2024 15:08:49 +0200 Subject: [PATCH 6/9] chore: update test environment --- tests/internal-tests/src/__tests__/lib/setupEnv.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/internal-tests/src/__tests__/lib/setupEnv.ts b/tests/internal-tests/src/__tests__/lib/setupEnv.ts index 474f372c..c95129f2 100644 --- a/tests/internal-tests/src/__tests__/lib/setupEnv.ts +++ b/tests/internal-tests/src/__tests__/lib/setupEnv.ts @@ -83,7 +83,8 @@ const defaultTestConfig: SingleAppConfig = { costMultiplier: 1, considerCPULoad: null, pickUpCriticalExpectationsOnly: false, - failureLimit: 0, + failurePeriod: 0, + failurePeriodLimit: 0, }, quantelHTTPTransformerProxy: { port: 0, @@ -104,7 +105,8 @@ const defaultTestConfig: SingleAppConfig = { windowsDriveLetters: ['X', 'Y', 'Z'], costMultiplier: 1, considerCPULoad: null, - failureLimit: 0, + failurePeriod: 0, + failurePeriodLimit: 0, }, }, } From 3255189663d06d91e90d812bfc9a705589c5ba0d Mon Sep 17 00:00:00 2001 From: Johan Nyman Date: Mon, 26 Aug 2024 07:53:47 +0200 Subject: [PATCH 7/9] fix: add a `force` parameter to appcontainer.requestSpinDown() so that a worker can forcefully ask to be spun down (like if the error counter limit has been reached) --- .../packages/generic/src/appContainer.ts | 21 +++++++++++++++---- shared/packages/api/src/methods.ts | 2 +- shared/packages/worker/src/appContainerApi.ts | 4 ++-- shared/packages/worker/src/workerAgent.ts | 6 +++--- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/apps/appcontainer-node/packages/generic/src/appContainer.ts b/apps/appcontainer-node/packages/generic/src/appContainer.ts index a17932f6..1c1a0f88 100644 ---
a/apps/appcontainer-node/packages/generic/src/appContainer.ts +++ b/apps/appcontainer-node/packages/generic/src/appContainer.ts @@ -223,13 +223,26 @@ export class AppContainer { const app = this.apps.get(clientId) if (app) app.lastPing = Date.now() }, - requestSpinDown: async (): Promise => { + requestSpinDown: async (force?: boolean): Promise => { const app = this.apps.get(clientId) - if (!app || !app.isAutoScaling) return - if (this.getAutoScalingAppCount(app.appType) > this.config.appContainer.minRunningApps) { - this.spinDown(clientId, `Requested by app`).catch((error) => { + if (!app) return + + if (force) { + // The Worker is forcefully asking to be spun down. + this.spinDown(clientId, `Forced by app`).catch((error) => { this.logger.error(`Error when spinning down app "${clientId}": ${stringifyError(error)}`) }) + // Note: this.monitorApps() will soon spin up another Worker if needed + } else { + // The Worker is kindly asking to be spun down. + // The appcontainer will determine if it should be spun down. 
+ + if (!app.isAutoScaling) return + if (this.getAutoScalingAppCount(app.appType) > this.config.appContainer.minRunningApps) { + this.spinDown(clientId, `Requested by app`).catch((error) => { + this.logger.error(`Error when spinning down app "${clientId}": ${stringifyError(error)}`) + }) + } } }, workerStorageWriteLock: async ( diff --git a/shared/packages/api/src/methods.ts b/shared/packages/api/src/methods.ts index 83a77fed..0a6f812f 100644 --- a/shared/packages/api/src/methods.ts +++ b/shared/packages/api/src/methods.ts @@ -271,7 +271,7 @@ export namespace AppContainerWorkerAgent { id: WorkerAgentId ping: () => Promise - requestSpinDown: () => Promise + requestSpinDown: (force?: boolean) => Promise /** Acquire a write lock, the returned id is then used in workerStorageWrite to write */ workerStorageWriteLock: (dataId: DataId, customTimeout?: number) => Promise<{ lockId: LockId; current: any }> workerStorageReleaseLock: (dataId: DataId, lockId: LockId) => Promise diff --git a/shared/packages/worker/src/appContainerApi.ts b/shared/packages/worker/src/appContainerApi.ts index e12dd780..9bdc73e0 100644 --- a/shared/packages/worker/src/appContainerApi.ts +++ b/shared/packages/worker/src/appContainerApi.ts @@ -23,8 +23,8 @@ export class AppContainerAPI async ping(): Promise { return this._sendMessage('ping') } - async requestSpinDown(): Promise { - return this._sendMessage('requestSpinDown') + async requestSpinDown(force?: boolean): Promise { + return this._sendMessage('requestSpinDown', force) } async workerStorageWriteLock( dataId: DataId, diff --git a/shared/packages/worker/src/workerAgent.ts b/shared/packages/worker/src/workerAgent.ts index da24fd24..ad45904f 100644 --- a/shared/packages/worker/src/workerAgent.ts +++ b/shared/packages/worker/src/workerAgent.ts @@ -881,9 +881,9 @@ export class WorkerAgent { }) } } - private requestShutDown() { + private requestShutDown(force?: boolean) { if (this.appContainerAPI.connected) { - 
this.appContainerAPI.requestSpinDown().catch((err) => { + this.appContainerAPI.requestSpinDown(force).catch((err) => { this.logger.error(`Worker: appContainerAPI.requestSpinDown failed: ${stringifyError(err)}`) }) } else { @@ -915,7 +915,7 @@ export class WorkerAgent { this.logger.error( `Worker: Failed failurePeriodLimit check: ${this.failurePeriodCounter} periods with errors. Requesting spin down.` ) - this.requestShutDown() + this.requestShutDown(true) } } /** From 3a9771c15b47938cfcb78751605250a0d31b72d6 Mon Sep 17 00:00:00 2001 From: Jan Starzak Date: Wed, 28 Aug 2024 16:38:58 +0200 Subject: [PATCH 8/9] Add unit to description of config field Co-authored-by: Johan Nyman --- shared/packages/api/src/config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/packages/api/src/config.ts b/shared/packages/api/src/config.ts index 38e1f45b..11731429 100644 --- a/shared/packages/api/src/config.ts +++ b/shared/packages/api/src/config.ts @@ -178,7 +178,7 @@ const workerArguments = defineArguments({ failurePeriod: { type: 'number', default: parseInt(process.env.WORKER_FAILURE_PERIOD || '', 10) || 5 * 60 * 1000, - describe: 'This is the period of time used by "failurePeriodLimit"', + describe: 'This is the period of time used by "failurePeriodLimit" (milliseconds)', }, }) /** CLI-argument-definitions for the AppContainer process */ From de0f43b0090dd59293c98dc6ee59206675a7c09e Mon Sep 17 00:00:00 2001 From: Jan Starzak Date: Wed, 28 Aug 2024 16:39:15 +0200 Subject: [PATCH 9/9] fix(Config): Add unit to description of config field Co-authored-by: Johan Nyman --- shared/packages/api/src/config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/packages/api/src/config.ts b/shared/packages/api/src/config.ts index 11731429..1d6aee71 100644 --- a/shared/packages/api/src/config.ts +++ b/shared/packages/api/src/config.ts @@ -260,7 +260,7 @@ const appContainerArguments = defineArguments({ failurePeriod: { type: 'number', default: 
parseInt(process.env.WORKER_FAILURE_PERIOD || '', 10) || 5 * 60 * 1000, - describe: 'This is the period of time used by "failurePeriodLimit"', + describe: 'This is the period of time used by "failurePeriodLimit" (milliseconds)', }, }) /** CLI-argument-definitions for the "Single" process */