From f62326080d5e6ba36351cb0b6965a09f23856ac8 Mon Sep 17 00:00:00 2001 From: Guilherme Gazzo Date: Mon, 16 Dec 2024 21:54:45 -0300 Subject: [PATCH] fix: app subprocess restart on error and better reporting (#34106) Co-authored-by: Douglas Gubert <1810309+d-gubert@users.noreply.github.com> --- .changeset/giant-nails-trade.md | 5 ++ .changeset/honest-kings-allow.md | 5 ++ .changeset/quiet-radios-fry.md | 5 ++ .changeset/young-dots-cheat.md | 5 ++ .../deno-runtime/handlers/app/handler.ts | 56 +++++++++---------- .../apps-engine/deno-runtime/lib/messenger.ts | 4 ++ .../deno-runtime/lib/metricsCollector.ts | 20 +++++++ packages/apps-engine/deno-runtime/main.ts | 3 + .../runtime/deno/AppsEngineDenoRuntime.ts | 32 +++++++---- .../server/runtime/deno/LivenessManager.ts | 15 +++-- .../server/runtime/deno/ProcessMessenger.ts | 6 +- 11 files changed, 111 insertions(+), 45 deletions(-) create mode 100644 .changeset/giant-nails-trade.md create mode 100644 .changeset/honest-kings-allow.md create mode 100644 .changeset/quiet-radios-fry.md create mode 100644 .changeset/young-dots-cheat.md create mode 100644 packages/apps-engine/deno-runtime/lib/metricsCollector.ts diff --git a/.changeset/giant-nails-trade.md b/.changeset/giant-nails-trade.md new file mode 100644 index 000000000000..76e92e999247 --- /dev/null +++ b/.changeset/giant-nails-trade.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Adds simple app subprocess metrics report diff --git a/.changeset/honest-kings-allow.md b/.changeset/honest-kings-allow.md new file mode 100644 index 000000000000..7ab8d0abc1a8 --- /dev/null +++ b/.changeset/honest-kings-allow.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Attempts to restart an app subprocess if the spawn command fails diff --git a/.changeset/quiet-radios-fry.md b/.changeset/quiet-radios-fry.md new file mode 100644 index 000000000000..b3b7209cb041 --- /dev/null +++ b/.changeset/quiet-radios-fry.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Fixes an issue while collecting the error message from a failed restart attempt of an app subprocess diff --git a/.changeset/young-dots-cheat.md b/.changeset/young-dots-cheat.md new file mode 100644 index 000000000000..e8d3b6c5bff6 --- /dev/null +++ b/.changeset/young-dots-cheat.md @@ -0,0 +1,5 @@ +--- +'@rocket.chat/apps-engine': patch +--- + +Prevents app:getStatus requests from timing out in some cases diff --git a/packages/apps-engine/deno-runtime/handlers/app/handler.ts b/packages/apps-engine/deno-runtime/handlers/app/handler.ts index 2a44f34cb7fe..141e145df971 100644 --- a/packages/apps-engine/deno-runtime/handlers/app/handler.ts +++ b/packages/apps-engine/deno-runtime/handlers/app/handler.ts @@ -19,41 +19,41 @@ import handleOnUpdate from './handleOnUpdate.ts'; export default async function handleApp(method: string, params: unknown): Promise { const [, appMethod] = method.split(':'); - // We don't want the getStatus method to generate logs, so we handle it separately - if (appMethod === 'getStatus') { - return handleGetStatus(); - } + try { + // We don't want the getStatus method to generate logs, so we handle it separately + if (appMethod === 'getStatus') { + return await handleGetStatus(); + } - // `app` will be undefined if the method here is "app:construct" - const app = AppObjectRegistry.get('app'); + // `app` will be undefined if the method here is "app:construct" + const app = AppObjectRegistry.get('app'); - app?.getLogger().debug(`'${appMethod}' is being called...`); + app?.getLogger().debug(`'${appMethod}' is being called...`); - if (uikitInteractions.includes(appMethod)) { - return handleUIKitInteraction(appMethod, params).then((result) => { - if (result instanceof JsonRpcError) { - app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); - } else { - app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); - } + if (uikitInteractions.includes(appMethod)) { + return handleUIKitInteraction(appMethod, params).then((result) => { + if (result instanceof JsonRpcError) { + app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); + } else { + app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); + } - return result; - }); - } + return result; + }); + } - if (appMethod.startsWith('check') || appMethod.startsWith('execute')) { - return handleListener(appMethod, params).then((result) => { - if (result instanceof JsonRpcError) { - app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); - } else { - app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); - } + if (appMethod.startsWith('check') || appMethod.startsWith('execute')) { + return handleListener(appMethod, params).then((result) => { + if (result instanceof JsonRpcError) { + app?.getLogger().debug(`'${appMethod}' was unsuccessful.`, result.message); + } else { + app?.getLogger().debug(`'${appMethod}' was successfully called! The result is:`, result); + } - return result; - }); - } + return result; + }); + } - try { let result: Defined | JsonRpcError; switch (appMethod) { diff --git a/packages/apps-engine/deno-runtime/lib/messenger.ts b/packages/apps-engine/deno-runtime/lib/messenger.ts index 1e9ffe05c6c5..5881d408c01c 100644 --- a/packages/apps-engine/deno-runtime/lib/messenger.ts +++ b/packages/apps-engine/deno-runtime/lib/messenger.ts @@ -59,6 +59,10 @@ export const Queue = new (class Queue { this.queue.push(encoder.encode(message)); this.processQueue(); } + + public getCurrentSize() { + return this.queue.length; + } }); export const Transport = new (class Transporter { diff --git a/packages/apps-engine/deno-runtime/lib/metricsCollector.ts b/packages/apps-engine/deno-runtime/lib/metricsCollector.ts new file mode 100644 index 000000000000..c257b6c8a35b --- /dev/null +++ b/packages/apps-engine/deno-runtime/lib/metricsCollector.ts @@ -0,0 +1,20 @@ +import { writeAll } from "https://deno.land/std@0.216.0/io/write_all.ts"; +import { Queue } from "./messenger.ts"; + +export function collectMetrics() { + return { + queueSize: Queue.getCurrentSize(), + } +}; + +const encoder = new TextEncoder(); + +export async function sendMetrics() { + const metrics = collectMetrics(); + + await writeAll(Deno.stderr, encoder.encode(JSON.stringify(metrics))); +} + +export function startMetricsReport() { + setInterval(sendMetrics, 5000); +} diff --git a/packages/apps-engine/deno-runtime/main.ts b/packages/apps-engine/deno-runtime/main.ts index fa2822908954..3983c8d52407 100644 --- a/packages/apps-engine/deno-runtime/main.ts +++ b/packages/apps-engine/deno-runtime/main.ts @@ -22,6 +22,7 @@ import apiHandler from './handlers/api-handler.ts'; import handleApp from './handlers/app/handler.ts'; import handleScheduler from './handlers/scheduler-handler.ts'; import registerErrorListeners from './error-handlers.ts'; +import { startMetricsReport } from "./lib/metricsCollector.ts"; type Handlers = { app: typeof handleApp; @@ -130,3 +131,5 @@ async function main() { registerErrorListeners(); main(); + +startMetricsReport(); diff --git a/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts b/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts index 22608962bcf7..983b0a9343d3 100644 --- a/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts +++ b/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts @@ -295,7 +295,7 @@ export class DenoRuntimeSubprocessController extends EventEmitter { logger.info('Successfully restarted app subprocess'); } catch (e) { - logger.error("Failed to restart app's subprocess", { error: e }); + logger.error("Failed to restart app's subprocess", { error: e.message || e }); } finally { await this.logStorage.storeEntries(AppConsole.toStorageEntry(this.getAppId(), logger)); } @@ -322,18 +322,24 @@ export class DenoRuntimeSubprocessController extends EventEmitter { } private waitUntilReady(): Promise { + if (this.state === 'ready') { + return; + } + return new Promise((resolve, reject) => { - const timeoutId = setTimeout(() => reject(new Error(`[${this.getAppId()}] Timeout: app process not ready`)), this.options.timeout); + let timeoutId: NodeJS.Timeout; - if (this.state === 'ready') { + const handler = () => { clearTimeout(timeoutId); - return resolve(); - } + resolve(); + }; - this.once('ready', () => { - clearTimeout(timeoutId); - return resolve(); - }); + timeoutId = setTimeout(() => { + this.off('ready', handler); + reject(new Error(`[${this.getAppId()}] Timeout: app process not ready`)); + }, this.options.timeout); + + this.once('ready', handler); }); } @@ -637,6 +643,12 @@ export class DenoRuntimeSubprocessController extends EventEmitter { } private async parseError(chunk: Buffer): Promise { - console.error('Subprocess stderr', chunk.toString()); + try { + const data = JSON.parse(chunk.toString()); + + this.debug('Metrics received from subprocess: %o', data); + } catch (e) { + console.error('Subprocess stderr', chunk.toString()); + } } } diff --git a/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts b/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts index 2450b3f2ad50..b4c8dfb5d520 100644 --- a/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts +++ b/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts @@ -82,6 +82,7 @@ export class LivenessManager { this.controller.once('ready', () => this.ping()); this.subprocess.once('exit', this.handleExit.bind(this)); + this.subprocess.once('error', this.handleError.bind(this)); } /** @@ -155,6 +156,11 @@ export class LivenessManager { this.messenger.send(COMMAND_PING); } + private handleError(err: Error) { + this.debug('App has failed to start.`', err); + this.restartProcess(err.message); + } + private handleExit(exitCode: number, signal: string) { this.pingAbortController.emit('abort'); @@ -178,15 +184,13 @@ export class LivenessManager { this.restartProcess(reason); } - private restartProcess(reason: string) { + private async restartProcess(reason: string) { if (this.restartCount >= this.options.maxRestarts) { this.debug('Limit of restarts reached (%d). Aborting restart...', this.options.maxRestarts); this.controller.stopApp(); return; } - this.pingTimeoutConsecutiveCount = 0; - this.restartCount++; this.restartLog.push({ reason, restartedAt: new Date(), @@ -194,6 +198,9 @@ export class LivenessManager { pid: this.subprocess.pid, }); - this.controller.restartApp(); + await this.controller.restartApp(); + + this.pingTimeoutConsecutiveCount = 0; + this.restartCount++; } } diff --git a/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts b/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts index 03d03d125323..c919adb5f0bb 100644 --- a/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts +++ b/packages/apps-engine/src/server/runtime/deno/ProcessMessenger.ts @@ -1,11 +1,11 @@ -import { ChildProcess } from 'child_process'; +import type { ChildProcess } from 'child_process'; import type { JsonRpc } from 'jsonrpc-lite'; import { encoder } from './codec'; export class ProcessMessenger { - private deno: ChildProcess; + private deno: ChildProcess | undefined; private _sendStrategy: (message: JsonRpc) => void; @@ -30,7 +30,7 @@ export class ProcessMessenger { } private switchStrategy() { - if (this.deno instanceof ChildProcess) { + if (this.deno?.stdin?.writable) { this._sendStrategy = this.strategySend.bind(this); } else { this._sendStrategy = this.strategyError.bind(this);