From 76f6239ff1a9f34f163c03c140c4ceba62563b4e Mon Sep 17 00:00:00 2001 From: Douglas Gubert Date: Thu, 19 Dec 2024 18:21:34 -0300 Subject: [PATCH] fix(apps): runtime orchestration fixes (#34205) --- .changeset/blue-items-raise.md | 22 +++++++++++++++++++ .changeset/gold-comics-taste.md | 22 +++++++++++++++++++ .changeset/proud-planets-applaud.md | 22 +++++++++++++++++++ .../server/lib/getAppsStatistics.ts | 8 +++---- packages/apps-engine/deno-runtime/deno.lock | 15 ++----------- .../deno-runtime/lib/metricsCollector.ts | 15 +++++++++++-- .../apps-engine/deno-runtime/lib/parseArgs.ts | 11 ++++++++++ packages/apps-engine/deno-runtime/main.ts | 5 ++++- packages/apps-engine/src/server/AppManager.ts | 2 +- packages/apps-engine/src/server/ProxiedApp.ts | 4 ++-- .../runtime/deno/AppsEngineDenoRuntime.ts | 8 +++++++ .../server/runtime/deno/LivenessManager.ts | 13 +++++++++-- 12 files changed, 121 insertions(+), 26 deletions(-) create mode 100644 .changeset/blue-items-raise.md create mode 100644 .changeset/gold-comics-taste.md create mode 100644 .changeset/proud-planets-applaud.md create mode 100644 packages/apps-engine/deno-runtime/lib/parseArgs.ts diff --git a/.changeset/blue-items-raise.md b/.changeset/blue-items-raise.md new file mode 100644 index 000000000000..7b5592a9efb2 --- /dev/null +++ b/.changeset/blue-items-raise.md @@ -0,0 +1,22 @@ +--- +'@rocket.chat/fuselage-ui-kit': patch +'@rocket.chat/instance-status': patch +'@rocket.chat/ui-theming': patch +'@rocket.chat/model-typings': patch +'@rocket.chat/ui-video-conf': patch +'@rocket.chat/uikit-playground': patch +'@rocket.chat/core-typings': patch +'@rocket.chat/rest-typings': patch +'@rocket.chat/apps-engine': patch +'@rocket.chat/ui-composer': patch +'@rocket.chat/ui-contexts': patch +'@rocket.chat/gazzodown': patch +'@rocket.chat/ui-avatar': patch +'@rocket.chat/ui-client': patch +'@rocket.chat/livechat': patch +'@rocket.chat/ui-voip': patch +'@rocket.chat/i18n': patch +'@rocket.chat/meteor': patch +--- + +Fixes an error where the engine would not retry a subprocess restart if the last attempt failed diff --git a/.changeset/gold-comics-taste.md b/.changeset/gold-comics-taste.md new file mode 100644 index 000000000000..c9f864a0ffb5 --- /dev/null +++ b/.changeset/gold-comics-taste.md @@ -0,0 +1,22 @@ +--- +'@rocket.chat/fuselage-ui-kit': patch +'@rocket.chat/instance-status': patch +'@rocket.chat/ui-theming': patch +'@rocket.chat/model-typings': patch +'@rocket.chat/ui-video-conf': patch +'@rocket.chat/uikit-playground': patch +'@rocket.chat/core-typings': patch +'@rocket.chat/rest-typings': patch +'@rocket.chat/apps-engine': patch +'@rocket.chat/ui-composer': patch +'@rocket.chat/ui-contexts': patch +'@rocket.chat/gazzodown': patch +'@rocket.chat/ui-avatar': patch +'@rocket.chat/ui-client': patch +'@rocket.chat/livechat': patch +'@rocket.chat/ui-voip': patch +'@rocket.chat/i18n': patch +'@rocket.chat/meteor': patch +--- + +Fixes error propagation when trying to get the status of apps in some cases diff --git a/.changeset/proud-planets-applaud.md b/.changeset/proud-planets-applaud.md new file mode 100644 index 000000000000..7f692f780271 --- /dev/null +++ b/.changeset/proud-planets-applaud.md @@ -0,0 +1,22 @@ +--- +'@rocket.chat/fuselage-ui-kit': patch +'@rocket.chat/instance-status': patch +'@rocket.chat/ui-theming': patch +'@rocket.chat/model-typings': patch +'@rocket.chat/ui-video-conf': patch +'@rocket.chat/uikit-playground': patch +'@rocket.chat/core-typings': patch +'@rocket.chat/rest-typings': patch +'@rocket.chat/apps-engine': patch +'@rocket.chat/ui-composer': patch +'@rocket.chat/ui-contexts': patch +'@rocket.chat/gazzodown': patch +'@rocket.chat/ui-avatar': patch +'@rocket.chat/ui-client': patch +'@rocket.chat/livechat': patch +'@rocket.chat/ui-voip': patch +'@rocket.chat/i18n': patch +'@rocket.chat/meteor': patch +--- + +Fixes wrong data being reported to total failed apps metrics and statistics diff --git a/apps/meteor/app/statistics/server/lib/getAppsStatistics.ts b/apps/meteor/app/statistics/server/lib/getAppsStatistics.ts index 40c8e1946e36..90e05d192356 100644 --- a/apps/meteor/app/statistics/server/lib/getAppsStatistics.ts +++ b/apps/meteor/app/statistics/server/lib/getAppsStatistics.ts @@ -41,7 +41,7 @@ async function _getAppsStatistics(): Promise { totalInstalled++; const status = await app.getStatus(); - const storageItem = await app.getStorageItem(); + const storageItem = app.getStorageItem(); if (storageItem.installationSource === AppInstallationSource.PRIVATE) { totalPrivateApps++; @@ -51,12 +51,10 @@ async function _getAppsStatistics(): Promise { } } - if (status === AppStatus.MANUALLY_DISABLED) { - totalFailed++; - } - if (AppStatusUtils.isEnabled(status)) { totalActive++; + } else if (status !== AppStatus.MANUALLY_DISABLED) { + totalFailed++; } }), ); diff --git a/packages/apps-engine/deno-runtime/deno.lock b/packages/apps-engine/deno-runtime/deno.lock index 86cebf98f63a..1154e7709f11 100644 --- a/packages/apps-engine/deno-runtime/deno.lock +++ b/packages/apps-engine/deno-runtime/deno.lock @@ -90,18 +90,7 @@ "https://deno.land/std@0.203.0/testing/bdd.ts": "3f446df5ef8e856a869e8eec54c8482590415741ff0b6358a00c43486cc15769", "https://deno.land/std@0.203.0/testing/mock.ts": "6576b4aa55ee20b1990d656a78fff83599e190948c00e9f25a7f3ac5e9d6492d", "https://deno.land/std@0.216.0/io/types.ts": "748bbb3ac96abda03594ef5a0db15ce5450dcc6c0d841c8906f8b10ac8d32c96", - "https://deno.land/std@0.216.0/io/write_all.ts": "24aac2312bb21096ae3ae0b102b22c26164d3249dff96dbac130958aa736f038" - }, - "workspace": { - "dependencies": [ - "npm:@msgpack/msgpack@3.0.0-beta2", - "npm:@rocket.chat/ui-kit@^0.31.22", - "npm:acorn-walk@8.2.0", - "npm:acorn@8.10.0", - "npm:astring@1.8.6", - "npm:jsonrpc-lite@2.2.0", - "npm:stack-trace@0.0.10", - "npm:uuid@8.3.2" - ] + "https://deno.land/std@0.216.0/io/write_all.ts": "24aac2312bb21096ae3ae0b102b22c26164d3249dff96dbac130958aa736f038", + "https://jsr.io/@std/cli/1.0.9/parse_args.ts": "29ac18602d8836d2723cab1d90111ff954acc369f184626a3f9f677e3185caef" } } diff --git a/packages/apps-engine/deno-runtime/lib/metricsCollector.ts b/packages/apps-engine/deno-runtime/lib/metricsCollector.ts index c257b6c8a35b..273ef2463d59 100644 --- a/packages/apps-engine/deno-runtime/lib/metricsCollector.ts +++ b/packages/apps-engine/deno-runtime/lib/metricsCollector.ts @@ -3,6 +3,7 @@ import { Queue } from "./messenger.ts"; export function collectMetrics() { return { + pid: Deno.pid, queueSize: Queue.getCurrentSize(), } }; @@ -15,6 +16,16 @@ export async function sendMetrics() { await writeAll(Deno.stderr, encoder.encode(JSON.stringify(metrics))); } -export function startMetricsReport() { - setInterval(sendMetrics, 5000); +let intervalId: number; + +export function startMetricsReport(frequencyInMs = 5000) { + if (intervalId) { + throw new Error('There is already an active metrics report'); + } + + intervalId = setInterval(sendMetrics, frequencyInMs); +} + +export function abortMetricsReport() { + clearInterval(intervalId); } diff --git a/packages/apps-engine/deno-runtime/lib/parseArgs.ts b/packages/apps-engine/deno-runtime/lib/parseArgs.ts new file mode 100644 index 000000000000..10c59cbca3a7 --- /dev/null +++ b/packages/apps-engine/deno-runtime/lib/parseArgs.ts @@ -0,0 +1,11 @@ +import { parseArgs as $parseArgs } from "https://jsr.io/@std/cli/1.0.9/parse_args.ts"; + +export type ParsedArgs = { + subprocess: string; + spawnId: number; + metricsReportFrequencyInMs?: number; +} + +export function parseArgs(args: string[]): ParsedArgs { + return $parseArgs(args); +} diff --git a/packages/apps-engine/deno-runtime/main.ts b/packages/apps-engine/deno-runtime/main.ts index 3983c8d52407..596128952168 100644 --- a/packages/apps-engine/deno-runtime/main.ts +++ b/packages/apps-engine/deno-runtime/main.ts @@ -23,6 +23,7 @@ import handleApp from './handlers/app/handler.ts'; import handleScheduler from './handlers/scheduler-handler.ts'; import registerErrorListeners from './error-handlers.ts'; import { startMetricsReport } from "./lib/metricsCollector.ts"; +import { parseArgs } from "./lib/parseArgs.ts"; type Handlers = { app: typeof handleApp; @@ -128,8 +129,10 @@ async function main() { } } +const mainArgs = parseArgs(Deno.args); + registerErrorListeners(); main(); -startMetricsReport(); +startMetricsReport(mainArgs.metricsReportFrequencyInMs); diff --git a/packages/apps-engine/src/server/AppManager.ts b/packages/apps-engine/src/server/AppManager.ts index 0bb930b723a1..0ea7e998e995 100644 --- a/packages/apps-engine/src/server/AppManager.ts +++ b/packages/apps-engine/src/server/AppManager.ts @@ -271,7 +271,7 @@ export class AppManager { const prl = new ProxiedApp(this, item, { // Maybe we should have an "EmptyRuntime" class for this? getStatus() { - return AppStatus.COMPILER_ERROR_DISABLED; + return Promise.resolve(AppStatus.COMPILER_ERROR_DISABLED); }, } as unknown as DenoRuntimeSubprocessController); diff --git a/packages/apps-engine/src/server/ProxiedApp.ts b/packages/apps-engine/src/server/ProxiedApp.ts index 4307f9c9fc93..7810ab362422 100644 --- a/packages/apps-engine/src/server/ProxiedApp.ts +++ b/packages/apps-engine/src/server/ProxiedApp.ts @@ -1,5 +1,5 @@ import type { AppManager } from './AppManager'; -import type { AppStatus } from '../definition/AppStatus'; +import { AppStatus } from '../definition/AppStatus'; import { AppsEngineException } from '../definition/exceptions'; import type { IAppAuthorInfo, IAppInfo } from '../definition/metadata'; import { AppMethod } from '../definition/metadata'; @@ -79,7 +79,7 @@ export class ProxiedApp { } public async getStatus(): Promise { - return this.appRuntime.getStatus(); + return this.appRuntime.getStatus().catch(() => AppStatus.UNKNOWN); } public async setStatus(status: AppStatus, silent?: boolean): Promise { diff --git a/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts b/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts index 983b0a9343d3..fec78b835984 100644 --- a/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts +++ b/packages/apps-engine/src/server/runtime/deno/AppsEngineDenoRuntime.ts @@ -88,6 +88,11 @@ export class DenoRuntimeSubprocessController extends EventEmitter { private state: 'uninitialized' | 'ready' | 'invalid' | 'restarting' | 'unknown' | 'stopped'; + /** + * Incremental id that keeps track of how many times we've spawned a process for this app + */ + private spawnId = 0; + private readonly debug: debug.Debugger; private readonly options = { @@ -149,6 +154,8 @@ export class DenoRuntimeSubprocessController extends EventEmitter { denoWrapperPath, '--subprocess', this.appPackage.info.id, + '--spawnId', + String(this.spawnId++), ]; // If the app doesn't request any permissions, it gets the default set of permissions, which includes "networking" @@ -296,6 +303,7 @@ export class DenoRuntimeSubprocessController extends EventEmitter { logger.info('Successfully restarted app subprocess'); } catch (e) { logger.error("Failed to restart app's subprocess", { error: e.message || e }); + throw e; } finally { await this.logStorage.storeEntries(AppConsole.toStorageEntry(this.getAppId(), logger)); } diff --git a/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts b/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts index b4c8dfb5d520..3f363c5402f1 100644 --- a/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts +++ b/packages/apps-engine/src/server/runtime/deno/LivenessManager.ts @@ -7,10 +7,11 @@ import type { ProcessMessenger } from './ProcessMessenger'; const COMMAND_PING = '_zPING'; const defaultOptions: LivenessManager['options'] = { - pingRequestTimeout: 10000, + pingRequestTimeout: 1000, pingFrequencyInMS: 10000, consecutiveTimeoutLimit: 4, maxRestarts: Infinity, + restartAttemptDelayInMS: 1000, }; /** @@ -36,6 +37,9 @@ export class LivenessManager { // Limit of times we can try to restart a process maxRestarts: number; + + // Time to delay the next restart attempt after a failed one + restartAttemptDelayInMS: number; }; private subprocess: ChildProcess; @@ -198,7 +202,12 @@ export class LivenessManager { pid: this.subprocess.pid, }); - await this.controller.restartApp(); + try { + await this.controller.restartApp(); + } catch (e) { + this.debug('Restart attempt failed. Retrying in %dms', this.options.restartAttemptDelayInMS); + setTimeout(() => this.restartProcess('Failed restart attempt'), this.options.restartAttemptDelayInMS); + } this.pingTimeoutConsecutiveCount = 0; this.restartCount++;