Skip to content

Commit

Permalink
[8.17] implements backoff delay in usage counters service (#206363) (#…
Browse files Browse the repository at this point in the history
…206851)

# Backport

This will backport the following commits from `main` to `8.17`:
- [implements backoff delay in usage counters service
(#206363)](#206363)

<!--- Backport version: 9.4.3 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Christiane (Tina)
Heiligers","email":"[email protected]"},"sourceCommit":{"committedDate":"2025-01-15T18:42:44Z","message":"implements
backoff delay in usage counters service (#206363)\n\nfix
https://github.com/elastic/kibana/issues/192829\r\n\r\n##
Summary\r\n\r\nUsage counters lose counts when there's a version
conflict exception to\r\nincrement counters.\r\nATM, the service retries
`incrementCounter` immediately on error, which\r\ncould also fail if
another Kibana tries at the same time.\r\nUsing a backoff retry reduces
the likelihood of a version conflict\r\nexception.  \r\n\r\nThis PR
replaces the retry with an exponential `backoffDelay` that will\r\nwait
longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [x] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests
changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<[email protected]>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31","branchLabelMapping":{"^v9.0.0$":"main","^v8.18.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:all-open"],"title":"implements
backoff delay in usage counters
service","number":206363,"url":"https://github.com/elastic/kibana/pull/206363","mergeCommit":{"message":"implements
backoff delay in usage counters service (#206363)\n\nfix
https://github.com/elastic/kibana/issues/192829\r\n\r\n##
Summary\r\n\r\nUsage counters lose counts when there's a version
conflict exception to\r\nincrement counters.\r\nATM, the service retries
`incrementCounter` immediately on error, which\r\ncould also fail if
another Kibana tries at the same time.\r\nUsing a backoff retry reduces
the likelihood of a version conflict\r\nexception.  \r\n\r\nThis PR
replaces the retry with an exponential `backoffDelay` that will\r\nwait
longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [x] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests
changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<[email protected]>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/206363","number":206363,"mergeCommit":{"message":"implements
backoff delay in usage counters service (#206363)\n\nfix
https://github.com/elastic/kibana/issues/192829\r\n\r\n##
Summary\r\n\r\nUsage counters lose counts when there's a version
conflict exception to\r\nincrement counters.\r\nATM, the service retries
`incrementCounter` immediately on error, which\r\ncould also fail if
another Kibana tries at the same time.\r\nUsing a backoff retry reduces
the likelihood of a version conflict\r\nexception.  \r\n\r\nThis PR
replaces the retry with an exponential `backoffDelay` that will\r\nwait
longer between retry attempts.\r\n\r\n\r\n- [x] [Unit or
functional\r\ntests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)\r\nwere
updated or added to match the most common scenarios\r\n- [x] [Flaky
Test\r\nRunner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1)
was\r\nused on any tests
changed\r\n(https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7701)\r\n\r\n---------\r\n\r\nCo-authored-by:
Elastic Machine
<[email protected]>","sha":"fd3b4ed590006212855c1a80994649eacbf41f31"}}]}]
BACKPORT-->

Co-authored-by: Christiane (Tina) Heiligers <[email protected]>
  • Loading branch information
kibanamachine and TinaHeiligers authored Jan 15, 2025
1 parent 60ac68e commit fdce5f8
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 50 deletions.
1 change: 1 addition & 0 deletions src/plugins/usage_collection/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
export const KIBANA_STATS_TYPE = 'kibana_stats';
export const DEFAULT_MAXIMUM_WAIT_TIME_FOR_ALL_COLLECTORS_IN_S = 1;
export const MAIN_APP_DEFAULT_VIEW_ID = 'main';
export const USAGE_COUNTERS_BACKOFF_DELAY_IN_MS = 500;
2 changes: 1 addition & 1 deletion src/plugins/usage_collection/server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import { DEFAULT_MAXIMUM_WAIT_TIME_FOR_ALL_COLLECTORS_IN_S } from '../common/con
export const configSchema = schema.object({
usageCounters: schema.object({
enabled: schema.boolean({ defaultValue: true }),
retryCount: schema.number({ defaultValue: 1 }),
retryCount: schema.number({ defaultValue: 3 }),
bufferDuration: schema.duration({ defaultValue: '5s' }),
}),
uiCounters: schema.object({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ export class UsageCounter implements IUsageCounter {
incrementBy = 1,
namespace,
} = params;

this.counter$.next({
domainId: this.domainId,
counterName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ const registerUsageCountersRollupsMock = registerUsageCountersRollups as jest.Mo
typeof registerUsageCountersRollups
>;

const tick = () => {
// optionally advance test timers after a delay
const tickWithDelay = (delay = 1) => {
jest.useRealTimers();
return new Promise((resolve) => setTimeout(resolve, 1));
return new Promise((resolve) => setTimeout(resolve, delay));
};

describe('UsageCountersService', () => {
Expand Down Expand Up @@ -188,51 +189,62 @@ describe('UsageCountersService', () => {
]
`);
});

it('retries errors by `retryCount` times before failing to store', async () => {
const usageCountersService = new UsageCountersService({
logger,
retryCount: 1,
bufferDurationMs,
});

const mockRepository = coreStart.savedObjects.createInternalRepository();
const mockError = new Error('failed.');
const mockIncrementCounter = jest.fn().mockImplementation((_, key) => {
switch (key) {
case 'test-counter:counterA:count:server:20210409':
throw mockError;
case 'test-counter:counterB:count:server:20210409':
return 'pass';
default:
throw new Error(`unknown key ${key}`);
}
});

mockRepository.incrementCounter = mockIncrementCounter;

coreStart.savedObjects.createInternalRepository.mockReturnValue(mockRepository);
const { createUsageCounter } = usageCountersService.setup(coreSetup);
jest.useFakeTimers();
const usageCounter = createUsageCounter('test-counter');

usageCountersService.start(coreStart);
usageCounter.incrementCounter({ counterName: 'counterA' });
usageCounter.incrementCounter({ counterName: 'counterB' });
jest.runOnlyPendingTimers();

// wait for retries to kick in on next scheduler call
await tick();
// number of incrementCounter calls + number of retries
expect(mockIncrementCounter).toBeCalledTimes(2 + 1);
expect(logger.debug).toHaveBeenNthCalledWith(1, 'Store counters into savedObjects', {
kibana: {
usageCounters: {
results: [mockError, 'pass'],
// requires extended test runtime because of exponential backoff delay for retries
it(
'retries errors by `retryCount` times before failing to store',
async () => {
const retryConst = 2;
const usageCountersService = new UsageCountersService({
logger,
retryCount: retryConst,
bufferDurationMs: 50000,
});

const mockRepository = coreStart.savedObjects.createInternalRepository();
const mockError = new Error('failed');
const mockIncrementCounter = jest.fn().mockImplementation((_, key) => {
switch (key) {
case 'test-counter:counterA:count:server:20210409':
throw mockError;
case 'test-counter:counterB:count:server:20210409':
return 'pass';
default:
throw new Error(`unknown key ${key}`);
}
});

mockRepository.incrementCounter = mockIncrementCounter;

coreStart.savedObjects.createInternalRepository.mockReturnValue(mockRepository);
const { createUsageCounter } = usageCountersService.setup(coreSetup);
jest.useFakeTimers();
const usageCounter = createUsageCounter('test-counter');

usageCountersService.start(coreStart);
usageCounter.incrementCounter({ counterName: 'counterA' });
usageCounter.incrementCounter({ counterName: 'counterB' });
jest.runOnlyPendingTimers();

// wait for retries to kick in on next scheduler call
await tickWithDelay(5000);
// number of incrementCounter calls + number of retries
expect(mockIncrementCounter).toBeCalledTimes(2 + retryConst);
// assert counterA increment error warning logs
expect(logger.warn).toHaveBeenNthCalledWith(
2,
`${mockError}, retrying attempt ${retryConst}`
);
expect(logger.warn).toHaveBeenNthCalledWith(3, mockError);
expect(logger.debug).toHaveBeenNthCalledWith(1, 'Store counters into savedObjects', {
kibana: {
usageCounters: {
results: [mockError, 'pass'],
},
},
},
});
});
});
},
10 * 1000
);

it('buffers counters within `bufferDurationMs` time', async () => {
const usageCountersService = new UsageCountersService({
Expand Down Expand Up @@ -264,7 +276,7 @@ describe('UsageCountersService', () => {
jest.runOnlyPendingTimers();

// wait for debounce to kick in on next scheduler call
await tick();
await tickWithDelay();
expect(mockIncrementCounter).toBeCalledTimes(2);
expect(mockIncrementCounter.mock.results.map(({ value }) => value)).toMatchInlineSnapshot(`
Array [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import {
} from './saved_objects';
import { registerUsageCountersRollups } from './rollups';
import { searchUsageCounters } from './search';
import { USAGE_COUNTERS_BACKOFF_DELAY_IN_MS } from '../../common/constants';

interface UsageCountersLogMeta extends LogMeta {
kibana: { usageCounters: { results: unknown[] } };
Expand Down Expand Up @@ -159,14 +160,23 @@ export class UsageCountersService {
};
};

private backoffDelay = (attempt: number) =>
Math.pow(2, attempt) * USAGE_COUNTERS_BACKOFF_DELAY_IN_MS; // exponential backoff: 500ms, 1000ms, 2000ms

private storeDate$(
counters: UsageCounters.v1.CounterMetric[],
soRepository: Pick<SavedObjectsRepository, 'incrementCounter'>
) {
return Rx.forkJoin(
counters.map((metric) =>
Rx.defer(() => storeCounter({ metric, soRepository })).pipe(
Rx.retry(this.retryCount),
Rx.retry({
count: this.retryCount,
delay: (error, retryIndex) => {
this.logger.warn(`Error: ${error.message}, retrying attempt ${retryIndex}`); // extra warning logger
return Rx.timer(this.backoffDelay(retryIndex));
},
}),
Rx.catchError((error) => {
this.logger.warn(error);
return Rx.of(error);
Expand Down

0 comments on commit fdce5f8

Please sign in to comment.