From 660e1483fcc4c6fb7b38f9adba8fd672d7e67112 Mon Sep 17 00:00:00 2001
From: David Schlosnagle
Date: Sat, 7 Sep 2024 08:41:04 -0400
Subject: [PATCH] memoize gauge snapshots

---
Reviewer notes (below the three-dash line, so not part of the commit message):

* AutobatcherTelemetryComponents#registerGauges: each of the four metrics
  (waitTimer, waitTimeHistogram, runningTimer, totalTimer) backs two gauges,
  a P1 (0.01) and a median (0.5). Both gauges of a pair now read one shared
  Supplier<Snapshot> memoized for 10 ms instead of each calling getSnapshot().
  The "50%" in the code comment presumably refers to this halving of snapshot
  captures when both gauges are read within the 10 ms window - confirm with
  the author.
* DisruptorAutobatcherTest: the "no metrics produced" assertions change from
  expecting a null histogram to expecting a histogram whose count is zero.
* NOTE(review): the generic arguments (Supplier<Snapshot>, Gauge<Double>) and
  the line layout were reconstructed from a whitespace-collapsed copy of this
  patch; line counts match the hunk headers and the diffstat, but verify
  against the original commit before applying. The author e-mail address is
  missing from the From: header and must be restored before `git am`.

 .../AutobatcherTelemetryComponents.java       | 29 ++++++++++++++-----
 .../autobatch/DisruptorAutobatcherTest.java   | 21 +++++++++-----
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/atlasdb-autobatch/src/main/java/com/palantir/atlasdb/autobatch/AutobatcherTelemetryComponents.java b/atlasdb-autobatch/src/main/java/com/palantir/atlasdb/autobatch/AutobatcherTelemetryComponents.java
index f29101de1e6..d77b537b62a 100644
--- a/atlasdb-autobatch/src/main/java/com/palantir/atlasdb/autobatch/AutobatcherTelemetryComponents.java
+++ b/atlasdb-autobatch/src/main/java/com/palantir/atlasdb/autobatch/AutobatcherTelemetryComponents.java
@@ -18,8 +18,11 @@
 
 import com.codahale.metrics.Gauge;
 import com.codahale.metrics.Histogram;
+import com.codahale.metrics.Snapshot;
+import com.google.common.base.Suppliers;
 import com.palantir.tritium.metrics.registry.TaggedMetricRegistry;
 import java.time.Duration;
+import java.util.function.Supplier;
 
 public final class AutobatcherTelemetryComponents {
     private final String safeLoggablePurpose;
@@ -83,24 +86,34 @@ public static AutobatcherTelemetryComponents create(
     }
 
     private void registerGauges() {
+        // capturing a snapshot is expensive, so memoize for 10 milliseconds to reduce overhead during metrics read 50%.
+        Duration memoizeDuration = Duration.ofMillis(10);
+        Supplier<Snapshot> waitTimerSnapshot = Suppliers.memoizeWithExpiration(waitTimer::getSnapshot, memoizeDuration);
+        Supplier<Snapshot> waitTimeHistogramSnapshot =
+                Suppliers.memoizeWithExpiration(waitTimeHistogram::getSnapshot, memoizeDuration);
+        Supplier<Snapshot> runningTimerSnapshot =
+                Suppliers.memoizeWithExpiration(runningTimer::getSnapshot, memoizeDuration);
+        Supplier<Snapshot> totalTimerSnapshot =
+                Suppliers.memoizeWithExpiration(totalTimer::getSnapshot, memoizeDuration);
+
         overheadMetrics.waitTimeNanosP1(
-                (Gauge<Double>) () -> waitTimer.getSnapshot().getValue(0.01));
+                (Gauge<Double>) () -> waitTimerSnapshot.get().getValue(0.01));
         overheadMetrics.waitTimeNanosMedian(
-                (Gauge<Double>) () -> waitTimer.getSnapshot().getValue(0.5));
+                (Gauge<Double>) () -> waitTimerSnapshot.get().getValue(0.5));
 
         overheadMetrics.waitTimePercentageP1(
-                (Gauge<Double>) () -> waitTimeHistogram.getSnapshot().getValue(0.01));
+                (Gauge<Double>) () -> waitTimeHistogramSnapshot.get().getValue(0.01));
         overheadMetrics.waitTimePercentageMedian(
-                (Gauge<Double>) () -> waitTimeHistogram.getSnapshot().getValue(0.5));
+                (Gauge<Double>) () -> waitTimeHistogramSnapshot.get().getValue(0.5));
 
         overheadMetrics.runningTimeNanosP1(
-                (Gauge<Double>) () -> runningTimer.getSnapshot().getValue(0.01));
+                (Gauge<Double>) () -> runningTimerSnapshot.get().getValue(0.01));
         overheadMetrics.runningTimeNanosMedian(
-                (Gauge<Double>) () -> runningTimer.getSnapshot().getValue(0.5));
+                (Gauge<Double>) () -> runningTimerSnapshot.get().getValue(0.5));
 
         overheadMetrics.totalTimeNanosP1(
-                (Gauge<Double>) () -> totalTimer.getSnapshot().getValue(0.01));
+                (Gauge<Double>) () -> totalTimerSnapshot.get().getValue(0.01));
         overheadMetrics.totalTimeNanosMedian(
-                (Gauge<Double>) () -> totalTimer.getSnapshot().getValue(0.5));
+                (Gauge<Double>) () -> totalTimerSnapshot.get().getValue(0.5));
     }
 }
diff --git a/atlasdb-autobatch/src/test/java/com/palantir/atlasdb/autobatch/DisruptorAutobatcherTest.java b/atlasdb-autobatch/src/test/java/com/palantir/atlasdb/autobatch/DisruptorAutobatcherTest.java
index 8f3f79cc1f7..a6b16cb082f 100644
--- a/atlasdb-autobatch/src/test/java/com/palantir/atlasdb/autobatch/DisruptorAutobatcherTest.java
+++ b/atlasdb-autobatch/src/test/java/com/palantir/atlasdb/autobatch/DisruptorAutobatcherTest.java
@@ -76,17 +76,24 @@ private void assertWaitTimeAndRunningTimeAndTotalTimeMetricsAreProduced(
     }
 
     private void assertNoWaitTimeAndRunningTimeMetricsAreProduced(TaggedMetricRegistry registry) {
-        assertThat(getWaitTimeHistogram(registry)).isNull();
-        assertThat(getWaitTimePercentageHistogram(registry)).isNull();
-        assertThat(getRunningTimeHistogram(registry)).isNull();
-        assertThat(getTotalTimeHistogram(registry)).isNull();
+        assertThat(getWaitTimeHistogram(registry))
+                .satisfies(histogram -> assertThat(histogram.getCount()).isZero());
+        assertThat(getWaitTimePercentageHistogram(registry))
+                .satisfies(histogram -> assertThat(histogram.getCount()).isZero());
+        assertThat(getRunningTimeHistogram(registry))
+                .satisfies(histogram -> assertThat(histogram.getCount()).isZero());
+        assertThat(getTotalTimeHistogram(registry))
+                .satisfies(histogram -> assertThat(histogram.getCount()).isZero());
     }
 
     private void assertOnlyWaitTimeMetricsAreProduced(TaggedMetricRegistry registry, int waitTimeNanos) {
         assertThat(getWaitTimeHistogram(registry).getSnapshot().getValues()).containsExactly(waitTimeNanos);
-        assertThat(getWaitTimePercentageHistogram(registry)).isNull();
-        assertThat(getRunningTimeHistogram(registry)).isNull();
-        assertThat(getTotalTimeHistogram(registry)).isNull();
+        assertThat(getWaitTimePercentageHistogram(registry))
+                .satisfies(histogram -> assertThat(histogram.getCount()).isZero());
+        assertThat(getRunningTimeHistogram(registry))
+                .satisfies(histogram -> assertThat(histogram.getCount()).isZero());
+        assertThat(getTotalTimeHistogram(registry))
+                .satisfies(histogram -> assertThat(histogram.getCount()).isZero());
     }
 
     private static Histogram getWaitTimeHistogram(TaggedMetricRegistry registry) {