Skip to content

Commit

Permalink
Add new ClutchRps auto scale type. Include sourcejob drop in PID co…
Browse files Browse the repository at this point in the history
…ntroller. (#85)

* Add mantis publish properties to docs

* Update MQL docs for using [*]

* Add myself as code owners

* Rule base auto scale should not scale down lower than min

* add comment

* Subscribe to source job drop metrics in job master

* add copy right, fix tests

* Address review comments. Add parameters to system params.

* Handle empty string for drop metric param

* Fix bug in determining source job metric vs job metric

* Add ClutchRps auto scaler, outlier detection using sourcejob drop

* clean up

* add copyright

* fix tests

* make setpoint percentile configurable

* add more test on config generation

* add test message

Co-authored-by: Calvin Cheung <[email protected]>
  • Loading branch information
calvin681 and calvin681 authored Jan 4, 2021
1 parent 884b891 commit aab3066
Show file tree
Hide file tree
Showing 16 changed files with 1,037 additions and 113 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ public enum ScalingReason {
KafkaProcessed,
Clutch,
ClutchExperimental,
ClutchRps,
RPS,
JVMMemory,
SourceJobDrop
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public class CompressionUtilsTest {
try (BufferedReader reader = new BufferedReader(new StringReader(testInput))) {
List<MantisServerSentEvent> result = CompressionUtils.tokenize(reader, delimiter);

assertEquals(result.size(), 3);
assertEquals("Delimiter: '" + delimiter + "'", result.size(), 3);
assertEquals(result.get(0).getEventAsString(), event1);
assertEquals(result.get(1).getEventAsString(), event2);
assertEquals(result.get(2).getEventAsString(), event3);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright 2020 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.mantisrx.server.core;

import rx.functions.Action1;

import java.util.HashMap;
import java.util.Map;

/**
* Worker outlier detector that buffers events based on time before determining outliers. This is used for high volume
* metrics such as sourcejob drops. Volume may have high variation over time, buffering by time will eliminate the variant.
*/
public class TimeBufferedWorkerOutlier extends WorkerOutlier {
private Map<Integer, CumulatedValue> workerValues = new HashMap<>();
private long bufferedSecs;

public TimeBufferedWorkerOutlier(long cooldownSecs, long bufferedSecs, Action1<Integer> outlierTrigger) {
super(cooldownSecs, outlierTrigger);
this.bufferedSecs = bufferedSecs;
}

@Override
public void addDataPoint(int workerIndex, double value, int numWorkers) {
CumulatedValue cumulatedValue;
synchronized (workerValues) {
cumulatedValue = workerValues.get(workerIndex);
if (cumulatedValue == null) {
cumulatedValue = new CumulatedValue();
workerValues.put(workerIndex, cumulatedValue);
}
}

double dataPoint = -1;
synchronized (cumulatedValue) {
if (System.currentTimeMillis() - cumulatedValue.startTs > bufferedSecs * 1000) {
dataPoint = cumulatedValue.value;
cumulatedValue.reset();
}
cumulatedValue.increment(value);
}

if (dataPoint != -1) {
super.addDataPoint(workerIndex, dataPoint, numWorkers);
}
}

public static class CumulatedValue {
private long startTs = System.currentTimeMillis();
private double value = 0;

public void increment(double incr) {
value += incr;
}

public void reset() {
startTs = System.currentTimeMillis();
value = 0;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright 2020 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.mantisrx.server.core;

import org.junit.Test;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.junit.Assert.*;

public class TimeBufferedWorkerOutlierTest {
@Test
public void testOutlier() throws Exception {
long bufferSec = 1;
CountDownLatch latch = new CountDownLatch(1);
TimeBufferedWorkerOutlier outlier = new TimeBufferedWorkerOutlier(600, bufferSec, index -> {
if (index == 0) {
latch.countDown();
}
});

for (int i = 0; i < 16; i++) {
// Add multiple data points within the buffer period.
for (int j = 0; j < 3; j++) {
outlier.addDataPoint(0, 5, 2);
outlier.addDataPoint(1, 5, 2);
}
assertFalse(latch.await(bufferSec * 1500, TimeUnit.MILLISECONDS));
}

outlier.addDataPoint(0, 15, 2);
assertFalse(latch.await(bufferSec * 1500, TimeUnit.MILLISECONDS));

// Additional data points with higher value. Need to have > 70% of 20 outliers to trigger.
for (int i = 0; i < 14; i++) {
for (int j = 0; j < 3; j++) {
outlier.addDataPoint(0, 6, 2);
}
assertFalse(latch.await(bufferSec * 1500, TimeUnit.MILLISECONDS));
}

outlier.addDataPoint(0, 18, 2);
assertFalse(latch.await(bufferSec * 1500, TimeUnit.MILLISECONDS));
outlier.addDataPoint(0, 18, 2);
assertTrue(latch.await(bufferSec * 1500, TimeUnit.MILLISECONDS));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright 2020 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.mantisrx.server.core;

import org.junit.Test;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.junit.Assert.*;

public class WorkerOutlierTest {
@Test
public void testOutlier() throws Exception {
CountDownLatch latch = new CountDownLatch(1);
WorkerOutlier outlier = new WorkerOutlier(600, index -> {
if (index == 0) {
latch.countDown();
}
});

for (int i = 0; i < 16; i++) {
outlier.addDataPoint(0, 5, 2);
outlier.addDataPoint(1, 5, 2);
assertEquals(1, latch.getCount());
}

outlier.addDataPoint(0, 5, 2);
assertFalse(latch.await(1, TimeUnit.SECONDS));

// Additional data points with higher value. Need to have > 70% of 20 outliers to trigger.
for (int i = 0; i < 14; i++) {
outlier.addDataPoint(0, 6, 2);
assertEquals(1, latch.getCount());
}

outlier.addDataPoint(0, 6, 2);
assertTrue(latch.await(1, TimeUnit.SECONDS));
}

@Test
public void testOutlierMultipleWorkers() throws Exception {
CountDownLatch latch = new CountDownLatch(1);
WorkerOutlier outlier = new WorkerOutlier(600, index -> {
if (index == 0) {
latch.countDown();
}
});

for (int i = 0; i < 16; i++) {
outlier.addDataPoint(0, 6, 4);
outlier.addDataPoint(1, 6, 4);
outlier.addDataPoint(2, 5, 4);
outlier.addDataPoint(3, 5, 4);
assertEquals(1, latch.getCount());
}

outlier.addDataPoint(0, 6, 4);
assertFalse(latch.await(1, TimeUnit.SECONDS));

// Additional data points with higher value. Need to have > 70% of 20 outliers to trigger.
for (int i = 0; i < 14; i++) {
outlier.addDataPoint(0, 8, 4);
assertEquals(1, latch.getCount());
}

outlier.addDataPoint(0, 8, 4);
assertTrue(latch.await(1, TimeUnit.SECONDS));
}
}
Loading

0 comments on commit aab3066

Please sign in to comment.