Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-26798 improve balancer visibility #9

Open
wants to merge 4 commits into
base: hubspot-2
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
protected static final String COST_FUNCTIONS_COST_FUNCTIONS_KEY =
"hbase.master.balancer.stochastic.additionalCostFunctions";
public static final String OVERALL_COST_FUNCTION_NAME = "Overall";
public static final String WIGHTED_IMBALANCE_COST_NAME = "Imbalance";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: typo "WIGHTED"


protected static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Logger LOG = LoggerFactory.getLogger(StochasticLoadBalancer.class);
Expand Down Expand Up @@ -323,7 +324,7 @@ private void updateBalancerTableLoadInfo(TableName tableName,
initCosts(cluster);
curOverallCost = computeCost(cluster, Double.MAX_VALUE);
System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
updateStochasticCosts(tableName, curOverallCost, curOverallCost/sumMultiplier, curFunctionCosts);
briaugenreich marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
Expand Down Expand Up @@ -424,7 +425,7 @@ protected boolean needsBalance(TableName tableName, Cluster cluster) {
@InterfaceAudience.Private
Cluster.Action nextAction(Cluster cluster) {
return candidateGenerators.get(RANDOM.nextInt(candidateGenerators.size()))
.generate(cluster);
.generate(cluster);
}

/**
Expand Down Expand Up @@ -480,7 +481,7 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve
double currentCost = computeCost(cluster, Double.MAX_VALUE);
curOverallCost = currentCost;
System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
updateStochasticCosts(tableName, curOverallCost, curOverallCost / sumMultiplier, curFunctionCosts);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: so we're already passing in curOverallCost to updateStochasticCosts. we call updateStochasticCosts in 3 places and in each place we have to calculate curOverallCost / sumMultiplier. rather than pass in that value, what if we just pass in sumMultiplier and do the calculation within the method impl? just a little more DRY

double initCost = currentCost;
double newCost;

Expand All @@ -505,12 +506,13 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve
}
}
LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}," +
" functionCost={} computedMaxSteps={}",
currentCost / sumMultiplier, functionCost(), computedMaxSteps);
" sumMultiplier={} sumCost={} functionCost={} computedMaxSteps={}",
currentCost / sumMultiplier, sumMultiplier, currentCost, functionCost(), computedMaxSteps);

final String initFunctionTotalCosts = totalCostsPerFunc();
// Perform a stochastic walk to see if we can get a good fit.
long step;
int moveCostLogCounter = 0;

for (step = 0; step < computedMaxSteps; step++) {
Cluster.Action action = nextAction(cluster);
Expand All @@ -532,6 +534,10 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve
curOverallCost = currentCost;
System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);
} else {
//LOG hidden move cost if greatly impacting ability to find better plan
logMoveCosts(newCost, tempFunctionCosts, moveCostLogCounter);
moveCostLogCounter++;

// Put things back the way they were before.
// TODO: undo by remembering old values
Action undoAction = action.undoAction();
Expand All @@ -548,7 +554,7 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve
metricsBalancer.balanceCluster(endTime - startTime);

if (initCost > currentCost) {
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
updateStochasticCosts(tableName, curOverallCost, curOverallCost / sumMultiplier, curFunctionCosts);
plans = createRegionPlans(cluster);
LOG.info("Finished computing new moving plan. Computation took {} ms" +
" to try {} different iterations. Found a solution that moves " +
Expand All @@ -559,9 +565,11 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve
sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
return plans;
}

LOG.info("Could not find a better moving plan. Tried {} different configurations in "
+ "{} ms, and did not find anything with an imbalance score less than {}", step,
+ "{} ms, and did not find anything with an imbalance score less than {}.", step,
endTime - startTime, initCost / sumMultiplier);

return null;
}

Expand Down Expand Up @@ -603,10 +611,30 @@ private void sendRegionPlansToRingBuffer(List<RegionPlan> plans, double currentC
}
}

private void logMoveCosts(double currentCost, double [] currentFunctionCosts, int logCount){
if (logCount > 5){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does printing this 5 times provide us any benefit over printing it once?

// no op - so as to not flood the logs
return;
}
for (int i = 0; i < costFunctions.size(); i++) {
CostFunction costFunction = costFunctions.get(i);
if (costFunction instanceof MoveCostFunction){
String costFunctionName = costFunction.getClass().getSimpleName();
double moveCost = currentFunctionCosts[i];
double costPercent = (currentCost == 0) ? 0 : (moveCost/ currentCost) * 100;
if (costPercent > .50 ){
LOG.info("{} may be impacting the ability to find an improved plan. calculatedMoveCost={} percentageOfPlanCost={}%. Consider lowering moveCost multiplier.", costFunctionName, moveCost, costPercent );
break;
}
}
briaugenreich marked this conversation as resolved.
Show resolved Hide resolved

}
}
/**
* update costs to JMX
*/
private void updateStochasticCosts(TableName tableName, double overall, double[] subCosts) {
private void updateStochasticCosts(TableName tableName, double overall, double weightedImbalance, double[] subCosts) {

if (tableName == null) {
return;
}
Expand All @@ -618,7 +646,11 @@ private void updateStochasticCosts(TableName tableName, double overall, double[]
balancer.updateStochasticCost(tableName.getNameAsString(),
OVERALL_COST_FUNCTION_NAME, "Overall cost", overall);

// each cost function
// weighted imbalance
balancer.updateStochasticCost(tableName.getNameAsString(),
WIGHTED_IMBALANCE_COST_NAME, "Weighted imbalance", weightedImbalance);

// each cost function as a percent of overall cost
for (int i = 0; i < costFunctions.size(); i++) {
CostFunction costFunction = costFunctions.get(i);
String costFunctionName = costFunction.getClass().getSimpleName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.apache.hadoop.hbase.master.balancer;

import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
Expand Down Expand Up @@ -51,7 +50,6 @@
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.FixMethodOrder;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runners.MethodSorters;
Expand All @@ -60,7 +58,6 @@

@Category({ MiscTests.class, MediumTests.class })
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
@Ignore
public class TestStochasticBalancerJmxMetrics extends BalancerTestBase {

@ClassRule
Expand Down Expand Up @@ -130,6 +127,7 @@ public static void tearDownAfterClass() throws Exception {
*/
@Test
public void testJmxMetrics_EnsembleMode() throws Exception {

loadBalancer = new StochasticLoadBalancer();

conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false);
Expand All @@ -141,11 +139,12 @@ public void testJmxMetrics_EnsembleMode() throws Exception {

String[] tableNames = new String[] { tableName.getNameAsString() };
String[] functionNames = loadBalancer.getCostFunctionNames();
Set<String> jmxMetrics = readJmxMetricsWithRetry();

Set<String> jmxMetrics = readJmxMetricsWithRetry(functionNames.length + 1);
Set<String> expectedMetrics = getExpectedJmxMetrics(tableNames, functionNames);

// printMetrics(jmxMetrics, "existing metrics in ensemble mode");
// printMetrics(expectedMetrics, "expected metrics in ensemble mode");
printMetrics(jmxMetrics, "existing metrics in ensemble mode");
printMetrics(expectedMetrics, "expected metrics in ensemble mode");

// assert that every expected is in the JMX
for (String expected : expectedMetrics) {
Expand All @@ -159,16 +158,18 @@ public void testJmxMetrics_EnsembleMode() throws Exception {
*/
@Test
public void testJmxMetrics_PerTableMode() throws Exception {

loadBalancer = new StochasticLoadBalancer();

conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, true);
loadBalancer.setConf(conf);

// NOTE the size is normally set in setClusterMetrics, for test purpose, we set it manually
// Tables: hbase:namespace, table1, table2
// Functions: costFunctions, overall
// Functions: costFunctions, overall, imbalance
String[] functionNames = loadBalancer.getCostFunctionNames();
loadBalancer.updateMetricsSize(3 * (functionNames.length + 1));

loadBalancer.updateMetricsSize(3 * (functionNames.length + 2));

// table 1
TableName tableName = TableName.valueOf(TABLE_NAME_1);
Expand All @@ -180,17 +181,19 @@ public void testJmxMetrics_PerTableMode() throws Exception {
clusterState = mockClusterServers(mockCluster_pertable_2);
loadBalancer.balanceTable(tableName, clusterState);


// table hbase:namespace
tableName = TableName.valueOf(TABLE_NAME_NAMESPACE);
clusterState = mockClusterServers(mockCluster_pertable_namespace);
loadBalancer.balanceTable(tableName, clusterState);


String[] tableNames = new String[] { TABLE_NAME_1, TABLE_NAME_2, TABLE_NAME_NAMESPACE };
Set<String> jmxMetrics = readJmxMetricsWithRetry();
Set<String> jmxMetrics = readJmxMetricsWithRetry(3 * (functionNames.length + 2));
Set<String> expectedMetrics = getExpectedJmxMetrics(tableNames, functionNames);

// printMetrics(jmxMetrics, "existing metrics in per-table mode");
// printMetrics(expectedMetrics, "expected metrics in per-table mode");
printMetrics(jmxMetrics, "existing metrics in per-table mode");
printMetrics(expectedMetrics, "expected metrics in per-table mode");

// assert that every expected is in the JMX
for (String expected : expectedMetrics) {
Expand All @@ -199,11 +202,13 @@ public void testJmxMetrics_PerTableMode() throws Exception {
}
}

private Set<String> readJmxMetricsWithRetry() throws IOException {
private Set<String> readJmxMetricsWithRetry(int expectedMetricsCount) throws IOException {
final int count = 0;
for (int i = 0; i < 10; i++) {
for (int i = 0; i < 20; i++) {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just realized in another PR the count used on line 214 is never updated... TODO to fix.


Set<String> metrics = readJmxMetrics();
if (metrics != null) {

if (metrics != null && metrics.size() >= expectedMetricsCount){
return metrics;
}
LOG.warn("Failed to get jmxmetrics... sleeping, retrying; " + i + " of " + count + " times");
Expand All @@ -219,6 +224,7 @@ private Set<String> readJmxMetrics() throws IOException {
JMXConnector connector = null;
ObjectName target = null;
MBeanServerConnection mb = null;

try {
connector =
JMXConnectorFactory.connect(JMXListener.buildJMXServiceURL(connectorPort, connectorPort));
Expand Down Expand Up @@ -268,6 +274,7 @@ private Set<String> getExpectedJmxMetrics(String[] tableNames, String[] function

for (String tableName : tableNames) {
ret.add(StochasticLoadBalancer.composeAttributeName(tableName, "Overall"));
ret.add(StochasticLoadBalancer.composeAttributeName(tableName, "Imbalance"));
for (String functionName : functionNames) {
String metricsName = StochasticLoadBalancer.composeAttributeName(tableName, functionName);
ret.add(metricsName);
Expand All @@ -286,5 +293,6 @@ private static void printMetrics(Set<String> metrics, String info) {
for (String str : metrics) {
LOG.info(" ++++ " + str);
}

}
}