-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HBASE-26798 improve balancer visibility #9
base: hubspot-2
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -132,6 +132,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { | |
protected static final String COST_FUNCTIONS_COST_FUNCTIONS_KEY = | ||
"hbase.master.balancer.stochastic.additionalCostFunctions"; | ||
public static final String OVERALL_COST_FUNCTION_NAME = "Overall"; | ||
public static final String WIGHTED_IMBALANCE_COST_NAME = "Imbalance"; | ||
|
||
protected static final Random RANDOM = new Random(System.currentTimeMillis()); | ||
private static final Logger LOG = LoggerFactory.getLogger(StochasticLoadBalancer.class); | ||
|
@@ -323,7 +324,7 @@ private void updateBalancerTableLoadInfo(TableName tableName, | |
initCosts(cluster); | ||
curOverallCost = computeCost(cluster, Double.MAX_VALUE); | ||
System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); | ||
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); | ||
updateStochasticCosts(tableName, curOverallCost, curOverallCost/sumMultiplier, curFunctionCosts); | ||
briaugenreich marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
@Override | ||
|
@@ -424,7 +425,7 @@ protected boolean needsBalance(TableName tableName, Cluster cluster) { | |
@InterfaceAudience.Private | ||
Cluster.Action nextAction(Cluster cluster) { | ||
return candidateGenerators.get(RANDOM.nextInt(candidateGenerators.size())) | ||
.generate(cluster); | ||
.generate(cluster); | ||
} | ||
|
||
/** | ||
|
@@ -480,7 +481,7 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve | |
double currentCost = computeCost(cluster, Double.MAX_VALUE); | ||
curOverallCost = currentCost; | ||
System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); | ||
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); | ||
updateStochasticCosts(tableName, curOverallCost, curOverallCost / sumMultiplier, curFunctionCosts); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: so we're already passing in curOverallCost to updateStochasticCosts. we call updateStochasticCosts in 3 places and in each place we have to calculate |
||
double initCost = currentCost; | ||
double newCost; | ||
|
||
|
@@ -505,12 +506,13 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve | |
} | ||
} | ||
LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}," + | ||
" functionCost={} computedMaxSteps={}", | ||
currentCost / sumMultiplier, functionCost(), computedMaxSteps); | ||
" sumMultiplier={} sumCost={} functionCost={} computedMaxSteps={}", | ||
currentCost / sumMultiplier, sumMultiplier, currentCost, functionCost(), computedMaxSteps); | ||
|
||
final String initFunctionTotalCosts = totalCostsPerFunc(); | ||
// Perform a stochastic walk to see if we can get a good fit. | ||
long step; | ||
int moveCostLogCounter = 0; | ||
|
||
for (step = 0; step < computedMaxSteps; step++) { | ||
Cluster.Action action = nextAction(cluster); | ||
|
@@ -532,6 +534,10 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve | |
curOverallCost = currentCost; | ||
System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); | ||
} else { | ||
//LOG hidden move cost if greatly impacting ability to find better plan | ||
logMoveCosts(newCost, tempFunctionCosts, moveCostLogCounter); | ||
moveCostLogCounter++; | ||
|
||
// Put things back the way they were before. | ||
// TODO: undo by remembering old values | ||
Action undoAction = action.undoAction(); | ||
|
@@ -548,7 +554,7 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve | |
metricsBalancer.balanceCluster(endTime - startTime); | ||
|
||
if (initCost > currentCost) { | ||
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); | ||
updateStochasticCosts(tableName, curOverallCost, curOverallCost / sumMultiplier, curFunctionCosts); | ||
plans = createRegionPlans(cluster); | ||
LOG.info("Finished computing new moving plan. Computation took {} ms" + | ||
" to try {} different iterations. Found a solution that moves " + | ||
|
@@ -559,9 +565,11 @@ public synchronized List<RegionPlan> balanceTable(TableName tableName, Map<Serve | |
sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step); | ||
return plans; | ||
} | ||
|
||
LOG.info("Could not find a better moving plan. Tried {} different configurations in " | ||
+ "{} ms, and did not find anything with an imbalance score less than {}", step, | ||
+ "{} ms, and did not find anything with an imbalance score less than {}.", step, | ||
endTime - startTime, initCost / sumMultiplier); | ||
|
||
return null; | ||
} | ||
|
||
|
@@ -603,10 +611,30 @@ private void sendRegionPlansToRingBuffer(List<RegionPlan> plans, double currentC | |
} | ||
} | ||
|
||
private void logMoveCosts(double currentCost, double [] currentFunctionCosts, int logCount){ | ||
if (logCount > 5){ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does printing this 5 times provide us any benefit over printing it once? |
||
// no op - so as to not flood the logs | ||
return; | ||
} | ||
for (int i = 0; i < costFunctions.size(); i++) { | ||
CostFunction costFunction = costFunctions.get(i); | ||
if (costFunction instanceof MoveCostFunction){ | ||
String costFunctionName = costFunction.getClass().getSimpleName(); | ||
double moveCost = currentFunctionCosts[i]; | ||
double costPercent = (currentCost == 0) ? 0 : (moveCost/ currentCost) * 100; | ||
if (costPercent > .50 ){ | ||
LOG.info("{} may be impacting the ability to find an improved plan. calculatedMoveCost={} percentageOfPlanCost={}%. Consider lowering moveCost multiplier.", costFunctionName, moveCost, costPercent ); | ||
break; | ||
} | ||
} | ||
briaugenreich marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
} | ||
} | ||
/** | ||
* update costs to JMX | ||
*/ | ||
private void updateStochasticCosts(TableName tableName, double overall, double[] subCosts) { | ||
private void updateStochasticCosts(TableName tableName, double overall, double weightedImbalance, double[] subCosts) { | ||
|
||
if (tableName == null) { | ||
return; | ||
} | ||
|
@@ -618,7 +646,11 @@ private void updateStochasticCosts(TableName tableName, double overall, double[] | |
balancer.updateStochasticCost(tableName.getNameAsString(), | ||
OVERALL_COST_FUNCTION_NAME, "Overall cost", overall); | ||
|
||
// each cost function | ||
// weighted imbalance | ||
balancer.updateStochasticCost(tableName.getNameAsString(), | ||
WIGHTED_IMBALANCE_COST_NAME, "Weighted imbalance", weightedImbalance); | ||
|
||
// each cost function as a percent of overall cost | ||
for (int i = 0; i < costFunctions.size(); i++) { | ||
CostFunction costFunction = costFunctions.get(i); | ||
String costFunctionName = costFunction.getClass().getSimpleName(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,6 @@ | |
package org.apache.hadoop.hbase.master.balancer; | ||
|
||
import static org.junit.Assert.assertTrue; | ||
|
||
import java.io.IOException; | ||
import java.util.HashSet; | ||
import java.util.Hashtable; | ||
|
@@ -51,7 +50,6 @@ | |
import org.junit.BeforeClass; | ||
import org.junit.ClassRule; | ||
import org.junit.FixMethodOrder; | ||
import org.junit.Ignore; | ||
import org.junit.Test; | ||
import org.junit.experimental.categories.Category; | ||
import org.junit.runners.MethodSorters; | ||
|
@@ -60,7 +58,6 @@ | |
|
||
@Category({ MiscTests.class, MediumTests.class }) | ||
@FixMethodOrder(MethodSorters.NAME_ASCENDING) | ||
@Ignore | ||
public class TestStochasticBalancerJmxMetrics extends BalancerTestBase { | ||
|
||
@ClassRule | ||
|
@@ -130,6 +127,7 @@ public static void tearDownAfterClass() throws Exception { | |
*/ | ||
@Test | ||
public void testJmxMetrics_EnsembleMode() throws Exception { | ||
|
||
loadBalancer = new StochasticLoadBalancer(); | ||
|
||
conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, false); | ||
|
@@ -141,11 +139,12 @@ public void testJmxMetrics_EnsembleMode() throws Exception { | |
|
||
String[] tableNames = new String[] { tableName.getNameAsString() }; | ||
String[] functionNames = loadBalancer.getCostFunctionNames(); | ||
Set<String> jmxMetrics = readJmxMetricsWithRetry(); | ||
|
||
Set<String> jmxMetrics = readJmxMetricsWithRetry(functionNames.length + 1); | ||
Set<String> expectedMetrics = getExpectedJmxMetrics(tableNames, functionNames); | ||
|
||
// printMetrics(jmxMetrics, "existing metrics in ensemble mode"); | ||
// printMetrics(expectedMetrics, "expected metrics in ensemble mode"); | ||
printMetrics(jmxMetrics, "existing metrics in ensemble mode"); | ||
printMetrics(expectedMetrics, "expected metrics in ensemble mode"); | ||
|
||
// assert that every expected is in the JMX | ||
for (String expected : expectedMetrics) { | ||
|
@@ -159,16 +158,18 @@ public void testJmxMetrics_EnsembleMode() throws Exception { | |
*/ | ||
@Test | ||
public void testJmxMetrics_PerTableMode() throws Exception { | ||
|
||
loadBalancer = new StochasticLoadBalancer(); | ||
|
||
conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, true); | ||
loadBalancer.setConf(conf); | ||
|
||
// NOTE the size is normally set in setClusterMetrics, for test purpose, we set it manually | ||
// Tables: hbase:namespace, table1, table2 | ||
// Functions: costFunctions, overall | ||
// Functions: costFunctions, overall, imbalance | ||
String[] functionNames = loadBalancer.getCostFunctionNames(); | ||
loadBalancer.updateMetricsSize(3 * (functionNames.length + 1)); | ||
|
||
loadBalancer.updateMetricsSize(3 * (functionNames.length + 2)); | ||
|
||
// table 1 | ||
TableName tableName = TableName.valueOf(TABLE_NAME_1); | ||
|
@@ -180,17 +181,19 @@ public void testJmxMetrics_PerTableMode() throws Exception { | |
clusterState = mockClusterServers(mockCluster_pertable_2); | ||
loadBalancer.balanceTable(tableName, clusterState); | ||
|
||
|
||
// table hbase:namespace | ||
tableName = TableName.valueOf(TABLE_NAME_NAMESPACE); | ||
clusterState = mockClusterServers(mockCluster_pertable_namespace); | ||
loadBalancer.balanceTable(tableName, clusterState); | ||
|
||
|
||
String[] tableNames = new String[] { TABLE_NAME_1, TABLE_NAME_2, TABLE_NAME_NAMESPACE }; | ||
Set<String> jmxMetrics = readJmxMetricsWithRetry(); | ||
Set<String> jmxMetrics = readJmxMetricsWithRetry(3 * (functionNames.length + 2)); | ||
Set<String> expectedMetrics = getExpectedJmxMetrics(tableNames, functionNames); | ||
|
||
// printMetrics(jmxMetrics, "existing metrics in per-table mode"); | ||
// printMetrics(expectedMetrics, "expected metrics in per-table mode"); | ||
printMetrics(jmxMetrics, "existing metrics in per-table mode"); | ||
printMetrics(expectedMetrics, "expected metrics in per-table mode"); | ||
|
||
// assert that every expected is in the JMX | ||
for (String expected : expectedMetrics) { | ||
|
@@ -199,11 +202,13 @@ public void testJmxMetrics_PerTableMode() throws Exception { | |
} | ||
} | ||
|
||
private Set<String> readJmxMetricsWithRetry() throws IOException { | ||
private Set<String> readJmxMetricsWithRetry(int expectedMetricsCount) throws IOException { | ||
final int count = 0; | ||
for (int i = 0; i < 10; i++) { | ||
for (int i = 0; i < 20; i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just realized in another PR the count used on line 214 is never updated... TODO to fix. |
||
|
||
Set<String> metrics = readJmxMetrics(); | ||
if (metrics != null) { | ||
|
||
if (metrics != null && metrics.size() >= expectedMetricsCount){ | ||
return metrics; | ||
} | ||
LOG.warn("Failed to get jmxmetrics... sleeping, retrying; " + i + " of " + count + " times"); | ||
|
@@ -219,6 +224,7 @@ private Set<String> readJmxMetrics() throws IOException { | |
JMXConnector connector = null; | ||
ObjectName target = null; | ||
MBeanServerConnection mb = null; | ||
|
||
try { | ||
connector = | ||
JMXConnectorFactory.connect(JMXListener.buildJMXServiceURL(connectorPort, connectorPort)); | ||
|
@@ -268,6 +274,7 @@ private Set<String> getExpectedJmxMetrics(String[] tableNames, String[] function | |
|
||
for (String tableName : tableNames) { | ||
ret.add(StochasticLoadBalancer.composeAttributeName(tableName, "Overall")); | ||
ret.add(StochasticLoadBalancer.composeAttributeName(tableName, "Imbalance")); | ||
for (String functionName : functionNames) { | ||
String metricsName = StochasticLoadBalancer.composeAttributeName(tableName, functionName); | ||
ret.add(metricsName); | ||
|
@@ -286,5 +293,6 @@ private static void printMetrics(Set<String> metrics, String info) { | |
for (String str : metrics) { | ||
LOG.info(" ++++ " + str); | ||
} | ||
|
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: typo "WIGHTED"