Skip to content

Commit

Permalink
Use specified strategy to get input table statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
hantangwangd committed Jul 14, 2024
1 parent 4ca9f8b commit 6b77a1b
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,39 @@ public void testHistoryBasedStatsCalculatorCTE()
assertPlan(cteMaterialization, sql, anyTree(node(ProjectNode.class, anyTree(any())).withOutputRowCount(3)));
}

/**
 * Checks that history-based statistics recorded under one plan canonicalization strategy
 * are only picked up by sessions whose configured strategy list includes that strategy.
 *
 * The fixture table is created inside the test and always dropped in the {@code finally}
 * block, so a failed assertion cannot leak {@code test_myt} into other tests or break a rerun
 * on the same query runner.
 */
@Test
public void testHistoryBasedStatsWithSpecifiedCanonicalizationStrategy()
{
    try {
        getQueryRunner().execute("CREATE TABLE test_myt(a int, b varchar)");
        getQueryRunner().execute("INSERT INTO test_myt values(1, '1001'), (2, '1002'), (3, '1003'), (4, '1004')");

        String query = "SELECT * FROM test_myt where a-1 < 3 ORDER BY b";
        Session session = Session.builder(defaultSession())
                .setSystemProperty(HISTORY_BASED_OPTIMIZATION_PLAN_CANONICALIZATION_STRATEGY, "IGNORE_SAFE_CONSTANTS")
                .build();

        // Before any query has completed, only cost-based stats are available
        assertPlan(session, query, node(OutputNode.class, anyTree(any())).withOutputRowCount(false, "CBO"));
        executeAndTrackHistory(query, session);

        // After a query has completed, history-based stats are returned for the same canonicalization strategy
        assertPlan(session, query, node(OutputNode.class, anyTree(any())).withOutputRowCount(3, "HBO"));

        Session sessionWithAnotherStrategy = Session.builder(defaultSession())
                .setSystemProperty(HISTORY_BASED_OPTIMIZATION_PLAN_CANONICALIZATION_STRATEGY, "IGNORE_SCAN_CONSTANTS")
                .build();

        // History-based stats are not returned when the strategy differs from the one used to collect them
        assertPlan(sessionWithAnotherStrategy, query, node(OutputNode.class, anyTree(any())).withOutputRowCount(false, "CBO"));

        Session sessionWithMultiStrategy = Session.builder(defaultSession())
                .setSystemProperty(HISTORY_BASED_OPTIMIZATION_PLAN_CANONICALIZATION_STRATEGY, "DEFAULT,CONNECTOR,IGNORE_SCAN_CONSTANTS,IGNORE_SAFE_CONSTANTS")
                .build();

        // History-based stats are returned when the configured strategies contain the one used to collect them
        assertPlan(sessionWithMultiStrategy, query, node(OutputNode.class, anyTree(any())).withOutputRowCount(3, "HBO"));
    }
    finally {
        // Always remove the fixture table, even when an assertion above fails
        getQueryRunner().execute("DROP TABLE IF EXISTS test_myt");
    }
}

@Override
protected void assertPlan(@Language("SQL") String query, PlanMatchPattern pattern)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ private PlanNodeStatsEstimate getStatistics(PlanNode planNode, Session session,
for (PlanCanonicalizationStrategy strategy : historyBasedPlanCanonicalizationStrategyList(session)) {
for (Map.Entry<PlanNodeWithHash, HistoricalPlanStatistics> entry : statistics.entrySet()) {
if (allHashes.containsKey(strategy) && entry.getKey().getHash().isPresent() && allHashes.get(strategy).equals(entry.getKey())) {
Optional<List<PlanStatistics>> inputTableStatistics = getPlanNodeInputTableStatistics(plan, session, true);
Optional<List<PlanStatistics>> inputTableStatistics = getPlanNodeInputTableStatistics(plan, session, strategy, true);
if (inputTableStatistics.isPresent()) {
Optional<HistoricalPlanStatisticsEntry> historicalPlanStatisticsEntry = getSelectedHistoricalPlanStatisticsEntry(entry.getValue(), inputTableStatistics.get(), historyMatchingThreshold);
if (historicalPlanStatisticsEntry.isPresent()) {
Expand All @@ -213,13 +213,13 @@ private PlanNodeStatsEstimate getStatistics(PlanNode planNode, Session session,
return delegateStats;
}

// Fetches the input table statistics for the stats-equivalent form of `plan`.
// Returns Optional.empty() when useHistoryBasedPlanStatisticsEnabled(session) is false or when
// `plan` has no stats-equivalent plan node; otherwise delegates to
// planCanonicalInfoProvider.getInputTableStatistics, forwarding `cacheOnly` unchanged.
// NOTE(review): this is a diff hunk. Each pair of near-identical adjacent lines appears to be the
// removed line followed by its replacement, which threads the caller-chosen `strategy` through.
private Optional<List<PlanStatistics>> getPlanNodeInputTableStatistics(PlanNode plan, Session session, boolean cacheOnly)
private Optional<List<PlanStatistics>> getPlanNodeInputTableStatistics(PlanNode plan, Session session, PlanCanonicalizationStrategy strategy, boolean cacheOnly)
{
if (!useHistoryBasedPlanStatisticsEnabled(session) || !plan.getStatsEquivalentPlanNode().isPresent()) {
return Optional.empty();
}

// Presence was checked by the guard above, so get() is safe here.
PlanNode statsEquivalentPlanNode = plan.getStatsEquivalentPlanNode().get();
return planCanonicalInfoProvider.getInputTableStatistics(session, statsEquivalentPlanNode, cacheOnly);
return planCanonicalInfoProvider.getInputTableStatistics(session, statsEquivalentPlanNode, strategy, cacheOnly);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
import static com.facebook.presto.SystemSessionProperties.getHistoryBasedOptimizerTimeoutLimit;
import static com.facebook.presto.SystemSessionProperties.logQueryPlansUsedInHistoryBasedOptimizer;
import static com.facebook.presto.common.RuntimeUnit.NANO;
import static com.facebook.presto.cost.HistoryBasedPlanStatisticsManager.historyBasedPlanCanonicalizationStrategyList;
import static com.google.common.hash.Hashing.sha256;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;
Expand Down Expand Up @@ -68,9 +67,9 @@ public Optional<String> hash(Session session, PlanNode planNode, PlanCanonicaliz
}

// Returns the input table statistics of the PlanNodeCanonicalInfo loaded for `planNode`.
// The result is empty whenever loadValue(session, key, cacheOnly) yields no value.
// NOTE(review): this is a diff hunk. Each pair of near-identical adjacent lines appears to be the
// removed line followed by its replacement. The earlier form keys the lookup on the first strategy
// in the session's configured list; the later form keys it on the `strategy` passed by the caller.
@Override
public Optional<List<PlanStatistics>> getInputTableStatistics(Session session, PlanNode planNode, boolean cacheOnly)
public Optional<List<PlanStatistics>> getInputTableStatistics(Session session, PlanNode planNode, PlanCanonicalizationStrategy strategy, boolean cacheOnly)
{
CacheKey key = new CacheKey(planNode, historyBasedPlanCanonicalizationStrategyList(session).get(0));
CacheKey key = new CacheKey(planNode, strategy);
return loadValue(session, key, cacheOnly).map(PlanNodeCanonicalInfo::getInputTableStatistics);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ public interface PlanCanonicalInfoProvider
* plan canonicalization.
* @param session Session for query being run
* @param planNode Plan node to hash
* @param strategy Strategy to canonicalize the plan node
* @param cacheOnly Only fetch from cache, and return Optional.empty() if set to true and no entry found in cache
* @return Statistics of leaf input tables to plan node, ordered by a consistent canonicalization strategy.
*/
Optional<List<PlanStatistics>> getInputTableStatistics(Session session, PlanNode planNode, boolean cacheOnly);
Optional<List<PlanStatistics>> getInputTableStatistics(Session session, PlanNode planNode, PlanCanonicalizationStrategy strategy, boolean cacheOnly);
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ public static List<CanonicalPlanWithInfo> getCanonicalInfo(
}
PlanNode statsEquivalentPlanNode = node.getStatsEquivalentPlanNode().get();
Optional<String> hash = planCanonicalInfoProvider.hash(session, statsEquivalentPlanNode, strategy, true);
Optional<List<PlanStatistics>> inputTableStatistics = planCanonicalInfoProvider.getInputTableStatistics(session, statsEquivalentPlanNode, true);
Optional<List<PlanStatistics>> inputTableStatistics = planCanonicalInfoProvider.getInputTableStatistics(session, statsEquivalentPlanNode, strategy, true);
if (hash.isPresent() && inputTableStatistics.isPresent()) {
result.add(new CanonicalPlanWithInfo(new CanonicalPlan(statsEquivalentPlanNode, strategy), new PlanNodeCanonicalInfo(hash.get(), inputTableStatistics.get())));
}
Expand Down

0 comments on commit 6b77a1b

Please sign in to comment.