From e79b16c112998e0a1e53aaacb3e92bad475011a2 Mon Sep 17 00:00:00 2001
From: dzlab <dzlabs@outlook.com>
Date: Wed, 9 Oct 2024 14:20:04 -0700
Subject: [PATCH] Added different sections

---
 _posts/2024-07-17-sql-processor-calcite.md | 100 ++++++++++++++-------
 assets/2024/07/20240717-physical_plan.svg  |  95 ++++++++++++++++++++
 2 files changed, 163 insertions(+), 32 deletions(-)
 create mode 100644 assets/2024/07/20240717-physical_plan.svg

diff --git a/_posts/2024-07-17-sql-processor-calcite.md b/_posts/2024-07-17-sql-processor-calcite.md
index 6c68689..088e6b4 100644
--- a/_posts/2024-07-17-sql-processor-calcite.md
+++ b/_posts/2024-07-17-sql-processor-calcite.md
@@ -14,13 +14,24 @@ img_excerpt: assets/logos/Apache_Calcite_Logo.svg
 
 In a [previous article]({{ "database/2024/07/06/apache-calcite/" | absolute_url }}), we saw how to create an Adapter for Apache Calcite and then how to run SQL queries against random data source. In this article we will see in [step by step](https://github.com/zabetak/slides/blob/master/2021/boss-workshop/apache-calcite-tutorial.pdf) how to use Apache Cacite to implement a SQL processor to parse an input query, validate it and then execute it.
 
+**Query**
+
+```sql
+SELECT `C_NAME`, `O_ORDERKEY`, `O_ORDERDATE`
+FROM `CUSTOMER`
+INNER JOIN `ORDERS` ON `CUSTOMER`.`c_custkey` = `ORDERS`.`o_custkey`
+WHERE `CUSTOMER`.`c_custkey` < 3
+ORDER BY `C_NAME`, `O_ORDERKEY`
+```
 
 ```java
 // TODO 1. Create the root schema and type factory
 CalciteSchema schema = CalciteSchema.createRootSchema(false);
 RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
+```
 // TODO 2. Create the data type for each TPC-H table
 // TODO 3. Add the TPC-H table to the schema
+```java
 for(TpchTable table: TpchTable.values()) {
   RelDataTypeFactory.Builder builder = typeFactory.builder();
   for(TpchTable.Column c: table.columns) {
@@ -29,47 +40,78 @@ for(TpchTable table: TpchTable.values()) {
   String indexPath = Paths.get(DatasetIndexer.INDEX_LOCATION, "tpch", table.name()).toString();
   schema.add(table.name(), new LuceneTable(indexPath, builder.build()));
 }
+```
+
+## Query to AST
 
 // TODO 4. Create an SQL parser
+```java
 SqlParser parser = SqlParser.create(sqlQuery);
+```
 // TODO 5. Parse the query into an AST
+```java
 SqlNode parseAst = parser.parseQuery();
 // TODO 6. Print and check the AST
 System.out.println("[Parsed query]");
 System.out.println(parseAst.toString());
-
+```
 // TODO 7. Configure and instantiate the catalog reader
+```java
 CalciteConnectionConfig readerConfig = CalciteConnectionConfig.DEFAULT
         .set(CalciteConnectionProperty.CASE_SENSITIVE, "false");
 CalciteCatalogReader catalogReader = new CalciteCatalogReader(schema, Collections.emptyList(), typeFactory,
         readerConfig);
+```
 // TODO 8. Create the SQL validator using the standard operator table and default configuration
+```java
 SqlValidator sqlValidator = SqlValidatorUtil.newValidator(SqlStdOperatorTable.instance(),
         catalogReader, typeFactory, SqlValidator.Config.DEFAULT);
+```
 // TODO 9. Validate the initial AST
+```java
 SqlNode validAst = sqlValidator.validate(parseAst);
 System.out.println("[Validated query");
 System.out.println(validAst.toString());
+```
+
+## AST to Logical plan
 
 // TODO 10. Create the optimization cluster to maintain planning information
 // TODO 11. Configure and instantiate the converter of the AST to Logical plan
 // - No view expansion (use NOOP_EXPANDER)
 // - Standard expression normalization (use StandardConvertletTable.INSTANCE)
 // - Default configuration (SqlToRelConverter.config())
+```java
 RelOptCluster cluster = newCluster(typeFactory);
 SqlToRelConverter sqlToRelConverter = new SqlToRelConverter(NOOP_EXPANDER,
         sqlValidator, catalogReader, cluster,
         StandardConvertletTable.INSTANCE,
         SqlToRelConverter.config());
-
+```
 // TODO 12. Convert the valid AST into a logical plan
+```java
 RelNode logPlan = sqlToRelConverter.convertQuery(validAst, false, true).rel;
 // TODO 13. Display the logical plan with explain attributes
 System.out.println(
         RelOptUtil.dumpPlan("[Logical plan]", logPlan, SqlExplainFormat.TEXT, SqlExplainLevel.EXPPLAN_ATTRIBUTES)
 );
+```
+
+
+**Logical plan**
+```
+LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+  LogicalProject(C_NAME=[$1], O_ORDERKEY=[$8], O_ORDERDATE=[$12])
+    LogicalFilter(condition=[<($0, 3)])
+      LogicalJoin(condition=[=($0, $9)], joinType=[inner])
+        LogicalTableScan(table=[[CUSTOMER]])
+        LogicalTableScan(table=[[ORDERS]])
+```
+
+## Logical to Physical plan
 
 // TODO 14. Initialize optimizer/planner with the necessary rules
+```java
 RelOptPlanner planner = cluster.getPlanner();
 planner.addRule(CoreRules.FILTER_TO_CALC);
 planner.addRule(CoreRules.PROJECT_TO_CALC);
@@ -77,11 +119,11 @@ planner.addRule(EnumerableRules.ENUMERABLE_SORT_RULE);
 planner.addRule(EnumerableRules.ENUMERABLE_CALC_RULE);
 planner.addRule(EnumerableRules.ENUMERABLE_JOIN_RULE);
 planner.addRule(EnumerableRules.ENUMERABLE_TABLE_SCAN_RULE);
-
+```
 // TODO 15. Define the type of the output plan (in this case we want a physical plan in
 // EnumerableContention)
-logPlan =
-        planner.changeTraits(logPlan, logPlan.getTraitSet().replace(EnumerableConvention.INSTANCE));
+```java
+logPlan = planner.changeTraits(logPlan, logPlan.getTraitSet().replace(EnumerableConvention.INSTANCE));
 planner.setRoot(logPlan);
 
 // TODO 16. Start the optimization process to obtain the most efficient physical plan based on
@@ -92,12 +134,31 @@ EnumerableRel phyPlan = (EnumerableRel) planner.findBestExp();
 System.out.println(
         RelOptUtil.dumpPlan("[Physical plan]", phyPlan, SqlExplainFormat.TEXT, SqlExplainLevel.EXPPLAN_ATTRIBUTES)
 );
+```
+
+**Physical plan**
+```
+EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
+  EnumerableCalc(expr#0..16=[{inputs}], C_NAME=[$t1], O_ORDERKEY=[$t8], O_ORDERDATE=[$t12])
+    EnumerableCalc(expr#0..16=[{inputs}], expr#17=[3], expr#18=[<($t0, $t17)], proj#0..16=[{exprs}], $condition=[$t18])
+      EnumerableHashJoin(condition=[=($0, $9)], joinType=[inner])
+        EnumerableTableScan(table=[[CUSTOMER]])
+        EnumerableTableScan(table=[[ORDERS]])
+```
+
 
+![Physical Plan](/assets/2024/07/20240717-physical_plan.svg)
+
+## Physical to Executable plan
+
+
 // TODO 18. Compile generated code and obtain the executable program
+```java
 Bindable<Object[]> execPlan = EnumerableInterpretable.toBindable(new HashMap<>(), null, phyPlan, EnumerableRel.Prefer.ARRAY);
-
+```
 // TODO 19. Run the program using a context simply providing access to the schema and print
 // results
+```java
 long start = System.currentTimeMillis();
 for(Object[] row: execPlan.bind(new SchemaOnlyDataContext(schema))) {
   System.out.println(Arrays.toString(row));
@@ -106,33 +167,8 @@ long finish = System.currentTimeMillis();
 System.out.println("Elapsed time " + (finish - start) + "ms");
 ```
 
-**Query**
-```sql
-SELECT `C_NAME`, `O_ORDERKEY`, `O_ORDERDATE`
-FROM `CUSTOMER`
-INNER JOIN `ORDERS` ON `CUSTOMER`.`c_custkey` = `ORDERS`.`o_custkey`
-WHERE `CUSTOMER`.`c_custkey` < 3
-ORDER BY `C_NAME`, `O_ORDERKEY`
-```
-**Logical plan**
-```
-LogicalSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
-  LogicalProject(C_NAME=[$1], O_ORDERKEY=[$8], O_ORDERDATE=[$12])
-    LogicalFilter(condition=[<($0, 3)])
-      LogicalJoin(condition=[=($0, $9)], joinType=[inner])
-        LogicalTableScan(table=[[CUSTOMER]])
-        LogicalTableScan(table=[[ORDERS]])
-```
 
-**Physical plan**
-```
-EnumerableSort(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC])
-  EnumerableCalc(expr#0..16=[{inputs}], C_NAME=[$t1], O_ORDERKEY=[$t8], O_ORDERDATE=[$t12])
-    EnumerableCalc(expr#0..16=[{inputs}], expr#17=[3], expr#18=[<($t0, $t17)], proj#0..16=[{exprs}], $condition=[$t18])
-      EnumerableHashJoin(condition=[=($0, $9)], joinType=[inner])
-        EnumerableTableScan(table=[[CUSTOMER]])
-        EnumerableTableScan(table=[[ORDERS]])
-```
+
 
 ## That's all folks
 I hope you enjoyed this article, feel free to leave a comment or reach out on twitter [@bachiirc](https://twitter.com/bachiirc).
diff --git a/assets/2024/07/20240717-physical_plan.svg b/assets/2024/07/20240717-physical_plan.svg
new file mode 100644
index 0000000..6cf8464
--- /dev/null
+++ b/assets/2024/07/20240717-physical_plan.svg
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="430pt" height="775pt" viewBox="0.00 0.00 430.49 774.70">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 770.7031)">
+<title>%0</title>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-770.7031 426.4923,-770.7031 426.4923,4 -4,4"/>
+<!-- EnumerableCalc\nexpr#0..16 = {inputs}\nC_NAME = $t1\nO_ORDERKEY = $t8\nO_ORDERDATE = $t12\n -->
+<g id="node1" class="node">
+<title>EnumerableCalc\nexpr#0..16 = {inputs}\nC_NAME = $t1\nO_ORDERKEY = $t8\nO_ORDERDATE = $t12\n</title>
+<ellipse fill="none" stroke="#000000" cx="211.5724" cy="-247.9615" rx="111.4496" ry="65.1077"/>
+<text text-anchor="middle" x="211.5724" y="-277.3615" font-family="Times,serif" font-size="14.00" fill="#000000">EnumerableCalc</text>
+<text text-anchor="middle" x="211.5724" y="-260.5615" font-family="Times,serif" font-size="14.00" fill="#000000">expr#0..16 = {inputs}</text>
+<text text-anchor="middle" x="211.5724" y="-243.7615" font-family="Times,serif" font-size="14.00" fill="#000000">C_NAME = $t1</text>
+<text text-anchor="middle" x="211.5724" y="-226.9615" font-family="Times,serif" font-size="14.00" fill="#000000">O_ORDERKEY = $t8</text>
+<text text-anchor="middle" x="211.5724" y="-210.1615" font-family="Times,serif" font-size="14.00" fill="#000000">O_ORDERDATE = $t12</text>
+</g>
+<!-- EnumerableSort\nsort0 = $0\nsort1 = $1\ndir0 = ASC\ndir1 = ASC\n -->
+<g id="node2" class="node">
+<title>EnumerableSort\nsort0 = $0\nsort1 = $1\ndir0 = ASC\ndir1 = ASC\n</title>
+<ellipse fill="none" stroke="#000000" cx="211.5724" cy="-65.0538" rx="75.7815" ry="65.1077"/>
+<text text-anchor="middle" x="211.5724" y="-94.4538" font-family="Times,serif" font-size="14.00" fill="#000000">EnumerableSort</text>
+<text text-anchor="middle" x="211.5724" y="-77.6538" font-family="Times,serif" font-size="14.00" fill="#000000">sort0 = $0</text>
+<text text-anchor="middle" x="211.5724" y="-60.8538" font-family="Times,serif" font-size="14.00" fill="#000000">sort1 = $1</text>
+<text text-anchor="middle" x="211.5724" y="-44.0538" font-family="Times,serif" font-size="14.00" fill="#000000">dir0 = ASC</text>
+<text text-anchor="middle" x="211.5724" y="-27.2538" font-family="Times,serif" font-size="14.00" fill="#000000">dir1 = ASC</text>
+</g>
+<!-- EnumerableCalc\nexpr#0..16 = {inputs}\nC_NAME = $t1\nO_ORDERKEY = $t8\nO_ORDERDATE = $t12\n&#45;&gt;EnumerableSort\nsort0 = $0\nsort1 = $1\ndir0 = ASC\ndir1 = ASC\n -->
+<g id="edge1" class="edge">
+<title>EnumerableCalc\nexpr#0..16 = {inputs}\nC_NAME = $t1\nO_ORDERKEY = $t8\nO_ORDERDATE = $t12\n-&gt;EnumerableSort\nsort0 = $0\nsort1 = $1\ndir0 = ASC\ndir1 = ASC\n</title>
+<path fill="none" stroke="#000000" d="M211.5724,-182.7023C211.5724,-168.9979 211.5724,-154.4364 211.5724,-140.3979"/>
+<polygon fill="#000000" stroke="#000000" points="215.0725,-140.2878 211.5724,-130.2879 208.0725,-140.2879 215.0725,-140.2878"/>
+<text text-anchor="middle" x="215.0724" y="-152.3076" font-family="Times,serif" font-size="14.00" fill="#000000">0</text>
+</g>
+<!-- EnumerableCalc\nexpr#0..16 = {inputs}\nexpr#17 = 3\nexpr#18 = &lt;($t0, $t17)\nproj#0..16 = {exprs}\n$condition = $t18 -->
+<g id="node3" class="node">
+<title>EnumerableCalc\nexpr#0..16 = {inputs}\nexpr#17 = 3\nexpr#18 = &lt;($t0, $t17)\nproj#0..16 = {exprs}\n$condition = $t18</title>
+<ellipse fill="none" stroke="#000000" cx="211.5724" cy="-442.7485" rx="101.8172" ry="76.8665"/>
+<text text-anchor="middle" x="211.5724" y="-480.5485" font-family="Times,serif" font-size="14.00" fill="#000000">EnumerableCalc</text>
+<text text-anchor="middle" x="211.5724" y="-463.7485" font-family="Times,serif" font-size="14.00" fill="#000000">expr#0..16 = {inputs}</text>
+<text text-anchor="middle" x="211.5724" y="-446.9485" font-family="Times,serif" font-size="14.00" fill="#000000">expr#17 = 3</text>
+<text text-anchor="middle" x="211.5724" y="-430.1485" font-family="Times,serif" font-size="14.00" fill="#000000">expr#18 = &lt;($t0, $t17)</text>
+<text text-anchor="middle" x="211.5724" y="-413.3485" font-family="Times,serif" font-size="14.00" fill="#000000">proj#0..16 = {exprs}</text>
+<text text-anchor="middle" x="211.5724" y="-396.5485" font-family="Times,serif" font-size="14.00" fill="#000000">$condition = $t18</text>
+</g>
+<!-- EnumerableCalc\nexpr#0..16 = {inputs}\nexpr#17 = 3\nexpr#18 = &lt;($t0, $t17)\nproj#0..16 = {exprs}\n$condition = $t18&#45;&gt;EnumerableCalc\nexpr#0..16 = {inputs}\nC_NAME = $t1\nO_ORDERKEY = $t8\nO_ORDERDATE = $t12\n -->
+<g id="edge2" class="edge">
+<title>EnumerableCalc\nexpr#0..16 = {inputs}\nexpr#17 = 3\nexpr#18 = &lt;($t0, $t17)\nproj#0..16 = {exprs}\n$condition = $t18-&gt;EnumerableCalc\nexpr#0..16 = {inputs}\nC_NAME = $t1\nO_ORDERKEY = $t8\nO_ORDERDATE = $t12\n</title>
+<path fill="none" stroke="#000000" d="M211.5724,-365.7635C211.5724,-351.7299 211.5724,-337.1044 211.5724,-323.1244"/>
+<polygon fill="#000000" stroke="#000000" points="215.0725,-323.0766 211.5724,-313.0766 208.0725,-323.0766 215.0725,-323.0766"/>
+<text text-anchor="middle" x="215.0724" y="-335.2153" font-family="Times,serif" font-size="14.00" fill="#000000">0</text>
+</g>
+<!-- EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n -->
+<g id="node4" class="node">
+<title>EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n</title>
+<ellipse fill="none" stroke="#000000" cx="211.5724" cy="-613.7768" rx="96.3288" ry="41.0911"/>
+<text text-anchor="middle" x="211.5724" y="-626.3768" font-family="Times,serif" font-size="14.00" fill="#000000">EnumerableHashJoin</text>
+<text text-anchor="middle" x="211.5724" y="-609.5768" font-family="Times,serif" font-size="14.00" fill="#000000">condition = =($0, $9)</text>
+<text text-anchor="middle" x="211.5724" y="-592.7768" font-family="Times,serif" font-size="14.00" fill="#000000">joinType = inner</text>
+</g>
+<!-- EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n&#45;&gt;EnumerableCalc\nexpr#0..16 = {inputs}\nexpr#17 = 3\nexpr#18 = &lt;($t0, $t17)\nproj#0..16 = {exprs}\n$condition = $t18 -->
+<g id="edge3" class="edge">
+<title>EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n-&gt;EnumerableCalc\nexpr#0..16 = {inputs}\nexpr#17 = 3\nexpr#18 = &lt;($t0, $t17)\nproj#0..16 = {exprs}\n$condition = $t18</title>
+<path fill="none" stroke="#000000" d="M211.5724,-572.3756C211.5724,-559.5238 211.5724,-544.8415 211.5724,-530.0867"/>
+<polygon fill="#000000" stroke="#000000" points="215.0725,-529.8818 211.5724,-519.8819 208.0725,-529.8819 215.0725,-529.8818"/>
+<text text-anchor="middle" x="215.0724" y="-541.8817" font-family="Times,serif" font-size="14.00" fill="#000000">0</text>
+</g>
+<!-- EnumerableTableScan\ntable = [CUSTOMER]\n -->
+<g id="node5" class="node">
+<title>EnumerableTableScan\ntable = [CUSTOMER]\n</title>
+<ellipse fill="none" stroke="#000000" cx="101.5724" cy="-737.2874" rx="101.6448" ry="29.3315"/>
+<text text-anchor="middle" x="101.5724" y="-741.4874" font-family="Times,serif" font-size="14.00" fill="#000000">EnumerableTableScan</text>
+<text text-anchor="middle" x="101.5724" y="-724.6874" font-family="Times,serif" font-size="14.00" fill="#000000">table = [CUSTOMER]</text>
+</g>
+<!-- EnumerableTableScan\ntable = [CUSTOMER]\n&#45;&gt;EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n -->
+<g id="edge4" class="edge">
+<title>EnumerableTableScan\ntable = [CUSTOMER]\n-&gt;EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n</title>
+<path fill="none" stroke="#000000" d="M127.085,-708.6412C139.8479,-694.3108 155.6183,-676.6033 170.0651,-660.3822"/>
+<polygon fill="#000000" stroke="#000000" points="172.9946,-662.3553 177.0318,-652.5598 167.7672,-657.6997 172.9946,-662.3553"/>
+<text text-anchor="middle" x="160.0724" y="-677.2718" font-family="Times,serif" font-size="14.00" fill="#000000">0</text>
+</g>
+<!-- EnumerableTableScan\ntable = [ORDERS]\n -->
+<g id="node6" class="node">
+<title>EnumerableTableScan\ntable = [ORDERS]\n</title>
+<ellipse fill="none" stroke="#000000" cx="321.5724" cy="-737.2874" rx="100.84" ry="29.3315"/>
+<text text-anchor="middle" x="321.5724" y="-741.4874" font-family="Times,serif" font-size="14.00" fill="#000000">EnumerableTableScan</text>
+<text text-anchor="middle" x="321.5724" y="-724.6874" font-family="Times,serif" font-size="14.00" fill="#000000">table = [ORDERS]</text>
+</g>
+<!-- EnumerableTableScan\ntable = [ORDERS]\n&#45;&gt;EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n -->
+<g id="edge5" class="edge">
+<title>EnumerableTableScan\ntable = [ORDERS]\n-&gt;EnumerableHashJoin\ncondition = =($0, $9)\njoinType = inner\n</title>
+<path fill="none" stroke="#000000" d="M296.0597,-708.6412C283.2968,-694.3108 267.5264,-676.6033 253.0796,-660.3822"/>
+<polygon fill="#000000" stroke="#000000" points="255.3775,-657.6997 246.1129,-652.5598 250.1501,-662.3553 255.3775,-657.6997"/>
+<text text-anchor="middle" x="280.0724" y="-677.2718" font-family="Times,serif" font-size="14.00" fill="#000000">1</text>
+</g>
+</g>
+</svg>
\ No newline at end of file