[FLINK-37222][table] Don't resolve the CatalogView to QueryOperationCatalogView #26323

Merged: 1 commit, Mar 24, 2025
40 changes: 38 additions & 2 deletions flink-table/flink-sql-client/src/test/resources/nexmark.sql
@@ -56,7 +56,7 @@ CREATE TABLE datagen
'connector' = 'datagen',
'number-of-rows' = '10'
);
CREATE VIEW person AS
CREATE TEMPORARY VIEW person AS
SELECT person.id,
person.name,
person.emailAddress,
@@ -93,7 +93,6 @@ SELECT bid.auction,
FROM datagen
WHERE event_type = 2;


CREATE TABLE nexmark_q7
(
auction BIGINT,
@@ -105,6 +104,9 @@ CREATE TABLE nexmark_q7
'connector' = 'blackhole'
);

CREATE TABLE nexmark_q8 (id BIGINT, name VARCHAR, stime TIMESTAMP(3)) WITH ('connector' = 'blackhole');

BEGIN STATEMENT SET;
INSERT INTO nexmark_q7
SELECT B.auction, B.price, B.bidder, B.`dateTime`, B.extra
from bid B
@@ -114,3 +116,37 @@ from bid B
GROUP BY window_start, window_end) B1
ON B.price = B1.maxprice
WHERE B.`dateTime` BETWEEN B1.`dateTime` - INTERVAL '10' SECOND AND B1.`dateTime`;

INSERT INTO nexmark_q8
SELECT
P.id, P.name, P.starttime
FROM
(
SELECT
P.id,
P.name,
TUMBLE_START(P.dateTime, INTERVAL '10' SECOND) AS starttime,
TUMBLE_END(P.dateTime, INTERVAL '10' SECOND) AS endtime
FROM
person P
GROUP BY
P.id,
P.name,
TUMBLE(P.dateTime, INTERVAL '10' SECOND)
) P
JOIN (
SELECT
A.seller,
TUMBLE_START(A.dateTime, INTERVAL '10' SECOND) AS starttime,
TUMBLE_END(A.dateTime, INTERVAL '10' SECOND) AS endtime
FROM
auction A
GROUP BY
A.seller,
TUMBLE(A.dateTime, INTERVAL '10' SECOND)
) A
ON P.id = A.seller
AND P.starttime = A.starttime
AND P.endtime = A.endtime;
END;

This file was deleted.

@@ -30,6 +30,7 @@
import org.apache.flink.table.catalog.CatalogManager;
import org.apache.flink.table.catalog.CatalogStoreHolder;
import org.apache.flink.table.catalog.FunctionCatalog;
import org.apache.flink.table.catalog.GenericInMemoryCatalog;
import org.apache.flink.table.factories.CatalogStoreFactory;
import org.apache.flink.table.factories.FactoryUtil;
import org.apache.flink.table.factories.TableFactoryUtil;
@@ -445,7 +446,7 @@ private static CatalogManager buildCatalogManager(
catalogStore.config(),
catalogStore.classLoader()))
.orElse(
new EnvironmentReusableInMemoryCatalog(
new GenericInMemoryCatalog(
defaultCatalogName, settings.getBuiltInDatabaseName()));
}
defaultCatalog.open();
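The buildCatalogManager hunk above now falls back to Flink's stock GenericInMemoryCatalog (constructed from defaultCatalogName and settings.getBuiltInDatabaseName()) instead of EnvironmentReusableInMemoryCatalog. As a minimal sketch, assuming illustrative catalog and database names rather than the values the surrounding code supplies, the default catalog is created and opened roughly like this:

```java
import org.apache.flink.table.catalog.GenericInMemoryCatalog;

public class DefaultCatalogSketch {

    public static void main(String[] args) {
        // Illustrative names; the real code passes defaultCatalogName and
        // settings.getBuiltInDatabaseName() instead.
        GenericInMemoryCatalog defaultCatalog =
                new GenericInMemoryCatalog("default_catalog", "default_database");

        // buildCatalogManager opens the chosen default catalog after constructing it.
        defaultCatalog.open();
        System.out.println(defaultCatalog.getDefaultDatabase()); // default_database
        defaultCatalog.close();
    }
}
```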
@@ -27,6 +27,7 @@
import org.apache.flink.table.catalog.CatalogView;
import org.apache.flink.table.catalog.ContextResolvedTable;
import org.apache.flink.table.catalog.ObjectIdentifier;
import org.apache.flink.table.catalog.ResolvedCatalogView;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.catalog.UnresolvedIdentifier;
import org.apache.flink.table.planner.operations.PlannerQueryOperation;
@@ -110,12 +111,14 @@ static CatalogView toCatalogView(
schema = ResolvedSchema.physical(aliasFieldNames, schema.getColumnDataTypes());
}

return CatalogView.of(
Schema.newBuilder().fromResolvedSchema(schema).build(),
viewComment,
originalQuery,
expandedQuery,
viewOptions);
return new ResolvedCatalogView(
CatalogView.of(
Schema.newBuilder().fromResolvedSchema(schema).build(),
viewComment,
originalQuery,
expandedQuery,
viewOptions),
schema);
}

/**
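The toCatalogView hunk above carries the core of the fix: instead of returning an unresolved CatalogView, the converter wraps it in a ResolvedCatalogView together with the ResolvedSchema it was built from, so downstream code no longer has to resolve the view itself (which, per the PR title, is where it would otherwise end up as a QueryOperationCatalogView). A self-contained sketch of the same wrapping, using hypothetical schema, comment, and query values:

```java
import java.util.Collections;
import java.util.List;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.catalog.CatalogView;
import org.apache.flink.table.catalog.ResolvedCatalogView;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.types.DataType;

public class ResolvedViewSketch {

    static ResolvedCatalogView buildResolvedView() {
        // Hypothetical view schema standing in for the converter's input.
        List<String> fieldNames = List.of("id", "name");
        List<DataType> fieldTypes = List.of(DataTypes.BIGINT(), DataTypes.STRING());
        ResolvedSchema schema = ResolvedSchema.physical(fieldNames, fieldTypes);

        // Hypothetical original and expanded view queries.
        String originalQuery = "SELECT id, name FROM person";
        String expandedQuery =
                "SELECT `person`.`id`, `person`.`name` FROM `default_catalog`.`default_database`.`person`";

        // Wrap the unresolved CatalogView together with its ResolvedSchema,
        // mirroring the change in toCatalogView above.
        return new ResolvedCatalogView(
                CatalogView.of(
                        Schema.newBuilder().fromResolvedSchema(schema).build(),
                        "a demo view", // view comment
                        originalQuery,
                        expandedQuery,
                        Collections.emptyMap()), // view options
                schema);
    }
}
```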
@@ -177,31 +177,30 @@ Calc(select=[a, b, DATE_FORMAT(CURRENT_TIMESTAMP(), _UTF-16LE'yyMMdd') AS day],
Sink(table=[default_catalog.default_database.sink1], fields=[a, day, EXPR$2, EXPR$3])
+- GroupAggregate(advice=[1], groupBy=[a, day], select=[a, day, SUM_RETRACT(b) AS EXPR$2, COUNT_RETRACT(DISTINCT c) AS EXPR$3])
+- Exchange(distribution=[hash[a, day]])
+- Calc(select=[a, day, b0 AS b, c])
+- Join(joinType=[InnerJoin], where=[=(a, d)], select=[a, b, day, b0, day0, c, d], leftInputSpec=[NoUniqueKey], rightInputSpec=[NoUniqueKey])
+- Calc(select=[a, day, b, c])
+- Join(joinType=[InnerJoin], where=[=(a, d)], select=[a, day, b, c, d], leftInputSpec=[NoUniqueKey], rightInputSpec=[NoUniqueKey])
:- Exchange(distribution=[hash[a]])
: +- Calc(select=[a, b, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day])
: +- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a, b], metadata=[]]], fields=[a, b])
: +- Calc(select=[a, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day])
: +- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a], metadata=[]]], fields=[a])
+- Exchange(distribution=[hash[d]])
+- Calc(select=[b, CONCAT(c, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd')) AS day, c, d])
+- TableSourceScan(table=[[default_catalog, default_database, src2, project=[b, c, d], metadata=[]]], fields=[b, c, d])
+- TableSourceScan(table=[[default_catalog, default_database, src2, project=[b, c, d], metadata=[]]], fields=[b, c, d])

Sink(table=[default_catalog.default_database.sink2], fields=[a, day, b0, c])
+- Calc(select=[a, day, b0, c], where=[>(b0, 100)])
+- Join(joinType=[InnerJoin], where=[=(a, d)], select=[a, b, day, b0, day0, c, d], leftInputSpec=[NoUniqueKey], rightInputSpec=[NoUniqueKey])
Sink(table=[default_catalog.default_database.sink2], fields=[a, day, b, c])
+- Calc(select=[a, day, b, c])
+- Join(joinType=[InnerJoin], where=[=(a, d)], select=[a, day, b, c, d], leftInputSpec=[NoUniqueKey], rightInputSpec=[NoUniqueKey])
:- Exchange(distribution=[hash[a]])
: +- Calc(select=[a, b, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day])
: +- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a, b], metadata=[]]], fields=[a, b])
: +- Calc(select=[a, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day])
: +- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a], metadata=[]]], fields=[a])
+- Exchange(distribution=[hash[d]])
+- Calc(select=[b, CONCAT(c, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd')) AS day, c, d])
+- Calc(select=[b, c, d], where=[>(b, 100)])
+- TableSourceScan(table=[[default_catalog, default_database, src2, project=[b, c, d], metadata=[]]], fields=[b, c, d])

advice[1]: [ADVICE] You might want to enable local-global two-phase optimization by configuring ('table.exec.mini-batch.enabled' to 'true', 'table.exec.mini-batch.allow-latency' to a positive long value, 'table.exec.mini-batch.size' to a positive long value).
advice[2]: [WARNING] The column(s): day(generated by non-deterministic function: CURRENT_TIMESTAMP ) can not satisfy the determinism requirement for correctly processing update message('UB'/'UA'/'D' in changelogMode, not 'I' only), this usually happens when input node has no upsertKey(upsertKeys=[{}]) or current node outputs non-deterministic update messages. Please consider removing these non-deterministic columns or making them deterministic by using deterministic functions.

related rel plan:
Calc(select=[a, b, DATE_FORMAT(CURRENT_TIMESTAMP(), _UTF-16LE'yyMMdd') AS day], changelogMode=[I,UB,UA,D])
+- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a, b], metadata=[]]], fields=[a, b], changelogMode=[I,UB,UA,D])
Calc(select=[a, DATE_FORMAT(CURRENT_TIMESTAMP(), _UTF-16LE'yyMMdd') AS day], changelogMode=[I,UB,UA,D])
+- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a], metadata=[]]], fields=[a], changelogMode=[I,UB,UA,D])


]]>
@@ -187,19 +187,19 @@ Calc(select=[EXPR$0, window_start])
<![CDATA[
LogicalProject(order_id=[$0], customer_id=[$1], product_id=[$2], ts=[$3])
+- LogicalProject(order_id=[$0], customer_id=[$1], product_id=[$5], ts=[$4])
+- LogicalCorrelate(correlation=[$cor3], joinType=[inner], requiredColumns=[{3}])
+- LogicalCorrelate(correlation=[$cor2], joinType=[inner], requiredColumns=[{3}])
:- LogicalWatermarkAssigner(rowtime=[ts], watermark=[$4])
: +- LogicalTableScan(table=[[cat, default, t]])
+- LogicalProject(product_id=[$0])
+- Uncollect
+- LogicalProject(product_ids=[$cor3.product_ids])
+- LogicalProject(product_ids=[$cor2.product_ids])
+- LogicalValues(tuples=[[{ 0 }]])
]]>
</Resource>
<Resource name="optimized exec plan">
<![CDATA[
Calc(select=[order_id, customer_id, f0 AS product_id, ts])
+- Correlate(invocation=[$UNNEST_ROWS$1($cor3.product_ids)], correlate=[table($UNNEST_ROWS$1($cor3.product_ids))], select=[order_id,customer_id,product_id,product_ids,ts,f0], rowType=[RecordType(INTEGER order_id, INTEGER customer_id, INTEGER product_id, INTEGER ARRAY product_ids, TIMESTAMP_LTZ(3) *ROWTIME* ts, INTEGER f0)], joinType=[INNER])
+- Correlate(invocation=[$UNNEST_ROWS$1($cor2.product_ids)], correlate=[table($UNNEST_ROWS$1($cor2.product_ids))], select=[order_id,customer_id,product_id,product_ids,ts,f0], rowType=[RecordType(INTEGER order_id, INTEGER customer_id, INTEGER product_id, INTEGER ARRAY product_ids, TIMESTAMP_LTZ(3) *ROWTIME* ts, INTEGER f0)], joinType=[INNER])
+- WatermarkAssigner(rowtime=[ts], watermark=[ts])
+- TableSourceScan(table=[[cat, default, t]], fields=[order_id, customer_id, product_id, product_ids, ts])
]]>
@@ -2438,23 +2438,28 @@ LogicalSink(table=[default_catalog.default_database.sink2], fields=[a, day, b, c
</Resource>
<Resource name="optimized exec plan">
<![CDATA[
Join(joinType=[InnerJoin], where=[(a = d)], select=[a, b, day, b0, day0, c, d], leftInputSpec=[NoUniqueKey], rightInputSpec=[NoUniqueKey])(reuse_id=[1])
:- Exchange(distribution=[hash[a]])
: +- Calc(select=[a, b, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day])
: +- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a, b], metadata=[]]], fields=[a, b])
+- Exchange(distribution=[hash[d]])
+- Calc(select=[b, CONCAT(c, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd')) AS day, c, d])
+- TableSourceScan(table=[[default_catalog, default_database, src2, project=[b, c, d], metadata=[]]], fields=[b, c, d])
Exchange(distribution=[hash[a]])(reuse_id=[1])
+- Calc(select=[a, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day])
+- TableSourceScan(table=[[default_catalog, default_database, src1, project=[a], metadata=[]]], fields=[a])

TableSourceScan(table=[[default_catalog, default_database, src2, project=[b, c, d], metadata=[]]], fields=[b, c, d])(reuse_id=[2])

Sink(table=[default_catalog.default_database.sink1], fields=[a, day, EXPR$2, EXPR$3])
+- GroupAggregate(groupBy=[a, day], select=[a, day, SUM_RETRACT(b) AS EXPR$2, COUNT_RETRACT(DISTINCT c) AS EXPR$3])
+- Exchange(distribution=[hash[a, day]])
+- Calc(select=[a, day, b0 AS b, c])
+- Reused(reference_id=[1])
+- Calc(select=[a, day, b, c])
+- Join(joinType=[InnerJoin], where=[(a = d)], select=[a, day, b, c, d], leftInputSpec=[NoUniqueKey], rightInputSpec=[NoUniqueKey])
:- Reused(reference_id=[1])
+- Exchange(distribution=[hash[d]])
+- Reused(reference_id=[2])

Sink(table=[default_catalog.default_database.sink2], fields=[a, day, b0, c])
+- Calc(select=[a, day, b0, c], where=[(b0 > 100)])
+- Reused(reference_id=[1])
Sink(table=[default_catalog.default_database.sink2], fields=[a, day, b, c])
+- Calc(select=[a, day, b, c])
+- Join(joinType=[InnerJoin], where=[(a = d)], select=[a, day, b, c, d], leftInputSpec=[NoUniqueKey], rightInputSpec=[NoUniqueKey])
:- Reused(reference_id=[1])
+- Exchange(distribution=[hash[d]])
+- Calc(select=[b, c, d], where=[(b > 100)])
+- Reused(reference_id=[2])
]]>
</Resource>
</TestCase>
@@ -2518,18 +2523,17 @@ LogicalSink(table=[default_catalog.default_database.sink2], fields=[a, b, d])
</Resource>
<Resource name="optimized exec plan">
<![CDATA[
Calc(select=[id, deepNested_nested2_num AS a, deepNested_nested1_name AS name, ((deepNested_nested1_value + deepNested_nested2_num) + metadata_1) AS b])(reuse_id=[1])
+- TableSourceScan(table=[[default_catalog, default_database, nested_src, project=[id, deepNested_nested2_num, deepNested_nested1_name, deepNested_nested1_value], metadata=[metadata_1]]], fields=[id, deepNested_nested2_num, deepNested_nested1_name, deepNested_nested1_value, metadata_1])
TableSourceScan(table=[[default_catalog, default_database, nested_src, project=[deepNested, deepNested_nested1_value, deepNested_nested2_num, metadata_1], metadata=[metadata_1]]], fields=[deepNested, deepNested_nested1_value, deepNested_nested2_num, metadata_1])(reuse_id=[1])

Sink(table=[default_catalog.default_database.sink1], fields=[a, b, d])
+- Calc(select=[a, day AS b, CAST(EXPR$2 AS BIGINT) AS d])
+- GroupAggregate(groupBy=[a, day], select=[a, day, SUM_RETRACT(b) AS EXPR$2])
+- Exchange(distribution=[hash[a, day]])
+- Calc(select=[a, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day, b])
+- Calc(select=[deepNested_nested2_num AS a, DATE_FORMAT(CURRENT_TIMESTAMP(), 'yyMMdd') AS day, ((deepNested_nested1_value + deepNested_nested2_num) + metadata_1) AS b])
+- Reused(reference_id=[1])

Sink(table=[default_catalog.default_database.sink2], fields=[a, b, d])
+- Calc(select=[a, name AS b, CAST(b AS BIGINT) AS d], where=[(b > 100)])
+- Calc(select=[deepNested.nested2.num AS a, deepNested.nested1.name AS b, CAST(((deepNested.nested1.value + deepNested.nested2.num) + metadata_1) AS BIGINT) AS d], where=[(((deepNested.nested1.value + deepNested.nested2.num) + metadata_1) > 100)])
+- Reused(reference_id=[1])
]]>
</Resource>