
feat(frontend): allow specifying (partial) schema when creating table with derived schema #20203

Open
wants to merge 12 commits into base: main
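
For context, a rough illustration of the feature (column names and options taken from the e2e tests added below; this snippet itself is not part of the diff): the column list may now name only a subset of the fields in the registered Avro schema. Declared columns are checked against the resolved schema, and the remaining columns are still derived from it.

  # Only `bar` is declared; `foo` is still derived from the registry schema,
  # and the declared type of `bar` is checked against it.
  create table t2 (bar int, gen_col int as bar + 1)
  WITH (
    ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
    topic = 'avro_partial_schema_test'
  )
  FORMAT PLAIN ENCODE AVRO (
    schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
  );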
2 changes: 1 addition & 1 deletion e2e_test/source_inline/connection/ddl.slt
@@ -81,7 +81,7 @@ create sink sink_kafka from data_table with (

sleep 3s

query IT rowsort
query IT rowsort retry 3 backoff 5s
select a, b from t1;
----
1 a
12 changes: 0 additions & 12 deletions e2e_test/source_inline/kafka/avro/alter_table.slt
@@ -128,18 +128,6 @@ Caused by these errors (recent errors listed first):
4: Item not found: Invalid column: bar


# Can't drop non-generated column
# TODO(purify): may support it.
statement error
ALTER TABLE t DROP COLUMN foo;
----
db error: ERROR: Failed to run the query

Caused by:
Not supported: alter table with schema registry
HINT: try `ALTER TABLE .. FORMAT .. ENCODE .. (...)` instead


# Drop generated column
statement ok
ALTER TABLE t DROP COLUMN gen_col;
199 changes: 199 additions & 0 deletions e2e_test/source_inline/kafka/avro/partial_schema.slt
@@ -0,0 +1,199 @@
control substitution on

# cleanup
system ok
rpk topic delete 'avro_partial_schema_test' || true; \
(rpk sr subject delete 'avro_partial_schema_test-value' && rpk sr subject delete 'avro_partial_schema_test-value' --permanent) || true;

# create topic and sr subject
system ok
rpk topic create 'avro_partial_schema_test'

# create a schema
system ok
sr_register avro_partial_schema_test-value AVRO <<< '{"type":"record","name":"Root","fields":[{"name":"bar","type":"int","default":0},{"name":"foo","type":"string"}]}'

# Specify schema
statement ok
create table t1 (foo varchar, bar int)
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro_partial_schema_test'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);

# Specify partial schema
statement ok
create table t2 (bar int, gen_col int as bar + 1)
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro_partial_schema_test'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);

# Specify incorrect schema
statement error
create table t (bar int, foo varchar, baz int)
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro_partial_schema_test'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);
----
db error: ERROR: Failed to run the query

Caused by:
Protocol error: Column "baz" is defined in SQL but not found in the source


statement error
create table t (bar double)
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro_partial_schema_test'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);
----
db error: ERROR: Failed to run the query

Caused by:
Protocol error: Data type mismatch for column "bar". Defined in SQL as "double precision", but found in the source as "integer"


# Resolve schema
statement ok
create table tstar (*, gen_col int as bar + 1)
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro_partial_schema_test'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);

# Omitting the wildcard is interpreted as `*`, for backward compatibility of the syntax
statement ok
create table tstar2 (gen_col int as bar + 1)
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro_partial_schema_test'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);


# Demonstrate purified definition
query TT rowsort
SELECT name, SUBSTRING(definition, 1, POSITION(' WITH' IN definition) - 1) FROM rw_tables WHERE name LIKE 't%';
----
t1 CREATE TABLE t1 (foo CHARACTER VARYING, bar INT)
t2 CREATE TABLE t2 (bar INT, gen_col INT AS bar + 1)
tstar CREATE TABLE tstar (bar INT, foo CHARACTER VARYING, gen_col INT AS bar + 1)
tstar2 CREATE TABLE tstar2 (bar INT, foo CHARACTER VARYING, gen_col INT AS bar + 1)

# create a new schema
system ok
sr_register avro_partial_schema_test-value AVRO <<< '{"type":"record","name":"Root","fields":[{"name":"bar","type":"int","default":0},{"name":"foo","type":"string"}, {"name":"baz", "type":"double", "default":0}]}'
Contributor:

Can you add one more test

  1. declare schema (a int, b varchar)
  2. create table with generated column (a int, b varchar, c int as a + 1)
  3. create a newer version in schema registry (a int, b varchar, c int)
  4. what does the purified SQL look like at the moment?

Member Author:

Step 3 will not affect the purified SQL, because we don't access (and then check against) the external schema unless we are going to perform a schema change.

However, this is still an interesting example, as I'm not sure about the behavior if we call REFRESH SCHEMA or ADD COLUMN afterwards. I'm afraid this has long been undefined behavior, since we allow defining a generated column on a source. Will test it.
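
A rough slt-style sketch of the test requested above (hypothetical topic/subject and column names; topic creation and cleanup elided; not part of this diff):

  # 1. Register schema v1: (a int, b varchar)
  system ok
  sr_register avro_gen_col_test-value AVRO <<< '{"type":"record","name":"Root","fields":[{"name":"a","type":"int"},{"name":"b","type":"string"}]}'

  # 2. Create the table with a generated column
  statement ok
  create table tg (a int, b varchar, c int as a + 1)
  WITH (
    ${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
    topic = 'avro_gen_col_test'
  )
  FORMAT PLAIN ENCODE AVRO (
    schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
  );

  # 3. Register schema v2 with a physical `c` that collides with the generated column
  system ok
  sr_register avro_gen_col_test-value AVRO <<< '{"type":"record","name":"Root","fields":[{"name":"a","type":"int"},{"name":"b","type":"string"},{"name":"c","type":"int","default":0}]}'

  # 4. Per the reply above, the purified definition is expected to stay unchanged until a
  #    schema change (e.g. REFRESH SCHEMA or ADD COLUMN) is attempted; the behavior at that
  #    point is the open question.
  query TT
  SELECT name, SUBSTRING(definition, 1, POSITION(' WITH' IN definition) - 1) FROM rw_tables WHERE name = 'tg';
  ----
  tg CREATE TABLE tg (a INT, b CHARACTER VARYING, c INT AS a + 1)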


# Can perform `[ADD | DROP] COLUMN` no matter whether the schema was resolved from the registry.
# However, the schema will still be checked against the resolved schema.
statement ok
alter table t1 drop column foo;

statement error
alter table t2 add column baz int;
----
db error: ERROR: Failed to run the query

Caused by:
Protocol error: Data type mismatch for column "baz". Defined in SQL as "integer", but found in the source as "double precision"
Comment on lines +112 to +118

Contributor:

does it mean we always keep track of the latest version in the schema registry and prevent possible future name/type collisions?

Member Author:

Yes



statement ok
alter table t2 add column baz double;

statement error
alter table t2 add column bbaazz double;
----
db error: ERROR: Failed to run the query

Caused by:
Protocol error: Column "bbaazz" is defined in SQL but not found in the source


statement ok
alter table tstar drop column foo;

statement error
alter table tstar add column baz int;
----
db error: ERROR: Failed to run the query

Caused by:
Protocol error: Data type mismatch for column "baz". Defined in SQL as "integer", but found in the source as "double precision"


statement ok
alter table tstar add column baz double;

statement error
alter table tstar add column bbaazz double;
----
db error: ERROR: Failed to run the query

Caused by:
Protocol error: Column "bbaazz" is defined in SQL but not found in the source


# Demonstrate purified definition
query TT rowsort
SELECT name, SUBSTRING(definition, 1, POSITION(' WITH' IN definition) - 1) FROM rw_tables WHERE name LIKE 't%';
----
t1 CREATE TABLE t1 (bar INT)
t2 CREATE TABLE t2 (bar INT, gen_col INT AS bar + 1, baz DOUBLE)
tstar CREATE TABLE tstar (bar INT, gen_col INT AS bar + 1, baz DOUBLE)
tstar2 CREATE TABLE tstar2 (bar INT, foo CHARACTER VARYING, gen_col INT AS bar + 1)

# Can refresh schema no matter whether the schema was resolved from the registry
statement ok
alter table t1 refresh schema;

statement ok
alter table t2 refresh schema;

statement ok
alter table tstar refresh schema;

statement ok
alter table tstar2 refresh schema;

# Demonstrate purified definition
query TT rowsort
SELECT name, SUBSTRING(definition, 1, POSITION(' WITH' IN definition) - 1) FROM rw_tables WHERE name LIKE 't%';
----
t1 CREATE TABLE t1 (bar INT, foo CHARACTER VARYING, baz DOUBLE)
t2 CREATE TABLE t2 (bar INT, foo CHARACTER VARYING, baz DOUBLE, gen_col INT AS bar + 1)
tstar CREATE TABLE tstar (bar INT, foo CHARACTER VARYING, baz DOUBLE, gen_col INT AS bar + 1)
tstar2 CREATE TABLE tstar2 (bar INT, foo CHARACTER VARYING, baz DOUBLE, gen_col INT AS bar + 1)

# Cleanup
statement ok
DROP TABLE t1;

statement ok
DROP TABLE t2;

statement ok
DROP TABLE tstar;

statement ok
DROP TABLE tstar2;
59 changes: 25 additions & 34 deletions src/frontend/src/handler/alter_table_column.rs
@@ -28,7 +28,7 @@ use risingwave_pb::stream_plan::stream_node::PbNodeBody;
use risingwave_pb::stream_plan::{ProjectNode, StreamFragmentGraph};
use risingwave_sqlparser::ast::{AlterTableOperation, ColumnOption, ObjectName, Statement};

use super::create_source::{schema_has_schema_registry, SqlColumnStrategy};
use super::create_source::SqlColumnStrategy;
use super::create_table::{generate_stream_graph_for_replace_table, ColumnIdGenerator};
use super::util::SourceSchemaCompatExt;
use super::{HandlerArgs, RwPgResponse};
@@ -260,33 +260,10 @@ pub async fn handle_alter_table_column(

// Retrieve the original table definition and parse it to AST.
let mut definition = original_catalog.create_sql_ast_purified()?;
let Statement::CreateTable {
columns,
format_encode,
..
} = &mut definition
else {
let Statement::CreateTable { columns, .. } = &mut definition else {
panic!("unexpected statement: {:?}", definition);
};

let format_encode = format_encode
.clone()
.map(|format_encode| format_encode.into_v2_with_warning());

let fail_if_has_schema_registry = || {
if let Some(format_encode) = &format_encode
&& schema_has_schema_registry(format_encode)
{
// TODO(purify): we may support this.
Err(ErrorCode::NotSupported(
"alter table with schema registry".to_owned(),
"try `ALTER TABLE .. FORMAT .. ENCODE .. (...)` instead".to_owned(),
))
} else {
Ok(())
}
};

if !original_catalog.incoming_sinks.is_empty()
&& matches!(operation, AlterTableOperation::DropColumn { .. })
{
@@ -295,12 +272,27 @@
))?;
}

match operation {
// The `sql_column_strategy` will be `FollowChecked` if the operation is `AddColumn`, and
// `FollowUnchecked` if the operation is `DropColumn`.
//
// Consider the following example:
// - There was a column `foo` and a generated column `gen` that references `foo`.
// - The external schema is updated to remove `foo`.
// - The user tries to drop `foo` from the table.
//
// Dropping `foo` directly will fail because `gen` references `foo`. However, dropping `gen`
// first will also be rejected because `foo` does not exist any more. Also, executing
// `REFRESH SCHEMA` will not help because it keeps the generated column. The user gets stuck.
//
// `FollowUnchecked` works around this issue. There are also some alternatives:
// - Allow dropping multiple columns at once.
// - Check against the persisted schema, instead of resolving again.
//
// Applied only to tables with schema registry.
let sql_column_strategy = match operation {
AlterTableOperation::AddColumn {
column_def: new_column,
} => {
fail_if_has_schema_registry()?;

// Duplicated names can actually be checked by `StreamMaterialize`. We do here for
// better error reporting.
let new_column_name = new_column.name.real_value();
Expand All @@ -325,6 +317,8 @@ pub async fn handle_alter_table_column(

// Add the new column to the table definition if it is not created by `create table (*)` syntax.
columns.push(new_column);

SqlColumnStrategy::FollowChecked
}

AlterTableOperation::DropColumn {
@@ -338,10 +332,6 @@

// Check if the column to drop is referenced by any generated columns.
for column in original_catalog.columns() {
if column_name.real_value() == column.name() && !column.is_generated() {
fail_if_has_schema_registry()?;
}

if let Some(expr) = column.generated_expr() {
let expr = ExprImpl::from_expr_proto(expr)?;
let refs = expr.collect_input_refs(original_catalog.columns().len());
@@ -381,17 +371,18 @@
column_name, table_name
)))?
}

SqlColumnStrategy::FollowUnchecked
}

_ => unreachable!(),
};

let (source, table, graph, col_index_mapping, job_type) = get_replace_table_plan(
&session,
table_name,
definition,
&original_catalog,
SqlColumnStrategy::Follow,
sql_column_strategy,
)
.await?;

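To make the `foo`/`gen` scenario from the comment above concrete, a hedged SQL sketch (hypothetical table and column names; it restates the behavior the comment describes rather than anything verified here):

  -- The table was created while the external schema still contained `foo`:
  --   create table t (foo int, gen int as foo + 1) ... FORMAT PLAIN ENCODE AVRO (schema.registry = ...);
  -- After the external schema drops `foo`:

  alter table t drop column foo;  -- rejected: generated column `gen` references `foo`
  alter table t drop column gen;  -- under a checked strategy, also rejected: `foo` is defined in SQL
                                  -- but no longer found in the source

  -- With `FollowUnchecked` for DROP COLUMN, the second statement succeeds, after which
  -- `foo` itself can be dropped as well.
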
2 changes: 1 addition & 1 deletion src/frontend/src/handler/create_sink.rs
@@ -558,7 +558,7 @@ pub(crate) async fn reparse_table_for_sink(
None,
include_column_options,
engine,
SqlColumnStrategy::Follow,
SqlColumnStrategy::FollowUnchecked,
)
.await?;
