From 9814fd5dba2028a056d21b8711d6039657f60b36 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 5 Feb 2024 11:17:09 +0100 Subject: [PATCH 1/6] make sure to handle unsupported collations well Signed-off-by: Andres Taylor --- go/vt/vtgate/semantics/analyzer_test.go | 9 ++++-- go/vt/vtgate/semantics/derived_table.go | 4 +-- go/vt/vtgate/semantics/early_rewriter.go | 18 ++++++++++-- go/vt/vtgate/semantics/real_table.go | 22 ++++++++++----- go/vt/vtgate/semantics/semantic_state.go | 2 +- go/vt/vtgate/semantics/typer_test.go | 35 ++++++++++++++++++++++++ go/vt/vtgate/semantics/vindex_table.go | 2 +- go/vt/vtgate/semantics/vtable.go | 4 +-- go/vt/vtgate/vindexes/vschema.go | 10 +++---- 9 files changed, 83 insertions(+), 23 deletions(-) diff --git a/go/vt/vtgate/semantics/analyzer_test.go b/go/vt/vtgate/semantics/analyzer_test.go index 1df1ec7fd87..0257aad8a72 100644 --- a/go/vt/vtgate/semantics/analyzer_test.go +++ b/go/vt/vtgate/semantics/analyzer_test.go @@ -1630,8 +1630,13 @@ func fakeSchemaInfo() *FakeSI { Name: sqlparser.NewIdentifierCI("uid"), Type: querypb.Type_INT64, }, { - Name: sqlparser.NewIdentifierCI("name"), - Type: querypb.Type_VARCHAR, + Name: sqlparser.NewIdentifierCI("name"), + Type: querypb.Type_VARCHAR, + CollationName: "utf8_bin", + }, { + Name: sqlparser.NewIdentifierCI("textcol"), + Type: querypb.Type_VARCHAR, + CollationName: "big5_bin", }} si := &FakeSI{ diff --git a/go/vt/vtgate/semantics/derived_table.go b/go/vt/vtgate/semantics/derived_table.go index 0425d78ed93..4fe1c2b163d 100644 --- a/go/vt/vtgate/semantics/derived_table.go +++ b/go/vt/vtgate/semantics/derived_table.go @@ -154,14 +154,14 @@ func (dt *DerivedTable) GetVindexTable() *vindexes.Table { return nil } -func (dt *DerivedTable) getColumns() []ColumnInfo { +func (dt *DerivedTable) getColumns() ([]ColumnInfo, error) { cols := make([]ColumnInfo, 0, len(dt.columnNames)) for _, col := range dt.columnNames { cols = append(cols, ColumnInfo{ Name: col, }) } - return cols + return cols, nil } func (dt *DerivedTable) hasStar() bool { diff --git a/go/vt/vtgate/semantics/early_rewriter.go b/go/vt/vtgate/semantics/early_rewriter.go index 08d432ae9d0..510a26b93f1 100644 --- a/go/vt/vtgate/semantics/early_rewriter.go +++ b/go/vt/vtgate/semantics/early_rewriter.go @@ -680,7 +680,11 @@ func findOnlyOneTableInfoThatHasColumn(b *binder, tbl sqlparser.TableExpr, colum case *sqlparser.AliasedTableExpr: ts := b.tc.tableSetFor(tbl) tblInfo := b.tc.Tables[ts.TableOffset()] - for _, info := range tblInfo.getColumns() { + columns, err := tblInfo.getColumns() + if err != nil { + return nil, err + } + for _, info := range columns { if column.EqualString(info.Name) { return []TableInfo{tblInfo}, nil } @@ -835,9 +839,13 @@ func (e *expanderState) processColumnsFor(tbl TableInfo) error { From: https://dev.mysql.com/doc/refman/8.0/en/join.html */ + columns, err := tbl.getColumns() + if err != nil { + return err + } outer: // in this first loop we just find columns used in any JOIN USING used on this table - for _, col := range tbl.getColumns() { + for _, col := range columns { if col.Invisible { continue } @@ -856,7 +864,11 @@ outer: } // and this time around we are printing any columns not involved in any JOIN USING - for _, col := range tbl.getColumns() { + columns, err = tbl.getColumns() + if err != nil { + return err + } + for _, col := range columns { if col.Invisible { continue } diff --git a/go/vt/vtgate/semantics/real_table.go b/go/vt/vtgate/semantics/real_table.go index 72549b98e8c..e50f2c2a67f 100644 --- a/go/vt/vtgate/semantics/real_table.go +++ b/go/vt/vtgate/semantics/real_table.go @@ -40,7 +40,11 @@ var _ TableInfo = (*RealTable)(nil) // dependencies implements the TableInfo interface func (r *RealTable) dependencies(colName string, org originable) (dependencies, error) { ts := org.tableSetFor(r.ASTNode) - for _, info := range r.getColumns() { + columns, err := r.getColumns() + if err != nil { + return nil, err + } + for _, info := range columns { if strings.EqualFold(info.Name, colName) { return createCertain(ts, ts, info.Type), nil } @@ -68,7 +72,7 @@ func (r *RealTable) IsInfSchema() bool { } // GetColumns implements the TableInfo interface -func (r *RealTable) getColumns() []ColumnInfo { +func (r *RealTable) getColumns() ([]ColumnInfo, error) { return vindexTableToColumnInfo(r.Table, r.collationEnv) } @@ -117,24 +121,28 @@ func (r *RealTable) matches(name sqlparser.TableName) bool { return (name.Qualifier.IsEmpty() || name.Qualifier.String() == r.dbName) && r.tableName == name.Name.String() } -func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Environment) []ColumnInfo { +func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Environment) ([]ColumnInfo, error) { if tbl == nil { - return nil + return nil, nil } nameMap := map[string]any{} cols := make([]ColumnInfo, 0, len(tbl.Columns)) for _, col := range tbl.Columns { + tt, err := col.ToEvalengineType(collationEnv) + if err != nil { + return nil, err + } cols = append(cols, ColumnInfo{ Name: col.Name.String(), - Type: col.ToEvalengineType(collationEnv), + Type: tt, Invisible: col.Invisible, }) nameMap[col.Name.String()] = nil } // If table is authoritative, we do not need ColumnVindexes to help in resolving the unqualified columns. if tbl.ColumnListAuthoritative { - return cols + return cols, nil } for _, vindex := range tbl.ColumnVindexes { for _, column := range vindex.Columns { @@ -148,5 +156,5 @@ func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Envir nameMap[name] = nil } } - return cols + return cols, nil } diff --git a/go/vt/vtgate/semantics/semantic_state.go b/go/vt/vtgate/semantics/semantic_state.go index 9067e77dd88..0ba47876896 100644 --- a/go/vt/vtgate/semantics/semantic_state.go +++ b/go/vt/vtgate/semantics/semantic_state.go @@ -58,7 +58,7 @@ type ( canShortCut() shortCut // getColumns returns the known column information for this table - getColumns() []ColumnInfo + getColumns() ([]ColumnInfo, error) dependencies(colName string, org originable) (dependencies, error) getExprFor(s string) (sqlparser.Expr, error) diff --git a/go/vt/vtgate/semantics/typer_test.go b/go/vt/vtgate/semantics/typer_test.go index c87d5672dab..c071dc7e112 100644 --- a/go/vt/vtgate/semantics/typer_test.go +++ b/go/vt/vtgate/semantics/typer_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/require" + "vitess.io/vitess/go/mysql/collations/colldata" querypb "vitess.io/vitess/go/vt/proto/query" "vitess.io/vitess/go/vt/sqlparser" ) @@ -54,5 +55,39 @@ func TestNormalizerAndSemanticAnalysisIntegration(t *testing.T) { require.Equal(t, test.typ, typ.Type().String()) }) } +} + +func TestColumnCollations(t *testing.T) { + tests := []struct { + query, collation string + }{ + {query: "select textcol from t2"}, + {query: "select name from t2", collation: "utf8mb3_bin"}, + } + + for _, test := range tests { + t.Run(test.query, func(t *testing.T) { + parse, err := sqlparser.NewTestParser().Parse(test.query) + require.NoError(t, err) + err = sqlparser.Normalize(parse, sqlparser.NewReservedVars("bv", sqlparser.BindVars{}), map[string]*querypb.BindVariable{}) + require.NoError(t, err) + + st, err := Analyze(parse, "d", fakeSchemaInfo()) + require.NoError(t, err) + col := parse.(*sqlparser.Select).SelectExprs[0].(*sqlparser.AliasedExpr).Expr + typ, found := st.TypeForExpr(col) + require.True(t, found, "column was not typed") + + require.Equal(t, "VARCHAR", typ.Type().String()) + collation := colldata.Lookup(typ.Collation()) + if test.collation != "" { + collation := colldata.Lookup(typ.Collation()) + require.NotNil(t, collation) + require.Equal(t, test.collation, collation.Name()) + } else { + require.Nil(t, collation) + } + }) + } } diff --git a/go/vt/vtgate/semantics/vindex_table.go b/go/vt/vtgate/semantics/vindex_table.go index fba8f8ab9a0..945ad440b45 100644 --- a/go/vt/vtgate/semantics/vindex_table.go +++ b/go/vt/vtgate/semantics/vindex_table.go @@ -76,7 +76,7 @@ func (v *VindexTable) canShortCut() shortCut { } // GetColumns implements the TableInfo interface -func (v *VindexTable) getColumns() []ColumnInfo { +func (v *VindexTable) getColumns() ([]ColumnInfo, error) { return v.Table.getColumns() } diff --git a/go/vt/vtgate/semantics/vtable.go b/go/vt/vtgate/semantics/vtable.go index 133e38ff505..b4f49965f64 100644 --- a/go/vt/vtgate/semantics/vtable.go +++ b/go/vt/vtgate/semantics/vtable.go @@ -83,14 +83,14 @@ func (v *vTableInfo) GetVindexTable() *vindexes.Table { return nil } -func (v *vTableInfo) getColumns() []ColumnInfo { +func (v *vTableInfo) getColumns() ([]ColumnInfo, error) { cols := make([]ColumnInfo, 0, len(v.columnNames)) for _, col := range v.columnNames { cols = append(cols, ColumnInfo{ Name: col, }) } - return cols + return cols, nil } func (v *vTableInfo) hasStar() bool { diff --git a/go/vt/vtgate/vindexes/vschema.go b/go/vt/vtgate/vindexes/vschema.go index e1044d0136d..82a7db02215 100644 --- a/go/vt/vtgate/vindexes/vschema.go +++ b/go/vt/vtgate/vindexes/vschema.go @@ -219,15 +219,15 @@ func (col *Column) MarshalJSON() ([]byte, error) { return json.Marshal(cj) } -func (col *Column) ToEvalengineType(collationEnv *collations.Environment) evalengine.Type { +func (col *Column) ToEvalengineType(collationEnv *collations.Environment) (evalengine.Type, error) { collation := collations.CollationForType(col.Type, collationEnv.DefaultConnectionCharset()) if sqltypes.IsText(col.Type) { - coll, found := collationEnv.LookupID(col.CollationName) - if found { - collation = coll + collation, _ = collationEnv.LookupID(col.CollationName) + if collation == collations.Unknown { + return evalengine.Type{}, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown collation name %q", col.CollationName) } } - return evalengine.NewTypeEx(col.Type, collation, col.Nullable, col.Size, col.Scale) + return evalengine.NewTypeEx(col.Type, collation, col.Nullable, col.Size, col.Scale), nil } // KeyspaceSchema contains the schema(table) for a keyspace. From 63e36fc47683755973e1f163cf697811bebe32aa Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 5 Feb 2024 11:43:18 +0100 Subject: [PATCH 2/6] bug: make sure to handle unsupported collations in the planner Signed-off-by: Andres Taylor --- go/mysql/collations/env.go | 5 +++ go/vt/vtgate/engine/aggregations.go | 2 +- go/vt/vtgate/planbuilder/plan_test.go | 2 +- .../planbuilder/testdata/aggr_cases.json | 14 ++++--- .../testdata/postprocess_cases.json | 37 +++++++++++++++++-- .../planbuilder/testdata/vschemas/schema.json | 3 +- go/vt/vtgate/semantics/analyzer.go | 10 ++++- go/vt/vtgate/semantics/semantic_state.go | 8 +++- 8 files changed, 66 insertions(+), 15 deletions(-) diff --git a/go/mysql/collations/env.go b/go/mysql/collations/env.go index 9fe87230649..ae5419a5797 100644 --- a/go/mysql/collations/env.go +++ b/go/mysql/collations/env.go @@ -301,3 +301,8 @@ func (env *Environment) LookupByCharset(name string) *colldefaults { func (env *Environment) LookupCharsetName(coll ID) string { return env.byCharsetName[coll] } + +func (env *Environment) IsSupported(coll ID) bool { + _, supported := env.byID[coll] + return supported +} diff --git a/go/vt/vtgate/engine/aggregations.go b/go/vt/vtgate/engine/aggregations.go index 96e8cc294a9..3413234c84f 100644 --- a/go/vt/vtgate/engine/aggregations.go +++ b/go/vt/vtgate/engine/aggregations.go @@ -76,7 +76,7 @@ func (ap *AggregateParams) String() string { if ap.WAssigned() { keyCol = fmt.Sprintf("%s|%d", keyCol, ap.WCol) } - if sqltypes.IsText(ap.Type.Type()) && ap.Type.Collation() != collations.Unknown { + if sqltypes.IsText(ap.Type.Type()) && ap.CollationEnv.IsSupported(ap.Type.Collation()) { keyCol += " COLLATE " + ap.CollationEnv.LookupName(ap.Type.Collation()) } dispOrigOp := "" diff --git a/go/vt/vtgate/planbuilder/plan_test.go b/go/vt/vtgate/planbuilder/plan_test.go index 1a09aa555ec..3c484b6a664 100644 --- a/go/vt/vtgate/planbuilder/plan_test.go +++ b/go/vt/vtgate/planbuilder/plan_test.go @@ -586,7 +586,7 @@ func loadSchema(t testing.TB, filename string, setCollation bool) *vindexes.VSch if setCollation { for _, table := range ks.Tables { for i, col := range table.Columns { - if sqltypes.IsText(col.Type) { + if sqltypes.IsText(col.Type) && col.CollationName == "" { table.Columns[i].CollationName = "latin1_swedish_ci" } } diff --git a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json index 4a1c8fa1559..64764abc802 100644 --- a/go/vt/vtgate/planbuilder/testdata/aggr_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/aggr_cases.json @@ -4840,7 +4840,8 @@ "Instructions": { "OperatorType": "Aggregate", "Variant": "Scalar", - "Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", + "Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1|4) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", + "ResultColumns": 4, "Inputs": [ { "OperatorType": "Route", @@ -4849,9 +4850,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by textcol1", + "FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by textcol1, weight_string(textcol2)", "OrderBy": "2 ASC COLLATE latin1_swedish_ci", - "Query": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by textcol1 order by textcol1 asc", + "Query": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by textcol1, weight_string(textcol2) order by textcol1 asc", "Table": "`user`" } ] @@ -4870,8 +4871,9 @@ "Instructions": { "OperatorType": "Aggregate", "Variant": "Ordered", - "Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", + "Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2|5) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)", "GroupBy": "0", + "ResultColumns": 5, "Inputs": [ { "OperatorType": "Route", @@ -4880,9 +4882,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by col, textcol1", + "FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by col, textcol1, weight_string(textcol2)", "OrderBy": "0 ASC, 3 ASC COLLATE latin1_swedish_ci", - "Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by col, textcol1 order by col asc, textcol1 asc", + "Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by col, textcol1, weight_string(textcol2) order by col asc, textcol1 asc", "Table": "`user`" } ] diff --git a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json index 0ec62ed2574..6697da92c23 100644 --- a/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/postprocess_cases.json @@ -342,9 +342,9 @@ "Name": "user", "Sharded": true }, - "FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` where 1 != 1", - "OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, 3 ASC COLLATE latin1_swedish_ci", - "Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc", + "FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` where 1 != 1", + "OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, (3|6) ASC COLLATE ", + "Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc", "ResultColumns": 4, "Table": "`user`" }, @@ -2174,5 +2174,36 @@ "user.user" ] } + }, + { + "comment": "DISTINCT on an unsupported collation should fall back on weightstrings", + "query": "select distinct textcol2 from user", + "plan": { + "QueryType": "SELECT", + "Original": "select distinct textcol2 from user", + "Instructions": { + "OperatorType": "Distinct", + "Collations": [ + "(0:1): " + ], + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "Scatter", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select textcol2, weight_string(textcol2) from `user` where 1 != 1", + "Query": "select distinct textcol2, weight_string(textcol2) from `user`", + "Table": "`user`" + } + ] + }, + "TablesUsed": [ + "user.user" + ] + } } ] diff --git a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json index 3579205ba70..70f55c62f1f 100644 --- a/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json +++ b/go/vt/vtgate/planbuilder/testdata/vschemas/schema.json @@ -219,7 +219,8 @@ }, { "name": "textcol2", - "type": "VARCHAR" + "type": "VARCHAR", + "collation_name": "big5_bin" } ] }, diff --git a/go/vt/vtgate/semantics/analyzer.go b/go/vt/vtgate/semantics/analyzer.go index 546cdf76186..3c7470026d2 100644 --- a/go/vt/vtgate/semantics/analyzer.go +++ b/go/vt/vtgate/semantics/analyzer.go @@ -78,7 +78,7 @@ func Analyze(statement sqlparser.Statement, currentDb string, si SchemaInformati } // Creation of the semantic table - return analyzer.newSemTable(statement, si.ConnCollation(), si.GetForeignKeyChecksState()) + return analyzer.newSemTable(statement, si.ConnCollation(), si.GetForeignKeyChecksState(), si.Environment().CollationEnv()) } // AnalyzeStrict analyzes the parsed query, and fails the analysis for any possible errors @@ -98,7 +98,12 @@ func AnalyzeStrict(statement sqlparser.Statement, currentDb string, si SchemaInf return st, nil } -func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID, fkChecksState *bool) (*SemTable, error) { +func (a *analyzer) newSemTable( + statement sqlparser.Statement, + coll collations.ID, + fkChecksState *bool, + env *collations.Environment, +) (*SemTable, error) { var comments *sqlparser.ParsedComments commentedStmt, isCommented := statement.(sqlparser.Commented) if isCommented { @@ -133,6 +138,7 @@ func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID childForeignKeysInvolved: childFks, parentForeignKeysInvolved: parentFks, childFkToUpdExprs: childFkToUpdExprs, + collEnv: env, }, nil } diff --git a/go/vt/vtgate/semantics/semantic_state.go b/go/vt/vtgate/semantics/semantic_state.go index 0ba47876896..cd2c666633c 100644 --- a/go/vt/vtgate/semantics/semantic_state.go +++ b/go/vt/vtgate/semantics/semantic_state.go @@ -143,6 +143,7 @@ type ( childForeignKeysInvolved map[TableSet][]vindexes.ChildFKInfo parentForeignKeysInvolved map[TableSet][]vindexes.ParentFKInfo childFkToUpdExprs map[string]sqlparser.UpdateExprs + collEnv *collations.Environment } columnName struct { @@ -640,7 +641,12 @@ func (st *SemTable) NeedsWeightString(e sqlparser.Expr) bool { if !found { return true } - return typ.Collation() == collations.Unknown && !sqltypes.IsNumber(typ.Type()) + + if !sqltypes.IsText(typ.Type()) { + return false + } + + return !st.collEnv.IsSupported(typ.Collation()) } } From c635264b13f9f6a45a74b33069601f6403233df5 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 5 Feb 2024 12:07:58 +0100 Subject: [PATCH 3/6] test: set a default collation for all text columns Signed-off-by: Andres Taylor --- go/vt/vtgate/planbuilder/collations_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/go/vt/vtgate/planbuilder/collations_test.go b/go/vt/vtgate/planbuilder/collations_test.go index 8e90bdf5fcc..60ce8bfd612 100644 --- a/go/vt/vtgate/planbuilder/collations_test.go +++ b/go/vt/vtgate/planbuilder/collations_test.go @@ -20,12 +20,12 @@ import ( "fmt" "testing" - "vitess.io/vitess/go/test/vschemawrapper" - "vitess.io/vitess/go/vt/vtenv" - "github.com/stretchr/testify/require" "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/test/vschemawrapper" + "vitess.io/vitess/go/vt/vtenv" "vitess.io/vitess/go/vt/vtgate/engine" ) @@ -61,7 +61,8 @@ func (tc *collationTestCase) addCollationsToSchema(vschema *vschemawrapper.VSche for i, c := range tbl.Columns { if c.Name.EqualString(collation.colName) { tbl.Columns[i].CollationName = collation.collationName - break + } else if c.CollationName == "" && sqltypes.IsText(c.Type) { + tbl.Columns[i].CollationName = "latin1_swedish_ci" } } } From 3e5d494991972150a56ed3fd5097681282da506a Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 5 Feb 2024 13:09:55 +0100 Subject: [PATCH 4/6] bug: should not fail on unknown collations Signed-off-by: Andres Taylor --- go/vt/vtgate/semantics/derived_table.go | 4 ++-- go/vt/vtgate/semantics/early_rewriter.go | 18 +++--------------- go/vt/vtgate/semantics/real_table.go | 22 +++++++--------------- go/vt/vtgate/semantics/semantic_state.go | 2 +- go/vt/vtgate/semantics/typer_test.go | 3 ++- go/vt/vtgate/semantics/vindex_table.go | 2 +- go/vt/vtgate/semantics/vtable.go | 4 ++-- go/vt/vtgate/vindexes/vschema.go | 7 ++----- 8 files changed, 20 insertions(+), 42 deletions(-) diff --git a/go/vt/vtgate/semantics/derived_table.go b/go/vt/vtgate/semantics/derived_table.go index 4fe1c2b163d..0425d78ed93 100644 --- a/go/vt/vtgate/semantics/derived_table.go +++ b/go/vt/vtgate/semantics/derived_table.go @@ -154,14 +154,14 @@ func (dt *DerivedTable) GetVindexTable() *vindexes.Table { return nil } -func (dt *DerivedTable) getColumns() ([]ColumnInfo, error) { +func (dt *DerivedTable) getColumns() []ColumnInfo { cols := make([]ColumnInfo, 0, len(dt.columnNames)) for _, col := range dt.columnNames { cols = append(cols, ColumnInfo{ Name: col, }) } - return cols, nil + return cols } func (dt *DerivedTable) hasStar() bool { diff --git a/go/vt/vtgate/semantics/early_rewriter.go b/go/vt/vtgate/semantics/early_rewriter.go index 510a26b93f1..08d432ae9d0 100644 --- a/go/vt/vtgate/semantics/early_rewriter.go +++ b/go/vt/vtgate/semantics/early_rewriter.go @@ -680,11 +680,7 @@ func findOnlyOneTableInfoThatHasColumn(b *binder, tbl sqlparser.TableExpr, colum case *sqlparser.AliasedTableExpr: ts := b.tc.tableSetFor(tbl) tblInfo := b.tc.Tables[ts.TableOffset()] - columns, err := tblInfo.getColumns() - if err != nil { - return nil, err - } - for _, info := range columns { + for _, info := range tblInfo.getColumns() { if column.EqualString(info.Name) { return []TableInfo{tblInfo}, nil } @@ -839,13 +835,9 @@ func (e *expanderState) processColumnsFor(tbl TableInfo) error { From: https://dev.mysql.com/doc/refman/8.0/en/join.html */ - columns, err := tbl.getColumns() - if err != nil { - return err - } outer: // in this first loop we just find columns used in any JOIN USING used on this table - for _, col := range columns { + for _, col := range tbl.getColumns() { if col.Invisible { continue } @@ -864,11 +856,7 @@ outer: } // and this time around we are printing any columns not involved in any JOIN USING - columns, err = tbl.getColumns() - if err != nil { - return err - } - for _, col := range columns { + for _, col := range tbl.getColumns() { if col.Invisible { continue } diff --git a/go/vt/vtgate/semantics/real_table.go b/go/vt/vtgate/semantics/real_table.go index e50f2c2a67f..e05a098f629 100644 --- a/go/vt/vtgate/semantics/real_table.go +++ b/go/vt/vtgate/semantics/real_table.go @@ -40,11 +40,7 @@ var _ TableInfo = (*RealTable)(nil) // dependencies implements the TableInfo interface func (r *RealTable) dependencies(colName string, org originable) (dependencies, error) { ts := org.tableSetFor(r.ASTNode) - columns, err := r.getColumns() - if err != nil { - return nil, err - } - for _, info := range columns { + for _, info := range r.getColumns() { if strings.EqualFold(info.Name, colName) { return createCertain(ts, ts, info.Type), nil } @@ -72,7 +68,7 @@ func (r *RealTable) IsInfSchema() bool { } // GetColumns implements the TableInfo interface -func (r *RealTable) getColumns() ([]ColumnInfo, error) { +func (r *RealTable) getColumns() []ColumnInfo { return vindexTableToColumnInfo(r.Table, r.collationEnv) } @@ -121,18 +117,14 @@ func (r *RealTable) matches(name sqlparser.TableName) bool { return (name.Qualifier.IsEmpty() || name.Qualifier.String() == r.dbName) && r.tableName == name.Name.String() } -func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Environment) ([]ColumnInfo, error) { +func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Environment) []ColumnInfo { if tbl == nil { - return nil, nil + return nil } nameMap := map[string]any{} cols := make([]ColumnInfo, 0, len(tbl.Columns)) for _, col := range tbl.Columns { - tt, err := col.ToEvalengineType(collationEnv) - if err != nil { - return nil, err - } - + tt := col.ToEvalengineType(collationEnv) cols = append(cols, ColumnInfo{ Name: col.Name.String(), Type: tt, @@ -142,7 +134,7 @@ func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Envir } // If table is authoritative, we do not need ColumnVindexes to help in resolving the unqualified columns. if tbl.ColumnListAuthoritative { - return cols, nil + return cols } for _, vindex := range tbl.ColumnVindexes { for _, column := range vindex.Columns { @@ -156,5 +148,5 @@ func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Envir nameMap[name] = nil } } - return cols, nil + return cols } diff --git a/go/vt/vtgate/semantics/semantic_state.go b/go/vt/vtgate/semantics/semantic_state.go index cd2c666633c..7327fed9d67 100644 --- a/go/vt/vtgate/semantics/semantic_state.go +++ b/go/vt/vtgate/semantics/semantic_state.go @@ -58,7 +58,7 @@ type ( canShortCut() shortCut // getColumns returns the known column information for this table - getColumns() ([]ColumnInfo, error) + getColumns() []ColumnInfo dependencies(colName string, org originable) (dependencies, error) getExprFor(s string) (sqlparser.Expr, error) diff --git a/go/vt/vtgate/semantics/typer_test.go b/go/vt/vtgate/semantics/typer_test.go index c071dc7e112..7de5ecf1340 100644 --- a/go/vt/vtgate/semantics/typer_test.go +++ b/go/vt/vtgate/semantics/typer_test.go @@ -57,6 +57,7 @@ func TestNormalizerAndSemanticAnalysisIntegration(t *testing.T) { } } +// Tests that the types correctly picks up and sets the collation on columns func TestColumnCollations(t *testing.T) { tests := []struct { query, collation string @@ -75,7 +76,7 @@ func TestColumnCollations(t *testing.T) { st, err := Analyze(parse, "d", fakeSchemaInfo()) require.NoError(t, err) - col := parse.(*sqlparser.Select).SelectExprs[0].(*sqlparser.AliasedExpr).Expr + col := extract(parse.(*sqlparser.Select), 0) typ, found := st.TypeForExpr(col) require.True(t, found, "column was not typed") diff --git a/go/vt/vtgate/semantics/vindex_table.go b/go/vt/vtgate/semantics/vindex_table.go index 945ad440b45..fba8f8ab9a0 100644 --- a/go/vt/vtgate/semantics/vindex_table.go +++ b/go/vt/vtgate/semantics/vindex_table.go @@ -76,7 +76,7 @@ func (v *VindexTable) canShortCut() shortCut { } // GetColumns implements the TableInfo interface -func (v *VindexTable) getColumns() ([]ColumnInfo, error) { +func (v *VindexTable) getColumns() []ColumnInfo { return v.Table.getColumns() } diff --git a/go/vt/vtgate/semantics/vtable.go b/go/vt/vtgate/semantics/vtable.go index b4f49965f64..133e38ff505 100644 --- a/go/vt/vtgate/semantics/vtable.go +++ b/go/vt/vtgate/semantics/vtable.go @@ -83,14 +83,14 @@ func (v *vTableInfo) GetVindexTable() *vindexes.Table { return nil } -func (v *vTableInfo) getColumns() ([]ColumnInfo, error) { +func (v *vTableInfo) getColumns() []ColumnInfo { cols := make([]ColumnInfo, 0, len(v.columnNames)) for _, col := range v.columnNames { cols = append(cols, ColumnInfo{ Name: col, }) } - return cols, nil + return cols } func (v *vTableInfo) hasStar() bool { diff --git a/go/vt/vtgate/vindexes/vschema.go b/go/vt/vtgate/vindexes/vschema.go index 82a7db02215..5227c64483c 100644 --- a/go/vt/vtgate/vindexes/vschema.go +++ b/go/vt/vtgate/vindexes/vschema.go @@ -219,15 +219,12 @@ func (col *Column) MarshalJSON() ([]byte, error) { return json.Marshal(cj) } -func (col *Column) ToEvalengineType(collationEnv *collations.Environment) (evalengine.Type, error) { +func (col *Column) ToEvalengineType(collationEnv *collations.Environment) evalengine.Type { collation := collations.CollationForType(col.Type, collationEnv.DefaultConnectionCharset()) if sqltypes.IsText(col.Type) { collation, _ = collationEnv.LookupID(col.CollationName) - if collation == collations.Unknown { - return evalengine.Type{}, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unknown collation name %q", col.CollationName) - } } - return evalengine.NewTypeEx(col.Type, collation, col.Nullable, col.Size, col.Scale), nil + return evalengine.NewTypeEx(col.Type, collation, col.Nullable, col.Size, col.Scale) } // KeyspaceSchema contains the schema(table) for a keyspace. From db7a62c325c20bcb9bdc11277b65ea53b107bbd7 Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Mon, 5 Feb 2024 13:24:50 +0100 Subject: [PATCH 5/6] refactor: minor cleanups Signed-off-by: Andres Taylor --- go/vt/vtgate/planbuilder/collations_test.go | 3 --- go/vt/vtgate/semantics/real_table.go | 3 +-- go/vt/vtgate/vindexes/vschema.go | 16 ++++++++-------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/go/vt/vtgate/planbuilder/collations_test.go b/go/vt/vtgate/planbuilder/collations_test.go index 60ce8bfd612..b393e186679 100644 --- a/go/vt/vtgate/planbuilder/collations_test.go +++ b/go/vt/vtgate/planbuilder/collations_test.go @@ -23,7 +23,6 @@ import ( "github.com/stretchr/testify/require" "vitess.io/vitess/go/mysql/collations" - "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/test/vschemawrapper" "vitess.io/vitess/go/vt/vtenv" "vitess.io/vitess/go/vt/vtgate/engine" @@ -61,8 +60,6 @@ func (tc *collationTestCase) addCollationsToSchema(vschema *vschemawrapper.VSche for i, c := range tbl.Columns { if c.Name.EqualString(collation.colName) { tbl.Columns[i].CollationName = collation.collationName - } else if c.CollationName == "" && sqltypes.IsText(c.Type) { - tbl.Columns[i].CollationName = "latin1_swedish_ci" } } } diff --git a/go/vt/vtgate/semantics/real_table.go b/go/vt/vtgate/semantics/real_table.go index e05a098f629..d12df6acfa9 100644 --- a/go/vt/vtgate/semantics/real_table.go +++ b/go/vt/vtgate/semantics/real_table.go @@ -124,10 +124,9 @@ func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Envir nameMap := map[string]any{} cols := make([]ColumnInfo, 0, len(tbl.Columns)) for _, col := range tbl.Columns { - tt := col.ToEvalengineType(collationEnv) cols = append(cols, ColumnInfo{ Name: col.Name.String(), - Type: tt, + Type: col.ToEvalengineType(collationEnv), Invisible: col.Invisible, }) nameMap[col.Name.String()] = nil diff --git a/go/vt/vtgate/vindexes/vschema.go b/go/vt/vtgate/vindexes/vschema.go index 5227c64483c..8559cabc447 100644 --- a/go/vt/vtgate/vindexes/vschema.go +++ b/go/vt/vtgate/vindexes/vschema.go @@ -25,19 +25,17 @@ import ( "strings" "time" + "vitess.io/vitess/go/json2" "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqlescape" - vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" - "vitess.io/vitess/go/vt/vterrors" - "vitess.io/vitess/go/vt/vtgate/evalengine" - - "vitess.io/vitess/go/json2" "vitess.io/vitess/go/sqltypes" - "vitess.io/vitess/go/vt/sqlparser" - querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" vschemapb "vitess.io/vitess/go/vt/proto/vschema" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vterrors" + "vitess.io/vitess/go/vt/vtgate/evalengine" ) // TabletTypeSuffix maps the tablet type to its suffix string. @@ -220,9 +218,11 @@ func (col *Column) MarshalJSON() ([]byte, error) { } func (col *Column) ToEvalengineType(collationEnv *collations.Environment) evalengine.Type { - collation := collations.CollationForType(col.Type, collationEnv.DefaultConnectionCharset()) + var collation collations.ID if sqltypes.IsText(col.Type) { collation, _ = collationEnv.LookupID(col.CollationName) + } else { + collation = collations.CollationForType(col.Type, collationEnv.DefaultConnectionCharset()) } return evalengine.NewTypeEx(col.Type, collation, col.Nullable, col.Size, col.Scale) } From 341d8721e58225ca2dad819cff71df144e8cc6ec Mon Sep 17 00:00:00 2001 From: Andres Taylor Date: Tue, 6 Feb 2024 07:28:22 +0100 Subject: [PATCH 6/6] test: fix test expectations Signed-off-by: Andres Taylor --- go/vt/vtgate/executor_select_test.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/go/vt/vtgate/executor_select_test.go b/go/vt/vtgate/executor_select_test.go index 9603b5da5bc..dc4f1e0e726 100644 --- a/go/vt/vtgate/executor_select_test.go +++ b/go/vt/vtgate/executor_select_test.go @@ -26,28 +26,26 @@ import ( "testing" "time" - _flag "vitess.io/vitess/go/internal/flag" - "vitess.io/vitess/go/mysql/collations" - "vitess.io/vitess/go/streamlog" - "vitess.io/vitess/go/vt/topo/topoproto" - "vitess.io/vitess/go/vt/vtenv" - "vitess.io/vitess/go/vt/vtgate/logstats" - "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + _flag "vitess.io/vitess/go/internal/flag" + "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/streamlog" "vitess.io/vitess/go/test/utils" "vitess.io/vitess/go/vt/discovery" - "vitess.io/vitess/go/vt/vterrors" - _ "vitess.io/vitess/go/vt/vtgate/vindexes" - "vitess.io/vitess/go/vt/vttablet/sandboxconn" - querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" vtgatepb "vitess.io/vitess/go/vt/proto/vtgate" vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/topo/topoproto" + "vitess.io/vitess/go/vt/vtenv" + "vitess.io/vitess/go/vt/vterrors" + "vitess.io/vitess/go/vt/vtgate/logstats" + _ "vitess.io/vitess/go/vt/vtgate/vindexes" + "vitess.io/vitess/go/vt/vttablet/sandboxconn" ) func TestSelectNext(t *testing.T) { @@ -1982,6 +1980,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) { Fields: []*querypb.Field{ {Name: "col1", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)}, {Name: "textcol", Type: sqltypes.VarChar, Charset: uint32(collations.MySQL8().DefaultConnectionCharset())}, + {Name: "weight_string(textcol)", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID}, }, InsertID: 0, Rows: [][]sqltypes.Value{{ @@ -1990,6 +1989,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) { // This will allow us to test that cross-shard ordering // still works correctly. sqltypes.NewVarChar(fmt.Sprintf("%d", i%4)), + sqltypes.NewVarBinary(fmt.Sprintf("%d", i%4)), }}, }}) conns = append(conns, sbc) @@ -2005,7 +2005,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) { require.NoError(t, err) wantQueries := []*querypb.BoundQuery{{ - Sql: "select col1, textcol from `user` order by textcol desc", + Sql: "select col1, textcol, weight_string(textcol) from `user` order by textcol desc", BindVariables: map[string]*querypb.BindVariable{}, }} for _, conn := range conns { @@ -2114,11 +2114,13 @@ func TestStreamSelectScatterOrderByVarChar(t *testing.T) { Fields: []*querypb.Field{ {Name: "id", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)}, {Name: "textcol", Type: sqltypes.VarChar, Charset: uint32(collations.MySQL8().DefaultConnectionCharset())}, + {Name: "weight_string(textcol)", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID}, }, InsertID: 0, Rows: [][]sqltypes.Value{{ sqltypes.NewInt32(1), sqltypes.NewVarChar(fmt.Sprintf("%d", i%4)), + sqltypes.NewVarBinary(fmt.Sprintf("%d", i%4)), }}, }}) conns = append(conns, sbc) @@ -2131,7 +2133,7 @@ func TestStreamSelectScatterOrderByVarChar(t *testing.T) { require.NoError(t, err) wantQueries := []*querypb.BoundQuery{{ - Sql: "select id, textcol from `user` order by textcol desc", + Sql: "select id, textcol, weight_string(textcol) from `user` order by textcol desc", BindVariables: map[string]*querypb.BindVariable{}, }} for _, conn := range conns {