Skip to content

Commit

Permalink
make sure to handle unsupported collations well (vitessio#15134)
Browse files Browse the repository at this point in the history
Signed-off-by: Andres Taylor <[email protected]>
  • Loading branch information
systay authored and frouioui committed Feb 6, 2024
1 parent 2586e28 commit f2a117f
Show file tree
Hide file tree
Showing 14 changed files with 135 additions and 47 deletions.
5 changes: 5 additions & 0 deletions go/mysql/collations/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,8 @@ func (env *Environment) LookupByCharset(name string) *colldefaults {
func (env *Environment) LookupCharsetName(coll ID) string {
return env.byCharsetName[coll]
}

func (env *Environment) IsSupported(coll ID) bool {
_, supported := env.byID[coll]
return supported
}
2 changes: 1 addition & 1 deletion go/vt/vtgate/engine/aggregations.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (ap *AggregateParams) String() string {
if ap.WAssigned() {
keyCol = fmt.Sprintf("%s|%d", keyCol, ap.WCol)
}
if sqltypes.IsText(ap.Type.Type()) && ap.Type.Collation() != collations.Unknown {
if sqltypes.IsText(ap.Type.Type()) && ap.CollationEnv.IsSupported(ap.Type.Collation()) {
keyCol += " COLLATE " + ap.CollationEnv.LookupName(ap.Type.Collation())
}
dispOrigOp := ""
Expand Down
28 changes: 15 additions & 13 deletions go/vt/vtgate/executor_select_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,26 @@ import (
"testing"
"time"

_flag "vitess.io/vitess/go/internal/flag"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/streamlog"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vtenv"
"vitess.io/vitess/go/vt/vtgate/logstats"

"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

_flag "vitess.io/vitess/go/internal/flag"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/streamlog"
"vitess.io/vitess/go/test/utils"
"vitess.io/vitess/go/vt/discovery"
"vitess.io/vitess/go/vt/vterrors"
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
"vitess.io/vitess/go/vt/vttablet/sandboxconn"

querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtgatepb "vitess.io/vitess/go/vt/proto/vtgate"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vtenv"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vtgate/logstats"
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
"vitess.io/vitess/go/vt/vttablet/sandboxconn"
)

func TestSelectNext(t *testing.T) {
Expand Down Expand Up @@ -1982,6 +1980,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) {
Fields: []*querypb.Field{
{Name: "col1", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "textcol", Type: sqltypes.VarChar, Charset: uint32(collations.MySQL8().DefaultConnectionCharset())},
{Name: "weight_string(textcol)", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID},
},
InsertID: 0,
Rows: [][]sqltypes.Value{{
Expand All @@ -1990,6 +1989,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) {
// This will allow us to test that cross-shard ordering
// still works correctly.
sqltypes.NewVarChar(fmt.Sprintf("%d", i%4)),
sqltypes.NewVarBinary(fmt.Sprintf("%d", i%4)),
}},
}})
conns = append(conns, sbc)
Expand All @@ -2005,7 +2005,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) {
require.NoError(t, err)

wantQueries := []*querypb.BoundQuery{{
Sql: "select col1, textcol from `user` order by textcol desc",
Sql: "select col1, textcol, weight_string(textcol) from `user` order by textcol desc",
BindVariables: map[string]*querypb.BindVariable{},
}}
for _, conn := range conns {
Expand Down Expand Up @@ -2114,11 +2114,13 @@ func TestStreamSelectScatterOrderByVarChar(t *testing.T) {
Fields: []*querypb.Field{
{Name: "id", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "textcol", Type: sqltypes.VarChar, Charset: uint32(collations.MySQL8().DefaultConnectionCharset())},
{Name: "weight_string(textcol)", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID},
},
InsertID: 0,
Rows: [][]sqltypes.Value{{
sqltypes.NewInt32(1),
sqltypes.NewVarChar(fmt.Sprintf("%d", i%4)),
sqltypes.NewVarBinary(fmt.Sprintf("%d", i%4)),
}},
}})
conns = append(conns, sbc)
Expand All @@ -2131,7 +2133,7 @@ func TestStreamSelectScatterOrderByVarChar(t *testing.T) {
require.NoError(t, err)

wantQueries := []*querypb.BoundQuery{{
Sql: "select id, textcol from `user` order by textcol desc",
Sql: "select id, textcol, weight_string(textcol) from `user` order by textcol desc",
BindVariables: map[string]*querypb.BindVariable{},
}}
for _, conn := range conns {
Expand Down
6 changes: 2 additions & 4 deletions go/vt/vtgate/planbuilder/collations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ import (
"fmt"
"testing"

"vitess.io/vitess/go/test/vschemawrapper"
"vitess.io/vitess/go/vt/vtenv"

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/test/vschemawrapper"
"vitess.io/vitess/go/vt/vtenv"
"vitess.io/vitess/go/vt/vtgate/engine"
)

Expand Down Expand Up @@ -61,7 +60,6 @@ func (tc *collationTestCase) addCollationsToSchema(vschema *vschemawrapper.VSche
for i, c := range tbl.Columns {
if c.Name.EqualString(collation.colName) {
tbl.Columns[i].CollationName = collation.collationName
break
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/planbuilder/plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ func loadSchema(t testing.TB, filename string, setCollation bool) *vindexes.VSch
if setCollation {
for _, table := range ks.Tables {
for i, col := range table.Columns {
if sqltypes.IsText(col.Type) {
if sqltypes.IsText(col.Type) && col.CollationName == "" {
table.Columns[i].CollationName = "latin1_swedish_ci"
}
}
Expand Down
14 changes: 8 additions & 6 deletions go/vt/vtgate/planbuilder/testdata/aggr_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -4840,7 +4840,8 @@
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1|4) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"ResultColumns": 4,
"Inputs": [
{
"OperatorType": "Route",
Expand All @@ -4849,9 +4850,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by textcol1",
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by textcol1, weight_string(textcol2)",
"OrderBy": "2 ASC COLLATE latin1_swedish_ci",
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by textcol1 order by textcol1 asc",
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by textcol1, weight_string(textcol2) order by textcol1 asc",
"Table": "`user`"
}
]
Expand All @@ -4870,8 +4871,9 @@
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2|5) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"GroupBy": "0",
"ResultColumns": 5,
"Inputs": [
{
"OperatorType": "Route",
Expand All @@ -4880,9 +4882,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by col, textcol1",
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by col, textcol1, weight_string(textcol2)",
"OrderBy": "0 ASC, 3 ASC COLLATE latin1_swedish_ci",
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by col, textcol1 order by col asc, textcol1 asc",
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by col, textcol1, weight_string(textcol2) order by col asc, textcol1 asc",
"Table": "`user`"
}
]
Expand Down
37 changes: 34 additions & 3 deletions go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` where 1 != 1",
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, 3 ASC COLLATE latin1_swedish_ci",
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` where 1 != 1",
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, (3|6) ASC COLLATE ",
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
"ResultColumns": 4,
"Table": "`user`"
},
Expand Down Expand Up @@ -2174,5 +2174,36 @@
"user.user"
]
}
},
{
"comment": "DISTINCT on an unsupported collation should fall back on weightstrings",
"query": "select distinct textcol2 from user",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct textcol2 from user",
"Instructions": {
"OperatorType": "Distinct",
"Collations": [
"(0:1): "
],
"ResultColumns": 1,
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select textcol2, weight_string(textcol2) from `user` where 1 != 1",
"Query": "select distinct textcol2, weight_string(textcol2) from `user`",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
}
]
3 changes: 2 additions & 1 deletion go/vt/vtgate/planbuilder/testdata/vschemas/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@
},
{
"name": "textcol2",
"type": "VARCHAR"
"type": "VARCHAR",
"collation_name": "big5_bin"
}
]
},
Expand Down
10 changes: 8 additions & 2 deletions go/vt/vtgate/semantics/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func Analyze(statement sqlparser.Statement, currentDb string, si SchemaInformati
}

// Creation of the semantic table
return analyzer.newSemTable(statement, si.ConnCollation(), si.GetForeignKeyChecksState())
return analyzer.newSemTable(statement, si.ConnCollation(), si.GetForeignKeyChecksState(), si.Environment().CollationEnv())
}

// AnalyzeStrict analyzes the parsed query, and fails the analysis for any possible errors
Expand All @@ -98,7 +98,12 @@ func AnalyzeStrict(statement sqlparser.Statement, currentDb string, si SchemaInf
return st, nil
}

func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID, fkChecksState *bool) (*SemTable, error) {
func (a *analyzer) newSemTable(
statement sqlparser.Statement,
coll collations.ID,
fkChecksState *bool,
env *collations.Environment,
) (*SemTable, error) {
var comments *sqlparser.ParsedComments
commentedStmt, isCommented := statement.(sqlparser.Commented)
if isCommented {
Expand Down Expand Up @@ -133,6 +138,7 @@ func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID
childForeignKeysInvolved: childFks,
parentForeignKeysInvolved: parentFks,
childFkToUpdExprs: childFkToUpdExprs,
collEnv: env,
}, nil
}

Expand Down
9 changes: 7 additions & 2 deletions go/vt/vtgate/semantics/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1630,8 +1630,13 @@ func fakeSchemaInfo() *FakeSI {
Name: sqlparser.NewIdentifierCI("uid"),
Type: querypb.Type_INT64,
}, {
Name: sqlparser.NewIdentifierCI("name"),
Type: querypb.Type_VARCHAR,
Name: sqlparser.NewIdentifierCI("name"),
Type: querypb.Type_VARCHAR,
CollationName: "utf8_bin",
}, {
Name: sqlparser.NewIdentifierCI("textcol"),
Type: querypb.Type_VARCHAR,
CollationName: "big5_bin",
}}

si := &FakeSI{
Expand Down
1 change: 0 additions & 1 deletion go/vt/vtgate/semantics/real_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Envir
nameMap := map[string]any{}
cols := make([]ColumnInfo, 0, len(tbl.Columns))
for _, col := range tbl.Columns {

cols = append(cols, ColumnInfo{
Name: col.Name.String(),
Type: col.ToEvalengineType(collationEnv),
Expand Down
8 changes: 7 additions & 1 deletion go/vt/vtgate/semantics/semantic_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ type (
childForeignKeysInvolved map[TableSet][]vindexes.ChildFKInfo
parentForeignKeysInvolved map[TableSet][]vindexes.ParentFKInfo
childFkToUpdExprs map[string]sqlparser.UpdateExprs
collEnv *collations.Environment
}

columnName struct {
Expand Down Expand Up @@ -640,7 +641,12 @@ func (st *SemTable) NeedsWeightString(e sqlparser.Expr) bool {
if !found {
return true
}
return typ.Collation() == collations.Unknown && !sqltypes.IsNumber(typ.Type())

if !sqltypes.IsText(typ.Type()) {
return false
}

return !st.collEnv.IsSupported(typ.Collation())
}
}

Expand Down
36 changes: 36 additions & 0 deletions go/vt/vtgate/semantics/typer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/mysql/collations/colldata"
querypb "vitess.io/vitess/go/vt/proto/query"
"vitess.io/vitess/go/vt/sqlparser"
)
Expand Down Expand Up @@ -54,5 +55,40 @@ func TestNormalizerAndSemanticAnalysisIntegration(t *testing.T) {
require.Equal(t, test.typ, typ.Type().String())
})
}
}

// Tests that the types correctly picks up and sets the collation on columns
func TestColumnCollations(t *testing.T) {
tests := []struct {
query, collation string
}{
{query: "select textcol from t2"},
{query: "select name from t2", collation: "utf8mb3_bin"},
}

for _, test := range tests {
t.Run(test.query, func(t *testing.T) {
parse, err := sqlparser.NewTestParser().Parse(test.query)
require.NoError(t, err)

err = sqlparser.Normalize(parse, sqlparser.NewReservedVars("bv", sqlparser.BindVars{}), map[string]*querypb.BindVariable{})
require.NoError(t, err)

st, err := Analyze(parse, "d", fakeSchemaInfo())
require.NoError(t, err)
col := extract(parse.(*sqlparser.Select), 0)
typ, found := st.TypeForExpr(col)
require.True(t, found, "column was not typed")

require.Equal(t, "VARCHAR", typ.Type().String())
collation := colldata.Lookup(typ.Collation())
if test.collation != "" {
collation := colldata.Lookup(typ.Collation())
require.NotNil(t, collation)
require.Equal(t, test.collation, collation.Name())
} else {
require.Nil(t, collation)
}
})
}
}
Loading

0 comments on commit f2a117f

Please sign in to comment.