Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make sure to handle unsupported collations well #15134

Merged
merged 6 commits into from
Feb 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions go/mysql/collations/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,8 @@ func (env *Environment) LookupByCharset(name string) *colldefaults {
func (env *Environment) LookupCharsetName(coll ID) string {
return env.byCharsetName[coll]
}

func (env *Environment) IsSupported(coll ID) bool {
_, supported := env.byID[coll]
return supported
}
2 changes: 1 addition & 1 deletion go/vt/vtgate/engine/aggregations.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (ap *AggregateParams) String() string {
if ap.WAssigned() {
keyCol = fmt.Sprintf("%s|%d", keyCol, ap.WCol)
}
if sqltypes.IsText(ap.Type.Type()) && ap.Type.Collation() != collations.Unknown {
if sqltypes.IsText(ap.Type.Type()) && ap.CollationEnv.IsSupported(ap.Type.Collation()) {
keyCol += " COLLATE " + ap.CollationEnv.LookupName(ap.Type.Collation())
}
dispOrigOp := ""
Expand Down
28 changes: 15 additions & 13 deletions go/vt/vtgate/executor_select_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,26 @@ import (
"testing"
"time"

_flag "vitess.io/vitess/go/internal/flag"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/streamlog"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vtenv"
"vitess.io/vitess/go/vt/vtgate/logstats"

"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

_flag "vitess.io/vitess/go/internal/flag"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/streamlog"
"vitess.io/vitess/go/test/utils"
"vitess.io/vitess/go/vt/discovery"
"vitess.io/vitess/go/vt/vterrors"
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
"vitess.io/vitess/go/vt/vttablet/sandboxconn"

querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtgatepb "vitess.io/vitess/go/vt/proto/vtgate"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/topo/topoproto"
"vitess.io/vitess/go/vt/vtenv"
"vitess.io/vitess/go/vt/vterrors"
"vitess.io/vitess/go/vt/vtgate/logstats"
_ "vitess.io/vitess/go/vt/vtgate/vindexes"
"vitess.io/vitess/go/vt/vttablet/sandboxconn"
)

func TestSelectNext(t *testing.T) {
Expand Down Expand Up @@ -1982,6 +1980,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) {
Fields: []*querypb.Field{
{Name: "col1", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "textcol", Type: sqltypes.VarChar, Charset: uint32(collations.MySQL8().DefaultConnectionCharset())},
{Name: "weight_string(textcol)", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID},
},
InsertID: 0,
Rows: [][]sqltypes.Value{{
Expand All @@ -1990,6 +1989,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) {
// This will allow us to test that cross-shard ordering
// still works correctly.
sqltypes.NewVarChar(fmt.Sprintf("%d", i%4)),
sqltypes.NewVarBinary(fmt.Sprintf("%d", i%4)),
}},
}})
conns = append(conns, sbc)
Expand All @@ -2005,7 +2005,7 @@ func TestSelectScatterOrderByVarChar(t *testing.T) {
require.NoError(t, err)

wantQueries := []*querypb.BoundQuery{{
Sql: "select col1, textcol from `user` order by textcol desc",
Sql: "select col1, textcol, weight_string(textcol) from `user` order by textcol desc",
BindVariables: map[string]*querypb.BindVariable{},
}}
for _, conn := range conns {
Expand Down Expand Up @@ -2114,11 +2114,13 @@ func TestStreamSelectScatterOrderByVarChar(t *testing.T) {
Fields: []*querypb.Field{
{Name: "id", Type: sqltypes.Int32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "textcol", Type: sqltypes.VarChar, Charset: uint32(collations.MySQL8().DefaultConnectionCharset())},
{Name: "weight_string(textcol)", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID},
},
InsertID: 0,
Rows: [][]sqltypes.Value{{
sqltypes.NewInt32(1),
sqltypes.NewVarChar(fmt.Sprintf("%d", i%4)),
sqltypes.NewVarBinary(fmt.Sprintf("%d", i%4)),
}},
}})
conns = append(conns, sbc)
Expand All @@ -2131,7 +2133,7 @@ func TestStreamSelectScatterOrderByVarChar(t *testing.T) {
require.NoError(t, err)

wantQueries := []*querypb.BoundQuery{{
Sql: "select id, textcol from `user` order by textcol desc",
Sql: "select id, textcol, weight_string(textcol) from `user` order by textcol desc",
BindVariables: map[string]*querypb.BindVariable{},
}}
for _, conn := range conns {
Expand Down
6 changes: 2 additions & 4 deletions go/vt/vtgate/planbuilder/collations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ import (
"fmt"
"testing"

"vitess.io/vitess/go/test/vschemawrapper"
"vitess.io/vitess/go/vt/vtenv"

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/test/vschemawrapper"
"vitess.io/vitess/go/vt/vtenv"
"vitess.io/vitess/go/vt/vtgate/engine"
)

Expand Down Expand Up @@ -61,7 +60,6 @@ func (tc *collationTestCase) addCollationsToSchema(vschema *vschemawrapper.VSche
for i, c := range tbl.Columns {
if c.Name.EqualString(collation.colName) {
tbl.Columns[i].CollationName = collation.collationName
break
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/planbuilder/plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ func loadSchema(t testing.TB, filename string, setCollation bool) *vindexes.VSch
if setCollation {
for _, table := range ks.Tables {
for i, col := range table.Columns {
if sqltypes.IsText(col.Type) {
if sqltypes.IsText(col.Type) && col.CollationName == "" {
table.Columns[i].CollationName = "latin1_swedish_ci"
}
}
Expand Down
14 changes: 8 additions & 6 deletions go/vt/vtgate/planbuilder/testdata/aggr_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -4840,7 +4840,8 @@
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Scalar",
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"Aggregates": "min(0 COLLATE latin1_swedish_ci) AS min(textcol1), max(1|4) AS max(textcol2), sum_distinct(2 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(3 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"ResultColumns": 4,
"Inputs": [
{
"OperatorType": "Route",
Expand All @@ -4849,9 +4850,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by textcol1",
"FieldQuery": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by textcol1, weight_string(textcol2)",
"OrderBy": "2 ASC COLLATE latin1_swedish_ci",
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by textcol1 order by textcol1 asc",
"Query": "select min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by textcol1, weight_string(textcol2) order by textcol1 asc",
"Table": "`user`"
}
]
Expand All @@ -4870,8 +4871,9 @@
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2 COLLATE latin1_swedish_ci) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"Aggregates": "min(1 COLLATE latin1_swedish_ci) AS min(textcol1), max(2|5) AS max(textcol2), sum_distinct(3 COLLATE latin1_swedish_ci) AS sum(distinct textcol1), count_distinct(4 COLLATE latin1_swedish_ci) AS count(distinct textcol1)",
"GroupBy": "0",
"ResultColumns": 5,
"Inputs": [
{
"OperatorType": "Route",
Expand All @@ -4880,9 +4882,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` where 1 != 1 group by col, textcol1",
"FieldQuery": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` where 1 != 1 group by col, textcol1, weight_string(textcol2)",
"OrderBy": "0 ASC, 3 ASC COLLATE latin1_swedish_ci",
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1 from `user` group by col, textcol1 order by col asc, textcol1 asc",
"Query": "select col, min(textcol1), max(textcol2), textcol1, textcol1, weight_string(textcol2) from `user` group by col, textcol1, weight_string(textcol2) order by col asc, textcol1 asc",
"Table": "`user`"
}
]
Expand Down
37 changes: 34 additions & 3 deletions go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,9 @@
"Name": "user",
"Sharded": true
},
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` where 1 != 1",
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, 3 ASC COLLATE latin1_swedish_ci",
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
"FieldQuery": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` where 1 != 1",
"OrderBy": "(0|4) ASC, 1 ASC COLLATE latin1_swedish_ci, (2|5) ASC, (3|6) ASC COLLATE ",
"Query": "select a, textcol1, b, textcol2, weight_string(a), weight_string(b), weight_string(textcol2) from `user` order by a asc, textcol1 asc, b asc, textcol2 asc",
"ResultColumns": 4,
"Table": "`user`"
},
Expand Down Expand Up @@ -2174,5 +2174,36 @@
"user.user"
]
}
},
{
"comment": "DISTINCT on an unsupported collation should fall back on weightstrings",
"query": "select distinct textcol2 from user",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct textcol2 from user",
"Instructions": {
"OperatorType": "Distinct",
"Collations": [
"(0:1): "
],
"ResultColumns": 1,
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select textcol2, weight_string(textcol2) from `user` where 1 != 1",
"Query": "select distinct textcol2, weight_string(textcol2) from `user`",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
}
]
3 changes: 2 additions & 1 deletion go/vt/vtgate/planbuilder/testdata/vschemas/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@
},
{
"name": "textcol2",
"type": "VARCHAR"
"type": "VARCHAR",
"collation_name": "big5_bin"
}
]
},
Expand Down
10 changes: 8 additions & 2 deletions go/vt/vtgate/semantics/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func Analyze(statement sqlparser.Statement, currentDb string, si SchemaInformati
}

// Creation of the semantic table
return analyzer.newSemTable(statement, si.ConnCollation(), si.GetForeignKeyChecksState())
return analyzer.newSemTable(statement, si.ConnCollation(), si.GetForeignKeyChecksState(), si.Environment().CollationEnv())
}

// AnalyzeStrict analyzes the parsed query, and fails the analysis for any possible errors
Expand All @@ -98,7 +98,12 @@ func AnalyzeStrict(statement sqlparser.Statement, currentDb string, si SchemaInf
return st, nil
}

func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID, fkChecksState *bool) (*SemTable, error) {
func (a *analyzer) newSemTable(
statement sqlparser.Statement,
coll collations.ID,
fkChecksState *bool,
env *collations.Environment,
) (*SemTable, error) {
var comments *sqlparser.ParsedComments
commentedStmt, isCommented := statement.(sqlparser.Commented)
if isCommented {
Expand Down Expand Up @@ -133,6 +138,7 @@ func (a *analyzer) newSemTable(statement sqlparser.Statement, coll collations.ID
childForeignKeysInvolved: childFks,
parentForeignKeysInvolved: parentFks,
childFkToUpdExprs: childFkToUpdExprs,
collEnv: env,
}, nil
}

Expand Down
9 changes: 7 additions & 2 deletions go/vt/vtgate/semantics/analyzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1630,8 +1630,13 @@ func fakeSchemaInfo() *FakeSI {
Name: sqlparser.NewIdentifierCI("uid"),
Type: querypb.Type_INT64,
}, {
Name: sqlparser.NewIdentifierCI("name"),
Type: querypb.Type_VARCHAR,
Name: sqlparser.NewIdentifierCI("name"),
Type: querypb.Type_VARCHAR,
CollationName: "utf8_bin",
}, {
Name: sqlparser.NewIdentifierCI("textcol"),
Type: querypb.Type_VARCHAR,
CollationName: "big5_bin",
}}

si := &FakeSI{
Expand Down
1 change: 0 additions & 1 deletion go/vt/vtgate/semantics/real_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ func vindexTableToColumnInfo(tbl *vindexes.Table, collationEnv *collations.Envir
nameMap := map[string]any{}
cols := make([]ColumnInfo, 0, len(tbl.Columns))
for _, col := range tbl.Columns {

cols = append(cols, ColumnInfo{
Name: col.Name.String(),
Type: col.ToEvalengineType(collationEnv),
Expand Down
8 changes: 7 additions & 1 deletion go/vt/vtgate/semantics/semantic_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ type (
childForeignKeysInvolved map[TableSet][]vindexes.ChildFKInfo
parentForeignKeysInvolved map[TableSet][]vindexes.ParentFKInfo
childFkToUpdExprs map[string]sqlparser.UpdateExprs
collEnv *collations.Environment
}

columnName struct {
Expand Down Expand Up @@ -640,7 +641,12 @@ func (st *SemTable) NeedsWeightString(e sqlparser.Expr) bool {
if !found {
return true
}
return typ.Collation() == collations.Unknown && !sqltypes.IsNumber(typ.Type())

if !sqltypes.IsText(typ.Type()) {
return false
}

return !st.collEnv.IsSupported(typ.Collation())
}
}

Expand Down
36 changes: 36 additions & 0 deletions go/vt/vtgate/semantics/typer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/mysql/collations/colldata"
querypb "vitess.io/vitess/go/vt/proto/query"
"vitess.io/vitess/go/vt/sqlparser"
)
Expand Down Expand Up @@ -54,5 +55,40 @@ func TestNormalizerAndSemanticAnalysisIntegration(t *testing.T) {
require.Equal(t, test.typ, typ.Type().String())
})
}
}

// Tests that the types correctly picks up and sets the collation on columns
func TestColumnCollations(t *testing.T) {
tests := []struct {
query, collation string
}{
{query: "select textcol from t2"},
{query: "select name from t2", collation: "utf8mb3_bin"},
}

for _, test := range tests {
t.Run(test.query, func(t *testing.T) {
parse, err := sqlparser.NewTestParser().Parse(test.query)
require.NoError(t, err)

err = sqlparser.Normalize(parse, sqlparser.NewReservedVars("bv", sqlparser.BindVars{}), map[string]*querypb.BindVariable{})
require.NoError(t, err)

st, err := Analyze(parse, "d", fakeSchemaInfo())
require.NoError(t, err)
col := extract(parse.(*sqlparser.Select), 0)
typ, found := st.TypeForExpr(col)
require.True(t, found, "column was not typed")

require.Equal(t, "VARCHAR", typ.Type().String())
collation := colldata.Lookup(typ.Collation())
if test.collation != "" {
collation := colldata.Lookup(typ.Collation())
require.NotNil(t, collation)
require.Equal(t, test.collation, collation.Name())
} else {
require.Nil(t, collation)
}
})
}
}
Loading
Loading