From 037927ea93eb7e3f0424b55d4c0941c6605ee74f Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 4 Nov 2024 10:53:49 +0200 Subject: [PATCH 1/4] Parse enum/set values with sqlparser Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schema/parser.go | 115 ++++++------------ go/vt/schema/parser_test.go | 109 ++++++----------- .../tabletserver/vstreamer/vstreamer.go | 23 ++-- 3 files changed, 86 insertions(+), 161 deletions(-) diff --git a/go/vt/schema/parser.go b/go/vt/schema/parser.go index 7f5e133e01c..86994e5e594 100644 --- a/go/vt/schema/parser.go +++ b/go/vt/schema/parser.go @@ -17,11 +17,14 @@ limitations under the License. package schema import ( + "fmt" "regexp" "strings" - "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vtenv" + "vitess.io/vitess/go/vt/vterrors" ) // NormalizedDDLQuery contains a query which is online-ddl -normalized @@ -49,9 +52,6 @@ var ( // ALTER TABLE tbl something regexp.MustCompile(alterTableBasicPattern + `([\S]+)\s+(.*$)`), } - - enumValuesRegexp = regexp.MustCompile("(?i)^enum[(](.*)[)]$") - setValuesRegexp = regexp.MustCompile("(?i)^set[(](.*)[)]$") ) // ParseAlterTableOptions parses a ALTER ... TABLE... statement into: @@ -77,91 +77,52 @@ func ParseAlterTableOptions(alterStatement string) (explicitSchema, explicitTabl return explicitSchema, explicitTable, alterOptions } -// ParseEnumValues parses the comma delimited part of an enum column definition -func ParseEnumValues(enumColumnType string) string { - if submatch := enumValuesRegexp.FindStringSubmatch(enumColumnType); len(submatch) > 0 { - return submatch[1] - } - return enumColumnType -} - -// ParseSetValues parses the comma delimited part of a set column definition -func ParseSetValues(setColumnType string) string { - if submatch := setValuesRegexp.FindStringSubmatch(setColumnType); len(submatch) > 0 { - return submatch[1] - } - return setColumnType -} - // parseEnumOrSetTokens parses the comma delimited part of an enum/set column definition and // returns the (unquoted) text values // Expected input: `'x-small','small','medium','large','x-large'` // Unexpected input: `enum('x-small','small','medium','large','x-large')` -func parseEnumOrSetTokens(enumOrSetValues string) []string { - // We need to track both the start of the current value and current - // position, since there might be quoted quotes inside the value - // which we need to handle. - start := 0 - pos := 1 - var tokens []string - for { - // If the input does not start with a quote, it's not a valid enum/set definition - if enumOrSetValues[start] != '\'' { - return nil - } - i := strings.IndexByte(enumOrSetValues[pos:], '\'') - // If there's no closing quote, we have invalid input - if i < 0 { - return nil - } - // We're at the end here of the last quoted value, - // so we add the last token and return them. - if i == len(enumOrSetValues[pos:])-1 { - tok, err := sqltypes.DecodeStringSQL(enumOrSetValues[start:]) - if err != nil { - return nil - } - tokens = append(tokens, tok) - return tokens - } - // MySQL double quotes things as escape value, so if we see another - // single quote, we skip the character and remove it from the input. - if enumOrSetValues[pos+i+1] == '\'' { - pos = pos + i + 2 - continue - } - // Next value needs to be a comma as a separator, otherwise - // the data is invalid so we return nil. - if enumOrSetValues[pos+i+1] != ',' { - return nil - } - // If we're at the end of the input here, it's invalid - // since we have a trailing comma which is not what MySQL - // returns. - if pos+i+1 == len(enumOrSetValues) { - return nil - } - - tok, err := sqltypes.DecodeStringSQL(enumOrSetValues[start : pos+i+1]) - if err != nil { - return nil - } - - tokens = append(tokens, tok) - // We add 2 to the position to skip the closing quote & comma - start = pos + i + 2 - pos = start + 1 +func parseEnumOrSetTokens(env *vtenv.Environment, enumOrSetValues string) ([]string, error) { + // sqlparser cannot directly parse enum/set values, so we create a dummy query to parse it. + dummyQuery := fmt.Sprintf("alter table t add column e enum(%s)", enumOrSetValues) + ddlStmt, err := env.Parser().ParseStrictDDL(dummyQuery) + if err != nil { + return nil, err + } + unexpectedError := func() error { + return vterrors.Errorf(vtrpc.Code_INTERNAL, "unexpected error parsing enum values: %v", enumOrSetValues) + } + alterTable, ok := ddlStmt.(*sqlparser.AlterTable) + if !ok { + return nil, unexpectedError() } + if len(alterTable.AlterOptions) != 1 { + return nil, unexpectedError() + } + addColumn, ok := alterTable.AlterOptions[0].(*sqlparser.AddColumns) + if !ok { + return nil, unexpectedError() + } + if len(addColumn.Columns) != 1 { + return nil, unexpectedError() + } + enumValues := addColumn.Columns[0].Type.EnumValues + for i := range enumValues { + enumValues[i] = strings.Trim(enumValues[i], "'") + } + return enumValues, nil } // ParseEnumOrSetTokensMap parses the comma delimited part of an enum column definition // and returns a map where [1] is the first token, and [] is the last. -func ParseEnumOrSetTokensMap(enumOrSetValues string) map[int]string { - tokens := parseEnumOrSetTokens(enumOrSetValues) +func ParseEnumOrSetTokensMap(env *vtenv.Environment, enumOrSetValues string) (map[int]string, error) { + tokens, err := parseEnumOrSetTokens(env, enumOrSetValues) + if err != nil { + return nil, err + } tokensMap := map[int]string{} for i, token := range tokens { // SET and ENUM values are 1 indexed. tokensMap[i+1] = token } - return tokensMap + return tokensMap, nil } diff --git a/go/vt/schema/parser_test.go b/go/vt/schema/parser_test.go index dc696fd3d1d..2a37605f53b 100644 --- a/go/vt/schema/parser_test.go +++ b/go/vt/schema/parser_test.go @@ -20,6 +20,9 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/vt/vtenv" ) func TestParseAlterTableOptions(t *testing.T) { @@ -48,108 +51,50 @@ func TestParseAlterTableOptions(t *testing.T) { } } -func TestParseEnumValues(t *testing.T) { - { - inputs := []string{ - `enum('x-small','small','medium','large','x-large')`, - `ENUM('x-small','small','medium','large','x-large')`, - `'x-small','small','medium','large','x-large'`, - } - for _, input := range inputs { - enumValues := ParseEnumValues(input) - assert.Equal(t, `'x-small','small','medium','large','x-large'`, enumValues) - } - } - { - inputs := []string{ - ``, - `abc`, - `func('x-small','small','medium','large','x-large')`, - `set('x-small','small','medium','large','x-large')`, - } - for _, input := range inputs { - enumValues := ParseEnumValues(input) - assert.Equal(t, input, enumValues) - } - } - - { - inputs := []string{ - ``, - `abc`, - `func('x small','small','medium','large','x large')`, - `set('x small','small','medium','large','x large')`, - } - for _, input := range inputs { - enumValues := ParseEnumValues(input) - assert.Equal(t, input, enumValues) - } - } -} - -func TestParseSetValues(t *testing.T) { - { - inputs := []string{ - `set('x-small','small','medium','large','x-large')`, - `SET('x-small','small','medium','large','x-large')`, - `'x-small','small','medium','large','x-large'`, - } - for _, input := range inputs { - setValues := ParseSetValues(input) - assert.Equal(t, `'x-small','small','medium','large','x-large'`, setValues) - } - } - { - inputs := []string{ - ``, - `abc`, - `func('x-small','small','medium','large','x-large')`, - `enum('x-small','small','medium','large','x-large')`, - `ENUM('x-small','small','medium','large','x-large')`, - } - for _, input := range inputs { - setValues := ParseSetValues(input) - assert.Equal(t, input, setValues) - } - } -} - func TestParseEnumTokens(t *testing.T) { + env := vtenv.NewTestEnv() { input := `'x-small','small','medium','large','x-large'` - enumTokens := parseEnumOrSetTokens(input) + enumTokens, err := parseEnumOrSetTokens(env, input) + require.NoError(t, err) expect := []string{"x-small", "small", "medium", "large", "x-large"} assert.Equal(t, expect, enumTokens) } { input := `'x small','small','medium','large','x large'` - enumTokens := parseEnumOrSetTokens(input) + enumTokens, err := parseEnumOrSetTokens(env, input) + require.NoError(t, err) expect := []string{"x small", "small", "medium", "large", "x large"} assert.Equal(t, expect, enumTokens) } { input := `'with '' quote','and \n newline'` - enumTokens := parseEnumOrSetTokens(input) + enumTokens, err := parseEnumOrSetTokens(env, input) + require.NoError(t, err) expect := []string{"with ' quote", "and \n newline"} assert.Equal(t, expect, enumTokens) } { input := `enum('x-small','small','medium','large','x-large')` - enumTokens := parseEnumOrSetTokens(input) + enumTokens, err := parseEnumOrSetTokens(env, input) + assert.Error(t, err) assert.Nil(t, enumTokens) } { input := `set('x-small','small','medium','large','x-large')` - enumTokens := parseEnumOrSetTokens(input) + enumTokens, err := parseEnumOrSetTokens(env, input) + assert.Error(t, err) assert.Nil(t, enumTokens) } } func TestParseEnumTokensMap(t *testing.T) { + env := vtenv.NewTestEnv() { input := `'x-small','small','medium','large','x-large'` - enumTokensMap := ParseEnumOrSetTokensMap(input) + enumTokensMap, err := ParseEnumOrSetTokensMap(env, input) + require.NoError(t, err) expect := map[int]string{ 1: "x-small", 2: "small", @@ -165,9 +110,23 @@ func TestParseEnumTokensMap(t *testing.T) { `set('x-small','small','medium','large','x-large')`, } for _, input := range inputs { - enumTokensMap := ParseEnumOrSetTokensMap(input) - expect := map[int]string{} - assert.Equal(t, expect, enumTokensMap) + enumTokensMap, err := ParseEnumOrSetTokensMap(env, input) + assert.Error(t, err) + assert.Nil(t, enumTokensMap) + } + } + { + input := `'x-small','small','med''ium','large','x-large'` + + enumTokensMap, err := ParseEnumOrSetTokensMap(env, input) + require.NoError(t, err) + expect := map[int]string{ + 1: "x-small", + 2: "small", + 3: "med\\'ium", + 4: "large", + 5: "x-large", } + assert.Equal(t, expect, enumTokensMap) } } diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go index e7453b2b70f..ea7f75cdc38 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go @@ -37,6 +37,7 @@ import ( "vitess.io/vitess/go/vt/logutil" vtschema "vitess.io/vitess/go/vt/schema" "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vtenv" "vitess.io/vitess/go/vt/vterrors" vttablet "vitess.io/vitess/go/vt/vttablet/common" "vitess.io/vitess/go/vt/vttablet/tabletserver/schema" @@ -783,7 +784,7 @@ func (vs *vstreamer) buildTablePlan(id uint64, tm *mysql.TableMap) (*binlogdatap vs.plans[id] = nil return nil, nil } - if err := addEnumAndSetMappingstoPlan(plan, cols, tm.Metadata); err != nil { + if err := addEnumAndSetMappingstoPlan(vs.se.Environment(), plan, cols, tm.Metadata); err != nil { return nil, vterrors.Wrapf(err, "failed to build ENUM and SET column integer to string mappings") } vs.plans[id] = &streamerPlan{ @@ -1097,13 +1098,13 @@ func (vs *vstreamer) extractRowAndFilter(plan *streamerPlan, data []byte, dataCo // Convert the integer values in the binlog event for any SET and ENUM fields into their // string representations. if plan.Table.Fields[colNum].Type == querypb.Type_ENUM || mysqlType == mysqlbinlog.TypeEnum { - value, err = buildEnumStringValue(plan, colNum, value) + value, err = buildEnumStringValue(vs.se.Environment(), plan, colNum, value) if err != nil { return false, nil, false, vterrors.Wrapf(err, "failed to perform ENUM column integer to string value mapping") } } if plan.Table.Fields[colNum].Type == querypb.Type_SET || mysqlType == mysqlbinlog.TypeSet { - value, err = buildSetStringValue(plan, colNum, value) + value, err = buildSetStringValue(vs.se.Environment(), plan, colNum, value) if err != nil { return false, nil, false, vterrors.Wrapf(err, "failed to perform SET column integer to string value mapping") } @@ -1120,7 +1121,7 @@ func (vs *vstreamer) extractRowAndFilter(plan *streamerPlan, data []byte, dataCo } // addEnumAndSetMappingstoPlan sets up any necessary ENUM and SET integer to string mappings. -func addEnumAndSetMappingstoPlan(plan *Plan, cols []*querypb.Field, metadata []uint16) error { +func addEnumAndSetMappingstoPlan(env *vtenv.Environment, plan *Plan, cols []*querypb.Field, metadata []uint16) error { plan.EnumSetValuesMap = make(map[int]map[int]string) for i, col := range cols { // If the column is a CHAR based type with a binary collation (e.g. utf8mb4_bin) then @@ -1139,21 +1140,25 @@ func addEnumAndSetMappingstoPlan(plan *Plan, cols []*querypb.Field, metadata []u return fmt.Errorf("enum or set column %s does not have valid string values: %s", col.Name, col.ColumnType) } - plan.EnumSetValuesMap[i] = vtschema.ParseEnumOrSetTokensMap(col.ColumnType[begin+1 : end]) + var err error + plan.EnumSetValuesMap[i], err = vtschema.ParseEnumOrSetTokensMap(env, col.ColumnType[begin+1:end]) + if err != nil { + return err + } } } return nil } // buildEnumStringValue takes the integer value of an ENUM column and returns the string value. -func buildEnumStringValue(plan *streamerPlan, colNum int, value sqltypes.Value) (sqltypes.Value, error) { +func buildEnumStringValue(env *vtenv.Environment, plan *streamerPlan, colNum int, value sqltypes.Value) (sqltypes.Value, error) { if value.IsNull() { // No work is needed return value, nil } // Add the mappings just-in-time in case we haven't properly received and processed a // table map event to initialize it. if plan.EnumSetValuesMap == nil { - if err := addEnumAndSetMappingstoPlan(plan.Plan, plan.Table.Fields, plan.TableMap.Metadata); err != nil { + if err := addEnumAndSetMappingstoPlan(env, plan.Plan, plan.Table.Fields, plan.TableMap.Metadata); err != nil { return sqltypes.Value{}, err } } @@ -1181,14 +1186,14 @@ func buildEnumStringValue(plan *streamerPlan, colNum int, value sqltypes.Value) } // buildSetStringValue takes the integer value of a SET column and returns the string value. -func buildSetStringValue(plan *streamerPlan, colNum int, value sqltypes.Value) (sqltypes.Value, error) { +func buildSetStringValue(env *vtenv.Environment, plan *streamerPlan, colNum int, value sqltypes.Value) (sqltypes.Value, error) { if value.IsNull() { // No work is needed return value, nil } // Add the mappings just-in-time in case we haven't properly received and processed a // table map event to initialize it. if plan.EnumSetValuesMap == nil { - if err := addEnumAndSetMappingstoPlan(plan.Plan, plan.Table.Fields, plan.TableMap.Metadata); err != nil { + if err := addEnumAndSetMappingstoPlan(env, plan.Plan, plan.Table.Fields, plan.TableMap.Metadata); err != nil { return sqltypes.Value{}, err } } From 4fce5ae6ab97e4be43b9f98dc22a7ead345eb18c Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:19:36 +0200 Subject: [PATCH 2/4] adapt test Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schema/parser_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/vt/schema/parser_test.go b/go/vt/schema/parser_test.go index 2a37605f53b..bd7f1b808c7 100644 --- a/go/vt/schema/parser_test.go +++ b/go/vt/schema/parser_test.go @@ -71,7 +71,7 @@ func TestParseEnumTokens(t *testing.T) { input := `'with '' quote','and \n newline'` enumTokens, err := parseEnumOrSetTokens(env, input) require.NoError(t, err) - expect := []string{"with ' quote", "and \n newline"} + expect := []string{"with \\' quote", "and \\n newline"} assert.Equal(t, expect, enumTokens) } { From 02b6c9b9190b1533ba4919eca78bd644dcaa93f4 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 6 Nov 2024 06:53:51 +0200 Subject: [PATCH 3/4] use sqltypes.DecodeStringSQL Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schema/parser.go | 7 ++++++- go/vt/schema/parser_test.go | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/go/vt/schema/parser.go b/go/vt/schema/parser.go index 86994e5e594..13fd6d43359 100644 --- a/go/vt/schema/parser.go +++ b/go/vt/schema/parser.go @@ -21,6 +21,7 @@ import ( "regexp" "strings" + "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vtenv" @@ -107,7 +108,11 @@ func parseEnumOrSetTokens(env *vtenv.Environment, enumOrSetValues string) ([]str } enumValues := addColumn.Columns[0].Type.EnumValues for i := range enumValues { - enumValues[i] = strings.Trim(enumValues[i], "'") + val, err := sqltypes.DecodeStringSQL(enumValues[i]) + if err != nil { + return nil, err + } + enumValues[i] = val } return enumValues, nil } diff --git a/go/vt/schema/parser_test.go b/go/vt/schema/parser_test.go index bd7f1b808c7..e70bcfa68c5 100644 --- a/go/vt/schema/parser_test.go +++ b/go/vt/schema/parser_test.go @@ -71,7 +71,7 @@ func TestParseEnumTokens(t *testing.T) { input := `'with '' quote','and \n newline'` enumTokens, err := parseEnumOrSetTokens(env, input) require.NoError(t, err) - expect := []string{"with \\' quote", "and \\n newline"} + expect := []string{"with ' quote", "and \n newline"} assert.Equal(t, expect, enumTokens) } { @@ -123,7 +123,7 @@ func TestParseEnumTokensMap(t *testing.T) { expect := map[int]string{ 1: "x-small", 2: "small", - 3: "med\\'ium", + 3: "med'ium", 4: "large", 5: "x-large", } From 270c0d554d88b8c65b1056c02f93f8ed76f1942f Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Wed, 6 Nov 2024 07:17:06 +0200 Subject: [PATCH 4/4] create a new slice, just a matter of preference Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- go/vt/schema/parser.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/go/vt/schema/parser.go b/go/vt/schema/parser.go index 13fd6d43359..5d5bbc868ff 100644 --- a/go/vt/schema/parser.go +++ b/go/vt/schema/parser.go @@ -107,14 +107,15 @@ func parseEnumOrSetTokens(env *vtenv.Environment, enumOrSetValues string) ([]str return nil, unexpectedError() } enumValues := addColumn.Columns[0].Type.EnumValues + decodedEnumValues := make([]string, len(enumValues)) for i := range enumValues { val, err := sqltypes.DecodeStringSQL(enumValues[i]) if err != nil { return nil, err } - enumValues[i] = val + decodedEnumValues[i] = val } - return enumValues, nil + return decodedEnumValues, nil } // ParseEnumOrSetTokensMap parses the comma delimited part of an enum column definition