Skip to content

Commit

Permalink
evalengine: Support built-in MySQL function for string functions and …
Browse files Browse the repository at this point in the history
…operations (#11185)

* add the support for LOWER function

Signed-off-by: Weijun-H <[email protected]>

* add the support for LCASE

Signed-off-by: Weijun-H <[email protected]>

* update tests for LCASE

Signed-off-by: Weijun-H <[email protected]>

* used DefaultCollation instead of CollationUtf8mb4ID

Signed-off-by: Weijun-H <[email protected]>

* add the support for Char_Length and CHARACTER_LENGTH

Signed-off-by: Weijun-H <[email protected]>

* add the support for LENGTH and OCTET_LENGTH

Signed-off-by: Weijun-H <[email protected]>

* add the support for UPPER and UCASE

Signed-off-by: Weijun-H <[email protected]>

* simple try for lower function

Signed-off-by: Weijun-H <[email protected]>

* update tolower function, but hit an output problem

Signed-off-by: Weijun-H <[email protected]>

* update tests for different collations

Signed-off-by: Weijun-H <[email protected]>

* update support for LCASE, UPPER and UCASE

Signed-off-by: Weijun-H <[email protected]>

* Hide the ToLower and ToUpper implementation in Collation

Signed-off-by: Weijun-H <[email protected]>

* refactor lower and upper functions

Signed-off-by: Weijun-H <[email protected]>

* naive implementation of CHAR_LENGTH in 8bit collation

Signed-off-by: Weijun-H <[email protected]>

* implement char_length for uca and unicode, but met panic problem

Signed-off-by: Weijun-H <[email protected]>

* complete CharLen for different Charset

Signed-off-by: Weijun-H <[email protected]>

* update CharLen support for utf16 and utf32

Signed-off-by: Weijun-H <[email protected]>

* update support for CHARACTER_LENGTH

Signed-off-by: Weijun-H <[email protected]>

* refactor the code to avoid vague duplication

Signed-off-by: Weijun-H <[email protected]>

* add support for BIT_LENGTH

Signed-off-by: Weijun-H <[email protected]>

* add support for ASCII

Signed-off-by: Weijun-H <[email protected]>

* add the support for REPEAT but met strange MYSQL result

Signed-off-by: Weijun-H <[email protected]>

* update REPEAT test case

Signed-off-by: Weijun-H <[email protected]>

* evalengine: cleanup logic in string functions

Signed-off-by: Vicent Marti <[email protected]>

Signed-off-by: Weijun-H <[email protected]>
Signed-off-by: Vicent Marti <[email protected]>
Co-authored-by: Vicent Marti <[email protected]>
  • Loading branch information
Weijun-H and vmg authored Oct 18, 2022
1 parent 83543b8 commit 460e0f1
Show file tree
Hide file tree
Showing 17 changed files with 2,156 additions and 162 deletions.
54 changes: 50 additions & 4 deletions go/mysql/collations/8bit.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ type simpletables struct {
// take up a lot of binary space.
// Uncomment these fields and pass `-full8bit` to `makemysqldata` to generate
// these tables.
// tolower *[256]byte
// toupper *[256]byte
// ctype *[256]byte
sort *[256]byte
tolower *[256]byte
toupper *[256]byte
ctype *[256]byte
sort *[256]byte
}

type Collation_8bit_bin struct {
Expand Down Expand Up @@ -113,6 +113,24 @@ func (c *Collation_8bit_bin) Wildcard(pat []byte, matchOne rune, matchMany rune,
return newEightbitWildcardMatcher(&sortOrderIdentity, c.Collate, pat, matchOne, matchMany, escape)
}

// ToLower translates every byte of src through this collation's tolower
// table, appends the translated bytes to dst and returns the extended slice.
func (c *Collation_8bit_bin) ToLower(dst, src []byte) []byte {
	table := c.simpletables.tolower
	for i := 0; i < len(src); i++ {
		dst = append(dst, table[src[i]])
	}
	return dst
}

// ToUpper translates every byte of src through this collation's toupper
// table, appends the translated bytes to dst and returns the extended slice.
func (c *Collation_8bit_bin) ToUpper(dst, src []byte) []byte {
	table := c.simpletables.toupper
	for i := 0; i < len(src); i++ {
		dst = append(dst, table[src[i]])
	}
	return dst
}

type Collation_8bit_simple_ci struct {
id ID
name string
Expand Down Expand Up @@ -224,6 +242,24 @@ func weightStringPadingSimple(padChar byte, dst []byte, numCodepoints int, padTo
return dst
}

// ToLower translates every byte of src through this collation's tolower
// table, appends the translated bytes to dst and returns the extended slice.
func (c *Collation_8bit_simple_ci) ToLower(dst, src []byte) []byte {
	table := c.simpletables.tolower
	for i := 0; i < len(src); i++ {
		dst = append(dst, table[src[i]])
	}
	return dst
}

// ToUpper translates every byte of src through this collation's toupper
// table, appends the translated bytes to dst and returns the extended slice.
func (c *Collation_8bit_simple_ci) ToUpper(dst, src []byte) []byte {
	table := c.simpletables.toupper
	for i := 0; i < len(src); i++ {
		dst = append(dst, table[src[i]])
	}
	return dst
}

type Collation_binary struct{}

func (c *Collation_binary) Init() {}
Expand Down Expand Up @@ -283,3 +319,13 @@ func (c *Collation_binary) WeightStringLen(numBytes int) int {
func (c *Collation_binary) Wildcard(pat []byte, matchOne rune, matchMany rune, escape rune) WildcardPattern {
return newEightbitWildcardMatcher(&sortOrderIdentity, c.Collate, pat, matchOne, matchMany, escape)
}

// ToLower appends raw to dst unchanged and returns the extended slice; the
// binary collation applies no case mapping.
func (c *Collation_binary) ToLower(dst, raw []byte) []byte {
	return append(dst, raw...)
}

// ToUpper appends raw to dst unchanged and returns the extended slice; the
// binary collation applies no case mapping.
func (c *Collation_binary) ToUpper(dst, raw []byte) []byte {
	return append(dst, raw...)
}
12 changes: 12 additions & 0 deletions go/mysql/collations/collation.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ import (
"vitess.io/vitess/go/mysql/collations/internal/charset"
)

// CaseAwareCollation is a Collation that additionally knows how to convert
// text to its lowercase and uppercase forms.
type CaseAwareCollation interface {
	Collation

	// ToLower takes a destination buffer and the source bytes and returns
	// the resulting slice; the implementations in this package append the
	// lowercased bytes to dst.
	ToLower(dst, src []byte) []byte

	// ToUpper takes a destination buffer and the source bytes and returns
	// the resulting slice; the implementations in this package append the
	// uppercased bytes to dst.
	ToUpper(dst, src []byte) []byte
}

// ID is a numeric identifier for a collation. These identifiers are defined by MySQL, not by Vitess.
type ID uint16

Expand Down Expand Up @@ -188,3 +195,8 @@ func Validate(collation Collation, input []byte) bool {
func Convert(dst []byte, dstCollation Collation, src []byte, srcCollation Collation) ([]byte, error) {
return charset.Convert(dst, dstCollation.Charset(), src, srcCollation.Charset())
}

// Length reports how many codepoints input contains when it is interpreted
// in the character set of the given collation.
func Length(collation Collation, input []byte) int {
	cs := collation.Charset()
	return charset.Length(cs, input)
}
4 changes: 4 additions & 0 deletions go/mysql/collations/internal/charset/eightbit/8bit.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,7 @@ func (e *Charset_8bit) EncodeRune(dst []byte, r rune) int {
}
return -1
}

// Length reports the number of characters in src, which for this charset is
// simply the number of bytes.
func (Charset_8bit) Length(src []byte) int {
	count := len(src)
	return count
}
4 changes: 4 additions & 0 deletions go/mysql/collations/internal/charset/eightbit/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,7 @@ func (c Charset_binary) DecodeRune(bytes []byte) (rune, int) {
func (c Charset_binary) Convert(_, in []byte, _ types.Charset) ([]byte, error) {
return in, nil
}

// Length reports the number of characters in src, which for the binary
// charset is simply the number of bytes.
func (Charset_binary) Length(src []byte) int {
	count := len(src)
	return count
}
4 changes: 4 additions & 0 deletions go/mysql/collations/internal/charset/eightbit/latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,7 @@ func (Charset_latin1) DecodeRune(src []byte) (rune, int) {
}
return rune(tounicode_latin1[src[0]]), 1
}

// Length reports the number of characters in src, which for latin1 is simply
// the number of bytes.
func (Charset_latin1) Length(src []byte) int {
	count := len(src)
	return count
}
13 changes: 13 additions & 0 deletions go/mysql/collations/internal/charset/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,16 @@ func Validate(charset Charset, input []byte) bool {
}
return true
}

// Length returns the number of characters that input contains when decoded
// with the given charset.
//
// If the charset provides its own `Length([]byte) int` method, that method
// is used directly. Otherwise the input is walked one DecodeRune call at a
// time and every decoded rune counts as one character.
//
// The fallback loop must always terminate: if DecodeRune reports a
// non-positive size (no progress) or a size that reaches or exceeds the
// remaining input, the remainder is counted as a single character and the
// walk stops. Without this guard a size of 0 would loop forever and an
// oversized value would slice out of range.
func Length(charset Charset, input []byte) int {
	if fast, ok := charset.(interface{ Length([]byte) int }); ok {
		return fast.Length(input)
	}

	var count int
	for len(input) > 0 {
		_, size := charset.DecodeRune(input)
		count++
		if size <= 0 || size >= len(input) {
			// Either everything was consumed, or the decoder could not
			// make progress on the trailing bytes.
			break
		}
		input = input[size:]
	}
	return count
}
8 changes: 8 additions & 0 deletions go/mysql/collations/internal/charset/unicode/utf16.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,11 @@ func (Charset_ucs2) DecodeRune(p []byte) (rune, int) {
func (Charset_ucs2) SupportsSupplementaryChars() bool {
return false
}

// Length reports the number of UCS-2 characters in src: one per two bytes,
// with a trailing odd byte counted as one additional character.
func (Charset_ucs2) Length(src []byte) int {
	// Integer ceiling of len(src)/2.
	return (len(src) + 1) / 2
}
8 changes: 8 additions & 0 deletions go/mysql/collations/internal/charset/unicode/utf32.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,11 @@ func (Charset_utf32) DecodeRune(p []byte) (rune, int) {
func (Charset_utf32) SupportsSupplementaryChars() bool {
return true
}

// CharLen reports the number of UTF-32 characters in src: one per four
// bytes, with any trailing partial group counted as one additional
// character.
//
// Kept for backward compatibility; Length is the name the other charsets
// use.
func (Charset_utf32) CharLen(src []byte) int {
	cnt := len(src)
	if cnt%4 != 0 {
		return cnt/4 + 1
	}
	return cnt / 4
}

// Length reports the number of UTF-32 characters in src. It returns the same
// value as CharLen and exists so that this charset satisfies the
// `Length([]byte) int` method that the generic charset length helper
// type-asserts for; the sibling charsets expose their character count under
// this same name.
func (c Charset_utf32) Length(src []byte) int {
	return c.CharLen(src)
}
8 changes: 8 additions & 0 deletions go/mysql/collations/internal/charset/unicode/utf8.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ func (Charset_utf8mb3) SupportsSupplementaryChars() bool {
return false
}

// Length reports the number of runes in src as counted by utf8.RuneCount.
func (Charset_utf8mb3) Length(src []byte) int {
	runes := utf8.RuneCount(src)
	return runes
}

type Charset_utf8mb4 struct{}

func (Charset_utf8mb4) Name() string {
Expand Down Expand Up @@ -207,3 +211,7 @@ func (Charset_utf8mb4) SupportsSupplementaryChars() bool {
func (Charset_utf8mb4) Validate(p []byte) bool {
return utf8.Valid(p)
}

// Length reports the number of runes in src as counted by utf8.RuneCount.
func (Charset_utf8mb4) Length(src []byte) int {
	runes := utf8.RuneCount(src)
	return runes
}
Loading

0 comments on commit 460e0f1

Please sign in to comment.