Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tiny Weights #14402

Merged
merged 14 commits into from
Nov 7, 2023
7 changes: 5 additions & 2 deletions go/hack/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,11 @@ func RuntimeAllocSize(size int64) int64 {
return int64(roundupsize(uintptr(size)))
}

//go:linkname ParseFloatPrefix strconv.parseFloatPrefix
func ParseFloatPrefix(s string, bitSize int) (float64, int, error)
// Atof64 is bound through go:linkname to the unexported strconv.atof64, which
// is why it is declared here without a body.
// NOTE(review): the int result appears to be the number of bytes of s that
// were consumed (fastparse.ParseFloat64 uses it as an offset into its input)
// — confirm against the strconv source for the Go version in use.
//
//go:linkname Atof64 strconv.atof64
func Atof64(s string) (float64, int, error)
vmg marked this conversation as resolved.
Show resolved Hide resolved

// Atof32 is bound through go:linkname to the unexported strconv.atof32, which
// is why it is declared here without a body.
// NOTE(review): presumably the int result is the number of bytes of s that
// were consumed, mirroring Atof64 — confirm against the strconv source.
//
//go:linkname Atof32 strconv.atof32
func Atof32(s string) (float32, int, error)

// FastRand is bound through go:linkname to the unexported runtime.fastrand,
// which is why it is declared here without a body.
// NOTE(review): presumably returns a cheap pseudo-random uint32 that is not
// suitable for cryptographic use — confirm against the runtime source.
//
//go:linkname FastRand runtime.fastrand
func FastRand() uint32
18 changes: 18 additions & 0 deletions go/mysql/collations/colldata/8bit.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
package colldata

import (
"encoding/binary"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/mysql/collations/charset"
"vitess.io/vitess/go/vt/vthash"
Expand Down Expand Up @@ -168,6 +170,16 @@ func (c *Collation_8bit_simple_ci) Collate(left, right []byte, rightIsPrefix boo
return len(left) - len(right)
}

// TinyWeightString maps up to the first four bytes of src through the
// collation's sort table and packs the results, most significant byte first,
// into a uint32. Inputs shorter than four bytes leave the remaining low-order
// bytes as zero.
func (c *Collation_8bit_simple_ci) TinyWeightString(src []byte) uint32 {
	// Only the leading four bytes can contribute to a 32-bit weight.
	prefix := src
	if len(prefix) > 4 {
		prefix = prefix[:4]
	}

	table := c.sort
	var packed [4]byte
	for pos, b := range prefix {
		packed[pos] = table[b]
	}
	return binary.BigEndian.Uint32(packed[:])
}

func (c *Collation_8bit_simple_ci) WeightString(dst, src []byte, numCodepoints int) []byte {
padToMax := false
sortOrder := c.sort
Expand Down Expand Up @@ -272,6 +284,12 @@ func (c *Collation_binary) Collate(left, right []byte, isPrefix bool) int {
return collationBinary(left, right, isPrefix)
}

// TinyWeightString returns the first four bytes of src interpreted as a
// big-endian uint32. When src is shorter than four bytes the missing
// low-order bytes are zero.
func (c *Collation_binary) TinyWeightString(src []byte) uint32 {
	// The array starts zeroed and copy stops at the shorter of the two
	// lengths, so short inputs are implicitly zero-padded.
	var prefix [4]byte
	copy(prefix[:], src)
	return binary.BigEndian.Uint32(prefix[:])
}

func (c *Collation_binary) WeightString(dst, src []byte, numCodepoints int) []byte {
padToMax := false
copyCodepoints := len(src)
Expand Down
8 changes: 8 additions & 0 deletions go/mysql/collations/colldata/collation.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,14 @@ type CaseAwareCollation interface {
ToLower(dst []byte, src []byte) []byte
}

// TinyWeightCollation implements the TinyWeightString API for collations.
// It extends Collation with a method that condenses a string into a single
// 32-bit value.
type TinyWeightCollation interface {
Collation
// TinyWeightString returns a 32-bit weight string for a source string based on this collation.
// This is usually the 4-byte prefix of the full weight string, calculated more efficiently.
//
// TestTinyWeightStrings exercises the following relationship with Collate
// for the utf8mb4_0900 collations: strings that collate as equal must have
// equal tiny weights, and a string that collates before another must not
// have a larger tiny weight. Unequal strings may still share a tiny weight.
TinyWeightString(src []byte) uint32
}

func Lookup(id collations.ID) Collation {
if int(id) >= len(collationsById) {
return nil
Expand Down
23 changes: 23 additions & 0 deletions go/mysql/collations/colldata/uca.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package colldata

import (
"bytes"
"encoding/binary"
"math/bits"

"vitess.io/vitess/go/mysql/collations"
Expand Down Expand Up @@ -119,6 +120,28 @@ nextLevel:
return int(l) - int(r)
}

// TinyWeightString builds a 32-bit weight for src from the start of the
// collation's weight sequence.
//
// When the iterator is a *uca.FastIterator900, one weight block is requested
// into a 16-byte buffer and its first four bytes are read as a big-endian
// uint32. Otherwise up to two weights are pulled from the iterator: the first
// occupies the high 16 bits and the second, if present, is OR-ed into the low
// bits. An iterator that yields no weights produces 0.
func (c *Collation_utf8mb4_uca_0900) TinyWeightString(src []byte) uint32 {
	iter := c.uca.Iterator(src)
	defer iter.Done()

	if fastIter, isFast := iter.(*uca.FastIterator900); isFast {
		// The buffer starts zeroed, so whatever the iterator does not fill
		// stays zero.
		var block [16]byte
		fastIter.NextWeightBlock64(block[:])
		return binary.BigEndian.Uint32(block[:4])
	}

	first, ok := iter.Next()
	if !ok {
		return 0
	}
	tiny := uint32(first) << 16
	if second, ok := iter.Next(); ok {
		tiny |= uint32(second)
	}
	return tiny
}

func (c *Collation_utf8mb4_uca_0900) WeightString(dst, src []byte, numCodepoints int) []byte {
it := c.uca.Iterator(src)
defer it.Done()
Expand Down
56 changes: 56 additions & 0 deletions go/mysql/collations/colldata/uca_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,62 @@ func TestCompareWithWeightString(t *testing.T) {
}
}

// TestTinyWeightStrings checks that tiny weights never contradict Collate.
// For every ordered pair of sample strings and every tested collation:
//   - strings that collate as equal must have identical tiny weights;
//   - a string that collates before another must not have a larger tiny weight;
//   - a string that collates after another must not have a smaller tiny weight.
func TestTinyWeightStrings(t *testing.T) {
	colls := []Collation{
		testcollation(t, "utf8mb4_0900_as_cs"),
		testcollation(t, "utf8mb4_0900_as_ci"),
		testcollation(t, "utf8mb4_0900_ai_ci"),
	}

	inputs := []string{
		"a", "A", "aa", "AA", "aaa", "AAA", "aaaa", "AAAA",
		// The second row mirrors the first; it previously listed "BB" twice,
		// which left the lowercase two-letter case untested.
		"b", "B", "bb", "BB", "bbb", "BBB", "bbbb", "BBBB",
		"Abc", "aBC",
		"ǍḄÇ", "ÁḆĈ",
		"\uA73A", "\uA738",
		"\uAC00", "\u326E",
		ExampleString,
		ExampleStringLong,
		JapaneseString,
		WhitespaceString,
		HungarianString,
		JapaneseString2,
		ChineseString,
		ChineseString2,
		SpanishString,
		EnglishString,
	}

	for _, coll := range colls {
		// Fail with a readable message instead of panicking when a collation
		// does not provide the tiny weight API.
		tw, ok := coll.(TinyWeightCollation)
		if !ok {
			t.Fatalf("[%s] collation does not implement TinyWeightCollation", coll.Name())
		}

		// Compute each tiny weight once per collation rather than once per pair.
		weights := make([]uint32, len(inputs))
		for i, s := range inputs {
			weights[i] = tw.TinyWeightString([]byte(s))
		}

		for i, a := range inputs {
			aw := weights[i]

			for j, b := range inputs {
				bw := weights[j]
				cmp := tw.Collate([]byte(a), []byte(b), false)

				switch {
				case cmp == 0:
					if aw != bw {
						t.Errorf("[%s] %q vs %q: should be equal, got %08x / %08x", coll.Name(), a, b, aw, bw)
					}
				case cmp < 0:
					if aw > bw {
						t.Errorf("[%s] %q vs %q: should be <=, got %08x / %08x", coll.Name(), a, b, aw, bw)
					}
				case cmp > 0:
					if aw < bw {
						t.Errorf("[%s] %q vs %q: should be >=, got %08x / %08x", coll.Name(), a, b, aw, bw)
					}
				}
			}
		}
	}
}

func TestFastIterators(t *testing.T) {
allASCIICharacters := make([]byte, 128)
for n := range allASCIICharacters {
Expand Down
2 changes: 1 addition & 1 deletion go/mysql/fastparse/fastparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ func ParseFloat64(s string) (float64, error) {
// We only care to parse as many of the initial float characters of the
// string as possible. This functionality is implemented in the `strconv` package
// of the standard library, but not exposed, so we hook into it.
val, l, err := hack.ParseFloatPrefix(s[ws:], 64)
val, l, err := hack.Atof64(s[ws:])
for l < len(s[ws:]) {
if !isSpace(s[ws+uint(l)]) {
break
Expand Down
4 changes: 2 additions & 2 deletions go/sqltypes/bind_variables.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func TupleToProto(v []Value) *querypb.Value {

// ValueToProto converts Value to a *querypb.Value.
func ValueToProto(v Value) *querypb.Value {
return &querypb.Value{Type: v.typ, Value: v.val}
return &querypb.Value{Type: v.Type(), Value: v.val}
}

// ProtoToValue converts a *querypb.Value to a Value.
Expand Down Expand Up @@ -143,7 +143,7 @@ func BytesBindVariable(v []byte) *querypb.BindVariable {

// ValueBindVariable converts a Value to a bind var.
func ValueBindVariable(v Value) *querypb.BindVariable {
return &querypb.BindVariable{Type: v.typ, Value: v.val}
return &querypb.BindVariable{Type: v.Type(), Value: v.val}
}

// BuildBindVariable builds a *querypb.BindVariable from a valid input type.
Expand Down
10 changes: 8 additions & 2 deletions go/sqltypes/parse_rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package sqltypes
import (
"fmt"
"io"
"reflect"
"slices"
"strconv"
"strings"
"text/scanner"
Expand Down Expand Up @@ -127,6 +127,12 @@ func (e *RowMismatchError) Error() string {
return fmt.Sprintf("results differ: %v\n\twant: %v\n\tgot: %v", e.err, e.want, e.got)
}

// RowEqual reports whether want and got have the same length and every pair
// of values at the same position compares equal according to Value.Equal.
func RowEqual(want, got Row) bool {
	sameValue := func(lhs, rhs Value) bool {
		return lhs.Equal(rhs)
	}
	return slices.EqualFunc(want, got, sameValue)
}

func RowsEquals(want, got []Row) error {
if len(want) != len(got) {
return &RowMismatchError{
Expand All @@ -143,7 +149,7 @@ func RowsEquals(want, got []Row) error {
if matched[i] {
continue
}
if reflect.DeepEqual(aa, bb) {
if RowEqual(aa, bb) {
matched[i] = true
ok = true
break
Expand Down
10 changes: 6 additions & 4 deletions go/sqltypes/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package sqltypes
import (
"crypto/sha256"
"fmt"
"reflect"
"slices"

"google.golang.org/protobuf/proto"

Expand Down Expand Up @@ -69,8 +69,8 @@ func (result *Result) Repair(fields []*querypb.Field) {
// Usage of j is intentional.
for j, f := range fields {
for _, r := range result.Rows {
if r[j].typ != Null {
r[j].typ = f.Type
if r[j].Type() != Null {
r[j].typ = uint16(f.Type)
}
}
}
Expand Down Expand Up @@ -198,7 +198,9 @@ func (result *Result) Equal(other *Result) bool {
return FieldsEqual(result.Fields, other.Fields) &&
result.RowsAffected == other.RowsAffected &&
result.InsertID == other.InsertID &&
reflect.DeepEqual(result.Rows, other.Rows)
slices.EqualFunc(result.Rows, other.Rows, func(a, b Row) bool {
return RowEqual(a, b)
})
}

// ResultsEqual compares two arrays of Result.
Expand Down
2 changes: 1 addition & 1 deletion go/sqltypes/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func TestValue(typ querypb.Type, val string) Value {
// This function should only be used for testing.
func TestTuple(vals ...Value) Value {
return Value{
typ: Tuple,
typ: uint16(Tuple),
val: encodeTuple(vals),
}
}
Expand Down
Loading
Loading