Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a number of encoding issues when evaluating expressions with the evalengine #13509

Merged
merged 14 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions go/cmd/vtgateclienttest/services/echo.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"sort"
"strings"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/callerid"
"vitess.io/vitess/go/vt/vtgate/vtgateservice"
Expand Down Expand Up @@ -77,13 +78,13 @@ func echoQueryResult(vals map[string]any) *sqltypes.Result {
// The first two returned fields are always a field with a MySQL NULL value,
// and another field with a zero-length string.
// Client tests can use this to check that they correctly distinguish the two.
qr.Fields = append(qr.Fields, &querypb.Field{Name: "null", Type: sqltypes.VarBinary})
qr.Fields = append(qr.Fields, &querypb.Field{Name: "null", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)})
row = append(row, sqltypes.NULL)
qr.Fields = append(qr.Fields, &querypb.Field{Name: "emptyString", Type: sqltypes.VarBinary})
qr.Fields = append(qr.Fields, &querypb.Field{Name: "emptyString", Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)})
row = append(row, sqltypes.NewVarBinary(""))

for k, v := range vals {
qr.Fields = append(qr.Fields, &querypb.Field{Name: k, Type: sqltypes.VarBinary})
qr.Fields = append(qr.Fields, &querypb.Field{Name: k, Type: sqltypes.VarBinary, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)})

val := reflect.ValueOf(v)
if val.Kind() == reflect.Map {
Expand Down
4 changes: 2 additions & 2 deletions go/mysql/binlog/binlog_json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,12 @@ func TestMarshalJSONToSQL(t *testing.T) {
{
name: `opaque string [2 202 254]`,
data: []byte{15, 15, 2, 202, 254},
expected: `CAST(x'02cafe' as JSON)`,
expected: `CAST(x'02CAFE' as JSON)`,
},
{
name: `opaque blob [2 202 254]`,
data: []byte{15, 252, 2, 202, 254},
expected: `CAST(x'02cafe' as JSON)`,
expected: `CAST(x'02CAFE' as JSON)`,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're now consistent with MySQL which upcases by default for hex literals.

},
}
for _, tc := range testcases {
Expand Down
3 changes: 3 additions & 0 deletions go/mysql/collations/charset/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func convertFastFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, er
if dst == nil {
dst = make([]byte, len(src)*3)
} else {
nDst = len(dst)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one and the other changes fix bugs when appending to an existing buffer. The additional evalengine test cases cover this behavior.

dst = dst[:cap(dst)]
}

Expand Down Expand Up @@ -65,6 +66,7 @@ func convertSlow(dst []byte, dstCharset Charset, src []byte, srcCharset Charset)
if dst == nil {
dst = make([]byte, len(src)*3)
} else {
nDst = len(dst)
dst = dst[:cap(dst)]
}

Expand Down Expand Up @@ -180,6 +182,7 @@ func Collapse(dst []byte, src []rune, dstCharset Charset) []byte {
if dst == nil {
dst = make([]byte, len(src)*dstCharset.MaxWidth())
} else {
nDst = len(dst)
dst = dst[:cap(dst)]
}
for _, c := range src {
Expand Down
12 changes: 11 additions & 1 deletion go/mysql/collations/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func makeEnv(version collver) *Environment {
// A few interesting character set values.
// See http://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet
const (
CollationUtf8ID = 33
CollationUtf8mb3ID = 33
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned, rename for explicitness.

CollationUtf8mb4ID = 255
CollationBinaryID = 63
CollationUtf8mb4BinID = 46
Expand All @@ -204,6 +204,16 @@ const (
// Binary is the default Binary collation
var Binary = ID(CollationBinaryID).Get()

// SystemCollation is the default collation for the system tables
// such as the information schema. This is still utf8mb3 to match
// MySQLs behavior. This means that you can't use utf8mb4 in table
// names, column names, without running into significant issues.
var SystemCollation = TypedCollation{
Collation: CollationUtf8mb3ID,
Coercibility: CoerceCoercible,
Repertoire: RepertoireUnicode,
}

// CharsetAlias returns the internal charset name for the given charset.
// For now, this only maps `utf8` to `utf8mb3`; in future versions of MySQL,
// this mapping will change, so it's important to use this helper so that
Expand Down
12 changes: 12 additions & 0 deletions go/mysql/collations/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"sync"

"vitess.io/vitess/go/internal/flag"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/servenv"
)

Expand All @@ -46,3 +47,14 @@ func Local() *Environment {
func Default() ID {
return ID(Local().DefaultConnectionCharset())
}

func DefaultCollationForType(t sqltypes.Type) ID {
switch {
case sqltypes.IsText(t):
return Default()
case t == sqltypes.TypeJSON:
return CollationUtf8mb4ID
default:
return CollationBinaryID
}
}
64 changes: 64 additions & 0 deletions go/mysql/hex/hex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package hex
dbussink marked this conversation as resolved.
Show resolved Hide resolved

import (
"encoding/hex"
"math/bits"
)

const hextable = "0123456789ABCDEF"

func EncodeBytes(src []byte) []byte {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These have been moved since we now used them in two places.

j := 0
dst := make([]byte, len(src)*2)
for _, v := range src {
dst[j] = hextable[v>>4]
dst[j+1] = hextable[v&0x0f]
j += 2
}
return dst
}

func EncodeUint(u uint64) []byte {
var a [16 + 1]byte
i := len(a)
shift := uint(bits.TrailingZeros(uint(16))) & 7
b := uint64(16)
m := uint(16) - 1 // == 1<<shift - 1

for u >= b {
i--
a[i] = hextable[uint(u)&m]
u >>= shift
}

// u < base
i--
a[i] = hextable[uint(u)]
return a[i:]
}

func DecodeUint(u uint64) []byte {
if u == 0 {
return []byte{0}
}
var decoded []byte
for u > 0 {
c1 := u % 10
c2 := u % 100 / 10
decoded = append([]byte{byte(c1 + c2<<4)}, decoded...)
u /= 100
}
return decoded
}

func DecodedLen(src []byte) int {
return (len(src) + 1) / 2
}

func DecodeBytes(dst, src []byte) bool {
if len(src)&1 == 1 {
src = append([]byte{'0'}, src...)
}
_, err := hex.Decode(dst, src)
return err == nil
}
5 changes: 3 additions & 2 deletions go/mysql/json/marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@ limitations under the License.
package json

import (
"encoding/hex"
"fmt"
"math/big"

"vitess.io/vitess/go/hack"
"vitess.io/vitess/go/mysql/hex"
querypb "vitess.io/vitess/go/vt/proto/query"

"vitess.io/vitess/go/sqltypes"
Expand Down Expand Up @@ -106,7 +107,7 @@ func (v *Value) marshalSQLInternal(top bool, dst []byte) []byte {
dst = append(dst, "CAST("...)
}
dst = append(dst, "x'"...)
dst = append(dst, hex.EncodeToString([]byte(v.s))...)
dst = append(dst, hex.EncodeBytes(hack.StringBytes(v.s))...)
dst = append(dst, "'"...)
if top {
dst = append(dst, " as JSON)"...)
Expand Down
19 changes: 18 additions & 1 deletion go/mysql/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"strconv"
"strings"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/vterrors"
Expand Down Expand Up @@ -1073,7 +1074,9 @@ func (c *Conn) writePrepare(fld []*querypb.Field, prepare *PrepareData) error {
if err := c.writeColumnDefinition(&querypb.Field{
Name: "?",
Type: sqltypes.VarBinary,
Charset: 63}); err != nil {
Charset: collations.CollationBinaryID,
Flags: uint32(querypb.MySqlFlag_BINARY_FLAG),
}); err != nil {
return err
}
}
Expand Down Expand Up @@ -1517,3 +1520,17 @@ func val2MySQLLen(v sqltypes.Value) (int, error) {
}
return length, nil
}

func FlagsForColumn(t sqltypes.Type, col collations.ID) uint32 {
var fl uint32
if sqltypes.IsNumber(t) {
fl |= uint32(querypb.MySqlFlag_NUM_FLAG)
}
if sqltypes.IsUnsigned(t) {
fl |= uint32(querypb.MySqlFlag_UNSIGNED_FLAG)
}
if sqltypes.IsQuoted(t) && col == collations.CollationBinaryID {
fl |= uint32(querypb.MySqlFlag_BINARY_FLAG)
}
return fl
}
87 changes: 49 additions & 38 deletions go/mysql/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (

"google.golang.org/protobuf/proto"

"vitess.io/vitess/go/mysql/collations"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

Expand All @@ -48,8 +50,10 @@ func MockPrepareData(t *testing.T) (*PrepareData, *sqltypes.Result) {
result := &sqltypes.Result{
Fields: []*querypb.Field{
{
Name: "id",
Type: querypb.Type_INT32,
Name: "id",
Type: querypb.Type_INT32,
Charset: collations.CollationBinaryID,
Flags: uint32(querypb.MySqlFlag_NUM_FLAG),
},
},
Rows: [][]sqltypes.Value{
Expand Down Expand Up @@ -399,12 +403,15 @@ func TestQueries(t *testing.T) {
checkQuery(t, "type and name", sConn, cConn, &sqltypes.Result{
Fields: []*querypb.Field{
{
Name: "id",
Type: querypb.Type_INT32,
Name: "id",
Type: querypb.Type_INT32,
Charset: collations.CollationBinaryID,
Flags: uint32(querypb.MySqlFlag_NUM_FLAG),
},
{
Name: "name",
Type: querypb.Type_VARCHAR,
Name: "name",
Type: querypb.Type_VARCHAR,
Charset: uint32(collations.Default()),
},
},
Rows: [][]sqltypes.Value{
Expand All @@ -424,36 +431,36 @@ func TestQueries(t *testing.T) {
// One row has all NULL values.
checkQuery(t, "all types", sConn, cConn, &sqltypes.Result{
Fields: []*querypb.Field{
{Name: "Type_INT8 ", Type: querypb.Type_INT8},
{Name: "Type_UINT8 ", Type: querypb.Type_UINT8},
{Name: "Type_INT16 ", Type: querypb.Type_INT16},
{Name: "Type_UINT16 ", Type: querypb.Type_UINT16},
{Name: "Type_INT24 ", Type: querypb.Type_INT24},
{Name: "Type_UINT24 ", Type: querypb.Type_UINT24},
{Name: "Type_INT32 ", Type: querypb.Type_INT32},
{Name: "Type_UINT32 ", Type: querypb.Type_UINT32},
{Name: "Type_INT64 ", Type: querypb.Type_INT64},
{Name: "Type_UINT64 ", Type: querypb.Type_UINT64},
{Name: "Type_FLOAT32 ", Type: querypb.Type_FLOAT32},
{Name: "Type_FLOAT64 ", Type: querypb.Type_FLOAT64},
{Name: "Type_TIMESTAMP", Type: querypb.Type_TIMESTAMP},
{Name: "Type_DATE ", Type: querypb.Type_DATE},
{Name: "Type_TIME ", Type: querypb.Type_TIME},
{Name: "Type_DATETIME ", Type: querypb.Type_DATETIME},
{Name: "Type_YEAR ", Type: querypb.Type_YEAR},
{Name: "Type_DECIMAL ", Type: querypb.Type_DECIMAL},
{Name: "Type_TEXT ", Type: querypb.Type_TEXT},
{Name: "Type_BLOB ", Type: querypb.Type_BLOB},
{Name: "Type_VARCHAR ", Type: querypb.Type_VARCHAR},
{Name: "Type_VARBINARY", Type: querypb.Type_VARBINARY},
{Name: "Type_CHAR ", Type: querypb.Type_CHAR},
{Name: "Type_BINARY ", Type: querypb.Type_BINARY},
{Name: "Type_BIT ", Type: querypb.Type_BIT},
{Name: "Type_ENUM ", Type: querypb.Type_ENUM},
{Name: "Type_SET ", Type: querypb.Type_SET},
{Name: "Type_INT8 ", Type: querypb.Type_INT8, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_UINT8 ", Type: querypb.Type_UINT8, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG)},
{Name: "Type_INT16 ", Type: querypb.Type_INT16, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_UINT16 ", Type: querypb.Type_UINT16, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG)},
{Name: "Type_INT24 ", Type: querypb.Type_INT24, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_UINT24 ", Type: querypb.Type_UINT24, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG)},
{Name: "Type_INT32 ", Type: querypb.Type_INT32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_UINT32 ", Type: querypb.Type_UINT32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG)},
{Name: "Type_INT64 ", Type: querypb.Type_INT64, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_UINT64 ", Type: querypb.Type_UINT64, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG | querypb.MySqlFlag_UNSIGNED_FLAG)},
{Name: "Type_FLOAT32 ", Type: querypb.Type_FLOAT32, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_FLOAT64 ", Type: querypb.Type_FLOAT64, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_TIMESTAMP", Type: querypb.Type_TIMESTAMP, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG | querypb.MySqlFlag_TIMESTAMP_FLAG)},
{Name: "Type_DATE ", Type: querypb.Type_DATE, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)},
{Name: "Type_TIME ", Type: querypb.Type_TIME, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)},
{Name: "Type_DATETIME ", Type: querypb.Type_DATETIME, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)},
{Name: "Type_YEAR ", Type: querypb.Type_YEAR, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_UNSIGNED_FLAG | querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_DECIMAL ", Type: querypb.Type_DECIMAL, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_NUM_FLAG)},
{Name: "Type_TEXT ", Type: querypb.Type_TEXT, Charset: uint32(collations.Default())},
{Name: "Type_BLOB ", Type: querypb.Type_BLOB, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)},
{Name: "Type_VARCHAR ", Type: querypb.Type_VARCHAR, Charset: uint32(collations.Default())},
{Name: "Type_VARBINARY", Type: querypb.Type_VARBINARY, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)},
{Name: "Type_CHAR ", Type: querypb.Type_CHAR, Charset: uint32(collations.Default())},
{Name: "Type_BINARY ", Type: querypb.Type_BINARY, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)},
{Name: "Type_BIT ", Type: querypb.Type_BIT, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG)},
{Name: "Type_ENUM ", Type: querypb.Type_ENUM, Charset: uint32(collations.Default()), Flags: uint32(querypb.MySqlFlag_ENUM_FLAG)},
{Name: "Type_SET ", Type: querypb.Type_SET, Charset: uint32(collations.Default()), Flags: uint32(querypb.MySqlFlag_SET_FLAG)},
// Skip TUPLE, not possible in Result.
{Name: "Type_GEOMETRY ", Type: querypb.Type_GEOMETRY},
{Name: "Type_JSON ", Type: querypb.Type_JSON},
{Name: "Type_GEOMETRY ", Type: querypb.Type_GEOMETRY, Charset: collations.CollationBinaryID, Flags: uint32(querypb.MySqlFlag_BINARY_FLAG | querypb.MySqlFlag_BLOB_FLAG)},
{Name: "Type_JSON ", Type: querypb.Type_JSON, Charset: collations.CollationUtf8mb4ID},
},
Rows: [][]sqltypes.Value{
{
Expand Down Expand Up @@ -526,8 +533,9 @@ func TestQueries(t *testing.T) {
checkQuery(t, "first empty string", sConn, cConn, &sqltypes.Result{
Fields: []*querypb.Field{
{
Name: "name",
Type: querypb.Type_VARCHAR,
Name: "name",
Type: querypb.Type_VARCHAR,
Charset: uint32(collations.Default()),
},
},
Rows: [][]sqltypes.Value{
Expand All @@ -544,7 +552,9 @@ func TestQueries(t *testing.T) {
checkQuery(t, "type only", sConn, cConn, &sqltypes.Result{
Fields: []*querypb.Field{
{
Type: querypb.Type_INT64,
Type: querypb.Type_INT64,
Charset: collations.CollationBinaryID,
Flags: uint32(querypb.MySqlFlag_NUM_FLAG),
},
},
Rows: [][]sqltypes.Value{
Expand Down Expand Up @@ -667,6 +677,7 @@ func checkQueryInternal(t *testing.T, query string, sConn, cConn *Conn, result *
if !got.Equal(&expected) {
for i, f := range got.Fields {
if i < len(expected.Fields) && !proto.Equal(f, expected.Fields[i]) {
t.Logf("Query = %v", query)
t.Logf("Got field(%v) = %v", i, f)
t.Logf("Expected field(%v) = %v", i, expected.Fields[i])
}
Expand Down
Loading
Loading