From 46269e094c8bfd8c146d96e9d7dca819f286d10c Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Thu, 21 Oct 2021 21:01:49 +0800 Subject: [PATCH 1/5] convert binary to non-binary --- errno/errcode.go | 1 + errno/errname.go | 1 + expression/builtin.go | 6 ++-- expression/builtin_cast.go | 46 ++++++++++++++++++++++++ expression/builtin_cast_vec.go | 27 ++++++++++++++- expression/builtin_compare.go | 2 +- expression/builtin_control.go | 4 +-- expression/builtin_string.go | 4 +++ expression/collation.go | 52 +++++++++++++--------------- expression/collation_test.go | 4 +-- expression/distsql_builtin.go | 7 +++- expression/integration_test.go | 2 +- expression/typeinfer_test.go | 1 - parser/charset/encoding_table.go | 7 ++-- planner/core/expression_rewriter.go | 4 +-- planner/core/logical_plan_builder.go | 2 +- types/datum.go | 4 +-- 17 files changed, 128 insertions(+), 46 deletions(-) diff --git a/errno/errcode.go b/errno/errcode.go index c4b5234a554f2..04fe354c9067b 100644 --- a/errno/errcode.go +++ b/errno/errcode.go @@ -901,6 +901,7 @@ const ( ErrFKIncompatibleColumns = 3780 ErrFunctionalIndexRowValueIsNotAllowed = 3800 ErrDependentByFunctionalIndex = 3837 + ErrCannotConvertString = 3854 ErrInvalidJSONValueForFuncIndex = 3903 ErrJSONValueOutOfRangeForFuncIndex = 3904 ErrFunctionalIndexDataIsTooLong = 3907 diff --git a/errno/errname.go b/errno/errname.go index 799f74af63f08..df3661ab7dbc7 100644 --- a/errno/errname.go +++ b/errno/errname.go @@ -896,6 +896,7 @@ var MySQLErrName = map[uint16]*mysql.ErrMessage{ ErrFKIncompatibleColumns: mysql.Message("Referencing column '%s' in foreign key constraint '%s' are incompatible", nil), ErrFunctionalIndexRowValueIsNotAllowed: mysql.Message("Expression of expression index '%s' cannot refer to a row value", nil), ErrDependentByFunctionalIndex: mysql.Message("Column '%s' has an expression index dependency and cannot be dropped or renamed", nil), + ErrCannotConvertString: mysql.Message("Cannot convert string '%.64s' from %s to %s", nil), ErrInvalidJSONValueForFuncIndex: mysql.Message("Invalid JSON value for CAST for expression index '%s'", nil), ErrJSONValueOutOfRangeForFuncIndex: mysql.Message("Out of range JSON value for CAST for expression index '%s'", nil), ErrFunctionalIndexDataIsTooLong: mysql.Message("Data too long for expression index '%s'", nil), diff --git a/expression/builtin.go b/expression/builtin.go index 2ec8672c5cce1..a1e2cbba919da 100644 --- a/expression/builtin.go +++ b/expression/builtin.go @@ -91,7 +91,7 @@ func newBaseBuiltinFunc(ctx sessionctx.Context, funcName string, args []Expressi if ctx == nil { return baseBuiltinFunc{}, errors.New("unexpected nil session ctx") } - ec, err := deriveCollation(ctx, funcName, args, retType, retType) + ec, _, err := deriveCollation(ctx, funcName, args, retType, retType) if err != nil { return baseBuiltinFunc{}, err } @@ -125,7 +125,7 @@ func newBaseBuiltinFuncWithTp(ctx sessionctx.Context, funcName string, args []Ex // derive collation information for string function, and we must do it // before doing implicit cast. - ec, err := deriveCollation(ctx, funcName, args, retType, argTps...) + ec, retTp, err := deriveCollation(ctx, funcName, args, retType, argTps...) if err != nil { return } @@ -139,7 +139,7 @@ func newBaseBuiltinFuncWithTp(ctx sessionctx.Context, funcName string, args []Ex case types.ETDecimal: args[i] = WrapWithCastAsDecimal(ctx, args[i]) case types.ETString: - args[i] = WrapWithCastAsString(ctx, args[i]) + args[i] = WrapWithCastAsStringWithTp(ctx, args[i], retTp) case types.ETDatetime: args[i] = WrapWithCastAsTime(ctx, args[i], types.NewFieldType(mysql.TypeDatetime)) case types.ETTimestamp: diff --git a/expression/builtin_cast.go b/expression/builtin_cast.go index b155370d64462..933061930c128 100644 --- a/expression/builtin_cast.go +++ b/expression/builtin_cast.go @@ -23,12 +23,16 @@ package expression import ( + "fmt" "math" "strconv" "strings" + "unicode/utf8" "github.com/pingcap/errors" + "github.com/pingcap/tidb/errno" "github.com/pingcap/tidb/parser/ast" + "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/model" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/parser/terror" @@ -37,6 +41,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/dbterror" "github.com/pingcap/tipb/go-tipb" ) @@ -108,6 +113,11 @@ var ( _ builtinFunc = &builtinCastJSONAsJSONSig{} ) +var ( + // errCannotConvertString returns when the string can not convert to other charset. + errCannotConvertString = dbterror.ClassExpression.NewStd(errno.ErrCannotConvertString) +) + type castAsIntFunctionClass struct { baseFunctionClass @@ -1112,6 +1122,23 @@ func (b *builtinCastStringAsStringSig) evalString(row chunk.Row) (res string, is if isNull || err != nil { return res, isNull, err } + ov := res + fromChs := b.args[0].GetType().Charset + toChs := b.tp.Charset + if toChs == charset.CharsetBin && fromChs != charset.CharsetBin { + res, err = charset.NewEncoding(fromChs).EncodeString(res) + } else if toChs != charset.CharsetBin && fromChs == charset.CharsetBin { + res, err = charset.NewEncoding(toChs).DecodeString(res) + // If toChs is utf8 or utf8mb4, DecodeString will do nothing and return nil error, but we need check if the binary literal is able to convert to utf8. + if toChs == charset.CharsetUTF8 || toChs == charset.CharsetUTF8MB4 { + if !utf8.ValidString(res) { + return "", false, errCannotConvertString.GenWithStackByArgs(fmt.Sprintf("%X", ov), fromChs, toChs) + } + } + } + if err != nil { + return "", false, errCannotConvertString.GenWithStackByArgs(fmt.Sprintf("%X", ov), fromChs, toChs) + } sc := b.ctx.GetSessionVars().StmtCtx res, err = types.ProduceStrWithSpecifiedTp(res, b.tp, sc, false) if err != nil { @@ -1907,6 +1934,25 @@ func WrapWithCastAsDecimal(ctx sessionctx.Context, expr Expression) Expression { return BuildCastFunction(ctx, expr, tp) } +// WrapWithCastAsStringWithTp wraps `expr` with `cast`. +func WrapWithCastAsStringWithTp(ctx sessionctx.Context, expr Expression, toTp *types.FieldType) Expression { + if expr.GetType().EvalType() == types.ETString && toTp != nil { + if expr.GetType().Charset == toTp.Charset { + return expr + } + toTp = &types.FieldType{ + Tp: mysql.TypeVarString, + Decimal: expr.GetType().Decimal, // keep original Decimal + Charset: toTp.Charset, + Collate: toTp.Collate, + Flen: expr.GetType().Flen, // keep original Flen + } + return BuildCastFunction(ctx, expr, toTp) + } + + return WrapWithCastAsString(ctx, expr) +} + // WrapWithCastAsString wraps `expr` with `cast` if the return type of expr is // not type string, otherwise, returns `expr` directly. func WrapWithCastAsString(ctx sessionctx.Context, expr Expression) Expression { diff --git a/expression/builtin_cast_vec.go b/expression/builtin_cast_vec.go index 95609069dcba6..0410beff889b7 100644 --- a/expression/builtin_cast_vec.go +++ b/expression/builtin_cast_vec.go @@ -15,10 +15,13 @@ package expression import ( + "fmt" "math" "strconv" "strings" + "unicode/utf8" + "github.com/pingcap/tidb/parser/charset" "github.com/pingcap/tidb/parser/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" @@ -1820,6 +1823,24 @@ func (b *builtinCastStringAsStringSig) vecEvalString(input *chunk.Chunk, result var res string var isNull bool + + fromChs := b.args[0].GetType().Charset + toChs := b.tp.Charset + transferString := func(s string) (string, error) { return s, nil } + if toChs == charset.CharsetBin && fromChs != charset.CharsetBin { + transferString = charset.NewEncoding(fromChs).EncodeString + } else if toChs != charset.CharsetBin && fromChs == charset.CharsetBin { + transferString = charset.NewEncoding(toChs).DecodeString + if toChs == charset.CharsetUTF8 || toChs == charset.CharsetUTF8MB4 { + transferString = func(s string) (string, error) { + if !utf8.ValidString(s) { + return "", errCannotConvertString.GenWithStackByArgs(fmt.Sprintf("%X", s), fromChs, toChs) + } + return s, nil + } + } + } + sc := b.ctx.GetSessionVars().StmtCtx result.ReserveString(n) for i := 0; i < n; i++ { @@ -1827,7 +1848,11 @@ func (b *builtinCastStringAsStringSig) vecEvalString(input *chunk.Chunk, result result.AppendNull() continue } - res, err = types.ProduceStrWithSpecifiedTp(buf.GetString(i), b.tp, sc, false) + res, err = transferString(buf.GetString(i)) + if err != nil { + return errCannotConvertString.GenWithStackByArgs(fmt.Sprintf("%X", buf.GetString(i)), fromChs, toChs) + } + res, err = types.ProduceStrWithSpecifiedTp(res, b.tp, sc, false) if err != nil { return err } diff --git a/expression/builtin_compare.go b/expression/builtin_compare.go index e5c2dcb08fcfa..dcd241fdbe4d7 100644 --- a/expression/builtin_compare.go +++ b/expression/builtin_compare.go @@ -1210,7 +1210,7 @@ func GetCmpFunction(ctx sessionctx.Context, lhs, rhs Expression) CompareFunc { case types.ETDecimal: return CompareDecimal case types.ETString: - coll, _ := CheckAndDeriveCollationFromExprs(ctx, "", types.ETInt, lhs, rhs) + coll, _, _ := CheckAndDeriveCollationFromExprs(ctx, "", types.ETInt, lhs, rhs) return genCompareString(coll.Collation) case types.ETDuration: return CompareDuration diff --git a/expression/builtin_control.go b/expression/builtin_control.go index e9b39bf36ab5c..4aa9e93ce9b55 100644 --- a/expression/builtin_control.go +++ b/expression/builtin_control.go @@ -94,7 +94,7 @@ func InferType4ControlFuncs(ctx sessionctx.Context, funcName string, lexp, rexp } if types.IsNonBinaryStr(lhs) && !types.IsBinaryStr(rhs) { - ec, err := CheckAndDeriveCollationFromExprs(ctx, funcName, evalType, lexp, rexp) + ec, _, err := CheckAndDeriveCollationFromExprs(ctx, funcName, evalType, lexp, rexp) if err != nil { return nil, err } @@ -104,7 +104,7 @@ func InferType4ControlFuncs(ctx sessionctx.Context, funcName string, lexp, rexp resultFieldType.Flag |= mysql.BinaryFlag } } else if types.IsNonBinaryStr(rhs) && !types.IsBinaryStr(lhs) { - ec, err := CheckAndDeriveCollationFromExprs(ctx, funcName, evalType, lexp, rexp) + ec, _, err := CheckAndDeriveCollationFromExprs(ctx, funcName, evalType, lexp, rexp) if err != nil { return nil, err } diff --git a/expression/builtin_string.go b/expression/builtin_string.go index b5b495321e16c..f7b835d624912 100644 --- a/expression/builtin_string.go +++ b/expression/builtin_string.go @@ -2972,6 +2972,10 @@ func (c *quoteFunctionClass) getFunction(ctx sessionctx.Context, args []Expressi } SetBinFlagOrBinStr(args[0].GetType(), bf.tp) bf.tp.Flen = 2*args[0].GetType().Flen + 2 + // If arg is NULL, quote function will return 'NULL', the Flen should be 4. + if args[0].GetType().Tp == mysql.TypeNull { + bf.tp.Flen = 4 + } if bf.tp.Flen > mysql.MaxBlobWidth { bf.tp.Flen = mysql.MaxBlobWidth } diff --git a/expression/collation.go b/expression/collation.go index 66e0c2e33c9c4..c1325cb9c5a4e 100644 --- a/expression/collation.go +++ b/expression/collation.go @@ -192,7 +192,7 @@ func deriveCoercibilityForColumn(c *Column) Coercibility { return CoercibilityImplicit } -func deriveCollation(ctx sessionctx.Context, funcName string, args []Expression, retType types.EvalType, argTps ...types.EvalType) (ec *ExprCollation, err error) { +func deriveCollation(ctx sessionctx.Context, funcName string, args []Expression, retType types.EvalType, argTps ...types.EvalType) (ec *ExprCollation, retTp *types.FieldType, err error) { switch funcName { case ast.Concat, ast.ConcatWS, ast.Lower, ast.Lcase, ast.Reverse, ast.Upper, ast.Ucase, ast.Quote, ast.Coalesce: return CheckAndDeriveCollationFromExprs(ctx, funcName, retType, args...) @@ -215,53 +215,48 @@ func deriveCollation(ctx sessionctx.Context, funcName string, args []Expression, case ast.GE, ast.LE, ast.GT, ast.LT, ast.EQ, ast.NE, ast.NullEQ, ast.Strcmp: // if compare type is string, we should determine which collation should be used. if argTps[0] == types.ETString { - ec, err = CheckAndDeriveCollationFromExprs(ctx, funcName, types.ETInt, args...) + ec, retTp, err = CheckAndDeriveCollationFromExprs(ctx, funcName, types.ETInt, args...) if err != nil { - return nil, err + return nil, nil, err } ec.Coer = CoercibilityNumeric ec.Repe = ASCII - return ec, nil + return ec, retTp, nil } case ast.If: return CheckAndDeriveCollationFromExprs(ctx, funcName, retType, args[1], args[2]) case ast.Ifnull: return CheckAndDeriveCollationFromExprs(ctx, funcName, retType, args[0], args[1]) case ast.Like: - ec, err = CheckAndDeriveCollationFromExprs(ctx, funcName, types.ETInt, args[0], args[1]) + ec, retTp, err = CheckAndDeriveCollationFromExprs(ctx, funcName, types.ETInt, args[0], args[1]) if err != nil { - return nil, err + return nil, nil, err } ec.Coer = CoercibilityNumeric ec.Repe = ASCII - return ec, nil + return ec, retTp, nil case ast.In: if args[0].GetType().EvalType() == types.ETString { return CheckAndDeriveCollationFromExprs(ctx, funcName, types.ETInt, args...) } case ast.DateFormat, ast.TimeFormat: charsetInfo, collation := ctx.GetSessionVars().GetCharsetInfo() - return &ExprCollation{args[1].Coercibility(), args[1].Repertoire(), charsetInfo, collation}, nil + return &ExprCollation{args[1].Coercibility(), args[1].Repertoire(), charsetInfo, collation}, nil, nil case ast.Cast: - // We assume all the cast are implicit. - ec = &ExprCollation{args[0].Coercibility(), args[0].Repertoire(), args[0].GetType().Charset, args[0].GetType().Collate} - // Non-string type cast to string type should use @@character_set_connection and @@collation_connection. - // String type cast to string type should keep its original charset and collation. It should not happen. - if retType == types.ETString && argTps[0] != types.ETString { - ec.Charset, ec.Collation = ctx.GetSessionVars().GetCharsetInfo() - } - return ec, nil + // We assume all the cast are implicit, keep the collation related fields to its original value. + return &ExprCollation{args[0].Coercibility(), args[0].Repertoire(), args[0].GetType().Charset, args[0].GetType().Collate}, nil, nil case ast.Case: // FIXME: case function aggregate collation is not correct. - return CheckAndDeriveCollationFromExprs(ctx, funcName, retType, args...) + ec, _, err = CheckAndDeriveCollationFromExprs(ctx, funcName, retType, args...) + return ec, nil, err case ast.Database, ast.User, ast.CurrentUser, ast.Version, ast.CurrentRole, ast.TiDBVersion: chs, coll := charset.GetDefaultCharsetAndCollate() - return &ExprCollation{CoercibilitySysconst, UNICODE, chs, coll}, nil + return &ExprCollation{CoercibilitySysconst, UNICODE, chs, coll}, nil, nil case ast.Format, ast.Space, ast.ToBase64, ast.UUID, ast.Hex, ast.MD5, ast.SHA, ast.SHA2: // should return ASCII repertoire, MySQL's doc says it depends on character_set_connection, but it not true from its source code. ec = &ExprCollation{Coer: CoercibilityCoercible, Repe: ASCII} ec.Charset, ec.Collation = ctx.GetSessionVars().GetCharsetInfo() - return ec, nil + return ec, nil, nil } ec = &ExprCollation{CoercibilityNumeric, ASCII, charset.CharsetBin, charset.CollationBin} @@ -272,7 +267,7 @@ func deriveCollation(ctx sessionctx.Context, funcName string, args []Expression, ec.Repe = UNICODE } } - return ec, nil + return ec, nil, nil } // DeriveCollationFromExprs derives collation information from these expressions. @@ -284,14 +279,14 @@ func DeriveCollationFromExprs(ctx sessionctx.Context, exprs ...Expression) (dstC } // CheckAndDeriveCollationFromExprs derives collation information from these expressions, return error if derives collation error. -func CheckAndDeriveCollationFromExprs(ctx sessionctx.Context, funcName string, evalType types.EvalType, args ...Expression) (et *ExprCollation, err error) { +func CheckAndDeriveCollationFromExprs(ctx sessionctx.Context, funcName string, evalType types.EvalType, args ...Expression) (et *ExprCollation, retTp *types.FieldType, err error) { ec := inferCollation(args...) if ec == nil { - return nil, illegalMixCollationErr(funcName, args) + return nil, nil, illegalMixCollationErr(funcName, args) } if evalType != types.ETString && ec.Coer == CoercibilityNone { - return nil, illegalMixCollationErr(funcName, args) + return nil, nil, illegalMixCollationErr(funcName, args) } if evalType == types.ETString && ec.Coer == CoercibilityNumeric { @@ -301,10 +296,9 @@ func CheckAndDeriveCollationFromExprs(ctx sessionctx.Context, funcName string, e } if !safeConvert(ctx, ec, args...) { - return nil, illegalMixCollationErr(funcName, args) + return nil, nil, illegalMixCollationErr(funcName, args) } - - return ec, nil + return ec, &types.FieldType{Charset: ec.Charset, Collate: ec.Collation}, nil } func safeConvert(ctx sessionctx.Context, ec *ExprCollation, args ...Expression) bool { @@ -322,7 +316,11 @@ func safeConvert(ctx sessionctx.Context, ec *ExprCollation, args ...Expression) if err != nil { return false } - if !isNull && !isValidString(str, ec.Charset) { + // if value is NULL or binary string, just skip it. + if isNull || types.IsBinaryStr(c.GetType()) { + continue + } + if !isValidString(str, ec.Charset) { return false } } else { diff --git a/expression/collation_test.go b/expression/collation_test.go index 1a8541fef3060..3a98db3dfa895 100644 --- a/expression/collation_test.go +++ b/expression/collation_test.go @@ -622,13 +622,13 @@ func TestDeriveCollation(t *testing.T) { []types.EvalType{types.ETInt}, types.ETString, false, - &ExprCollation{CoercibilityExplicit, ASCII, charset.CharsetUTF8MB4, charset.CollationUTF8MB4}, + &ExprCollation{CoercibilityExplicit, ASCII, charset.CharsetBinary, charset.CollationBin}, }, } for i, test := range tests { for _, fc := range test.fcs { - ec, err := deriveCollation(ctx, fc, test.args, test.retTp, test.argTps...) + ec, _, err := deriveCollation(ctx, fc, test.args, test.retTp, test.argTps...) if test.err { require.Error(t, err, "Number: %d, function: %s", i, fc) require.Nil(t, ec, i) diff --git a/expression/distsql_builtin.go b/expression/distsql_builtin.go index 99693677091bc..502601a9d1218 100644 --- a/expression/distsql_builtin.go +++ b/expression/distsql_builtin.go @@ -1218,7 +1218,12 @@ func convertUint(val []byte) (*Constant, error) { func convertString(val []byte, tp *tipb.FieldType) (*Constant, error) { var d types.Datum d.SetBytesAsString(val, protoToCollation(tp.Collate), uint32(tp.Flen)) - return &Constant{Value: d, RetType: types.NewFieldType(mysql.TypeVarString)}, nil + return &Constant{Value: d, RetType: &types.FieldType{ + Tp: mysql.TypeString, + Flag: uint(tp.Flag), + Charset: tp.Charset, + Flen: int(tp.Flen), + }}, nil } func convertFloat(val []byte, f32 bool) (*Constant, error) { diff --git a/expression/integration_test.go b/expression/integration_test.go index 59984a2c97f61..c7688b9afcaa3 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -1180,7 +1180,7 @@ func (s *testIntegrationSuite2) TestStringBuiltin(c *C) { // for insert result = tk.MustQuery(`select insert("中文", 1, 1, cast("aaa" as binary)), insert("ba", -1, 1, "aaa"), insert("ba", 1, 100, "aaa"), insert("ba", 100, 1, "aaa");`) - result.Check(testkit.Rows("aaa文 ba aaa ba")) + result.Check(testkit.Rows("aaa\xb8\xad文 ba aaa ba")) result = tk.MustQuery(`select insert("bb", NULL, 1, "aa"), insert("bb", 1, NULL, "aa"), insert(NULL, 1, 1, "aaa"), insert("bb", 1, 1, NULL);`) result.Check(testkit.Rows(" ")) result = tk.MustQuery(`SELECT INSERT("bb", 0, 1, NULL), INSERT("bb", 0, NULL, "aaa");`) diff --git a/expression/typeinfer_test.go b/expression/typeinfer_test.go index 13f5d81380e7e..33a711e8194fd 100644 --- a/expression/typeinfer_test.go +++ b/expression/typeinfer_test.go @@ -275,7 +275,6 @@ func (s *testInferTypeSuite) createTestCase4StrFuncs() []typeInferTestCase { {"CONCAT('T', 'i', 'DB', c_binary)", mysql.TypeVarString, charset.CharsetBin, mysql.BinaryFlag, 24, types.UnspecifiedLength}, {"CONCAT_WS('-', 'T', 'i', 'DB')", mysql.TypeVarString, charset.CharsetUTF8MB4, mysql.NotNullFlag, 6, types.UnspecifiedLength}, {"CONCAT_WS(',', 'TiDB', c_binary)", mysql.TypeVarString, charset.CharsetBin, mysql.BinaryFlag, 25, types.UnspecifiedLength}, - {"CONCAT(c_bchar, 0x80)", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 23, types.UnspecifiedLength}, {"left(c_int_d, c_int_d)", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 20, types.UnspecifiedLength}, {"right(c_int_d, c_int_d)", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 20, types.UnspecifiedLength}, {"lower(c_int_d)", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 20, types.UnspecifiedLength}, diff --git a/parser/charset/encoding_table.go b/parser/charset/encoding_table.go index ea7e6d8915798..18aac4b75a968 100644 --- a/parser/charset/encoding_table.go +++ b/parser/charset/encoding_table.go @@ -175,14 +175,14 @@ var encodings = map[string]struct { "cp819": {charmap.Windows1252, "windows-1252"}, "csisolatin1": {charmap.Windows1252, "windows-1252"}, "ibm819": {charmap.Windows1252, "windows-1252"}, - "iso-8859-1": {charmap.Windows1252, "windows-1252"}, + "iso-8859-1": {charmap.ISO8859_1, "iso-8859-1"}, "iso-ir-100": {charmap.Windows1252, "windows-1252"}, "iso8859-1": {charmap.Windows1252, "windows-1252"}, "iso88591": {charmap.Windows1252, "windows-1252"}, "iso_8859-1": {charmap.Windows1252, "windows-1252"}, "iso_8859-1:1987": {charmap.Windows1252, "windows-1252"}, "l1": {charmap.Windows1252, "windows-1252"}, - "latin1": {charmap.Windows1252, "windows-1252"}, + "latin1": {charmap.ISO8859_1, "iso-8859-1"}, "us-ascii": {charmap.Windows1252, "windows-1252"}, "windows-1252": {charmap.Windows1252, "windows-1252"}, "x-cp1252": {charmap.Windows1252, "windows-1252"}, @@ -273,6 +273,9 @@ func FindNextCharacterLength(label string) func([]byte) int { var encodingNextCharacterLength = map[string]func([]byte) int{ // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Layout_diagram + "windows-1252": func(bs []byte) int { + return 1 + }, "gbk": characterLengthGBK, "utf-8": characterLengthUTF8, "binary": func(bs []byte) int { diff --git a/planner/core/expression_rewriter.go b/planner/core/expression_rewriter.go index 0581301d1a791..f21c0ce634adf 100644 --- a/planner/core/expression_rewriter.go +++ b/planner/core/expression_rewriter.go @@ -545,7 +545,7 @@ func (er *expressionRewriter) handleCompareSubquery(ctx context.Context, v *ast. // Lexpr cannot compare with rexpr by different collate opString := new(strings.Builder) v.Op.Format(opString) - _, er.err = expression.CheckAndDeriveCollationFromExprs(er.sctx, opString.String(), types.ETInt, lexpr, rexpr) + _, _, er.err = expression.CheckAndDeriveCollationFromExprs(er.sctx, opString.String(), types.ETInt, lexpr, rexpr) if er.err != nil { return v, true } @@ -1670,7 +1670,7 @@ func (er *expressionRewriter) betweenToExpression(v *ast.BetweenExpr) { expr, lexp, rexp := er.wrapExpWithCast() - coll, err := expression.CheckAndDeriveCollationFromExprs(er.sctx, "BETWEEN", types.ETInt, expr, lexp, rexp) + coll, _, err := expression.CheckAndDeriveCollationFromExprs(er.sctx, "BETWEEN", types.ETInt, expr, lexp, rexp) er.err = err if er.err != nil { return diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 175b5395c7fd7..6c12a27d62b55 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -1378,7 +1378,7 @@ func (b *PlanBuilder) buildProjection4Union(ctx context.Context, u *LogicalUnion childTp := u.children[j].Schema().Columns[i].RetType resultTp = unionJoinFieldType(resultTp, childTp) } - collation, err := expression.CheckAndDeriveCollationFromExprs(b.ctx, "UNION", resultTp.EvalType(), tmpExprs...) + collation, _, err := expression.CheckAndDeriveCollationFromExprs(b.ctx, "UNION", resultTp.EvalType(), tmpExprs...) if err != nil || collation.Coer == expression.CoercibilityNone { return collate.ErrIllegalMixCollation.GenWithStackByArgs("UNION") } diff --git a/types/datum.go b/types/datum.go index d79a086cd7346..26f9b761e4840 100644 --- a/types/datum.go +++ b/types/datum.go @@ -996,9 +996,9 @@ func ProduceStrWithSpecifiedTp(s string, tp *FieldType, sc *stmtctx.StatementCon // overflowed part is all whitespaces var overflowed string var characterLen int - // Flen is the rune length, not binary length, for UTF8 charset, we need to calculate the + // Flen is the rune length, not binary length, for Non-binary charset, we need to calculate the // rune count and truncate to Flen runes if it is too long. - if chs == charset.CharsetUTF8 || chs == charset.CharsetUTF8MB4 { + if chs != charset.CharsetBinary { characterLen = utf8.RuneCountInString(s) if characterLen > flen { // 1. If len(s) is 0 and flen is 0, truncateLen will be 0, don't truncate s. From f10b6d6fbe0635aa088b9b0f40ced135b48ff3c9 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Wed, 27 Oct 2021 14:27:48 +0800 Subject: [PATCH 2/5] add test --- .../r/new_character_set_builtin.result | Bin 6848 -> 9913 bytes .../t/new_character_set_builtin.test | 31 ++++++++++++++++++ expression/collation_test.go | 12 ------- expression/typeinfer_test.go | 1 + 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/cmd/explaintest/r/new_character_set_builtin.result b/cmd/explaintest/r/new_character_set_builtin.result index bb4e9feaae201cd5c099079c907e2d929820371d..e39f13652c3070e3da6afd834cad9cb8dd030810 100644 GIT binary patch literal 9913 zcmeHNOK%%D5YCnRD;Bv(0t9qdZ#6Iyzj7$j-(YZ8RH8)}!P1-yT z?X7j1OM7wc7Co$lz56GmX|MeYomrB*76s&5u+Xx9M3FL)4f7R#fnu5D`V!F>t46D>eR}nljdD^u9i*Lbb13i zx{kd{=ZxN9%toHs8+b$0Gq2J9IvbnC!i(L_7kk?;c5fBRW?ln&k6E9&o_U3B6ltj( zmX0XhlGz;%R}nU66D*ukShyu)xwg=1S@3thwHVghWT~>Su+X@;yx6+9MACdK^CL0z z+2FEwC4?cWh(uHsFp(UI#AFRnwMm0iEUA-@-Ci$L$!n;Q*A#_Ph)5Yx0d18^i&Sij zKq*xYU5|My5s0#>5pV#Fpp2u@X^CiUu&dTqr{b9xE_l6;v%&@#QY+WMWy4YLL)KYg z8*3vr9`}X=bI$Ze>+IB}7_k2vmv(7?3_k4~EuYxdAqhoeLPp8YA^x}icmDVO5B`q- zqyNnR$^Y5^#ozOPoqRO8H+ejHI{kEdb9!g`H25s|BDfoT6+8&O2_6Mco^S0u-+GFP z=Os1732J?9jg7*E%xg}ZoFt1#j^S_mANe=^TmEf-+rQ)A_3!x){YU!wl(nR5Ts#m(v-;$CDdM#sIlSSDL&vO93s-gA!W}ejDcfgEguOFv zz{VLjVArf)>dn_Lz5gzh?+|OWUU_T$UYn@YdHpkY1Y2u6Y& zIB7y*ZLpdHhr|4>1&*hxc&FkxGYf-m+Hz+WcN$$5ZzS$;;8L$h;T#Q46D;=p?Y`oO!7kx_Or++ez2buK63`dv&fVY2f6SgdFb-3n%*=s+(~`o9DaOC%3`mY1JWL96<2fZv z>_7{xt?ztkVa%kOthV>yULKfimf~|Uk z%Vq&D1`E6czrchev%Kw;7UO$5Kj0ljH@}5UX@}blsaZYm=*#I9L5~lL(IS3jGdPh) zL3#OXAAXJe?5_tm3J|l!!k_nV^9tpK&5J*_J}!jk>se814^eohEFuK}t&cHO@KOp! z%pyl7&FzQBQj9xIo3YebvKE&YYml9ALdJqMZxfi1(Xa_Ms<>#Y7s+`UEvk07Jr8Y= z4K9Pr@5?2O41rRFF@u?cDw2r?30;wcC&5a9NpKY;2~KRGtnq2y=o%w2w#HLN*67?J zS!3FHV`_}Vm>N$RQ=@bLXN`%monz+&D=}_@tBjoBXkjN~MlJ-NCzYj;6u^}t@Cn&y zfh0u*KMK7hKdlOmEExD=JLS8r0bwEt28ES_3kPNtuNx~aG?Qt53`;;Eyy0SOr)LkQa{ak!l?1e$jy2GiK}hA?3wXjDONl6@QntR_$B~L zzc9m8uwYp_B>rggYm5ycC>Wxl=Roix(&|1Ayqur)csa09CTWwdn z)M`7d)2vXpS!IM$X4!VLRd04#z1FgATCY^wm1e!ufkw4@gsnq6o^!Z-7H{Ni-q5#W z9p(-@tjGpoLCa@XhXe2W*-}(7=kUG=-J^>-kwjD}fh>==vZ)KZkxWzp+4!zpm(D>E m2C*)ACxoATTYZQe;HTzO{&z^MlYaomLiim3 delta 60 zcmdn#d%$!<0oUYpoLZA}ICM8J5z}PhzINf<5w53GyEr#2ocu&;7Ym3#d7i@B&EYcE MEMVT|c?!~O06@(f%K!iX diff --git a/cmd/explaintest/t/new_character_set_builtin.test b/cmd/explaintest/t/new_character_set_builtin.test index d5d0bcc9a14f5..92aedcea74e46 100644 --- a/cmd/explaintest/t/new_character_set_builtin.test +++ b/cmd/explaintest/t/new_character_set_builtin.test @@ -31,6 +31,37 @@ insert into t values ('一二三', '一二三', '一二三'); select to_base64(a), to_base64(b), to_base64(c) from t; set @@tidb_enable_vectorized_expression = true; select to_base64(a), to_base64(b), to_base64(c) from t; + +drop table if exists t; +create table t (a char(20) charset utf8mb4, b char(20) charset gbk, c binary(20)); +insert into t values ('一', '一', 0xe4b880); +insert into t values ('一', '一', 0xd2bb); +insert into t values ('一', '一', 0xe4ba8c); +insert into t values ('一', '一', 0xb6fe); +select concat(a, c), concat(b, c) from t; +select concat(a, 0xe4b880), concat(b, 0xd2bb) from t; +select a = 0xe4b880, b = 0xd2bb from t; +select a = c, b = c from t; +select insert(a, 1, 2, 0xe4ba8c), insert(b, 1, 2, 0xb6fe) from t; +select insert(a, 1, 2, c), insert(b, 1, 2, c) from t; +select lpad(a, 5, 0xe4ba8c), lpad(b, 5, 0xb6fe) from t; +select lpad(a, 5, c), lpad(b, 5, c) from t; +select rpad(a, 5, 0xe4ba8c), rpad(b, 5, 0xb6fe) from t; +select rpad(a, 5, c), rpad(b, 5, c) from t; +select elt(2, a, 0xe4ba8c), elt(2, b, 0xb6fe) from t; +select elt(2, a, c), elt(2, b, c) from t; +select instr(a, 0xe4b880), instr(b, 0xd2bb) from t; +select position(a in 0xe4b880), position(b in 0xd2bb) from t; +select a like 0xe4b880, b like 0xd2bb from t; + +--error ER_CANNOT_CONVERT_STRING +select a = 0xb6fe from t; +--error ER_CANNOT_CONVERT_STRING +select b = 0xe4ba8c from t; +--error ER_CANNOT_CONVERT_STRING +select concat(a, 0xb6fe) from t; +--error ER_CANNOT_CONVERT_STRING +select concat(b, 0xe4ba8c) from t; set @@tidb_enable_vectorized_expression = false; -- test for builtin function convert() diff --git a/expression/collation_test.go b/expression/collation_test.go index 3a98db3dfa895..fcd1f7e2578b2 100644 --- a/expression/collation_test.go +++ b/expression/collation_test.go @@ -612,18 +612,6 @@ func TestDeriveCollation(t *testing.T) { false, &ExprCollation{CoercibilitySysconst, UNICODE, charset.CharsetUTF8MB4, charset.CollationUTF8MB4}, }, - { - []string{ - ast.Cast, - }, - []Expression{ - newColInt(CoercibilityExplicit), - }, - []types.EvalType{types.ETInt}, - types.ETString, - false, - &ExprCollation{CoercibilityExplicit, ASCII, charset.CharsetBinary, charset.CollationBin}, - }, } for i, test := range tests { diff --git a/expression/typeinfer_test.go b/expression/typeinfer_test.go index 33a711e8194fd..fea83846729f6 100644 --- a/expression/typeinfer_test.go +++ b/expression/typeinfer_test.go @@ -489,6 +489,7 @@ func (s *testInferTypeSuite) createTestCase4StrFuncs() []typeInferTestCase { {"quote(c_bigint_d )", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 42, types.UnspecifiedLength}, {"quote(c_float_d )", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 0, types.UnspecifiedLength}, {"quote(c_double_d )", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 0, types.UnspecifiedLength}, + {"quote(null )", mysql.TypeVarString, charset.CharsetBinary, mysql.BinaryFlag, 4, types.UnspecifiedLength}, {"convert(c_double_d using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, {"convert(c_binary using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, From 9152dd6d25bb955e749498907a88d730510247fd Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Wed, 27 Oct 2021 15:07:14 +0800 Subject: [PATCH 3/5] add comments --- expression/collation.go | 1 + 1 file changed, 1 insertion(+) diff --git a/expression/collation.go b/expression/collation.go index c1325cb9c5a4e..c1f6ce31b7e15 100644 --- a/expression/collation.go +++ b/expression/collation.go @@ -192,6 +192,7 @@ func deriveCoercibilityForColumn(c *Column) Coercibility { return CoercibilityImplicit } +// retTp is the type that the function's args should cast to, only use for string args, return nil means no need to do cast. func deriveCollation(ctx sessionctx.Context, funcName string, args []Expression, retType types.EvalType, argTps ...types.EvalType) (ec *ExprCollation, retTp *types.FieldType, err error) { switch funcName { case ast.Concat, ast.ConcatWS, ast.Lower, ast.Lcase, ast.Reverse, ast.Upper, ast.Ucase, ast.Quote, ast.Coalesce: From f3e9e0464b9c20b66a2c647541c0bb011d1367c8 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Wed, 27 Oct 2021 17:22:40 +0800 Subject: [PATCH 4/5] change test to readable character --- .../t/new_character_set_builtin.test | 50 ++++++++++++++----- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/cmd/explaintest/t/new_character_set_builtin.test b/cmd/explaintest/t/new_character_set_builtin.test index 92aedcea74e46..74cee1b40c591 100644 --- a/cmd/explaintest/t/new_character_set_builtin.test +++ b/cmd/explaintest/t/new_character_set_builtin.test @@ -38,20 +38,21 @@ insert into t values ('一', '一', 0xe4b880); insert into t values ('一', '一', 0xd2bb); insert into t values ('一', '一', 0xe4ba8c); insert into t values ('一', '一', 0xb6fe); -select concat(a, c), concat(b, c) from t; -select concat(a, 0xe4b880), concat(b, 0xd2bb) from t; + +select hex(concat(a, c)), hex(concat(b, c)) from t; +select hex(concat(a, 0xe4b880)), hex(concat(b, 0xd2bb)) from t; select a = 0xe4b880, b = 0xd2bb from t; select a = c, b = c from t; -select insert(a, 1, 2, 0xe4ba8c), insert(b, 1, 2, 0xb6fe) from t; -select insert(a, 1, 2, c), insert(b, 1, 2, c) from t; -select lpad(a, 5, 0xe4ba8c), lpad(b, 5, 0xb6fe) from t; -select lpad(a, 5, c), lpad(b, 5, c) from t; -select rpad(a, 5, 0xe4ba8c), rpad(b, 5, 0xb6fe) from t; -select rpad(a, 5, c), rpad(b, 5, c) from t; -select elt(2, a, 0xe4ba8c), elt(2, b, 0xb6fe) from t; -select elt(2, a, c), elt(2, b, c) from t; -select instr(a, 0xe4b880), instr(b, 0xd2bb) from t; -select position(a in 0xe4b880), position(b in 0xd2bb) from t; +select hex(insert(a, 1, 2, 0xe4ba8c)), hex(insert(b, 1, 2, 0xb6fe)) from t; +select hex(insert(a, 1, 2, c)), hex(insert(b, 1, 2, c)) from t; +select hex(lpad(a, 5, 0xe4ba8c)), hex(lpad(b, 5, 0xb6fe)) from t; +select hex(lpad(a, 5, c)), hex(lpad(b, 5, c)) from t; +select hex(rpad(a, 5, 0xe4ba8c)), hex(rpad(b, 5, 0xb6fe)) from t; +select hex(rpad(a, 5, c)), hex(rpad(b, 5, c)) from t; +select hex(elt(2, a, 0xe4ba8c)), hex(elt(2, b, 0xb6fe)) from t; +select hex(elt(2, a, c)), hex(elt(2, b, c)) from t; +select hex(instr(a, 0xe4b880)), hex(instr(b, 0xd2bb)) from t; +select hex(position(a in 0xe4b880)), hex(position(b in 0xd2bb)) from t; select a like 0xe4b880, b like 0xd2bb from t; --error ER_CANNOT_CONVERT_STRING @@ -106,3 +107,28 @@ select decode(encode(a,"monty"),"monty") = a, md5(decode(encode(b,"monty"),"mont set @@tidb_enable_vectorized_expression = true; select decode(encode(a,"monty"),"monty") = a, md5(decode(encode(b,"monty"),"monty")) = md5(b), decode(encode(c,"monty"),"monty") = c from t; set @@tidb_enable_vectorized_expression = false; + +select hex(concat(a, c)), hex(concat(b, c)) from t; +select hex(concat(a, 0xe4b880)), hex(concat(b, 0xd2bb)) from t; +select a = 0xe4b880, b = 0xd2bb from t; +select a = c, b = c from t; +select hex(insert(a, 1, 2, 0xe4ba8c)), hex(insert(b, 1, 2, 0xb6fe)) from t; +select hex(insert(a, 1, 2, c)), hex(insert(b, 1, 2, c)) from t; +select hex(lpad(a, 5, 0xe4ba8c)), hex(lpad(b, 5, 0xb6fe)) from t; +select hex(lpad(a, 5, c)), hex(lpad(b, 5, c)) from t; +select hex(rpad(a, 5, 0xe4ba8c)), hex(rpad(b, 5, 0xb6fe)) from t; +select hex(rpad(a, 5, c)), hex(rpad(b, 5, c)) from t; +select hex(elt(2, a, 0xe4ba8c)), hex(elt(2, b, 0xb6fe)) from t; +select hex(elt(2, a, c)), hex(elt(2, b, c)) from t; +select hex(instr(a, 0xe4b880)), hex(instr(b, 0xd2bb)) from t; +select hex(position(a in 0xe4b880)), hex(position(b in 0xd2bb)) from t; +select a like 0xe4b880, b like 0xd2bb from t; + +--error ER_CANNOT_CONVERT_STRING +select a = 0xb6fe from t; +--error ER_CANNOT_CONVERT_STRING +select b = 0xe4ba8c from t; +--error ER_CANNOT_CONVERT_STRING +select concat(a, 0xb6fe) from t; +--error ER_CANNOT_CONVERT_STRING +select concat(b, 0xe4ba8c) from t; From 370da72a160e30ae25aef5687e8f30b7ede72726 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Thu, 4 Nov 2021 18:00:43 +0800 Subject: [PATCH 5/5] resolve conflict --- .../r/new_character_set_builtin.result | Bin 9913 -> 14671 bytes .../t/new_character_set_builtin.test | 67 +++++++++--------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/cmd/explaintest/r/new_character_set_builtin.result b/cmd/explaintest/r/new_character_set_builtin.result index e39f13652c3070e3da6afd834cad9cb8dd030810..104da7324a00176633f79ea0b44e8a4d1289c098 100644 GIT binary patch literal 14671 zcmeGi$!^<5^h|yQk&7%qKuPUt5G3AmDAI2*NNSX*ktJ86Q>UkNF|r`(k~C?v9GY9} zG(gc_+_X1J;H~JXACS8Jj=otrGbAm_P7$Y|0BOj1Z{9q<{c+SCkG8D9^#;i5_bjy2 zp9B*tIBoi49|j{Uu&%g+Z8Wi#N(cMb4qn_o*uPn-SXno)dV&VX53EaQXUQ#B$mp@r zFI&CwXcMzd!3G|@G8z1`xn5ssw{7@cZm*K-jpj>9g1TD96Xs*Y{I z$Vra;00kR@iHfC6;0=@sX&+_YEgQ-`$W^(QA0@EPoeTP1ZvzdnNo`yKm5s*z4^ejm z?QD(FWYQlEtut0I-bRH=vBCatRN8X~6VPdQv3TOxhXfP_2oWVahJ@dSPs8uR@58qv*@$vi|B6jRrDbGCVCV-dAYmya`zby zJjbLE2dM3>Ei@*MOs~8+IB}4{A!c|j{3yH;-VASrx5GQ(-SA%cFnkm~nO>XToj#iG z%|4l3pWUAAMR%glqc5X-(f#P_=wbAj@@*WCX&xsAqYoIz^h)BGgA5KakG|!Lm%G0n zpW`D%7%upV#U(Ei1_u>7IimA`73I~*(L%&)FFAtIhUZRDvrfD^zQY+bzLU~tJPElo zzJbIU-$2%ET<9-1F1-IPjPDrsc)R-6SZE> zbmmf^v_j@=q#%?9k14)_{N`q{600oJd7Wonl5+?b{2Sw*jqR$KBic;%4F=@8Y*KFT4NRx z_^9C}nHA5aVKRy)ehBXTOw*h;A4{5QGI^HpvKf|;4&zorYh*CvBrD-d+!y=%@ZnG^@eS?8Zb87)voQ-8_jML)jCecaO;h3-R-r!YQ2V9y;`T%bM20Yx~;0~w`$06 zU1U2>tKDezP@~><9Jf)eb*imKw+lD5+5#yZ!80fS0%~Rf~%12@G*j23%@y6-!0-1&+7?TwjDh6+43qV#F%up-dPmZUzsy zl7w0^|H}=YVRf$c#2VSe#Fp@tqf$7kR2`gn=g1Y3~qQj87l37HX>H>tZ>9 zeuacz!7S?qU5NCYdwhh9?=Z*024w^zhLaW_hi3kj==qdToIo%QpDgxG+pv|d)RWSa zY3vtH#i}y5$F9kw%HZH698P>um2EZ=BH=6=SfXHD2DXD-*N5<($uIzfT(KSKyaGIx zNtkCEpKs4#CAs}Oud_1Co4>hjGuh2YrF`b|=e*7dH1nd$emvN6yV&X)Ns}WWFgOBA zv9{EuM29IWnQW+~X}{RJG4JvhtLH?ok$gZBgfM?e1iIy`=(@nTy;DPG_DrOKf7mQF zULm6x7i@~mVo|bLY87Nw;(45{cLF*@@2N}4W2CHPz6-nnYsp^{)n&co>l3{uU4}yM zk_dFYOL7B{E-nOL6l-rJ^J$vY;pZbBDPKU^4&c8f%zVHCr2+*2#ClSyzLaG|UvyqtC2A_Iw!$UD)@afX z;NM$t651b%%8fH1pdN*$a3N4M0)!O-RMuzl<649MWh9uuRUezsDo6zl>OcuAn>MaQ zu@8!hX?;8%jjdX{QGeT7b%(=Iz|Vgz&<8#hE@8Eo=mPql7EamuA0&AMpfxOQ`WB&0 zZ7)F`UgvVk@D`Kxbu4-;LOu`cYy)B{tZQ9li$k9v@6mkfQBu*)f^1nT$`hZYbTvOl zNeOK}a?#Ez@u+BLi*`2QG(|gGw6jns;Ec<^dTXfmr}Q0d(azGAh~*<%O(cIs)5T?r v#}mCqY7N!?l)}!P1-yT z?X7j1OM7wc7Co$lz56GmX|MeYomrB*76s&5u+Xx9M3FL)4f7R#fnu5D`V!F>t46D>eR}nljdD^u9i*Lbb13i zx{kd{=ZxN9%toHs8+b$0Gq2J9IvbnC!i(L_7kk?;c5fBRW?ln&k6E9&o_U3B6ltj( zmX0XhlGz;%R}nU66D*ukShyu)xwg=1S@3thwHVghWT~>Su+X@;yx6+9MACdK^CL0z z+2FEwC4?cWh(uHsFp(UI#AFRnwMm0iEUA-@-Ci$L$!n;Q*A#_Ph)5Yx0d18^i&Sij zKq*xYU5|My5s0#>5pV#Fpp2u@X^CiUu&dTqr{b9xE_l6;v%&@#QY+WMWy4YLL)KYg z8*3vr9`}X=bI$Ze>+IB}7_k2vmv(7?3_k4~EuYxdAqhoeLPp8YA^x}icmDVO5B`q- zqyNnR$^Y5^#ozOPoqRO8H+ejHI{kEdb9!g`H25s|BDfoT6+8&O2_6Mco^S0u-+GFP z=Os1732J?9jg7*E%xg}ZoFt1#j^S_mANe=^TmEf-+rQ)A_3!x){YU!wl(nR5Ts#m(v-;$CDdM#sIlSSDL&vO93s-gA!W}ejDcfgEguOFv zz{VLjVArf)>dn_Lz5gzh?+|OWUU_T$UYn@YdHpkY1Y2u6Y& zIB7y*ZLpdHhr|4>1&*hxc&FkxGYf-m+Hz+WcN$$5ZzS$;;8L$h;T#Q46D;=p?Y`oO!7kx_Or++ez2buK63`dv&fVY2f6SgdFb-3n%*=s+(~`o9DaOC%3`mY1JWL96<2fZv z>_7{xt?ztkVa%kOthV>yULKfimf~|Uk z%Vq&D1`E6czrchev%Kw;7UO$5Kj0ljH@}5UX@}blsaZYm=*#I9L5~lL(IS3jGdPh) zL3#OXAAXJe?5_tm3J|l!!k_nV^9tpK&5J*_J}!jk>se814^eohEFuK}t&cHO@KOp! z%pyl7&FzQBQj9xIo3YebvKE&YYml9ALdJqMZxfi1(Xa_Ms<>#Y7s+`UEvk07Jr8Y= z4K9Pr@5?2O41rRFF@u?cDw2r?30;wcC&5a9NpKY;2~KRGtnq2y=o%w2w#HLN*67?J zS!3FHV`_}Vm>N$RQ=@bLXN`%monz+&D=}_@tBjoBXkjN~MlJ-NCzYj;6u^}t@Cn&y zfh0u*KMK7hKdlOmEExD=JLS8r0bwEt28ES_3kPNtuNx~aG?Qt53`;;Eyy0SOr)LkQa{ak!l?1e$jy2GiK}hA?3wXjDONl6@QntR_$B~L zzc9m8uwYp_B>rggYm5ycC>Wxl=Roix(&|1Ayqur)csa09CTWwdn z)M`7d)2vXpS!IM$X4!VLRd04#z1FgATCY^wm1e!ufkw4@gsnq6o^!Z-7H{Ni-q5#W z9p(-@tjGpoLCa@XhXe2W*-}(7=kUG=-J^>-kwjD}fh>==vZ)KZkxWzp+4!zpm(D>E m2C*)ACxoATTYZQe;HTzO{&z^MlYaomLiim3 diff --git a/cmd/explaintest/t/new_character_set_builtin.test b/cmd/explaintest/t/new_character_set_builtin.test index 74cee1b40c591..1cec990d1a5c0 100644 --- a/cmd/explaintest/t/new_character_set_builtin.test +++ b/cmd/explaintest/t/new_character_set_builtin.test @@ -32,39 +32,6 @@ select to_base64(a), to_base64(b), to_base64(c) from t; set @@tidb_enable_vectorized_expression = true; select to_base64(a), to_base64(b), to_base64(c) from t; -drop table if exists t; -create table t (a char(20) charset utf8mb4, b char(20) charset gbk, c binary(20)); -insert into t values ('一', '一', 0xe4b880); -insert into t values ('一', '一', 0xd2bb); -insert into t values ('一', '一', 0xe4ba8c); -insert into t values ('一', '一', 0xb6fe); - -select hex(concat(a, c)), hex(concat(b, c)) from t; -select hex(concat(a, 0xe4b880)), hex(concat(b, 0xd2bb)) from t; -select a = 0xe4b880, b = 0xd2bb from t; -select a = c, b = c from t; -select hex(insert(a, 1, 2, 0xe4ba8c)), hex(insert(b, 1, 2, 0xb6fe)) from t; -select hex(insert(a, 1, 2, c)), hex(insert(b, 1, 2, c)) from t; -select hex(lpad(a, 5, 0xe4ba8c)), hex(lpad(b, 5, 0xb6fe)) from t; -select hex(lpad(a, 5, c)), hex(lpad(b, 5, c)) from t; -select hex(rpad(a, 5, 0xe4ba8c)), hex(rpad(b, 5, 0xb6fe)) from t; -select hex(rpad(a, 5, c)), hex(rpad(b, 5, c)) from t; -select hex(elt(2, a, 0xe4ba8c)), hex(elt(2, b, 0xb6fe)) from t; -select hex(elt(2, a, c)), hex(elt(2, b, c)) from t; -select hex(instr(a, 0xe4b880)), hex(instr(b, 0xd2bb)) from t; -select hex(position(a in 0xe4b880)), hex(position(b in 0xd2bb)) from t; -select a like 0xe4b880, b like 0xd2bb from t; - ---error ER_CANNOT_CONVERT_STRING -select a = 0xb6fe from t; ---error ER_CANNOT_CONVERT_STRING -select b = 0xe4ba8c from t; ---error ER_CANNOT_CONVERT_STRING -select concat(a, 0xb6fe) from t; ---error ER_CANNOT_CONVERT_STRING -select concat(b, 0xe4ba8c) from t; -set @@tidb_enable_vectorized_expression = false; - -- test for builtin function convert() drop table if exists t; create table t(a char(10)); @@ -108,6 +75,40 @@ set @@tidb_enable_vectorized_expression = true; select decode(encode(a,"monty"),"monty") = a, md5(decode(encode(b,"monty"),"monty")) = md5(b), decode(encode(c,"monty"),"monty") = c from t; set @@tidb_enable_vectorized_expression = false; +drop table if exists t; +create table t (a char(20) charset utf8mb4, b char(20) charset gbk, c binary(20)); +insert into t values ('一', '一', 0xe4b880); +insert into t values ('一', '一', 0xd2bb); +insert into t values ('一', '一', 0xe4ba8c); +insert into t values ('一', '一', 0xb6fe); + +set @@tidb_enable_vectorized_expression = true; +select hex(concat(a, c)), hex(concat(b, c)) from t; +select hex(concat(a, 0xe4b880)), hex(concat(b, 0xd2bb)) from t; +select a = 0xe4b880, b = 0xd2bb from t; +select a = c, b = c from t; +select hex(insert(a, 1, 2, 0xe4ba8c)), hex(insert(b, 1, 2, 0xb6fe)) from t; +select hex(insert(a, 1, 2, c)), hex(insert(b, 1, 2, c)) from t; +select hex(lpad(a, 5, 0xe4ba8c)), hex(lpad(b, 5, 0xb6fe)) from t; +select hex(lpad(a, 5, c)), hex(lpad(b, 5, c)) from t; +select hex(rpad(a, 5, 0xe4ba8c)), hex(rpad(b, 5, 0xb6fe)) from t; +select hex(rpad(a, 5, c)), hex(rpad(b, 5, c)) from t; +select hex(elt(2, a, 0xe4ba8c)), hex(elt(2, b, 0xb6fe)) from t; +select hex(elt(2, a, c)), hex(elt(2, b, c)) from t; +select hex(instr(a, 0xe4b880)), hex(instr(b, 0xd2bb)) from t; +select hex(position(a in 0xe4b880)), hex(position(b in 0xd2bb)) from t; +select a like 0xe4b880, b like 0xd2bb from t; + +--error ER_CANNOT_CONVERT_STRING +select a = 0xb6fe from t; +--error ER_CANNOT_CONVERT_STRING +select b = 0xe4ba8c from t; +--error ER_CANNOT_CONVERT_STRING +select concat(a, 0xb6fe) from t; +--error ER_CANNOT_CONVERT_STRING +select concat(b, 0xe4ba8c) from t; + +set @@tidb_enable_vectorized_expression = false; select hex(concat(a, c)), hex(concat(b, c)) from t; select hex(concat(a, 0xe4b880)), hex(concat(b, 0xd2bb)) from t; select a = 0xe4b880, b = 0xd2bb from t;