Skip to content

Commit

Permalink
executor: raise error when text data is too long (#33345)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jasonysli authored Mar 26, 2022
1 parent 3206d51 commit f012766
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 23 deletions.
44 changes: 44 additions & 0 deletions executor/insert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1215,6 +1215,50 @@ func TestAutoIDIncrementAndOffset(t *testing.T) {
require.EqualError(t, err, "[autoid:8060]Invalid auto_increment settings: auto_increment_increment: 65536, auto_increment_offset: 65536, both of them must be in range [1..65535]")
}

// TestTextTooLongError is the regression test for
// https://github.com/pingcap/tidb/issues/32601.
//
// It inserts values that exceed the byte capacity of TINYTEXT/TEXT/MEDIUMTEXT
// columns and checks two things:
//   - under a strict sql_mode the insert is rejected with error 1406
//     ("Data too long for column");
//   - under a non-strict sql_mode the insert succeeds and the stored value is
//     shortened (the expected lengths below are 254 and 65534 bytes for the
//     two-byte X'C385' sequence).
func TestTextTooLongError(t *testing.T) {
	store, clean := testkit.CreateMockStore(t)
	defer clean()
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")

	const dataTooLong = "[types:1406]Data too long for column 'c1' at row 1"

	// Strict sql_mode: every oversized insert must fail.
	tk.MustExec("set sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_ALL_TABLES,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';")

	// The default max_allowed_packet is large enough for these column types.
	// LONGTEXT is not covered: the default max_allowed_packet cannot carry a
	// 4GB packet, so that case is skipped.
	strictCases := []struct {
		createSQL string
		insertSQL string
	}{
		{
			createSQL: "CREATE TABLE t1(c1 TINYTEXT CHARACTER SET utf8mb4);",
			insertSQL: "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 128));",
		},
		{
			createSQL: "CREATE TABLE t1(c1 Text CHARACTER SET utf8mb4);",
			insertSQL: "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 32768));",
		},
		{
			createSQL: "CREATE TABLE t1(c1 mediumtext);",
			insertSQL: "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 8777215));",
		},
	}
	for _, tc := range strictCases {
		tk.MustExec(`drop table if exists t1;`)
		tk.MustExec(tc.createSQL)
		_, execErr := tk.Exec(tc.insertSQL)
		require.EqualError(t, execErr, dataTooLong)
	}

	// Non-strict sql_mode: no error is expected; the value is truncated instead.
	tk.MustExec("set sql_mode = 'ONLY_FULL_GROUP_BY,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';")

	// MEDIUMTEXT and larger are not covered here: because of the TiKV limit the
	// insert would fail with
	// "ERROR 8025 (HY000): entry too large, the max entry size is 6291456, the size of data is 16777247".
	truncateCases := []struct {
		createSQL  string
		insertSQL  string
		wantLength string
	}{
		{
			createSQL:  "CREATE TABLE t1(c1 TINYTEXT CHARACTER SET utf8mb4);",
			insertSQL:  "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 128));",
			wantLength: "254",
		},
		{
			createSQL:  "CREATE TABLE t1(c1 Text CHARACTER SET utf8mb4);",
			insertSQL:  "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 32768));",
			wantLength: "65534",
		},
	}
	for _, tc := range truncateCases {
		tk.MustExec(`drop table if exists t1;`)
		tk.MustExec(tc.createSQL)
		_, execErr := tk.Exec(tc.insertSQL)
		require.NoError(t, execErr)
		tk.MustQuery(`select length(c1) from t1;`).Check(testkit.Rows(tc.wantLength))
	}
}

func TestAutoRandomID(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
Expand Down
3 changes: 0 additions & 3 deletions expression/builtin_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -679,9 +679,6 @@ func (b *builtinRepeatSig) evalString(row chunk.Row) (d string, isNull bool, err
return "", true, handleAllowedPacketOverflowed(b.ctx, "repeat", b.maxAllowedPacket)
}

if int64(byteLength) > int64(b.tp.Flen)/num {
return "", true, nil
}
return strings.Repeat(str, int(num)), false, nil
}

Expand Down
2 changes: 1 addition & 1 deletion expression/builtin_string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ func TestRepeat(t *testing.T) {
require.NoError(t, err)
v, err = evalBuiltinFunc(f, chunk.Row{})
require.NoError(t, err)
require.True(t, v.IsNull())
require.False(t, v.IsNull())

args = []interface{}{"a", uint64(16777216)}
f, err = fc.getFunction(ctx, datumsToConstants(types.MakeDatums(args...)))
Expand Down
67 changes: 48 additions & 19 deletions types/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -1062,29 +1062,58 @@ func ProduceStrWithSpecifiedTp(s string, tp *FieldType, sc *stmtctx.StatementCon
// overflowed part is all whitespaces
var overflowed string
var characterLen int
// Flen is the rune length, not binary length, for Non-binary charset, we need to calculate the
// rune count and truncate to Flen runes if it is too long.

// For mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob(defined in tidb)
// and tinytext, text, mediumtext, longtext(not explicitly defined in tidb, corresponding to blob(s) in tidb) flen is the store length limit regardless of charset.
if chs != charset.CharsetBinary {
characterLen = utf8.RuneCountInString(s)
if characterLen > flen {
// 1. If len(s) is 0 and flen is 0, truncateLen will be 0, don't truncate s.
// CREATE TABLE t (a char(0));
// INSERT INTO t VALUES (``);
// 2. If len(s) is 10 and flen is 0, truncateLen will be 0 too, but we still need to truncate s.
// SELECT 1, CAST(1234 AS CHAR(0));
// So truncateLen is not a suitable variable to determine to do truncate or not.
var runeCount int
var truncateLen int
for i := range s {
if runeCount == flen {
truncateLen = i
break
switch tp.Tp {
case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob:
characterLen = len(s)
// We need to truncate the value to a proper length that contains complete word.
if characterLen > flen {
var r rune
var size int
var tempStr string
var truncateLen int
// Find the truncate position.
for truncateLen = flen; truncateLen > 0; truncateLen-- {
tempStr = truncateStr(s, truncateLen)
r, size = utf8.DecodeLastRuneInString(tempStr)
if r == utf8.RuneError && size == 0 {
// Empty string
} else if r == utf8.RuneError && size == 1 {
// Invalid string
} else {
// Get the truncate position
break
}
}
runeCount++
overflowed = s[truncateLen:]
s = truncateStr(s, truncateLen)
}
default:
characterLen = utf8.RuneCountInString(s)
if characterLen > flen {
// 1. If len(s) is 0 and flen is 0, truncateLen will be 0, don't truncate s.
// CREATE TABLE t (a char(0));
// INSERT INTO t VALUES (``);
// 2. If len(s) is 10 and flen is 0, truncateLen will be 0 too, but we still need to truncate s.
// SELECT 1, CAST(1234 AS CHAR(0));
// So truncateLen is not a suitable variable to determine to do truncate or not.
var runeCount int
var truncateLen int
for i := range s {
if runeCount == flen {
truncateLen = i
break
}
runeCount++
}
overflowed = s[truncateLen:]
s = truncateStr(s, truncateLen)
}
overflowed = s[truncateLen:]
s = truncateStr(s, truncateLen)
}

} else if len(s) > flen {
characterLen = len(s)
overflowed = s[flen:]
Expand Down

0 comments on commit f012766

Please sign in to comment.