Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

executor: raise error when text data is too long #33345

Merged
merged 7 commits into from
Mar 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions executor/insert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1215,6 +1215,50 @@ func TestAutoIDIncrementAndOffset(t *testing.T) {
require.EqualError(t, err, "[autoid:8060]Invalid auto_increment settings: auto_increment_increment: 65536, auto_increment_offset: 65536, both of them must be in range [1..65535]")
}

// TestTextTooLongError is the regression test for
// https://github.com/pingcap/tidb/issues/32601.
//
// It inserts values whose byte length exceeds what a TINYTEXT/TEXT/MEDIUMTEXT
// column is expected to hold and checks two behaviors:
//   - under a strict sql_mode the INSERT fails with error 1406 (data too long);
//   - under a non-strict sql_mode the INSERT succeeds and the stored value is
//     shortened.
//
// X'C385' is a two-byte literal, so REPEAT(X'C385', n) produces 2*n bytes.
func TestTextTooLongError(t *testing.T) {
	store, clean := testkit.CreateMockStore(t)
	defer clean()
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")

	// Error expected from every over-long insert while in strict mode.
	const dataTooLong = "[types:1406]Data too long for column 'c1' at row 1"

	// recreate drops t1 (if present) and rebuilds it with the supplied DDL so
	// each case starts from an empty table.
	recreate := func(ddl string) {
		tk.MustExec(`drop table if exists t1;`)
		tk.MustExec(ddl)
	}

	// Strict sql_mode: over-long values must be rejected.
	tk.MustExec("set sql_mode = 'ONLY_FULL_GROUP_BY,STRICT_ALL_TABLES,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';")

	// The default max_allowed_packet is large enough for the tinytext, text
	// and mediumtext payloads below. For longtext, the default
	// max_allowed_packet cannot accommodate a 4GB packet, so that case is
	// skipped.
	strictCases := []struct {
		ddl    string
		insert string
	}{
		{
			ddl:    "CREATE TABLE t1(c1 TINYTEXT CHARACTER SET utf8mb4);",
			insert: "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 128));",
		},
		{
			ddl:    "CREATE TABLE t1(c1 Text CHARACTER SET utf8mb4);",
			insert: "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 32768));",
		},
		{
			ddl:    "CREATE TABLE t1(c1 mediumtext);",
			insert: "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 8777215));",
		},
	}
	for _, tc := range strictCases {
		recreate(tc.ddl)
		_, err := tk.Exec(tc.insert)
		require.EqualError(t, err, dataTooLong)
	}

	// Non-strict sql_mode: no error is expected; the value is truncated
	// instead.
	tk.MustExec("set sql_mode = 'ONLY_FULL_GROUP_BY,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';")

	// The expected stored lengths (254 and 65534 bytes) are even, i.e. a whole
	// number of two-byte characters. For mediumtext or larger, the TiKV entry
	// size limit yields "ERROR 8025 (HY000): entry too large, the max entry
	// size is 6291456, the size of data is 16777247", so there is nothing to
	// test for those types here.
	truncateCases := []struct {
		ddl        string
		insert     string
		wantLength string
	}{
		{
			ddl:        "CREATE TABLE t1(c1 TINYTEXT CHARACTER SET utf8mb4);",
			insert:     "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 128));",
			wantLength: "254",
		},
		{
			ddl:        "CREATE TABLE t1(c1 Text CHARACTER SET utf8mb4);",
			insert:     "INSERT INTO t1 (c1) VALUES(REPEAT(X'C385', 32768));",
			wantLength: "65534",
		},
	}
	for _, tc := range truncateCases {
		recreate(tc.ddl)
		_, err := tk.Exec(tc.insert)
		require.NoError(t, err)
		tk.MustQuery(`select length(c1) from t1;`).Check(testkit.Rows(tc.wantLength))
	}
}

func TestAutoRandomID(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
Expand Down
3 changes: 0 additions & 3 deletions expression/builtin_string.go
Original file line number Diff line number Diff line change
Expand Up @@ -679,9 +679,6 @@ func (b *builtinRepeatSig) evalString(row chunk.Row) (d string, isNull bool, err
return "", true, handleAllowedPacketOverflowed(b.ctx, "repeat", b.maxAllowedPacket)
}

if int64(byteLength) > int64(b.tp.Flen)/num {
bb7133 marked this conversation as resolved.
Show resolved Hide resolved
return "", true, nil
}
return strings.Repeat(str, int(num)), false, nil
}

Expand Down
2 changes: 1 addition & 1 deletion expression/builtin_string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ func TestRepeat(t *testing.T) {
require.NoError(t, err)
v, err = evalBuiltinFunc(f, chunk.Row{})
require.NoError(t, err)
require.True(t, v.IsNull())
require.False(t, v.IsNull())

args = []interface{}{"a", uint64(16777216)}
f, err = fc.getFunction(ctx, datumsToConstants(types.MakeDatums(args...)))
Expand Down
67 changes: 48 additions & 19 deletions types/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -1062,29 +1062,58 @@ func ProduceStrWithSpecifiedTp(s string, tp *FieldType, sc *stmtctx.StatementCon
// overflowed part is all whitespaces
var overflowed string
var characterLen int
// Flen is the rune length, not binary length, for Non-binary charset, we need to calculate the
// rune count and truncate to Flen runes if it is too long.

// For mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob(defined in tidb)
// and tinytext, text, mediumtext, longtext(not explicitly defined in tidb, corresponding to blob(s) in tidb) flen is the store length limit regardless of charset.
if chs != charset.CharsetBinary {
characterLen = utf8.RuneCountInString(s)
if characterLen > flen {
// 1. If len(s) is 0 and flen is 0, truncateLen will be 0, don't truncate s.
// CREATE TABLE t (a char(0));
// INSERT INTO t VALUES (``);
// 2. If len(s) is 10 and flen is 0, truncateLen will be 0 too, but we still need to truncate s.
// SELECT 1, CAST(1234 AS CHAR(0));
// So truncateLen is not a suitable variable to determine to do truncate or not.
var runeCount int
var truncateLen int
for i := range s {
if runeCount == flen {
truncateLen = i
break
switch tp.Tp {
case mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob:
characterLen = len(s)
// We need to truncate the value to a byte length that ends on a complete character.
if characterLen > flen {
var r rune
var size int
var tempStr string
var truncateLen int
// Find the truncate position.
for truncateLen = flen; truncateLen > 0; truncateLen-- {
tempStr = truncateStr(s, truncateLen)
r, size = utf8.DecodeLastRuneInString(tempStr)
if r == utf8.RuneError && size == 0 {
Jasonysli marked this conversation as resolved.
Show resolved Hide resolved
// Empty string
} else if r == utf8.RuneError && size == 1 {
// Invalid string
} else {
// Get the truncate position
break
}
}
runeCount++
overflowed = s[truncateLen:]
s = truncateStr(s, truncateLen)
}
default:
characterLen = utf8.RuneCountInString(s)
if characterLen > flen {
// 1. If len(s) is 0 and flen is 0, truncateLen will be 0, don't truncate s.
// CREATE TABLE t (a char(0));
// INSERT INTO t VALUES (``);
// 2. If len(s) is 10 and flen is 0, truncateLen will be 0 too, but we still need to truncate s.
// SELECT 1, CAST(1234 AS CHAR(0));
// So truncateLen is not a suitable variable to determine to do truncate or not.
var runeCount int
var truncateLen int
for i := range s {
if runeCount == flen {
truncateLen = i
break
}
runeCount++
}
overflowed = s[truncateLen:]
s = truncateStr(s, truncateLen)
}
overflowed = s[truncateLen:]
s = truncateStr(s, truncateLen)
}

Jasonysli marked this conversation as resolved.
Show resolved Hide resolved
} else if len(s) > flen {
characterLen = len(s)
overflowed = s[flen:]
Expand Down