From d32e4f51537d99bff40ea49d8aa715e666808e9e Mon Sep 17 00:00:00 2001 From: siddontang Date: Mon, 12 Oct 2015 09:56:52 +0800 Subject: [PATCH 1/5] *: support like escape --- expression/like.go | 23 +++++++++++----- expression/like_test.go | 58 ++++++++++++++++++++++++----------------- parser/parser.y | 25 +++++++++++++++--- parser/parser_test.go | 7 ++++- parser/scanner.l | 3 +++ 5 files changed, 82 insertions(+), 34 deletions(-) diff --git a/expression/like.go b/expression/like.go index 9db0740771f92..7aa0a1fbc9ea9 100644 --- a/expression/like.go +++ b/expression/like.go @@ -45,6 +45,8 @@ type PatternLike struct { patTypes []byte // Not is true, the expression is "not like". Not bool + // Escape is the special escaped character, default is \. + Escape byte } // Clone implements the Expression Clone interface. @@ -57,6 +59,7 @@ func (p *PatternLike) Clone() Expression { patChars: p.patChars, patTypes: p.patTypes, Not: p.Not, + Escape: p.Escape, } } @@ -67,6 +70,9 @@ func (p *PatternLike) IsStatic() bool { // String implements the Expression String interface. func (p *PatternLike) String() string { + if p.Escape != '\\' { + return fmt.Sprintf("%s LIKE %s ESCAPE '%v'", p.Expr, p.Pattern, p.Escape) + } return fmt.Sprintf("%s LIKE %s", p.Expr, p.Pattern) } @@ -104,7 +110,7 @@ func (p *PatternLike) Eval(ctx context.Context, args map[interface{}]interface{} default: return nil, errors.Errorf("Pattern should be string or []byte in LIKE: %v (Value of type %T)", pattern, pattern) } - p.patChars, p.patTypes = compilePattern(spattern) + p.patChars, p.patTypes = compilePattern(spattern, p.Escape) } match := doMatch(sexpr, p.patChars, p.patTypes) @@ -120,7 +126,7 @@ func (p *PatternLike) Accept(v Visitor) (Expression, error) { } // handle escapes and wild cards convert pattern characters and pattern types, -func compilePattern(pattern string) (patChars, patTypes []byte) { +func compilePattern(pattern string, escape byte) (patChars, patTypes []byte) { var lastAny bool patChars = make([]byte, len(pattern)) patTypes = make([]byte, len(pattern)) @@ -129,18 +135,23 @@ func compilePattern(pattern string) (patChars, patTypes []byte) { var tp byte var c = pattern[i] switch c { - case '\\': + case escape: lastAny = false tp = patMatch if i < len(pattern)-1 { i++ c = pattern[i] - if c == '\\' || c == '_' || c == '%' { + if c == escape || c == '_' || c == '%' { // valid escape. } else { - // invalid escape, fall back to literal back slash + // invalid escape, fall back to escape byte + // mysql will treat escape character as the origin value even + // the escape sequence is invalid in Go or C. + // e.g, \m is invalid in Go, but in MySQL we will get "m" for select '\m'. + // Following case is correct just for escape \, not for others like +. + // TODO: add more checks for other escapes. i-- - c = '\\' + c = escape } } case '_': diff --git a/expression/like_test.go b/expression/like_test.go index 824073e77fd43..a377ec60295b2 100644 --- a/expression/like_test.go +++ b/expression/like_test.go @@ -27,34 +27,40 @@ func (*testLikeSuite) TestLike(c *C) { tbl := []struct { pattern string input string + escape byte match bool }{ - {"", "a", false}, - {"a", "a", true}, - {"a", "b", false}, - {"aA", "aA", true}, - {"_", "a", true}, - {"_", "ab", false}, - {"__", "b", false}, - {"_ab", "AAB", true}, - {"%", "abcd", true}, - {"%", "", true}, - {"%a", "AAA", true}, - {"%b", "AAA", false}, - {"b%", "BBB", true}, - {"%a%", "BBB", false}, - {"%a%", "BAB", true}, - {"a%", "BBB", false}, - {`\%a`, `%a`, true}, - {`\%a`, `aa`, false}, - {`\_a`, `_a`, true}, - {`\_a`, `aa`, false}, - {`\\_a`, `\xa`, true}, - {`\a\b`, `\a\b`, true}, - {"%%_", `abc`, true}, + {"", "a", '\\', false}, + {"a", "a", '\\', true}, + {"a", "b", '\\', false}, + {"aA", "aA", '\\', true}, + {"_", "a", '\\', true}, + {"_", "ab", '\\', false}, + {"__", "b", '\\', false}, + {"_ab", "AAB", '\\', true}, + {"%", "abcd", '\\', true}, + {"%", "", '\\', true}, + {"%a", "AAA", '\\', true}, + {"%b", "AAA", '\\', false}, + {"b%", "BBB", '\\', true}, + {"%a%", "BBB", '\\', false}, + {"%a%", "BAB", '\\', true}, + {"a%", "BBB", '\\', false}, + {`\%a`, `%a`, '\\', true}, + {`\%a`, `aa`, '\\', false}, + {`\_a`, `_a`, '\\', true}, + {`\_a`, `aa`, '\\', false}, + {`\\_a`, `\xa`, '\\', true}, + {`\a\b`, `\a\b`, '\\', true}, + {"%%_", `abc`, '\\', true}, + {`+_a`, `_a`, '+', true}, + {`+%a`, `%a`, '+', true}, + {`\%a`, `%a`, '+', false}, + {`++a`, `+a`, '+', true}, + {`++_a`, `+xa`, '+', true}, } for _, v := range tbl { - patChars, patTypes := compilePattern(v.pattern) + patChars, patTypes := compilePattern(v.pattern, v.escape) match := doMatch(v.input, patChars, patTypes) c.Assert(match, Equals, v.match, Commentf("%v", v)) } @@ -68,6 +74,8 @@ func (*testLikeSuite) TestEval(c *C) { Pattern: &Value{ Val: "aA", }, + + Escape: '\\', } cloned := pattern.Clone() pattern = cloned.(*PatternLike) @@ -107,6 +115,7 @@ func (*testLikeSuite) TestEval(c *C) { pattern = &PatternLike{ Expr: mockExpr{isStatic: true, val: "slien"}, Pattern: mockExpr{isStatic: true, val: []byte("%E%")}, + Escape: '\\', } v, err := pattern.Eval(nil, nil) c.Assert(err, IsNil) @@ -114,6 +123,7 @@ func (*testLikeSuite) TestEval(c *C) { pattern = &PatternLike{ Expr: mockExpr{isStatic: true, val: "slin"}, Pattern: mockExpr{isStatic: true, val: []byte("%E%")}, + Escape: '\\', } v, err = pattern.Eval(nil, nil) c.Assert(err, IsNil) diff --git a/parser/parser.y b/parser/parser.y index 41cbc0bf427a5..bfd0ffb5fbd0e 100644 --- a/parser/parser.y +++ b/parser/parser.y @@ -133,6 +133,7 @@ import ( engines "ENGINES" enum "ENUM" eq "=" + escape "ESCAPE" execute "EXECUTE" exists "EXISTS" explain "EXPLAIN" @@ -424,6 +425,7 @@ import ( JoinTable "join table" JoinType "join type" KeyOrIndex "{KEY|INDEX}" + LikeEscapeOpt "like escape option" LimitClause "LIMIT clause" Literal "literal value" logAnd "logical and operator" @@ -1505,9 +1507,17 @@ Factor1: return 1 } } -| PrimaryFactor NotOpt "LIKE" PrimaryExpression +| PrimaryFactor NotOpt "LIKE" PrimaryExpression LikeEscapeOpt { - $$ = &expression.PatternLike{Expr: $1.(expression.Expression), Pattern: $4.(expression.Expression), Not: $2.(bool)} + escape := $5.(string) + if len(escape) > 1 { + yylex.(*lexer).errf("Incorrect arguments %s to ESCAPE", escape) + return 1 + } else if len(escape) == 0 { + escape = "\\" + } + $$ = &expression.PatternLike{Expr: $1.(expression.Expression), Pattern: $4.(expression.Expression), + Not: $2.(bool), Escape: escape[0]} } | PrimaryFactor NotOpt RegexpSym PrimaryExpression { @@ -1519,6 +1529,15 @@ RegexpSym: "REGEXP" | "RLIKE" +LikeEscapeOpt: + { + $$ = "\\" + } +| "ESCAPE" stringLit + { + $$ = $2 + } + NotOpt: { $$ = false @@ -1661,7 +1680,7 @@ UnReservedKeyword: | "START" | "GLOBAL" | "TABLES"| "TEXT" | "TIME" | "TIMESTAMP" | "TRANSACTION" | "TRUNCATE" | "UNKNOWN" | "VALUE" | "WARNINGS" | "YEAR" | "MODE" | "WEEK" | "ANY" | "SOME" | "USER" | "IDENTIFIED" | "COLLATION" | "COMMENT" | "AVG_ROW_LENGTH" | "CONNECTION" | "CHECKSUM" | "COMPRESSION" | "KEY_BLOCK_SIZE" | "MAX_ROWS" | "MIN_ROWS" -| "NATIONAL" | "ROW" | "QUARTER" +| "NATIONAL" | "ROW" | "QUARTER" | "ESCAPE" NotKeywordToken: "ABS" | "COALESCE" | "CONCAT" | "CONCAT_WS" | "COUNT" | "DAY" | "DAYOFMONTH" | "DAYOFWEEK" | "DAYOFYEAR" | "FOUND_ROWS" | "GROUP_CONCAT" diff --git a/parser/parser_test.go b/parser/parser_test.go index 0a7abcbf1f3db..55a2d49d27b80 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -533,6 +533,11 @@ func (s *testParserSuite) TestParser0(c *C) { {`select * from t as a`, true}, {"select 1 full, 1 row, 1 abs", true}, {"select * from t full, t1 row, t2 abs", true}, + + // For like escape + {`select "abc_" like "abc\\_" escape ''`, true}, + {`select "abc_" like "abc\\_" escape '\\'`, true}, + {`select "abc_" like "abc\\_" escape '||'`, false}, } for _, t := range table { @@ -556,7 +561,7 @@ func (s *testParserSuite) TestParser0(c *C) { "start", "global", "tables", "text", "time", "timestamp", "transaction", "truncate", "unknown", "value", "warnings", "year", "now", "substring", "mode", "any", "some", "user", "identified", "collation", "comment", "avg_row_length", "checksum", "compression", "connection", "key_block_size", - "max_rows", "min_rows", "national", "row", "quarter", + "max_rows", "min_rows", "national", "row", "quarter", "escape", } for _, kw := range unreservedKws { src := fmt.Sprintf("SELECT %s FROM tbl;", kw) diff --git a/parser/scanner.l b/parser/scanner.l index c93aa77a5c2eb..c0ca9d5f09175 100644 --- a/parser/scanner.l +++ b/parser/scanner.l @@ -303,6 +303,7 @@ else {e}{l}{s}{e} end {e}{n}{d} engine {e}{n}{g}{i}{n}{e} engines {e}{n}{g}{i}{n}{e}{s} +escape {e}{s}{c}{a}{p}{e} execute {e}{x}{e}{c}{u}{t}{e} exists {e}{x}{i}{s}{t}{s} explain {e}{x}{p}{l}{a}{i}{n} @@ -662,6 +663,8 @@ year_month {y}{e}{a}{r}_{m}{o}{n}{t}{h} {execute} lval.item = string(l.val) return execute {enum} return enum +{escape} lval.item = string(l.val) + return escape {exists} return exists {explain} return explain {extract} lval.item = string(l.val) From 320dca9fb8127c5addffd20c446ab8663cde0a61 Mon Sep 17 00:00:00 2001 From: siddontang Date: Mon, 12 Oct 2015 17:29:26 +0800 Subject: [PATCH 2/5] parser: fix S/R conflict. --- parser/parser.y | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parser/parser.y b/parser/parser.y index bfd0ffb5fbd0e..b17a44ff5dab9 100644 --- a/parser/parser.y +++ b/parser/parser.y @@ -587,6 +587,8 @@ import ( %precedence '(' %precedence lowerThanQuick %precedence quick +%precedence lowerThanEscape +%precedence escape %precedence lowerThanComma %precedence ',' @@ -1530,6 +1532,7 @@ RegexpSym: | "RLIKE" LikeEscapeOpt: + %prec lowerThanEscape { $$ = "\\" } From 6c9cf0690a66862b3d0457ae2cf231c016a6dcf1 Mon Sep 17 00:00:00 2001 From: siddontang Date: Mon, 12 Oct 2015 17:37:49 +0800 Subject: [PATCH 3/5] parser: add test --- parser/parser_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parser/parser_test.go b/parser/parser_test.go index e1b4c0b5eb194..b6363fb2d134d 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -58,7 +58,7 @@ func (s *testParserSuite) TestSimple(c *C) { "start", "global", "tables", "text", "time", "timestamp", "transaction", "truncate", "unknown", "value", "warnings", "year", "now", "substring", "mode", "any", "some", "user", "identified", "collation", "comment", "avg_row_length", "checksum", "compression", "connection", "key_block_size", - "max_rows", "min_rows", "national", "row", "quarter", + "max_rows", "min_rows", "national", "row", "quarter", "escape", } for _, kw := range unreservedKws { src := fmt.Sprintf("SELECT %s FROM tbl;", kw) @@ -585,6 +585,7 @@ func (s *testParserSuite) TestLikeEscape(c *C) { {`select "abc_" like "abc\\_" escape ''`, true}, {`select "abc_" like "abc\\_" escape '\\'`, true}, {`select "abc_" like "abc\\_" escape '||'`, false}, + {`select "abc" like "escape" escape '+'`, true}, } s.RunTest(c, table) From 9e8ce41f33346d6ca20712b4e9d16983210fa15a Mon Sep 17 00:00:00 2001 From: siddontang Date: Mon, 12 Oct 2015 17:41:11 +0800 Subject: [PATCH 4/5] expression: fix escape string format. --- expression/like.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expression/like.go b/expression/like.go index 7aa0a1fbc9ea9..ad58d4ec3391c 100644 --- a/expression/like.go +++ b/expression/like.go @@ -71,7 +71,7 @@ func (p *PatternLike) IsStatic() bool { // String implements the Expression String interface. func (p *PatternLike) String() string { if p.Escape != '\\' { - return fmt.Sprintf("%s LIKE %s ESCAPE '%v'", p.Expr, p.Pattern, p.Escape) + return fmt.Sprintf("%s LIKE %s ESCAPE '%c'", p.Expr, p.Pattern, p.Escape) } return fmt.Sprintf("%s LIKE %s", p.Expr, p.Pattern) } From 213b0b9ab8c49db739cc818b0b351db982c0e9a1 Mon Sep 17 00:00:00 2001 From: siddontang Date: Tue, 13 Oct 2015 10:12:04 +0800 Subject: [PATCH 5/5] parser: Address comment --- parser/parser.y | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/parser/parser.y b/parser/parser.y index 377b51dbd22be..7c23f49e87781 100644 --- a/parser/parser.y +++ b/parser/parser.y @@ -1523,8 +1523,11 @@ Factor1: } else if len(escape) == 0 { escape = "\\" } - $$ = &expression.PatternLike{Expr: $1.(expression.Expression), Pattern: $4.(expression.Expression), - Not: $2.(bool), Escape: escape[0]} + $$ = &expression.PatternLike{ + Expr: $1.(expression.Expression), + Pattern: $4.(expression.Expression), + Not: $2.(bool), + Escape: escape[0]} } | PrimaryFactor NotOpt RegexpSym PrimaryExpression {