Skip to content

Commit

Permalink
Review comments and edge cases
Browse files Browse the repository at this point in the history
- the `${}` parser handles escapes, but needs to preserve them for `#`/`%`
  - but `\}` needs to be de-escaped
- reversing strings needs to handle escapes, i.e. `a\*c` -> `c\*a`
- build the regex with a scanner, not QuoteMeta+StringReplace
- add more complicated cases to the tests
  • Loading branch information
tstenner committed Oct 2, 2023
1 parent b2cd56f commit 40dfe83
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 48 deletions.
128 changes: 93 additions & 35 deletions frontend/dockerfile/shell/lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,9 @@ func (sw *shellWord) processDollar() (string, error) {
chs += string(ch)
fallthrough
case '+', '-', '?', '#', '%':
if ch == '#' || ch == '%' {
sw.rawEscapes = true
}
word, _, err := sw.processStopOn('}')
if err != nil {
if sw.scanner.Peek() == scanner.EOF {
Expand Down Expand Up @@ -397,48 +400,18 @@ func (sw *shellWord) processDollar() (string, error) {
}
return value, nil
case '%', '#':
sw.rawEscapes = false
// %/# matches the shortest pattern expansion, %%/## the longest
match_longest := false
greedy := false
if word[0] == byte(ch) {
match_longest = true
greedy = true
word = word[1:]
}

// regular expressions can't handle finding the shortest rightmost
// string so we reverse both search space and pattern to convert it
// to a leftmost search in both cases
if ch == '%' {
value = bidi.ReverseString(value)
word = bidi.ReverseString(word)
}

// generate a regular expression for the provided pattern by escaping all
// meta characters first
pattern := "^" + regexp.QuoteMeta(word)
// \? (after escaping) should match a single character: .
pattern = strings.Replace(pattern, "\\?", ".", -1)
// \* (after escaping) should match the shortest (.*?) or longest (.*) substring
if match_longest {
pattern = strings.Replace(pattern, "\\*", ".*", -1)
} else {
pattern = strings.Replace(pattern, "\\*", ".*?", -1)
}

re, err := regexp.Compile(pattern)
if err != nil {
return "", errors.Errorf("invalid pattern (%s) in substitution", word)
}

idx := re.FindStringIndex(value)
if idx != nil {
value = value[idx[1]:]
}

if ch == '#' {
return value, nil
} else {
return bidi.ReverseString(value), nil
return trimSuffix(word, value, greedy)
}
return trimPrefix(word, value, greedy)
default:
return "", errors.Errorf("unsupported modifier (%s) in substitution", chs)
}
Expand Down Expand Up @@ -517,3 +490,88 @@ func BuildEnvs(env []string) map[string]string {

return envs
}

// convertShellPatternToRegex converts a shell-like wildcard pattern into an
// equivalent regular expression that is anchored at the beginning of the
// input.
//
// In the pattern, ? stands for any single character and * for any string;
// * expands to the longest possible string if greedy is set and to the
// shortest one otherwise. The escapes \*, \? and \\ stand for the literal
// character, \} is accepted as a literal } and every other escape is
// rejected with an error. All remaining characters match themselves.
func convertShellPatternToRegex(pattern string, greedy bool) (*regexp.Regexp, error) {
	var s scanner.Scanner
	s.Init(strings.NewReader(pattern))
	var out strings.Builder
	out.Grow(len(pattern) + 8)

	// (?s) makes '.' match newlines too: shell wildcards match any
	// character, so values containing line breaks must not stop * or ?.
	// ^ restricts the match to the beginning of the string.
	out.WriteString("(?s)^")

	// default: non-greedy wildcards
	starPattern := ".*?"
	if greedy {
		starPattern = ".*"
	}

	for tok := s.Next(); tok != scanner.EOF; tok = s.Next() {
		switch tok {
		case '*':
			out.WriteString(starPattern)
			continue
		case '?':
			out.WriteByte('.')
			continue
		case '\\':
			// } as part of ${} needs to be escaped, but the escape isn't part
			// of the pattern; drop the backslash and let the next iteration
			// handle the brace as a literal
			if s.Peek() == '}' {
				continue
			}
			out.WriteByte('\\')
			tok = s.Next()
			if tok != '*' && tok != '?' && tok != '\\' {
				return nil, errors.New("invalid escape '\\" + string(tok) + "'")
			}
		// regex characters that need to be escaped; escaping the closing
		// brackets is optional, but done for consistency
		case '[', ']', '{', '}', '.', '+', '(', ')', '|', '^', '$':
			out.WriteByte('\\')
		}
		out.WriteRune(tok)
	}
	return regexp.Compile(out.String())
}

// trimPrefix returns value without the leading part matched by the shell
// pattern word. The pattern is translated with convertShellPatternToRegex;
// greedy is passed through and selects whether * expands to the longest or
// the shortest string. If the pattern does not match, value is returned
// unchanged. A pattern that cannot be translated yields an error.
func trimPrefix(word, value string, greedy bool) (string, error) {
	matcher, convErr := convertShellPatternToRegex(word, greedy)
	if convErr != nil {
		return "", errors.Errorf("invalid pattern (%s) in substitution: %s", word, convErr)
	}

	loc := matcher.FindStringIndex(value)
	if loc == nil {
		// nothing matched, so there is nothing to cut off
		return value, nil
	}
	return value[loc[1]:], nil
}

// trimSuffix returns word without the trailing part matched by the shell
// pattern. Note the parameter order: pattern is the shell pattern, word is
// the string that gets trimmed. greedy is passed on to trimPrefix and selects
// between the longest and the shortest match.
//
// Regular expressions can't handle finding the shortest rightmost string, so
// both the search space and the pattern are reversed to convert the problem
// into a leftmost search that trimPrefix can solve; the result is reversed
// back before it is returned.
func trimSuffix(pattern, word string, greedy bool) (string, error) {
	// Split the pattern into tokens reading front to back. Whether a rune is
	// escaped can only be decided in that direction: in `\\*` the star is a
	// wildcard following an escaped backslash, which a backwards scan would
	// mistake for an escaped star.
	patternRunes := []rune(pattern)
	tokens := make([]string, 0, len(patternRunes))
	for i := 0; i < len(patternRunes); i++ {
		tok := patternRunes[i]
		escaped := tok == '\\' && i+1 < len(patternRunes)
		switch {
		case escaped && patternRunes[i+1] == '}':
			// \} only protects the brace from the ${} parser, the pattern
			// itself contains a plain literal brace
			i++
			tokens = append(tokens, bidi.ReverseString("}"))
		case escaped:
			// keep the backslash in front of the rune it escapes,
			// i.e. a\*c -> c\*a
			i++
			tokens = append(tokens, `\`+string(patternRunes[i]))
		default:
			// bidi.ReverseString replaces brackets with their counterparts,
			// so literal runes of the pattern have to go through the same
			// transformation as the reversed search space below
			tokens = append(tokens, bidi.ReverseString(string(tok)))
		}
	}

	// emit the tokens in reverse order
	var reversed strings.Builder
	reversed.Grow(len(pattern))
	for i := len(tokens) - 1; i >= 0; i-- {
		reversed.WriteString(tokens[i])
	}

	str, err := trimPrefix(reversed.String(), bidi.ReverseString(word), greedy)
	if err != nil {
		return "", err
	}
	return bidi.ReverseString(str), nil
}
29 changes: 16 additions & 13 deletions frontend/dockerfile/shell/lex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,21 +359,24 @@ func TestProcessWithMatches(t *testing.T) {
expectedErr: true,
},
{
input: "${FOO%y*}",
envs: map[string]string{"FOO": "xxyy"},
expected: "xxy",
// special characters in regular expressions
// } needs to be escaped so it doesn't match the
// closing brace of ${}
input: "${FOO#()[]{\\}^$.\\*\\?|\\\\}",
envs: map[string]string{"FOO": "()[]{}^$.*?|\\x"},
expected: "x",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO%%y*}",
envs: map[string]string{"FOO": "xxyy"},
input: "${FOO%%\\**}",
envs: map[string]string{"FOO": "xx**"},
expected: "xx",
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO#*x}",
input: "${FOO#*x*y}",
envs: map[string]string{"FOO": "xxyy"},
expected: "xyy",
expected: "y",
matches: map[string]struct{}{"FOO": {}},
},
{
Expand All @@ -383,9 +386,9 @@ func TestProcessWithMatches(t *testing.T) {
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO#?}",
envs: map[string]string{"FOO": "xxyy"},
expected: "xyy",
input: "${FOO#?\\?}",
envs: map[string]string{"FOO": "???y"},
expected: "?y",
matches: map[string]struct{}{"FOO": {}},
},
{
Expand All @@ -395,9 +398,9 @@ func TestProcessWithMatches(t *testing.T) {
matches: map[string]struct{}{"FOO": {}},
},
{
input: "${FOO%%x*}",
envs: map[string]string{"FOO": "xxyy"},
expected: "",
input: "${FOO%%\\**\\*}",
envs: map[string]string{"FOO": "a***yy*"},
expected: "a",
matches: map[string]struct{}{"FOO": {}},
},
}
Expand Down

0 comments on commit 40dfe83

Please sign in to comment.