diff --git a/pkg/grok/grok.go b/pkg/grok/grok.go
index b53f57ee52..7316630f01 100644
--- a/pkg/grok/grok.go
+++ b/pkg/grok/grok.go
@@ -161,9 +161,10 @@ func (h Host) Compile(expr string) (*Pattern, error) {
 
 type Pattern struct {
 	*regexp.Regexp
-	s     map[string]int
-	order map[int]string
-	cache []string
+	s        map[string]int
+	order    map[int]string
+	keyCache []string
+	valCache []string
 }
 
 // Parse returns a map of matches on the input. The map can be empty.
@@ -179,19 +180,24 @@ func (p *Pattern) Parse(input string) map[string]string {
 	return r
 }
 
-func (p *Pattern) ParseValues(input string) []string {
+func (p *Pattern) ParseKeyValues(input string) ([]string, []string) {
 	a := p.FindStringSubmatchIndex(input)
 	if a == nil {
-		return nil
+		return nil, nil
 	}
-	p.cache = p.cache[:0]
-	for i := 0; len(p.cache) < len(p.s); i++ {
+	p.valCache = p.valCache[:0]
+	p.keyCache = p.keyCache[:0]
+	for i := 0; i < len(a)/2; i++ {
 		if _, ok := p.order[i]; !ok {
 			continue
 		}
-		p.cache = append(p.cache, input[a[i*2]:a[i*2+1]])
+		if a[i*2] == -1 {
+			continue
+		}
+		p.keyCache = append(p.keyCache, p.order[i])
+		p.valCache = append(p.valCache, input[a[i*2]:a[i*2+1]])
 	}
-	return p.cache
+	return p.keyCache, p.valCache
 }
 
 // Names returns all names that this pattern has in order.
diff --git a/pkg/grok/pattern_test.go b/pkg/grok/pattern_test.go
index e3fdeec271..a3c07b85cf 100644
--- a/pkg/grok/pattern_test.go
+++ b/pkg/grok/pattern_test.go
@@ -53,11 +53,11 @@ func TestPattern_Names(t *testing.T) {
 	require.Equal(t, []string{"zero", "three", "one", "two"}, p.Names())
 }
 
-func TestPattern_ParseValues(t *testing.T) {
+func TestPattern_ParseKeyValues(t *testing.T) {
 	h := NewBase()
 	p, err := h.Compile("%{TIMESTAMP_ISO8601:event_time} %{LOGLEVEL:log_level} %{GREEDYDATA:log_message}")
 	require.NoError(t, err)
-	ss := p.ParseValues("2020-09-16T04:20:42.45+01:00 DEBUG This is a sample debug log message")
+	_, ss := p.ParseKeyValues("2020-09-16T04:20:42.45+01:00 DEBUG This is a sample debug log message")
 	require.Equal(t, []string{"2020-09-16T04:20:42.45+01:00", "DEBUG", "This is a sample debug log message"}, ss)
 }
 
@@ -76,3 +76,12 @@ func TestPattern_NamesNested(t *testing.T) {
 	require.NoError(t, err)
 	require.Equal(t, []string{"num.one", "[num][two]"}, p.Names())
 }
+
+func TestPattern_OptionalValues(t *testing.T) {
+	h := NewBase()
+	p, err := h.Compile("(%{INT:a}|%{INT:b})")
+	require.NoError(t, err)
+	keys, values := p.ParseKeyValues("1")
+	require.Equal(t, []string{"a"}, keys)
+	require.Equal(t, []string{"1"}, values)
+}
diff --git a/runtime/sam/expr/function/grok.go b/runtime/sam/expr/function/grok.go
index d4e89cd7e8..b32fdcf3da 100644
--- a/runtime/sam/expr/function/grok.go
+++ b/runtime/sam/expr/function/grok.go
@@ -1,6 +1,7 @@
 package function
 
 import (
+	"errors"
 	"strings"
 
 	"github.com/brimdata/zed"
@@ -21,10 +22,10 @@ func newGrok(zctx *zed.Context) *Grok {
 	}
 }
 
-func (g *Grok) Call(_ zed.Allocator, vals []zed.Value) zed.Value {
-	patternArg, inputArg, defArg := vals[0], vals[1], zed.NullString
-	if len(vals) == 3 {
-		defArg = vals[2]
+func (g *Grok) Call(_ zed.Allocator, args []zed.Value) zed.Value {
+	patternArg, inputArg, defArg := args[0], args[1], zed.NullString
+	if len(args) == 3 {
+		defArg = args[2]
 	}
 	switch {
 	case zed.TypeUnder(defArg.Type()) != zed.TypeString:
@@ -42,15 +43,11 @@ func (g *Grok) Call(_ zed.Allocator, vals []zed.Value) zed.Value {
 	if err != nil {
 		return g.error(err.Error(), patternArg)
 	}
-	ss := p.ParseValues(inputArg.AsString())
-	if ss == nil {
-		return g.error("value does not match pattern", inputArg)
-	}
-	g.builder.Reset()
-	for _, s := range ss {
-		g.builder.Append([]byte(s))
+	val, err := p.parse(g.zctx, &g.builder, inputArg.AsString())
+	if err != nil {
+		return g.error(err.Error(), inputArg)
 	}
-	return zed.NewValue(p.typ, g.builder.Bytes())
+	return val
 }
 
 func (g *Grok) error(msg string, val zed.Value) zed.Value {
@@ -97,5 +94,23 @@ func (h *host) getPattern(zctx *zed.Context, patternArg string) (*pattern, error
 
 type pattern struct {
 	*grok.Pattern
-	typ zed.Type
+	fields []zed.Field
+	typ    zed.Type
+}
+
+func (p *pattern) parse(zctx *zed.Context, b *zcode.Builder, input string) (zed.Value, error) {
+	keys, vals := p.ParseKeyValues(input)
+	if vals == nil {
+		return zed.Null, errors.New("value does not match pattern")
+	}
+	p.fields = p.fields[:0]
+	for _, key := range keys {
+		p.fields = append(p.fields, zed.NewField(key, zed.TypeString))
+	}
+	typ := zctx.MustLookupTypeRecord(p.fields)
+	b.Reset()
+	for _, s := range vals {
+		b.Append([]byte(s))
+	}
+	return zed.NewValue(typ, b.Bytes()), nil
 }