Skip to content

Commit

Permalink
libbeat: add support for defining analyzers in-line in fields.yml fil…
Browse files Browse the repository at this point in the history
…es (#28926) (#28981)

(cherry picked from commit 62ec678)

Co-authored-by: Dan Kortschak <[email protected]>
Co-authored-by: Dan Kortschak <[email protected]>
  • Loading branch information
3 people authored Nov 16, 2021
1 parent f3f2b24 commit 633ac3f
Show file tree
Hide file tree
Showing 10 changed files with 384 additions and 87 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Add options to configure k8s client qps/burst. {pull}28151[28151]
- Update to ECS 8.0 fields. {pull}28620[28620]
- Add http.pprof.enabled option to libbeat to allow http/pprof endpoints on the socket that libbeat creates for metrics. {issue}21965[21965]
- Support custom analyzers in fields.yml. {issue}28540[28540] {pull}28926[28926]

*Auditbeat*

Expand Down
40 changes: 40 additions & 0 deletions docs/devguide/fields-yml.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,43 @@ use in aggregations or ordering, you can use a multi-field mapping:

For more information, see the {ref}/multi-fields.html[{es} documentation about
multi-fields].

==== Defining a text analyzer in-line

It is possible to define a new text analyzer or search analyzer in-line with
the field definition in the field's mapping parameters.

For example, you can define a new text analyzer that does not break hyphenated names:

[source,yaml]
----------------------------------------------------------------------
- key: mybeat
  title: mybeat
  description: These are the fields used by mybeat.
  fields:
    - name: last_name
      type: text
      required: true
      description: >
        The last name.
      analyzer:
        mybeat_hyphenated_name: <1>
          type: pattern <2>
          pattern: "[\\W&&[^-]]+" <3>
      search_analyzer:
        mybeat_hyphenated_name: <4>
          type: pattern
          pattern: "[\\W&&[^-]]+"
----------------------------------------------------------------------
<1> Use a newly defined text analyzer
<2> Define the custom analyzer type
<3> Specify the analyzer behaviour
<4> Use the same analyzer for the search

The name of a custom analyzer that is defined in-line may not be reused for a different
text analyzer. If a text analyzer name is reused, its definition is checked against the
existing instances of that analyzer to confirm that they match. It is recommended that
the analyzer name is prefixed with the beat name to avoid name clashes.

For more information, see the {ref}/analysis-custom-analyzer.html[{es} documentation about
defining custom text analyzers].
46 changes: 36 additions & 10 deletions libbeat/mapping/field.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ import (
"github.com/joeshaw/multierror"
"github.com/pkg/errors"

"github.com/elastic/beats/v7/libbeat/common"
"github.com/elastic/go-ucfg/yaml"
)

//This reflects allowed attributes for field definitions in the fields.yml.
//No logic is put into this data structure.
//The purpose is to enable using different kinds of transformation, on top of the same data structure.
//Current transformation:
// This reflects allowed attributes for field definitions in the fields.yml.
// No logic is put into this data structure.
// The purpose is to enable using different kinds of transformation, on top of the same data structure.
// Current transformation:
// -ElasticSearch Template
// -Kibana Index Pattern

Expand All @@ -44,8 +45,8 @@ type Field struct {
Fields Fields `config:"fields"`
MultiFields Fields `config:"multi_fields"`
Enabled *bool `config:"enabled"`
Analyzer string `config:"analyzer"`
SearchAnalyzer string `config:"search_analyzer"`
Analyzer Analyzer `config:"analyzer"`
SearchAnalyzer Analyzer `config:"search_analyzer"`
Norms bool `config:"norms"`
Dynamic DynamicType `config:"dynamic"`
Index *bool `config:"index"`
Expand Down Expand Up @@ -125,6 +126,35 @@ func (d *DynamicType) Unpack(s string) error {
return nil
}

// Analyzer is the value of a field's analyzer or search_analyzer setting.
// It is either a reference to an analyzer by name, or a named analyzer
// together with its in-line definition.
type Analyzer struct {
// Name is the name of the analyzer.
Name string
// Definition is the in-line definition attached to Name. It is left unset
// when the setting is given as a plain string.
Definition interface{}
}

// Unpack populates the Analyzer from an analyzer setting read from a field
// definition. Two forms are accepted:
//
//   - a string, which is stored as the analyzer name;
//   - a mapping with exactly one entry, whose key is stored as the analyzer
//     name and whose value is stored as the in-line definition.
//
// Any other value, including a mapping with zero or several entries, yields
// an error.
func (a *Analyzer) Unpack(v interface{}) error {
	var inline common.MapStr
	switch setting := v.(type) {
	case string:
		// Plain reference to an analyzer by name; there is no definition.
		a.Name = setting
		return nil
	case common.MapStr:
		inline = setting
	case map[string]interface{}:
		inline = common.MapStr(setting)
	}

	// Unsupported types leave inline nil, so they are rejected here together
	// with mappings that do not hold exactly one named definition.
	if len(inline) != 1 {
		return fmt.Errorf("'%v' is invalid analyzer setting", v)
	}

	// Exactly one entry: its key names the analyzer, its value defines it.
	for name, definition := range inline {
		a.Name, a.Definition = name, definition
	}
	return nil
}

// Validate ensures objectTypeParams are not mixed with top level objectType configuration
func (f *Field) Validate() error {
if err := f.validateType(); err != nil {
Expand Down Expand Up @@ -264,7 +294,6 @@ func (f Fields) HasKey(key string) bool {
func (f Fields) GetField(key string) *Field {
	// Split the dotted key into its path segments and delegate the lookup
	// to getField.
	return f.getField(strings.Split(key, "."))
}

// HasNode checks if inside fields the given node exists
Expand All @@ -276,7 +305,6 @@ func (f Fields) HasNode(key string) bool {
}

func (f Fields) hasNode(keys []string) bool {

// Nothing to compare, so does not contain it
if len(keys) == 0 {
return false
Expand All @@ -286,7 +314,6 @@ func (f Fields) hasNode(keys []string) bool {
keys = keys[1:]

for _, field := range f {

if field.Name == key {

//// It's the last key to compare
Expand Down Expand Up @@ -373,7 +400,6 @@ func (f Fields) GetKeys() []string {
}

func (f Fields) getKeys(namespace string) []string {

var keys []string

for _, field := range f {
Expand Down
47 changes: 45 additions & 2 deletions libbeat/mapping/field_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package mapping

import (
"fmt"
"strings"
"testing"

Expand Down Expand Up @@ -58,7 +59,8 @@ func TestFieldsHasNode(t *testing.T) {
Field{Name: "a", Fields: Fields{
Field{Name: "b", Fields: Fields{
Field{Name: "c"},
}}}},
}},
}},
},
hasNode: true,
},
Expand All @@ -68,7 +70,8 @@ func TestFieldsHasNode(t *testing.T) {
Field{Name: "a", Fields: Fields{
Field{Name: "b", Fields: Fields{
Field{Name: "c"},
}}}},
}},
}},
},
hasNode: true,
},
Expand Down Expand Up @@ -185,6 +188,46 @@ func TestDynamicYaml(t *testing.T) {
}
}

// TestAnalyzer checks that the analyzer setting of a field is unpacked both
// from a plain analyzer name and from an in-line analyzer definition.
func TestAnalyzer(t *testing.T) {
	type testCase struct {
		input  []byte
		output Field
		err    error
	}

	cases := map[string]testCase{
		"simple analyzer": {
			input: []byte(`{name: test, analyzer: simple}`),
			output: Field{
				Name:     "test",
				Analyzer: Analyzer{Name: "simple"},
			},
		},
		"pattern analyzer": {
			input: []byte(`{"name": "test", "analyzer": {"custom": {"type": "pattern", "pattern":"[\\W&&[^-]]+"}}}`),
			output: Field{
				Name: "test",
				Analyzer: Analyzer{
					Name:       "custom",
					Definition: map[string]interface{}{"type": "pattern", "pattern": "[\\W\u0026\u0026[^-]]+"},
				},
			},
		},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			cfg, err := yaml.NewConfig(tc.input)
			assert.NoError(t, err)

			var got Field
			err = cfg.Unpack(&got)

			// Errors are compared by their printed form so that a nil
			// expectation and a nil result compare equal.
			if fmt.Sprint(err) != fmt.Sprint(tc.err) {
				t.Fatalf("unexpected error for %s: got:%v want:%v", name, err, tc.err)
			}
			assert.Equal(t, tc.output.Analyzer, got.Analyzer)
		})
	}
}

func TestGetKeys(t *testing.T) {
tests := []struct {
fields Fields
Expand Down
Loading

0 comments on commit 633ac3f

Please sign in to comment.