From f70c8a3ba4e51d4f74c8f29979f2ef42799bb188 Mon Sep 17 00:00:00 2001 From: RoeiDimi Date: Wed, 31 Jan 2024 21:23:29 +0200 Subject: [PATCH] Header support for json_array_parser (#30814) **Description:** Adding a feature following https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/30644. This feature allow json_array_parser parser to accept a comma-delimited header and for every json array it parses, output a map which contains the header fileds as keys and the matching values are the ones parsed from the input json array. This feature as added mainly for performance reasons as from a functional POV, this is mostly similar to chaining the 2 operators: `json_array_parser -> assign_keys ` **Link to tracking Issue:** https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/30321 **Testing:** - unittests - End to end tests Used generated traffic on a running otel collector thats using the parser and verified the data is as expected in the end table and performance looks good **Documentation:** - [json_array_parser.md](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/58cc91ca30eabbd35c074d79db8630fc474164d9/pkg/stanza/docs/operators/json_array_parser.md) --- .chloggen/add_jarray_parser_header.yaml | 16 +++ .../docs/operators/json_array_parser.md | 43 +++++- .../operator/parser/jsonarray/config_test.go | 9 ++ .../parser/jsonarray/json_array_parser.go | 123 +++++++++++++----- .../jsonarray/json_array_parser_test.go | 37 ++++++ .../parser/jsonarray/testdata/config.yaml | 4 + 6 files changed, 198 insertions(+), 34 deletions(-) create mode 100644 .chloggen/add_jarray_parser_header.yaml diff --git a/.chloggen/add_jarray_parser_header.yaml b/.chloggen/add_jarray_parser_header.yaml new file mode 100644 index 000000000000..9294715ad968 --- /dev/null +++ b/.chloggen/add_jarray_parser_header.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/stanza + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add support in a header configuration for json array parser. + +# One or more tracking issues related to the change +issues: [30321] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | diff --git a/pkg/stanza/docs/operators/json_array_parser.md b/pkg/stanza/docs/operators/json_array_parser.md index 39c980ee33d3..734fed54b560 100644 --- a/pkg/stanza/docs/operators/json_array_parser.md +++ b/pkg/stanza/docs/operators/json_array_parser.md @@ -47,8 +47,9 @@ More information on json arrays can be found [here](https://json-schema.org/unde |--------------------|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------| | `id` | `json_array_parser` | A unique identifier for the operator. | | `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. | +| `header` | optional | A string of comma delimited field names. When a header is set, the output will be a map containing the header fields as keys and the parsed input json array fields as matching values | | `parse_from` | `body` | The [field](../types/field.md) from which the value will be parsed. | -| `parse_to` | required. can be one of `body` or a nested field inside `body`, `attributes` or `resource` (ie `attributes.parsed`) | The [field](../types/field.md) to which the value will be parsed. | +| `parse_to` | required. can be one of `body` or a nested field inside `body`, `attributes` or `resource` (ie `attributes.parsed`). When a header is used, `attributes` is also valid | The [field](../types/field.md) to which the value will be parsed. | | `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](../types/on_error.md). | | `timestamp` | `nil` | An optional [timestamp](../types/timestamp.md) block which will parse a timestamp field before passing the entry to the output operator. | | `severity` | `nil` | An optional [severity](../types/severity.md) block which will parse a severity field before passing the entry to the output operator. | @@ -124,6 +125,46 @@ Configuration: } ``` + + + + +#### Parse the field `body` with a json array parser and a header into attributes + +Configuration: + +```yaml +- type: json_array_parser + parse_to: attributes + header: origin,sev,message,isBool +``` + + + + + +
Input Entry Output Entry
+ +```json +{ + "body": "[1,\"debug\",\"Debug Message\", true]" +} +``` + + + +```json +{ + "body": "[1,\"debug\",\"Debug Message\", true]", + "attributes": { + "origin": 1, + "sev": "debug", + "message": "Debug Message", + "isBool": true, + } +} +``` +
\ No newline at end of file diff --git a/pkg/stanza/operator/parser/jsonarray/config_test.go b/pkg/stanza/operator/parser/jsonarray/config_test.go index 8a9af6933d70..59352ef3761c 100644 --- a/pkg/stanza/operator/parser/jsonarray/config_test.go +++ b/pkg/stanza/operator/parser/jsonarray/config_test.go @@ -53,6 +53,15 @@ func TestConfig(t *testing.T) { return p }(), }, + { + Name: "parse_with_header_as_attributes", + Expect: func() *Config { + p := NewConfig() + p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()} + p.Header = "A,B,C" + return p + }(), + }, }, }.Run(t) } diff --git a/pkg/stanza/operator/parser/jsonarray/json_array_parser.go b/pkg/stanza/operator/parser/jsonarray/json_array_parser.go index 2e687eb5ff46..74465559eef6 100644 --- a/pkg/stanza/operator/parser/jsonarray/json_array_parser.go +++ b/pkg/stanza/operator/parser/jsonarray/json_array_parser.go @@ -6,6 +6,7 @@ import ( "context" "errors" "fmt" + "strings" "github.com/valyala/fastjson" "go.opentelemetry.io/collector/featuregate" @@ -17,6 +18,7 @@ import ( ) const operatorType = "json_array_parser" +const headerDelimiter = "," var jsonArrayParserFeatureGate = featuregate.GlobalRegistry().MustRegister( "logs.jsonParserArray", @@ -46,6 +48,7 @@ func NewConfigWithID(operatorID string) *Config { // Config is the configuration of a json array parser operator. type Config struct { helper.ParserConfig `mapstructure:",squash"` + Header string `mapstructure:"header"` } // Build will build a json array parser operator. @@ -55,59 +58,113 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) { return nil, err } + if c.Header != "" { + return &Parser{ + ParserOperator: parserOperator, + parse: generateParseToMapFunc(new(fastjson.ParserPool), strings.Split(c.Header, headerDelimiter)), + }, nil + } + return &Parser{ ParserOperator: parserOperator, - pool: new(fastjson.ParserPool), + parse: generateParseToArrayFunc(new(fastjson.ParserPool)), }, nil } // Parser is an operator that parses json array in an entry. type Parser struct { helper.ParserOperator - pool *fastjson.ParserPool + parse parseFunc } +type parseFunc func(any) (any, error) + // Process will parse an entry for json array. func (r *Parser) Process(ctx context.Context, e *entry.Entry) error { return r.ParserOperator.ProcessWith(ctx, e, r.parse) } -func (r *Parser) parse(value any) (any, error) { - jArrayLine, err := valueAsString(value) - if err != nil { - return nil, err - } +func generateParseToArrayFunc(pool *fastjson.ParserPool) parseFunc { + return func(value any) (any, error) { + jArrayLine, err := valueAsString(value) + if err != nil { + return nil, err + } - p := r.pool.Get() - v, err := p.Parse(jArrayLine) - r.pool.Put(p) - if err != nil { - return nil, errors.New("failed to parse entry") - } + p := pool.Get() + v, err := p.Parse(jArrayLine) + pool.Put(p) + if err != nil { + return nil, errors.New("failed to parse entry") + } - jArray := v.GetArray() // a is a []*Value slice - parsedValues := make([]any, len(jArray)) - for i := range jArray { - switch jArray[i].Type() { - case fastjson.TypeNumber: - parsedValues[i] = jArray[i].GetInt64() - case fastjson.TypeString: - parsedValues[i] = string(jArray[i].GetStringBytes()) - case fastjson.TypeTrue: - parsedValues[i] = true - case fastjson.TypeFalse: - parsedValues[i] = false - case fastjson.TypeNull: - parsedValues[i] = nil - case fastjson.TypeObject: - // Nested objects handled as a string since this parser doesn't support nested headers - parsedValues[i] = jArray[i].String() - default: - return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil))) + jArray := v.GetArray() // a is a []*Value slice + parsedValues := make([]any, len(jArray)) + for i := range jArray { + switch jArray[i].Type() { + case fastjson.TypeNumber: + parsedValues[i] = jArray[i].GetInt64() + case fastjson.TypeString: + parsedValues[i] = string(jArray[i].GetStringBytes()) + case fastjson.TypeTrue: + parsedValues[i] = true + case fastjson.TypeFalse: + parsedValues[i] = false + case fastjson.TypeNull: + parsedValues[i] = nil + case fastjson.TypeObject: + // Nested objects handled as a string since this parser doesn't support nested headers + parsedValues[i] = jArray[i].String() + default: + return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil))) + } } + + return parsedValues, nil } +} + +func generateParseToMapFunc(pool *fastjson.ParserPool, header []string) parseFunc { + return func(value any) (any, error) { + jArrayLine, err := valueAsString(value) + if err != nil { + return nil, err + } + + p := pool.Get() + v, err := p.Parse(jArrayLine) + pool.Put(p) + if err != nil { + return nil, errors.New("failed to parse entry") + } - return parsedValues, nil + jArray := v.GetArray() // a is a []*Value slice + if len(header) != len(jArray) { + return nil, fmt.Errorf("wrong number of fields: expected %d, found %d", len(header), len(jArray)) + } + parsedValues := make(map[string]any, len(jArray)) + for i := range jArray { + switch jArray[i].Type() { + case fastjson.TypeNumber: + parsedValues[header[i]] = jArray[i].GetInt64() + case fastjson.TypeString: + parsedValues[header[i]] = string(jArray[i].GetStringBytes()) + case fastjson.TypeTrue: + parsedValues[header[i]] = true + case fastjson.TypeFalse: + parsedValues[header[i]] = false + case fastjson.TypeNull: + parsedValues[header[i]] = nil + case fastjson.TypeObject: + // Nested objects handled as a string since this parser doesn't support nested headers + parsedValues[header[i]] = jArray[i].String() + default: + return nil, errors.New("failed to parse entry: " + string(jArray[i].MarshalTo(nil))) + } + } + + return parsedValues, nil + } } // valueAsString interprets the given value as a string. diff --git a/pkg/stanza/operator/parser/jsonarray/json_array_parser_test.go b/pkg/stanza/operator/parser/jsonarray/json_array_parser_test.go index 7536e2689a8d..61cdabe59c3e 100644 --- a/pkg/stanza/operator/parser/jsonarray/json_array_parser_test.go +++ b/pkg/stanza/operator/parser/jsonarray/json_array_parser_test.go @@ -36,6 +36,17 @@ func TestParserInvalidType(t *testing.T) { require.Contains(t, err.Error(), "type '[]int' cannot be parsed as json array") } +func TestParserByteFailureHeadersMismatch(t *testing.T) { + cfg := NewConfigWithID("test") + cfg.Header = "name,sev,msg" + op, err := cfg.Build(testutil.Logger(t)) + require.NoError(t, err) + parser := op.(*Parser) + _, err = parser.parse("[\"stanza\",\"INFO\",\"started agent\", 42, true]") + require.Error(t, err) + require.Contains(t, err.Error(), "wrong number of fields: expected 3, found 5") +} + func TestParserJarray(t *testing.T) { cases := []struct { name string @@ -193,6 +204,32 @@ func TestParserJarray(t *testing.T) { false, false, }, + { + "parse-as-attributes-with-header", + func(p *Config) { + p.ParseTo = entry.RootableField{Field: entry.NewAttributeField()} + p.Header = "origin,sev,message,count,isBool" + }, + []entry.Entry{ + { + Body: "[\"stanza\",\"INFO\",\"started agent\", 42, true]", + }, + }, + []entry.Entry{ + { + Body: "[\"stanza\",\"INFO\",\"started agent\", 42, true]", + Attributes: map[string]any{ + "origin": "stanza", + "sev": "INFO", + "message": "started agent", + "count": int64(42), + "isBool": true, + }, + }, + }, + false, + false, + }, } for _, tc := range cases { diff --git a/pkg/stanza/operator/parser/jsonarray/testdata/config.yaml b/pkg/stanza/operator/parser/jsonarray/testdata/config.yaml index 479a5a1fa436..f50f22b40c59 100644 --- a/pkg/stanza/operator/parser/jsonarray/testdata/config.yaml +++ b/pkg/stanza/operator/parser/jsonarray/testdata/config.yaml @@ -12,3 +12,7 @@ parse_to_body: parse_to_resource: type: json_array_parser parse_to: resource.output +parse_with_header_as_attributes: + type: json_array_parser + parse_to: attributes + header: A,B,C \ No newline at end of file