diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index f805fd49700..2abeafaff67 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -205,6 +205,7 @@ Setting environmental variable ELASTIC_NETINFO:false in Elastic Agent pod will d - Suppress and log max HTTP request retry errors in CEL input. {pull}37160[37160] - Prevent CEL input from re-entering the eval loop when an evaluation failed. {pull}37161[37161] - Update CEL extensions library to v1.7.0. {pull}37172[37172] +- Add support for complete URL replacement in HTTPJSON chain steps. {pull}37486[37486] *Auditbeat* diff --git a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc index 33154579a60..410edf9f948 100644 --- a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc @@ -1226,6 +1226,30 @@ request_url using file_name as 'file_2': \https://example.com/services/data/v1.0 + Collect and make events from response in any format supported by httpjson for all calls. ++ +Note that since `request.url` must be a valid URL, if an API returns complete URLs in place of an identifier as in the example above, it would not be possible to use the JSON Path syntax. To achieve the desired result in this case an opaque URI syntax can be used. An opaque URI has an arbitrary scheme and opaque text separated by a colon. When the replacement is done, the scheme and colon are stripped from the URI prior to the replacement and the remaining opaque text is used as the replacement target. In the following example, the scheme is "placeholder". 
+ +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: httpjson + enabled: true + # first call + request.url: https://example.com/services/data/v1.0/records + interval: 1h + chain: + # second call + - step: + request.url: placeholder:$.records[:] + request.method: GET + replace: $.records[:] + # third call + - step: + request.url: placeholder:$.file_name + request.method: GET + replace: $.file_name +---- + + [[chain-step-replace_with]] [float] @@ -1478,6 +1502,34 @@ response_json using id as '2': + Collect and make events from response in any format supported by httpjson for all calls. ++ +Note that since `request.url` must be a valid URL, if an API returns complete URLs in place of an identifier as in the example above, it would not be possible to use the JSON Path syntax. To achieve the desired result in this case an opaque URI syntax can be used. An opaque URI has an arbitrary scheme and opaque text separated by a colon. When the replacement is done, the scheme and colon are stripped from the URI prior to the replacement and the remaining opaque text is used as the replacement target. In the following example, the scheme is "placeholder". + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: httpjson + enabled: true + # first call + id: my-httpjson-id + request.url: http://example.com/services/data/v1.0/exports + interval: 1h + chain: + # second call + - while: + request.url: placeholder:$.exportId + request.method: GET + replace: $.exportId + until: '[[ eq .last_response.body.status "completed" ]]' + request.retry.max_attempts: 5 + request.retry.wait_min: 5s + # third call + - step: + request.url: placeholder:$.files[:] + request.method: GET + replace: $.files[:] +---- + NOTE: httpjson chain will only create and ingest events from last call on chained configurations. Also, the current chain only supports the following: all <<request-parameters,request parameters>>, <<response-transforms,response.transforms>> and <<response-split,response.split>>. 
// newNakedChainTestServer returns a server setup function for chain tests in
// which the first response carries complete ("naked") URLs instead of
// identifiers to be substituted into a URL template.
//
// The server is created with newServer and answers:
//
//	/   with {"records":["<server URL>/1"]}, i.e. the absolute URL of the
//	    second endpoint
//	/1  with the final JSON document
//
// Any other path is answered with 404 Not Found so that a wrongly constructed
// chain URL shows up as an HTTP error rather than as an empty 200 response.
//
// The returned setup function stores the server URL in config["request.url"]
// and registers the server's Close method with t.Cleanup. The handler passed
// to the setup function is not used; the routes above are fixed.
func newNakedChainTestServer(
	newServer func(http.Handler) *httptest.Server,
) func(testing.TB, http.HandlerFunc, map[string]interface{}) {
	return func(t testing.TB, _ http.HandlerFunc, config map[string]interface{}) {
		// Declared before the handler so the closure can embed the server's
		// own URL in the first response. It is assigned before the URL is
		// published through config, so no request can observe it as nil.
		var server *httptest.Server
		routes := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			switch r.URL.Path {
			case "/":
				fmt.Fprintln(w, `{"records":["`+server.URL+`/1"]}`)
			case "/1":
				fmt.Fprintln(w, `{"hello":[{"world":"moon"},{"space":[{"cake":"pumpkin"}]}]}`)
			default:
				http.NotFound(w, r)
			}
		})
		// Use the injected constructor rather than hard-coding
		// httptest.NewServer so callers control how the server is built.
		server = newServer(routes)
		config["request.url"] = server.URL
		t.Cleanup(server.Close)
	}
}
b/x-pack/filebeat/input/httpjson/request.go index b161363dbe7..5612f2dc641 100644 --- a/x-pack/filebeat/input/httpjson/request.go +++ b/x-pack/filebeat/input/httpjson/request.go @@ -713,8 +713,25 @@ func (r *requester) processChainPaginationEvents(ctx context.Context, trCtx *tra return n, nil } -// generateNewUrl returns new url value using replacement from oldUrl with ids +// generateNewUrl returns new url value using replacement from oldUrl with ids. +// If oldUrl is an opaque URL, the scheme: is dropped and the remaining string +// is used as the replacement target. For example +// +// placeholder:$.result[:] +// +// becomes +// +// $.result[:] +// +// which is now the replacement target. func generateNewUrl(replacement, oldUrl, id string) (url.URL, error) { + u, err := url.Parse(oldUrl) + if err != nil { + return url.URL{}, err + } + if u.Opaque != "" { + oldUrl = u.Opaque + } newUrl, err := url.Parse(strings.Replace(oldUrl, replacement, id, 1)) if err != nil { return url.URL{}, fmt.Errorf("failed to replace value in url: %w", err)