diff --git a/docs/build.gradle b/docs/build.gradle index 94631365529ec..47da3df34290a 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -144,23 +144,28 @@ Closure setupTwitter = { String name, int count -> type: date likes: type: long + location: + properties: + city: + type: keyword + country: + type: keyword - do: bulk: index: twitter refresh: true body: |''' for (int i = 0; i < count; i++) { - String user, text + String body if (i == 0) { - user = 'kimchy' - text = 'trying out Elasticsearch' + body = """{"user": "kimchy", "message": "trying out Elasticsearch", "date": "2009-11-15T14:12:12", "likes": 0, + "location": { "city": "Amsterdam", "country": "Netherlands" }}""" } else { - user = 'test' - text = "some message with the number $i" + body = """{"user": "test", "message": "some message with the number $i", "date": "2009-11-15T14:12:12", "likes": $i}""" } buildRestTests.setups[name] += """ {"index":{"_id": "$i"}} - {"user": "$user", "message": "$text", "date": "2009-11-15T14:12:12", "likes": $i}""" + $body""" } } setupTwitter('twitter', 5) diff --git a/docs/reference/aggregations/misc.asciidoc b/docs/reference/aggregations/misc.asciidoc index 9c0e6206f0a67..28d0df30cd537 100644 --- a/docs/reference/aggregations/misc.asciidoc +++ b/docs/reference/aggregations/misc.asciidoc @@ -105,7 +105,8 @@ GET /twitter/_search?typed_keys "aggregations": { "top_users": { "top_hits": { - "size": 1 + "size": 1, + "_source": ["user", "likes", "message"] } } } @@ -133,7 +134,7 @@ In the response, the aggregations names will be changed to respectively `date_hi "total": { "value": 5, "relation": "eq" - }, + }, "max_score": 1.0, "hits": [ { @@ -141,9 +142,8 @@ In the response, the aggregations names will be changed to respectively `date_hi "_id": "0", "_score": 1.0, "_source": { - "date": "2009-11-15T14:12:12", - "message": "trying out Elasticsearch", "user": "kimchy", + "message": "trying out Elasticsearch", "likes": 0 } } @@ -167,12 +167,12 @@ request. 
This is the case for Terms, Significant Terms and Percentiles aggregati also contains information about the type of the targeted field: `lterms` (for a terms aggregation on a Long field), `sigsterms` (for a significant terms aggregation on a String field), `tdigest_percentiles` (for a percentile aggregation based on the TDigest algorithm). - + [[indexing-aggregation-results]] == Indexing aggregation results with {transforms} - -<> enable you to convert existing {es} indices -into summarized indices, which provide opportunities for new insights and -analytics. You can use {transforms} to persistently index your aggregation + +<> enable you to convert existing {es} indices +into summarized indices, which provide opportunities for new insights and +analytics. You can use {transforms} to persistently index your aggregation results into entity-centric indices. diff --git a/docs/reference/docs/get.asciidoc b/docs/reference/docs/get.asciidoc index b0c813c5c01f9..9cdcbac308829 100644 --- a/docs/reference/docs/get.asciidoc +++ b/docs/reference/docs/get.asciidoc @@ -241,7 +241,11 @@ The API returns the following result: "user": "kimchy", "date": "2009-11-15T14:12:12", "likes": 0, - "message": "trying out Elasticsearch" + "message": "trying out Elasticsearch", + "location": { + "city": "Amsterdam", + "country": "Netherlands" + } } } -------------------------------------------------- diff --git a/docs/reference/modules/cross-cluster-search.asciidoc b/docs/reference/modules/cross-cluster-search.asciidoc index f79a62a4fcf14..1c2e1f403f1c1 100644 --- a/docs/reference/modules/cross-cluster-search.asciidoc +++ b/docs/reference/modules/cross-cluster-search.asciidoc @@ -76,7 +76,8 @@ GET /cluster_one:twitter/_search "match": { "user": "kimchy" } - } + }, + "_source": ["user", "message", "likes"] } -------------------------------------------------- // TEST[continued] @@ -113,7 +114,6 @@ The API returns the following response: "_score": 1, "_source": { "user": "kimchy", - "date": 
"2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } @@ -147,7 +147,8 @@ GET /twitter,cluster_one:twitter,cluster_two:twitter/_search "match": { "user": "kimchy" } - } + }, + "_source": ["user", "message", "likes"] } -------------------------------------------------- // TEST[continued] @@ -184,7 +185,6 @@ The API returns the following response: "_score": 2, "_source": { "user": "kimchy", - "date": "2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } @@ -195,7 +195,6 @@ The API returns the following response: "_score": 1, "_source": { "user": "kimchy", - "date": "2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } @@ -206,7 +205,6 @@ The API returns the following response: "_score": 1, "_source": { "user": "kimchy", - "date": "2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } diff --git a/docs/reference/search/search-fields.asciidoc b/docs/reference/search/search-fields.asciidoc index 20605c48e3206..5354c74f4c3bd 100644 --- a/docs/reference/search/search-fields.asciidoc +++ b/docs/reference/search/search-fields.asciidoc @@ -4,33 +4,211 @@ By default, each hit in the search response includes the document <>, which is the entire JSON object that was -provided when indexing the document. If you only need certain source fields in -the search response, you can use the <> to -restrict what parts of the source are returned. +provided when indexing the document. To retrieve specific fields in the search +response, you can use the `fields` parameter: -Returning fields using only the document source has some limitations: +[source,console] +---- +POST twitter/_search +{ + "query": { + "match": { + "message": "elasticsearch" + } + }, + "fields": ["user", "date"], + "_source": false +} +---- +// TEST[setup:twitter] -* The `_source` field does not include <> or -<>. Likewise, a field in the source does not contain -values copied using the <> mapping parameter. 
-* Since the `_source` is stored as a single field in Lucene, the whole source -object must be loaded and parsed, even if only a small number of fields are -needed. +The `fields` parameter consults both a document's `_source` and the index +mappings to load and return values. Because it makes use of the mappings, +`fields` has some advantages over referencing the `_source` directly: it +accepts <> and <>, and +also formats field values like dates in a consistent way. -To avoid these limitations, you can: +A document's `_source` is stored as a single field in Lucene. So the whole +`_source` object must be loaded and parsed even if only a small number of +fields are requested. To avoid this limitation, you can try another option for +loading fields: * Use the <> parameter to get values for selected fields. This can be a good choice when returning a fairly small number of fields that support doc values, such as keywords and dates. -* Use the <> parameter to get the values for specific stored fields. (Fields that use the <> mapping option.) +* Use the <> parameter to +get the values for specific stored fields (fields that use the +<> mapping option). -You can find more detailed information on each of these methods in the +You can find more detailed information on each of these methods in the following sections: -* <> +* <> * <> * <> +* <> + +[discrete] +[[search-fields-param]] +=== Fields + +The `fields` parameter allows for retrieving a list of document fields in +the search response. It consults both the document `_source` and the index +mappings to return each value in a standardized way that matches its mapping +type. By default, date fields are formatted according to the +<> parameter in their mappings. 
+ +The following search request uses the `fields` parameter to retrieve values +for the `user` field, all fields starting with `location.`, and the +`date` field: + +[source,console] +---- +POST twitter/_search +{ + "query": { + "match": { + "message": "elasticsearch" + } + }, + "fields": [ + "user", + "location.*", <1> + { + "field": "date", + "format": "epoch_millis" <2> + } + ], + "_source": false +} +---- +// TEST[continued] + +<1> Both full field names and wildcard patterns are accepted. +<2> Using object notation, you can pass a `format` parameter to apply a custom + format for the field's values. This is currently supported for + <<date,`date` fields>> and <<date_nanos,`date_nanos` fields>>, which + accept a <<mapping-date-format,date format>>. + +The values are returned as a flat list in the `fields` section in each hit: + +[source,console-result] +---- +{ + "took" : 2, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "twitter", + "_id" : "0", + "_score" : 1.0, + "fields" : { + "user" : [ + "kimchy" + ], + "date" : [ + "1258294332000" + ], + "location.city": [ + "Amsterdam" + ], + "location.country": [ + "Netherlands" + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took" : 2/"took": $body.took/] +// TESTRESPONSE[s/"max_score" : 1.0/"max_score" : $body.hits.max_score/] +// TESTRESPONSE[s/"_score" : 1.0/"_score" : $body.hits.hits.0._score/] + +Only leaf fields are returned -- `fields` does not allow for fetching entire +objects. + +The `fields` parameter handles field types like <<alias,field aliases>> and +<<constant-keyword,`constant_keyword`>> whose values aren't always present in +the `_source`. Other mapping options are also respected, including +<<ignore-above,`ignore_above`>>, <<ignore-malformed,`ignore_malformed`>> and +<<null-value,`null_value`>>. + +[discrete] +[[docvalue-fields]] +=== Doc value fields + +You can use the <<docvalue-fields,`docvalue_fields`>> parameter to return +<<doc-values,doc values>> for one or more fields in the search response.
+ +Doc values store the same values as the `_source` but in an on-disk, +column-based structure that's optimized for sorting and aggregations. Since each +field is stored separately, {es} only reads the field values that were requested +and can avoid loading the whole document `_source`. + +Doc values are stored for supported fields by default. However, doc values are +not supported for <> or +{plugins}/mapper-annotated-text-usage.html[`text_annotated`] fields. + +The following search request uses the `docvalue_fields` parameter to retrieve +doc values for the `user` field, all fields starting with `location.`, and the +`date` field: + +[source,console] +---- +GET twitter/_search +{ + "query": { + "match": { + "message": "elasticsearch" + } + }, + "docvalue_fields": [ + "user", + "location.*", <1> + { + "field": "date", + "format": "epoch_millis" <2> + } + ] +} +---- +// TEST[continued] + +<1> Both full field names and wildcard patterns are accepted. +<2> Using object notation, you can pass a `format` parameter to apply a custom + format for the field's doc values. <> support a + <>. <> support a + https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html[DecimalFormat + pattern]. Other field datatypes do not support the `format` parameter. + +TIP: You cannot use the `docvalue_fields` parameter to retrieve doc values for +nested objects. If you specify a nested object, the search returns an empty +array (`[ ]`) for the field. To access nested fields, use the +<> parameter's `docvalue_fields` +property. + +[discrete] +[[stored-fields]] +=== Stored fields + +It's also possible to store an individual field's values by using the +<> mapping option. You can use the +<> parameter to include +these stored values in the search response. [discrete] [[source-filtering]] @@ -117,71 +295,3 @@ GET /_search } } ---- - - -[discrete] -[[docvalue-fields]] -=== Doc value fields - -You can use the <> parameter to return -<> for one or more fields in the search response. 
- -Doc values store the same values as the `_source` but in an on-disk, -column-based structure that's optimized for sorting and aggregations. Since each -field is stored separately, {es} only reads the field values that were requested -and can avoid loading the whole document `_source`. - -Doc values are stored for supported fields by default. However, doc values are -not supported for <> or -{plugins}/mapper-annotated-text-usage.html[`text_annotated`] fields. - -The following search request uses the `docvalue_fields` parameter to -retrieve doc values for the following fields: - -* Fields with names starting with `my_ip` -* `my_keyword_field` -* Fields with names ending with `_date_field` - -[source,console] ----- -GET /_search -{ - "query": { - "match_all": {} - }, - "docvalue_fields": [ - "my_ip*", <1> - { - "field": "my_keyword_field" <2> - }, - { - "field": "*_date_field", - "format": "epoch_millis" <3> - } - ] -} ----- - -<1> Wildcard patten used to match field names, specified as a string. -<2> Wildcard patten used to match field names, specified as an object. -<3> With the object notation, you can use the `format` parameter to specify a - format for the field's returned doc values. <> support a - <>. <> support a - https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html[DecimalFormat - pattern]. Other field data types do not support the `format` parameter. - -TIP: You cannot use the `docvalue_fields` parameter to retrieve doc values for -nested objects. If you specify a nested object, the search returns an empty -array (`[ ]`) for the field. To access nested fields, use the -<> parameter's `docvalue_fields` -property. - - -[discrete] -[[stored-fields]] -=== Stored fields - -It's also possible to store an individual field's values by using the -<> mapping option. You can use the -<> parameter to include -these stored values in the search response.