From 0c62dcc748fe6e34d4a227f092ef2a1744f78eb3 Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Tue, 16 Aug 2022 12:51:24 -0700 Subject: [PATCH 1/3] Add support for text fields using source directly in scripting --- .../test/painless/50_script_doc_values.yml | 267 ++++++++++++++++++ ...alueFetcherSortedBinaryIndexFieldData.java | 17 +- .../index/mapper/TextFieldMapper.java | 51 ++-- .../script/field/TextDocValuesField.java | 17 ++ 4 files changed, 328 insertions(+), 24 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml index dd2187673134a..979f0a1cdf7df 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml @@ -70,6 +70,11 @@ setup: type: scaled_float scaling_factor: 100 doc_values: false + text: + type: text + fielddata: true + text_no_field_data: + type: text token_count: type: token_count analyzer: standard @@ -110,6 +115,8 @@ setup: half_float_no_doc_values: 3.140625 scaled_float: 3.14 scaled_float_no_doc_values: 3.14 + text: "Lots of text." + text_no_field_data: "Lots of text." token_count: count all these words please - do: @@ -150,6 +157,8 @@ setup: half_float_no_doc_values: [2.234, 1.123] scaled_float: [-3.5, 2.5] scaled_float_no_doc_values: [2.5, -3.5] + text: ["Lots of text.", "even more text", "SOOOOO much text"] + text_no_field_data: ["Lots of text.", "even more text", "SOOOOO much text"] - do: @@ -2719,6 +2728,264 @@ setup: source: "int value = field('dne').get(1, 1); value" - match: { hits.hits.0.fields.field.0: 1 } +--- +"text": + - do: + search: + rest_total_hits_as_int: true + body: + query: { term: { _id: "1" } } + script_fields: + field: + script: + source: "doc['text'].get(0)" + - match: { hits.hits.0.fields.field.0: lots } + + - do: + search: + rest_total_hits_as_int: true + body: + query: { term: { _id: "1" } } + script_fields: + field: + script: + source: "doc['text'].value" + - match: { hits.hits.0.fields.field.0: lots } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('text').get('')" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "/* avoid yaml stash */ $('text', '')" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; field('text').get(defaultText)" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; $('text', defaultText)" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('text').get(1, '')" + - match: { hits.hits.0.fields.field.0: "" } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; field('text').get(1, defaultText)" + - match: { hits.hits.0.fields.field.0: "default text" } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('text').get(1, '')" + - match: { hits.hits.0.fields.field.0: "" } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String cat = ''; for (String s : field('text')) { cat += s; } cat + field('text').size();" + - match: { hits.hits.0.fields.field.0: "Lots of text.1" } + - match: { hits.hits.1.fields.field.0: "0" } + - match: { hits.hits.2.fields.field.0: "Lots of text.SOOOOO much texteven more text3" } + +--- +"text_no_field_data": + - do: + catch: bad_request + search: + rest_total_hits_as_int: true + body: + query: { term: { _id: "1" } } + script_fields: + field: + script: + source: "doc['text_no_field_data'].get(0)" + - match: { error.failed_shards.0.reason.caused_by.type: "illegal_argument_exception" } + + - do: + catch: bad_request + search: + rest_total_hits_as_int: true + body: + query: { term: { _id: "1" } } + script_fields: + field: + script: + source: "doc['text_no_field_data'].value" + - match: { error.failed_shards.0.reason.caused_by.type: "illegal_argument_exception" } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('text_no_field_data').get('')" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "/* avoid yaml stash */ $('text_no_field_data', '')" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; field('text_no_field_data').get(defaultText)" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; $('text_no_field_data', defaultText)" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('text_no_field_data').get(1, '')" + - match: { hits.hits.0.fields.field.0: "" } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; field('text_no_field_data').get(1, defaultText)" + - match: { hits.hits.0.fields.field.0: "default text" } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('text_no_field_data').get(1, '')" + - match: { hits.hits.0.fields.field.0: "" } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + rest_total_hits_as_int: true + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String cat = ''; for (String s : field('text_no_field_data')) { cat += s; } cat + field('text_no_field_data').size();" + - match: { hits.hits.0.fields.field.0: "Lots of text.1" } + - match: { hits.hits.1.fields.field.0: "0" } + - match: { hits.hits.2.fields.field.0: "Lots of text.SOOOOO much texteven more text3" } + --- "version and sequence number": - do: diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java index 501430149a0ce..5211a01320481 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java @@ -18,8 +18,10 @@ import org.elasticsearch.search.lookup.SourceLookup; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; +import java.util.List; import java.util.SortedSet; import java.util.TreeSet; @@ -91,8 +93,8 @@ public static class SourceValueFetcherSortedBinaryDocValues extends SortedBinary private final ValueFetcher valueFetcher; private final SourceLookup sourceLookup; - private SortedSet values; - private Iterator iterator; + private final SortedSet values; + private Iterator iterator; public SourceValueFetcherSortedBinaryDocValues( LeafReaderContext leafReaderContext, @@ -102,12 +104,19 @@ public SourceValueFetcherSortedBinaryDocValues( this.leafReaderContext = leafReaderContext; this.valueFetcher = valueFetcher; this.sourceLookup = sourceLookup; + + values = new TreeSet<>(); } @Override public boolean advanceExact(int doc) throws IOException { sourceLookup.setSegmentAndDocument(leafReaderContext, doc); - values = new TreeSet<>(valueFetcher.fetchValues(sourceLookup, Collections.emptyList())); + values.clear(); + + for (Object object : valueFetcher.fetchValues(sourceLookup, Collections.emptyList())) { + values.add(new BytesRef(object.toString())); + } + iterator = values.iterator(); return true; @@ -121,7 +130,7 @@ public int docValueCount() { @Override public BytesRef nextValue() throws IOException { assert iterator.hasNext(); - return new BytesRef(iterator.next().toString()); + return iterator.next(); } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 852d03bcfafff..00595391cfb35 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -60,10 +60,12 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; +import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.script.field.DelegateDocValuesField; +import org.elasticsearch.script.field.TextDocValuesField; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -894,29 +896,38 @@ public static boolean hasGaps(TokenStream stream) throws IOException { @Override public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) { - if (fielddata == false) { - throw new IllegalArgumentException( - "Text fields are not optimised for operations that require per-document " - + "field data like aggregations and sorting, so these operations are disabled by default. Please use a " - + "keyword field instead. Alternatively, set fielddata=true on [" - + name() - + "] in order to load " - + "field data by uninverting the inverted index. Note that this can use significant memory." + FielddataOperation operation = fieldDataContext.fielddataOperation(); + + if (operation == FielddataOperation.SCRIPT) { + return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD, + SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())), fieldDataContext.lookupSupplier().get().source(), + TextDocValuesField::new); + } else if (operation == FielddataOperation.SEARCH) { + if (fielddata == false) { + throw new IllegalArgumentException( + "Text fields are not optimised for operations that require per-document " + + "field data like aggregations and sorting, so these operations are disabled by default. Please use a " + + "keyword field instead. Alternatively, set fielddata=true on [" + + name() + + "] in order to load " + + "field data by uninverting the inverted index. Note that this can use significant memory." + ); + } + return new PagedBytesIndexFieldData.Builder( + name(), + filter.minFreq, + filter.maxFreq, + filter.minSegmentSize, + CoreValuesSourceType.KEYWORD, + (dv, n) -> new DelegateDocValuesField( + new ScriptDocValues.Strings(new ScriptDocValues.StringsSupplier(FieldData.toString(dv))), + n + ) ); } - return new PagedBytesIndexFieldData.Builder( - name(), - filter.minFreq, - filter.maxFreq, - filter.minSegmentSize, - CoreValuesSourceType.KEYWORD, - (dv, n) -> new DelegateDocValuesField( - new ScriptDocValues.Strings(new ScriptDocValues.StringsSupplier(FieldData.toString(dv))), - n - ) - ); - } + throw new IllegalStateException("unknown field data operation [" + operation.name() + "]"); + } } public static class ConstantScoreTextFieldType extends TextFieldType { diff --git a/server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java b/server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java new file mode 100644 index 0000000000000..7d2bc45f7d059 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.script.field; + +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; + +public class TextDocValuesField extends BaseKeywordDocValuesField { + public TextDocValuesField(SortedBinaryDocValues input, String name) { + super(input, name); + } +} From c2f70c48b1f10d5efe6ffe42b21b545e8e92aac2 Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Tue, 16 Aug 2022 12:58:21 -0700 Subject: [PATCH 2/3] spotless --- .../SourceValueFetcherSortedBinaryIndexFieldData.java | 2 -- .../elasticsearch/index/mapper/TextFieldMapper.java | 10 +++++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java index 5211a01320481..535cc3320b2ae 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java @@ -18,10 +18,8 @@ import org.elasticsearch.search.lookup.SourceLookup; import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; -import java.util.List; import java.util.SortedSet; import java.util.TreeSet; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 00595391cfb35..458a839eb9858 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -899,9 +899,13 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext FielddataOperation operation = fieldDataContext.fielddataOperation(); if (operation == FielddataOperation.SCRIPT) { - return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD, - SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())), fieldDataContext.lookupSupplier().get().source(), - TextDocValuesField::new); + return new SourceValueFetcherSortedBinaryIndexFieldData.Builder( + name(), + CoreValuesSourceType.KEYWORD, + SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())), + fieldDataContext.lookupSupplier().get().source(), + TextDocValuesField::new + ); } else if (operation == FielddataOperation.SEARCH) { if (fielddata == false) { throw new IllegalArgumentException( From 737a70b2cdd967c91c502d2af438f85676b9de82 Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Tue, 16 Aug 2022 13:04:33 -0700 Subject: [PATCH 3/3] Update docs/changelog/89396.yaml --- docs/changelog/89396.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/89396.yaml diff --git a/docs/changelog/89396.yaml b/docs/changelog/89396.yaml new file mode 100644 index 0000000000000..933f951437d4e --- /dev/null +++ b/docs/changelog/89396.yaml @@ -0,0 +1,5 @@ +pr: 89396 +summary: Add text field support in the Painless scripting fields API +area: Mapping +type: enhancement +issues: []