From 0c62dcc748fe6e34d4a227f092ef2a1744f78eb3 Mon Sep 17 00:00:00 2001
From: Jack Conradson <osjdconrad@gmail.com>
Date: Tue, 16 Aug 2022 12:51:24 -0700
Subject: [PATCH 1/3] Add support for text fields using source directly in
 scripting

---
 .../test/painless/50_script_doc_values.yml    | 267 ++++++++++++++++++
 ...alueFetcherSortedBinaryIndexFieldData.java |  17 +-
 .../index/mapper/TextFieldMapper.java         |  51 ++--
 .../script/field/TextDocValuesField.java      |  17 ++
 4 files changed, 328 insertions(+), 24 deletions(-)
 create mode 100644 server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java

diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml
index dd2187673134a..979f0a1cdf7df 100644
--- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml
+++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/50_script_doc_values.yml
@@ -70,6 +70,11 @@ setup:
                             type: scaled_float
                             scaling_factor: 100
                             doc_values: false
+                        text:
+                            type: text
+                            fielddata: true
+                        text_no_field_data:
+                            type: text
                         token_count:
                             type: token_count
                             analyzer: standard
@@ -110,6 +115,8 @@ setup:
                 half_float_no_doc_values: 3.140625
                 scaled_float: 3.14
                 scaled_float_no_doc_values: 3.14
+                text: "Lots of text."
+                text_no_field_data: "Lots of text."
                 token_count: count all these words please
 
     - do:
@@ -150,6 +157,8 @@ setup:
               half_float_no_doc_values: [2.234, 1.123]
               scaled_float: [-3.5, 2.5]
               scaled_float_no_doc_values: [2.5, -3.5]
+              text: ["Lots of text.", "even more text", "SOOOOO much text"]
+              text_no_field_data: ["Lots of text.", "even more text", "SOOOOO much text"]
 
 
     - do:
@@ -2719,6 +2728,264 @@ setup:
                 source: "int value = field('dne').get(1, 1); value"
   - match: { hits.hits.0.fields.field.0: 1 }
 
+---
+"text":
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query: { term: { _id: "1" } }
+          script_fields:
+            field:
+              script:
+                source: "doc['text'].get(0)"
+  - match: { hits.hits.0.fields.field.0: lots }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query: { term: { _id: "1" } }
+          script_fields:
+            field:
+              script:
+                source: "doc['text'].value"
+  - match: { hits.hits.0.fields.field.0: lots }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "field('text').get('')"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "/* avoid yaml stash */ $('text', '')"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String defaultText = 'default text'; field('text').get(defaultText)"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "default text" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String defaultText = 'default text'; $('text', defaultText)"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "default text" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "field('text').get(1, '')"
+  - match: { hits.hits.0.fields.field.0: "" }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "SOOOOO much text" }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String defaultText = 'default text'; field('text').get(1, defaultText)"
+  - match: { hits.hits.0.fields.field.0: "default text" }
+  - match: { hits.hits.1.fields.field.0: "default text" }
+  - match: { hits.hits.2.fields.field.0: "SOOOOO much text" }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "field('text').get(1, '')"
+  - match: { hits.hits.0.fields.field.0: "" }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "SOOOOO much text" }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String cat = ''; for (String s : field('text')) { cat += s; } cat + field('text').size();"
+  - match: { hits.hits.0.fields.field.0: "Lots of text.1" }
+  - match: { hits.hits.1.fields.field.0: "0" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text.SOOOOO much texteven more text3" }
+
+---
+"text_no_field_data":
+  - do:
+      catch: bad_request
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query: { term: { _id: "1" } }
+          script_fields:
+            field:
+              script:
+                source: "doc['text_no_field_data'].get(0)"
+  - match: { error.failed_shards.0.reason.caused_by.type: "illegal_argument_exception" }
+
+  - do:
+      catch: bad_request
+      search:
+        rest_total_hits_as_int: true
+        body:
+          query: { term: { _id: "1" } }
+          script_fields:
+            field:
+              script:
+                source: "doc['text_no_field_data'].value"
+  - match: { error.failed_shards.0.reason.caused_by.type: "illegal_argument_exception" }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "field('text_no_field_data').get('')"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "/* avoid yaml stash */ $('text_no_field_data', '')"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String defaultText = 'default text'; field('text_no_field_data').get(defaultText)"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "default text" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String defaultText = 'default text'; $('text_no_field_data', defaultText)"
+  - match: { hits.hits.0.fields.field.0: "Lots of text." }
+  - match: { hits.hits.1.fields.field.0: "default text" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text." }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "field('text_no_field_data').get(1, '')"
+  - match: { hits.hits.0.fields.field.0: "" }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "SOOOOO much text" }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String defaultText = 'default text'; field('text_no_field_data').get(1, defaultText)"
+  - match: { hits.hits.0.fields.field.0: "default text" }
+  - match: { hits.hits.1.fields.field.0: "default text" }
+  - match: { hits.hits.2.fields.field.0: "SOOOOO much text" }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "field('text_no_field_data').get(1, '')"
+  - match: { hits.hits.0.fields.field.0: "" }
+  - match: { hits.hits.1.fields.field.0: "" }
+  - match: { hits.hits.2.fields.field.0: "SOOOOO much text" }
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        body:
+          sort: [ { rank: asc } ]
+          script_fields:
+            field:
+              script:
+                source: "String cat = ''; for (String s : field('text_no_field_data')) { cat += s; } cat + field('text_no_field_data').size();"
+  - match: { hits.hits.0.fields.field.0: "Lots of text.1" }
+  - match: { hits.hits.1.fields.field.0: "0" }
+  - match: { hits.hits.2.fields.field.0: "Lots of text.SOOOOO much texteven more text3" }
+
 ---
 "version and sequence number":
   - do:
diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java
index 501430149a0ce..5211a01320481 100644
--- a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java
+++ b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java
@@ -18,8 +18,10 @@
 import org.elasticsearch.search.lookup.SourceLookup;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
@@ -91,8 +93,8 @@ public static class SourceValueFetcherSortedBinaryDocValues extends SortedBinary
         private final ValueFetcher valueFetcher;
         private final SourceLookup sourceLookup;
 
-        private SortedSet<Object> values;
-        private Iterator<Object> iterator;
+        private final SortedSet<BytesRef> values;
+        private Iterator<BytesRef> iterator;
 
         public SourceValueFetcherSortedBinaryDocValues(
             LeafReaderContext leafReaderContext,
@@ -102,12 +104,19 @@ public SourceValueFetcherSortedBinaryDocValues(
             this.leafReaderContext = leafReaderContext;
             this.valueFetcher = valueFetcher;
             this.sourceLookup = sourceLookup;
+
+            values = new TreeSet<>();
         }
 
         @Override
         public boolean advanceExact(int doc) throws IOException {
             sourceLookup.setSegmentAndDocument(leafReaderContext, doc);
-            values = new TreeSet<>(valueFetcher.fetchValues(sourceLookup, Collections.emptyList()));
+            values.clear(); // the set is allocated once in the constructor and reused for each document
+
+            for (Object object : valueFetcher.fetchValues(sourceLookup, Collections.emptyList())) {
+                values.add(new BytesRef(object.toString())); // convert up front so nextValue() returns elements as-is
+            }
+
             iterator = values.iterator();
 
             return true;
@@ -121,7 +130,7 @@ public int docValueCount() {
         @Override
         public BytesRef nextValue() throws IOException {
             assert iterator.hasNext();
-            return new BytesRef(iterator.next().toString());
+            return iterator.next(); // elements are already BytesRef, converted in advanceExact
         }
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
index 852d03bcfafff..00595391cfb35 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@@ -60,10 +60,12 @@
 import org.elasticsearch.index.fielddata.FieldDataContext;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.ScriptDocValues;
+import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
 import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
 import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.similarity.SimilarityProvider;
 import org.elasticsearch.script.field.DelegateDocValuesField;
+import org.elasticsearch.script.field.TextDocValuesField;
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 import org.elasticsearch.xcontent.ToXContent;
 import org.elasticsearch.xcontent.XContentBuilder;
@@ -894,29 +896,38 @@ public static boolean hasGaps(TokenStream stream) throws IOException {
 
         @Override
         public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
-            if (fielddata == false) {
-                throw new IllegalArgumentException(
-                    "Text fields are not optimised for operations that require per-document "
-                        + "field data like aggregations and sorting, so these operations are disabled by default. Please use a "
-                        + "keyword field instead. Alternatively, set fielddata=true on ["
-                        + name()
-                        + "] in order to load "
-                        + "field data by uninverting the inverted index. Note that this can use significant memory."
+            FielddataOperation operation = fieldDataContext.fielddataOperation();
+
+            if (operation == FielddataOperation.SCRIPT) {
+                return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD,
+                    SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())), fieldDataContext.lookupSupplier().get().source(),
+                    TextDocValuesField::new);
+            } else if (operation == FielddataOperation.SEARCH) {
+                if (fielddata == false) {
+                    throw new IllegalArgumentException(
+                        "Text fields are not optimised for operations that require per-document "
+                            + "field data like aggregations and sorting, so these operations are disabled by default. Please use a "
+                            + "keyword field instead. Alternatively, set fielddata=true on ["
+                            + name()
+                            + "] in order to load "
+                            + "field data by uninverting the inverted index. Note that this can use significant memory."
+                    );
+                }
+                return new PagedBytesIndexFieldData.Builder( // only reached when fielddata is enabled
+                    name(),
+                    filter.minFreq,
+                    filter.maxFreq,
+                    filter.minSegmentSize,
+                    CoreValuesSourceType.KEYWORD,
+                    (dv, n) -> new DelegateDocValuesField(
+                        new ScriptDocValues.Strings(new ScriptDocValues.StringsSupplier(FieldData.toString(dv))),
+                        n
+                    )
                 );
             }
-            return new PagedBytesIndexFieldData.Builder(
-                name(),
-                filter.minFreq,
-                filter.maxFreq,
-                filter.minSegmentSize,
-                CoreValuesSourceType.KEYWORD,
-                (dv, n) -> new DelegateDocValuesField(
-                    new ScriptDocValues.Strings(new ScriptDocValues.StringsSupplier(FieldData.toString(dv))),
-                    n
-                )
-            );
-        }
 
+            throw new IllegalStateException("unknown field data operation [" + operation.name() + "]"); // neither SCRIPT nor SEARCH
+        }
     }
 
     public static class ConstantScoreTextFieldType extends TextFieldType {
diff --git a/server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java b/server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java
new file mode 100644
index 0000000000000..7d2bc45f7d059
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/script/field/TextDocValuesField.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.script.field;
+
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
+
+public class TextDocValuesField extends BaseKeywordDocValuesField {
+    public TextDocValuesField(SortedBinaryDocValues input, String name) {
+        super(input, name); // nothing text-specific is added; all behaviour comes from the base class
+    }
+}

From c2f70c48b1f10d5efe6ffe42b21b545e8e92aac2 Mon Sep 17 00:00:00 2001
From: Jack Conradson <osjdconrad@gmail.com>
Date: Tue, 16 Aug 2022 12:58:21 -0700
Subject: [PATCH 2/3] spotless

---
 .../SourceValueFetcherSortedBinaryIndexFieldData.java  |  2 --
 .../elasticsearch/index/mapper/TextFieldMapper.java    | 10 +++++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java
index 5211a01320481..535cc3320b2ae 100644
--- a/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java
+++ b/server/src/main/java/org/elasticsearch/index/fielddata/SourceValueFetcherSortedBinaryIndexFieldData.java
@@ -18,10 +18,8 @@
 import org.elasticsearch.search.lookup.SourceLookup;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
-import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
index 00595391cfb35..458a839eb9858 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@@ -899,9 +899,13 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
             FielddataOperation operation = fieldDataContext.fielddataOperation();
 
             if (operation == FielddataOperation.SCRIPT) {
-                return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(name(), CoreValuesSourceType.KEYWORD,
-                    SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())), fieldDataContext.lookupSupplier().get().source(),
-                    TextDocValuesField::new);
+                return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
+                    name(),
+                    CoreValuesSourceType.KEYWORD,
+                    SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
+                    fieldDataContext.lookupSupplier().get().source(),
+                    TextDocValuesField::new
+                );
             } else if (operation == FielddataOperation.SEARCH) {
                 if (fielddata == false) {
                     throw new IllegalArgumentException(

From 737a70b2cdd967c91c502d2af438f85676b9de82 Mon Sep 17 00:00:00 2001
From: Jack Conradson <osjdconrad@gmail.com>
Date: Tue, 16 Aug 2022 13:04:33 -0700
Subject: [PATCH 3/3] Update docs/changelog/89396.yaml

---
 docs/changelog/89396.yaml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 docs/changelog/89396.yaml

diff --git a/docs/changelog/89396.yaml b/docs/changelog/89396.yaml
new file mode 100644
index 0000000000000..933f951437d4e
--- /dev/null
+++ b/docs/changelog/89396.yaml
@@ -0,0 +1,5 @@
+pr: 89396
+summary: Add text field support in the Painless scripting fields API
+area: Mapping
+type: enhancement
+issues: []