Highlighters skip ignored keyword values (elastic#53408)

Keyword field values with length more than ignore_above are not indexed. But highlighters still were retrieving these values from _source and were trying to highlight them. This sometimes lead to errors if a field length exceeded max_analyzed_offset. But also this is an overall wrong behaviour to attempt to highlight something that was ignored during indexing. This PR checks if a keyword value was ignored because of its length, and if yes, skips highlighting it. Closes elastic#43800
mayya-sharipova · Mar 16, 2020 · 5f7af83 · 5f7af83
1 parent 94da4ca
commit 5f7af83
Show file tree

Hide file tree

Showing 5 changed files with 101 additions and 7 deletions.
diff --git a/docs/reference/migration/migrate_7_7.asciidoc b/docs/reference/migration/migrate_7_7.asciidoc
@@ -73,3 +73,13 @@ The listener thread pool is no longer used internally by Elasticsearch.
 Therefore, these settings have been deprecated. You can safely remove these
 settings from the configuration of your nodes.
 
+[discrete]
+[[breaking_77_highlighters_changes]]
+=== Highlighters changes
+
+[discrete]
+==== Ignored keyword values are no longer highlighted
+If a keyword value was ignored during indexing because of its length
+(`ignore_above` parameter was applied), {es} doesn't attempt to
+highlight it anymore, which means no highlights are produced for
+ignored values.
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/40_keyword_ignore.yml
@@ -0,0 +1,64 @@
+---
+setup:
+  - do:
+      indices.create:
+          index: test-index
+          body:
+            mappings:
+              "properties":
+                "k1":
+                  "type": "keyword"
+                "k2":
+                  "type": "keyword"
+                  "ignore_above": 3
+  - do:
+      bulk:
+        index: test-index
+        refresh: true
+        body:
+          - '{"index": {"_id": "1"}}'
+          - '{"k1": "123", "k2" : "123"}'
+          - '{"index": {"_id": "2"}}'
+          - '{"k1": "1234", "k2" : "1234"}'
+
+---
+"Plain Highligher should skip highlighting ignored keyword values":
+  - skip:
+      version: " - 7.6.99"
+      reason: "skip highlighting of ignored values was introduced in 7.7"
+  - do:
+      search:
+        index: test-index
+        body:
+          query:
+            prefix:
+              k1: "12"
+          highlight:
+            require_field_match: false
+            fields:
+              k2:
+                type: plain
+
+  - match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
+  - is_false: hits.hits.1.highlight # no highlight for a value that was ignored
+
+---
+"Unified Highligher should skip highlighting ignored keyword values":
+  - skip:
+      version: " - 7.6.99"
+      reason: "skip highlighting of ignored values was introduced in 7.7"
+  - do:
+      search:
+        index: test-index
+        body:
+          query:
+            prefix:
+              k1: "12"
+          highlight:
+            require_field_match: false
+            fields:
+              k2:
+                type: unified
+
+  - match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
+  - is_false: hits.hits.1.highlight # no highlight for a value that was ignored
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -314,8 +314,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe
 
     /** Values that have more chars than the return value of this method will
      *  be skipped at parsing time. */
-    // pkg-private for testing
-    int ignoreAbove() {
+    public int ignoreAbove() {
         return ignoreAbove;
     }
 

diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java
@@ -36,6 +36,7 @@
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
         ArrayList<TextFragment> fragsList = new ArrayList<>();
         List<Object> textsToHighlight;
         Analyzer analyzer = context.getMapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
+        Integer keywordIgnoreAbove = null;
+        if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+            KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
+                .mappers().getMapper(highlighterContext.fieldName);
+            keywordIgnoreAbove = mapper.ignoreAbove();
+        };
         final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
 
         try {
@@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
 
             for (Object textToHighlight : textsToHighlight) {
                 String text = convertFieldValue(fieldType, textToHighlight);
-                if (text.length() > maxAnalyzedOffset) {
+                int textLength = text.length();
+                if (keywordIgnoreAbove != null  && textLength > keywordIgnoreAbove) {
+                    continue; // skip highlighting keyword terms that were ignored during indexing
+                }
+                if (textLength > maxAnalyzedOffset) {
                     throw new IllegalArgumentException(
                         "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
                             "] doc of [" + context.index().getName() + "] index " +

diff --git a/...r/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/...r/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java
@@ -36,6 +36,7 @@
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -56,7 +57,7 @@ public class UnifiedHighlighter implements Highlighter {
     public boolean canHighlight(MappedFieldType fieldType) {
         return true;
     }
-    
+
     @Override
     public HighlightField highlight(HighlighterContext highlighterContext) {
         MappedFieldType fieldType = highlighterContext.fieldType;
@@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
         FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
         Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
         final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
+        Integer keywordIgnoreAbove = null;
+        if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
+            KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
+                .mappers().getMapper(highlighterContext.fieldName);
+            keywordIgnoreAbove = mapper.ignoreAbove();
+        }
 
         List<Snippet> snippets = new ArrayList<>();
         int numberOfFragments = field.fieldOptions().numberOfFragments();
         try {
-
             final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(hitContext.hit().getType()),
                     hitContext);
             List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
@@ -82,7 +88,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
             final CustomUnifiedHighlighter highlighter;
             final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
             final OffsetSource offsetSource = getOffsetSource(fieldType);
-            if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
+            int fieldValueLength = fieldValue.length();
+            if (keywordIgnoreAbove != null  && fieldValueLength > keywordIgnoreAbove) {
+                return null; // skip highlighting keyword terms that were ignored during indexing
+            }
+            if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
                 throw new IllegalArgumentException(
                     "The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
                         "] doc of [" + context.index().getName() + "] index " + "has exceeded [" +
@@ -152,7 +162,7 @@ protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchCont
         return passageFormatter;
     }
 
-    
+
     protected Analyzer getAnalyzer(DocumentMapper docMapper, HitContext hitContext) {
         return docMapper.mappers().indexAnalyzer();
     }