Skip to content

Commit

Permalink
Highlighters skip ignored keyword values (elastic#53408)
Browse files Browse the repository at this point in the history
Keyword field values with length more than ignore_above are not
indexed. But highlighters still were retrieving these values
from _source and were trying to highlight them. This sometimes lead to
errors if a field length exceeded  max_analyzed_offset. But also this
is an overall wrong behaviour to attempt to highlight something that was
ignored during indexing.

This PR checks if a keyword value was ignored because of its length,
and if yes, skips highlighting it.

Closes elastic#43800
  • Loading branch information
mayya-sharipova committed Mar 16, 2020
1 parent 94da4ca commit 5f7af83
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 7 deletions.
10 changes: 10 additions & 0 deletions docs/reference/migration/migrate_7_7.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,13 @@ The listener thread pool is no longer used internally by Elasticsearch.
Therefore, these settings have been deprecated. You can safely remove these
settings from the configuration of your nodes.

[discrete]
[[breaking_77_highlighters_changes]]
=== Highlighters changes

[discrete]
==== Ignored keyword values are no longer highlighted
If a keyword value was ignored during indexing because of its length
(`ignore_above` parameter was applied), {es} doesn't attempt to
highlight it anymore, which means no highlights are produced for
ignored values.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
---
setup:
- do:
indices.create:
index: test-index
body:
mappings:
"properties":
"k1":
"type": "keyword"
"k2":
"type": "keyword"
"ignore_above": 3
- do:
bulk:
index: test-index
refresh: true
body:
- '{"index": {"_id": "1"}}'
- '{"k1": "123", "k2" : "123"}'
- '{"index": {"_id": "2"}}'
- '{"k1": "1234", "k2" : "1234"}'

---
"Plain Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.6.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: plain

- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored

---
"Unified Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.6.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: unified

- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe

/** Values that have more chars than the return value of this method will
* be skipped at parsing time. */
// pkg-private for testing
int ignoreAbove() {
public int ignoreAbove() {
return ignoreAbove;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
Expand Down Expand Up @@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.getMapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
};
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();

try {
Expand All @@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {

for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
if (text.length() > maxAnalyzedOffset) {
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
Expand All @@ -56,7 +57,7 @@ public class UnifiedHighlighter implements Highlighter {
public boolean canHighlight(MappedFieldType fieldType) {
return true;
}

@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
MappedFieldType fieldType = highlighterContext.fieldType;
Expand All @@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
}

List<Snippet> snippets = new ArrayList<>();
int numberOfFragments = field.fieldOptions().numberOfFragments();
try {

final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(hitContext.hit().getType()),
hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
Expand All @@ -82,7 +88,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
final CustomUnifiedHighlighter highlighter;
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
final OffsetSource offsetSource = getOffsetSource(fieldType);
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
int fieldValueLength = fieldValue.length();
if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " + "has exceeded [" +
Expand Down Expand Up @@ -152,7 +162,7 @@ protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchCont
return passageFormatter;
}


protected Analyzer getAnalyzer(DocumentMapper docMapper, HitContext hitContext) {
return docMapper.mappers().indexAnalyzer();
}
Expand Down

0 comments on commit 5f7af83

Please sign in to comment.