Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Highlighters skip ignored keyword values (#53408) #53604

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/reference/migration/migrate_7_7.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,13 @@ The listener thread pool is no longer used internally by Elasticsearch.
Therefore, these settings have been deprecated. You can safely remove these
settings from the configuration of your nodes.

[discrete]
[[breaking_77_highlighters_changes]]
=== Highlighters changes

[discrete]
==== Ignored keyword values are no longer highlighted
If a keyword value was ignored during indexing because of its length
(`ignore_above` parameter was applied), {es} doesn't attempt to
highlight it anymore, which means no highlights are produced for
ignored values.
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
---
setup:
- do:
indices.create:
index: test-index
body:
mappings:
"properties":
"k1":
"type": "keyword"
"k2":
"type": "keyword"
"ignore_above": 3
- do:
bulk:
index: test-index
refresh: true
body:
- '{"index": {"_id": "1"}}'
- '{"k1": "123", "k2" : "123"}'
- '{"index": {"_id": "2"}}'
- '{"k1": "1234", "k2" : "1234"}'

---
"Plain Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.6.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: plain

- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored

---
"Unified Highligher should skip highlighting ignored keyword values":
- skip:
version: " - 7.6.99"
reason: "skip highlighting of ignored values was introduced in 7.7"
- do:
search:
index: test-index
body:
query:
prefix:
k1: "12"
highlight:
require_field_match: false
fields:
k2:
type: unified

- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe

/** Values that have more chars than the return value of this method will
* be skipped at parsing time. */
// pkg-private for testing
int ignoreAbove() {
public int ignoreAbove() {
return ignoreAbove;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
Expand Down Expand Up @@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.getMapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
};
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();

try {
Expand All @@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {

for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
if (text.length() > maxAnalyzedOffset) {
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
Expand All @@ -56,7 +57,7 @@ public class UnifiedHighlighter implements Highlighter {
public boolean canHighlight(MappedFieldType fieldType) {
return true;
}

@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
MappedFieldType fieldType = highlighterContext.fieldType;
Expand All @@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
.mappers().getMapper(highlighterContext.fieldName);
keywordIgnoreAbove = mapper.ignoreAbove();
}

List<Snippet> snippets = new ArrayList<>();
int numberOfFragments = field.fieldOptions().numberOfFragments();
try {

final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(hitContext.hit().getType()),
hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
Expand All @@ -82,7 +88,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
final CustomUnifiedHighlighter highlighter;
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
final OffsetSource offsetSource = getOffsetSource(fieldType);
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
int fieldValueLength = fieldValue.length();
if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
"] doc of [" + context.index().getName() + "] index " + "has exceeded [" +
Expand Down Expand Up @@ -152,7 +162,7 @@ protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchCont
return passageFormatter;
}


protected Analyzer getAnalyzer(DocumentMapper docMapper, HitContext hitContext) {
return docMapper.mappers().indexAnalyzer();
}
Expand Down