Skip to content

Commit

Permalink
Apply keyword normalizers in the field retrieval API. (#59260)
Browse files Browse the repository at this point in the history
As we discussed in the meta-issue, when returning `keyword` fields in the field
retrieval API, we'll apply their `normalizer`. This decision is not a clear-cut
one, and we'll validate it with internal users before merging the feature
branch.
  • Loading branch information
jtibshirani committed Jul 22, 2020
1 parent 0f96e16 commit e033880
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -361,25 +362,9 @@ protected void parseCreateField(ParseContext context) throws IOException {
return;
}

final NamedAnalyzer normalizer = fieldType().normalizer();
NamedAnalyzer normalizer = fieldType().normalizer();
if (normalizer != null) {
try (TokenStream ts = normalizer.tokenStream(name(), value)) {
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
if (ts.incrementToken() == false) {
throw new IllegalStateException("The normalization token stream is "
+ "expected to produce exactly 1 token, but got 0 for analyzer "
+ normalizer + " and input \"" + value + "\"");
}
final String newValue = termAtt.toString();
if (ts.incrementToken()) {
throw new IllegalStateException("The normalization token stream is "
+ "expected to produce exactly 1 token, but got 2+ for analyzer "
+ normalizer + " and input \"" + value + "\"");
}
ts.end();
value = newValue;
}
value = normalizeValue(normalizer, value);
}

// convert to utf8 only once before feeding postings/dv/stored fields
Expand All @@ -398,6 +383,26 @@ protected void parseCreateField(ParseContext context) throws IOException {
}
}

/**
 * Runs {@code value} through the token stream built by {@code normalizer} and returns
 * the text of the single token that stream produces.
 *
 * @param normalizer analyzer whose token stream is used to normalize the value
 * @param value the raw keyword value to normalize
 * @return the term text of the one token emitted by the stream
 * @throws IllegalStateException if the stream emits no token, or more than one token
 * @throws IOException declared for the token stream operations (presumably raised by
 *         reset/incrementToken/end/close -- confirm against the TokenStream contract)
 */
private String normalizeValue(NamedAnalyzer normalizer, String value) throws IOException {
    // try-with-resources closes the stream on both the normal and the exceptional paths.
    try (TokenStream stream = normalizer.tokenStream(name(), value)) {
        final CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();

        if (stream.incrementToken()) {
            // Capture the term text before advancing the stream again.
            final String normalized = term.toString();
            if (stream.incrementToken() == false) {
                // Exactly one token: finish the stream and hand back the normalized text.
                stream.end();
                return normalized;
            }
            throw new IllegalStateException("The normalization token stream is "
                + "expected to produce exactly 1 token, but got 2+ for analyzer "
                + normalizer + " and input \"" + value + "\"");
        }

        throw new IllegalStateException("The normalization token stream is "
            + "expected to produce exactly 1 token, but got 0 for analyzer "
            + normalizer + " and input \"" + value + "\"");
    }
}

@Override
protected String parseSourceValue(Object value, String format) {
if (format != null) {
Expand All @@ -408,7 +413,17 @@ protected String parseSourceValue(Object value, String format) {
if (keywordValue.length() > ignoreAbove) {
return null;
}
return keywordValue;

NamedAnalyzer normalizer = fieldType().normalizer();
if (normalizer == null) {
return keywordValue;
}

try {
return normalizeValue(normalizer, keywordValue);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,13 @@ public void testParseSourceValue() {
assertEquals("42", ignoreAboveMapper.parseSourceValue(42L, null));
assertEquals("true", ignoreAboveMapper.parseSourceValue(true, null));

KeywordFieldMapper normalizerMapper = new KeywordFieldMapper.Builder("field")
.normalizer(indexService.getIndexAnalyzers(), "lowercase")
.build(context);
assertEquals("value", normalizerMapper.parseSourceValue("VALUE", null));
assertEquals("42", normalizerMapper.parseSourceValue(42L, null));
assertEquals("value", normalizerMapper.parseSourceValue("value", null));

KeywordFieldMapper nullValueMapper = new KeywordFieldMapper.Builder("field")
.nullValue("NULL")
.build(context);
Expand Down

0 comments on commit e033880

Please sign in to comment.