Skip to content

Commit

Permalink
Allow field mappers to retrieve fields from source. (#56928)
Browse files Browse the repository at this point in the history
This PR adds a new method `FieldMapper#lookupValues(SourceLookup)` that extracts
and parses the source values. This lets us return values like numbers and dates
in a consistent format, and also handle special data types like
`constant_keyword`. The `lookupValues` method calls into `parseSourceValue`,
which mappers can override to specify how values should be parsed.
  • Loading branch information
jtibshirani committed May 28, 2020
1 parent 47fd6c9 commit 5a1ffd4
Show file tree
Hide file tree
Showing 57 changed files with 736 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
float value;
if (context.externalValueSet()) {
Object v = context.externalValue();
if (v instanceof Number) {
value = ((Number) v).floatValue();
} else {
value = Float.parseFloat(v.toString());
}
value = objectToFloat(v);
} else if (context.parser().currentToken() == Token.VALUE_NULL) {
// skip
return;
Expand All @@ -209,6 +205,19 @@ protected void parseCreateField(ParseContext context) throws IOException {
context.doc().addWithKey(name(), new FeatureField("_feature", name(), value));
}

/**
 * Coerces an arbitrary value to a float.
 *
 * @param value a {@link Number}, or any object whose {@code toString()} form
 *              can be parsed by {@link Float#parseFloat(String)}
 * @return the number narrowed to a float, or the float parsed from the string form
 */
private Float objectToFloat(Object value) {
    if (!(value instanceof Number)) {
        // Anything that is not already numeric is parsed from its string form.
        return Float.parseFloat(value.toString());
    }
    Number number = (Number) value;
    return number.floatValue();
}

/**
 * Converts a source value to a float by delegating to {@code objectToFloat}.
 *
 * @param value the source value to convert
 * @return the value as a {@code Float}
 */
@Override
protected Float parseSourceValue(Object value) {
return objectToFloat(value);
}

@Override
protected String contentType() {
return CONTENT_TYPE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
throw new AssertionError("parse is implemented directly");
}

/**
 * Returns the source value unchanged; no conversion is applied.
 *
 * @param value the source value
 * @return the same object that was passed in
 */
@Override
protected Object parseSourceValue(Object value) {
return value;
}

@Override
protected String contentType() {
return CONTENT_TYPE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,13 @@ private static double objectToDouble(Object value) {
return doubleValue;
}

/**
 * Converts a source value to a double and rounds it to the nearest multiple
 * of {@code 1 / scalingFactor}, using the scaling factor of this field's type.
 *
 * @param value the source value, converted with {@code objectToDouble}
 * @return the rounded value
 */
@Override
protected Double parseSourceValue(Object value) {
    final double parsed = objectToDouble(value);
    final double factor = fieldType().getScalingFactor();
    // Scale up, round to the nearest long, then scale back down.
    final long scaled = Math.round(parsed * factor);
    return scaled / factor;
}

private static class ScaledFloatIndexFieldData implements IndexNumericFieldData {

private final IndexNumericFieldData scaledFieldData;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,11 @@ protected void parseCreateField(ParseContext context) {
throw new UnsupportedOperationException();
}

/**
 * Not supported by this mapper.
 *
 * @param value ignored
 * @throws UnsupportedOperationException always
 */
@Override
protected Object parseSourceValue(Object value) {
throw new UnsupportedOperationException();
}

@Override
protected void mergeOptions(FieldMapper other, List<String> conflicts) {

Expand Down Expand Up @@ -510,6 +515,11 @@ protected void mergeOptions(FieldMapper other, List<String> conflicts) {

}

/**
 * Not supported by this mapper.
 *
 * @param value ignored
 * @throws UnsupportedOperationException always
 */
@Override
protected Object parseSourceValue(Object value) {
throw new UnsupportedOperationException();
}

@Override
protected String contentType() {
return "shingle";
Expand Down Expand Up @@ -665,6 +675,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
}
}

/**
 * Returns the string form of the source value.
 *
 * @param value the source value; {@code toString()} is invoked on it directly,
 *              so it must not be null
 * @return the result of {@code value.toString()}
 */
@Override
protected String parseSourceValue(Object value) {
return value.toString();
}

@Override
protected String contentType() {
return CONTENT_TYPE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
context.doc().addAll(NumberFieldMapper.NumberType.INTEGER.createFields(fieldType().name(), tokenCount, indexed, docValued, stored));
}

/**
 * Returns the string form of the source value.
 *
 * @param value the source value; {@code toString()} is invoked on it directly,
 *              so it must not be null
 * @return the result of {@code value.toString()}
 */
@Override
protected String parseSourceValue(Object value) {
return value.toString();
}

/**
* Count position increments in a token stream. Package private for testing.
* @param analyzer analyzer to create token stream
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;
import org.apache.lucene.document.FeatureField;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
Expand Down Expand Up @@ -180,4 +183,12 @@ public void testRejectMultiValuedFields() throws MapperParsingException, IOExcep
e.getCause().getMessage());
}

/**
 * Checks that a rank feature mapper parses both numeric and string source
 * values into floats.
 */
public void testParseSourceValue() {
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id)
        .build();
    Mapper.BuilderContext builderContext = new Mapper.BuilderContext(indexSettings, new ContentPath());
    RankFeatureFieldMapper fieldMapper = new RankFeatureFieldMapper.Builder("field").build(builderContext);

    // A numeric source value.
    Float fromNumber = fieldMapper.parseSourceValue(3.14);
    assertEquals(3.14f, fromNumber, 0.0001);

    // A string source value.
    Float fromString = fieldMapper.parseSourceValue("42.9");
    assertEquals(42.9f, fromString, 0.0001);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
Expand Down Expand Up @@ -392,4 +395,15 @@ public void testMeta() throws Exception {
new CompressedXContent(mapping3), MergeReason.MAPPING_UPDATE);
assertEquals(mapping3, mapper.mappingSource().toString());
}

/**
 * Checks that a scaled float mapper with a scaling factor of 100 rounds
 * numeric and string source values to two decimal places.
 */
public void testParseSourceValue() {
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id)
        .build();
    Mapper.BuilderContext builderContext = new Mapper.BuilderContext(indexSettings, new ContentPath());
    ScaledFloatFieldMapper fieldMapper = new ScaledFloatFieldMapper.Builder("field")
        .scalingFactor(100)
        .build(builderContext);

    // A numeric source value.
    Double fromNumber = fieldMapper.parseSourceValue(3.1415926);
    assertEquals(3.14, fromNumber, 0.00001);

    // A string source value.
    Double fromString = fieldMapper.parseSourceValue("3.1415");
    assertEquals(3.14, fromString, 0.00001);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
throw new IllegalStateException("Should never be called");
}

/**
 * Not supported by this mapper. The exception message names the field type
 * (from {@code typeName()}) and states that it is not stored in _source.
 *
 * @param value ignored
 * @throws UnsupportedOperationException always
 */
@Override
protected Object parseSourceValue(Object value) {
throw new UnsupportedOperationException("The " + typeName() + " field is not stored in _source.");
}

@Override
protected String contentType() {
return CONTENT_TYPE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
context.doc().add(new SortedDocValuesField(fieldType().name(), binaryValue));
}

/**
 * Not supported by this mapper. The exception message names the field type
 * (from {@code typeName()}) and states that it is not stored in _source.
 *
 * @param value ignored
 * @throws UnsupportedOperationException always
 */
@Override
protected Object parseSourceValue(Object value) {
throw new UnsupportedOperationException("The " + typeName() + " field is not stored in _source.");
}

@Override
protected void mergeOptions(FieldMapper other, List<String> conflicts) {
ParentIdFieldMapper parentMergeWith = (ParentIdFieldMapper) other;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
throw new UnsupportedOperationException("parsing is implemented in parse(), this method should NEVER be called");
}

/**
 * Returns the source value unchanged; no conversion is applied.
 *
 * @param value the source value
 * @return the same object that was passed in
 */
@Override
protected Object parseSourceValue(Object value) {
return value;
}

@Override
public void parse(ParseContext context) throws IOException {
context.path().add(simpleName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,11 @@ public void parse(ParseContext context) throws IOException {
processQuery(query, context);
}

/**
 * Returns the source value unchanged; no conversion is applied.
 *
 * @param value the source value
 * @return the same object that was passed in
 */
@Override
protected Object parseSourceValue(Object value) {
return value;
}

static void createQueryBuilderField(Version indexVersion, BinaryFieldMapper qbField,
QueryBuilder queryBuilder, ParseContext context) throws IOException {
try (ByteArrayOutputStream stream = new ByteArrayOutputStream()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -750,4 +750,9 @@ protected void parseCreateField(ParseContext context) throws IOException {
createFieldNamesField(context);
}
}

/**
 * Returns the string form of the source value.
 *
 * @param value the source value; {@code toString()} is invoked on it directly,
 *              so it must not be null
 * @return the result of {@code value.toString()}
 */
@Override
protected String parseSourceValue(Object value) {
return value.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
Expand Down Expand Up @@ -477,4 +480,13 @@ public void testUpdateIgnoreAbove() throws IOException {
indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
}

/**
 * Checks that an ICU collation keyword mapper returns the string form of
 * string, long and boolean source values.
 */
public void testParseSourceValue() {
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id)
        .build();
    Mapper.BuilderContext builderContext = new Mapper.BuilderContext(indexSettings, new ContentPath());
    ICUCollationKeywordFieldMapper fieldMapper =
        new ICUCollationKeywordFieldMapper.Builder("field").build(builderContext);

    String fromString = fieldMapper.parseSourceValue("value");
    assertEquals("value", fromString);

    String fromLong = fieldMapper.parseSourceValue(42L);
    assertEquals("42", fromLong);

    String fromBoolean = fieldMapper.parseSourceValue(true);
    assertEquals("true", fromBoolean);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,11 @@ protected void parseCreateField(ParseContext context) throws IOException {
}
}

/**
 * Returns the string form of the source value.
 *
 * @param value the source value; {@code toString()} is invoked on it directly,
 *              so it must not be null
 * @return the result of {@code value.toString()}
 */
@Override
protected String parseSourceValue(Object value) {
return value.toString();
}

@Override
protected String contentType() {
return CONTENT_TYPE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.action.termvectors.TermVectorsResponse;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
Expand All @@ -44,8 +46,10 @@
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.mapper.ParsedDocument;
Expand Down Expand Up @@ -672,4 +676,17 @@ public void testEmptyName() throws IOException {
assertThat(e.getMessage(), containsString("name cannot be empty string"));
}

/**
 * Checks that an annotated text mapper, built with the index's default
 * analyzers, returns the string form of string, long and boolean source values.
 */
public void testParseSourceValue() {
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id)
        .build();
    Mapper.BuilderContext builderContext = new Mapper.BuilderContext(indexSettings, new ContentPath());
    AnnotatedTextFieldMapper fieldMapper = new AnnotatedTextFieldMapper.Builder("field")
        .indexAnalyzer(indexService.getIndexAnalyzers().getDefaultIndexAnalyzer())
        .searchAnalyzer(indexService.getIndexAnalyzers().getDefaultSearchAnalyzer())
        .searchQuoteAnalyzer(indexService.getIndexAnalyzers().getDefaultSearchQuoteAnalyzer())
        .build(builderContext);

    String fromString = fieldMapper.parseSourceValue("value");
    assertEquals("value", fromString);

    String fromLong = fieldMapper.parseSourceValue(42L);
    assertEquals("42", fromLong);

    String fromBoolean = fieldMapper.parseSourceValue(true);
    assertEquals("true", fromBoolean);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,13 @@ protected void parseCreateField(ParseContext context)
}
}

/**
 * Returns the string form of the source value.
 *
 * @param value the source value; {@code toString()} is invoked on it directly,
 *              so it must not be null
 * @return the result of {@code value.toString()}
 */
@Override
protected String parseSourceValue(Object value) {
return value.toString();
}

/**
 * Merges options from another mapper. This implementation has an empty body:
 * it reads nothing from {@code other} and adds nothing to {@code conflicts}.
 *
 * @param other     the mapper being merged in (unused)
 * @param conflicts the list of merge conflicts (left untouched)
 */
@Override
protected void mergeOptions(FieldMapper other, List<String> conflicts) {

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ setup:
index: test
id: 1
body:
keyword: [ "first", "second" ]
keyword: [ "x", "y" ]
integer_range:
gte: 0
lte: 42
Expand All @@ -39,8 +39,8 @@ setup:
- is_true: hits.hits.0._id
- is_true: hits.hits.0._source

- match: { hits.hits.0.fields.keyword.0: first }
- match: { hits.hits.0.fields.keyword.1: second }
- match: { hits.hits.0.fields.keyword.0: x }
- match: { hits.hits.0.fields.keyword.1: y }

- match: { hits.hits.0.fields.integer_range.0.gte: 0 }
- match: { hits.hits.0.fields.integer_range.0.lte: 42 }
Expand All @@ -65,7 +65,7 @@ setup:
index: test
id: 1
body:
keyword: [ "value" ]
keyword: [ "x" ]

- do:
catch: bad_request
Expand All @@ -76,3 +76,49 @@ setup:
- match: { error.root_cause.0.type: "illegal_argument_exception" }
- match: { error.root_cause.0.reason: "Unable to retrieve the requested [fields] since _source is disabled
in the mappings for index [test]" }

---
"Test ignore malformed":
- do:
indices.create:
index: test
body:
settings:
number_of_shards: 1
mappings:
properties:
keyword:
type: keyword
integer:
type: integer
ignore_malformed: true

- do:
index:
index: test
id: 1
body:
keyword: "x"
integer: 42

- do:
index:
index: test
id: 2
body:
keyword: "y"
integer: "not an integer"

- do:
indices.refresh:
index: [ test ]

- do:
search:
index: test
body:
sort: [ keyword ]
fields: [ integer ]

- match: { hits.hits.0.fields.integer.0: 42 }
- is_false: hits.hits.1.fields.integer
Loading

0 comments on commit 5a1ffd4

Please sign in to comment.