Skip to content

Commit

Permalink
Fix casting of scaled_float in sorts (elastic#57207)
Browse files Browse the repository at this point in the history
Previously we'd get a `ClassCastException` when you tried to use
`numeric_type` on `scaled_float`. Oops! This cleans up the CCE and moves
some code around so the casting actually works.
  • Loading branch information
nik9000 committed May 29, 2020
1 parent d5e86d7 commit 04714e1
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 194 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,11 @@
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentParser.Token;
Expand All @@ -45,25 +42,20 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.fielddata.LeafNumericFieldData;
import org.elasticsearch.index.fielddata.NumericDoubleValues;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData;
import org.elasticsearch.index.mapper.NumberFieldMapper.Defaults;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;

import java.io.IOException;
import java.math.BigDecimal;
Expand Down Expand Up @@ -499,7 +491,7 @@ private static double objectToDouble(Object value) {
return doubleValue;
}

private static class ScaledFloatIndexFieldData implements IndexNumericFieldData {
private static class ScaledFloatIndexFieldData extends IndexNumericFieldData {

private final IndexNumericFieldData scaledFieldData;
private final double scalingFactor;
Expand All @@ -525,16 +517,15 @@ public LeafNumericFieldData loadDirect(LeafReaderContext context) throws Excepti
}

@Override
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
final XFieldComparatorSource source = new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
return new SortField(getFieldName(), source, reverse);
}

@Override
public BucketedSort newBucketedSort(BigArrays bigArrays, Object missingValue, MultiValueMode sortMode, Nested nested,
SortOrder sortOrder, DocValueFormat format, int bucketSize, BucketedSort.ExtraData extra) {
return new DoubleValuesComparatorSource(this, missingValue, sortMode, nested)
.newBucketedSort(bigArrays, sortOrder, format, bucketSize, extra);
protected boolean sortRequiresCustomComparator() {
/*
* We need to use a custom comparator because the non-custom
* comparator wouldn't properly decode the long bits into the
* double. Sorting on the long representation *would* put the
* docs in order. We just don't have a way to convert the long
* into a double the right way afterwards.
*/
return true;
}

@Override
Expand All @@ -549,7 +540,7 @@ public Index index() {

@Override
public NumericType getNumericType() {
/**
/*
* {@link ScaledFloatLeafFieldData#getDoubleValues()} transforms the raw long values in `scaled` floats.
*/
return NumericType.DOUBLE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,28 @@ setup:

- do:
search:
rest_total_hits_as_int: true
body: { "size" : 1, "sort" : { "number" : { "order" : "asc" } } }
body:
size: 1
sort:
number:
order: asc

- match: { hits.total: 4 }
- match: { hits.total.value: 4 }
- match: { hits.hits.0._id: "3" }
- match: { hits.hits.0.sort.0: -2.1 }

---
"Sort with numeric_type":

- do:
search:
body:
size: 1
sort:
number:
order: asc
numeric_type: long

- match: { hits.total.value: 4 }
- match: { hits.hits.0._id: "3" }
- match: { hits.hits.0.sort.0: -2 }
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,184 @@

package org.elasticsearch.index.fielddata;

public interface IndexNumericFieldData extends IndexFieldData<LeafNumericFieldData> {

enum NumericType {
BOOLEAN(false),
BYTE(false),
SHORT(false),
INT(false),
LONG(false),
DATE(false),
DATE_NANOSECONDS(false),
HALF_FLOAT(true),
FLOAT(true),
DOUBLE(true);
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.time.DateUtils;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
import org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;

import java.io.IOException;
import java.util.function.LongUnaryOperator;

/**
* Base class for numeric field data.
*/
public abstract class IndexNumericFieldData implements IndexFieldData<LeafNumericFieldData> {
/**
* The type of number.
*/
public enum NumericType {
BOOLEAN(false, SortField.Type.LONG),
BYTE(false, SortField.Type.LONG),
SHORT(false, SortField.Type.LONG),
INT(false, SortField.Type.LONG),
LONG(false, SortField.Type.LONG),
DATE(false, SortField.Type.LONG),
DATE_NANOSECONDS(false, SortField.Type.LONG),
HALF_FLOAT(true, SortField.Type.LONG),
FLOAT(true, SortField.Type.FLOAT),
DOUBLE(true, SortField.Type.DOUBLE);

private final boolean floatingPoint;
private final SortField.Type sortFieldType;

NumericType(boolean floatingPoint) {
NumericType(boolean floatingPoint, SortField.Type sortFieldType) {
this.floatingPoint = floatingPoint;
this.sortFieldType = sortFieldType;
}

public final boolean isFloatingPoint() {
return floatingPoint;
}
}

/**
* The numeric type of this number.
*/
public abstract NumericType getNumericType();

/**
* Returns the {@link SortField} to use for sorting.
* Values are cast to the provided <code>targetNumericType</code> type if it doesn't
* match the field's <code>numericType</code>.
*
* @param targetNumericType the numeric type to sort as; when it differs from
*                          {@link #getNumericType()} a custom comparator is used
* @param missingValue      the missing-value setting, handed to the comparator source
* @param sortMode          how multiple values are resolved; anything other than
*                          {@code MIN} or {@code MAX} needs the custom comparator
* @param nested            nested context; a non-null value needs the custom comparator
* @param reverse           passed through to the returned {@link SortField}
* @return a comparator-backed {@link SortField}, or a {@link SortedNumericSortField}
*         when none of the conditions listed in the body apply
*/
public final SortField sortField(
NumericType targetNumericType,
Object missingValue,
MultiValueMode sortMode,
Nested nested,
boolean reverse
) {
XFieldComparatorSource source = comparatorSource(targetNumericType, missingValue, sortMode, nested);

/*
* Use a SortField with the custom comparator logic if required because
* 1. The underlying data source needs it.
* 2. We need to read the value from a nested field.
* 3. We aren't using max or min to resolve the duplicates.
* 4. We have to cast the results to another type.
*/
if (sortRequiresCustomComparator()
|| nested != null
|| (sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN)
|| targetNumericType != getNumericType()) {
return new SortField(getFieldName(), source, reverse);
}

// Only MIN or MAX can reach this point, so map the mode onto the matching Lucene selector.
SortedNumericSelector.Type selectorType = sortMode == MultiValueMode.MAX ?
SortedNumericSelector.Type.MAX : SortedNumericSelector.Type.MIN;
SortField sortField = new SortedNumericSortField(getFieldName(), getNumericType().sortFieldType, reverse, selectorType);
// The comparator source is still used here, to resolve the missing value for the sort field.
sortField.setMissingValue(source.missingObject(missingValue, reverse));
return sortField;
}

/**
* Does {@link #sortField} require a custom comparator because of the way
* the data is stored in doc values ({@code true}) or are the doc values
* stored such that they can be sorted without decoding ({@code false}).
*/
protected abstract boolean sortRequiresCustomComparator();

/**
* Sorts on this field's own numeric type: delegates to the
* {@code targetNumericType} overload with {@link #getNumericType()},
* so no cast to another type is requested.
*/
@Override
public final SortField sortField(Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
return sortField(getNumericType(), missingValue, sortMode, nested, reverse);
}

NumericType getNumericType();
/**
* Builds a {@linkplain BucketedSort} for the {@code targetNumericType},
* casting the values if their native type doesn't match.
* <p>
* The sort is always produced by the comparator source selected for
* {@code targetNumericType}; the remaining arguments are handed to it unchanged.
*
* @param targetNumericType the numeric type to sort as
*/
public final BucketedSort newBucketedSort(NumericType targetNumericType, BigArrays bigArrays, @Nullable Object missingValue,
MultiValueMode sortMode, Nested nested, SortOrder sortOrder, DocValueFormat format,
int bucketSize, BucketedSort.ExtraData extra) {
return comparatorSource(targetNumericType, missingValue, sortMode, nested)
.newBucketedSort(bigArrays, sortOrder, format, bucketSize, extra);
}

/**
* Builds a {@linkplain BucketedSort} on this field's own numeric type by
* delegating to the {@code targetNumericType} overload with
* {@link #getNumericType()}.
*/
@Override
public final BucketedSort newBucketedSort(BigArrays bigArrays, @Nullable Object missingValue, MultiValueMode sortMode, Nested nested,
SortOrder sortOrder, DocValueFormat format, int bucketSize, BucketedSort.ExtraData extra) {
return newBucketedSort(getNumericType(), bigArrays, missingValue, sortMode, nested, sortOrder, format, bucketSize, extra);
}

/**
* Build a {@link XFieldComparatorSource} matching the parameters.
* <p>
* The comparator is chosen by {@code targetNumericType} alone, not by this
* field's own {@link #getNumericType()}: float types get a float comparator,
* {@code DOUBLE} a double comparator, the two date types go through their
* dedicated factory methods, and everything else gets a long comparator.
*/
private XFieldComparatorSource comparatorSource(
NumericType targetNumericType,
@Nullable Object missingValue,
MultiValueMode sortMode,
Nested nested
) {
switch (targetNumericType) {
case HALF_FLOAT:
case FLOAT:
return new FloatValuesComparatorSource(this, missingValue, sortMode, nested);
case DOUBLE:
return new DoubleValuesComparatorSource(this, missingValue, sortMode, nested);
case DATE:
return dateComparatorSource(missingValue, sortMode, nested);
case DATE_NANOSECONDS:
return dateNanosComparatorSource(missingValue, sortMode, nested);
default:
// Every floating point type is handled above, so only non-floating-point types may land here.
assert !targetNumericType.isFloatingPoint();
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
}
}

/**
* Builds the comparator source used when the target type is {@code DATE}.
* This implementation compares the values as plain longs; the method is
* {@code protected} and non-final, so subclasses can supply their own.
*/
protected XFieldComparatorSource dateComparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
}

/**
* Builds the comparator source used when the target type is
* {@code DATE_NANOSECONDS}. This implementation compares longs after passing
* each value through {@code DateUtils::toNanoSeconds}.
* NOTE(review): presumably the stored values are milliseconds being widened to
* nanoseconds - confirm against {@code DateUtils}.
*/
protected XFieldComparatorSource dateNanosComparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
return new LongValuesComparatorSource(this, missingValue, sortMode, nested, dvs -> convertNumeric(dvs, DateUtils::toNanoSeconds));
}

/**
* Convert the values in <code>values</code> using the provided <code>converter</code>.
* <p>
* The returned view applies the converter only in {@code nextValue()};
* positioning and counting calls are forwarded to <code>values</code> untouched.
*
* @param values    the doc values to wrap
* @param converter applied to each value as it is read
* @return a view over <code>values</code> that yields converted values
*/
protected static SortedNumericDocValues convertNumeric(SortedNumericDocValues values, LongUnaryOperator converter) {
return new AbstractSortedNumericDocValues() {

@Override
public boolean advanceExact(int target) throws IOException {
return values.advanceExact(target);
}

@Override
public long nextValue() throws IOException {
// The only place the converter runs: each raw value is converted as it is read.
return converter.applyAsLong(values.nextValue());
}

@Override
public int docValueCount() {
return values.docValueCount();
}

@Override
public int nextDoc() throws IOException {
return values.nextDoc();
}
};
}
}
Loading

0 comments on commit 04714e1

Please sign in to comment.