From 8b9c4eb3e0b13499ef01f1de7f15796a2a1bdfc9 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 19 May 2020 17:36:55 -0400 Subject: [PATCH] Save memory when date_histogram is not on top (#56921) (#56960) When `date_histogram` is a sub-aggregator it used to allocate a bunch of objects for every one of its parent's buckets. This uses the data structures that we built in #55873 to rework the `date_histogram` aggregator instead of all of that allocation. Part of #56487 --- .../test/search.aggregation/10_histogram.yml | 52 +++++++ .../bucket/BucketsAggregator.java | 47 ++++++ .../DateHistogramAggregationSupplier.java | 1 + .../histogram/DateHistogramAggregator.java | 28 ++-- .../DateHistogramAggregatorFactory.java | 24 ++- .../DateRangeHistogramAggregator.java | 30 ++-- .../bucket/terms/LongKeyedBucketOrds.java | 3 +- .../DateHistogramAggregatorTests.java | 143 ++++++++++++++---- 8 files changed, 265 insertions(+), 63 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml index 694335b6677f5..991eab271437d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml @@ -490,3 +490,55 @@ setup: - length: { aggregations.histo.buckets: 1 } - match: { aggregations.histo.buckets.0.key_as_string: "2015-12-31T17:00:00.000-07:00" } - match: { aggregations.histo.buckets.0.doc_count: 1 } + +--- +"profiler": + - skip: + version: " - 7.8.99" + reason: debug info added in 7.9.0 + + - do: + indices.create: + index: test_2 + body: + settings: + number_of_replicas: 0 + number_of_shards: 1 + mappings: + properties: + date: + type: date + + - do: + bulk: + index: test_2 + refresh: true + body: + - '{"index": {}}' + - '{"date": "2016-01-01"}' + - '{"index": {}}' + - '{"date": "2016-01-02"}' + - '{"index": {}}' + - '{"date": "2016-02-01"}' + - '{"index": {}}' + - '{"date": "2016-03-01"}' + + - do: + search: + index: test_2 + body: + size: 0 + profile: true + aggs: + histo: + date_histogram: + field: date + calendar_interval: month + - match: { hits.total.value: 4 } + - length: { aggregations.histo.buckets: 3 } + - match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" } + - match: { aggregations.histo.buckets.0.doc_count: 2 } + - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator } + - match: { profile.shards.0.aggregations.0.description: histo } + - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 } + - match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index b170c049889bf..dca1c94feab0f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongHash; +import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorBase;
import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -29,6 +30,7 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.elasticsearch.search.aggregations.support.AggregationPath; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.sort.SortOrder; @@ -340,6 +342,51 @@ protected final <B> InternalAggregation[] buildAggregationsForVariableBuckets(lo return new InternalAggregation[] { resultBuilder.apply(buckets) }; } + + /** + * Build aggregation results for an aggregator with a varying number of + * {@code long} keyed buckets that is at the top level or wrapped in + * {@link AggregatorFactory#asMultiBucketAggregator}. + * @param owningBucketOrds owning bucket ordinals for which to build the results + * @param bucketOrds hash of values to the bucket ordinal + */ + protected final <B> InternalAggregation[] buildAggregationsForVariableBuckets(long[] owningBucketOrds, LongKeyedBucketOrds bucketOrds, + BucketBuilderForVariable<B> bucketBuilder, Function<List<B>, InternalAggregation> resultBuilder) throws IOException { + long totalOrdsToCollect = 0; + for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { + totalOrdsToCollect += bucketOrds.bucketsInOrd(owningBucketOrds[ordIdx]); + } + if (totalOrdsToCollect > Integer.MAX_VALUE) { + throw new AggregationExecutionException("Can't collect more than [" + Integer.MAX_VALUE + + "] buckets but attempted [" + totalOrdsToCollect + "]"); + } + consumeBucketsAndMaybeBreak((int) totalOrdsToCollect); + long[] bucketOrdsToCollect = new long[(int) totalOrdsToCollect]; + int b = 0; + for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { + LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); + while(ordsEnum.next()) { + bucketOrdsToCollect[b++] = ordsEnum.ord(); + } + } + InternalAggregations[] subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); + + InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; + b = 0; + for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { + List<B> buckets = new ArrayList<>((int) bucketOrds.size()); + LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); + while(ordsEnum.next()) { + if (bucketOrdsToCollect[b] != ordsEnum.ord()) { + throw new AggregationExecutionException("Iteration order of [" + bucketOrds + "] changed without mutating. [" + + ordsEnum.ord() + "] should have been [" + bucketOrdsToCollect[b] + "]"); + } + buckets.add(bucketBuilder.build(ordsEnum.value(), bucketDocCount(ordsEnum.ord()), subAggregationResults[b++])); + } + results[ordIdx] = resultBuilder.apply(buckets); + } + return results; + } @FunctionalInterface protected interface BucketBuilderForVariable<B> { B build(long bucketValue, int docCount, InternalAggregations subAggregationResults);
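The new buildAggregationsForVariableBuckets overload above is where the change pays off at build time: instead of building sub-aggregation results once per parent bucket, it flattens every (owningBucketOrd, bucketOrd) pair into one array and makes a single batched buildSubAggsForBuckets call. A minimal sketch of that two-pass shape, with plain collections standing in for the BigArrays-backed Elasticsearch types (all names here are illustrative, not the real API):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    class TwoPassResultsSketch {
        /** One result list per owning bucket; the "sub-aggregation" work is batched. */
        static List<List<String>> buildAll(long[] owningBucketOrds, Map<Long, long[]> ordsByOwner) {
            // Pass 1: flatten every bucket ord across all owners into one batch.
            List<Long> bucketOrdsToCollect = new ArrayList<>();
            for (long owner : owningBucketOrds) {
                for (long ord : ordsByOwner.get(owner)) {
                    bucketOrdsToCollect.add(ord);
                }
            }
            // One batched call instead of one call per owning bucket.
            List<String> subResults = buildSubResults(bucketOrdsToCollect);

            // Pass 2: walk the same order again, attaching each sub-result
            // to the bucket it belongs to.
            List<List<String>> results = new ArrayList<>();
            int b = 0;
            for (long owner : owningBucketOrds) {
                List<String> buckets = new ArrayList<>();
                for (long ord : ordsByOwner.get(owner)) {
                    buckets.add("bucket[" + ord + ": " + subResults.get(b++) + "]");
                }
                results.add(buckets);
            }
            return results;
        }

        // Stand-in for buildSubAggsForBuckets(long[]).
        private static List<String> buildSubResults(List<Long> ords) {
            List<String> out = new ArrayList<>(ords.size());
            for (Long ord : ords) {
                out.add("subAggs(" + ord + ")");
            }
            return out;
        }
    }

The second pass only works if it sees the same iteration order as the first, which is why the real method throws AggregationExecutionException when the ords come back in a different order.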
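Threading collectsFromSingleBucket through the supplier lets the aggregator itself choose the cheaper key space when it sits at the top level. A toy model of the choice that LongKeyedBucketOrds.build makes (the real class, added in #55873, keeps (owner, key) pairs in BigArrays-backed hashes instead of boxing into a HashMap; this sketch only mirrors the contract):

    import java.util.AbstractMap.SimpleEntry;
    import java.util.HashMap;
    import java.util.Map;

    abstract class ToyBucketOrds {
        static ToyBucketOrds build(boolean collectsFromSingleBucket) {
            return collectsFromSingleBucket ? new FromSingle() : new FromMany();
        }

        /** Returns the new ord, or {@code -1 - ord} when the pair was already added. */
        abstract long add(long owningBucketOrd, long key);

        /** Keys alone identify buckets when the owner is always ord 0. */
        private static class FromSingle extends ToyBucketOrds {
            private final Map<Long, Long> ords = new HashMap<>();

            @Override
            long add(long owningBucketOrd, long key) {
                assert owningBucketOrd == 0;
                Long existing = ords.putIfAbsent(key, (long) ords.size());
                return existing == null ? ords.size() - 1 : -1 - existing;
            }
        }

        /** Sub-aggregators key buckets by (owner, key) pairs in a single map. */
        private static class FromMany extends ToyBucketOrds {
            private final Map<SimpleEntry<Long, Long>, Long> ords = new HashMap<>();

            @Override
            long add(long owningBucketOrd, long key) {
                SimpleEntry<Long, Long> pair = new SimpleEntry<>(owningBucketOrd, key);
                Long existing = ords.putIfAbsent(pair, (long) ords.size());
                return existing == null ? ords.size() - 1 : -1 - existing;
            }
        }
    }

Either way there is exactly one structure per date_histogram instance, which is the whole memory saving: the negative-return convention is the same one the collect methods below rely on.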
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 338efbb6b1fa9..b3437cd7d9330 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -25,7 +25,6 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Rounding; import org.elasticsearch.common.lease.Releasables; -import org.elasticsearch.common.util.LongHash; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -34,12 +33,14 @@ import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.Collections; import java.util.Map; +import java.util.function.BiConsumer; /** * An aggregator for date values. Every date is rounded down using a configured @@ -62,13 +63,13 @@ class DateHistogramAggregator extends BucketsAggregator { private final long minDocCount; private final ExtendedBounds extendedBounds; - private final LongHash bucketOrds; + private final LongKeyedBucketOrds bucketOrds; DateHistogramAggregator(String name, AggregatorFactories factories, Rounding rounding, Rounding.Prepared preparedRounding, BucketOrder order, boolean keyed, long minDocCount, @Nullable ExtendedBounds extendedBounds, @Nullable ValuesSource valuesSource, DocValueFormat formatter, SearchContext aggregationContext, - Aggregator parent, Map<String, Object> metadata) throws IOException { + Aggregator parent, boolean collectsFromSingleBucket, Map<String, Object> metadata) throws IOException { super(name, factories, aggregationContext, parent, metadata); this.rounding = rounding; @@ -81,7 +82,7 @@ class DateHistogramAggregator extends BucketsAggregator { this.valuesSource = (ValuesSource.Numeric) valuesSource; this.formatter = formatter; - bucketOrds = new LongHash(1, aggregationContext.bigArrays()); + bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), collectsFromSingleBucket); } @Override @@ -93,30 +94,26 @@ public ScoreMode scoreMode() { } @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { if (valuesSource == null) { return LeafBucketCollector.NO_OP_COLLECTOR; } - final SortedNumericDocValues values = valuesSource.longValues(ctx); + SortedNumericDocValues values = valuesSource.longValues(ctx); return new LeafBucketCollectorBase(sub, values) { @Override - public void collect(int doc, long bucket) throws IOException { - assert bucket == 0; + public void collect(int doc, long owningBucketOrd) throws IOException { if (values.advanceExact(doc)) { - final int valuesCount = values.docValueCount(); + int valuesCount = values.docValueCount(); long previousRounded = Long.MIN_VALUE; for (int i = 0; i < valuesCount; ++i) { long value = values.nextValue(); - // We can use shardRounding here, which is sometimes more efficient - // if daylight saving times are involved. long rounded = preparedRounding.round(value); assert rounded >= previousRounded; if (rounded == previousRounded) { continue; } - long bucketOrd = bucketOrds.add(rounded); + long bucketOrd = bucketOrds.add(owningBucketOrd, rounded); if (bucketOrd < 0) { // already seen bucketOrd = -1 - bucketOrd; collectExistingBucket(sub, doc, bucketOrd); @@ -162,4 +159,9 @@ public InternalAggregation buildEmptyAggregation() { public void doClose() { Releasables.close(bucketOrds); } + + @Override + public void collectDebugInfo(BiConsumer<String, Object> add) { + add.accept("total_buckets", bucketOrds.size()); + } }
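In the collect method above, owningBucketOrd now flows straight into bucketOrds.add, while the previousRounded check is unchanged: doc values arrive sorted, so duplicate roundings are adjacent and a multi-valued document still lands in each bucket only once. A self-contained illustration of that de-duplication, with a hand-rolled month rounding standing in for preparedRounding.round:

    import java.time.Instant;
    import java.time.ZoneOffset;
    import java.time.ZonedDateTime;
    import java.time.temporal.ChronoUnit;

    class RoundingDedupSketch {
        public static void main(String[] args) {
            // Three sorted values for one document; the first two share a month.
            long[] values = {
                Instant.parse("2020-01-03T00:00:00Z").toEpochMilli(),
                Instant.parse("2020-01-20T00:00:00Z").toEpochMilli(),
                Instant.parse("2020-02-01T00:00:00Z").toEpochMilli(),
            };
            long previousRounded = Long.MIN_VALUE;
            for (long value : values) {
                long rounded = roundToMonth(value);
                if (rounded == previousRounded) {
                    continue; // same bucket as the previous value: collect nothing
                }
                System.out.println("collect into bucket " + Instant.ofEpochMilli(rounded));
                previousRounded = rounded;
            }
            // Prints two buckets: 2020-01-01 and 2020-02-01.
        }

        // Stand-in for Rounding.Prepared#round with a calendar_interval of month.
        static long roundToMonth(long epochMillis) {
            ZonedDateTime t = Instant.ofEpochMilli(epochMillis).atZone(ZoneOffset.UTC);
            return t.withDayOfMonth(1).truncatedTo(ChronoUnit.DAYS).toInstant().toEpochMilli();
        }
    }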
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index 79ba6956a2c2d..81730e1e00f70 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -81,9 +81,6 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, Map<String, Object> metadata) throws IOException { - if (collectsFromSingleBucket == false) { - return asMultiBucketAggregator(this, searchContext, parent); - } AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config.valueSourceType(), DateHistogramAggregationBuilder.NAME); if (aggregatorSupplier instanceof DateHistogramAggregationSupplier == false) { @@ -91,9 +88,22 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, aggregatorSupplier.getClass().toString() + "]"); } Rounding.Prepared preparedRounding = valuesSource.roundingPreparer(queryShardContext.getIndexReader()).apply(shardRounding); - return ((DateHistogramAggregationSupplier) aggregatorSupplier).build(name, factories, rounding, preparedRounding, order, keyed, - minDocCount, extendedBounds, valuesSource, config.format(), searchContext, - parent, metadata); + return ((DateHistogramAggregationSupplier) aggregatorSupplier).build( + name, + factories, + rounding, + preparedRounding, + order, + keyed, + minDocCount, + extendedBounds, + valuesSource, + config.format(), + searchContext, + parent, + collectsFromSingleBucket, + metadata + ); } @Override @@ -101,6 +111,6 @@ protected Aggregator createUnmapped(SearchContext searchContext, Aggregator parent, Map<String, Object> metadata) throws IOException { return new DateHistogramAggregator(name, factories, rounding, null, order, keyed, minDocCount, extendedBounds, - null, config.format(), searchContext, parent, metadata); + null, config.format(), searchContext, parent, false, metadata); } }
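The three lines deleted from doCreateInternal above are the old behavior: any date_histogram under another bucketing aggregation was handed to asMultiBucketAggregator, which builds a fresh aggregator, hash table and all, for every parent bucket. A rough sketch of why that hurts, with plain HashMaps standing in for the per-aggregator LongHash instances (the numbers are made up for illustration):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    class WrappingCostSketch {
        public static void main(String[] args) {
            int parentBuckets = 1000; // e.g. a terms agg with 1000 buckets

            // Before: asMultiBucketAggregator made one sub-aggregator, and so
            // one hash of rounded dates, per parent bucket.
            List<Map<Long, Long>> onePerParent = new ArrayList<>();
            for (int i = 0; i < parentBuckets; i++) {
                onePerParent.add(new HashMap<>());
            }

            // After: a single structure keyed by (owningBucketOrd, roundedDate).
            Map<List<Long>, Long> shared = new HashMap<>();
            shared.put(Arrays.asList(0L, 1577836800000L), 0L); // owner 0, 2020-01-01

            System.out.println(onePerParent.size() + " hashes before, 1 after");
        }
    }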
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java index cf5c3bd007b69..860c679381be3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java @@ -25,7 +25,6 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Rounding; import org.elasticsearch.common.lease.Releasables; -import org.elasticsearch.common.util.LongHash; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.mapper.RangeFieldMapper; import org.elasticsearch.index.mapper.RangeType; @@ -37,6 +36,7 @@ import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.internal.SearchContext; @@ -44,6 +44,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.BiConsumer; /** * An aggregator for date values. Every date is rounded down using a configured @@ -66,13 +67,13 @@ class DateRangeHistogramAggregator extends BucketsAggregator { private final long minDocCount; private final ExtendedBounds extendedBounds; - private final LongHash bucketOrds; + private final LongKeyedBucketOrds bucketOrds; DateRangeHistogramAggregator(String name, AggregatorFactories factories, Rounding rounding, Rounding.Prepared preparedRounding, BucketOrder order, boolean keyed, long minDocCount, @Nullable ExtendedBounds extendedBounds, @Nullable ValuesSource valuesSource, DocValueFormat formatter, SearchContext aggregationContext, - Aggregator parent, Map<String, Object> metadata) throws IOException { + Aggregator parent, boolean collectsFromSingleBucket, Map<String, Object> metadata) throws IOException { super(name, factories, aggregationContext, parent, metadata); this.rounding = rounding; @@ -89,7 +90,7 @@ class DateRangeHistogramAggregator extends BucketsAggregator { + "]"); } - bucketOrds = new LongHash(1, aggregationContext.bigArrays()); + bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), collectsFromSingleBucket); } @Override @@ -101,21 +102,19 @@ public ScoreMode scoreMode() { } @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { if (valuesSource == null) { return LeafBucketCollector.NO_OP_COLLECTOR; } - final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); - final RangeType rangeType = valuesSource.rangeType(); + SortedBinaryDocValues values = valuesSource.bytesValues(ctx); + RangeType rangeType = valuesSource.rangeType(); return new LeafBucketCollectorBase(sub, values) { @Override - public void collect(int doc, long bucket) throws IOException { - assert bucket == 0; + public void collect(int doc, long owningBucketOrd) throws IOException { if (values.advanceExact(doc)) { // Is it possible for valuesCount to be > 1 here? Multiple ranges are encoded into the same BytesRef in the binary doc // values, so it isn't clear what we'd be iterating over. - final int valuesCount = values.docValueCount(); + int valuesCount = values.docValueCount(); assert valuesCount == 1 : "Value count for ranges should always be 1"; long previousKey = Long.MIN_VALUE; @@ -124,7 +123,7 @@ public void collect(int doc, long bucket) throws IOException { List<RangeFieldMapper.Range> ranges = rangeType.decodeRanges(encodedRanges); long previousFrom = Long.MIN_VALUE; for (RangeFieldMapper.Range range : ranges) { - final Long from = (Long) range.getFrom(); + Long from = (Long) range.getFrom(); // The encoding should ensure that this assert is always true. assert from >= previousFrom : "Start of range not >= previous start"; final Long to = (Long) range.getTo(); @@ -136,7 +135,7 @@ public void collect(int doc, long bucket) throws IOException { continue; } // Bucket collection identical to NumericHistogramAggregator, could be refactored - long bucketOrd = bucketOrds.add(key); + long bucketOrd = bucketOrds.add(owningBucketOrd, key); if (bucketOrd < 0) { // already seen bucketOrd = -1 - bucketOrd; collectExistingBucket(sub, doc, bucketOrd); @@ -187,4 +186,9 @@ public InternalAggregation buildEmptyAggregation() { public void doClose() { Releasables.close(bucketOrds); } + + @Override + public void collectDebugInfo(BiConsumer<String, Object> add) { + add.accept("total_buckets", bucketOrds.size()); + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java index 4c585f2930398..cd49e47afde62 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java @@ -57,7 +57,8 @@ private LongKeyedBucketOrds() {} public abstract long size(); /** - * Build an iterator for buckets inside {@code owningBucketOrd}. + * Build an iterator for buckets inside {@code owningBucketOrd} in order + * of increasing ord. * <p> * When this is first returned it is "unpositioned" and you must call * {@link BucketOrdsEnum#next()} to move it to the first value. diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index 98a58adfed6f2..05cfab85bf748 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; @@ -31,12 +32,18 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.common.time.DateFormatters; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.MultiBucketConsumerService.TooManyBucketsException; +import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; import java.io.IOException; @@ -46,6 +53,7 @@ import java.util.List; import java.util.function.Consumer; +import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.equalTo; public class DateHistogramAggregatorTests extends AggregatorTestCase { @@ -62,7 +70,7 @@ public class DateHistogramAggregatorTests extends AggregatorTestCase { */ private static final String SEARCHABLE_DATE = "searchable_date"; - private static final List<String> dataset = Arrays.asList( + private static final List<String> DATASET = Arrays.asList( "2010-03-12T01:07:45", "2010-04-27T03:43:34", "2012-05-18T04:11:00", "2015-02-13T13:09:32", "2015-06-24T13:47:43", "2015-11-13T16:14:34", "2016-03-04T17:09:50", "2017-12-12T22:55:46"); public void testMatchNoDocsDeprecatedInterval() throws IOException { - testBothCases(new MatchNoDocsQuery(), dataset, + testBothCases(new MatchNoDocsQuery(), DATASET, aggregation -> aggregation.dateHistogramInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> { assertEquals(0, histogram.getBuckets().size()); @@ -86,11 +94,11 @@ } public void testMatchNoDocs() throws IOException { - testBothCases(new MatchNoDocsQuery(), dataset, + testBothCases(new MatchNoDocsQuery(), DATASET, aggregation -> aggregation.calendarInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> assertEquals(0, histogram.getBuckets().size()), false ); - testBothCases(new MatchNoDocsQuery(), dataset, + testBothCases(new MatchNoDocsQuery(), 
DATASET, aggregation -> aggregation.fixedInterval(new DateHistogramInterval("365d")).field(AGGREGABLE_DATE), histogram -> assertEquals(0, histogram.getBuckets().size()), false ); @@ -99,21 +107,21 @@ public void testMatchNoDocs() throws IOException { public void testMatchAllDocsDeprecatedInterval() throws IOException { Query query = new MatchAllDocsQuery(); - testSearchCase(query, dataset, + testSearchCase(query, DATASET, aggregation -> aggregation.dateHistogramInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> { assertEquals(6, histogram.getBuckets().size()); assertTrue(AggregationInspectionHelper.hasValue(histogram)); }, false ); - testSearchAndReduceCase(query, dataset, + testSearchAndReduceCase(query, DATASET, aggregation -> aggregation.dateHistogramInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> { assertEquals(8, histogram.getBuckets().size()); assertTrue(AggregationInspectionHelper.hasValue(histogram)); }, false ); - testBothCases(query, dataset, + testBothCases(query, DATASET, aggregation -> aggregation.dateHistogramInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE).minDocCount(1L), histogram -> { assertEquals(6, histogram.getBuckets().size()); @@ -128,7 +136,7 @@ public void testMatchAllDocs() throws IOException { List foo = new ArrayList<>(); for (int i = 0; i < 1000; i++) { - foo.add(dataset.get(randomIntBetween(0, dataset.size()-1))); + foo.add(DATASET.get(randomIntBetween(0, DATASET.size()-1))); } testSearchAndReduceCase(query, foo, aggregation -> aggregation.fixedInterval(new DateHistogramInterval("365d")) @@ -136,33 +144,108 @@ public void testMatchAllDocs() throws IOException { histogram -> assertEquals(8, histogram.getBuckets().size()), false ); - testSearchCase(query, dataset, + testSearchCase(query, DATASET, aggregation -> aggregation.calendarInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> assertEquals(6, histogram.getBuckets().size()), false ); - testSearchAndReduceCase(query, dataset, + testSearchAndReduceCase(query, DATASET, aggregation -> aggregation.calendarInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> assertEquals(8, histogram.getBuckets().size()), false ); - testBothCases(query, dataset, + testBothCases(query, DATASET, aggregation -> aggregation.calendarInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE).minDocCount(1L), histogram -> assertEquals(6, histogram.getBuckets().size()), false ); - testSearchCase(query, dataset, + testSearchCase(query, DATASET, aggregation -> aggregation.fixedInterval(new DateHistogramInterval("365d")).field(AGGREGABLE_DATE), histogram -> assertEquals(6, histogram.getBuckets().size()), false ); - testSearchAndReduceCase(query, dataset, + testSearchAndReduceCase(query, DATASET, aggregation -> aggregation.fixedInterval(new DateHistogramInterval("365d")).field(AGGREGABLE_DATE), histogram -> assertEquals(8, histogram.getBuckets().size()), false ); - testBothCases(query, dataset, + testBothCases(query, DATASET, aggregation -> aggregation.fixedInterval(new DateHistogramInterval("365d")).field(AGGREGABLE_DATE).minDocCount(1L), histogram -> assertEquals(6, histogram.getBuckets().size()), false ); } + public void testAsSubAgg() throws IOException { + KeywordFieldMapper.KeywordFieldType k1ft = new KeywordFieldMapper.KeywordFieldType(); + k1ft.setName("k1"); + k1ft.setHasDocValues(true); + KeywordFieldMapper.KeywordFieldType k2ft = new KeywordFieldMapper.KeywordFieldType(); + k2ft.setName("k2"); + k2ft.setHasDocValues(true); + 
DateFieldMapper.DateFieldType dft = aggregableDateFieldType(false, randomBoolean()); + CheckedConsumer buildIndex = iw -> { + iw.addDocument(org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-02-01T00:00:00Z")), + new SortedSetDocValuesField("k1", new BytesRef("a")), + new SortedSetDocValuesField("k2", new BytesRef("a")) + )); + iw.addDocument(org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-03-01T00:00:00Z")), + new SortedSetDocValuesField("k1", new BytesRef("a")), + new SortedSetDocValuesField("k2", new BytesRef("a")) + )); + iw.addDocument(org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2021-02-01T00:00:00Z")), + new SortedSetDocValuesField("k1", new BytesRef("a")), + new SortedSetDocValuesField("k2", new BytesRef("a")) + )); + iw.addDocument(org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2021-03-01T00:00:00Z")), + new SortedSetDocValuesField("k1", new BytesRef("a")), + new SortedSetDocValuesField("k2", new BytesRef("b")) + )); + iw.addDocument(org.elasticsearch.common.collect.List.of( + new SortedNumericDocValuesField(AGGREGABLE_DATE, dft.parse("2020-02-01T00:00:00Z")), + new SortedSetDocValuesField("k1", new BytesRef("b")), + new SortedSetDocValuesField("k2", new BytesRef("b")) + )); + }; + AggregationBuilder builder = new TermsAggregationBuilder("k1").field("k1").subAggregation( + new DateHistogramAggregationBuilder("dh").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR)); + testCase(builder, new MatchAllDocsQuery(), buildIndex, (StringTerms terms) -> { + StringTerms.Bucket a = terms.getBucketByKey("a"); + InternalDateHistogram adh = a.getAggregations().get("dh"); + assertThat(adh.getBuckets().stream().map(bucket -> bucket.getKey().toString()).collect(toList()), + equalTo(org.elasticsearch.common.collect.List.of("2020-01-01T00:00Z", "2021-01-01T00:00Z") + )); + + StringTerms.Bucket b = terms.getBucketByKey("b"); + InternalDateHistogram bdh = b.getAggregations().get("dh"); + assertThat(bdh.getBuckets().stream().map(bucket -> bucket.getKey().toString()).collect(toList()), + equalTo(org.elasticsearch.common.collect.List.of("2020-01-01T00:00Z") + )); + }, k1ft, dft); + builder = new TermsAggregationBuilder("k2").field("k2").subAggregation(builder); + testCase(builder, new MatchAllDocsQuery(), buildIndex, (StringTerms terms) -> { + StringTerms.Bucket a = terms.getBucketByKey("a"); + StringTerms ak1 = a.getAggregations().get("k1"); + StringTerms.Bucket ak1a = ak1.getBucketByKey("a"); + InternalDateHistogram ak1adh = ak1a.getAggregations().get("dh"); + assertThat(ak1adh.getBuckets().stream().map(bucket -> bucket.getKey().toString()).collect(toList()), + equalTo(org.elasticsearch.common.collect.List.of("2020-01-01T00:00Z", "2021-01-01T00:00Z") + )); + + StringTerms.Bucket b = terms.getBucketByKey("b"); + StringTerms bk1 = b.getAggregations().get("k1"); + StringTerms.Bucket bk1a = bk1.getBucketByKey("a"); + InternalDateHistogram bk1adh = bk1a.getAggregations().get("dh"); + assertThat(bk1adh.getBuckets().stream().map(bucket -> bucket.getKey().toString()).collect(toList()), + equalTo(org.elasticsearch.common.collect.List.of("2021-01-01T00:00Z") + )); + StringTerms.Bucket bk1b = bk1.getBucketByKey("b"); + InternalDateHistogram bk1bdh = bk1b.getAggregations().get("dh"); + assertThat(bk1bdh.getBuckets().stream().map(bucket -> 
bucket.getKey().toString()).collect(toList()), + equalTo(org.elasticsearch.common.collect.List.of("2020-01-01T00:00Z") + )); + }, k1ft, k2ft, dft); + } + public void testNoDocsDeprecatedInterval() throws IOException { Query query = new MatchNoDocsQuery(); List dates = Collections.emptyList(); @@ -203,7 +286,7 @@ public void testNoDocs() throws IOException { } public void testAggregateWrongFieldDeprecated() throws IOException { - testBothCases(new MatchAllDocsQuery(), dataset, + testBothCases(new MatchAllDocsQuery(), DATASET, aggregation -> aggregation.dateHistogramInterval(DateHistogramInterval.YEAR).field("wrong_field"), histogram -> { assertEquals(0, histogram.getBuckets().size()); @@ -214,18 +297,18 @@ public void testAggregateWrongFieldDeprecated() throws IOException { } public void testAggregateWrongField() throws IOException { - testBothCases(new MatchAllDocsQuery(), dataset, + testBothCases(new MatchAllDocsQuery(), DATASET, aggregation -> aggregation.calendarInterval(DateHistogramInterval.YEAR).field("wrong_field"), histogram -> assertEquals(0, histogram.getBuckets().size()), false ); - testBothCases(new MatchAllDocsQuery(), dataset, + testBothCases(new MatchAllDocsQuery(), DATASET, aggregation -> aggregation.fixedInterval(new DateHistogramInterval("365d")).field("wrong_field"), histogram -> assertEquals(0, histogram.getBuckets().size()), false ); } public void testIntervalYearDeprecated() throws IOException { - testBothCases(LongPoint.newRangeQuery(SEARCHABLE_DATE, asLong("2015-01-01"), asLong("2017-12-31")), dataset, + testBothCases(LongPoint.newRangeQuery(SEARCHABLE_DATE, asLong("2015-01-01"), asLong("2017-12-31")), DATASET, aggregation -> aggregation.dateHistogramInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> { List buckets = histogram.getBuckets(); @@ -248,7 +331,7 @@ public void testIntervalYearDeprecated() throws IOException { } public void testIntervalYear() throws IOException { - testBothCases(LongPoint.newRangeQuery(SEARCHABLE_DATE, asLong("2015-01-01"), asLong("2017-12-31")), dataset, + testBothCases(LongPoint.newRangeQuery(SEARCHABLE_DATE, asLong("2015-01-01"), asLong("2017-12-31")), DATASET, aggregation -> aggregation.calendarInterval(DateHistogramInterval.YEAR).field(AGGREGABLE_DATE), histogram -> { List buckets = histogram.getBuckets(); @@ -1222,14 +1305,7 @@ private void executeTestCase(boolean reduced, int maxBucket, boolean useNanosecondResolution) throws IOException { boolean aggregableDateIsSearchable = randomBoolean(); - - DateFieldMapper.Builder builder = new DateFieldMapper.Builder("_name"); - if (useNanosecondResolution) { - builder.withResolution(DateFieldMapper.Resolution.NANOSECONDS); - } - DateFieldMapper.DateFieldType fieldType = builder.fieldType(); - fieldType.setHasDocValues(true); - fieldType.setIndexOptions(aggregableDateIsSearchable ? 
IndexOptions.DOCS : IndexOptions.NONE); + DateFieldMapper.DateFieldType fieldType = aggregableDateFieldType(useNanosecondResolution, aggregableDateIsSearchable); try (Directory directory = newDirectory()) { @@ -1259,8 +1335,6 @@ private void executeTestCase(boolean reduced, configure.accept(aggregationBuilder); } - fieldType.setName(aggregationBuilder.field()); - InternalDateHistogram histogram; if (reduced) { histogram = searchAndReduce(indexSearcher, query, aggregationBuilder, maxBucket, null, fieldType); @@ -1272,6 +1346,17 @@ private void executeTestCase(boolean reduced, } } + private DateFieldMapper.DateFieldType aggregableDateFieldType(boolean useNanosecondResolution, boolean isSearchable) { + DateFieldMapper.Builder builder = new DateFieldMapper.Builder(AGGREGABLE_DATE); + if (useNanosecondResolution) { + builder.withResolution(DateFieldMapper.Resolution.NANOSECONDS); + } + DateFieldMapper.DateFieldType fieldType = builder.fieldType(); + fieldType.setIndexOptions(isSearchable ? IndexOptions.DOCS : IndexOptions.NONE); + fieldType.setName(AGGREGABLE_DATE); + return fieldType; + } + private static long asLong(String dateTime) { return DateFormatters.from(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parse(dateTime)).toInstant().toEpochMilli(); }
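The BucketOrdsEnum documented in LongKeyedBucketOrds above starts "unpositioned": next() must be called before ord() or value() mean anything, which is exactly how the result-building loops in BucketsAggregator drive it. A toy cursor with the same contract (ToyOrdsEnum is hypothetical, not the Elasticsearch interface):

    class ToyOrdsEnum {
        private final long[] ords;
        private final long[] values;
        private int i = -1; // unpositioned until next() is called

        ToyOrdsEnum(long[] ords, long[] values) {
            this.ords = ords;
            this.values = values;
        }

        boolean next() {
            return ++i < ords.length;
        }

        long ord() {
            return ords[i];
        }

        long value() {
            return values[i];
        }

        public static void main(String[] args) {
            ToyOrdsEnum e = new ToyOrdsEnum(new long[] {0, 1}, new long[] {100, 200});
            while (e.next()) { // positions on the first call
                System.out.println("ord=" + e.ord() + " value=" + e.value());
            }
        }
    }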