From d5e86d7c4d008d83b0321492f59c89db046bbe8c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 29 May 2020 16:59:35 -0400 Subject: [PATCH] Small cleanups for terms aggregator (#57315) (#57381) This includes a few small cleanups for the `TermsAggregatorFactory`: 1. Removes an unused `DeprecationLogger` 2. Moves the members to right above the ctor. 3. Merges some all of the heuristics for picking `SubAggCollectionMode` into a single method. --- .../bucket/terms/TermsAggregatorFactory.java | 57 +++++++++---------- .../terms/TermsAggregatorFactoryTests.java | 28 ++++++--- 2 files changed, 48 insertions(+), 37 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index e56a0393856d6..e6848a3ddc2ba 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -19,10 +19,8 @@ package org.elasticsearch.search.aggregations.bucket.terms; -import org.apache.logging.log4j.LogManager; import org.apache.lucene.search.IndexSearcher; import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.AggregationExecutionException; @@ -52,17 +50,8 @@ import java.util.function.Function; public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { - private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(TermsAggregatorFactory.class)); - static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS; - private final BucketOrder order; - private final IncludeExclude includeExclude; - private final String executionHint; - private final SubAggCollectionMode collectMode; - private final TermsAggregator.BucketCountThresholds bucketCountThresholds; - private final boolean showTermDocCountError; - static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register(TermsAggregationBuilder.NAME, Arrays.asList(CoreValuesSourceType.BYTES, CoreValuesSourceType.IP), @@ -98,7 +87,7 @@ public Aggregator build(String name, ExecutionMode execution = null; if (executionHint != null) { - execution = ExecutionMode.fromString(executionHint, deprecationLogger); + execution = ExecutionMode.fromString(executionHint); } // In some cases, using ordinals is just not supported: override it if (valuesSource instanceof ValuesSource.Bytes.WithOrdinals == false) { @@ -109,11 +98,7 @@ public Aggregator build(String name, } final long maxOrd = execution == ExecutionMode.GLOBAL_ORDINALS ? getMaxOrd(valuesSource, context.searcher()) : -1; if (subAggCollectMode == null) { - subAggCollectMode = SubAggCollectionMode.DEPTH_FIRST; - // TODO can we remove concept of AggregatorFactories.EMPTY? - if (factories != AggregatorFactories.EMPTY) { - subAggCollectMode = subAggCollectionMode(bucketCountThresholds.getShardSize(), maxOrd); - } + subAggCollectMode = pickSubAggColectMode(factories, bucketCountThresholds.getShardSize(), maxOrd); } if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { @@ -165,12 +150,7 @@ public Aggregator build(String name, } if (subAggCollectMode == null) { - // TODO can we remove concept of AggregatorFactories.EMPTY? - if (factories != AggregatorFactories.EMPTY) { - subAggCollectMode = subAggCollectionMode(bucketCountThresholds.getShardSize(), -1); - } else { - subAggCollectMode = SubAggCollectionMode.DEPTH_FIRST; - } + subAggCollectMode = pickSubAggColectMode(factories, bucketCountThresholds.getShardSize(), -1); } ValuesSource.Numeric numericValuesSource = (ValuesSource.Numeric) valuesSource; @@ -198,6 +178,13 @@ public boolean needsToCollectFromSingleBucket() { }; } + private final BucketOrder order; + private final IncludeExclude includeExclude; + private final String executionHint; + private final SubAggCollectionMode collectMode; + private final TermsAggregator.BucketCountThresholds bucketCountThresholds; + private final boolean showTermDocCountError; + TermsAggregatorFactory(String name, ValuesSourceConfig config, BucketOrder order, @@ -280,17 +267,29 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource, showTermDocCountError, collectsFromSingleBucket, metadata); } - // return the SubAggCollectionMode that this aggregation should use based on the expected size - // and the cardinality of the field - static SubAggCollectionMode subAggCollectionMode(int expectedSize, long maxOrd) { + /** + * Pick a {@link SubAggCollectionMode} based on heuristics about what + * we're collecting. + */ + static SubAggCollectionMode pickSubAggColectMode(AggregatorFactories factories, int expectedSize, long maxOrd) { + if (factories.countAggregators() == 0) { + // Without sub-aggregations we pretty much ignore this field value so just pick something + return SubAggCollectionMode.DEPTH_FIRST; + } if (expectedSize == Integer.MAX_VALUE) { - // return all buckets + // We expect to return all buckets so delaying them won't save any time return SubAggCollectionMode.DEPTH_FIRST; } if (maxOrd == -1 || maxOrd > expectedSize) { - // use breadth_first if the cardinality is bigger than the expected size or unknown (-1) + /* + * We either don't know how many buckets we expect there to be + * (maxOrd == -1) or we expect there to be more buckets than + * we will collect from this shard. So delaying collection of + * the sub-buckets *should* save time. + */ return SubAggCollectionMode.BREADTH_FIRST; } + // We expect to collect so many buckets that we may as well collect them all. return SubAggCollectionMode.DEPTH_FIRST; } @@ -398,7 +397,7 @@ Aggregator create(String name, } }; - public static ExecutionMode fromString(String value, final DeprecationLogger deprecationLogger) { + public static ExecutionMode fromString(String value) { switch (value) { case "global_ordinals": return GLOBAL_ORDINALS; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java index 83a05738163c9..af394c08a7c1b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactoryTests.java @@ -20,25 +20,37 @@ package org.elasticsearch.search.aggregations.bucket.terms; import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.test.ESTestCase; import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class TermsAggregatorFactoryTests extends ESTestCase { - public void testSubAggCollectMode() throws Exception { - assertThat(TermsAggregatorFactory.subAggCollectionMode(Integer.MAX_VALUE, -1), + public void testPickEmpty() throws Exception { + AggregatorFactories empty = mock(AggregatorFactories.class); + when(empty.countAggregators()).thenReturn(0); + assertThat(TermsAggregatorFactory.pickSubAggColectMode(empty, randomInt(), randomInt()), equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); - assertThat(TermsAggregatorFactory.subAggCollectionMode(10, -1), + } + + public void testPickNonEempty() { + AggregatorFactories nonEmpty = mock(AggregatorFactories.class); + when(nonEmpty.countAggregators()).thenReturn(1); + assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, Integer.MAX_VALUE, -1), + equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); + assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, -1), equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST)); - assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 5), + assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, 5), equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); - assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 10), + assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, 10), equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); - assertThat(TermsAggregatorFactory.subAggCollectionMode(10, 100), + assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 10, 100), equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST)); - assertThat(TermsAggregatorFactory.subAggCollectionMode(1, 2), + assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 1, 2), equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST)); - assertThat(TermsAggregatorFactory.subAggCollectionMode(1, 100), + assertThat(TermsAggregatorFactory.pickSubAggColectMode(nonEmpty, 1, 100), equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST)); } }