diff --git a/server/src/main/java/org/elasticsearch/common/util/LongLongHash.java b/server/src/main/java/org/elasticsearch/common/util/LongLongHash.java
new file mode 100644
index 0000000000000..3ffb98b19aefb
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/common/util/LongLongHash.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.util;
+
+import com.carrotsearch.hppc.BitMixer;
+
+import org.elasticsearch.common.lease.Releasables;
+
+/**
+ * Specialized hash table implementation similar to BytesRefHash that maps
+ * two long values to ids. Collisions are resolved with open addressing and
+ * linear probing, growth is smooth thanks to {@link BigArrays} and capacity
+ * is always a multiple of 2 for faster identification of buckets.
+ * This class is not thread-safe.
+ */
+// IDs are internally stored as id + 1 so that 0 encodes for an empty slot
+public final class LongLongHash extends AbstractHash {
+ /**
+ * The keys of the hash, stored one after another. So the keys for an id
+ * are stored in {@code 2 * id} and {@code 2 * id + 1}. This arrangement
+ * makes {@link #add(long, long)} about 17% faster which seems worth it
+ * because it is in the critical path for aggregations.
+ */
+ private LongArray keys;
+
+ // Constructor with configurable capacity and default maximum load factor.
+ public LongLongHash(long capacity, BigArrays bigArrays) {
+ this(capacity, DEFAULT_MAX_LOAD_FACTOR, bigArrays);
+ }
+
+    // Constructor with configurable capacity and load factor.
+ public LongLongHash(long capacity, float maxLoadFactor, BigArrays bigArrays) {
+ super(capacity, maxLoadFactor, bigArrays);
+ keys = bigArrays.newLongArray(2 * capacity, false);
+ }
+
+ /**
+     * Return the first key of the entry with {@code 0 <= id < capacity()}. The
+     * result is undefined if the slot is unused.
+ */
+ public long getKey1(long id) {
+ return keys.get(2 * id);
+ }
+
+ /**
+     * Return the second key of the entry with {@code 0 <= id < capacity()}. The
+     * result is undefined if the slot is unused.
+ */
+ public long getKey2(long id) {
+ return keys.get(2 * id + 1);
+ }
+
+ /**
+     * Get the id associated with {@code key1, key2} or -1 if the key is not contained in the hash.
+ */
+ public long find(long key1, long key2) {
+ final long slot = slot(hash(key1, key2), mask);
+ for (long index = slot; ; index = nextSlot(index, mask)) {
+ final long id = id(index);
+ long keyOffset = 2 * id;
+ if (id == -1 || (keys.get(keyOffset) == key1 && keys.get(keyOffset + 1) == key2)) {
+ return id;
+ }
+ }
+ }
+
+ private long set(long key1, long key2, long id) {
+ assert size < maxSize;
+ final long slot = slot(hash(key1, key2), mask);
+ for (long index = slot; ; index = nextSlot(index, mask)) {
+ final long curId = id(index);
+ if (curId == -1) { // means unset
+ id(index, id);
+ append(id, key1, key2);
+ ++size;
+ return id;
+ } else {
+ long keyOffset = 2 * curId;
+ if (keys.get(keyOffset) == key1 && keys.get(keyOffset + 1) == key2) {
+ return -1 - curId;
+ }
+ }
+ }
+ }
+
+ private void append(long id, long key1, long key2) {
+ long keyOffset = 2 * id;
+ keys = bigArrays.grow(keys, keyOffset + 2);
+ keys.set(keyOffset, key1);
+ keys.set(keyOffset + 1, key2);
+ }
+
+ private void reset(long key1, long key2, long id) {
+ final long slot = slot(hash(key1, key2), mask);
+ for (long index = slot; ; index = nextSlot(index, mask)) {
+ final long curId = id(index);
+ if (curId == -1) { // means unset
+ id(index, id);
+ append(id, key1, key2);
+ break;
+ }
+ }
+ }
+
+ /**
+     * Try to add {@code key1, key2}. Return its newly allocated id if it wasn't in
+ * the hash table yet, or {@code -1-id} if it was already present in
+ * the hash table.
+ */
+ public long add(long key1, long key2) {
+ if (size >= maxSize) {
+ assert size == maxSize;
+ grow();
+ }
+ assert size < maxSize;
+ return set(key1, key2, size);
+ }
+
+ @Override
+ protected void removeAndAdd(long index) {
+ final long id = id(index, -1);
+ assert id >= 0;
+ long keyOffset = id * 2;
+ final long key1 = keys.set(keyOffset, 0);
+ final long key2 = keys.set(keyOffset + 1, 0);
+ reset(key1, key2, id);
+ }
+
+ @Override
+ public void close() {
+ Releasables.close(keys, () -> super.close());
+ }
+
+ static long hash(long key1, long key2) {
+ return 31 * BitMixer.mix(key1) + BitMixer.mix(key2);
+ }
+}
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java
index b79dc74980a4d..370cb6419003b 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java
@@ -20,11 +20,9 @@
package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.common.lease.Releasable;
-import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
-import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.common.util.LongHash;
-import org.elasticsearch.common.util.ObjectArray;
+import org.elasticsearch.common.util.LongLongHash;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
/**
@@ -124,6 +122,7 @@ public FromSingle(BigArrays bigArrays) {
@Override
public long add(long owningBucketOrd, long value) {
+ // This is in the critical path for collecting most aggs. Be careful of performance.
assert owningBucketOrd == 0;
return ords.add(value);
}
@@ -189,121 +188,69 @@ public void close() {
* Implementation that works properly when collecting from many buckets.
*/
public static class FromMany extends LongKeyedBucketOrds {
- // TODO we can almost certainly do better here by building something fit for purpose rather than trying to lego together stuff
- private static class Buckets implements Releasable {
- private final LongHash valueToThisBucketOrd;
- private LongArray thisBucketOrdToGlobalOrd;
-
- Buckets(BigArrays bigArrays) {
- valueToThisBucketOrd = new LongHash(1, bigArrays);
- thisBucketOrdToGlobalOrd = bigArrays.newLongArray(1, false);
- }
-
- @Override
- public void close() {
- Releasables.close(valueToThisBucketOrd, thisBucketOrdToGlobalOrd);
- }
- }
- private final BigArrays bigArrays;
- private ObjectArray owningOrdToBuckets;
- private long lastGlobalOrd = -1;
+ private final LongLongHash ords;
public FromMany(BigArrays bigArrays) {
- this.bigArrays = bigArrays;
- owningOrdToBuckets = bigArrays.newObjectArray(1);
+ ords = new LongLongHash(2, bigArrays);
}
@Override
public long add(long owningBucketOrd, long value) {
- Buckets buckets = bucketsForOrd(owningBucketOrd);
- long thisBucketOrd = buckets.valueToThisBucketOrd.add(value);
- if (thisBucketOrd < 0) {
- // Already in the hash
- thisBucketOrd = -1 - thisBucketOrd;
- return -1 - buckets.thisBucketOrdToGlobalOrd.get(thisBucketOrd);
- }
- buckets.thisBucketOrdToGlobalOrd = bigArrays.grow(buckets.thisBucketOrdToGlobalOrd, thisBucketOrd + 1);
- lastGlobalOrd++;
- buckets.thisBucketOrdToGlobalOrd.set(thisBucketOrd, lastGlobalOrd);
- return lastGlobalOrd;
- }
-
- private Buckets bucketsForOrd(long owningBucketOrd) {
- if (owningOrdToBuckets.size() <= owningBucketOrd) {
- owningOrdToBuckets = bigArrays.grow(owningOrdToBuckets, owningBucketOrd + 1);
- Buckets buckets = new Buckets(bigArrays);
- owningOrdToBuckets.set(owningBucketOrd, buckets);
- return buckets;
- }
- Buckets buckets = owningOrdToBuckets.get(owningBucketOrd);
- if (buckets == null) {
- buckets = new Buckets(bigArrays);
- owningOrdToBuckets.set(owningBucketOrd, buckets);
- }
- return buckets;
+ // This is in the critical path for collecting most aggs. Be careful of performance.
+ return ords.add(owningBucketOrd, value);
}
@Override
public long find(long owningBucketOrd, long value) {
- if (owningBucketOrd >= owningOrdToBuckets.size()) {
- return -1;
- }
- Buckets buckets = owningOrdToBuckets.get(owningBucketOrd);
- if (buckets == null) {
- return -1;
- }
- long thisBucketOrd = buckets.valueToThisBucketOrd.find(value);
- if (thisBucketOrd < 0) {
- return -1;
- }
- return buckets.thisBucketOrdToGlobalOrd.get(thisBucketOrd);
+ return ords.find(owningBucketOrd, value);
}
@Override
public long bucketsInOrd(long owningBucketOrd) {
- if (owningBucketOrd >= owningOrdToBuckets.size()) {
- return 0;
- }
- Buckets buckets = owningOrdToBuckets.get(owningBucketOrd);
- if (buckets == null) {
- return 0;
+ // TODO it'd be faster to count the number of buckets in a list of these ords rather than one at a time
+ long count = 0;
+ for (long i = 0; i < ords.size(); i++) {
+ if (ords.getKey1(i) == owningBucketOrd) {
+ count++;
+ }
}
- return buckets.valueToThisBucketOrd.size();
+ return count;
}
@Override
public long size() {
- return lastGlobalOrd + 1;
+ return ords.size();
}
@Override
public long maxOwningBucketOrd() {
- return owningOrdToBuckets.size() - 1;
+ // TODO this is fairly expensive to compute. Can we avoid needing it?
+ long max = -1;
+ for (long i = 0; i < ords.size(); i++) {
+ max = Math.max(max, ords.getKey1(i));
+ }
+ return max;
}
@Override
public BucketOrdsEnum ordsEnum(long owningBucketOrd) {
- if (owningBucketOrd >= owningOrdToBuckets.size()) {
- return BucketOrdsEnum.EMPTY;
- }
- Buckets buckets = owningOrdToBuckets.get(owningBucketOrd);
- if (buckets == null) {
- return BucketOrdsEnum.EMPTY;
- }
+ // TODO it'd be faster to iterate many ords at once rather than one at a time
return new BucketOrdsEnum() {
- private long thisBucketOrd = -1;
+ private long ord = -1;
private long value;
- private long ord;
@Override
public boolean next() {
- thisBucketOrd++;
- if (thisBucketOrd >= buckets.valueToThisBucketOrd.size()) {
- return false;
+ while (true) {
+ ord++;
+ if (ord >= ords.size()) {
+ return false;
+ }
+ if (ords.getKey1(ord) == owningBucketOrd) {
+ value = ords.getKey2(ord);
+ return true;
+ }
}
- value = buckets.valueToThisBucketOrd.get(thisBucketOrd);
- ord = buckets.thisBucketOrdToGlobalOrd.get(thisBucketOrd);
- return true;
}
@Override
@@ -320,13 +267,7 @@ public long ord() {
@Override
public void close() {
- for (long owningBucketOrd = 0; owningBucketOrd < owningOrdToBuckets.size(); owningBucketOrd++) {
- Buckets buckets = owningOrdToBuckets.get(owningBucketOrd);
- if (buckets != null) {
- buckets.close();
- }
- }
- owningOrdToBuckets.close();
+ ords.close();
}
}
}
diff --git a/server/src/test/java/org/elasticsearch/common/util/LongHashTests.java b/server/src/test/java/org/elasticsearch/common/util/LongHashTests.java
index 708e7beb861ec..31b499d5c4984 100644
--- a/server/src/test/java/org/elasticsearch/common/util/LongHashTests.java
+++ b/server/src/test/java/org/elasticsearch/common/util/LongHashTests.java
@@ -22,9 +22,10 @@
import com.carrotsearch.hppc.LongLongHashMap;
import com.carrotsearch.hppc.LongLongMap;
import com.carrotsearch.hppc.cursors.LongLongCursor;
+
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
-import org.elasticsearch.test.ESSingleNodeTestCase;
+import org.elasticsearch.test.ESTestCase;
import java.util.HashMap;
import java.util.HashSet;
@@ -32,7 +33,7 @@
import java.util.Map;
import java.util.Set;
-public class LongHashTests extends ESSingleNodeTestCase {
+public class LongHashTests extends ESTestCase {
LongHash hash;
private BigArrays randombigArrays() {
diff --git a/server/src/test/java/org/elasticsearch/common/util/LongLongHashTests.java b/server/src/test/java/org/elasticsearch/common/util/LongLongHashTests.java
new file mode 100644
index 0000000000000..bdfb6b89ccd4a
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/common/util/LongLongHashTests.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.util;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class LongLongHashTests extends ESTestCase {
+ private BigArrays randombigArrays() {
+ return new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService());
+ }
+
+ private LongLongHash randomHash() {
+ // Test high load factors to make sure that collision resolution works fine
+ final float maxLoadFactor = 0.6f + randomFloat() * 0.39f;
+ return new LongLongHash(randomIntBetween(0, 100), maxLoadFactor, randombigArrays());
+ }
+
+ public void testSimple() {
+ try (LongLongHash hash = randomHash()) {
+ assertThat(hash.add(0, 0), equalTo(0L));
+ assertThat(hash.add(0, 1), equalTo(1L));
+ assertThat(hash.add(0, 2), equalTo(2L));
+ assertThat(hash.add(1, 0), equalTo(3L));
+ assertThat(hash.add(1, 1), equalTo(4L));
+
+ assertThat(hash.add(0, 0), equalTo(-1L));
+ assertThat(hash.add(0, 2), equalTo(-3L));
+ assertThat(hash.add(1, 1), equalTo(-5L));
+
+ assertThat(hash.getKey1(0), equalTo(0L));
+ assertThat(hash.getKey2(0), equalTo(0L));
+ assertThat(hash.getKey1(4), equalTo(1L));
+ assertThat(hash.getKey2(4), equalTo(1L));
+ }
+ }
+
+ public void testDuel() {
+ try (LongLongHash hash = randomHash()) {
+ int iters = scaledRandomIntBetween(100, 100000);
+ Key[] values = randomArray(1, iters, Key[]::new, () -> new Key(randomLong(), randomLong()));
+            Map<Key, Long> keyToId = new HashMap<>();
+            List<Key> idToKey = new ArrayList<>();
+ for (int i = 0; i < iters; ++i) {
+ Key key = randomFrom(values);
+ if (keyToId.containsKey(key)) {
+ assertEquals(-1 - keyToId.get(key), hash.add(key.key1, key.key2));
+ } else {
+ assertEquals(keyToId.size(), hash.add(key.key1, key.key2));
+ keyToId.put(key, keyToId.size());
+ idToKey.add(key);
+ }
+ }
+
+ assertEquals(keyToId.size(), hash.size());
+            for (Map.Entry<Key, Long> entry : keyToId.entrySet()) {
+ assertEquals(entry.getValue().longValue(), hash.find(entry.getKey().key1, entry.getKey().key2));
+ }
+
+ assertEquals(idToKey.size(), hash.size());
+ for (long i = 0; i < hash.capacity(); i++) {
+ long id = hash.id(i);
+ if (id >= 0) {
+ Key key = idToKey.get((int) id);
+ assertEquals(key.key1, hash.getKey1(id));
+ assertEquals(key.key2, hash.getKey2(id));
+ }
+ }
+
+ for (long i = 0; i < hash.size(); i++) {
+ Key key = idToKey.get((int) i);
+ assertEquals(key.key1, hash.getKey1(i));
+ assertEquals(key.key2, hash.getKey2(i));
+ }
+ }
+ }
+
+ class Key {
+ long key1;
+ long key2;
+
+ Key(long key1, long key2) {
+ this.key1 = key1;
+ this.key2 = key2;
+ }
+ }
+
+}