tuple hash: simplify

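Tuple hashing now works much like abseil's: each element is converted to a 64-bit value and mixed into a running hash state, instead of being copied into a temporary buffer that is then hashed.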
martinus · Dec 19, 2023 · 05b4082 · 05b4082
1 parent 6f94f87
commit 05b4082
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 23 deletions.
diff --git a/include/ankerl/unordered_dense.h b/include/ankerl/unordered_dense.h
@@ -330,41 +330,29 @@ struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
 
 template <typename... Args>
 struct tuple_hash_helper {
+    // Converts the value to 64 bits. Integral types are simply cast; the subsequent mixing does the rest.
+    // Non-integral types are hashed with hash<Arg> first.
     template <typename Arg>
-    [[nodiscard]] constexpr static auto calc_buf_size() {
-        if constexpr (std::has_unique_object_representations_v<Arg>) {
-            return sizeof(Arg);
+    [[nodiscard]] constexpr static auto to64(Arg const& arg) -> uint64_t {
+        if constexpr (std::is_integral_v<Arg>) {
+            return static_cast<uint64_t>(arg);
         } else {
-            return sizeof(hash<Arg>{}(std::declval<Arg>()));
+            return hash<Arg>{}(arg);
         }
     }
 
-    // Reads data from back to front. We do this so there's no need for bswap when multiple
-    // bytes are read (on little endian). This should be a tiny bit faster.
-    template <typename Arg>
-    [[nodiscard]] constexpr static auto put(std::byte* pos, Arg const& arg) -> std::byte* {
-        if constexpr (std::has_unique_object_representations_v<Arg>) {
-            pos -= sizeof(Arg);
-            std::memcpy(pos, &arg, sizeof(Arg));
-            return pos;
-        } else {
-            auto x = hash<Arg>{}(arg);
-            pos -= sizeof(x);
-            std::memcpy(pos, &x, sizeof(x));
-            return pos;
-        }
+    [[nodiscard]] static auto mix64(uint64_t state, uint64_t v) -> uint64_t {
+        return detail::wyhash::mix(state + v, uint64_t{0x9ddfea08eb382d69});
     }
 
     // Combines all elements of the tuple into a single 64bit hash. Each element is converted with to64() and then folded
     // into the running state with mix64(), from the first element to the last, starting with a state of 0. No temporary
     // buffer is involved, the state is carried from one element to the next.
     template <typename T, std::size_t... Idx>
     [[nodiscard]] static auto calc_hash(T const& t, std::index_sequence<Idx...>) noexcept -> uint64_t {
-        std::array<std::byte, (calc_buf_size<Args>() + ...)> tmp_buffer;
-        auto* buf_ptr = tmp_buffer.data() + tmp_buffer.size();
-        ((buf_ptr = put(buf_ptr, std::get<Idx>(t))), ...);
-        // at this point, buf_ptr==tmp_buffer.data()
-        return ankerl::unordered_dense::detail::wyhash::hash(tmp_buffer.data(), tmp_buffer.size());
+        auto h = uint64_t{};
+        ((h = mix64(h, to64(std::get<Idx>(t)))), ...);
+        return h;
     }
 };
 

diff --git a/test/meson.build b/test/meson.build
@@ -175,6 +175,7 @@ test_exe = executable(
         # disable these two if you don't want them
         #dependency('boost'),
         #dependency('absl_container', default_options: ['warning_level=0', 'werror=false'])
+        # dependency('absl_hash', method: 'builtin', default_options: ['warning_level=0', 'werror=false'])
     ],
 )
 

diff --git a/test/unit/tuple_hash.cpp b/test/unit/tuple_hash.cpp
@@ -2,6 +2,11 @@
 
 #include <app/doctest.h>
 
+#include <third-party/nanobench.h> // for Rng, doNotOptimizeAway, Bench
+
+#include <string>
+#include <string_view>
+
 TEST_CASE("tuple_hash") {
     auto m = ankerl::unordered_dense::map<std::pair<int, std::string>, int>();
     auto pair_hash = ankerl::unordered_dense::hash<std::pair<int, std::string>>{};
@@ -24,3 +29,29 @@ TEST_CASE("good_tuple_hash") {
 
     REQUIRE(hashes.size() == 256 * 256);
 }
+
+TEST_CASE("tuple_hash_with_stringview") {
+    using T = std::tuple<int, std::string_view>;
+
+    auto t = T();
+    std::get<0>(t) = 1;
+    auto str = std::string("hello");
+    std::get<1>(t) = str; // the view refers to the characters held by str, it is not a copy
+
+    auto h1 = ankerl::unordered_dense::hash<T>{}(t);
+    str = "world"; // t is not assigned again; only the characters behind the view change
+    REQUIRE(std::get<1>(t) == std::string_view{"world"}); // confirms the view now reads the new content
+    auto h2 = ankerl::unordered_dense::hash<T>{}(t);
+    REQUIRE(h1 != h2); // the hash has to depend on the viewed characters
+}
+
+TEST_CASE("bench_tuple_hash" * doctest::test_suite("bench")) {
+    using T = std::tuple<char, int, uint16_t, std::byte, uint64_t>;
+
+    auto h = uint64_t{}; // sum of all computed hashes
+    auto t = std::tuple<char, int, uint16_t, std::byte, uint64_t>{}; // NOTE(review): same type as T, could be T{}
+    ankerl::nanobench::Bench().run("ankerl hash", [&] {
+        h += ankerl::unordered_dense::hash<T>{}(t);
+        ++std::get<4>(t); // change the uint64_t element so the next run hashes a different tuple
+    });
+} // NOTE(review): h is never read after the run; consider handing it to doNotOptimizeAway - confirm