-
Notifications
You must be signed in to change notification settings - Fork 8
/
mphf.hpp
129 lines (104 loc) · 4.14 KB
/
mphf.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#pragma once
#include <random>
#include "bitpair_vector.hpp"
#include "ranked_bitpair_vector.hpp"
#include "perfutils.hpp"
namespace emphf {
template <typename BaseHasher>
class mphf {
public:
mphf()
{}
template <typename HypergraphSorter, typename Range, typename Adaptor>
mphf(HypergraphSorter& sorter, size_t n,
Range const& input_range, Adaptor adaptor,
double gamma = 1.23)
: m_n(n)
, m_hash_domain((size_t(std::ceil(double(m_n) * gamma)) + 2) / 3)
{
typedef typename HypergraphSorter::node_t node_t;
typedef typename HypergraphSorter::hyperedge hyperedge;
typedef decltype(*std::begin(input_range)) value_type;
size_t nodes_domain = m_hash_domain * 3;
if (nodes_domain >= std::numeric_limits<node_t>::max()) {
throw std::invalid_argument("Too many nodes for node_t");
}
auto edge_gen = [&](value_type s) {
using std::get;
auto hashes = m_hasher(adaptor(s));
return hyperedge((node_t)(get<0>(hashes) % m_hash_domain),
(node_t)(m_hash_domain +
(get<1>(hashes) % m_hash_domain)),
(node_t)(2 * m_hash_domain +
(get<2>(hashes) % m_hash_domain)));
};
std::mt19937_64 rng(37); // deterministic seed
for (size_t trial = 0; ; ++trial) {
logger() << "Hypergraph generation: trial " << trial << std::endl;
m_hasher = BaseHasher::generate(rng);
if (sorter.try_generate_and_sort(input_range, edge_gen,
m_n, m_hash_domain)) break;
}
auto peeling_order = sorter.get_peeling_order();
bitpair_vector bv(nodes_domain);
logger() << "Assigning values" << std::endl;
for (auto edge = peeling_order.first;
edge != peeling_order.second;
++edge) {
uint64_t target = orientation(*edge);
uint64_t assigned = bv[edge->v1] + bv[edge->v2];
// "assigned values" must be nonzeros to be ranked, so
// if the result is 0 we assign 3
bv.set(edge->v0, ((target - assigned + 9) % 3) ?: 3);
}
m_bv.build(std::move(bv));
}
uint64_t size() const
{
return m_n;
}
BaseHasher const& base_hasher() const
{
return m_hasher;
}
template <typename T, typename Adaptor>
uint64_t lookup(T val, Adaptor adaptor)
{
using std::get;
auto hashes = m_hasher(adaptor(val));
uint64_t nodes[3] = {get<0>(hashes) % m_hash_domain,
m_hash_domain + (get<1>(hashes) % m_hash_domain),
2 * m_hash_domain + (get<2>(hashes) % m_hash_domain)};
uint64_t hidx = (m_bv[nodes[0]] + m_bv[nodes[1]] + m_bv[nodes[2]]) % 3;
return m_bv.rank(nodes[hidx]);
}
void swap(mphf& other)
{
std::swap(m_n, other.m_n);
std::swap(m_hash_domain, other.m_hash_domain);
m_hasher.swap(other.m_hasher);
m_bv.swap(other.m_bv);
}
void save(std::ostream& os) const
{
os.write(reinterpret_cast<char const*>(&m_n), sizeof(m_n));
os.write(reinterpret_cast<char const*>(&m_hash_domain),
sizeof(m_hash_domain));
m_hasher.save(os);
m_bv.save(os);
}
void load(std::istream& is)
{
is.read(reinterpret_cast<char*>(&m_n), sizeof(m_n));
is.read(reinterpret_cast<char*>(&m_hash_domain),
sizeof(m_hash_domain));
m_hasher.load(is);
m_bv.load(is);
}
private:
uint64_t m_n;
uint64_t m_hash_domain;
BaseHasher m_hasher;
ranked_bitpair_vector m_bv;
};
}