Skip to content

Commit

Permalink
Switch from smhasher to mmh3
Browse files Browse the repository at this point in the history
  • Loading branch information
dan-blanchard committed May 30, 2017
1 parent 06d1b87 commit 9f214d6
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 11 deletions.
22 changes: 18 additions & 4 deletions probably/hashfunctions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
import smhasher
import struct

import mmh3
from six import text_type
from six.moves import range


def hash64(key, seed=0):
    """
    Wrapper around mmh3.hash64 that returns a single unsigned 64-bit value.

    mmh3.hash64 returns more than one value; only the first is kept. That
    value is packed as a signed 64-bit integer and then re-read as a
    big-endian unsigned 64-bit integer, so the result is always
    non-negative and is interpreted the way smhasher used to do.

    ``key`` is handed to mmh3.hash64 unchanged. ``seed`` defaults to 0 so
    that callers which only supply a key (as the HyperLogLog code does)
    keep working.
    """
    hash_val = mmh3.hash64(key, seed)[0]
    # 'q' packs in the host's native byte order while '>Q' always reads
    # big-endian, so on a little-endian host this also reverses the bytes.
    # NOTE(review): result therefore depends on host endianness -- confirm
    # this matches what smhasher produced on the platforms that matter.
    return struct.unpack('>Q', struct.pack('q', hash_val))[0]


def generate_hashfunctions(nbr_bits, nbr_slices):
"""Generate a set of hash functions.
Expand All @@ -15,10 +29,10 @@ def _make_hashfuncs(key):
else:
key = str(key)
rval = []
current_hash = None
current_hash = 0
for i in range(nbr_slices):
seed = current_hash or 0
current_hash = smhasher.murmur3_x64_64(key, seed)
seed = current_hash
current_hash = hash64(key, seed)
rval.append(current_hash % nbr_bits)
return rval
return _make_hashfuncs
8 changes: 4 additions & 4 deletions probably/hll.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import absolute_import, division, print_function

import numpy as np
import smhasher
from six import PY3
from six.moves import cPickle as pickle
from six.moves import range

from .hashfunctions import hash64


if PY3:
long = int
Expand Down Expand Up @@ -49,9 +49,9 @@ def add(self, uuid):
if uuid:
# Computing the hash
try:
x = smhasher.murmur3_x64_64(uuid)
x = hash64(uuid)
except UnicodeEncodeError:
x = smhasher.murmur3_x64_64(uuid.encode('ascii', 'ignore'))
x = hash64(uuid.encode('ascii', 'ignore'))
# Finding the register to update by using the first b bits as an index
j = x & ((1 << self.b) - 1)
# Remove those b bits
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
bitarray
numpy
six
smhasher
mmh3>=2.4
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def finalize_options(self):
packages=find_packages(),
platforms=['any'],
zip_safe=False,
install_requires=['numpy', 'bitarray', 'six', 'smhasher'],
install_requires=['numpy', 'bitarray', 'six', 'mmh3>=2.4'],
setup_requires=['numpy'],
cmdclass={'build_ext': build_ext},
ext_modules=[Extension("probably.maintenance", ["probably/maintenance.c"])],
Expand Down
2 changes: 1 addition & 1 deletion tests/cdbf_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_expiration_realtime(self):
elapsed = t2 - t1
experimental_expiration = time.time() - start
print(experimental_expiration)
assert (experimental_expiration - self.expiration) < 0.25 # Arbitrary error threshold
assert (experimental_expiration - self.expiration) < 0.28 # Arbitrary error threshold

def test_expiration(self):
existing = self.bf.add('random_uuid')
Expand Down

0 comments on commit 9f214d6

Please sign in to comment.