Skip to content

Commit

Permalink
Merge pull request #64 from djunzu/fix__binary_array_to_hex
Browse files Browse the repository at this point in the history
Fix binary array to hex
  • Loading branch information
JohannesBuchner committed Dec 6, 2017
2 parents 41c03fe + b89333f commit 5450033
Show file tree
Hide file tree
Showing 9 changed files with 203 additions and 43 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ build
dist
ImageHash.egg-info/
.eggs
.DS_Store
.DS_Store
.python-version
51 changes: 30 additions & 21 deletions imagehash/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,14 @@
# Load the package version from the VERSION file stored next to this module.
# The with-block closes the file handle deterministically instead of leaving
# the open handle for the garbage collector to clean up.
with open(os.path.join(os.path.abspath(
        os.path.dirname(__file__)), 'VERSION')) as _version_file:
    __version__ = _version_file.read().strip()


def _binary_array_to_hex(arr):
    """
    Internal function to make a hex string out of a binary array.

    The binary array might be created from a comparison - for example, in
    average hash, each pixel in the image is compared with the average pixel
    value. If the pixel's value is less than the average it gets a 0 and if
    it's more it gets a 1. The flattened array is then read as one big-endian
    bit string (first element = most significant bit) and written in
    hexadecimal.

    The result is left-padded with zeros to one hex digit per four bits
    (rounded up), so leading zero bits are not lost. An empty array gives
    an empty string.
    """
    # Truthiness decides each bit, so bool arrays and integer arrays whose
    # non-zero entries are not exactly 1 are both handled.
    bit_string = ''.join('1' if bit else '0' for bit in arr.flatten())
    if not bit_string:
        return ''
    # Integer ceiling division: unlike ceil(len / 4) this does not depend on
    # true division being in effect (plain "/" truncates on Python 2).
    width = (len(bit_string) + 3) // 4
    return '{:0>{width}x}'.format(int(bit_string, 2), width=width)


class ImageHash(object):
Expand Down Expand Up @@ -96,10 +86,29 @@ def __hash__(self):
return sum([2**(i % 8) for i, v in enumerate(self.hash.flatten()) if v])


def hex_to_hash(hexstr):
    """
    Convert a stored hash (hex, as retrieved from str(ImageHash))
    back to an ImageHash object.

    Notes:
    1. This algorithm assumes all hashes are bidimensional arrays
       with dimensions hash_size * hash_size.
    2. This algorithm does not work for hash_size < 2.

    Raises ValueError if hexstr is not a valid hexadecimal string or if it
    encodes more bits than a hash_size * hash_size array can hold.
    """
    # Every hex digit carries four bits; the side length is the largest
    # integer whose square fits in that many bits.
    hash_size = int(numpy.sqrt(len(hexstr) * 4))
    bit_count = hash_size * hash_size
    value = int(hexstr, 16)
    # A string produced from a square hash never sets bits beyond bit_count.
    # Without this check the extra bits would silently produce additional
    # rows and therefore a non-square hash array.
    if value < 0 or value >> bit_count:
        raise ValueError(
            'hex string {!r} does not fit a {}x{} hash'.format(
                hexstr, hash_size, hash_size))
    binary_array = '{:0>{width}b}'.format(value, width=bit_count)
    hash_array = numpy.array([bit == '1' for bit in binary_array])
    return ImageHash(hash_array.reshape((hash_size, hash_size)))

def old_hex_to_hash(hexstr, hash_size=8):
"""
Convert a stored hash (hex, as retrieved from str(Imagehash))
back to a Imagehash object. This method should be used for
hashes generated by ImageHash up to version 3.7. For hashes
generated by newer versions of ImageHash, hex_to_hash should
be used instead.
"""
l = []
count = hash_size * (hash_size // 4)
Expand All @@ -123,8 +132,8 @@ def average_hash(image, hash_size=8):
@image must be a PIL instance.
"""
if hash_size < 0:
raise ValueError("Hash size must be positive")
if hash_size < 2:
raise ValueError("Hash size must be greater than or equal to 2")

# convert to grayscale, then reduce size and complexity
image = image.convert("L").resize((hash_size, hash_size), Image.ANTIALIAS)
Expand All @@ -147,8 +156,8 @@ def phash(image, hash_size=8, highfreq_factor=4):
@image must be a PIL instance.
"""
if hash_size < 0:
raise ValueError("Hash size must be positive")
if hash_size < 2:
raise ValueError("Hash size must be greater than or equal to 2")

import scipy.fftpack
img_size = hash_size * highfreq_factor
Expand Down Expand Up @@ -191,8 +200,8 @@ def dhash(image, hash_size=8):
@image must be a PIL instance.
"""
# resize(w, h), but numpy.array((h, w))
if hash_size < 0:
raise ValueError("Hash size must be positive")
if hash_size < 2:
raise ValueError("Hash size must be greater than or equal to 2")

image = image.convert("L").resize((hash_size + 1, hash_size), Image.ANTIALIAS)
pixels = numpy.asarray(image)
Expand Down
33 changes: 18 additions & 15 deletions imagehash/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,27 @@ def check_hash_algorithm(self, func, image):
distance))
self.assertTrue(distance > 10, emsg)

def check_hash_length(self, func, image, sizes=range(2, 21)):
    """
    Assert that func honours the requested hash_size.

    For every hash_size in sizes, func(image, hash_size=hash_size) must
    return a hash whose array holds exactly hash_size ** 2 entries.
    sizes defaults to 2 through 20 inclusive.
    """
    for hash_size in sizes:
        image_hash = func(image, hash_size=hash_size)
        emsg = 'hash_size={} is not respected'.format(hash_size)
        self.assertEqual(image_hash.hash.size, hash_size**2, emsg)

def check_hash_stored(self, func, image, sizes=range(2, 21)):
    """
    Assert that a hash survives a round trip through its string form.

    For every hash_size in sizes the hash computed by func is converted
    to str, parsed back with imagehash.hex_to_hash, and must both compare
    equal to the original and be at hamming distance 0 from it.
    sizes defaults to 2 through 20 inclusive.
    """
    for hash_size in sizes:
        image_hash = func(image, hash_size)
        other_hash = imagehash.hex_to_hash(str(image_hash))
        emsg = 'stringified hash {} != original hash {}'.format(other_hash,
                                                                image_hash)
        self.assertEqual(image_hash, other_hash, emsg)
        distance = image_hash - other_hash
        emsg = ('unexpected hamming distance {}: original hash {} '
                '- stringified hash {}'.format(distance, image_hash,
                                               other_hash))
        self.assertEqual(distance, 0, emsg)

def check_hash_size(self, func, image, sizes=range(-1, 2)):
    """
    Assert that func rejects every hash_size in sizes with ValueError.

    sizes defaults to -1, 0 and 1.
    """
    for hash_size in sizes:
        with self.assertRaises(ValueError):
            func(image, hash_size)
4 changes: 2 additions & 2 deletions imagehash/tests/test_average_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ def test_average_hash(self):
self.check_hash_algorithm(self.func, self.image)

def test_average_hash_length(self):
    """Run the shared hash-length check with its default sizes."""
    self.check_hash_length(self.func, self.image)

def test_average_hash_stored(self):
    """Run the shared stored-hash (string round-trip) check."""
    self.check_hash_stored(self.func, self.image)

def test_average_hash_size(self):
    """Run the shared invalid-hash-size check with its default sizes."""
    self.check_hash_size(self.func, self.image)



Expand Down
4 changes: 2 additions & 2 deletions imagehash/tests/test_dhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ def test_dhash(self):
self.check_hash_algorithm(self.func, self.image)

def test_dhash_length(self):
    """Run the shared hash-length check with its default sizes."""
    self.check_hash_length(self.func, self.image)

def test_dhash_stored(self):
    """Run the shared stored-hash (string round-trip) check."""
    self.check_hash_stored(self.func, self.image)

def test_dhash_size(self):
    """Run the shared invalid-hash-size check with its default sizes."""
    self.check_hash_size(self.func, self.image)

if __name__ == '__main__':
unittest.main()
99 changes: 99 additions & 0 deletions imagehash/tests/test_hex_conversions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import unittest
import numpy as np
import imagehash


# Each row is a test case: [bit sequence, expected hexadecimal string].
# The expected string keeps leading zeros, using one hex digit for every
# four bits (rounded up), so '00001' maps to '01' rather than '1'.
binary_to_hexadecimal_values = [
['1', '1'],
['11', '3'],
['111', '7'],
['1111', 'f'],
['10000', '10'],
['110000', '30'],
['1110000', '70'],
['11110000', 'f0'],
['00001', '01'],
['000011', '03'],
['0000111', '07'],
['00001111', '0f'],
['10000001', '81'],
['00000000000000001', '00001'],
['000000000000000011', '00003'],
['0000000000000000111', '00007'],
['00000000000000001111', '0000f'],
['11110000111100001111', 'f0f0f'],
['00001111000011110000', '0f0f0'],
['11110000000100100011010001010110011110001001101010111100110111101111', 'f0123456789abcdef'],
['1001111000111100110000011111000011110000110000111110011111000000', '9e3cc1f0f0c3e7c0'],
['1000111100001111000011110000111100001111000010110000101101111010', '8f0f0f0f0f0b0b7a'],
]

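# Each row is a test case: [hexadecimal string, expected bit string].
# The bit string is the flattened square hash, so its length is
# hash_size * hash_size with hash_size = int(sqrt(4 * len(hex string)));
# e.g. '101' (3 hex digits) yields 9 bits, not 12.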
hexadecimal_to_binary_values = [
['1', '0001'],
['2', '0010'],
['3', '0011'],
['a', '1010'],
['f', '1111'],
['101', '100000001'],
['1b1', '110110001'],
['0b1', '010110001'],
['f0f0', '1111000011110000'],
['0f0f', '0000111100001111'],
['000c', '0000000000001100'],
['100000d', '1000000000000000000001101'],
['000000d', '0000000000000000000001101'],
['000000001', '000000000000000000000000000000000001'],
['800000001', '100000000000000000000000000000000001'],
['0000000000001', '0000000000000000000000000000000000000000000000001'],
['1000000000001', '1000000000000000000000000000000000000000000000001'],
['0000000000000001', '0000000000000000000000000000000000000000000000000000000000000001'],
['8000000000000001', '1000000000000000000000000000000000000000000000000000000000000001'],
]

# Each row is a test case where the first value is a hexadecimal
# sequence and the second value is the expected bool array for it.
hexadecimal_to_bool_array = [
['9e3cc1f0f0c3e7c0', np.array([ [True, False, False, True, True, True, True, False],
[False, False, True, True, True, True, False, False],
[True, True, False, False, False, False, False, True],
[True, True, True, True, False, False, False, False],
[True, True, True, True, False, False, False, False],
[True, True, False, False, False, False, True, True],
[True, True, True, False, False, True, True, True],
[True, True, False, False, False, False, False, False]]) ],
]

class TestHexConversions(unittest.TestCase):
    """Table-driven checks for the hex <-> bit-array conversion helpers.

    Every assertion carries a message naming the table row under test, so a
    failure inside one of the loops identifies the offending input.
    """

    def setUp(self):
        self.to_hex = imagehash._binary_array_to_hex
        self.from_hex = imagehash.hex_to_hash

    def test_binary_array_to_hex_input(self):
        """A two-dimensional bool array is rendered as the expected hex."""
        for hex_str, bool_array in hexadecimal_to_bool_array:
            result = self.to_hex(bool_array)
            emsg = 'array for {!r} was rendered as {!r}'.format(hex_str, result)
            self.assertEqual(hex_str, result, emsg)

    def test_hex_to_hash_output(self):
        """A hex string is decoded into the expected bool array."""
        for hex_str, bool_array in hexadecimal_to_bool_array:
            result = self.from_hex(hex_str).hash
            emsg = 'hex {!r} decoded to\n{}\nexpected\n{}'.format(
                hex_str, result, bool_array)
            self.assertTrue(np.array_equal(bool_array, result), emsg)

    def test_conversion_to_hex(self):
        """Flat bit sequences of assorted lengths map to the expected hex."""
        for bits, expected in binary_to_hexadecimal_values:
            bit_array = np.array([int(d) for d in bits])
            result = self.to_hex(bit_array)
            emsg = 'bits {!r} were rendered as {!r}'.format(bits, result)
            self.assertEqual(expected, result, emsg)

    def test_conversion_from_hex(self):
        """Hex strings decode to the expected flattened bit sequence."""
        for hex_str, expected in hexadecimal_to_binary_values:
            flat_bits = 1 * self.from_hex(hex_str).hash.flatten()
            result = ''.join(str(b) for b in flat_bits)
            emsg = 'hex {!r} decoded to bits {!r}'.format(hex_str, result)
            self.assertEqual(expected, result, emsg)


if __name__ == '__main__':
unittest.main()
30 changes: 30 additions & 0 deletions imagehash/tests/test_old_hex_conversions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import unittest
import numpy as np
import imagehash


# Each row is a test case where the first value is a hexadecimal
# sequence and the second value is the expected bool array for it.
old_hexadecimal_to_bool_array = [
['ffeb89818193ffff', np.array([ [True, True, True, True, True, True, True, True],
[True, True, False, True, False, True, True, True],
[True, False, False, True, False, False, False, True],
[True, False, False, False, False, False, False, True],
[True, False, False, False, False, False, False, True],
[True, True, False, False, True, False, False, True],
[True, True, True, True, True, True, True, True],
[True, True, True, True, True, True, True, True]]) ],
]

class TestOldHexConversions(unittest.TestCase):
    """Checks for old_hex_to_hash, the decoder for the earlier hex format."""

    def setUp(self):
        self.from_hex = imagehash.old_hex_to_hash

    def test_hex_to_hash_output(self):
        """An old-format hex string is decoded into the expected bool array."""
        for hex_str, bool_array in old_hexadecimal_to_bool_array:
            result = self.from_hex(hex_str).hash
            # Name the failing row; a bare assertTrue would only report
            # "False is not true".
            emsg = 'hex {!r} decoded to\n{}\nexpected\n{}'.format(
                hex_str, result, bool_array)
            self.assertTrue(np.array_equal(bool_array, result), emsg)


if __name__ == '__main__':
unittest.main()
4 changes: 2 additions & 2 deletions imagehash/tests/test_phash.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ def test_phash(self):
self.check_hash_algorithm(self.func, self.image)

def test_phash_length(self):
    """Run the shared hash-length check with its default sizes."""
    self.check_hash_length(self.func, self.image)

def test_phash_stored(self):
    """Run the shared stored-hash (string round-trip) check."""
    self.check_hash_stored(self.func, self.image)

def test_phash_size(self):
    """Run the shared invalid-hash-size check with its default sizes."""
    self.check_hash_size(self.func, self.image)

if __name__ == '__main__':
unittest.main()
18 changes: 18 additions & 0 deletions imagehash/tests/test_whash.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,23 @@
import unittest

import imagehash
import imagehash.tests as tests


class TestBasic(tests.TestImageHash):
    """Applies the shared hash checks to imagehash.whash."""

    def setUp(self):
        self.image = self.get_data_image()
        self.func = imagehash.whash

    def test_whash(self):
        """Run the shared hash-algorithm check."""
        self.check_hash_algorithm(self.func, self.image)

    def test_whash_length(self):
        """Run the shared hash-length check on power-of-two sizes."""
        power_of_two_sizes = [2, 4, 8, 16, 32, 64]
        self.check_hash_length(self.func, self.image,
                               sizes=power_of_two_sizes)

    def test_whash_stored(self):
        """Run the shared stored-hash check on power-of-two sizes."""
        power_of_two_sizes = [2, 4, 8, 16, 32, 64]
        self.check_hash_stored(self.func, self.image,
                               sizes=power_of_two_sizes)


class Test(unittest.TestCase):
Expand Down Expand Up @@ -57,5 +74,6 @@ def test_image_scale_not_2power(self):
with six.assertRaisesRegex(self, AssertionError, emsg):
imagehash.whash(self.image, image_scale=image_scale+1)


if __name__ == '__main__':
unittest.main()

0 comments on commit 5450033

Please sign in to comment.