diff --git a/CHANGELOG.md b/CHANGELOG.md index 15fa32b..b92e529 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,12 +25,15 @@ This project has adhered to - Change the format of CHANGELOG.md to conform to the [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard ([#63](https://github.com/hajimes/mmh3/issues/63)). +- **Backward-incompatible**: Change the constructors of hasher classes to + accept a buffer as the first argument. ### Fixed - Fix a reference leak in the `hash_from_buffer()` function ([#75](https://github.com/hajimes/mmh3/issues/75)). -- Fix type hints. +- Fix type hints ([#76](https://github.com/hajimes/mmh3/issues/76), + [#77](https://github.com/hajimes/mmh3/issues/77)). ## [4.1.0] - 2024-01-09 diff --git a/README.md b/README.md index 1074efc..d87186e 100644 --- a/README.md +++ b/README.md @@ -150,12 +150,15 @@ complete changelog. - Change the format of CHANGELOG.md to conform to the [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard ([#63](https://github.com/hajimes/mmh3/issues/63)). +- **Backward-incompatible**: Change the constructors of hasher classes to + accept a buffer as the first argument. #### Fixed - Fix a reference leak in the `hash_from_buffer()` function ([#75](https://github.com/hajimes/mmh3/issues/75)). -- Fix type hints. +- Fix type hints ([#76](https://github.com/hajimes/mmh3/issues/76), + [#77](https://github.com/hajimes/mmh3/issues/77)). ### [4.1.0] - 2024-01-09 @@ -166,20 +169,11 @@ complete changelog. #### Fixed - Fix issues with Bazel by changing the directory structure of the project - (). -- Fix incorrect type hints (). + ([#50](https://github.com/hajimes/mmh3/issues/50)). +- Fix incorrect type hints ([#51](https://github.com/hajimes/mmh3/issues/51)). - Fix invalid results on s390x when the arg `x64arch` of `hash64` or - `hash_bytes` is set to `False` (). - -### [4.0.1] - 2023-07-14 - -#### Changed - -- Refactor the project structure (). - -#### Fixed - -- Fix incorrect type hints. + `hash_bytes` is set to `False` + ([#52](https://github.com/hajimes/mmh3/issues/52)). ## License diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 7816513..722360e 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -102,6 +102,14 @@ by Python. by Simon Willison is a good introduction to Docker/QEMU settings for emulating s390x. +If the above does not work, you may also want to try the following: + +```shell +docker run --rm --privileged tonistiigi/binfmt --install all +docker buildx create --name mybuilder --use +docker run -it multiarch/ubuntu-core:s390x-focal /bin/bash +``` + ## Pull request Once you've pushed your changes to your fork, you can @@ -160,7 +168,7 @@ For example, pip install ".[benchmark]" mkdir results python benchmark/benchmark.py -o results/mmh3_128.json \ - --test-hash mmh3_128 --test-buffer-size-max 134217728 + --test-hash mmh3_128 --test-buffer-size-max 262144 ``` As of version 4.2.0, the following hash function identifiers are available for diff --git a/src/mmh3/mmh3module.c b/src/mmh3/mmh3module.c index ef6bf07..a7d50a8 100644 --- a/src/mmh3/mmh3module.c +++ b/src/mmh3/mmh3module.c @@ -1623,62 +1623,18 @@ typedef struct { static PyTypeObject MMH3Hasher128x86Type; -static void -MMH3Hasher128x86_dealloc(MMH3Hasher128x86 *self) -{ - Py_TYPE(self)->tp_free((PyObject *)self); -} - -static PyObject * -MMH3Hasher128x86_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - MMH3Hasher128x86 *self; - self = (MMH3Hasher128x86 *)type->tp_alloc(type, 0); - if (self != NULL) { - self->h1 = 0; - self->h2 = 0; - self->h3 = 0; - self->h4 = 0; - self->buffer1 = 0; - self->buffer2 = 0; - self->buffer3 = 0; - self->buffer4 = 0; - self->shift = 0; - self->length = 0; - } - return (PyObject *)self; -} - -static int -MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds) -{ - static char *kwlist[] = {"seed", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|I", kwlist, &self->h1)) - return -1; - - self->h2 = self->h1; - self->h3 = self->h1; - self->h4 = self->h1; - - return 0; -} - -static PyObject * -MMH3Hasher128x86_update(MMH3Hasher128x86 *self, PyObject *obj) +static FORCE_INLINE void +update_x86_128_impl(MMH3Hasher128x86 *self, Py_buffer *buf) { Py_ssize_t i = 0; - Py_buffer buf; uint32_t h1 = self->h1; uint32_t h2 = self->h2; uint32_t h3 = self->h3; uint32_t h4 = self->h4; uint32_t k1 = 0; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - for (; i < buf.len; i++) { - k1 = ((uint8_t *)buf.buf)[i]; + for (; i < buf->len; i++) { + k1 = ((uint8_t *)buf->buf)[i]; if (self->shift < 32) { // TODO: use bit ops self->buffer1 |= k1 << self->shift; } @@ -1720,12 +1676,71 @@ MMH3Hasher128x86_update(MMH3Hasher128x86 *self, PyObject *obj) } } - PyBuffer_Release(&buf); + PyBuffer_Release(buf); self->h1 = h1; self->h2 = h2; self->h3 = h3; self->h4 = h4; +} + +static void +MMH3Hasher128x86_dealloc(MMH3Hasher128x86 *self) +{ + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject * +MMH3Hasher128x86_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + MMH3Hasher128x86 *self; + self = (MMH3Hasher128x86 *)type->tp_alloc(type, 0); + if (self != NULL) { + self->h1 = 0; + self->h2 = 0; + self->h3 = 0; + self->h4 = 0; + self->buffer1 = 0; + self->buffer2 = 0; + self->buffer3 = 0; + self->buffer4 = 0; + self->shift = 0; + self->length = 0; + } + return (PyObject *)self; +} + +static int +MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds) +{ + Py_buffer target_buf = {0}; + static char *kwlist[] = {"data", "seed", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*I", kwlist, &target_buf, + &self->h1)) + return -1; + + self->h2 = self->h1; + self->h3 = self->h1; + self->h4 = self->h1; + + if (target_buf.buf != NULL) { + // target_buf will be released in update_x86_128_impl + update_x86_128_impl(self, &target_buf); + } + + return 0; +} + +static PyObject * +MMH3Hasher128x86_update(MMH3Hasher128x86 *self, PyObject *obj) +{ + Py_buffer buf; + + GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); + + // buf will be released in update_x86_128_impl + update_x86_128_impl(self, &buf); Py_RETURN_NONE; } diff --git a/tests/test_mmh3_hasher.py b/tests/test_mmh3_hasher.py index 88186d9..104059b 100644 --- a/tests/test_mmh3_hasher.py +++ b/tests/test_mmh3_hasher.py @@ -20,6 +20,11 @@ def test_mmh3_32_digest() -> None: hasher.update(b" world!") assert hasher.digest() == b"\xBA\x4C\x88\x24" + hasher = mmh3.mmh3_32(b"", 0x9747B28C) + hasher.update(b"Hello,") + hasher.update(b" world!") + assert hasher.digest() == b"\xBA\x4C\x88\x24" + hasher = mmh3.mmh3_32(b"Hello,", 0x9747B28C) hasher.update(b" world!") assert hasher.digest() == b"\xBA\x4C\x88\x24" @@ -146,6 +151,10 @@ def test_mmh3_x64_128_digest() -> None: hasher.update(b"The quick brown fox jumps over the lazy dog") assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9" + hasher = mmh3.mmh3_x64_128(b"", 0x9747B28C) + hasher.update(b"The quick brown fox jumps over the lazy dog") + assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9" + hasher = mmh3.mmh3_x64_128(b"The quick brown ", seed=0x9747B28C) hasher.update(b"fox jumps over the lazy dog") assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9" @@ -327,6 +336,25 @@ def test_mmh3_x86_128_digest() -> None: hasher.update(b"zy dog") assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd" + hasher = mmh3.mmh3_x86_128(b"", 0x9747B28C) + hasher.update(b"The quick brown fox ju") + hasher.update(b"mps ove") + hasher.update(b"r the la") + hasher.update(b"zy dog") + assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd" + + hasher = mmh3.mmh3_x86_128(b"The quick brown fox ju", seed=0x9747B28C) + hasher.update(b"mps ove") + hasher.update(b"r the la") + hasher.update(b"zy dog") + assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd" + + hasher = mmh3.mmh3_x86_128(b"The quick brown fox ju", 0x9747B28C) + hasher.update(b"mps ove") + hasher.update(b"r the la") + hasher.update(b"zy dog") + assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd" + def test_mmh3_x86_128_sintdigest() -> None: hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)