Skip to content

Commit

Permalink
Add data param to mmh3_x86_128 __init__
Browse files Browse the repository at this point in the history
  • Loading branch information
hajimes committed Sep 16, 2024
1 parent 439ecf5 commit b109158
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 65 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@ This project has adhered to
- Change the format of CHANGELOG.md to conform to the
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard
([#63](https://github.com/hajimes/mmh3/issues/63)).
- **Backward-incompatible**: Change the constructors of hasher classes to
accept a buffer as the first argument.

### Fixed

- Fix a reference leak in the `hash_from_buffer()` function
([#75](https://github.com/hajimes/mmh3/issues/75)).
- Fix type hints.
- Fix type hints ([#76](https://github.com/hajimes/mmh3/issues/76),
[#77](https://github.com/hajimes/mmh3/issues/77)).

## [4.1.0] - 2024-01-09

Expand Down
22 changes: 8 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,15 @@ complete changelog.
- Change the format of CHANGELOG.md to conform to the
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) standard
([#63](https://github.com/hajimes/mmh3/issues/63)).
- **Backward-incompatible**: Change the constructors of hasher classes to
accept a buffer as the first argument.

#### Fixed

- Fix a reference leak in the `hash_from_buffer()` function
([#75](https://github.com/hajimes/mmh3/issues/75)).
- Fix type hints.
- Fix type hints ([#76](https://github.com/hajimes/mmh3/issues/76),
[#77](https://github.com/hajimes/mmh3/issues/77)).

### [4.1.0] - 2024-01-09

Expand All @@ -166,20 +169,11 @@ complete changelog.
#### Fixed

- Fix issues with Bazel by changing the directory structure of the project
(<https://github.com/hajimes/mmh3/issues/50>).
- Fix incorrect type hints (<https://github.com/hajimes/mmh3/issues/51>).
([#50](https://github.com/hajimes/mmh3/issues/50)).
- Fix incorrect type hints ([#51](https://github.com/hajimes/mmh3/issues/51)).
- Fix invalid results on s390x when the arg `x64arch` of `hash64` or
`hash_bytes` is set to `False` (<https://github.com/hajimes/mmh3/issues/52>).

### [4.0.1] - 2023-07-14

#### Changed

- Refactor the project structure (<https://github.com/hajimes/mmh3/issues/48>).

#### Fixed

- Fix incorrect type hints.
`hash_bytes` is set to `False`
([#52](https://github.com/hajimes/mmh3/issues/52)).

## License

Expand Down
10 changes: 9 additions & 1 deletion docs/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,14 @@ by Python.
by Simon Willison is a good introduction to Docker/QEMU settings for emulating
s390x.

If the above does not work, you may also want to try the following:

```shell
docker run --rm --privileged tonistiigi/binfmt --install all
docker buildx create --name mybuilder --use
docker run -it multiarch/ubuntu-core:s390x-focal /bin/bash
```

## Pull request

Once you've pushed your changes to your fork, you can
Expand Down Expand Up @@ -160,7 +168,7 @@ For example,
pip install ".[benchmark]"
mkdir results
python benchmark/benchmark.py -o results/mmh3_128.json \
--test-hash mmh3_128 --test-buffer-size-max 134217728
--test-hash mmh3_128 --test-buffer-size-max 262144
```

As of version 4.2.0, the following hash function identifiers are available for
Expand Down
113 changes: 64 additions & 49 deletions src/mmh3/mmh3module.c
Original file line number Diff line number Diff line change
Expand Up @@ -1623,62 +1623,18 @@ typedef struct {

static PyTypeObject MMH3Hasher128x86Type;

/*
 * Deallocator for MMH3Hasher128x86 instances.
 *
 * Looks up the type of the instance and passes the instance to that type's
 * tp_free slot. The struct fields touched elsewhere in this file are plain
 * integers, so nothing else is released here.
 */
static void
MMH3Hasher128x86_dealloc(MMH3Hasher128x86 *self)
{
    PyObject *as_object = (PyObject *)self;

    Py_TYPE(as_object)->tp_free(as_object);
}

/*
 * Allocates a MMH3Hasher128x86 through type->tp_alloc and sets every hasher
 * field to zero. args and kwds are accepted but not inspected here.
 *
 * Returns the new object cast to PyObject *, or NULL if tp_alloc fails.
 */
static PyObject *
MMH3Hasher128x86_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    MMH3Hasher128x86 *hasher = (MMH3Hasher128x86 *)type->tp_alloc(type, 0);

    if (hasher == NULL) {
        return NULL;
    }

    // The four h* words.
    hasher->h1 = 0;
    hasher->h2 = 0;
    hasher->h3 = 0;
    hasher->h4 = 0;

    // The four buffer* words together with their shift counter.
    hasher->buffer1 = 0;
    hasher->buffer2 = 0;
    hasher->buffer3 = 0;
    hasher->buffer4 = 0;
    hasher->shift = 0;

    hasher->length = 0;

    return (PyObject *)hasher;
}

/*
 * Initializer for MMH3Hasher128x86.
 *
 * Accepts one optional argument, "seed" (positional or keyword). It is
 * parsed with the "I" format directly into self->h1, and h2, h3 and h4 are
 * then set to the same value. When seed is omitted, h1 keeps whatever value
 * it already held.
 *
 * Returns 0 on success, or -1 when argument parsing fails.
 */
static int
MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"seed", NULL};
    int parsed_ok =
        PyArg_ParseTupleAndKeywords(args, kwds, "|I", kwlist, &self->h1);

    if (!parsed_ok) {
        return -1;
    }

    // All four state words start from the same seed.
    self->h2 = self->h3 = self->h4 = self->h1;

    return 0;
}

static PyObject *
MMH3Hasher128x86_update(MMH3Hasher128x86 *self, PyObject *obj)
static FORCE_INLINE void
update_x86_128_impl(MMH3Hasher128x86 *self, Py_buffer *buf)
{
Py_ssize_t i = 0;
Py_buffer buf;
uint32_t h1 = self->h1;
uint32_t h2 = self->h2;
uint32_t h3 = self->h3;
uint32_t h4 = self->h4;
uint32_t k1 = 0;

GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);

for (; i < buf.len; i++) {
k1 = ((uint8_t *)buf.buf)[i];
for (; i < buf->len; i++) {
k1 = ((uint8_t *)buf->buf)[i];
if (self->shift < 32) { // TODO: use bit ops
self->buffer1 |= k1 << self->shift;
}
Expand Down Expand Up @@ -1720,12 +1676,71 @@ MMH3Hasher128x86_update(MMH3Hasher128x86 *self, PyObject *obj)
}
}

PyBuffer_Release(&buf);
PyBuffer_Release(buf);

self->h1 = h1;
self->h2 = h2;
self->h3 = h3;
self->h4 = h4;
}

/*
 * Deallocator for MMH3Hasher128x86 instances.
 *
 * Looks up the type of the instance and passes the instance to that type's
 * tp_free slot. The struct fields touched elsewhere in this file are plain
 * integers, so nothing else is released here.
 */
static void
MMH3Hasher128x86_dealloc(MMH3Hasher128x86 *self)
{
    PyObject *as_object = (PyObject *)self;

    Py_TYPE(as_object)->tp_free(as_object);
}

/*
 * Allocates a MMH3Hasher128x86 through type->tp_alloc and sets every hasher
 * field to zero. args and kwds are accepted but not inspected here.
 *
 * Returns the new object cast to PyObject *, or NULL if tp_alloc fails.
 */
static PyObject *
MMH3Hasher128x86_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    MMH3Hasher128x86 *hasher = (MMH3Hasher128x86 *)type->tp_alloc(type, 0);

    if (hasher == NULL) {
        return NULL;
    }

    // The four h* words.
    hasher->h1 = 0;
    hasher->h2 = 0;
    hasher->h3 = 0;
    hasher->h4 = 0;

    // The four buffer* words together with their shift counter.
    hasher->buffer1 = 0;
    hasher->buffer2 = 0;
    hasher->buffer3 = 0;
    hasher->buffer4 = 0;
    hasher->shift = 0;

    hasher->length = 0;

    return (PyObject *)hasher;
}

/*
 * Initializer for MMH3Hasher128x86.
 *
 * Arguments (both optional, positional or keyword):
 *   data - a bytes-like object, parsed with "y*" into a Py_buffer view; when
 *          present its bytes are fed to the hasher immediately.
 *   seed - parsed with the "I" format directly into self->h1; h2, h3 and h4
 *          are then set to the same value. When omitted, h1 keeps the value
 *          it already held.
 *
 * Returns 0 on success, or -1 when argument parsing fails.
 */
static int
MMH3Hasher128x86_init(MMH3Hasher128x86 *self, PyObject *args, PyObject *kwds)
{
    // Zero-initialized so that "no data argument" is distinguishable below.
    Py_buffer target_buf = {0};
    static char *kwlist[] = {"data", "seed", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|y*I", kwlist, &target_buf,
                                     &self->h1)) {
        return -1;
    }

    self->h2 = self->h1;
    self->h3 = self->h1;
    self->h4 = self->h1;

    if (target_buf.buf != NULL) {
        // Ownership of the view passes to update_x86_128_impl, which
        // releases it after consuming the bytes.
        update_x86_128_impl(self, &target_buf);
    }
    else if (target_buf.obj != NULL) {
        // A view was acquired but it exposes no memory (an exporter may
        // report buf == NULL for empty data). There is nothing to hash, but
        // the view must still be released or the export leaks.
        PyBuffer_Release(&target_buf);
    }

    return 0;
}

// Feeds the bytes of obj into the hasher and returns None.
//
// self: the hasher whose state is advanced.
// obj:  the object to read bytes from; it is handed to
//       GET_BUFFER_VIEW_OR_ERROUT to obtain a Py_buffer view.
//
// NOTE(review): GET_BUFFER_VIEW_OR_ERROUT is defined outside this view;
// judging by its name it presumably returns from this function with an
// error set when no view can be obtained -- confirm against the macro.
static PyObject *
MMH3Hasher128x86_update(MMH3Hasher128x86 *self, PyObject *obj)
{
Py_buffer buf;

GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);

// Ownership of buf passes to update_x86_128_impl, which calls
// PyBuffer_Release on it, so buf must not be released again here.
update_x86_128_impl(self, &buf);

Py_RETURN_NONE;
}
Expand Down
28 changes: 28 additions & 0 deletions tests/test_mmh3_hasher.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ def test_mmh3_32_digest() -> None:
hasher.update(b" world!")
assert hasher.digest() == b"\xBA\x4C\x88\x24"

hasher = mmh3.mmh3_32(b"", 0x9747B28C)
hasher.update(b"Hello,")
hasher.update(b" world!")
assert hasher.digest() == b"\xBA\x4C\x88\x24"

hasher = mmh3.mmh3_32(b"Hello,", 0x9747B28C)
hasher.update(b" world!")
assert hasher.digest() == b"\xBA\x4C\x88\x24"
Expand Down Expand Up @@ -146,6 +151,10 @@ def test_mmh3_x64_128_digest() -> None:
hasher.update(b"The quick brown fox jumps over the lazy dog")
assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"

hasher = mmh3.mmh3_x64_128(b"", 0x9747B28C)
hasher.update(b"The quick brown fox jumps over the lazy dog")
assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"

hasher = mmh3.mmh3_x64_128(b"The quick brown ", seed=0x9747B28C)
hasher.update(b"fox jumps over the lazy dog")
assert hasher.digest() == b"!1c\xd2;\x7f\x8as\xe5\x16\xc0~rsE\xf9"
Expand Down Expand Up @@ -327,6 +336,25 @@ def test_mmh3_x86_128_digest() -> None:
hasher.update(b"zy dog")
assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

hasher = mmh3.mmh3_x86_128(b"", 0x9747B28C)
hasher.update(b"The quick brown fox ju")
hasher.update(b"mps ove")
hasher.update(b"r the la")
hasher.update(b"zy dog")
assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

hasher = mmh3.mmh3_x86_128(b"The quick brown fox ju", seed=0x9747B28C)
hasher.update(b"mps ove")
hasher.update(b"r the la")
hasher.update(b"zy dog")
assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"

hasher = mmh3.mmh3_x86_128(b"The quick brown fox ju", 0x9747B28C)
hasher.update(b"mps ove")
hasher.update(b"r the la")
hasher.update(b"zy dog")
assert hasher.digest() == b"^\xd5\xd4\x8aqa\xb8L\x9c:\xa7\x8e>y\xb6\xcd"


def test_mmh3_x86_128_sintdigest() -> None:
hasher = mmh3.mmh3_x86_128(seed=0x9747B28C)
Expand Down

0 comments on commit b109158

Please sign in to comment.