Skip to content

Commit

Permalink
Fix mmh3_86_128 to handle big-endian
Browse files Browse the repository at this point in the history
  • Loading branch information
hajimes committed Jan 4, 2024
1 parent ad30524 commit 05f4c9b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
7 changes: 7 additions & 0 deletions src/mmh3/_mmh3/murmurhash3.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,17 @@ murmurhash3_x86_128(const void *key, const Py_ssize_t len, uint32_t seed,
h3 += h1;
h4 += h1;

#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
((uint32_t *)out)[0] = h2;
((uint32_t *)out)[1] = h1;
((uint32_t *)out)[2] = h4;
((uint32_t *)out)[3] = h3;
#else
((uint32_t *)out)[0] = h1;
((uint32_t *)out)[1] = h2;
((uint32_t *)out)[2] = h3;
((uint32_t *)out)[3] = h4;
#endif
}

//-----------------------------------------------------------------------------
Expand Down
36 changes: 35 additions & 1 deletion src/mmh3/_mmh3/refresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,39 @@ def transform_finalization_mixes(subcode: str) -> str:
return subcode


def transform_x86_128_return(subcode: str) -> str:
    """Revise the return block of MurmurHash3_x86_128 so that it handles big endian.

    Each run of stores ``((uint32_t*)out)[0] = h1;`` ...
    ``((uint32_t*)out)[3] = h4;`` is wrapped in a preprocessor conditional.
    When ``__BYTE_ORDER__`` reports big endian, the words are stored
    pairwise swapped (h2, h1, h4, h3); otherwise the original stores are
    kept verbatim in the ``#else`` branch.

    Args:
        subcode (str): The code to be transformed.

    Returns:
        str: The transformed code. Input that does not contain the store
            sequence is returned unchanged.
    """

    # ``\\1`` reaches re.sub as the backreference ``\1`` and is expanded to
    # the matched little-endian stores.
    BYTE_SWAP_IF_BIG_ENDIAN = textwrap.dedent(
        """\
        #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
        ((uint32_t *)out)[0] = h2;
        ((uint32_t *)out)[1] = h1;
        ((uint32_t *)out)[2] = h4;
        ((uint32_t *)out)[3] = h3;
        #else
        \\1
        #endif
        """
    )

    # The lazy ``*?`` stops at the nearest ``[3] = h4;``. A greedy match would
    # run to the last occurrence in the text and pull any code in between
    # into the ``#else`` branch, removing it from big-endian builds.
    # ``[\s\S]`` already matches newlines, so no regex flags are needed.
    return re.sub(
        r"(\(\(uint32_t\*\)out\)\[0\] = h1;[\s\S]*?\(\(uint32_t\*\)out\)\[3\] = h4;)",
        BYTE_SWAP_IF_BIG_ENDIAN,
        subcode,
    )


def expand_win_stdint_typedefs(subcode: str) -> str:
"""Delineate int type defitions for the older versions of the VS compiler.
Expand Down Expand Up @@ -623,7 +656,8 @@ def lowercase_function_names(subcode: str) -> str:
new_source_builder.add(source.note_comment)
new_source_builder.add(source.header_include, [str.lower])
new_source_builder.add(
source.body, [introduce_py_ssize_t, lowercase_function_names]
source.body,
[introduce_py_ssize_t, transform_x86_128_return, lowercase_function_names],
)

new_header_builder = MMH3CodeBuilder()
Expand Down

0 comments on commit 05f4c9b

Please sign in to comment.