Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wasm2c: update memory/table operations to use u64 + harmonize checks #2506

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 37 additions & 42 deletions src/prebuilt/wasm2c_source_declarations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,32 @@ R"w2c_template( (CHECK_CALL_INDIRECT(table, ft, x), \
R"w2c_template( DO_CALL_INDIRECT(table, t, x, __VA_ARGS__))
)w2c_template"
R"w2c_template(
#ifdef SUPPORT_MEMORY64
static inline bool add_overflow(uint64_t a, uint64_t b, uint64_t* resptr) {
)w2c_template"
R"w2c_template(#define RANGE_CHECK(mem, offset, len) \
R"w2c_template(#if __has_builtin(__builtin_add_overflow)
)w2c_template"
R"w2c_template( return __builtin_add_overflow(a, b, resptr);
)w2c_template"
R"w2c_template(#elif defined(_MSC_VER)
)w2c_template"
R"w2c_template( return _addcarry_u64(0, a, b, resptr);
)w2c_template"
R"w2c_template(#else
)w2c_template"
R"w2c_template(#error "Missing implementation of __builtin_add_overflow or _addcarry_u64"
)w2c_template"
R"w2c_template(#endif
)w2c_template"
R"w2c_template(}
)w2c_template"
R"w2c_template(
#define RANGE_CHECK(mem, offset, len) \
)w2c_template"
R"w2c_template( do { \
)w2c_template"
R"w2c_template( uint64_t res; \
)w2c_template"
R"w2c_template( if (__builtin_add_overflow(offset, len, &res)) \
R"w2c_template( if (UNLIKELY(add_overflow(offset, len, &res))) \
)w2c_template"
R"w2c_template( TRAP(OOB); \
)w2c_template"
Expand All @@ -183,16 +200,6 @@ R"w2c_template( TRAP(OOB); \
)w2c_template"
R"w2c_template( } while (0);
)w2c_template"
R"w2c_template(#else
)w2c_template"
R"w2c_template(#define RANGE_CHECK(mem, offset, len) \
)w2c_template"
R"w2c_template( if (UNLIKELY(offset + (uint64_t)len > mem->size)) \
)w2c_template"
R"w2c_template( TRAP(OOB);
)w2c_template"
R"w2c_template(#endif
)w2c_template"
R"w2c_template(
#if WASM_RT_USE_SEGUE_FOR_THIS_MODULE && WASM_RT_SANITY_CHECKS
)w2c_template"
Expand Down Expand Up @@ -976,7 +983,7 @@ R"w2c_template( return sqrtf(x);
R"w2c_template(}
)w2c_template"
R"w2c_template(
static inline void memory_fill(wasm_rt_memory_t* mem, u32 d, u32 val, u32 n) {
static inline void memory_fill(wasm_rt_memory_t* mem, u64 d, u32 val, u64 n) {
)w2c_template"
R"w2c_template( RANGE_CHECK(mem, d, n);
)w2c_template"
Expand All @@ -989,11 +996,11 @@ static inline void memory_copy(wasm_rt_memory_t* dest,
)w2c_template"
R"w2c_template( const wasm_rt_memory_t* src,
)w2c_template"
R"w2c_template( u32 dest_addr,
R"w2c_template( u64 dest_addr,
)w2c_template"
R"w2c_template( u32 src_addr,
R"w2c_template( u64 src_addr,
)w2c_template"
R"w2c_template( u32 n) {
R"w2c_template( u64 n) {
)w2c_template"
R"w2c_template( RANGE_CHECK(dest, dest_addr, n);
)w2c_template"
Expand All @@ -1010,7 +1017,7 @@ R"w2c_template( const u8* src,
)w2c_template"
R"w2c_template( u32 src_size,
)w2c_template"
R"w2c_template( u32 dest_addr,
R"w2c_template( u64 dest_addr,
)w2c_template"
R"w2c_template( u32 src_addr,
)w2c_template"
Expand Down Expand Up @@ -1046,7 +1053,7 @@ R"w2c_template( const wasm_elem_segment_exp
)w2c_template"
R"w2c_template( u32 src_size,
)w2c_template"
R"w2c_template( u32 dest_addr,
R"w2c_template( u64 dest_addr,
)w2c_template"
R"w2c_template( u32 src_addr,
)w2c_template"
Expand All @@ -1058,9 +1065,7 @@ R"w2c_template( if (UNLIKELY(src_addr + (uint64_t)n > src_size))
)w2c_template"
R"w2c_template( TRAP(OOB);
)w2c_template"
R"w2c_template( if (UNLIKELY(dest_addr + (uint64_t)n > dest->size))
)w2c_template"
R"w2c_template( TRAP(OOB);
R"w2c_template( RANGE_CHECK(dest, dest_addr, n);
)w2c_template"
R"w2c_template( for (u32 i = 0; i < n; i++) {
)w2c_template"
Expand Down Expand Up @@ -1107,7 +1112,7 @@ R"w2c_template(static inline void externref_table_init(wasm_rt_externref_table_t
)w2c_template"
R"w2c_template( u32 src_size,
)w2c_template"
R"w2c_template( u32 dest_addr,
R"w2c_template( u64 dest_addr,
)w2c_template"
R"w2c_template( u32 src_addr,
)w2c_template"
Expand All @@ -1117,9 +1122,7 @@ R"w2c_template( if (UNLIKELY(src_addr + (uint64_t)n > src_size))
)w2c_template"
R"w2c_template( TRAP(OOB);
)w2c_template"
R"w2c_template( if (UNLIKELY(dest_addr + (uint64_t)n > dest->size))
)w2c_template"
R"w2c_template( TRAP(OOB);
R"w2c_template( RANGE_CHECK(dest, dest_addr, n);
)w2c_template"
R"w2c_template( for (u32 i = 0; i < n; i++) {
)w2c_template"
Expand All @@ -1136,17 +1139,11 @@ R"w2c_template( static inline void type##_table_copy(wasm_rt_##type##_table_t*
)w2c_template"
R"w2c_template( const wasm_rt_##type##_table_t* src, \
)w2c_template"
R"w2c_template( u32 dest_addr, u32 src_addr, u32 n) { \
)w2c_template"
R"w2c_template( if (UNLIKELY(dest_addr + (uint64_t)n > dest->size)) \
)w2c_template"
R"w2c_template( TRAP(OOB); \
)w2c_template"
R"w2c_template( if (UNLIKELY(src_addr + (uint64_t)n > src->size)) \
R"w2c_template( u64 dest_addr, u64 src_addr, u64 n) { \
)w2c_template"
R"w2c_template( TRAP(OOB); \
R"w2c_template( RANGE_CHECK(dest, dest_addr, n); \
)w2c_template"
R"w2c_template( \
R"w2c_template( RANGE_CHECK(src, src_addr, n); \
)w2c_template"
R"w2c_template( memmove(dest->data + dest_addr, src->data + src_addr, \
)w2c_template"
Expand All @@ -1164,7 +1161,7 @@ R"w2c_template(
)w2c_template"
R"w2c_template( static inline wasm_rt_##type##_t type##_table_get( \
)w2c_template"
R"w2c_template( const wasm_rt_##type##_table_t* table, u32 i) { \
R"w2c_template( const wasm_rt_##type##_table_t* table, u64 i) { \
)w2c_template"
R"w2c_template( if (UNLIKELY(i >= table->size)) \
)w2c_template"
Expand All @@ -1184,7 +1181,7 @@ R"w2c_template(
)w2c_template"
R"w2c_template( static inline void type##_table_set(const wasm_rt_##type##_table_t* table, \
)w2c_template"
R"w2c_template( u32 i, const wasm_rt_##type##_t val) { \
R"w2c_template( u64 i, const wasm_rt_##type##_t val) { \
)w2c_template"
R"w2c_template( if (UNLIKELY(i >= table->size)) \
)w2c_template"
Expand All @@ -1204,13 +1201,11 @@ R"w2c_template(
)w2c_template"
R"w2c_template( static inline void type##_table_fill(const wasm_rt_##type##_table_t* table, \
)w2c_template"
R"w2c_template( u32 d, const wasm_rt_##type##_t val, \
)w2c_template"
R"w2c_template( u32 n) { \
R"w2c_template( u64 d, const wasm_rt_##type##_t val, \
)w2c_template"
R"w2c_template( if (UNLIKELY((uint64_t)d + n > table->size)) \
R"w2c_template( u64 n) { \
)w2c_template"
R"w2c_template( TRAP(OOB); \
R"w2c_template( RANGE_CHECK(table, d, n); \
)w2c_template"
R"w2c_template( for (uint32_t i = d; i < d + n; i++) { \
)w2c_template"
Expand Down
58 changes: 28 additions & 30 deletions src/template/wasm2c.declarations.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,20 +89,24 @@ static inline bool func_types_eq(const wasm_rt_func_type_t a,
(CHECK_CALL_INDIRECT(table, ft, x), \
DO_CALL_INDIRECT(table, t, x, __VA_ARGS__))

#ifdef SUPPORT_MEMORY64
/*
 * Adds two unsigned 64-bit values and reports whether the sum overflowed.
 *
 * a, b    operands to add.
 * resptr  receives the sum reduced modulo 2^64 (written on every path).
 *
 * Returns true if a + b does not fit in 64 bits, false otherwise.
 */
static inline bool add_overflow(uint64_t a, uint64_t b, uint64_t* resptr) {
#if __has_builtin(__builtin_add_overflow)
  return __builtin_add_overflow(a, b, resptr);
#elif defined(_MSC_VER)
  /* The carry-out of the 64-bit addition is the overflow flag. */
  return _addcarry_u64(0, a, b, resptr);
#else
  /* Portable fallback instead of a hard #error: unsigned addition wraps
   * modulo 2^64, so the sum is smaller than an operand exactly when a carry
   * out of bit 63 occurred. */
  *resptr = a + b;
  return *resptr < a;
#endif
}

#define RANGE_CHECK(mem, offset, len) \
Copy link
Collaborator

@shravanrn shravanrn Nov 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While defaulting to the 64-bit RANGE_CHECK is fine for memcpy, tables, etc. (it's unlikely to affect performance), the concern is that the 64-bit RANGE_CHECK will slow down accesses to 32-bit linear memories for bounds-checked wasm2c. Firefox uses the bounds-checked wasm2c for Wasm on 32-bit devices, so it is perf-sensitive to this.

I don't know if this is addressed in a future PR, but this particular PR would be a perf problem for the Firefox use case.

  • If you believe future PRs you are landing will give us the property "bounds checks on 32-bit memories are not slowed down", then I don't have any concerns. (I'd prefer landing this PR and the PR that fixes it in quick succession, though.) I'll look through the other PRs next to see if this is resolved by them.

  • If you believe this is not addressed in future PRs, we may need to specialize the bounds check that is added depending on the type of memory, which may in turn require specializing i32_load etc. on the type of memory.

  • An alternate approach would be to make the current PR about changing the RANGE_CHECK on the memory_fill style operations only, but leaving the RANGE_CHECKs on memory ops as is, i.e., it checks depending on SUPPORT_MEMORY64

Edit: I see that this might possibly be addressed in the next PR. If so, please disregard this concern.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for these thoughtful (and well-taken) comments. I believe #2507 will nail this for you (by preserving the current RANGE_CHECK on 32-bit, default-page-size memories), so, how about we wait to get alignment on both #2506 and #2507 and then land them at the same time.

I should say that even the current RANGE_CHECK uses 64-bit arithmetic:

#define RANGE_CHECK(mem, offset, len)               \
  if (UNLIKELY(offset + (uint64_t)len > mem->size)) \
    TRAP(OOB);

... but the difference is that RANGE_CHECK64 does an explicit check for 64-bit overflow. I wish I had the benchmarking infrastructure to promise you it won't affect performance on 32-bit x86 but... safer to wait for #2507 which lets you keep the same code.

do { \
uint64_t res; \
if (__builtin_add_overflow(offset, len, &res)) \
if (UNLIKELY(add_overflow(offset, len, &res))) \
TRAP(OOB); \
if (UNLIKELY(res > mem->size)) \
TRAP(OOB); \
} while (0);
#else
#define RANGE_CHECK(mem, offset, len) \
if (UNLIKELY(offset + (uint64_t)len > mem->size)) \
TRAP(OOB);
#endif

#if WASM_RT_USE_SEGUE_FOR_THIS_MODULE && WASM_RT_SANITY_CHECKS
#include <stdio.h>
Expand Down Expand Up @@ -523,16 +527,16 @@ static float wasm_sqrtf(float x) {
return sqrtf(x);
}

static inline void memory_fill(wasm_rt_memory_t* mem, u32 d, u32 val, u32 n) {
static inline void memory_fill(wasm_rt_memory_t* mem, u64 d, u32 val, u64 n) {
RANGE_CHECK(mem, d, n);
memset(MEM_ADDR(mem, d, n), val, n);
}

static inline void memory_copy(wasm_rt_memory_t* dest,
const wasm_rt_memory_t* src,
u32 dest_addr,
u32 src_addr,
u32 n) {
u64 dest_addr,
u64 src_addr,
u64 n) {
RANGE_CHECK(dest, dest_addr, n);
RANGE_CHECK(src, src_addr, n);
memmove(MEM_ADDR(dest, dest_addr, n), MEM_ADDR(src, src_addr, n), n);
Expand All @@ -541,7 +545,7 @@ static inline void memory_copy(wasm_rt_memory_t* dest,
static inline void memory_init(wasm_rt_memory_t* dest,
const u8* src,
u32 src_size,
u32 dest_addr,
u64 dest_addr,
u32 src_addr,
u32 n) {
if (UNLIKELY(src_addr + (uint64_t)n > src_size))
Expand All @@ -560,14 +564,13 @@ typedef struct {
static inline void funcref_table_init(wasm_rt_funcref_table_t* dest,
const wasm_elem_segment_expr_t* src,
u32 src_size,
u32 dest_addr,
u64 dest_addr,
u32 src_addr,
u32 n,
void* module_instance) {
if (UNLIKELY(src_addr + (uint64_t)n > src_size))
TRAP(OOB);
if (UNLIKELY(dest_addr + (uint64_t)n > dest->size))
TRAP(OOB);
RANGE_CHECK(dest, dest_addr, n);
for (u32 i = 0; i < n; i++) {
const wasm_elem_segment_expr_t* const src_expr = &src[src_addr + i];
wasm_rt_funcref_t* const dest_val = &(dest->data[dest_addr + i]);
Expand All @@ -591,13 +594,12 @@ static inline void funcref_table_init(wasm_rt_funcref_table_t* dest,
// Currently wasm2c only supports initializing externref tables with ref.null.
static inline void externref_table_init(wasm_rt_externref_table_t* dest,
u32 src_size,
u32 dest_addr,
u64 dest_addr,
u32 src_addr,
u32 n) {
if (UNLIKELY(src_addr + (uint64_t)n > src_size))
TRAP(OOB);
if (UNLIKELY(dest_addr + (uint64_t)n > dest->size))
TRAP(OOB);
RANGE_CHECK(dest, dest_addr, n);
for (u32 i = 0; i < n; i++) {
dest->data[dest_addr + i] = wasm_rt_externref_null_value;
}
Expand All @@ -606,12 +608,9 @@ static inline void externref_table_init(wasm_rt_externref_table_t* dest,
#define DEFINE_TABLE_COPY(type) \
static inline void type##_table_copy(wasm_rt_##type##_table_t* dest, \
const wasm_rt_##type##_table_t* src, \
u32 dest_addr, u32 src_addr, u32 n) { \
if (UNLIKELY(dest_addr + (uint64_t)n > dest->size)) \
TRAP(OOB); \
if (UNLIKELY(src_addr + (uint64_t)n > src->size)) \
TRAP(OOB); \
\
u64 dest_addr, u64 src_addr, u64 n) { \
RANGE_CHECK(dest, dest_addr, n); \
RANGE_CHECK(src, src_addr, n); \
memmove(dest->data + dest_addr, src->data + src_addr, \
n * sizeof(wasm_rt_##type##_t)); \
}
Expand All @@ -621,7 +620,7 @@ DEFINE_TABLE_COPY(externref)

#define DEFINE_TABLE_GET(type) \
static inline wasm_rt_##type##_t type##_table_get( \
const wasm_rt_##type##_table_t* table, u32 i) { \
const wasm_rt_##type##_table_t* table, u64 i) { \
if (UNLIKELY(i >= table->size)) \
TRAP(OOB); \
return table->data[i]; \
Expand All @@ -632,7 +631,7 @@ DEFINE_TABLE_GET(externref)

#define DEFINE_TABLE_SET(type) \
static inline void type##_table_set(const wasm_rt_##type##_table_t* table, \
u32 i, const wasm_rt_##type##_t val) { \
u64 i, const wasm_rt_##type##_t val) { \
if (UNLIKELY(i >= table->size)) \
TRAP(OOB); \
table->data[i] = val; \
Expand All @@ -643,10 +642,9 @@ DEFINE_TABLE_SET(externref)

#define DEFINE_TABLE_FILL(type) \
static inline void type##_table_fill(const wasm_rt_##type##_table_t* table, \
u32 d, const wasm_rt_##type##_t val, \
u32 n) { \
if (UNLIKELY((uint64_t)d + n > table->size)) \
TRAP(OOB); \
u64 d, const wasm_rt_##type##_t val, \
u64 n) { \
RANGE_CHECK(table, d, n); \
for (uint32_t i = d; i < d + n; i++) { \
table->data[i] = val; \
} \
Expand Down
Loading
Loading