diff --git a/build-scripts/unsupported_combination.cmake b/build-scripts/unsupported_combination.cmake index 4284be32bf..1789c71b52 100644 --- a/build-scripts/unsupported_combination.cmake +++ b/build-scripts/unsupported_combination.cmake @@ -64,7 +64,17 @@ endfunction() if(WAMR_BUILD_EXCE_HANDLING EQUAL 1) check_aot_mode_error("Unsupported build configuration: EXCE_HANDLING + AOT") - check_fast_interp_error("Unsupported build configuration: EXCE_HANDLING + FAST_INTERP") + # FAST_INTERP + EXCE_HANDLING is supported. Throw-only modules (WASM + # modules that declare tags and execute throw / rethrow without + # ever entering a same-function try / catch handler) propagate the + # throw out through the caller chain and the existing got_exception + # bailout path, exactly like any other trap. This covers Porffor (its + # JS-to-wasm compiler emits 0 try/catch handlers; every JS throw + # escapes to the host). Modules that contain WASM_OP_TRY / CATCH / + # CATCH_ALL / DELEGATE / RETHROW are dispatched through the + # per-function exception-handler table — see find_a_catch_handler in + # core/iwasm/interpreter/wasm_interp_fast.c. Only EXT_OP_TRY still + # reports "unsupported opcode" at runtime. check_fast_jit_error("Unsupported build configuration: EXCE_HANDLING + FAST_JIT") check_llvm_jit_error("Unsupported build configuration: EXCE_HANDLING + JIT") endif() diff --git a/core/iwasm/interpreter/wasm.h b/core/iwasm/interpreter/wasm.h index c60349d10f..879bdc64b1 100644 --- a/core/iwasm/interpreter/wasm.h +++ b/core/iwasm/interpreter/wasm.h @@ -681,6 +681,54 @@ typedef struct WASMImport { } u; } WASMImport; +#if WASM_ENABLE_EXCE_HANDLING != 0 && WASM_ENABLE_FAST_INTERP != 0 +/* One typed `catch N` clause inside a single try-region. The handler_pc + * points at the first opcode of the catch body in the rewritten fast- + * interp IR; the loader patches it in pass 2 of the preprocess pass. 
*/ +typedef struct WASMFastEHCatch { + uint32 tag_index; + uint8 *handler_pc; + /* Tag-with-params payload routing (same-function dispatch only). + * When this catch matches, the throw walker copies `param_cell_num` + * 32-bit cells from the throw site's *source* slots (encoded as + * `int16` immediates after the THROW opcode in the rewritten IR) + * into these *destination* slots in the catch body's `frame_lp`, + * then sets `frame_ip = handler_pc`. The destination slots are + * allocated by the CATCH loader at preprocess time, mirroring how + * block-with-params allocate fresh `dynamic_offset` slots via + * `PUSH_OFFSET_TYPE`. NULL iff `param_cell_num == 0` (the typical + * tag-without-params shape, e.g. Porffor's empty-payload tags). + * + * Cross-function dispatch (caller's catch fires for a callee's + * throw) never carries a payload: the callee's source slots sit in + * a frame that return_func is about to tear down, so a throw with + * params that finds no same-function handler traps instead. See the + * ignored test `cross_function_tag_with_params` in + * crates/benchmark-core/tests/eh_correctness.rs. */ + uint32 param_cell_num; + int16 *param_dst_offsets; +} WASMFastEHCatch; + +/* One entry per same-function try-region, indexed by the uint32 immediate + * emitted after the rewritten TRY opcode. Allocated once per function at + * load time, sized by `func->exception_handler_count`. At runtime the + * dispatch loop carries one stack-allocated handle per *active* try- + * region (see frame->eh_count); hot ops (CALL / LOAD / STORE) never + * touch this table. */ +typedef struct WASMFastEHEntry { + uint32 catch_count; + WASMFastEHCatch *catches; /* may be NULL when catch_count == 0 */ + uint8 *catch_all_pc; /* NULL if no `catch_all` clause */ + /* UINT32_MAX iff the try-region closes with `end`; otherwise the + * LEB depth from `delegate N`. */ + uint32 delegate_target_depth; + /* Rewritten-IR pc of the op immediately after the try-region's `end` + * (or `delegate`). 
CATCH / CATCH_ALL handlers branch here when their + * body completes; the loader patches it when the `end` is seen. */ + uint8 *end_of_region_pc; +} WASMFastEHEntry; +#endif /* WASM_ENABLE_EXCE_HANDLING && WASM_ENABLE_FAST_INTERP */ + struct WASMFunction { #if WASM_ENABLE_CUSTOM_NAME_SECTION != 0 char *field_name; @@ -721,7 +769,19 @@ struct WASMFunction { #endif #if WASM_ENABLE_EXCE_HANDLING != 0 + /* Number of `try` opcodes in this function. Populated by the loader + * during the preprocess pass (classic-interp uses this to size the + * runtime handler-pointer array stored on the value stack; fast- + * interp uses it to size `exception_handlers[]` below). */ uint32 exception_handler_count; +#if WASM_ENABLE_FAST_INTERP != 0 + /* Per-function table of try-regions in source order, length + * `exception_handler_count`. Allocated and populated in pass 2 of + * the fast-interp preprocess pass; the uint32 immediate emitted + * after the rewritten TRY opcode is the index into this array. + * NULL iff `exception_handler_count == 0`. */ + WASMFastEHEntry *exception_handlers; +#endif #endif #if WASM_ENABLE_FAST_JIT != 0 || WASM_ENABLE_JIT != 0 \ diff --git a/core/iwasm/interpreter/wasm_interp.h b/core/iwasm/interpreter/wasm_interp.h index 1416405460..8ca6fe5f23 100644 --- a/core/iwasm/interpreter/wasm_interp.h +++ b/core/iwasm/interpreter/wasm_interp.h @@ -40,6 +40,17 @@ typedef struct WASMInterpFrame { */ bool exception_raised; uint32 tag_index; +#if WASM_ENABLE_FAST_INTERP != 0 + /* Number of *currently-active* try-regions on this frame's eh- + * stack. The stack itself lives in the trailing cells of the + * frame's operand[] block — see call_func_from_entry in + * wasm_interp_fast.c where all_cell_num is grown by + * `exception_handler_count * EH_ENTRY_CELLS` cells per frame. 
Read+written only by + * the WASM_OP_TRY / CATCH / CATCH_ALL / END / THROW handlers; the + * hot ops (CALL / LOAD / STORE) never touch it, so this field + * stays cold and clusters with exception_raised/tag_index above. */ + uint32 eh_count; +#endif #endif #if WASM_ENABLE_FAST_INTERP != 0 diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c index 937a7fdecf..31343f5723 100644 --- a/core/iwasm/interpreter/wasm_interp_fast.c +++ b/core/iwasm/interpreter/wasm_interp_fast.c @@ -25,6 +25,15 @@ #include "simde/wasm/simd128.h" #endif +/* MSVC has no `__builtin_expect`; the cold-path hints below are + * GCC/Clang only. Provide a no-op fallback so the loop still + * compiles on the Windows MSVC build. Branch-predictor hints are + * an optimization, not correctness, so dropping them on MSVC is + * fine. */ +#if !defined(__GNUC__) && !defined(__clang__) +#define __builtin_expect(expr, expected) (expr) +#endif + typedef int32 CellType_I32; typedef int64 CellType_I64; typedef float32 CellType_F32; @@ -102,6 +111,21 @@ typedef float64 CellType_F64; #define CHECK_INSTRUCTION_LIMIT() (void)0 #endif +#if WASM_ENABLE_EXCE_HANDLING != 0 +/* Per-frame eh-stack entries are 2 cells wide. Cell 0 packs the index + * into `func->exception_handlers[]` (low 31 bits) and a state bit + * (top bit): clear when the try-region's handler is *in scope* (TRY + * state — a throw matching one of its catches will dispatch into the + * handler), set once the throw walker has selected one of its + * handlers (CATCH state — further throws raised from inside that + * handler skip the entry and propagate outward). Cell 1 holds the + * wasm tag index of the exception currently being handled (written + * by the throw walker on dispatch; read by RETHROW). The tag is + * undefined while the entry is in TRY state. 
*/ +#define EH_TRY_CATCH_STATE_BIT 0x80000000u +#define EH_ENTRY_CELLS 2 +#endif + static inline uint32 rotl32(uint32 n, uint32 c) { @@ -1538,6 +1562,30 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, uint8 *maddr = NULL; uint32 local_idx, local_offset, global_idx; uint8 opcode = 0, local_type, *global_addr; +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* Carries the wasm tag index from WASM_OP_THROW to the + * find_a_catch_handler label, and from a callee's return through + * frame->tag_index back to a caller-side find_a_catch_handler. + * Cold path only — the dispatch loop's hot ops never reference + * this variable, so the compiler is free to spill it. */ + uint32 exception_tag_index = 0; + /* Tag-with-params payload routing for same-function dispatch. + * Read off the IR after THROW's tag_index immediate; + * `throw_src_offsets` points at the first src-slot int16 in the + * rewritten IR, and `throw_param_cell_num` is the total cell + * count across all of the tag's params. find_a_catch_handler + * uses these to copy frame_lp[src[i]] into the matched catch's + * pre-allocated dst slots. Both are cold-path-only — like + * exception_tag_index, the dispatch loop's hot ops never + * reference them. RETHROW re-points throw_src_offsets at the + * still-alive catch's `param_dst_offsets` (the original + * payload values, unchanged by the catch body since they live + * in a different slot range from locals) so the re-raised + * exception carries the same payload across outer try-regions + * in this frame. 
*/ + uint32 throw_param_cell_num = 0; + int16 *throw_src_offsets = NULL; +#endif #if WASM_ENABLE_INSTRUCTION_METERING != 0 int instructions_left = -1; @@ -1774,7 +1822,27 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, val = GET_OPERAND(uint32, I32, 0); frame_ip += 2; - if ((uint32)val >= tbl_inst->cur_size) { + /* Bounds / null / type-mismatch checks below are + * structurally cold paths — well-formed wasm modules + * pass them on every dispatched CALL_INDIRECT. Marking + * them `__builtin_expect(cond, 0)` lets the compiler + * (a) hint the branch predictor with a static-bias + * fallback for unseen call sites, and (b) lay out the + * error-handling tail away from the hot path so each + * fall-through case stays in one straight-line I-cache + * line. Apple Silicon E-cores (Icestorm, iPhone 12) + * showed ~27 % `Discarded` (bad-spec / mispredict) + * on the AS variant of graphql-validation under + * fast-interp, where megamorphic vtable dispatch + * hits CALL_INDIRECT thousands of times; the layout + * hint matters more than the branch hint on Apple's + * sophisticated predictor. PMU bucket shares stay + * within run-to-run noise on both Porffor and AS + * graphql-validation workloads, so the change is + * documentation-as-code more than a speedup — + * keep it because the cold-path semantic is real + * and the cost is zero. 
*/ + if (__builtin_expect((uint32)val >= tbl_inst->cur_size, 0)) { wasm_set_exception(module, "undefined element"); goto got_exception; } @@ -1782,13 +1850,13 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* clang-format off */ #if WASM_ENABLE_GC == 0 fidx = (uint32)tbl_inst->elems[val]; - if (fidx == (uint32)-1) { + if (__builtin_expect(fidx == (uint32)-1, 0)) { wasm_set_exception(module, "uninitialized element"); goto got_exception; } #else func_obj = (WASMFuncObjectRef)tbl_inst->elems[val]; - if (!func_obj) { + if (__builtin_expect(!func_obj, 0)) { wasm_set_exception(module, "uninitialized element"); goto got_exception; } @@ -1801,7 +1869,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, * another module. in that case, we don't validate * the elem value while loading */ - if (fidx >= module->e->function_count) { + if (__builtin_expect(fidx >= module->e->function_count, 0)) { wasm_set_exception(module, "unknown function"); goto got_exception; } @@ -1816,12 +1884,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, /* clang-format off */ #if WASM_ENABLE_GC == 0 - if (cur_type != cur_func_type) { + if (__builtin_expect(cur_type != cur_func_type, 0)) { wasm_set_exception(module, "indirect call type mismatch"); goto got_exception; } #else - if (!wasm_func_type_is_super_of(cur_type, cur_func_type)) { + if (__builtin_expect( + !wasm_func_type_is_super_of(cur_type, cur_func_type), + 0)) { wasm_set_exception(module, "indirect call type mismatch"); goto got_exception; } @@ -1836,14 +1906,410 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, } #if WASM_ENABLE_EXCE_HANDLING != 0 + HANDLE_OP(WASM_OP_THROW) + { + /* Loader emits + * `WASM_OP_THROW, uint32 tag_index, uint32 param_cell_num, + * int16 src_offset[0], + * ..., + * int16 src_offset[param_cell_num - 1]`. 
Read the tag plus payload-source + * metadata, then walk the eh-stack in find_a_catch_handler to + * find a matching catch — first in this frame, then via + * return_func's hook in caller frames, and finally + * falling out to the host via got_exception when no + * match is found anywhere. + * + * Payload routing: when a same-function catch matches, + * find_a_catch_handler copies frame_lp[src[i]] into + * the catch's pre-allocated dst slots (recorded on + * `WASMFastEHCatch.param_dst_offsets` at load time). + * For tag-without-params (the typical Porffor shape), + * `throw_param_cell_num == 0` makes the copy a no-op. + * For cross-function dispatch the source frame is + * torn down before the caller's walker runs, so a + * payload-bearing throw traps — this gap is documented in + * AGENTS.md and exercised as + * `cross_function_tag_with_params` (#[ignore]). */ + exception_tag_index = read_uint32(frame_ip); + throw_param_cell_num = read_uint32(frame_ip); + throw_src_offsets = (int16 *)frame_ip; + frame_ip += sizeof(int16) * throw_param_cell_num; + goto find_a_catch_handler; + } + + find_a_catch_handler: + { + /* The eh-stack lives in the trailing cells of + * frame->operand[] (see call_func_from_entry and the + * runtime push from WASM_OP_TRY). Each entry packs the + * eh-table index into the low 31 bits; the top bit + * (EH_TRY_CATCH_STATE_BIT) is set on entries whose + * catch handler is *already running* — those are + * skipped here so a throw raised from inside a catch + * body propagates outward rather than re-entering the + * same handler. + * + * Cost shape: the walk runs only on the throw path + * (cold). CALL / LOAD / STORE handlers are untouched, + * and the eh-stack cells share a cache line with the + * value stack they're allocated next to, so the walk + * hits warm memory. + * + * Known limitation in this patch: try-regions with a + * non-void result-type are *not yet supported* by the + * normal-flow path. 
The fix is a loader-side + * try-body→block-dynamic-offset COPY emit at CATCH + * processing time (mirrors how WASM_OP_ELSE aligns + * the if-body's result via reserve_block_ret). See the + * AGENTS.md "Open follow-up — WAMR fast-interp legacy + * exception handling" section for the architectural + * note. The throw → catch dispatch implemented here + * still works correctly for void-result try-regions + * (which is what graphql-validation-porf-accurate's + * single try-block is). */ + WASMFunction *cur_wasm_func = cur_func->u.func; + uint32 *eh_stack = frame_lp + cur_func->param_cell_num + + cur_func->local_cell_num + + cur_wasm_func->max_stack_cell_num; + uint32 i; + for (i = frame->eh_count; i > 0; i--) { + uint32 *cells = eh_stack + (i - 1) * EH_ENTRY_CELLS; + uint32 packed = cells[0]; + uint32 eh_idx; + WASMFastEHEntry *entry; + uint32 j; + if (packed & EH_TRY_CATCH_STATE_BIT) + continue; /* in-progress catch — skip */ + eh_idx = packed & ~EH_TRY_CATCH_STATE_BIT; + bh_assert(eh_idx < cur_wasm_func->exception_handler_count); + entry = &cur_wasm_func->exception_handlers[eh_idx]; + if (entry->delegate_target_depth != UINT32_MAX) { + /* This try-region was closed by `delegate N`, + * not `end`. The spec says the exception is + * re-raised at the location of the target + * block — i.e. it propagates past every try + * whose body the delegate's try sits inside + * (but the target is also inside). The loader + * already counted those tries as + * `delegate_target_depth = delta`. Marking + * THIS entry as consumed and decrementing `i` + * by `delta` makes the for-loop's natural + * i-- land on the first eh-stack entry + * strictly *outside* the target block — which + * is exactly where the spec wants the throw + * to resume matching. + * + * If `delta + 1 >= i`, the target block is + * outside this function's eh-stack entirely + * (e.g. `delegate `): + * break out to the "no handler in this + * frame" path and let return_func forward the + * exception to the caller. 
+ * + * Cost: cold path; only THROW reaches here. + * Hot ops untouched. */ + uint32 delta = entry->delegate_target_depth; + cells[0] = packed | EH_TRY_CATCH_STATE_BIT; + if (delta + 1 >= i) { + /* Underflow guard + escape signal: any + * `delta` that would skip past the start + * of the eh-stack means the target lies + * past this function's try-blocks. */ + break; + } + i -= delta; + continue; + } + for (j = 0; j < entry->catch_count; j++) { + if (entry->catches[j].tag_index == exception_tag_index) { + /* Mark the entry as in-progress catch and + * stash the tag that's being handled so a + * RETHROW from this catch body can re- + * raise it. */ + cells[0] = packed | EH_TRY_CATCH_STATE_BIT; + cells[1] = exception_tag_index; + /* Payload copy (same-function dispatch). + * The loader guaranteed + * `entry->catches[j].param_cell_num == + * throw_param_cell_num` by checking the + * tag type at both THROW and CATCH; the + * runtime just executes the cell-wise + * frame_lp move. Tag-without-params makes + * the loop trivial. */ + if (throw_param_cell_num > 0 + && entry->catches[j].param_dst_offsets) { + uint32 c; + int16 *dst = entry->catches[j].param_dst_offsets; + for (c = 0; c < throw_param_cell_num; c++) { + frame_lp[dst[c]] = + frame_lp[throw_src_offsets[c]]; + } + } + /* Pop the inner eh-stack entries that the + * throw is jumping past. When the match is + * at the topmost entry this is a no-op + * (i == frame->eh_count). When the match is + * an outer entry, the nested-try entries + * above it (indices i .. eh_count-1) are + * out of scope after the catch-dispatch; + * leaving them counted would let a + * subsequent throw inside the catch body + * see stale in-scope entries (and a tight + * loop of throw → outer-catch → throw + * would eventually overflow the fixed + * reservation). The matched entry stays + * at index i-1 with its state bit set; the + * catch body's END pops it when it + * completes. 
Cost: one indexed store on + * the cold throw path; CALL / LOAD / STORE + * untouched. */ + frame->eh_count = i; + frame_ip = entry->catches[j].handler_pc; + HANDLE_OP_END(); + } + } + if (entry->catch_all_pc) { + /* catch_all binds no payload (spec: catch_all + * has no exception values), so we drop the + * src cells here. RETHROW from inside a + * catch_all body cannot re-emit a payload — + * documented as a known limitation. */ + cells[0] = packed | EH_TRY_CATCH_STATE_BIT; + cells[1] = exception_tag_index; + /* Same unwind as the typed-catch path above — + * pop any nested-try entries the throw is + * jumping past so a subsequent throw inside + * this catch_all body doesn't dispatch + * against stale inner entries. */ + frame->eh_count = i; + frame_ip = entry->catch_all_pc; + HANDLE_OP_END(); + } + } + /* No handler in this frame. Hand the exception off to + * the caller via return_func, which checks + * frame->exception_raised after RECOVER_CONTEXT and + * re-enters this label with the caller's frame in + * scope. If we're already at the top of the wasm + * stack, the existing got_exception path lets the + * host observe the trap via wasm_runtime_get_exception. + * + * Tag-with-params payload is intentionally NOT + * preserved across the frame boundary: the source + * cells (throw_src_offsets) live in *this* frame's + * frame_lp, which return_func is about to tear down. + * A caller-side typed catch would then bind + * uninitialized destination slots, producing wrong + * results in the catch body (or, if the typed catch + * uses the slots as a struct-of-pointers, memory + * corruption). The safe action when a payload- + * bearing throw escapes its callee is to trap to the + * host with a clear diagnostic. Same-function + * payload routing (the common Porffor / AS shape) + * is unaffected — it dispatches via the loop above + * before this branch runs. 
catch_all in the caller + * would technically tolerate a zero-payload bind, + * but the typed-vs-catch_all choice happens in the + * caller's walker, which we can't peek into here + * without coupling the frames; trap unconditionally + * for payload-bearing throws and let the test + * `cross_function_tag_with_params` document the + * shape. */ + if (prev_frame && prev_frame->ip) { + if (throw_param_cell_num > 0) { + wasm_set_exception(module, + "cross-function exception payload " + "not supported by fast-interp"); + goto got_exception; + } + prev_frame->tag_index = exception_tag_index; + prev_frame->exception_raised = true; + goto return_func; + } + { + char exception_buf[64]; + snprintf(exception_buf, sizeof(exception_buf), + "wasm exception thrown (tag %u)", exception_tag_index); + wasm_set_exception(module, exception_buf); + } + goto got_exception; + } + HANDLE_OP(WASM_OP_TRY) + { + /* Loader emits `WASM_OP_TRY, uint32 eh_idx`. Push one + * entry onto the per-frame eh-stack so subsequent + * THROW / RETHROW handlers can find the in-scope + * catches by walking it. + * + * The eh-stack lives in the trailing cells of + * frame->operand[] — EH_ENTRY_CELLS cells per try- + * region, sized by + * cur_wasm_func->exception_handler_count * + * EH_ENTRY_CELLS at frame setup. Cell 1 (caught_tag) + * is unspecified while the entry is in TRY state and + * gets written by the throw walker on catch dispatch. + * Cost: one indexed store + one increment, both on a + * cold path; CALL / LOAD / STORE are untouched. 
*/ + uint32 eh_idx = read_uint32(frame_ip); + WASMFunction *cur_wasm_func = cur_func->u.func; + uint32 *eh_stack = frame_lp + cur_func->param_cell_num + + cur_func->local_cell_num + + cur_wasm_func->max_stack_cell_num; + bh_assert(frame->eh_count + < cur_wasm_func->exception_handler_count); + eh_stack[frame->eh_count * EH_ENTRY_CELLS + 0] = eh_idx; + frame->eh_count++; + HANDLE_OP_END(); + } + HANDLE_OP(WASM_OP_CATCH) - HANDLE_OP(WASM_OP_THROW) + HANDLE_OP(WASM_OP_CATCH_ALL) + { + /* Loader emits `opcode, uint32 eh_idx` (the function's + * exception_handlers table records each catch body's + * pc and the region's end_of_region_pc). + * + * Reached via *normal flow* — execution either ran the + * try body to completion (CATCH is the first opcode + * after the try body) or fell through from a previous + * catch body. Either way: pop one eh-stack entry and + * branch past the try-region's end. The THROW dispatch + * (find_a_catch_handler) jumps directly to a catch body's + * first opcode, *skipping* the CATCH opcode itself, so + * this handler never runs as a result of a caught + * throw — only as a fall-through exit. */ + uint32 eh_idx = read_uint32(frame_ip); + WASMFunction *cur_wasm_func = cur_func->u.func; + bh_assert(eh_idx < cur_wasm_func->exception_handler_count); + bh_assert(frame->eh_count > 0); + frame->eh_count--; + frame_ip = + cur_wasm_func->exception_handlers[eh_idx].end_of_region_pc; + HANDLE_OP_END(); + } + HANDLE_OP(WASM_OP_RETHROW) + { + /* Loader emits `WASM_OP_RETHROW, uint32 depth`. Re-raise + * the exception currently being handled by an + * enclosing catch (the (depth+1)-th `state=CATCH` + * entry from the top of the eh-stack at this point — + * each in-progress catch we're nested in contributes + * one such entry, in source order). RETHROW is a + * cold op (only fires inside catch bodies); the walk + * runs across at most the number of catches nested + * around the rethrow site. CALL / LOAD / STORE are + * untouched. 
*/ + uint32 depth = read_uint32(frame_ip); + WASMFunction *cur_wasm_func = cur_func->u.func; + uint32 *eh_stack = frame_lp + cur_func->param_cell_num + + cur_func->local_cell_num + + cur_wasm_func->max_stack_cell_num; + uint32 i; + uint32 catch_seen = 0; + for (i = frame->eh_count; i > 0; i--) { + uint32 *cells = eh_stack + (i - 1) * EH_ENTRY_CELLS; + if (!(cells[0] & EH_TRY_CATCH_STATE_BIT)) + continue; + if (catch_seen == depth) { + /* Re-raise the caught tag against the *outer* + * try-regions. find_a_catch_handler iterates + * top-down and skips state=CATCH entries, so + * this same entry won't re-match. + * + * Payload routing: the original throw's + * payload values were copied into THIS + * catch's dst slots by the previous + * find_a_catch_handler dispatch. The wasm + * spec says the catch body can't mutate + * those exception values directly (they're + * not addressable as locals, and the only + * way to read them is to pop off the + * operand stack at catch entry — which + * advances past the dst slots without + * writing them back). So at RETHROW time + * the dst slots still hold the original + * payload, and we can point throw_src_offsets + * at them so the outer catch's copy lands + * on a fresh set of dst slots with the + * same values. + * + * If the original match was via catch_all + * (no typed catch matched cells[1]), + * `match->param_dst_offsets == NULL` and the + * payload was already dropped at the + * catch_all dispatch. RETHROW from + * catch_all then re-raises with no payload + * — documented as a known limitation. 
*/ + uint32 ent_eh_idx = cells[0] & ~EH_TRY_CATCH_STATE_BIT; + WASMFastEHEntry *ent = + &cur_wasm_func->exception_handlers[ent_eh_idx]; + WASMFastEHCatch *match = NULL; + uint32 mj; + for (mj = 0; mj < ent->catch_count; mj++) { + if (ent->catches[mj].tag_index == cells[1]) { + match = &ent->catches[mj]; + break; + } + } + if (match && match->param_dst_offsets) { + throw_param_cell_num = match->param_cell_num; + throw_src_offsets = match->param_dst_offsets; + } + else { + throw_param_cell_num = 0; + throw_src_offsets = NULL; + } + exception_tag_index = cells[1]; + goto find_a_catch_handler; + } + catch_seen++; + } + /* Loader validated rethrow's depth at compile time; + * if we got here the eh-stack is inconsistent with + * the IR (typically a runtime bug in the loader's + * eh-table population). */ + wasm_set_exception(module, "rethrow depth out of range"); + goto got_exception; + } + HANDLE_OP(WASM_OP_DELEGATE) - HANDLE_OP(WASM_OP_CATCH_ALL) + { + /* Normal-flow exit from a `try ... delegate N` region: + * the try body completed without throwing, so the + * runtime just pops the eh-stack entry that + * HANDLE_OP(WASM_OP_TRY) pushed and falls through to + * the next op in the rewritten IR (which is whatever + * came after the `delegate N` in source). + * + * The forwarding semantics ("if the try body throws, + * re-raise at the target block") are handled by the + * find_a_catch_handler walker reading the eh-table + * entry's `delegate_target_depth` and skipping that + * many nested-try eh-stack entries — DELEGATE itself + * doesn't run in the throw path, only on fall-through. + * + * No immediate to read: the loader skipped emit_br_info + * so the depth lives in the per-function eh-table + * indexed by the eh_idx of *this* try-region (which is + * the eh-stack's top). Cost: one decrement on a cold + * path; CALL / LOAD / STORE untouched. 
*/ + bh_assert(frame->eh_count > 0); + frame->eh_count--; + HANDLE_OP_END(); + } + HANDLE_OP(EXT_OP_TRY) { + /* The fast-interp loader doesn't emit EXT_OP_TRY yet + * (the eh-table records CATCH / CATCH_ALL / DELEGATE + * indices directly on the per-function table; TRY's + * uint32 immediate is the eh_idx, not a type-index + * blocktype). Reaching this handler means a future + * loader change started emitting EXT_OP_TRY without + * the runtime catching up — surface that as an + * explicit trap. */ wasm_set_exception(module, "unsupported opcode"); goto got_exception; } @@ -7526,9 +7992,31 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP(WASM_OP_GET_LOCAL) HANDLE_OP(WASM_OP_DROP) HANDLE_OP(WASM_OP_DROP_64) +#if WASM_ENABLE_EXCE_HANDLING != 0 + HANDLE_OP(WASM_OP_END) + { + /* Block / loop / if / function-level `end` is stripped from + * the IR at load time (skip_label in the END case of + * wasm_loader_prepare_bytecode). Only try-region `end`s + * survive — the loader keeps them so the runtime can pop + * the matching eh-stack entry here when control falls + * through the bottom of a catch body (or runs the body of + * a catchless `try ... end`). + * + * Cost: one decrement on a cold path. CALL / LOAD / STORE + * are untouched. */ + bh_assert(frame->eh_count > 0); + frame->eh_count--; + HANDLE_OP_END(); + } + + HANDLE_OP(WASM_OP_BLOCK) + HANDLE_OP(WASM_OP_LOOP) +#else HANDLE_OP(WASM_OP_BLOCK) HANDLE_OP(WASM_OP_LOOP) HANDLE_OP(WASM_OP_END) +#endif HANDLE_OP(WASM_OP_NOP) HANDLE_OP(EXT_OP_BLOCK) HANDLE_OP(EXT_OP_LOOP) @@ -7734,6 +8222,17 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, * these cells */ local_cell_num = cur_func->param_cell_num + cur_func->local_cell_num; +#endif +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* EH_ENTRY_CELLS cells per try-region in the function, + * appended past the value stack — cell 0 holds the + * packed eh_idx | state_bit, cell 1 holds the caught tag + * for RETHROW. 
Functions without try blocks pay zero + * cells. Mirrors classic-interp's eh_size accounting at + * wasm_interp_classic.c:6786 (which also stores per- + * handler pointers on the value stack). */ + all_cell_num += + cur_wasm_func->exception_handler_count * EH_ENTRY_CELLS; #endif /* param_cell_num, local_cell_num, const_cell_num and max_stack_cell_num are all no larger than UINT16_MAX (checked @@ -7751,6 +8250,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, frame_ip = wasm_get_func_code(cur_func); frame_ip_end = wasm_get_func_code_end(cur_func); +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* eh-stack starts empty; WASM_OP_TRY appends entries. */ + frame->eh_count = 0; + /* exception_raised is the marker `return_func` reads on + * every wasm-to-wasm call return; if a callee's throw + * found no in-frame handler it stashes the tag on the + * caller's frame->tag_index and sets this flag, then + * goes to return_func. ALLOC_FRAME doesn't zero-init + * the frame header, so leaving the slot uninitialized + * trips the return_func hook on every call return with + * stale memory contents — turning a non-throwing run + * into "wasm exception thrown (tag N)" for random N. */ + frame->exception_raised = false; +#endif + frame_lp = frame->lp = frame->operand + cur_wasm_func->const_cell_num; @@ -7807,6 +8321,34 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, RECOVER_CONTEXT(prev_frame); #if WASM_ENABLE_GC != 0 local_cell_num = cur_func->param_cell_num + cur_func->local_cell_num; +#endif +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* Inter-function unwind: the callee stashed a wasm tag on + * this frame (now the active one after RECOVER_CONTEXT) + * when its eh-stack walk found no in-frame match. Re-enter + * find_a_catch_handler so the caller's eh-stack gets a + * chance to catch. Predicted strongly not-taken — + * exceptions are rare, this single check is the entire + * CALL-return-side cost of EH; the success path takes the + * HANDLE_OP_END() below. 
+ * + * Cross-frame payload routing: only payload-free exceptions + * arrive here, because find_a_catch_handler traps a throw with + * params before forwarding it to the caller. We still zero + * the throw_param_cell_num / throw_src_offsets pair so the + * caller's find_a_catch_handler never applies the callee's + * slot offsets to this frame's frame_lp — the catch (if any + * matches) fires with a zero-cell payload. This is the gap + * documented at the WASM_OP_THROW handler and surfaced as + * `cross_function_tag_with_params` in the integration + * suite. */ + if (frame->exception_raised) { + exception_tag_index = frame->tag_index; + throw_param_cell_num = 0; + throw_src_offsets = NULL; + frame->exception_raised = false; + goto find_a_catch_handler; + } #endif HANDLE_OP_END(); } diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index a2c67bea2c..6f83a3715a 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -7360,6 +7360,33 @@ wasm_loader_unload(WASMModule *module) wasm_runtime_free(module->functions[i]->code_compiled); if (module->functions[i]->consts) wasm_runtime_free(module->functions[i]->consts); +#if WASM_ENABLE_EXCE_HANDLING != 0 + if (module->functions[i]->exception_handlers) { + uint32 eh_idx; + for (eh_idx = 0; + eh_idx < module->functions[i]->exception_handler_count; + eh_idx++) { + WASMFastEHEntry *eh_entry = + &module->functions[i]->exception_handlers[eh_idx]; + if (eh_entry->catches) { + uint32 cj; + /* Free each catch's tag-with-params dst + * slot array. param_dst_offsets is NULL + * for the (common) tag-without-params + * case, in which case the free is a + * no-op. 
*/ + for (cj = 0; cj < eh_entry->catch_count; cj++) { + if (eh_entry->catches[cj].param_dst_offsets) { + wasm_runtime_free(eh_entry->catches[cj] + .param_dst_offsets); + } + } + wasm_runtime_free(eh_entry->catches); + } + } + wasm_runtime_free(module->functions[i]->exception_handlers); + } +#endif /* end of WASM_ENABLE_EXCE_HANDLING */ #endif #if WASM_ENABLE_FAST_JIT != 0 if (module->functions[i]->fast_jit_jitted_code) { @@ -8470,6 +8497,14 @@ typedef struct BranchBlock { * to copy the stack operands to the loop block's arguments in * wasm_loader_emit_br_info for opcode br. */ uint16 start_dynamic_offset; +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* For LABEL_TYPE_TRY/CATCH/CATCH_ALL: index into + * func->exception_handlers (the same index across the whole try- + * catch-end region — a CATCH clause inherits its parent TRY's + * index when the loader rewrites the block label). UINT32_MAX + * for non-EH label types. */ + uint32 eh_entry_idx; +#endif #endif /* Indicate the operand stack is in polymorphic state. @@ -8551,6 +8586,13 @@ typedef struct WASMLoaderContext { * than the final code_compiled_size, we record the peak size to ensure * there will not be invalid memory access during second traverse */ uint32 code_compiled_peak_size; +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* Index of the next entry to claim in func->exception_handlers, + * during the second traverse only (the first traverse merely counts + * try-blocks into func->exception_handler_count to size the array). + * Reset to 0 in wasm_loader_ctx_reinit. */ + uint32 cur_eh_entry_idx; +#endif #endif } WASMLoaderContext; @@ -8822,6 +8864,11 @@ wasm_loader_ctx_init(WASMFunction *func, char *error_buf, uint32 error_buf_size) #if WASM_ENABLE_EXCE_HANDLING != 0 func->exception_handler_count = 0; +#if WASM_ENABLE_FAST_INTERP != 0 + /* Allocated at the start of the second traverse, once + * exception_handler_count is known from the first traverse. 
*/ + func->exception_handlers = NULL; +#endif #endif #if WASM_ENABLE_FAST_INTERP != 0 @@ -9344,6 +9391,12 @@ wasm_loader_push_frame_csp(WASMLoaderContext *ctx, uint8 label_type, #if WASM_ENABLE_FAST_INTERP != 0 ctx->frame_csp->dynamic_offset = ctx->dynamic_offset; ctx->frame_csp->patch_list = NULL; +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* Default sentinel; the WASM_OP_TRY handler patches this on entry + * and the CATCH/CATCH_ALL handlers propagate it onto the rewritten + * label. */ + ctx->frame_csp->eh_entry_idx = UINT32_MAX; +#endif #endif ctx->frame_csp++; ctx->csp_num++; @@ -9567,6 +9620,13 @@ wasm_loader_ctx_reinit(WASMLoaderContext *ctx) /* init preserved local offsets */ ctx->preserved_local_offset = ctx->max_dynamic_offset; +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* Start of the second traverse — reset the per-function try-block + * cursor so it tracks the same source-order index as the first + * traverse used to size func->exception_handlers. */ + ctx->cur_eh_entry_idx = 0; +#endif + /* const buf is reserved */ return true; } @@ -11247,6 +11307,39 @@ check_branch_block(WASMLoaderContext *loader_ctx, uint8 **p_buf, uint8 *buf_end, } #if WASM_ENABLE_EXCE_HANDLING != 0 +/* Returns the number of LABEL_TYPE_TRY / _CATCH / _CATCH_ALL + * frames whose END the runtime br will SKIP — i.e. the count of + * such frames at csp positions `cur_block` down to `target_block` + * inclusive (target_block included because br to a non-LOOP + * target lands AFTER target's end, skipping it; LOOP targets + * aren't try-typed so the inclusive vs exclusive distinction + * doesn't matter for them). The runtime br jumps directly to the + * target's resolved pc without decrementing `frame->eh_count`, + * so each such frame represents one stale eh-stack entry that + * survives the br. 
A single leaked entry is benign — frame + * allocation reserves `exception_handler_count * EH_ENTRY_CELLS` + * cells, the walker iterates top-down so sibling-try throws + * still match correctly, and the stale entry dies at frame + * teardown. But a br to a surrounding LOOP re-pushes one entry + * every iteration, eventually overflowing the static reservation; + * the resulting out-of-bounds writes go through silently in + * release builds (`bh_assert` is a no-op without `BH_DEBUG`). + * Caller logs a warning so the shape shows up in load-time + * diagnostics. */ +static uint32 +count_try_blocks_crossed(BranchBlock *cur_block, BranchBlock *target_block) +{ + BranchBlock *b; + uint32 count = 0; + for (b = cur_block; b >= target_block; b--) { + if (b->label_type == LABEL_TYPE_TRY || b->label_type == LABEL_TYPE_CATCH + || b->label_type == LABEL_TYPE_CATCH_ALL) { + count++; + } + } + return count; +} + static BranchBlock * check_branch_block_for_delegate(WASMLoaderContext *loader_ctx, uint8 **p_buf, uint8 *buf_end, char *error_buf, @@ -11961,6 +12054,27 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, loader_ctx->i32_const_num = k; } } + +#if WASM_ENABLE_EXCE_HANDLING != 0 + /* The first traverse counted `func->exception_handler_count` + * try-blocks; the second traverse is about to populate one + * entry per try-block in source order. Allocate the array now + * (zero-initialized) and reset delegate_target_depth to the + * "no delegate" sentinel on every entry. 
*/ + if (func->exception_handler_count > 0) { + uint64 eh_size = + (uint64)sizeof(WASMFastEHEntry) * func->exception_handler_count; + uint32 eh_i; + if (!(func->exception_handlers = + loader_malloc(eh_size, error_buf, error_buf_size))) { + goto fail; + } + for (eh_i = 0; eh_i < func->exception_handler_count; eh_i++) { + func->exception_handlers[eh_i].delegate_target_depth = + UINT32_MAX; + } + } +#endif } #endif @@ -12011,11 +12125,17 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, #if WASM_ENABLE_EXCE_HANDLING != 0 case WASM_OP_TRY: if (opcode == WASM_OP_TRY) { - /* - * keep track of exception handlers to account for - * memory allocation - */ +#if WASM_ENABLE_FAST_INTERP != 0 + /* Two-traverse loader: the first traverse counts + * try-blocks into func->exception_handler_count so + * the second traverse can allocate the per-function + * exception_handlers[] table (see re_scan block). */ + if (loader_ctx->p_code_compiled == NULL) + func->exception_handler_count++; +#else + /* Single-traverse classic-interp / shared loader. */ func->exception_handler_count++; +#endif /* * try is a block @@ -12276,7 +12396,22 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } #if WASM_ENABLE_EXCE_HANDLING != 0 else if (opcode == WASM_OP_TRY) { - skip_label(); + /* The auto-emit_label at the top of the dispatch + * loop already wrote the WASM_OP_TRY byte into the + * rewritten IR; the runtime handler for that + * opcode (HANDLE_OP(WASM_OP_TRY) in + * wasm_interp_fast.c) reads the uint32 eh_idx + * immediate we emit below and pushes one entry + * onto the per-frame eh-stack. Unlike BLOCK / LOOP, + * we keep the opcode in the IR — its runtime + * effect (push) is what makes throws find the + * right catches. 
*/ + bh_assert(loader_ctx->cur_eh_entry_idx + < func->exception_handler_count); + (loader_ctx->frame_csp - 1)->eh_entry_idx = + loader_ctx->cur_eh_entry_idx; + emit_uint32(loader_ctx, loader_ctx->cur_eh_entry_idx); + loader_ctx->cur_eh_entry_idx++; } #endif else if (opcode == WASM_OP_IF) { @@ -12379,6 +12514,76 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, goto fail; } +#if WASM_ENABLE_FAST_INTERP != 0 + /* Fast-interp THROW IR shape (emitted in BOTH traverses + * so pass-1 / pass-2 size accounting stays balanced): + * + * [tag_index : uint32] + * [param_cell_num : uint32] + * [src_offset_0 : int16] + * ... + * [src_offset_{param_cell_num-1} : int16] + * Where `param_cell_num` is the sum across all params' + * cell widths (i32 = 1, i64 = 2, v128 = 4, etc.) and + * src_offset_i is the throw-site's frame_lp slot for + * the i-th payload cell, read directly off the top of + * `loader_ctx->frame_offset[]`. The validation loop + * below pops frame_ref / available_stack_cell but + * doesn't touch frame_offset, so the src offsets are + * stable to read here. They get consumed at runtime + * by find_a_catch_handler when a *same-function* + * catch matches: it copies `param_cell_num` cells + * from frame_lp[src_offset_i] into the catch body's + * `param_dst_offsets[i]` slots before jumping to + * handler_pc. + * + * Cross-function dispatch (callee throws, caller's + * catch fires after return_func unwinds) does NOT + * preserve the payload — the source slots live in a + * frame that's about to be torn down. That gap is + * documented as an ignored integration test, in line + * with the cost-model rule that EH must not tax hot + * ops: a per-thread payload buffer would force every + * CALL / RETURN handler to spill scratch state across + * the boundary, which we explicitly refuse. + * + * Tag-without-params is the common case (Porffor + * emits empty payloads; many spec tests use bare + * tags too). 
param_cell_num=0 makes the for-loop + * trivial and the resulting IR is just the tag_index + * + a single zero — same hot-path cost as the + * pre-tag-with-params shape, since the runtime + * read_uint32 of param_cell_num happens on the cold + * THROW handler. */ + emit_uint32(loader_ctx, tag_index); + emit_uint32(loader_ctx, tag_type->param_cell_num); + { + /* Multi-cell types (i64, f64, v128) only have a + * meaningful first-cell offset in + * `frame_offset[]` — subsequent cells of the + * same value are left uninitialized by + * `wasm_loader_push_frame_offset` (it just + * advances the pointer without writing). For + * each param walk the per-param first cell out + * of frame_offset and synthesize consecutive + * cell offsets `(first, first+1, ...)`; that + * matches the runtime invariant that an n-cell + * value occupies n consecutive frame_lp cells. */ + uint32 pi, c, cell_so_far = 0; + int16 *base = + loader_ctx->frame_offset - tag_type->param_cell_num; + for (pi = 0; pi < tag_type->param_count; pi++) { + uint32 this_cells = + wasm_value_type_cell_num(tag_type->types[pi]); + int16 first_slot = base[cell_so_far]; + for (c = 0; c < this_cells; c++) { + emit_operand(loader_ctx, (int16)(first_slot + c)); + } + cell_so_far += this_cells; + } + } +#endif + int32 available_stack_cell = (int32)(loader_ctx->stack_cell_num - cur_block->stack_cell_num); @@ -12450,21 +12655,50 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } case WASM_OP_RETHROW: { - /* must be done before checking branch block */ + /* must be done before reading the depth */ SET_CUR_BLOCK_STACK_POLYMORPHIC_STATE(true); - /* check the target catching block: LABEL_TYPE_CATCH */ - if (!(frame_csp_tmp = - check_branch_block(loader_ctx, &p, p_end, opcode, - error_buf, error_buf_size))) - goto fail; - - if (frame_csp_tmp->label_type != LABEL_TYPE_CATCH - && frame_csp_tmp->label_type != LABEL_TYPE_CATCH_ALL) { - /* trap according to spectest (rethrow.wast) */ - set_error_buf(error_buf, 
error_buf_size, - "invalid rethrow label"); - goto fail; + /* Manual depth + label-type validation. We deliberately + * skip the shared `check_branch_block` here because + * RETHROW doesn't *branch* to its target — it walks + * the eh-stack at runtime and re-raises — so the + * branch-info bytes that check_branch_block / + * emit_br_info would write between the auto-emitted + * opcode label and our depth immediate are dead + * weight (4 bytes arity + 8 bytes target ptr + + * arity-dependent operand-offsets, all unread by the + * runtime walker). Worse, leaving them in the IR + * shifts our depth immediate past where the runtime + * read_uint32(frame_ip) looks for it. */ + { + uint32 rethrow_depth = 0; + BranchBlock *target_block; + pb_read_leb_uint32(p, p_end, rethrow_depth); + if (rethrow_depth >= loader_ctx->csp_num) { /* not `depth + 1 > csp_num`: that wraps to 0 when depth == UINT32_MAX and lets the bad label through */ +#if WASM_ENABLE_SPEC_TEST == 0 + set_error_buf(error_buf, error_buf_size, + "unknown rethrow label"); +#else + set_error_buf(error_buf, error_buf_size, + "unknown label"); +#endif + goto fail; + } + target_block = loader_ctx->frame_csp - rethrow_depth - 1; + if (target_block->label_type != LABEL_TYPE_CATCH + && target_block->label_type != LABEL_TYPE_CATCH_ALL) { + /* trap according to spectest (rethrow.wast) */ + set_error_buf(error_buf, error_buf_size, + "invalid rethrow label"); + goto fail; + } +#if WASM_ENABLE_FAST_INTERP != 0 + /* Emit the depth as a uint32 immediate after the + * auto-emitted RETHROW opcode. Pass 1's size + * accounting must match pass 2's actual emit so + * we run this branch in both traverses. */ + emit_uint32(loader_ctx, rethrow_depth); +#endif } BranchBlock *cur_block = loader_ctx->frame_csp - 1; @@ -12476,15 +12710,95 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } case WASM_OP_DELEGATE: { - /* check target block is valid */ - if (!(frame_csp_tmp = check_branch_block_for_delegate( - loader_ctx, &p, p_end, error_buf, error_buf_size))) - goto fail; - + /* Manual depth + label-type validation. 
Like RETHROW + * (above), we deliberately skip the shared + * `check_branch_block_for_delegate` here because: + * (1) DELEGATE doesn't *branch* to its target at + * runtime — when the try-body throws, the + * find_a_catch_handler walker reads the precomputed + * `delegate_target_depth` off the eh-table entry + * and skips the right number of nested-try entries + * on the per-frame eh-stack. The branch-info bytes + * that `emit_br_info` would write between the + * auto-emitted DELEGATE label and any subsequent + * operand are dead weight (4 bytes arity + 8 bytes + * target ptr, all unread by either the runtime + * DELEGATE handler or the throw walker). + * (2) Worse, leaving them in the IR shifts any + * immediate we *do* want to emit past where the + * runtime reads it — same gotcha that bit + * RETHROW. + * + * `delegate N` targets the (N+1)-th block out from the + * current try-delegate frame. The try-delegate itself + * still sits on the loader's csp stack at this point + * (POP_CSP is called below), so the target is at + * frame_csp - N - 2 + * and the spec rejects `delegate N` whose N+1 would + * climb past the function frame. */ + uint32 delegate_depth = 0; BranchBlock *cur_block = loader_ctx->frame_csp - 1; + BranchBlock *target_block; uint8 label_type = cur_block->label_type; - (void)label_type; + + pb_read_leb_uint32(p, p_end, delegate_depth); + bh_assert(loader_ctx->csp_num > 0); + if (loader_ctx->csp_num - 1 <= delegate_depth) { +#if WASM_ENABLE_SPEC_TEST == 0 + set_error_buf(error_buf, error_buf_size, + "unknown delegate label"); +#else + set_error_buf(error_buf, error_buf_size, "unknown label"); +#endif + goto fail; + } + target_block = loader_ctx->frame_csp - delegate_depth - 2; + (void)target_block; + +#if WASM_ENABLE_FAST_INTERP != 0 + /* Second traverse only: populate the eh-table entry so + * the runtime walker can dispatch through it. 
+ * + * delegate_target_depth = (count of try / catch / + * catch_all blocks STRICTLY between cur_block and + * target_block on the loader's csp stack) + * + * At runtime those `delta` blocks are exactly the + * eh-stack entries immediately below the delegate's own + * entry that the throw walker must SKIP — the spec + * re-raises the exception "at the target block's + * location", so any try whose body the delegate's try + * is nested inside (but the target is also inside) + * doesn't get to catch it. + * + * end_of_region_pc still gets set to the IR pc just + * after the auto-emitted DELEGATE label. The walker + * never reads it for delegate entries (it forwards via + * delta instead), but a future DELEGATE-end runtime + * handler that wanted to advance frame_ip past the + * region could use it; recording it keeps the + * shape identical to the END(try) capture and the + * field semantics easy to reason about. */ + if (loader_ctx->p_code_compiled != NULL) { + uint32 eh_idx = cur_block->eh_entry_idx; + uint32 delta = 0; + BranchBlock *b; + bh_assert(eh_idx < func->exception_handler_count); + bh_assert(func->exception_handlers != NULL); + for (b = cur_block - 1; b > target_block; b--) { + if (b->label_type == LABEL_TYPE_TRY + || b->label_type == LABEL_TYPE_CATCH + || b->label_type == LABEL_TYPE_CATCH_ALL) { + delta++; + } + } + func->exception_handlers[eh_idx].delegate_target_depth = + delta; + func->exception_handlers[eh_idx].end_of_region_pc = + loader_ctx->p_code_compiled; + } +#endif /* DELEGATE ends the block */ POP_CSP(); break; @@ -12533,6 +12847,100 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, goto fail; } + /* Validate previous body's stack (try body on first + * CATCH, previous catch body on subsequent CATCH) + * matches the block's result type. Without this the + * loader would silently accept stack-shape mismatches + * between the try body and the catch bodies and the + * next op would read garbage. 
Same pattern as ELSE + * runs `check_block_stack` on the if-body before the + * else body's PUSH_TYPE sequence. */ + if (!check_block_stack(loader_ctx, cur_block, error_buf, + error_buf_size)) + goto fail; + +#if WASM_ENABLE_FAST_INTERP != 0 + /* For result-typed try-regions, inject a COPY of the + * previous body's last value(s) into the block's + * `dynamic_offset` slot BEFORE the auto-emitted CATCH + * label. The normal-flow CATCH dispatch jumps from + * here to `end_of_region_pc` — the body's value would + * otherwise be lost. Mirrors how `reserve_block_ret` + * + `case WASM_OP_ELSE` align the if-body's result + * for the else-body's END to read. Layout becomes: + * + * [previous body ops...] + * [EXT_OP_COPY_STACK_TOP src=prev_top dst=dyn_off] + * [CATCH label][eh_idx][dst-slots from PUSH...] + * [catch body ops...] + * + * The `src != dst` check runs in BOTH traverses so + * pass-1 size accounting matches pass-2 writes: + * `dynamic_offset` evolves identically in both + * passes, and although const-pool slots get + * renumbered between passes by the qsort/dedup at + * the start of pass 2, they stay strictly negative + * (offsets `-(count)..-1`) while `dynamic_offset` is + * strictly non-negative (`>= start_dynamic_offset = + * param_cell_num + local_cell_num`). So the + * predicate is sign-stable across passes. + * + * Multi-return-value try-regions need + * `EXT_OP_COPY_STACK_VALUES`; we error out + * explicitly until a follow-up commit lifts that + * restriction. Single-return covers every shape + * Porffor / AS / our integration tests emit. 
*/ + { + uint8 *return_types = NULL; +#if WASM_ENABLE_GC == 0 + uint32 return_count = block_type_get_result_types( + &cur_block->block_type, &return_types); +#else + WASMRefTypeMap *return_reftype_maps = NULL; + uint32 return_reftype_map_count = 0; + uint32 return_count = block_type_get_result_types( + &cur_block->block_type, &return_types, + &return_reftype_maps, &return_reftype_map_count); +#endif + if (return_count == 1) { + uint8 cell = + (uint8)wasm_value_type_cell_num(return_types[0]); + int16 src = *(loader_ctx->frame_offset - cell); + int16 dst = cur_block->dynamic_offset; + if (src != dst) { + skip_label(); + if (cell == 4) + emit_label(EXT_OP_COPY_STACK_TOP_V128); + else if (cell == 2) + emit_label(EXT_OP_COPY_STACK_TOP_I64); + else + emit_label(EXT_OP_COPY_STACK_TOP); + emit_operand(loader_ctx, src); + emit_operand(loader_ctx, dst); + emit_label(opcode); + } + } + else if (return_count > 1) { + set_error_buf(error_buf, error_buf_size, + "multi-return try-region not " + "supported in fast interpreter"); + goto fail; + } + } + + /* Emit `[eh_idx : uint32]` after the auto-emitted CATCH + * opcode. The runtime CATCH handler reads it to find + * end_of_region_pc when the catch is reached via + * normal flow. Emitted in BOTH traverses so pass 1's + * size measurement and pass 2's actual writes match; + * if this were inside the populate guard below, + * pass 2 would overrun the code_compiled buffer by + * sizeof(uint32) bytes per catch, corrupting whatever + * loader allocation the heap placed immediately after + * (typically func->exception_handlers itself). 
*/ + emit_uint32(loader_ctx, cur_block->eh_entry_idx); +#endif + /* * replace frame_csp by LABEL_TYPE_CATCH */ @@ -12541,13 +12949,52 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, /* RESET_STACK removes the values pushed in TRY or previous * CATCH Blocks */ RESET_STACK(); + /* Reset the polymorphic flag the way `WASM_OP_ELSE` + * does: the catch body is a freshly-reachable region, + * not a continuation of the (dead) try body after a + * throw. Without this reset, the catch body's END + * runs `check_block_stack` in polymorphic mode, which + * emits a `POP_OFFSET_TYPE` operand byte for each + * return-cell — those bytes land between the auto- + * emitted END label and the case body's + * `skip_label()`, shifting the re-emitted END label + * forward by `2 * return_cell_num` bytes and leaving + * a corrupt handler-ptr at the originally-recorded + * `handler_pc`. (The same bug latent in non-EH + * polymorphic blocks doesn't bite because their END + * gets stripped from the IR entirely; the EH path's + * runtime needs the END opcode to actually exist for + * the eh-stack pop.) */ + SET_CUR_BLOCK_STACK_POLYMORPHIC_STATE(false); #if WASM_ENABLE_GC != 0 WASMRefType *ref_type; uint32 j = 0; #endif - /* push types on the stack according to caught type */ + /* Push the tag's params onto the catch body's operand + * stack. Classic-interp uses PUSH_TYPE (which only + * touches the value-type stack used by validation); + * fast-interp also needs `PUSH_OFFSET_TYPE`, which + * allocates fresh `dynamic_offset` slots for each cell + * (and emits the slot offsets as `int16` operands in + * the IR right after the eh_idx). The catch body's + * downstream ops then `POP_OFFSET_TYPE` to consume + * these slots — same shape the loader uses for + * block-with-params (see `copy_params_to_dynamic_ + * space`). 
+ * + * Note: the emitted dst slots are *unused* by the + * runtime CATCH normal-flow handler (it only reads + * eh_idx and branches to end_of_region_pc) — they + * sit in the IR as dead bytes on the fall-through + * path. The throw walker doesn't read them either; + * it consults the pre-decoded copy on + * `WASMFastEHCatch.param_dst_offsets` (populated + * below). They're emitted only so PUSH_OFFSET_TYPE's + * pass-1 / pass-2 size accounting stays balanced and + * the catch body's POP_OFFSET_TYPEs find the right + * slot offsets in `frame_offset[]`. */ for (i = 0; i < func_type->param_count; i++) { #if WASM_ENABLE_GC != 0 if (wasm_is_type_multi_byte_type(func_type->types[i])) { @@ -12558,9 +13005,113 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, wasm_reftype_struct_size(ref_type)); j++; } +#endif + /* Allocate a fresh `dynamic_offset` slot for the + * catch param AND push its type onto `frame_ref` + * (so `stack_cell_num` stays balanced). One + * without the other doesn't work: a bare + * `PUSH_OFFSET_TYPE` leaves the offset side + * ahead of the ref side, so the catch body's + * first consumer (e.g. `global.set $g`) hits + * `wasm_loader_pop_frame_offset`'s polymorphic + * short-circuit — the CATCH block inherits the + * polymorphic flag from THROW's + * `SET_CUR_BLOCK_STACK_POLYMORPHIC_STATE`, and + * with `available_stack_cell == 0` the pop + * silently returns without emitting the source + * slot. The consumer's runtime read then lands + * on heap garbage and crashes with SIGBUS / + * SIGSEGV. PUSH_TYPE rebalances and avoids + * the short-circuit so the catch body's pops + * emit real source-slot operand bytes. */ +#if WASM_ENABLE_FAST_INTERP != 0 + PUSH_OFFSET_TYPE(func_type->types[i]); #endif PUSH_TYPE(func_type->types[i]); } + +#if WASM_ENABLE_FAST_INTERP != 0 + /* Second traverse only: append a fully-populated + * `WASMFastEHCatch` entry to the parent try-region's + * catches[]. 
handler_pc is captured *after* the + * PUSH_OFFSET_TYPE emits above so it points at the + * first rewritten-IR byte of the catch body proper + * (skipping the dead dst-slot bytes). param_cell_num + * is the sum of cells across all tag params (i32 = 1 + * cell, i64 = 2, v128 = 4); param_dst_offsets is a + * loader-owned copy of the int16 slot offsets just + * pushed onto frame_offset[]. NULL when the tag has + * no params (the typical Porffor shape). */ + if (loader_ctx->p_code_compiled != NULL) { + uint32 eh_idx = cur_block->eh_entry_idx; + WASMFastEHEntry *entry; + WASMFastEHCatch *new_catches; + uint64 new_size; + bh_assert(eh_idx < func->exception_handler_count); + bh_assert(func->exception_handlers != NULL); + entry = &func->exception_handlers[eh_idx]; + new_size = (uint64)sizeof(WASMFastEHCatch) + * (entry->catch_count + 1); + if (!(new_catches = loader_malloc(new_size, error_buf, + error_buf_size))) { + goto fail; + } + if (entry->catches) { + bh_memcpy_s(new_catches, (uint32)new_size, + entry->catches, + (uint32)sizeof(WASMFastEHCatch) + * entry->catch_count); + wasm_runtime_free(entry->catches); + } + new_catches[entry->catch_count].tag_index = tag_index; + new_catches[entry->catch_count].handler_pc = + loader_ctx->p_code_compiled; + new_catches[entry->catch_count].param_cell_num = + func_type->param_cell_num; + new_catches[entry->catch_count].param_dst_offsets = NULL; + if (func_type->param_cell_num > 0) { + uint64 dst_size = + (uint64)sizeof(int16) * func_type->param_cell_num; + int16 *dst; + uint32 pi, c, cell_so_far = 0; + int16 *base; + if (!(dst = loader_malloc(dst_size, error_buf, + error_buf_size))) { + entry->catches = new_catches; /* old array is already freed: hand the copy (catch_count unchanged) to the entry so wasm_loader_unload frees it instead of a dangling pointer */ + goto fail; + } + /* Synthesize per-cell dst offsets from each + * param's first cell. 
Same multi-cell shape + * concern as the THROW src emit: + * `wasm_loader_push_frame_offset` writes a + * meaningful int16 only for the first cell + * of a multi-cell value (i64 / f64 / v128); + * subsequent cells of the same value have + * unspecified frame_offset entries. The + * runtime walker copies one frame_lp cell + * per iteration, so its `param_cell_num` + * loop needs an offset array indexed by + * absolute cell number, not by frame_offset + * position. Build that here by walking + * params and synthesizing `(first, first+1, + * ..., first+param_cells-1)` for each one. */ + base = loader_ctx->frame_offset + - func_type->param_cell_num; + for (pi = 0; pi < func_type->param_count; pi++) { + uint32 this_cells = + wasm_value_type_cell_num(func_type->types[pi]); + int16 first_slot = base[cell_so_far]; + for (c = 0; c < this_cells; c++) { + dst[cell_so_far + c] = (int16)(first_slot + c); + } + cell_so_far += this_cells; + } + new_catches[entry->catch_count].param_dst_offsets = dst; + } + entry->catches = new_catches; + entry->catch_count++; + } +#endif break; } case WASM_OP_CATCH_ALL: @@ -12576,6 +13127,77 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, goto fail; } + /* Same previous-body-stack validation as in CATCH. */ + if (!check_block_stack(loader_ctx, cur_block, error_buf, + error_buf_size)) + goto fail; + +#if WASM_ENABLE_FAST_INTERP != 0 + /* Same COPY-to-block-dynamic_offset shape as CATCH + * (see the long comment in the CATCH case for the + * rationale and pass-1/pass-2 alignment argument). + * catch_all is the only place the body-COPY can run + * for a try with a result-type and only a catch_all, + * so without this emit a result-typed + * `try (result T) ... catch_all` would lose the try + * body's value on the normal-flow path. 
*/ + { + uint8 *return_types = NULL; +#if WASM_ENABLE_GC == 0 + uint32 return_count = block_type_get_result_types( + &cur_block->block_type, &return_types); +#else + WASMRefTypeMap *return_reftype_maps = NULL; + uint32 return_reftype_map_count = 0; + uint32 return_count = block_type_get_result_types( + &cur_block->block_type, &return_types, + &return_reftype_maps, &return_reftype_map_count); +#endif + if (return_count == 1) { + uint8 cell = + (uint8)wasm_value_type_cell_num(return_types[0]); + int16 src = *(loader_ctx->frame_offset - cell); + int16 dst = cur_block->dynamic_offset; + if (src != dst) { + skip_label(); + if (cell == 4) + emit_label(EXT_OP_COPY_STACK_TOP_V128); + else if (cell == 2) + emit_label(EXT_OP_COPY_STACK_TOP_I64); + else + emit_label(EXT_OP_COPY_STACK_TOP); + emit_operand(loader_ctx, src); + emit_operand(loader_ctx, dst); + emit_label(opcode); + } + } + else if (return_count > 1) { + set_error_buf(error_buf, error_buf_size, + "multi-return try-region not " + "supported in fast interpreter"); + goto fail; + } + } + + /* Emit `[eh_idx : uint32]` after the auto-emitted CATCH_ALL + * opcode in BOTH traverses (pass 1's size accounting + * must include this or pass 2 overruns + * code_compiled). Pass 2 additionally records + * catch_all_pc on the parent try-region — set exactly + * once per region (spec allows at most one catch_all + * per try). 
*/ + emit_uint32(loader_ctx, cur_block->eh_entry_idx); + if (loader_ctx->p_code_compiled != NULL) { + uint32 eh_idx = cur_block->eh_entry_idx; + bh_assert(eh_idx < func->exception_handler_count); + bh_assert(func->exception_handlers != NULL); + bh_assert(func->exception_handlers[eh_idx].catch_all_pc + == NULL); + func->exception_handlers[eh_idx].catch_all_pc = + loader_ctx->p_code_compiled; + } +#endif + /* no immediates */ /* replace frame_csp by LABEL_TYPE_CATCH_ALL */ cur_block->label_type = LABEL_TYPE_CATCH_ALL; @@ -12583,6 +13205,9 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, /* RESET_STACK removes the values pushed in TRY or previous * CATCH Blocks */ RESET_STACK(); + /* Same polymorphic reset as `WASM_OP_CATCH` — see the + * matching comment there for the rationale. */ + SET_CUR_BLOCK_STACK_POLYMORPHIC_STATE(false); /* catch_all has no tagtype and therefore no parameters */ break; @@ -12659,6 +13284,22 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, case WASM_OP_END: { BranchBlock *cur_block = loader_ctx->frame_csp - 1; +#if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_EXCE_HANDLING != 0 + /* If this END closes a try-region (LABEL_TYPE_TRY when + * the region has only a try-body and no catch, or + * LABEL_TYPE_CATCH / CATCH_ALL when at least one catch + * clause is present), we need to remember the entry's + * index and label type now — POP_CSP and the subsequent + * skip_label / reserve_block_ret happen first, but the + * end_of_region_pc capture has to wait until after + * those advance loader_ctx->p_code_compiled. 
*/ + uint32 ending_eh_idx = cur_block->eh_entry_idx; + bool ending_was_eh = + (ending_eh_idx != UINT32_MAX) + && (cur_block->label_type == LABEL_TYPE_TRY + || cur_block->label_type == LABEL_TYPE_CATCH + || cur_block->label_type == LABEL_TYPE_CATCH_ALL); +#endif /* check whether block stack matches its result type */ if (!check_block_stack(loader_ctx, cur_block, error_buf, @@ -12685,30 +13326,62 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, POP_CSP(); #if WASM_ENABLE_FAST_INTERP != 0 - skip_label(); - /* copy the result to the block return address */ - if (!reserve_block_ret(loader_ctx, opcode, disable_emit, - error_buf, error_buf_size)) { - /* it could be tmp frame_csp allocated from opcode like - * OP_BR and not counted in loader_ctx->csp_num, it won't - * be freed in wasm_loader_ctx_destroy(loader_ctx) so need - * to free the loader_ctx->frame_csp if fails */ +#if WASM_ENABLE_EXCE_HANDLING != 0 + if (ending_was_eh) { + /* try-region END must execute the eh-stack pop in + * the runtime END handler — including when reached + * via `br N` (whose target was registered into + * this block's PATCH_END list by emit_br_info). + * + * Rewind the auto-emitted END byte, point all + * PATCH_END entries at the rewound position, then + * re-emit the END byte so both branches and fall- + * through dispatch the pop. reserve_block_ret's + * COPY (if any) lands *after* the END byte: the + * pop only adjusts eh_count and doesn't touch the + * operand stack the COPY moves from. */ + skip_label(); + apply_label_patch(loader_ctx, 0, PATCH_END); + emit_label(WASM_OP_END); + if (!reserve_block_ret(loader_ctx, opcode, disable_emit, + error_buf, error_buf_size)) { + free_label_patch_list(loader_ctx->frame_csp); + goto fail; + } free_label_patch_list(loader_ctx->frame_csp); - goto fail; + /* A try-region's END can never coincide with + * LABEL_TYPE_FUNCTION (the implicit function block + * is not a try); no WASM_OP_RETURN emit needed. 
*/ } + else +#endif /* WASM_ENABLE_EXCE_HANDLING */ + { + skip_label(); + /* copy the result to the block return address */ + if (!reserve_block_ret(loader_ctx, opcode, disable_emit, + error_buf, error_buf_size)) { + /* it could be tmp frame_csp allocated from opcode like + * OP_BR and not counted in loader_ctx->csp_num, it + * won't be freed in wasm_loader_ctx_destroy(loader_ctx) + * so need to free the loader_ctx->frame_csp if fails */ + free_label_patch_list(loader_ctx->frame_csp); + goto fail; + } - apply_label_patch(loader_ctx, 0, PATCH_END); - free_label_patch_list(loader_ctx->frame_csp); - if (loader_ctx->frame_csp->label_type == LABEL_TYPE_FUNCTION) { - int32 idx; - uint8 ret_type; - - emit_label(WASM_OP_RETURN); - for (idx = (int32)func->func_type->result_count - 1; - idx >= 0; idx--) { - ret_type = *(func->func_type->types - + func->func_type->param_count + idx); - POP_OFFSET_TYPE(ret_type); + apply_label_patch(loader_ctx, 0, PATCH_END); + free_label_patch_list(loader_ctx->frame_csp); + if (loader_ctx->frame_csp->label_type + == LABEL_TYPE_FUNCTION) { + int32 idx; + uint8 ret_type; + + emit_label(WASM_OP_RETURN); + for (idx = (int32)func->func_type->result_count - 1; + idx >= 0; idx--) { + ret_type = *(func->func_type->types + + func->func_type->param_count + idx); + POP_OFFSET_TYPE(ret_type); + } } } #endif @@ -12733,6 +13406,22 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, } #endif +#if WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_EXCE_HANDLING != 0 + /* Second-traverse-only: if this END closed a try- + * region, record where the rewritten IR continues so a + * runtime catch-handler body can branch past the + * region after running. The captured pc lands *after* + * the END's own skip_label and reserve_block_ret, so + * the next dispatched op is whatever follows the + * source-level END byte. 
*/ + if (loader_ctx->p_code_compiled != NULL && ending_was_eh) { + bh_assert(ending_eh_idx < func->exception_handler_count); + bh_assert(func->exception_handlers != NULL); + func->exception_handlers[ending_eh_idx].end_of_region_pc = + loader_ctx->p_code_compiled; + } +#endif + break; } @@ -12743,6 +13432,46 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf, error_buf_size))) goto fail; +#if WASM_ENABLE_EXCE_HANDLING != 0 && WASM_ENABLE_FAST_INTERP != 0 + /* When a br skips over a try-region's END, the + * runtime br doesn't pop eh-stack entries. For a + * one-shot br to a block / function-end / catch, + * the leaked entry is absorbed by the static + * `exception_handler_count * EH_ENTRY_CELLS` + * reservation and dies at frame teardown — log + * a warning so the shape shows up in load-time + * diagnostics, but accept the module. + * + * If the br target is a LOOP entry, however, + * every iteration's TRY push adds one more entry + * to the eh-stack and eventually overwrites past + * the static reservation (silently in release + * builds since `bh_assert` is a no-op without + * `BH_DEBUG`). Reject those modules at load time + * — emitting cleanup at the br site would be the + * other fix, but it complicates the hot dispatch + * loop and the shape is rare in practice. 
*/ + { + uint32 leaked = count_try_blocks_crossed( + loader_ctx->frame_csp - 1, frame_csp_tmp); + if (leaked > 0 + && frame_csp_tmp->label_type == LABEL_TYPE_LOOP) { + set_error_buf(error_buf, error_buf_size, + "br to loop entry from inside " + "try-region not supported in fast " + "interpreter (would leak eh-stack " + "entries per iteration)"); + goto fail; + } + if (leaked > 0 && loader_ctx->p_code_compiled == NULL) { + LOG_WARNING("wasm fast-interp: br at func[%u] crosses " + "%u try-region(s); each leaks one " + "eh-stack entry until frame teardown", + cur_func_idx, leaked); + } + } +#endif + RESET_STACK(); SET_CUR_BLOCK_STACK_POLYMORPHIC_STATE(true); break; @@ -12757,6 +13486,29 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf, error_buf_size))) goto fail; +#if WASM_ENABLE_EXCE_HANDLING != 0 && WASM_ENABLE_FAST_INTERP != 0 + { + uint32 leaked = count_try_blocks_crossed( + loader_ctx->frame_csp - 1, frame_csp_tmp); + if (leaked > 0 + && frame_csp_tmp->label_type == LABEL_TYPE_LOOP) { + set_error_buf(error_buf, error_buf_size, + "br_if to loop entry from inside " + "try-region not supported in fast " + "interpreter (would leak eh-stack " + "entries per iteration)"); + goto fail; + } + if (leaked > 0 && loader_ctx->p_code_compiled == NULL) { + LOG_WARNING( + "wasm fast-interp: br_if at func[%u] crosses " + "%u try-region(s); each leaks one " + "eh-stack entry until frame teardown", + cur_func_idx, leaked); + } + } +#endif + break; } @@ -12823,6 +13575,30 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, goto fail; } +#if WASM_ENABLE_EXCE_HANDLING != 0 && WASM_ENABLE_FAST_INTERP != 0 + { + uint32 leaked = count_try_blocks_crossed( + loader_ctx->frame_csp - 1, frame_csp_tmp); + if (leaked > 0 + && frame_csp_tmp->label_type == LABEL_TYPE_LOOP) { + set_error_buf(error_buf, error_buf_size, + "br_table to loop entry from inside " + "try-region not supported in fast " + "interpreter (would leak eh-stack " + "entries 
per iteration)"); + goto fail; + } + if (leaked > 0 && loader_ctx->p_code_compiled == NULL) { + LOG_WARNING( + "wasm fast-interp: br_table[%u] at " + "func[%u] crosses %u try-region(s); each " + "leaks one eh-stack entry until frame " + "teardown", + i, cur_func_idx, leaked); + } + } +#endif + #if WASM_ENABLE_FAST_INTERP == 0 if (br_table_cache) { br_table_cache->br_depths[i] = depth; diff --git a/tests/wamr-test-suites/test_wamr.sh b/tests/wamr-test-suites/test_wamr.sh index 97dc84d548..c4b6efa903 100755 --- a/tests/wamr-test-suites/test_wamr.sh +++ b/tests/wamr-test-suites/test_wamr.sh @@ -919,8 +919,9 @@ function do_execute_in_running_mode() # keep alpha order if [[ ${ENABLE_EH} -eq 1 ]]; then - if [[ "${RUNNING_MODE}" != "classic-interp" ]]; then - echo "support exception handling in classic-interp" + if [[ "${RUNNING_MODE}" != "classic-interp" \ + && "${RUNNING_MODE}" != "fast-interp" ]]; then + echo "support exception handling in classic-interp and fast-interp" return 0; fi fi