From 0bfd2deda3a21fba846feda769777eb84285f65e Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 22 May 2026 16:53:14 -0700 Subject: [PATCH 1/2] Simplify externref literal representation Instead of using a complex bit-packing scheme to represent externref payloads and differentiate them from externalized internal references, simply store the payload in the GCData. Externalized internal references are also stored in the GCData, but can be differentiated from externref payloads because the latter have type i32 and the former are always reference types. Besides being complex, the bit packing scheme was also incorrect on big-endian architectures. --- src/literal.h | 37 ++++++++++++++++++++++++------------- src/wasm/literal.cpp | 34 ++++++++++++++-------------------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/src/literal.h b/src/literal.h index 686348d1942..356ed2ee636 100644 --- a/src/literal.h +++ b/src/literal.h @@ -266,11 +266,7 @@ class Literal { lit.i32 = value | 0x80000000; return lit; } - static Literal makeExtern(int32_t payload, Shareability share) { - auto lit = Literal(Type(HeapTypes::ext.getBasic(share), NonNullable)); - lit.i32 = (payload << 1) | 1; - return lit; - } + static Literal makeExtern(int32_t payload, Shareability share); // Wasm has nondeterministic rules for NaN propagation in some operations. For // example. f32.neg is deterministic and just flips the sign, even of a NaN, // but f32.add is nondeterministic, and if one or more of the inputs is a NaN, @@ -308,14 +304,8 @@ class Literal { // Cast to unsigned for the left shift to avoid undefined behavior. return signed_ ? int32_t((uint32_t(i32) << 1)) >> 1 : (i32 & 0x7fffffff); } - bool hasExternPayload() const { - assert(type.getHeapType().isMaybeShared(HeapType::ext)); - return (i32 & 1) == 1; - } - int32_t getExternPayload() const { - assert(hasExternPayload()); - return int32_t(uint32_t(i32) >> 1); - } + bool hasExternPayload() const; + int32_t getExternPayload() const; int64_t geti64() const { assert(type == Type::i64); return i64; @@ -813,6 +803,19 @@ struct GCData { : values(std::move(values)), desc(desc) {} }; +inline bool Literal::hasExternPayload() const { + if (isNull()) { + return false; + } + assert(type.getHeapType().isMaybeShared(HeapType::ext)); + return gcData->values[0].type == Type::i32; +} + +inline int32_t Literal::getExternPayload() const { + assert(hasExternPayload()); + return gcData->values[0].geti32(); +} + } // namespace wasm namespace std { @@ -857,6 +860,14 @@ template<> struct hash { wasm::rehash(digest, a.geti31(true)); return digest; } + if (type.isMaybeShared(wasm::HeapType::ext)) { + if (a.hasExternPayload()) { + wasm::rehash(digest, a.getExternPayload()); + return digest; + } + wasm::rehash(digest, (*this)(a.internalize())); + return digest; + } if (type.isMaybeShared(wasm::HeapType::any)) { // This may be an extern string that was internalized to |any|. Undo // that to get the actual value. (Rehash here with the existing digest, diff --git a/src/wasm/literal.cpp b/src/wasm/literal.cpp index 8c5074e1c74..d20921bccbf 100644 --- a/src/wasm/literal.cpp +++ b/src/wasm/literal.cpp @@ -66,7 +66,8 @@ Literal::Literal(Type type) : type(type) { if (type.isRef() && type.getHeapType().isMaybeShared(HeapType::ext)) { assert(type.isNonNullable()); - i32 = 1; + new (&gcData) std::shared_ptr( + std::make_shared(Literals{Literal(int32_t(0))})); return; } @@ -92,6 +93,11 @@ Literal Literal::makeFunc(Name func, Module& wasm) { return makeFunc(func, wasm.getFunction(func)->type); } +Literal Literal::makeExtern(int32_t payload, Shareability share) { + auto ext = HeapTypes::ext.getBasic(share); + return Literal(std::make_shared(Literals{Literal(payload)}), ext); +} + Literal::Literal(std::shared_ptr gcData, HeapType type) : gcData(gcData), type(type, gcData ? NonNullable : Nullable, @@ -175,17 +181,9 @@ Literal::Literal(const Literal& other) : type(other.type) { case HeapType::exn: new (&exnData) std::shared_ptr(other.exnData); return; - case HeapType::ext: { - if (other.hasExternPayload()) { - i32 = other.i32; - } else { - // Externalized internal reference. - new (&gcData) std::shared_ptr(other.gcData); - } - return; - } + case HeapType::ext: case HeapType::any: - // Internalized external reference or string. + // Externalized or internalized reference/payload. new (&gcData) std::shared_ptr(other.gcData); return; case HeapType::none: @@ -210,12 +208,8 @@ Literal::~Literal() { if (type.isBasic()) { return; } - if (type.getHeapType().isMaybeShared(HeapType::ext) && !hasExternPayload()) { - // Externalized internal reference. - gcData.~shared_ptr(); - return; - } - if (isNull() || isData() || type.getHeapType().isMaybeShared(HeapType::any)) { + if (isNull() || isData() || type.getHeapType().isMaybeShared(HeapType::any) || + type.getHeapType().isMaybeShared(HeapType::ext)) { gcData.~shared_ptr(); } else if (isFunction()) { funcData.~shared_ptr(); @@ -506,10 +500,10 @@ bool Literal::operator==(const Literal& other) const { return i32 == other.i32; } if (heapType.isMaybeShared(HeapType::ext)) { + if (hasExternPayload() != other.hasExternPayload()) { + return false; + } if (hasExternPayload()) { - if (!other.hasExternPayload()) { - return false; - } return getExternPayload() == other.getExternPayload(); } return internalize() == other.internalize(); From 0724bcaed11865a9f0ec4f9130544c0a0103b7fe Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Sat, 23 May 2026 19:39:01 -0700 Subject: [PATCH 2/2] update comments --- src/literal.h | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/src/literal.h b/src/literal.h index 356ed2ee636..e982dae68aa 100644 --- a/src/literal.h +++ b/src/literal.h @@ -43,28 +43,18 @@ class Literal { // Note: i31 is stored in the |i32| field, with the lower 31 bits containing // the value if there is one, and the highest bit containing whether there // is a value. Thus, a null is |i32 === 0|. - // - // Externref payloads, which serve to differentiate different external - // references but are otherwise meaningless, are also stored in the i32 - // field, with their low bit set to differentiate an externref with a - // payload from an externalized internal reference, which uses the gcData - // field instead. This scheme supports 31 bits of payload for externrefs, - // which should be sufficient for spec test and fuzzing purposes, but if we - // need more bits we can use the i64 field instead. This scheme also depends - // on the low bit of a shared_ptr not being used. int32_t i32; int64_t i64; uint8_t v128[16]; // A reference to Function data. std::shared_ptr funcData; - // A reference to GC data, either a Struct or an Array. For both of those we - // store the referred data as a Literals object (which is natural for an - // Array, and for a Struct, is just the fields in order). The type is used - // to indicate whether this is a Struct or an Array, and of what type. We - // also use this to store String data, as it is similarly stored on the - // heap. For externalized or internalized references (including strings), - // gcData holds a single value, which is the wrapped internal or external - // reference. + // A reference to GC data, used for structs, arrays, strings, externrefs, + // and internalized externrefs. The GCData contains the struct or array + // fields, or the characters in the string. Externrefs are either + // externalized internal references, in which case the GCData will contain + // the internal reference, or a host reference, in which case the GCData + // will contain an i32 payload. Internalized references contain the wrapped + // externref in the GCData. std::shared_ptr gcData; // A reference to Exn data. std::shared_ptr exnData;