From c46902d54e27321a314cd8615d44ae8ffed86304 Mon Sep 17 00:00:00 2001 From: Jeff McJunkin Date: Thu, 11 Jun 2026 18:10:38 -0700 Subject: [PATCH 1/9] kernel32: make FLS values per-thread FlsAlloc/FlsGetValue/FlsSetValue stored values in a single process-global array, so every thread shared one cell per FLS index. Fiber-local storage without fibers is thread-local storage on Windows: each thread is exactly one fiber, and FlsGetValue must return the value the calling thread set. The shared cell breaks msvcr120.dll (VS2013 CRT) thread creation. The CRT stores its per-thread data block (_ptd) -- which carries the _beginthreadex entry point and argument -- via FlsSetValue, and the new thread's _threadstartex/_callthreadstartex re-read it via FlsGetValue. With a process-global cell, two concurrently-starting threads overwrite each other's _ptd and can both start with the same argument. Observed with VS2013 cl.exe: c2.dll's parallel codegen pool creates four worker threads back-to-back; ~1-2% of compiles deadlocked forever. An API trace of a hung process shows two workers waiting on the same per-worker dispatch event while another worker's event has a signal and no waiter: CreateEvent h=7c / 88 / 94 / a0 (per-worker "go" events) t251103 WaitForSingleObject h=88 <- worker 1 t251102 WaitForSingleObject h=88 <- different thread, SAME event boss SetEvent 7c, 88, 94, a0 <- 94 never gets a waiter boss WaitForMultipleObjects({done events}, bWaitAll, INFINITE) -> hangs forever: the orphaned worker never runs its work item, so its "done" event is never set. Fix: keep the index allocation map process-wide (FLS indices are process-wide on Windows) but store the values in a thread_local array; wibo maps guest threads 1:1 onto host threads, so thread_local is exactly per-guest-thread. New threads observe zero-initialized values, matching Windows. Index alloc/free is guarded by a mutex. 
Caveat (unchanged behavior): FLS destructor callbacks are still never invoked, and freeing then reallocating an index does not clear other threads' stale values for it. The VS2013 CRT allocates its index once per process, so neither is reachable for it. With this fix the cl.exe hang rate dropped from ~1-2% to ~0.1-0.3% over 1000-run stress batches; the remainder was a separate CRITICAL_SECTION issue fixed in the following commit. Co-Authored-By: Claude Fable 5 --- dll/kernel32/fibersapi.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/dll/kernel32/fibersapi.cpp b/dll/kernel32/fibersapi.cpp index c834bd6..59a97e9 100644 --- a/dll/kernel32/fibersapi.cpp +++ b/dll/kernel32/fibersapi.cpp @@ -5,11 +5,21 @@ #include "errors.h" #include "internal.h" +#include + namespace { +// FLS without fibers is THREAD-local storage on +// Windows (every thread is exactly one fiber); only the index allocation map +// is process-wide. The previous process-global value array made all threads +// share one cell per index, which clobbers msvcr120's per-thread data (_ptd) -- +// including the _beginthreadex entry/argument that _threadstartex re-reads +// through FlsGetValue -- so concurrently-starting threads could duplicate or +// swap their start arguments. constexpr DWORD kMaxFlsValues = 0x100; +std::mutex g_flsMutex; bool g_flsValuesUsed[kMaxFlsValues] = {false}; -LPVOID g_flsValues[kMaxFlsValues] = {nullptr}; +thread_local LPVOID t_flsValues[kMaxFlsValues] = {nullptr}; } // namespace @@ -19,10 +29,11 @@ DWORD WINAPI FlsAlloc(PFLS_CALLBACK_FUNCTION lpCallback) { HOST_CONTEXT_GUARD(); DEBUG_LOG("FlsAlloc(%p)", lpCallback); // If the function succeeds, the return value is an FLS index initialized to zero. 
+ std::lock_guard lk(g_flsMutex); for (DWORD i = 0; i < kMaxFlsValues; i++) { if (g_flsValuesUsed[i] == false) { g_flsValuesUsed[i] = true; - g_flsValues[i] = nullptr; + t_flsValues[i] = nullptr; DEBUG_LOG(" -> %d\n", i); return i; } @@ -35,6 +46,7 @@ DWORD WINAPI FlsAlloc(PFLS_CALLBACK_FUNCTION lpCallback) { BOOL WINAPI FlsFree(DWORD dwFlsIndex) { HOST_CONTEXT_GUARD(); DEBUG_LOG("FlsFree(%u)\n", dwFlsIndex); + std::lock_guard lk(g_flsMutex); if (dwFlsIndex < kMaxFlsValues && g_flsValuesUsed[dwFlsIndex]) { g_flsValuesUsed[dwFlsIndex] = false; return TRUE; @@ -49,7 +61,7 @@ PVOID WINAPI FlsGetValue(DWORD dwFlsIndex) { VERBOSE_LOG("FlsGetValue(%u)\n", dwFlsIndex); PVOID result = nullptr; if (dwFlsIndex < kMaxFlsValues && g_flsValuesUsed[dwFlsIndex]) { - result = g_flsValues[dwFlsIndex]; + result = t_flsValues[dwFlsIndex]; // See https://learn.microsoft.com/en-us/windows/win32/api/fibersapi/nf-fibersapi-flsgetvalue setLastError(ERROR_SUCCESS); } else { @@ -63,7 +75,7 @@ BOOL WINAPI FlsSetValue(DWORD dwFlsIndex, PVOID lpFlsData) { HOST_CONTEXT_GUARD(); VERBOSE_LOG("FlsSetValue(%u, %p)\n", dwFlsIndex, lpFlsData); if (dwFlsIndex < kMaxFlsValues && g_flsValuesUsed[dwFlsIndex]) { - g_flsValues[dwFlsIndex] = lpFlsData; + t_flsValues[dwFlsIndex] = lpFlsData; return TRUE; } else { setLastError(ERROR_INVALID_PARAMETER); From 3edd33e21dd28131c3841f047b63807443163a4e Mon Sep 17 00:00:00 2001 From: Jeff McJunkin Date: Thu, 11 Jun 2026 18:10:38 -0700 Subject: [PATCH 2/9] kernel32: make CRITICAL_SECTION handoff and Leave faithful to Windows Two divergences from the real Windows state machine, both observed breaking VS2013 cl.exe (c2.dll multithreaded codegen): 1. Contended EnterCriticalSection waited until OwningThread was observed to be 0 and then claimed the section with a plain store. Two waiters can both observe 0 after a single Leave and both enter the critical section simultaneously. 
The fingerprint -- a free section left with LockCount == 0 instead of -1, created when the second "owner"'s Leave failed the ownership check and returned without decrementing -- was captured with gdb in hung c2.dll worker pools. 2. LeaveCriticalSection bailed out when the calling thread did not match OwningThread. Real Windows Leave performs no caller validation at all: it unconditionally decrements RecursionCount/LockCount and releases exactly one waiter; mutual exclusion is carried entirely by the LockCount/semaphore state machine. Guest lock usage that Windows tolerates therefore silently strands wibo's lock state instead. Instrumented runs caught exactly this on c2.dll's work-queue section: leave-not-owner cs= tid=B owner=A lock=1 rec=1 after which the queue's "active worker" counter and the LockCount were each stranded one too high, the queue's drain condition became unsatisfiable, and the compiler deadlocked (~0.1-0.3% of compiles even after the FLS fix). Fix, mirroring the Windows protocol: - Model LockSemaphore as a ticket count: every contended Leave posts exactly one ticket (InterlockedIncrement + WakeByAddressSingle); every blocked Enter consumes exactly one ticket (CAS decrement, WaitOnAddress otherwise) and then owns the section by construction. There is no claim race on OwningThread. TryEnterCriticalSection cannot steal while waiters exist because each waiter's LockCount increment persists until it owns and leaves. - Leave validates nothing, like Windows: if --RecursionCount != 0, decrement LockCount; otherwise clear the owner, decrement, and release one waiter if the result is >= 0. Measured with VS2013 cl.exe under stress (18KB C unit, 25s timeout = hang): ~1-2% hangs before, 0 hangs in 5,100 runs with this commit plus the FLS fix (2x1000 + 2000 at 24-way parallelism, 300 sequential, 800 on two other source units). Compiler output stays byte-identical to native Windows. 
Side effect: heavily contended compiles got ~6x faster (1000 parallel compiles: 26s -> 4s wall) because the old wait loop woke every waiter to race on each release. Co-Authored-By: Claude Fable 5 --- dll/kernel32/synchapi.cpp | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/dll/kernel32/synchapi.cpp b/dll/kernel32/synchapi.cpp index c349faa..e418f91 100644 --- a/dll/kernel32/synchapi.cpp +++ b/dll/kernel32/synchapi.cpp @@ -455,18 +455,29 @@ inline void setOwningThread(LPCRITICAL_SECTION crit, DWORD threadId) { } void waitForCriticalSection(LPCRITICAL_SECTION cs) { - auto *sequence = reinterpret_cast(&cs->LockSemaphore); - LONG observed = __atomic_load_n(sequence, __ATOMIC_ACQUIRE); - while (owningThreadId(cs) != 0) { - kernel32::WaitOnAddress(sequence, &observed, sizeof(observed), INFINITE); - observed = __atomic_load_n(sequence, __ATOMIC_ACQUIRE); + // Ticket handoff: Windows hands a contended section to exactly one + // waiter per Leave through a counted semaphore (LockSemaphore). Model the + // semaphore as a ticket count on the LockSemaphore word: Leave posts one + // ticket, each blocked Enter consumes one. The woken waiter owns the + // section by construction (no claim race on OwningThread). 
+ auto *tickets = reinterpret_cast(&cs->LockSemaphore); + for (;;) { + LONG available = __atomic_load_n(tickets, __ATOMIC_ACQUIRE); + if (available > 0) { + if (__atomic_compare_exchange_n(tickets, &available, available - 1, false, __ATOMIC_ACQ_REL, + __ATOMIC_ACQUIRE)) { + return; + } + continue; + } + kernel32::WaitOnAddress(tickets, &available, sizeof(available), INFINITE); } } void signalCriticalSection(LPCRITICAL_SECTION cs) { - auto *sequence = reinterpret_cast(&cs->LockSemaphore); - kernel32::InterlockedIncrement(const_cast(sequence)); - kernel32::WakeByAddressSingle(sequence); + auto *tickets = reinterpret_cast(&cs->LockSemaphore); + kernel32::InterlockedIncrement(const_cast(tickets)); + kernel32::WakeByAddressSingle(tickets); } inline bool trySpinAcquireCriticalSection(LPCRITICAL_SECTION cs, DWORD threadId) { @@ -1086,7 +1097,7 @@ void WINAPI EnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection) { lpCriticalSection->RecursionCount++; return; } - waitForCriticalSection(lpCriticalSection); + waitForCriticalSection(lpCriticalSection); // ticket handoff: we own the section now } setOwningThread(lpCriticalSection, threadId); lpCriticalSection->RecursionCount = 1; @@ -1100,16 +1111,13 @@ void WINAPI LeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection) { return; } - const DWORD threadId = GetCurrentThreadId(); - if (owningThreadId(lpCriticalSection) != threadId || lpCriticalSection->RecursionCount <= 0) { - DEBUG_LOG("LeaveCriticalSection: thread %u does not own %p (owner=%u, recursion=%ld)\n", threadId, - lpCriticalSection, owningThreadId(lpCriticalSection), - static_cast(lpCriticalSection->RecursionCount)); - return; - } - + // Windows LeaveCriticalSection performs NO + // caller/ownership validation; it unconditionally decrements + // RecursionCount/LockCount and releases one waiter. Guest patterns that + // rely on that (cross-thread leave, reinitialized sections) must behave + // identically here. 
auto *lockCount = const_cast(&lpCriticalSection->LockCount); - if (--lpCriticalSection->RecursionCount > 0) { + if (--lpCriticalSection->RecursionCount != 0) { kernel32::InterlockedDecrement(lockCount); return; } From 005b09ee1f72f2f7df7debf29c44cd2c9f70b892 Mon Sep 17 00:00:00 2001 From: Jeff McJunkin Date: Fri, 12 Jun 2026 14:12:09 -0700 Subject: [PATCH 3/9] loader: tolerate resource-only DLLs without a relocation directory Resource-only DLLs (e.g. MSVC's clui.dll) are linked /NOENTRY with their relocations stripped. When their preferred image base is already occupied they must be mapped elsewhere, at which point loadPE bailed with "relocation required but no relocation directory present". Such images have no executable section and address their resources via the actual mapped base, so it is safe to continue without applying relocations. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/loader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/loader.cpp b/src/loader.cpp index ea6e69c..9cd43e0 100644 --- a/src/loader.cpp +++ b/src/loader.cpp @@ -549,8 +549,15 @@ bool loadPEFromSource(wibo::Executable &executable, const PeInputView &source, b if (exec && executable.relocationDelta != 0) { if (executable.relocationDirectoryRVA == 0 || executable.relocationDirectorySize == 0) { - DEBUG_LOG("Relocation required but no relocation directory present\n"); - return false; + bool hasCodeSection = false; // msvc-compat: tolerate resource-only DLLs + for (const auto &sec : executable.sections) { + if (sec.characteristics & 0x20000000u /*IMAGE_SCN_MEM_EXECUTE*/) { hasCodeSection = true; break; } + } + if (hasCodeSection) { + DEBUG_LOG("Relocation required but no relocation directory present\n"); + return false; + } + DEBUG_LOG("Relocation required but absent; resource-only image, continuing\n"); } uint8_t *relocCursor = executable.fromRVA(executable.relocationDirectoryRVA); From 0eabae06d3f2268b11ecea6e76ecf66082f3da6f Mon Sep 17 00:00:00 
2001 From: Jeff McJunkin Date: Fri, 12 Jun 2026 14:12:10 -0700 Subject: [PATCH 4/9] kernel32: implement GetModuleHandleExA/W Backed by wibo's module registry. The GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS form (moduleInfoFromAddress) is how the MSVC CRT/compiler finds its own module to build the localized-resource (1033\clui.dll) path. Co-Authored-By: Claude Opus 4.8 (1M context) --- dll/kernel32/libloaderapi.cpp | 33 +++++++++++++++++++++++++++++++++ dll/kernel32/libloaderapi.h | 2 ++ 2 files changed, 35 insertions(+) diff --git a/dll/kernel32/libloaderapi.cpp b/dll/kernel32/libloaderapi.cpp index dfc2e08..be65b5f 100644 --- a/dll/kernel32/libloaderapi.cpp +++ b/dll/kernel32/libloaderapi.cpp @@ -74,6 +74,39 @@ HMODULE WINAPI GetModuleHandleW(LPCWSTR lpModuleName) { return GetModuleHandleA(nullptr); } +static BOOL getModuleHandleExImpl(DWORD dwFlags, const void *nameOrAddr, bool wide, HMODULE *phModule) { + constexpr DWORD GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS = 0x00000004; + wibo::ModuleInfo *info = nullptr; + if (dwFlags & GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS) { + info = wibo::moduleInfoFromAddress(const_cast(nameOrAddr)); + } else if (nameOrAddr) { + std::string name = wide ? 
wideStringToString(static_cast(nameOrAddr)) + : std::string(static_cast(nameOrAddr)); + info = wibo::findLoadedModule(name.c_str()); + } else { + info = wibo::mainModule; + } + if (!info) { + if (phModule) *phModule = NO_HANDLE; + kernel32::setLastError(ERROR_MOD_NOT_FOUND); + return FALSE; + } + if (phModule) *phModule = info->handle; + return TRUE; +} + +BOOL WINAPI GetModuleHandleExA(DWORD dwFlags, LPCSTR lpModuleName, HMODULE *phModule) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("GetModuleHandleExA(0x%x, %p, %p)\n", dwFlags, (const void *)lpModuleName, (void *)phModule); + return getModuleHandleExImpl(dwFlags, lpModuleName, false, phModule); +} + +BOOL WINAPI GetModuleHandleExW(DWORD dwFlags, LPCWSTR lpModuleName, HMODULE *phModule) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("GetModuleHandleExW(0x%x, %p, %p)\n", dwFlags, (const void *)lpModuleName, (void *)phModule); + return getModuleHandleExImpl(dwFlags, lpModuleName, true, phModule); +} + DWORD WINAPI GetModuleFileNameA(HMODULE hModule, LPSTR lpFilename, DWORD nSize) { HOST_CONTEXT_GUARD(); DEBUG_LOG("GetModuleFileNameA(%p, %p, %u)\n", hModule, lpFilename, nSize); diff --git a/dll/kernel32/libloaderapi.h b/dll/kernel32/libloaderapi.h index 7638de8..6ccccc3 100644 --- a/dll/kernel32/libloaderapi.h +++ b/dll/kernel32/libloaderapi.h @@ -7,6 +7,8 @@ namespace kernel32 { BOOL WINAPI DisableThreadLibraryCalls(HMODULE hLibModule); HMODULE WINAPI GetModuleHandleA(LPCSTR lpModuleName); HMODULE WINAPI GetModuleHandleW(LPCWSTR lpModuleName); +BOOL WINAPI GetModuleHandleExA(DWORD dwFlags, LPCSTR lpModuleName, HMODULE *phModule); +BOOL WINAPI GetModuleHandleExW(DWORD dwFlags, LPCWSTR lpModuleName, HMODULE *phModule); DWORD WINAPI GetModuleFileNameA(HMODULE hModule, LPSTR lpFilename, DWORD nSize); DWORD WINAPI GetModuleFileNameW(HMODULE hModule, LPWSTR lpFilename, DWORD nSize); HRSRC WINAPI FindResourceA(HMODULE hModule, LPCSTR lpName, LPCSTR lpType); From 103057bc163f8c53d7419a6b2f011f6641161b3e Mon Sep 17 00:00:00 2001 From: Jeff 
McJunkin Date: Fri, 12 Jun 2026 14:12:10 -0700 Subject: [PATCH 5/9] kernel32: implement Interlocked SList operations Adds InterlockedPushEntrySList / InterlockedPopEntrySList / InterlockedFlushSList / QueryDepthSList (wibo already had SLIST_HEADER and InitializeSListHead), serialized with a mutex. Co-Authored-By: Claude Opus 4.8 (1M context) --- dll/kernel32/interlockedapi.cpp | 45 +++++++++++++++++++++++++++++++++ dll/kernel32/interlockedapi.h | 4 +++ 2 files changed, 49 insertions(+) diff --git a/dll/kernel32/interlockedapi.cpp b/dll/kernel32/interlockedapi.cpp index 5d79729..13c458d 100644 --- a/dll/kernel32/interlockedapi.cpp +++ b/dll/kernel32/interlockedapi.cpp @@ -4,6 +4,7 @@ #include "context.h" #include +#include namespace kernel32 { @@ -48,4 +49,48 @@ void WINAPI InitializeSListHead(PSLIST_HEADER ListHead) { std::memset(ListHead, 0, sizeof(*ListHead)); } +// These are "Interlocked" ops on a shared list; mspdbcore's worker-thread pool +// pushes/pops the same SLIST concurrently, so they MUST be atomic. A global +// mutex serializes all SLIST operations (correct, if not strictly lock-free). 
+static std::mutex g_slistMutex; + +PSLIST_ENTRY WINAPI InterlockedPushEntrySList(PSLIST_HEADER ListHead, PSLIST_ENTRY ListEntry) { + HOST_CONTEXT_GUARD(); + if (!ListHead || !ListEntry) return nullptr; + std::lock_guard lk(g_slistMutex); + GUEST_PTR prevHead = ListHead->Head; + ListEntry->Next = prevHead; + ListHead->Head = toGuestPtr(ListEntry); + ListHead->Depth++; + return fromGuestPtr(prevHead); +} + +PSLIST_ENTRY WINAPI InterlockedPopEntrySList(PSLIST_HEADER ListHead) { + HOST_CONTEXT_GUARD(); + if (!ListHead) return nullptr; + std::lock_guard lk(g_slistMutex); + if (!ListHead->Head) return nullptr; + PSLIST_ENTRY entry = fromGuestPtr(ListHead->Head); + ListHead->Head = entry->Next; + if (ListHead->Depth) ListHead->Depth--; + return entry; +} + +PSLIST_ENTRY WINAPI InterlockedFlushSList(PSLIST_HEADER ListHead) { + HOST_CONTEXT_GUARD(); + if (!ListHead) return nullptr; + std::lock_guard lk(g_slistMutex); + PSLIST_ENTRY first = fromGuestPtr(ListHead->Head); + ListHead->Head = GUEST_NULL; + ListHead->Depth = 0; + return first; +} + +USHORT WINAPI QueryDepthSList(PSLIST_HEADER ListHead) { + HOST_CONTEXT_GUARD(); + if (!ListHead) return 0; + std::lock_guard lk(g_slistMutex); + return ListHead->Depth; +} + } // namespace kernel32 diff --git a/dll/kernel32/interlockedapi.h b/dll/kernel32/interlockedapi.h index fab6962..546b507 100644 --- a/dll/kernel32/interlockedapi.h +++ b/dll/kernel32/interlockedapi.h @@ -23,5 +23,9 @@ LONG WINAPI InterlockedDecrement(LONG volatile *Addend); LONG WINAPI InterlockedExchange(LONG volatile *Target, LONG Value); LONG WINAPI InterlockedCompareExchange(LONG volatile *Destination, LONG Exchange, LONG Comperand); void WINAPI InitializeSListHead(PSLIST_HEADER ListHead); +PSLIST_ENTRY WINAPI InterlockedPushEntrySList(PSLIST_HEADER ListHead, PSLIST_ENTRY ListEntry); +PSLIST_ENTRY WINAPI InterlockedPopEntrySList(PSLIST_HEADER ListHead); +PSLIST_ENTRY WINAPI InterlockedFlushSList(PSLIST_HEADER ListHead); +USHORT WINAPI 
QueryDepthSList(PSLIST_HEADER ListHead); } // namespace kernel32 From 5af597c40928b96f4566b82cb5ae67a7fd0d2067 Mon Sep 17 00:00:00 2001 From: Jeff McJunkin Date: Fri, 12 Jun 2026 14:12:10 -0700 Subject: [PATCH 6/9] kernel32: add LCMapStringEx/CompareStringEx and CRT/locale/SRW/wait stubs LCMapStringEx/CompareStringEx forward to LCMapStringW/CompareStringW. A small compat TU adds conservative stubs the MSVC CRT/compiler probe at startup (GetEnabledXStateFeatures, Get/SetThreadPreferredUILanguages, IsValidLocaleName, InitializeSRWLock, WaitForSingleObjectEx/MultipleEx). Co-Authored-By: Claude Opus 4.8 (1M context) --- CMakeLists.txt | 2 + dll/kernel32/msvc_compat_stubs.cpp | 64 ++++++++++++++++++++++++++++++ dll/kernel32/msvc_compat_stubs.h | 15 +++++++ dll/kernel32/winnls.cpp | 8 ++++ dll/kernel32/winnls.h | 2 + 5 files changed, 91 insertions(+) create mode 100644 dll/kernel32/msvc_compat_stubs.cpp create mode 100644 dll/kernel32/msvc_compat_stubs.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 50cae49..d32b803 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,6 +210,7 @@ add_executable(wibo dll/kernel32/stringapiset.cpp dll/kernel32/synchapi.cpp dll/kernel32/sysinfoapi.cpp + dll/kernel32/msvc_compat_stubs.cpp dll/kernel32/timezoneapi.cpp dll/kernel32/winbase.cpp dll/kernel32/wincon.cpp @@ -402,6 +403,7 @@ wibo_codegen_module(NAME kernel32 HEADERS dll/kernel32/stringapiset.h dll/kernel32/synchapi.h dll/kernel32/sysinfoapi.h + dll/kernel32/msvc_compat_stubs.h dll/kernel32/timezoneapi.h dll/kernel32/winbase.h dll/kernel32/wincon.h diff --git a/dll/kernel32/msvc_compat_stubs.cpp b/dll/kernel32/msvc_compat_stubs.cpp new file mode 100644 index 0000000..fc3bb0a --- /dev/null +++ b/dll/kernel32/msvc_compat_stubs.cpp @@ -0,0 +1,64 @@ +#include "msvc_compat_stubs.h" +#include "common.h" +#include "context.h" +#include "winnls.h" +#include "synchapi.h" + +namespace kernel32 { + +ULONGLONG WINAPI GetEnabledXStateFeatures() { + HOST_CONTEXT_GUARD(); + 
DEBUG_LOG("STUB: GetEnabledXStateFeatures()\n"); + return 0; +} + +BOOL WINAPI SetThreadPreferredUILanguages(DWORD dwFlags, void *langs, unsigned int *pnum) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("STUB: SetThreadPreferredUILanguages()\n"); + (void)dwFlags; (void)langs; if (pnum) *pnum = 0; return TRUE; +} + +BOOL WINAPI GetThreadPreferredUILanguages(DWORD dwFlags, unsigned int *pulNumLanguages, unsigned short *pwszBuf, unsigned int *pcchBuf) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("STUB: GetThreadPreferredUILanguages()\n"); + (void)dwFlags; + if (pulNumLanguages) *pulNumLanguages = 0; + if (pcchBuf) { + if (pwszBuf && *pcchBuf >= 2) { pwszBuf[0] = 0; pwszBuf[1] = 0; } + *pcchBuf = 2; + } + return TRUE; +} + +BOOL WINAPI IsValidLocaleName(void *lpLocaleName) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("STUB: IsValidLocaleName()\n"); + (void)lpLocaleName; return TRUE; +} + +VOID WINAPI InitializeSRWLock(void **SRWLock) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("STUB: InitializeSRWLock()\n"); + if (SRWLock) *SRWLock = nullptr; +} + +DWORD WINAPI WaitForSingleObjectEx(HANDLE hHandle, DWORD dwMilliseconds, int bAlertable) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("STUB: WaitForSingleObjectEx()\n"); + (void)bAlertable; return WaitForSingleObject(hHandle, dwMilliseconds); +} + +DWORD WINAPI WaitForMultipleObjectsEx(DWORD nCount, const HANDLE *lpHandles, int bWaitAll, DWORD dwMilliseconds, int bAlertable) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("STUB: WaitForMultipleObjectsEx()\n"); + (void)bAlertable; return WaitForMultipleObjects(nCount, lpHandles, bWaitAll, dwMilliseconds); +} + +int WINAPI CompareStringEx(void *lpLocaleName, DWORD dwCmpFlags, LPCWCH lpString1, int cchCount1, LPCWCH lpString2, int cchCount2, void *lpVersionInformation, void *lpReserved, void *lParam) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("STUB: CompareStringEx()\n"); + (void)lpLocaleName; (void)lpVersionInformation; (void)lpReserved; (void)lParam; + return CompareStringW(0x0409, dwCmpFlags, lpString1, cchCount1, 
lpString2, cchCount2); +} + +} // namespace kernel32 diff --git a/dll/kernel32/msvc_compat_stubs.h b/dll/kernel32/msvc_compat_stubs.h new file mode 100644 index 0000000..7aea0bf --- /dev/null +++ b/dll/kernel32/msvc_compat_stubs.h @@ -0,0 +1,15 @@ +#pragma once +#include "types.h" + +namespace kernel32 { + +ULONGLONG WINAPI GetEnabledXStateFeatures(); +BOOL WINAPI SetThreadPreferredUILanguages(DWORD dwFlags, void *langs, unsigned int *pnum); +BOOL WINAPI GetThreadPreferredUILanguages(DWORD dwFlags, unsigned int *pulNumLanguages, unsigned short *pwszBuf, unsigned int *pcchBuf); +BOOL WINAPI IsValidLocaleName(void *lpLocaleName); +VOID WINAPI InitializeSRWLock(void **SRWLock); +DWORD WINAPI WaitForSingleObjectEx(HANDLE hHandle, DWORD dwMilliseconds, int bAlertable); +DWORD WINAPI WaitForMultipleObjectsEx(DWORD nCount, const HANDLE *lpHandles, int bWaitAll, DWORD dwMilliseconds, int bAlertable); +int WINAPI CompareStringEx(void *lpLocaleName, DWORD dwCmpFlags, LPCWCH lpString1, int cchCount1, LPCWCH lpString2, int cchCount2, void *lpVersionInformation, void *lpReserved, void *lParam); + +} // namespace kernel32 diff --git a/dll/kernel32/winnls.cpp b/dll/kernel32/winnls.cpp index 6d684c0..0ccb54e 100644 --- a/dll/kernel32/winnls.cpp +++ b/dll/kernel32/winnls.cpp @@ -435,6 +435,14 @@ int WINAPI LCMapStringW(LCID Locale, DWORD dwMapFlags, LPCWCH lpSrcStr, int cchS return static_cast(srcLen); } +int WINAPI LCMapStringEx(LPCWSTR lpLocaleName, DWORD dwMapFlags, LPCWCH lpSrcStr, int cchSrc, LPWSTR lpDestStr, + int cchDest, void *lpVersionInformation, void *lpReserved, void *sortHandle) { + HOST_CONTEXT_GUARD(); + DEBUG_LOG("LCMapStringEx(%p, 0x%x, %p, %d, %p, %d)\n", lpLocaleName, dwMapFlags, lpSrcStr, cchSrc, lpDestStr, cchDest); + (void)lpLocaleName; (void)lpVersionInformation; (void)lpReserved; (void)sortHandle; + return LCMapStringW(0x0409, dwMapFlags, lpSrcStr, cchSrc, lpDestStr, cchDest); +} + int WINAPI LCMapStringA(LCID Locale, DWORD dwMapFlags, LPCCH lpSrcStr, int 
cchSrc, LPSTR lpDestStr, int cchDest) { HOST_CONTEXT_GUARD(); DEBUG_LOG("LCMapStringA(%u, 0x%x, %p, %d, %p, %d)\n", Locale, dwMapFlags, lpSrcStr, cchSrc, lpDestStr, cchDest); diff --git a/dll/kernel32/winnls.h b/dll/kernel32/winnls.h index 3700351..5923df3 100644 --- a/dll/kernel32/winnls.h +++ b/dll/kernel32/winnls.h @@ -37,5 +37,7 @@ BOOL WINAPI IsDBCSLeadByte(BYTE TestChar); BOOL WINAPI IsDBCSLeadByteEx(UINT CodePage, BYTE TestChar); int WINAPI LCMapStringW(LCID Locale, DWORD dwMapFlags, LPCWCH lpSrcStr, int cchSrc, LPWSTR lpDestStr, int cchDest); int WINAPI LCMapStringA(LCID Locale, DWORD dwMapFlags, LPCCH lpSrcStr, int cchSrc, LPSTR lpDestStr, int cchDest); +int WINAPI LCMapStringEx(LPCWSTR lpLocaleName, DWORD dwMapFlags, LPCWCH lpSrcStr, int cchSrc, LPWSTR lpDestStr, + int cchDest, void *lpVersionInformation, void *lpReserved, void *sortHandle); } // namespace kernel32 From ecfee83a0bb1a21fdbb7f96ad5b3d5ffc2afa0cb Mon Sep 17 00:00:00 2001 From: Jeff McJunkin Date: Fri, 12 Jun 2026 14:12:10 -0700 Subject: [PATCH 7/9] ntdll: implement NtCreateFile/NtClose/NtQueryDirectoryFile and Rtl*UnicodeString The MSVC frontend opens source/output files through the NT file API. NtCreateFile maps OBJECT_ATTRIBUTES onto kernel32 CreateFileW (with FILE_FLAG_BACKUP_SEMANTICS for FILE_DIRECTORY_FILE); Rtl*UnicodeString use the process heap; NtQueryDirectoryFile enumerates via std::filesystem. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CMakeLists.txt | 3 +- dll/msvc_compat_ntdll.cpp | 261 ++++++++++++++++++++++++++++++++++++++ dll/msvc_compat_ntdll.h | 28 ++++ 3 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 dll/msvc_compat_ntdll.cpp create mode 100644 dll/msvc_compat_ntdll.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d32b803..6982a04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -220,6 +220,7 @@ add_executable(wibo dll/lmgr.cpp dll/mscoree.cpp dll/ntdll.cpp + dll/msvc_compat_ntdll.cpp dll/ole32.cpp dll/rpcrt4.cpp dll/shlwapi.cpp @@ -384,7 +385,7 @@ wibo_codegen_module(NAME vcruntime HEADERS dll/vcruntime.h) wibo_codegen_module(NAME lmgr HEADERS dll/lmgr.h) wibo_codegen_module(NAME ole32 HEADERS dll/ole32.h) wibo_codegen_module(NAME user32 HEADERS dll/user32.h) -wibo_codegen_module(NAME ntdll HEADERS dll/ntdll.h) +wibo_codegen_module(NAME ntdll HEADERS dll/ntdll.h dll/msvc_compat_ntdll.h) wibo_codegen_module(NAME kernel32 HEADERS dll/kernel32/debugapi.h dll/kernel32/errhandlingapi.h diff --git a/dll/msvc_compat_ntdll.cpp b/dll/msvc_compat_ntdll.cpp new file mode 100644 index 0000000..a6b4053 --- /dev/null +++ b/dll/msvc_compat_ntdll.cpp @@ -0,0 +1,261 @@ +#include "msvc_compat_ntdll.h" + +#include "common.h" +#include "context.h" +#include "errors.h" +#include "files.h" +#include "handles.h" +#include "kernel32/fileapi.h" +#include "kernel32/handleapi.h" +#include "kernel32/heapapi.h" +#include "kernel32/internal.h" +#include "strutil.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +constexpr NTSTATUS STATUS_SUCCESS_ = 0x00000000; +constexpr NTSTATUS STATUS_NO_MORE_FILES_ = 0x80000006; +constexpr NTSTATUS STATUS_INVALID_HANDLE_ = 0xC0000008; +constexpr NTSTATUS STATUS_INVALID_PARAMETER_ = 0xC000000D; +constexpr NTSTATUS STATUS_BUFFER_OVERFLOW_ = 0x80000005; + +uint64_t unixToFiletime(time_t sec) { + return (static_cast(sec) + 11644473600ULL) * 10000000ULL; +} + 
+bool wildcardMatchCI(const std::string &name, const std::string &pat) {
+	// Case-insensitive '*'/'?' match; '*' is tested before the literal compare so a '*' inside a host file name cannot consume the pattern's wildcard. '<' '>' '"' DOS wildcards not handled.
+	size_t n = 0, p = 0, star = std::string::npos, mark = 0;
+	auto low = [](char c) { return (c >= 'A' && c <= 'Z') ? char(c + 32) : c; };
+	while (n < name.size()) {
+		if (p < pat.size() && pat[p] == '*') {
+			star = p++; mark = n;
+		} else if (p < pat.size() && (pat[p] == '?' || low(pat[p]) == low(name[n]))) {
+			++n; ++p;
+		} else if (star != std::string::npos) {
+			p = star + 1; n = ++mark;
+		} else {
+			return false;
+		}
+	}
+	while (p < pat.size() && pat[p] == '*') ++p;
+	return p == pat.size();
+}
+
+struct DirEnum {
+	std::vector<std::string> names;
+	size_t pos = 0;
+};
+std::mutex g_dirMutex;
+std::map<HANDLE, DirEnum> g_dirEnum;
+
+void putU32(uint8_t *b, size_t off, uint32_t v) { std::memcpy(b + off, &v, 4); }
+void putU64(uint8_t *b, size_t off, uint64_t v) { std::memcpy(b + off, &v, 8); }
+
+// FileName field offset for the directory-info classes cl is likely to request; any other class falls back to the FileBothDirectoryInformation offset (94).
+size_t fileNameOffsetForClass(ULONG cls) {
+	switch (cls) {
+	case 1: return 64; // FileDirectoryInformation
+	case 2: return 68; // FileFullDirectoryInformation
+	case 3: return 94; // FileBothDirectoryInformation
+	case 12: return 12; // FileNamesInformation
+	case 37: return 104; // FileIdBothDirectoryInformation
+	case 38: return 80; // FileIdFullDirectoryInformation
+	default: return 94;
+	}
+}
+
+} // namespace
+
+namespace ntdll {
+
+BOOLEAN WINAPI RtlCreateUnicodeString(UNICODE_STRING *DestinationString, const WCHAR *SourceString) {
+	HOST_CONTEXT_GUARD();
+	DEBUG_LOG("RtlCreateUnicodeString(%p, %p)\n", (void *)DestinationString, (const void *)SourceString);
+	if (!DestinationString) {
+		return FALSE;
+	}
+	size_t chars = SourceString ? wstrlen(SourceString) : 0;
+	size_t bytes = (chars + 1) * sizeof(uint16_t);
+	void *buf = kernel32::HeapAlloc(kernel32::GetProcessHeap(), 0, bytes);
+	if (!buf) {
+		return FALSE;
+	}
+	if (SourceString) {
+		std::memcpy(buf, SourceString, bytes);
+	} else {
+		*static_cast<uint16_t *>(buf) = 0;
+	}
+	DestinationString->Length = static_cast(chars * sizeof(uint16_t));
+	DestinationString->MaximumLength = static_cast(bytes);
+	DestinationString->Buffer = toGuestPtr(buf);
+	return TRUE;
+}
+
+VOID WINAPI RtlFreeUnicodeString(UNICODE_STRING *UnicodeString) {
+	HOST_CONTEXT_GUARD();
+	DEBUG_LOG("RtlFreeUnicodeString(%p)\n", (void *)UnicodeString);
+	if (UnicodeString && UnicodeString->Buffer) {
+		kernel32::HeapFree(kernel32::GetProcessHeap(), 0, fromGuestPtr(UnicodeString->Buffer));
+		UnicodeString->Buffer = GUEST_NULL;
+		UnicodeString->Length = 0;
+		UnicodeString->MaximumLength = 0;
+	}
+}
+
+NTSTATUS WINAPI NtClose(HANDLE Handle) {
+	HOST_CONTEXT_GUARD();
+	DEBUG_LOG("NtClose(%p)\n", Handle);
+	{
+		std::lock_guard<std::mutex> lk(g_dirMutex);
+		g_dirEnum.erase(Handle);
+	}
+	return kernel32::CloseHandle(Handle) ? STATUS_SUCCESS_ : STATUS_INVALID_HANDLE_;
+}
+
+NTSTATUS WINAPI NtCreateFile(HANDLE *FileHandle, ULONG DesiredAccess, OBJECT_ATTRIBUTES *ObjectAttributes,
+                             PIO_STATUS_BLOCK IoStatusBlock, PLARGE_INTEGER AllocationSize, ULONG FileAttributes,
+                             ULONG ShareAccess, ULONG CreateDisposition, ULONG CreateOptions, PVOID EaBuffer,
+                             ULONG EaLength) {
+	HOST_CONTEXT_GUARD();
+	(void)AllocationSize; (void)EaBuffer; (void)EaLength;
+	if (!FileHandle || !ObjectAttributes || !ObjectAttributes->ObjectName) {
+		return STATUS_INVALID_PARAMETER_;
+	}
+	UNICODE_STRING *name = ObjectAttributes->ObjectName;
+	const uint16_t *buf = fromGuestPtr(name->Buffer);
+	size_t n = buf ? name->Length / sizeof(uint16_t) : 0;
+	if (n >= 4 && buf[0] == '\\' && buf[1] == '?' && buf[2] == '?' && buf[3] == '\\') {
+		buf += 4;
+		n -= 4;
+	}
+	std::vector<uint16_t> wname(buf, buf + n);
+	wname.push_back(0);
+	DEBUG_LOG("NtCreateFile('%s' access=0x%x disp=%u share=%u opts=0x%x)\n",
+	          wideStringToString(reinterpret_cast(wname.data())).c_str(), DesiredAccess, CreateDisposition,
+	          ShareAccess, CreateOptions);
+
+	DWORD disp;
+	switch (CreateDisposition) {
+	case 0: disp = 2; break; // FILE_SUPERSEDE -> CREATE_ALWAYS
+	case 1: disp = 3; break; // FILE_OPEN -> OPEN_EXISTING
+	case 2: disp = 1; break; // FILE_CREATE -> CREATE_NEW
+	case 3: disp = 4; break; // FILE_OPEN_IF -> OPEN_ALWAYS
+	case 4: disp = 5; break; // FILE_OVERWRITE -> TRUNCATE_EXISTING
+	case 5: disp = 2; break; // FILE_OVERWRITE_IF -> CREATE_ALWAYS
+	default: disp = 3; break;
+	}
+	DWORD flags = FileAttributes ? FileAttributes : 0x80; // FILE_ATTRIBUTE_NORMAL
+	if (CreateOptions & 0x00001000u) flags |= 0x04000000u; // FILE_DELETE_ON_CLOSE
+	if (CreateOptions & 0x00000001u) flags |= 0x02000000u; // FILE_DIRECTORY_FILE -> BACKUP_SEMANTICS
+
+	HANDLE h = kernel32::CreateFileW(reinterpret_cast(wname.data()), DesiredAccess, ShareAccess, nullptr, disp,
+	                                 flags, NO_HANDLE);
+	if (h == INVALID_HANDLE_VALUE) {
+		DWORD e = kernel32::getLastError();
+		NTSTATUS st = (e == ERROR_ACCESS_DENIED) ? 0xC0000022 : 0xC0000034; // STATUS_ACCESS_DENIED : STATUS_OBJECT_NAME_NOT_FOUND
+		if (IoStatusBlock) { IoStatusBlock->Status = st; IoStatusBlock->Information = 0; }
+		return st;
+	}
+	*FileHandle = h;
+	if (IoStatusBlock) { IoStatusBlock->Status = STATUS_SUCCESS_; IoStatusBlock->Information = 1; } // FILE_OPENED
+	return STATUS_SUCCESS_;
+}
+
+NTSTATUS WINAPI NtQueryDirectoryFile(HANDLE FileHandle, HANDLE Event, PVOID ApcRoutine, PVOID ApcContext,
+                                     PIO_STATUS_BLOCK IoStatusBlock, PVOID FileInformation, ULONG Length,
+                                     ULONG FileInformationClass, ULONG ReturnSingleEntry, UNICODE_STRING *FileName,
+                                     ULONG RestartScan) {
+	HOST_CONTEXT_GUARD();
+	(void)Event; (void)ApcRoutine; (void)ApcContext; (void)ReturnSingleEntry;
+	auto fobj = wibo::handles().getAs(FileHandle);
+	if (!fobj) {
+		return STATUS_INVALID_HANDLE_;
+	}
+	std::string pattern = "*";
+	if (FileName && FileName->Buffer && FileName->Length) {
+		const uint16_t *pb = fromGuestPtr(FileName->Buffer);
+		std::vector<uint16_t> pw(pb, pb + FileName->Length / sizeof(uint16_t));
+		pw.push_back(0);
+		pattern = wideStringToString(reinterpret_cast(pw.data()));
+	}
+	DEBUG_LOG("NtQueryDirectoryFile(class=%u pattern='%s' restart=%u dir='%s')\n", FileInformationClass,
+	          pattern.c_str(), RestartScan, fobj->canonicalPath.c_str());
+
+	std::lock_guard<std::mutex> lk(g_dirMutex);
+	DirEnum &en = g_dirEnum[FileHandle];
+	if (RestartScan || (en.names.empty() && en.pos == 0)) {
+		en.names.clear();
+		en.pos = 0;
+		std::error_code ec;
+		for (auto &de : std::filesystem::directory_iterator(fobj->canonicalPath, ec)) {
+			std::string fn = de.path().filename().string();
+			if (wildcardMatchCI(fn, pattern)) {
+				en.names.push_back(fn);
+			}
+		}
+		std::sort(en.names.begin(), en.names.end());
+	}
+	if (en.pos >= en.names.size()) {
+		if (IoStatusBlock) { IoStatusBlock->Status = STATUS_NO_MORE_FILES_; IoStatusBlock->Information = 0; }
+		return STATUS_NO_MORE_FILES_;
+	}
+
+	const std::string &fname = en.names[en.pos];
+	std::filesystem::path full = fobj->canonicalPath / fname;
+	struct stat stbuf{};
+	bool haveStat = ::stat(full.c_str(), &stbuf) == 0;
+	bool isDir = haveStat && S_ISDIR(stbuf.st_mode);
+
+	size_t nameOff = fileNameOffsetForClass(FileInformationClass);
+	size_t nameBytes = fname.size() * sizeof(uint16_t);
+	size_t need = nameOff + nameBytes;
+	if (Length < need) {
+		if (IoStatusBlock) { IoStatusBlock->Status = STATUS_BUFFER_OVERFLOW_; IoStatusBlock->Information = 0; }
+		return STATUS_BUFFER_OVERFLOW_;
+	}
+
+	uint8_t *b = static_cast<uint8_t *>(FileInformation);
+	std::memset(b, 0, nameOff);
+	uint64_t ft = unixToFiletime(haveStat ? stbuf.st_mtime : 0);
+	uint32_t attrs = isDir ? 0x10u : 0x20u; // DIRECTORY : ARCHIVE
+	uint64_t size = (haveStat && !isDir) ? static_cast<uint64_t>(stbuf.st_size) : 0;
+
+	if (FileInformationClass == 12) { // FileNamesInformation
+		putU32(b, 0, 0); // NextEntryOffset
+		putU32(b, 4, 0); // FileIndex
+		putU32(b, 8, static_cast<uint32_t>(nameBytes)); // FileNameLength
+	} else {
+		putU32(b, 0, 0); // NextEntryOffset (single/last)
+		putU32(b, 4, 0); // FileIndex
+		putU64(b, 8, ft); // CreationTime
+		putU64(b, 16, ft); // LastAccessTime
+		putU64(b, 24, ft); // LastWriteTime
+		putU64(b, 32, ft); // ChangeTime
+		putU64(b, 40, size); // EndOfFile
+		putU64(b, 48, size); // AllocationSize
+		putU32(b, 56, attrs); // FileAttributes
+		putU32(b, 60, static_cast<uint32_t>(nameBytes)); // FileNameLength
+		// EaSize / ShortName / FileId already zeroed by memset for classes != 1.
+	}
+	// Write the file name as UTF-16 at the class-specific offset. Each byte of the host name is widened on its own, so only ASCII names round-trip.
+	uint16_t *namePtr = reinterpret_cast<uint16_t *>(b + nameOff);
+	for (size_t i = 0; i < fname.size(); ++i) {
+		namePtr[i] = static_cast<uint16_t>(static_cast<unsigned char>(fname[i]));
+	}
+
+	en.pos++;
+	if (IoStatusBlock) { IoStatusBlock->Status = STATUS_SUCCESS_; IoStatusBlock->Information = need; }
+	return STATUS_SUCCESS_;
+}
+
+} // namespace ntdll
diff --git a/dll/msvc_compat_ntdll.h b/dll/msvc_compat_ntdll.h
new file mode 100644
index 0000000..99a4713
--- /dev/null
+++ b/dll/msvc_compat_ntdll.h
@@ -0,0 +1,28 @@
+#pragma once
+#include "types.h"
+#include "ntdll.h"
+
+typedef struct _OBJECT_ATTRIBUTES {
+	ULONG Length;
+	HANDLE RootDirectory;
+	UNICODE_STRING *ObjectName;
+	ULONG Attributes;
+	PVOID SecurityDescriptor;
+	PVOID SecurityQualityOfService;
+} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;
+
+namespace ntdll {
+
+BOOLEAN WINAPI RtlCreateUnicodeString(UNICODE_STRING *DestinationString, const WCHAR *SourceString);
+VOID WINAPI RtlFreeUnicodeString(UNICODE_STRING *UnicodeString);
+NTSTATUS WINAPI NtClose(HANDLE Handle);
+NTSTATUS WINAPI NtCreateFile(HANDLE *FileHandle, ULONG DesiredAccess, OBJECT_ATTRIBUTES *ObjectAttributes,
+                             PIO_STATUS_BLOCK IoStatusBlock, PLARGE_INTEGER AllocationSize, ULONG FileAttributes,
+                             ULONG ShareAccess, ULONG CreateDisposition, ULONG CreateOptions, PVOID EaBuffer,
+                             ULONG EaLength);
+NTSTATUS WINAPI NtQueryDirectoryFile(HANDLE FileHandle, HANDLE Event, PVOID ApcRoutine, PVOID ApcContext,
+                                     PIO_STATUS_BLOCK IoStatusBlock, PVOID FileInformation, ULONG Length,
+                                     ULONG FileInformationClass, ULONG ReturnSingleEntry, UNICODE_STRING *FileName,
+                                     ULONG RestartScan);
+
+} // namespace ntdll
From 1e785de18eb9239efee38ddf0281d87e44f7cbcb Mon Sep 17 00:00:00 2001
From: Jeff McJunkin
Date: Fri, 12 Jun 2026 14:12:10 -0700
Subject: [PATCH 8/9] tls: allocate static TLS for threads created after TLS modules load

General threading fix (not MSVC-specific).
Two defects left __declspec(thread) data broken on guest-created threads (NULL ThreadLocalStoragePointer), crashing MSVC c2.dll's parallel-codegen workers: 1. initializeTib() set up the module-TLS array via ensureModuleArrayCapacityLocked(g_moduleArrayCapacity), which early-returns when required <= current capacity, so a thread created after the TLS-bearing DLLs loaded got no array. Allocate it directly for the new TIB. 2. notifyDllThreadAttach() only allocated static TLS for modules passing shouldDeliverThreadNotifications(); a DLL that calls DisableThreadLibraryCalls (c2.dll) is excluded, yet Windows still allocates its static TLS. Allocate static TLS for every hasTls module. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/modules.cpp | 9 +++++++++ src/tls.cpp | 7 +++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/modules.cpp b/src/modules.cpp index 5c930d2..0eddd65 100644 --- a/src/modules.cpp +++ b/src/modules.cpp @@ -1089,6 +1089,15 @@ void releaseModuleTls(ModuleInfo &module) { void notifyDllThreadAttach() { auto reg = registry(); + // msvc-compat: static TLS must exist for EVERY module that has it, even ones + // that disabled thread-library-calls (that flag suppresses DllMain calls, + // not static __declspec(thread) TLS allocation). 
+ for (auto &pair : reg->modulesByKey) { + wibo::ModuleInfo *m = pair.second.get(); + if (m && m->tlsInfo.hasTls) { + allocateModuleTlsForThread(*m, currentThreadTeb); + } + } std::vector targets; targets.reserve(reg->modulesByKey.size()); for (auto &pair : reg->modulesByKey) { diff --git a/src/tls.cpp b/src/tls.cpp index 7322de3..9cfc6cf 100644 --- a/src/tls.cpp +++ b/src/tls.cpp @@ -248,8 +248,11 @@ void initializeTib(TEB *tib) { setExpansionArray(tib, arr); } } - if (g_moduleArrayCapacity > 0) { - if (!ensureModuleArrayCapacityLocked(g_moduleArrayCapacity)) { + if (g_moduleArrayCapacity > 0 && !getModuleArray(tib)) { // msvc-compat: per-new-tib module array + if (auto *arr = allocateTlsArray(g_moduleArrayCapacity)) { + g_moduleArrays[tib] = arr; + tib->ThreadLocalStoragePointer = toGuestPtr(arr->slots); + } else { DEBUG_LOG("initializeTib: failed to allocate module TLS array for %p\n", tib); } } From a0614de7b93e221f2a3ed76bd055a5fe2d1adb9b Mon Sep 17 00:00:00 2001 From: Jeff McJunkin Date: Fri, 12 Jun 2026 14:12:10 -0700 Subject: [PATCH 9/9] ntdll: report OS version 6.1 (Windows 7) for VS2013 c1.dll source-open path VS2013 c1.dll selects its source-file resolution strategy from the OS version reported by RtlGetVersion. 6.2 (Windows 8) steers it into a directory-canonicalization path that dead-ends on wibo's Z:-mapped volumes (C1083 without ever opening the source); 6.1 makes it use the direct CreateFileW(source) path. Windows 7 (6.1, build 7601) was VS2013's contemporary host. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- dll/ntdll.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dll/ntdll.cpp b/dll/ntdll.cpp index 17309d4..7516ce7 100644 --- a/dll/ntdll.cpp +++ b/dll/ntdll.cpp @@ -55,8 +55,8 @@ struct RTL_OSVERSIONINFOEXW : RTL_OSVERSIONINFOW { using PRTL_OSVERSIONINFOEXW = RTL_OSVERSIONINFOEXW *; constexpr ULONG kOsMajorVersion = 6; -constexpr ULONG kOsMinorVersion = 2; -constexpr ULONG kOsBuildNumber = 0; +constexpr ULONG kOsMinorVersion = 1; // msvc-compat: Win7 (6.1) -> VS2013 c1 uses direct source-open path +constexpr ULONG kOsBuildNumber = 7601; // Windows 7 SP1 constexpr ULONG kOsPlatformId = 2; // VER_PLATFORM_WIN32_NT constexpr BYTE kProductTypeWorkstation = 1; // VER_NT_WORKSTATION