Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions dll/kernel32/fibersapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,21 @@
#include "errors.h"
#include "internal.h"

#include <mutex>

namespace {

// FLS without fibers is THREAD-local storage on
// Windows (every thread is exactly one fiber); only the index allocation map
// is process-wide. The previous process-global value array made all threads
// share one cell per index, which clobbers msvcr120's per-thread data (_ptd) --
// including the _beginthreadex entry/argument that _threadstartex re-reads
// through FlsGetValue -- so concurrently-starting threads could duplicate or
// swap their start arguments.
constexpr DWORD kMaxFlsValues = 0x100;
std::mutex g_flsMutex;
bool g_flsValuesUsed[kMaxFlsValues] = {false};
LPVOID g_flsValues[kMaxFlsValues] = {nullptr};
thread_local LPVOID t_flsValues[kMaxFlsValues] = {nullptr};

} // namespace

Expand All @@ -19,10 +29,11 @@ DWORD WINAPI FlsAlloc(PFLS_CALLBACK_FUNCTION lpCallback) {
HOST_CONTEXT_GUARD();
DEBUG_LOG("FlsAlloc(%p)", lpCallback);
// If the function succeeds, the return value is an FLS index initialized to zero.
std::lock_guard lk(g_flsMutex);
for (DWORD i = 0; i < kMaxFlsValues; i++) {
if (g_flsValuesUsed[i] == false) {
g_flsValuesUsed[i] = true;
g_flsValues[i] = nullptr;
t_flsValues[i] = nullptr;
DEBUG_LOG(" -> %d\n", i);
return i;
}
Expand All @@ -35,6 +46,7 @@ DWORD WINAPI FlsAlloc(PFLS_CALLBACK_FUNCTION lpCallback) {
BOOL WINAPI FlsFree(DWORD dwFlsIndex) {
HOST_CONTEXT_GUARD();
DEBUG_LOG("FlsFree(%u)\n", dwFlsIndex);
std::lock_guard lk(g_flsMutex);
if (dwFlsIndex < kMaxFlsValues && g_flsValuesUsed[dwFlsIndex]) {
g_flsValuesUsed[dwFlsIndex] = false;
return TRUE;
Expand All @@ -49,7 +61,7 @@ PVOID WINAPI FlsGetValue(DWORD dwFlsIndex) {
VERBOSE_LOG("FlsGetValue(%u)\n", dwFlsIndex);
PVOID result = nullptr;
if (dwFlsIndex < kMaxFlsValues && g_flsValuesUsed[dwFlsIndex]) {
result = g_flsValues[dwFlsIndex];
result = t_flsValues[dwFlsIndex];
// See https://learn.microsoft.com/en-us/windows/win32/api/fibersapi/nf-fibersapi-flsgetvalue
setLastError(ERROR_SUCCESS);
} else {
Expand All @@ -63,7 +75,7 @@ BOOL WINAPI FlsSetValue(DWORD dwFlsIndex, PVOID lpFlsData) {
HOST_CONTEXT_GUARD();
VERBOSE_LOG("FlsSetValue(%u, %p)\n", dwFlsIndex, lpFlsData);
if (dwFlsIndex < kMaxFlsValues && g_flsValuesUsed[dwFlsIndex]) {
g_flsValues[dwFlsIndex] = lpFlsData;
t_flsValues[dwFlsIndex] = lpFlsData;
return TRUE;
} else {
setLastError(ERROR_INVALID_PARAMETER);
Expand Down
44 changes: 26 additions & 18 deletions dll/kernel32/synchapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,18 +455,29 @@ inline void setOwningThread(LPCRITICAL_SECTION crit, DWORD threadId) {
}

void waitForCriticalSection(LPCRITICAL_SECTION cs) {
auto *sequence = reinterpret_cast<LONG volatile *>(&cs->LockSemaphore);
LONG observed = __atomic_load_n(sequence, __ATOMIC_ACQUIRE);
while (owningThreadId(cs) != 0) {
kernel32::WaitOnAddress(sequence, &observed, sizeof(observed), INFINITE);
observed = __atomic_load_n(sequence, __ATOMIC_ACQUIRE);
// Ticket handoff: Windows hands a contended section to exactly one
// waiter per Leave through a counted semaphore (LockSemaphore). Model the
// semaphore as a ticket count on the LockSemaphore word: Leave posts one
// ticket, each blocked Enter consumes one. The woken waiter owns the
// section by construction (no claim race on OwningThread).
auto *tickets = reinterpret_cast<LONG volatile *>(&cs->LockSemaphore);
for (;;) {
LONG available = __atomic_load_n(tickets, __ATOMIC_ACQUIRE);
if (available > 0) {
if (__atomic_compare_exchange_n(tickets, &available, available - 1, false, __ATOMIC_ACQ_REL,
__ATOMIC_ACQUIRE)) {
return;
}
continue;
}
kernel32::WaitOnAddress(tickets, &available, sizeof(available), INFINITE);
}
}

void signalCriticalSection(LPCRITICAL_SECTION cs) {
auto *sequence = reinterpret_cast<LONG *>(&cs->LockSemaphore);
kernel32::InterlockedIncrement(const_cast<LONG volatile *>(sequence));
kernel32::WakeByAddressSingle(sequence);
auto *tickets = reinterpret_cast<LONG *>(&cs->LockSemaphore);
kernel32::InterlockedIncrement(const_cast<LONG volatile *>(tickets));
kernel32::WakeByAddressSingle(tickets);
}

inline bool trySpinAcquireCriticalSection(LPCRITICAL_SECTION cs, DWORD threadId) {
Expand Down Expand Up @@ -1086,7 +1097,7 @@ void WINAPI EnterCriticalSection(LPCRITICAL_SECTION lpCriticalSection) {
lpCriticalSection->RecursionCount++;
return;
}
waitForCriticalSection(lpCriticalSection);
waitForCriticalSection(lpCriticalSection); // ticket handoff: we own the section now
}
setOwningThread(lpCriticalSection, threadId);
lpCriticalSection->RecursionCount = 1;
Expand All @@ -1100,16 +1111,13 @@ void WINAPI LeaveCriticalSection(LPCRITICAL_SECTION lpCriticalSection) {
return;
}

const DWORD threadId = GetCurrentThreadId();
if (owningThreadId(lpCriticalSection) != threadId || lpCriticalSection->RecursionCount <= 0) {
DEBUG_LOG("LeaveCriticalSection: thread %u does not own %p (owner=%u, recursion=%ld)\n", threadId,
lpCriticalSection, owningThreadId(lpCriticalSection),
static_cast<long>(lpCriticalSection->RecursionCount));
return;
}

// Windows LeaveCriticalSection performs NO
// caller/ownership validation; it unconditionally decrements
// RecursionCount/LockCount and releases one waiter. Guest patterns that
// rely on that (cross-thread leave, reinitialized sections) must behave
// identically here.
auto *lockCount = const_cast<LONG volatile *>(&lpCriticalSection->LockCount);
if (--lpCriticalSection->RecursionCount > 0) {
if (--lpCriticalSection->RecursionCount != 0) {
kernel32::InterlockedDecrement(lockCount);
return;
}
Expand Down
Loading