diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed4045e..fdb5b12 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,7 +23,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: '1.25.x' + go-version: '1.26.x' cache: true - name: Verify formatting @@ -51,7 +51,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: '1.25.x' + go-version: '1.26.x' cache: true - name: Test with coverage (exclude internal/*) @@ -118,7 +118,7 @@ jobs: - name: Setup Go uses: actions/setup-go@v5 with: - go-version: '1.25.x' + go-version: '1.26.x' cache: true - name: Cross compile (packages + tests, no execution) diff --git a/go.mod b/go.mod index 5279650..d2380e3 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module code.hybscloud.com/spin -go 1.25 +go 1.26 diff --git a/lock.go b/lock.go index da59319..cc33fd0 100644 --- a/lock.go +++ b/lock.go @@ -12,22 +12,44 @@ import ( // Lock is a minimal, non-fair spin lock intended for very short // critical sections on hot paths. It avoids allocations and OS mutex // overhead but should not be used as a general-purpose lock. +// +// Acquisition uses FAA (Fetch-And-Add) with a TTAS (test-and-test-and-set) +// slow path. FAA is a single atomic instruction (LOCK XADD on x86, +// LDADDAL on arm64 with LSE). With n contending waiters, a CAS retry loop +// can cause O(n²) cache-line invalidations while FAA keeps traffic at O(n). +// The TTAS slow path spins on read-only Load (MESI Shared) to avoid +// bouncing the cache line, and only attempts FAA when the lock appears free. type Lock struct { _ noCopy n atomic.Uintptr } -// Lock acquires the lock, spinning with an adaptive backoff. +// Lock acquires the lock. +// Fast path: single FAA. Slow path: TTAS (test-and-test-and-set) — +// spin on read-only Load to keep the cache line in MESI Shared state, +// then attempt FAA only when the lock appears free. 
func (sl *Lock) Lock() { - for { - n := sl.n.Add(1) - if n == 1 { - return - } else if n < 4 { + if sl.n.Add(1) == 1 { + return + } + sl.lockSlow() +} + +func (sl *Lock) lockSlow() { + for i := 0; ; i++ { + // Spin on Load (read-only): multiple cores hold the line in + // Shared state with zero cross-core invalidations. Only + // transition to Add (write) when the lock appears released. + if sl.n.Load() == 0 { + if sl.n.Add(1) == 1 { + return + } + } + if i >= 4 { + runtime.Gosched() + } else { Pause(defaultPauseCycles) - continue } - runtime.Gosched() } } @@ -38,6 +60,16 @@ func (sl *Lock) Unlock() { // Try attempts to acquire the lock without blocking. // It returns true if the lock was acquired. +// +// Uses FAA rather than CAS: on x86 LOCK CMPXCHG takes exclusive +// cache-line ownership regardless of comparison outcome, so a failed +// CAS costs the same coherence traffic as FAA. FAA keeps the atomic +// pattern consistent across the Lock API. +// +// A failed Try increments n without a corresponding decrement. +// This is intentional: Unlock resets n to zero via Store(0), +// clearing all accumulated increments. The uintptr counter space +// is large enough that overflow is not a practical concern. func (sl *Lock) Try() bool { return sl.n.Add(1) == 1 } diff --git a/pause_test.go b/pause_test.go index 818d359..815a087 100644 --- a/pause_test.go +++ b/pause_test.go @@ -15,7 +15,7 @@ import ( ) func TestPause(t *testing.T) { - // Default (20 cycles) + // Default (30 cycles) spin.Pause() // Single cycle diff --git a/yield.go b/yield.go index f8ef4d5..0017e52 100644 --- a/yield.go +++ b/yield.go @@ -9,7 +9,10 @@ import ( "time" ) -var yieldDuration = 250 * time.Microsecond +var ( + yieldDuration = 250 * time.Microsecond + yieldMu Lock // protects yieldDuration +) // Yield cooperatively yields execution to reduce CPU burn in tight loops. 
// @@ -22,7 +25,9 @@ var yieldDuration = 250 * time.Microsecond // // For automatic adaptive backoff in tight loops, use Wait instead. func Yield(duration ...time.Duration) { + yieldMu.Lock() d := yieldDuration + yieldMu.Unlock() if len(duration) > 0 { d = max(0, duration[0]) } @@ -34,10 +39,13 @@ func Yield(duration ...time.Duration) { } // SetYieldDuration sets the base duration unit for Yield(). +// Safe for concurrent use. // Recommended: 50-250 microseconds for real-time systems, 1-4 ms for general workloads. func SetYieldDuration(d time.Duration) { if d < 0 { d = 0 } + yieldMu.Lock() yieldDuration = d + yieldMu.Unlock() }