Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: '1.25.x'
go-version: '1.26.x'
cache: true

- name: Verify formatting
Expand Down Expand Up @@ -51,7 +51,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: '1.25.x'
go-version: '1.26.x'
cache: true

- name: Test with coverage (exclude internal/*)
Expand Down Expand Up @@ -118,7 +118,7 @@ jobs:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: '1.25.x'
go-version: '1.26.x'
cache: true

- name: Cross compile (packages + tests, no execution)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module code.hybscloud.com/spin

go 1.25
go 1.26
48 changes: 40 additions & 8 deletions lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,44 @@ import (
// Lock is a minimal, non-fair spin lock intended for very short
// critical sections on hot paths. It avoids allocations and OS mutex
// overhead but should not be used as a general-purpose lock.
//
// Acquisition uses FAA (Fetch-And-Add) with a TTAS (test-and-test-and-set)
// slow path. FAA completes in a single atomic instruction (LOCK XADD on
// x86, LDADDAL on arm64) — under contention, CAS produces O(n²) cache-line
// invalidations while FAA keeps traffic at O(n). The TTAS slow path spins
// on read-only Load (MESI Shared) to avoid bouncing the cache line, and
// only attempts FAA when the lock appears free.
type Lock struct {
// Blank marker field. NOTE(review): presumably lets `go vet` (copylocks)
// flag accidental copies of a Lock — confirm against noCopy's definition.
_ noCopy
// n is the lock word. Zero means the lock appears free; the caller whose
// Add(1) returns exactly 1 is the owner. Larger values are leftover
// increments from failed acquisition attempts and are cleared when the
// owner resets n to zero on release.
n atomic.Uintptr
}

// Lock acquires the lock, spinning with an adaptive backoff.
// Lock acquires the lock.
// Fast path: single FAA. Slow path: TTAS (test-and-test-and-set) —
// spin on read-only Load to keep the cache line in MESI Shared state,
// then attempt FAA only when the lock appears free.
func (sl *Lock) Lock() {
for {
n := sl.n.Add(1)
if n == 1 {
return
} else if n < 4 {
if sl.n.Add(1) == 1 {
return
}
sl.lockSlow()
}

func (sl *Lock) lockSlow() {
for i := 0; ; i++ {
// Spin on Load (read-only): multiple cores hold the line in
// Shared state with zero cross-core invalidations. Only
// transition to Add (write) when the lock appears released.
if sl.n.Load() == 0 {
if sl.n.Add(1) == 1 {
return
}
}
if i >= 4 {
runtime.Gosched()
Comment thread
hayabusa-cloud marked this conversation as resolved.
} else {
Pause(defaultPauseCycles)
continue
}
runtime.Gosched()
}
}

Expand All @@ -38,6 +60,16 @@ func (sl *Lock) Unlock() {

// Try attempts to acquire the lock without blocking.
// It returns true if the lock was acquired.
//
// Try is test-and-test-and-set, mirroring the Lock slow path: it first
// reads n with a read-only Load and returns false immediately when the
// lock is visibly held. A failed Try against a held lock therefore
// performs no write to the lock word, so callers polling Try in a loop
// do not pull the cache line away from the owner and do not inflate n.
// Only when the lock appears free does Try issue the FAA; the caller
// owns the lock if and only if that Add returns 1.
//
// A Try that loses the race between its Load and its Add still
// increments n without a corresponding decrement. This is harmless:
// Unlock resets n to zero via Store(0), clearing all accumulated
// increments, and such stray increments are now limited to genuine
// acquisition races rather than every failed call.
func (sl *Lock) Try() bool {
	// Test: skip the write entirely while the lock is visibly held.
	if sl.n.Load() != 0 {
		return false
	}
	// Test-and-set: the first incrementer after a release wins.
	return sl.n.Add(1) == 1
}
2 changes: 1 addition & 1 deletion pause_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import (
)

func TestPause(t *testing.T) {
// Default (20 cycles)
// Default (30 cycles)
spin.Pause()

// Single cycle
Expand Down
10 changes: 9 additions & 1 deletion yield.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ import (
"time"
)

var yieldDuration = 250 * time.Microsecond
var (
yieldDuration = 250 * time.Microsecond
yieldMu Lock // protects yieldDuration
Comment thread
hayabusa-cloud marked this conversation as resolved.
)

// Yield cooperatively yields execution to reduce CPU burn in tight loops.
//
Expand All @@ -22,7 +25,9 @@ var yieldDuration = 250 * time.Microsecond
//
// For automatic adaptive backoff in tight loops, use Wait instead.
func Yield(duration ...time.Duration) {
yieldMu.Lock()
d := yieldDuration
yieldMu.Unlock()
if len(duration) > 0 {
d = max(0, duration[0])
}
Expand All @@ -34,10 +39,13 @@ func Yield(duration ...time.Duration) {
}

// SetYieldDuration sets the base duration unit for Yield().
//
// Negative values are clamped to zero. The new value is stored while
// holding yieldMu, so it is safe for concurrent use with Yield and with
// other SetYieldDuration calls.
//
// Recommended: 50-250 microseconds for real-time systems, 1-4 ms for general workloads.
func SetYieldDuration(d time.Duration) {
	// Clamp before taking the lock so the critical section is a single store.
	clamped := max(0, d)

	yieldMu.Lock()
	yieldDuration = clamped
	yieldMu.Unlock()
}
Loading