From b1824ece8df908c26152790f794d2d1558bc3f52 Mon Sep 17 00:00:00 2001 From: Tomas Valenta Date: Fri, 29 May 2026 14:29:10 -0700 Subject: [PATCH 01/10] fix(uffd): dedupe deferred page faults --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 19 +++++++++++---- .../sandbox/uffd/userfaultfd/deferred_test.go | 24 +++++++++++++++++++ .../sandbox/uffd/userfaultfd/userfaultfd.go | 5 ++-- 3 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index 70d85c03c5..c25198419e 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -7,20 +7,31 @@ import "sync" // deferredFaults collects pagefaults that returned EAGAIN so they get // retried on the next poll iteration. Safe for concurrent push. type deferredFaults struct { - mu sync.Mutex - pf []*UffdPagefault + mu sync.Mutex + pf []*UffdPagefault + byAddr map[uint64]struct{} } -func (d *deferredFaults) push(pf *UffdPagefault) { +func (d *deferredFaults) push(pf *UffdPagefault) bool { d.mu.Lock() + defer d.mu.Unlock() + if d.byAddr == nil { + d.byAddr = make(map[uint64]struct{}) + } + if _, ok := d.byAddr[pf.address]; ok { + return false + } + d.byAddr[pf.address] = struct{}{} d.pf = append(d.pf, pf) - d.mu.Unlock() + + return true } func (d *deferredFaults) drain() []*UffdPagefault { d.mu.Lock() out := d.pf d.pf = nil + clear(d.byAddr) d.mu.Unlock() return out diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go new file mode 100644 index 0000000000..741853e864 --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go @@ -0,0 +1,24 @@ +//go:build linux + +package userfaultfd + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDeferredFaultsDedupesByAddress(t *testing.T) { + t.Parallel() + + var d deferredFaults + require.True(t, d.push(&UffdPagefault{address: 42})) + require.False(t, d.push(&UffdPagefault{address: 42})) + require.True(t, d.push(&UffdPagefault{address: 43})) + + require.Len(t, d.drain(), 2) + require.Empty(t, d.drain()) + + require.True(t, d.push(&UffdPagefault{address: 42})) + require.Len(t, d.drain(), 1) +} diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 096a53c571..7bac7f85ad 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -461,8 +461,9 @@ func (u *Userfaultfd) Serve( } u.prefetchTracker.Add(offset, accessType) case faultDeferred: - deferred.push(pf) - u.signalWakeup() + if deferred.push(pf) { + u.signalWakeup() + } case faultDiscarded: // No install happened (ESRCH); retry would be pointless. } From db70cc91808b358d97c3af3302cd2134e2a6ee85 Mon Sep 17 00:00:00 2001 From: Tomas Valenta Date: Fri, 29 May 2026 15:05:23 -0700 Subject: [PATCH 02/10] fix(uffd): cast deferred fault addresses --- .../orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index c25198419e..d47a3efb87 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -18,10 +18,11 @@ func (d *deferredFaults) push(pf *UffdPagefault) bool { if d.byAddr == nil { d.byAddr = make(map[uint64]struct{}) } - if _, ok := d.byAddr[pf.address]; ok { + addr := uint64(pf.address) + if _, ok := d.byAddr[addr]; ok { return false } - d.byAddr[pf.address] = struct{}{} + d.byAddr[addr] = struct{}{} d.pf = append(d.pf, pf) return true From 306359e1ab71b1913bea61ed880d488d80de467a Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 30 May 2026 19:50:29 -0700 Subject: [PATCH 03/10] fix(uffd): upgrade deduped deferred fault to write --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 15 +++++++++++---- .../pkg/sandbox/uffd/userfaultfd/deferred_test.go | 12 ++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index d47a3efb87..69b835d614 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -9,20 +9,27 @@ import "sync" type deferredFaults struct { mu sync.Mutex pf []*UffdPagefault - byAddr map[uint64]struct{} + byAddr map[uint64]*UffdPagefault } +// push queues a deferred fault, deduping by address. If the same page is +// faulted as both read and write, the retained fault is upgraded to write so +// the retry installs it dirty instead of leaving a later WP fault to catch it. func (d *deferredFaults) push(pf *UffdPagefault) bool { d.mu.Lock() defer d.mu.Unlock() if d.byAddr == nil { - d.byAddr = make(map[uint64]struct{}) + d.byAddr = make(map[uint64]*UffdPagefault) } addr := uint64(pf.address) - if _, ok := d.byAddr[addr]; ok { + if existing, ok := d.byAddr[addr]; ok { + if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE != 0 { + existing.flags |= UFFD_PAGEFAULT_FLAG_WRITE + } + return false } - d.byAddr[addr] = struct{}{} + d.byAddr[addr] = pf d.pf = append(d.pf, pf) return true diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go index 741853e864..b6e82af1b5 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go @@ -22,3 +22,15 @@ func TestDeferredFaultsDedupesByAddress(t *testing.T) { require.True(t, d.push(&UffdPagefault{address: 42})) require.Len(t, d.drain(), 1) } + +func TestDeferredFaultsUpgradesReadToWrite(t *testing.T) { + t.Parallel() + + var d deferredFaults + require.True(t, d.push(&UffdPagefault{address: 42})) + require.False(t, d.push(&UffdPagefault{address: 42, flags: UFFD_PAGEFAULT_FLAG_WRITE})) + + out := d.drain() + require.Len(t, out, 1) + require.NotZero(t, out[0].flags&UFFD_PAGEFAULT_FLAG_WRITE) +} From e45c22545647d59eaa381a94770c961b63793c39 Mon Sep 17 00:00:00 2001 From: Tomas Valenta Date: Sat, 30 May 2026 20:02:14 -0700 Subject: [PATCH 04/10] fix(uffd): release deferred fault address map on drain Drop the dedupe map after draining so one burst of unique addresses does not retain map capacity for the sandbox lifetime. --- packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index 69b835d614..ec21e43122 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -39,7 +39,7 @@ func (d *deferredFaults) drain() []*UffdPagefault { d.mu.Lock() out := d.pf d.pf = nil - clear(d.byAddr) + d.byAddr = nil d.mu.Unlock() return out From dd691d500204f7f7d35291e8ab981515b6dd88e7 Mon Sep 17 00:00:00 2001 From: Tomas Valenta Date: Sat, 30 May 2026 20:07:41 -0700 Subject: [PATCH 05/10] fix(uffd): dedupe deferred faults by page Key deferred page faults by page boundary so same-page offsets coalesce before retry. --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 17 +++++++++++++---- .../sandbox/uffd/userfaultfd/deferred_test.go | 8 ++++---- .../pkg/sandbox/uffd/userfaultfd/userfaultfd.go | 2 +- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index ec21e43122..8d9fcd3989 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -7,9 +7,10 @@ import "sync" // deferredFaults collects pagefaults that returned EAGAIN so they get // retried on the next poll iteration. Safe for concurrent push. type deferredFaults struct { - mu sync.Mutex - pf []*UffdPagefault - byAddr map[uint64]*UffdPagefault + mu sync.Mutex + pf []*UffdPagefault + byAddr map[uint64]*UffdPagefault + pageSize uintptr } // push queues a deferred fault, deduping by address. If the same page is @@ -21,7 +22,7 @@ func (d *deferredFaults) push(pf *UffdPagefault) bool { if d.byAddr == nil { d.byAddr = make(map[uint64]*UffdPagefault) } - addr := uint64(pf.address) + addr := d.key(pf.address) if existing, ok := d.byAddr[addr]; ok { if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE != 0 { existing.flags |= UFFD_PAGEFAULT_FLAG_WRITE @@ -35,6 +36,14 @@ func (d *deferredFaults) push(pf *UffdPagefault) bool { return true } +func (d *deferredFaults) key(addr uintptr) uint64 { + if d.pageSize == 0 { + return uint64(addr) + } + + return uint64(addr & ^(d.pageSize - 1)) +} + func (d *deferredFaults) drain() []*UffdPagefault { d.mu.Lock() out := d.pf diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go index b6e82af1b5..14160649e4 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go @@ -8,13 +8,13 @@ import ( "github.com/stretchr/testify/require" ) -func TestDeferredFaultsDedupesByAddress(t *testing.T) { +func TestDeferredFaultsDedupesByPage(t *testing.T) { t.Parallel() - var d deferredFaults + d := deferredFaults{pageSize: 4096} require.True(t, d.push(&UffdPagefault{address: 42})) - require.False(t, d.push(&UffdPagefault{address: 42})) - require.True(t, d.push(&UffdPagefault{address: 43})) + require.False(t, d.push(&UffdPagefault{address: 43})) + require.True(t, d.push(&UffdPagefault{address: 4096})) require.Len(t, d.drain(), 2) require.Empty(t, d.drain()) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 7bac7f85ad..6c2d8f4dfe 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -261,7 +261,7 @@ func (u *Userfaultfd) Serve( unix.POLLNVAL: "POLLNVAL", } - var deferred deferredFaults + deferred := deferredFaults{pageSize: u.pageSize} for { if _, err := unix.Poll( From 3f35cda5f988c918c3bd53b23808095c4d8b18ea Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 30 May 2026 20:43:31 -0700 Subject: [PATCH 06/10] fix(uffd): dedupe deferred faults by address --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 17 ++++------------- .../sandbox/uffd/userfaultfd/deferred_test.go | 8 ++++---- .../pkg/sandbox/uffd/userfaultfd/userfaultfd.go | 2 +- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index 8d9fcd3989..ec21e43122 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -7,10 +7,9 @@ import "sync" // deferredFaults collects pagefaults that returned EAGAIN so they get // retried on the next poll iteration. Safe for concurrent push. type deferredFaults struct { - mu sync.Mutex - pf []*UffdPagefault - byAddr map[uint64]*UffdPagefault - pageSize uintptr + mu sync.Mutex + pf []*UffdPagefault + byAddr map[uint64]*UffdPagefault } // push queues a deferred fault, deduping by address. If the same page is @@ -22,7 +21,7 @@ func (d *deferredFaults) push(pf *UffdPagefault) bool { if d.byAddr == nil { d.byAddr = make(map[uint64]*UffdPagefault) } - addr := d.key(pf.address) + addr := uint64(pf.address) if existing, ok := d.byAddr[addr]; ok { if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE != 0 { existing.flags |= UFFD_PAGEFAULT_FLAG_WRITE @@ -36,14 +35,6 @@ func (d *deferredFaults) push(pf *UffdPagefault) bool { return true } -func (d *deferredFaults) key(addr uintptr) uint64 { - if d.pageSize == 0 { - return uint64(addr) - } - - return uint64(addr & ^(d.pageSize - 1)) -} - func (d *deferredFaults) drain() []*UffdPagefault { d.mu.Lock() out := d.pf diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go index 14160649e4..b6e82af1b5 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go @@ -8,13 +8,13 @@ import ( "github.com/stretchr/testify/require" ) -func TestDeferredFaultsDedupesByPage(t *testing.T) { +func TestDeferredFaultsDedupesByAddress(t *testing.T) { t.Parallel() - d := deferredFaults{pageSize: 4096} + var d deferredFaults require.True(t, d.push(&UffdPagefault{address: 42})) - require.False(t, d.push(&UffdPagefault{address: 43})) - require.True(t, d.push(&UffdPagefault{address: 4096})) + require.False(t, d.push(&UffdPagefault{address: 42})) + require.True(t, d.push(&UffdPagefault{address: 43})) require.Len(t, d.drain(), 2) require.Empty(t, d.drain()) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 6c2d8f4dfe..7bac7f85ad 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -261,7 +261,7 @@ func (u *Userfaultfd) Serve( unix.POLLNVAL: "POLLNVAL", } - deferred := deferredFaults{pageSize: u.pageSize} + var deferred deferredFaults for { if _, err := unix.Poll( From 2e11f6e65720b3f18ceac65a442e79a87fe485d3 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 30 May 2026 20:51:56 -0700 Subject: [PATCH 07/10] fix(uffd): minimize deferred fault dedup to address-only --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 23 +++++++------------ .../sandbox/uffd/userfaultfd/deferred_test.go | 20 ++++------------ .../sandbox/uffd/userfaultfd/userfaultfd.go | 5 ++-- 3 files changed, 14 insertions(+), 34 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index ec21e43122..a88026f5cc 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -9,30 +9,23 @@ import "sync" type deferredFaults struct { mu sync.Mutex pf []*UffdPagefault - byAddr map[uint64]*UffdPagefault + byAddr map[uint64]struct{} } -// push queues a deferred fault, deduping by address. If the same page is -// faulted as both read and write, the retained fault is upgraded to write so -// the retry installs it dirty instead of leaving a later WP fault to catch it. -func (d *deferredFaults) push(pf *UffdPagefault) bool { +// push queues a deferred fault, skipping addresses already queued so a page +// faulted by several threads is retried once instead of once per fault. +func (d *deferredFaults) push(pf *UffdPagefault) { d.mu.Lock() defer d.mu.Unlock() if d.byAddr == nil { - d.byAddr = make(map[uint64]*UffdPagefault) + d.byAddr = make(map[uint64]struct{}) } addr := uint64(pf.address) - if existing, ok := d.byAddr[addr]; ok { - if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE != 0 { - existing.flags |= UFFD_PAGEFAULT_FLAG_WRITE - } - - return false + if _, ok := d.byAddr[addr]; ok { + return } - d.byAddr[addr] = pf + d.byAddr[addr] = struct{}{} d.pf = append(d.pf, pf) - - return true } func (d *deferredFaults) drain() []*UffdPagefault { diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go index b6e82af1b5..87ec8f81a2 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go @@ -12,25 +12,13 @@ func TestDeferredFaultsDedupesByAddress(t *testing.T) { t.Parallel() var d deferredFaults - require.True(t, d.push(&UffdPagefault{address: 42})) - require.False(t, d.push(&UffdPagefault{address: 42})) - require.True(t, d.push(&UffdPagefault{address: 43})) + d.push(&UffdPagefault{address: 42}) + d.push(&UffdPagefault{address: 42}) + d.push(&UffdPagefault{address: 43}) require.Len(t, d.drain(), 2) require.Empty(t, d.drain()) - require.True(t, d.push(&UffdPagefault{address: 42})) + d.push(&UffdPagefault{address: 42}) require.Len(t, d.drain(), 1) } - -func TestDeferredFaultsUpgradesReadToWrite(t *testing.T) { - t.Parallel() - - var d deferredFaults - require.True(t, d.push(&UffdPagefault{address: 42})) - require.False(t, d.push(&UffdPagefault{address: 42, flags: UFFD_PAGEFAULT_FLAG_WRITE})) - - out := d.drain() - require.Len(t, out, 1) - require.NotZero(t, out[0].flags&UFFD_PAGEFAULT_FLAG_WRITE) -} diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 7bac7f85ad..096a53c571 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -461,9 +461,8 @@ func (u *Userfaultfd) Serve( } u.prefetchTracker.Add(offset, accessType) case faultDeferred: - if deferred.push(pf) { - u.signalWakeup() - } + deferred.push(pf) + u.signalWakeup() case faultDiscarded: // No install happened (ESRCH); retry would be pointless. } From 390e586b2a41285487c2b74e9afe1a84ceefed10 Mon Sep 17 00:00:00 2001 From: Tomas Valenta Date: Sat, 30 May 2026 20:52:10 -0700 Subject: [PATCH 08/10] fix(uffd): cast deferred fault address before keying Convert the cgo pagefault address to uint64 before page-boundary dedupe. --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index 8d9fcd3989..007f333885 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -22,7 +22,7 @@ func (d *deferredFaults) push(pf *UffdPagefault) bool { if d.byAddr == nil { d.byAddr = make(map[uint64]*UffdPagefault) } - addr := d.key(pf.address) + addr := d.key(uint64(pf.address)) if existing, ok := d.byAddr[addr]; ok { if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE != 0 { existing.flags |= UFFD_PAGEFAULT_FLAG_WRITE @@ -36,12 +36,14 @@ func (d *deferredFaults) push(pf *UffdPagefault) bool { return true } -func (d *deferredFaults) key(addr uintptr) uint64 { +func (d *deferredFaults) key(addr uint64) uint64 { if d.pageSize == 0 { - return uint64(addr) + return addr } - return uint64(addr & ^(d.pageSize - 1)) + pageSize := uint64(d.pageSize) + + return addr & ^(pageSize - 1) } func (d *deferredFaults) drain() []*UffdPagefault { From 509eb5858a7339d44492f95225448e58ab80f0f8 Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 30 May 2026 22:21:41 -0700 Subject: [PATCH 09/10] fix(uffd): drop redundant page-aligned dedup key --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 21 ++++++------------- .../sandbox/uffd/userfaultfd/deferred_test.go | 6 +++--- .../sandbox/uffd/userfaultfd/userfaultfd.go | 2 +- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index bb689a8934..094d5ee2e3 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -7,21 +7,22 @@ import "sync" // deferredFaults collects pagefaults that returned EAGAIN so they get // retried on the next poll iteration. Safe for concurrent push. type deferredFaults struct { - mu sync.Mutex - pf []*UffdPagefault - byAddr map[uint64]struct{} - pageSize uintptr + mu sync.Mutex + pf []*UffdPagefault + byAddr map[uint64]struct{} } // push queues a deferred fault, skipping addresses already queued so a page // faulted by several threads is retried once instead of once per fault. +// Fault addresses are already page-aligned by the kernel (UFFDIO_COPY rejects +// unaligned dst), so the raw address keys per page. func (d *deferredFaults) push(pf *UffdPagefault) { d.mu.Lock() defer d.mu.Unlock() if d.byAddr == nil { d.byAddr = make(map[uint64]struct{}) } - addr := d.key(uint64(pf.address)) + addr := uint64(pf.address) if _, ok := d.byAddr[addr]; ok { return } @@ -29,16 +30,6 @@ func (d *deferredFaults) push(pf *UffdPagefault) { d.pf = append(d.pf, pf) } -func (d *deferredFaults) key(addr uint64) uint64 { - if d.pageSize == 0 { - return addr - } - - pageSize := uint64(d.pageSize) - - return addr & ^(pageSize - 1) -} - func (d *deferredFaults) drain() []*UffdPagefault { d.mu.Lock() out := d.pf diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go index 5cf04e985d..87ec8f81a2 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go @@ -8,13 +8,13 @@ import ( "github.com/stretchr/testify/require" ) -func TestDeferredFaultsDedupesByPage(t *testing.T) { +func TestDeferredFaultsDedupesByAddress(t *testing.T) { t.Parallel() - d := deferredFaults{pageSize: 4096} + var d deferredFaults + d.push(&UffdPagefault{address: 42}) d.push(&UffdPagefault{address: 42}) d.push(&UffdPagefault{address: 43}) - d.push(&UffdPagefault{address: 4096}) require.Len(t, d.drain(), 2) require.Empty(t, d.drain()) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 710db9ee55..096a53c571 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -261,7 +261,7 @@ func (u *Userfaultfd) Serve( unix.POLLNVAL: "POLLNVAL", } - deferred := deferredFaults{pageSize: u.pageSize} + var deferred deferredFaults for { if _, err := unix.Poll( From 2b7e091ac4e64b2e030a7db6ebaa164ccb0b7e2a Mon Sep 17 00:00:00 2001 From: ValentaTomas Date: Sat, 30 May 2026 22:23:06 -0700 Subject: [PATCH 10/10] fix(uffd): upgrade deduped deferred fault to write --- .../pkg/sandbox/uffd/userfaultfd/deferred.go | 16 +++++++++++----- .../sandbox/uffd/userfaultfd/deferred_test.go | 12 ++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go index 094d5ee2e3..cc11682bb9 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -9,24 +9,30 @@ import "sync" type deferredFaults struct { mu sync.Mutex pf []*UffdPagefault - byAddr map[uint64]struct{} + byAddr map[uint64]*UffdPagefault } // push queues a deferred fault, skipping addresses already queued so a page // faulted by several threads is retried once instead of once per fault. // Fault addresses are already page-aligned by the kernel (UFFDIO_COPY rejects -// unaligned dst), so the raw address keys per page. +// unaligned dst), so the raw address keys per page. If the same page is faulted +// as both read and write, the retained fault is upgraded to write so the retry +// installs it dirty instead of leaving a later WP fault to catch it. func (d *deferredFaults) push(pf *UffdPagefault) { d.mu.Lock() defer d.mu.Unlock() if d.byAddr == nil { - d.byAddr = make(map[uint64]struct{}) + d.byAddr = make(map[uint64]*UffdPagefault) } addr := uint64(pf.address) - if _, ok := d.byAddr[addr]; ok { + if existing, ok := d.byAddr[addr]; ok { + if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE != 0 { + existing.flags |= UFFD_PAGEFAULT_FLAG_WRITE + } + return } - d.byAddr[addr] = struct{}{} + d.byAddr[addr] = pf d.pf = append(d.pf, pf) } diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go index 87ec8f81a2..d28ad24bfe 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred_test.go @@ -22,3 +22,15 @@ func TestDeferredFaultsDedupesByAddress(t *testing.T) { d.push(&UffdPagefault{address: 42}) require.Len(t, d.drain(), 1) } + +func TestDeferredFaultsUpgradesReadToWrite(t *testing.T) { + t.Parallel() + + var d deferredFaults + d.push(&UffdPagefault{address: 42}) + d.push(&UffdPagefault{address: 42, flags: UFFD_PAGEFAULT_FLAG_WRITE}) + + out := d.drain() + require.Len(t, out, 1) + require.NotZero(t, out[0].flags&UFFD_PAGEFAULT_FLAG_WRITE) +}