From a46c5b44f688e5c6803ad3eefacb50b99618acc7 Mon Sep 17 00:00:00 2001 From: nsakaimbo Date: Sun, 12 Apr 2026 20:23:48 -0400 Subject: [PATCH] fix: pre-allocate on Darwin + remove null-seed chunk limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace os.Truncate with preallocateFile in AssembleFile. On Darwin/APFS, uses fcntl(F_PREALLOCATE) to physically allocate blocks before truncating, preventing sparse-hole corruption during concurrent assembly. On other platforms, delegates to os.Truncate (no behavior change). Remove the 100-chunk limit in nullChunkSeed.LongestMatchWith — unnecessary since WriteInto returns immediately when isBlank=true. --- assemble.go | 8 +++---- nullseed.go | 15 ++++-------- preallocate_darwin.go | 53 +++++++++++++++++++++++++++++++++++++++++++ preallocate_other.go | 13 +++++++++++ 4 files changed, 75 insertions(+), 14 deletions(-) create mode 100644 preallocate_darwin.go create mode 100644 preallocate_other.go diff --git a/assemble.go b/assemble.go index c0e5d278..87b7ffa5 100644 --- a/assemble.go +++ b/assemble.go @@ -123,11 +123,11 @@ func AssembleFile(ctx context.Context, name string, idx Index, s Store, seeds [] isBlank = true } - // Truncate the output file to the full expected size. Not only does this - // confirm there's enough disk space, but it allows for an optimization - // when dealing with the Null Chunk + // Pre-allocate and truncate the output file to the full expected size. + // On Darwin/APFS, this also physically allocates disk blocks to prevent + // sparse-hole issues with concurrent writes. 
if !isBlkDevice { - if err := os.Truncate(name, idx.Length()); err != nil { + if err := preallocateFile(name, idx.Length()); err != nil { return stats, err } } diff --git a/nullseed.go b/nullseed.go index 26033e71..c5c108d3 100644 --- a/nullseed.go +++ b/nullseed.go @@ -47,17 +47,12 @@ func (s *nullChunkSeed) LongestMatchWith(chunks []IndexChunk) (int, SeedSegment) if len(chunks) == 0 { return 0, nil } - var ( - n int - limit int - ) - if !s.canReflink { - limit = 100 - } + // No limit needed: when isBlank=true, WriteInto skips without copying. + // When isBlank=false, we must still write zeros to overwrite stale data. + // The previous limit of 100 caused chunks beyond the limit to fall + // through to other code paths, leading to incorrect assembly. + var n int for _, c := range chunks { - if limit != 0 && limit == n { - break - } if c.ID != s.id { break } diff --git a/preallocate_darwin.go b/preallocate_darwin.go new file mode 100644 index 00000000..b249d6c2 --- /dev/null +++ b/preallocate_darwin.go @@ -0,0 +1,53 @@ +//go:build darwin + +package desync + +import ( + "fmt" + "os" + "syscall" + "unsafe" +) + +type fstore_t struct { + Flags uint32 + Posmode int32 + Offset int64 + Length int64 + Bytesalloc int64 +} + +const ( + fAllocateAll = 0x00000004 + fPeofPosmode = 3 + fPreallocate = 42 +) + +// preallocateFile physically allocates disk blocks and sets the file size. +// On APFS, a plain Truncate creates sparse holes. When concurrent workers +// call WriteAt on adjacent regions, copy-on-write of sparse blocks can +// cause non-deterministic data corruption. Pre-allocating real blocks +// avoids this. 
+func preallocateFile(name string, size int64) error {
+	f, err := os.OpenFile(name, os.O_WRONLY|os.O_CREATE, 0666)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	// Skip the fcntl when there is nothing to reserve; Truncate below
+	// still sets (or rejects) the requested size.
+	if size > 0 {
+		store := fstore_t{Flags: fAllocateAll, Posmode: fPeofPosmode, Length: size}
+		_, _, errno := syscall.Syscall(syscall.SYS_FCNTL,
+			f.Fd(), uintptr(fPreallocate), uintptr(unsafe.Pointer(&store)))
+		// Preallocation is an extra step on top of Truncate: a volume that
+		// answers ENOTSUP must keep working as it did with os.Truncate.
+		// Any other failure (ENOSPC in particular) is still reported.
+		if errno != 0 && errno != syscall.ENOTSUP {
+			return fmt.Errorf("F_PREALLOCATE: %w", errno)
+		}
+	}
+
+	return f.Truncate(size)
+}
diff --git a/preallocate_other.go b/preallocate_other.go
new file mode 100644
index 00000000..abc7b303
--- /dev/null
+++ b/preallocate_other.go
@@ -0,0 +1,13 @@
+//go:build !darwin
+
+package desync
+
+import "os"
+
+// preallocateFile truncates the file to the given size.
+// On Linux (ext4) and other platforms, Truncate produces a file that
+// reads back as zeros without sparse-hole issues, so no special
+// preallocation is needed.
+func preallocateFile(name string, size int64) error {
+	return os.Truncate(name, size)
+}