From 4d2afbdd5d37153faeea2afd6363c7aaf02f4d5a Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 15:05:20 -0400 Subject: [PATCH 01/25] Separate the op log from its projections Package items are stored as an append-only log of ops (package_ops); the package tables (functions, types, values, locations, dependencies, deprecations) are regenerable projections folded from that log. Adds the projection registry and rebuild/refold, so the log is the canonical source of truth and the projections can be dropped and rebuilt at will. --- ...260519_133237_package_ops_composite_pk.sql | 2 + backend/migrations/schema.sql | 50 +++- backend/src/LibDB/PackageOpPlayback.fs | 157 ++++++++---- backend/src/LibDB/Seed.fs | 117 +++++++++ backend/tests/Tests/OpsProjections.Tests.fs | 230 ++++++++++++++++++ 5 files changed, 511 insertions(+), 45 deletions(-) create mode 100644 backend/tests/Tests/OpsProjections.Tests.fs diff --git a/backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql b/backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql index a2a15e31d1..4c4c6ea299 100644 --- a/backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql +++ b/backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql @@ -14,6 +14,8 @@ CREATE TABLE package_ops ( applied INTEGER NOT NULL DEFAULT 0, propagation_id TEXT NULL, created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')), + -- sync timestamp-LWW (must mirror schema.sql; this rebuild re-runs on kill-and-fill) + origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), PRIMARY KEY (id, branch_id) ); diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index 57f5fb7bb8..7437a0b07f 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -6,7 +6,7 @@ -- final shape is what runs against an empty DB. -- -- system_migrations_v0 (the legacy per-named-migration table) is the one --- exception, since pre-cutover DBs are adopted via that table; created +-- exception, since legacy DBs are adopted via that table; created -- here AND by Migrations.fs's adoptLegacyDB path. -- -- Order: bookkeeping → branches → commits → ops → package projections → @@ -95,7 +95,13 @@ CREATE TABLE IF NOT EXISTS package_ops ( commit_hash TEXT REFERENCES commits(hash), -- NULL = WIP applied INTEGER NOT NULL DEFAULT 0, propagation_id TEXT NULL, -- direct lookup for PropagateUpdate ops - created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) + created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')), + -- Authoring timestamp, PORTABLE across sync. A + -- locally-authored op self-stamps here at insert; a SYNCED op preserves its origin (the sync + -- receiver writes the peer's value), so every instance agrees on a given op's origin_ts and + -- max(origin_ts) picks the same divergence winner → no swap. Distinct from `created_at` (which + -- is local-insert time and differs per instance for the same op). + origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')) ); CREATE INDEX IF NOT EXISTS idx_package_ops_wip ON package_ops(branch_id) WHERE commit_hash IS NULL; @@ -169,7 +175,12 @@ CREATE TABLE IF NOT EXISTS locations ( branch_id TEXT NOT NULL REFERENCES branches(id), commit_hash TEXT REFERENCES commits(hash), -- NULL = WIP created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')), - unlisted_at TIMESTAMP NULL -- set when a later row supersedes this one + unlisted_at TIMESTAMP NULL, -- set when a later row supersedes this one + -- The origin_ts of the op that set THIS binding — the name→authoring-time mapping that lets + -- playback order by CREATION, not arrival (timestamp-LWW). A SetName whose op was created + -- EARLIER than the current binding (an old op arriving late via sync) is stale: playback skips + -- the rebind, so the latest-by-creation name wins on every instance regardless of sync order. + origin_ts TEXT NULL ); CREATE INDEX IF NOT EXISTS idx_locations_branch_lookup ON locations(branch_id, owner, modules, name, item_type) @@ -351,3 +362,36 @@ CREATE TABLE IF NOT EXISTS scripts_v0 ( name TEXT NOT NULL UNIQUE, text TEXT NOT NULL ); + + +-------------------- +-- Sync (local-only setup; NOT synced) — see LibDB/{Remotes,SyncCursors,Conflicts}.fs +-------------------- + +-- Registered sync peers (managed via `dark remote ...`). Each row is a (name, url) the +-- tailnet sync daemon polls. +CREATE TABLE IF NOT EXISTS sync_remotes ( + name TEXT PRIMARY KEY, + url TEXT NOT NULL +); + +-- Per-remote poll resume state: how far this instance has folded each peer's op stream. +-- The cursor is a `package_ops` rowid (SQLite's monotonic insertion order). +CREATE TABLE IF NOT EXISTS sync_cursors ( + remote TEXT PRIMARY KEY, + folded_through_rowid INTEGER NOT NULL DEFAULT 0 +); + +-- The recorded, reviewable log of auto-resolved name-binding divergences (`dark conflicts`). +-- Recorded at pull time; auto-resolved by policy (default last-writer-wins) but never silently lost. +CREATE TABLE IF NOT EXISTS sync_conflicts ( + id TEXT PRIMARY KEY, + location TEXT NOT NULL, + local_hash TEXT NOT NULL, + incoming_hash TEXT NOT NULL, + resolution TEXT NOT NULL, + remote TEXT NOT NULL, + detected_at TEXT NOT NULL DEFAULT (datetime('now')), + acknowledged INTEGER NOT NULL DEFAULT 0, + overridden INTEGER NOT NULL DEFAULT 0 +); diff --git a/backend/src/LibDB/PackageOpPlayback.fs b/backend/src/LibDB/PackageOpPlayback.fs index d918bc9c30..b6fad4cad1 100644 --- a/backend/src/LibDB/PackageOpPlayback.fs +++ b/backend/src/LibDB/PackageOpPlayback.fs @@ -196,7 +196,7 @@ let private applyAddType /// Apply a single AddValue op to the package_values table. /// Note: rt_dval and value_type are stored as NULL here. They are populated -/// in Phase 3 by Seed.evaluateAllValues after all ops are applied, so cross- +/// by Seed.evaluateAllValues after all ops are applied, so cross- /// package references resolve correctly. let private applyAddValue (ctx : Ctx) @@ -276,55 +276,128 @@ let private applySetName let locationId = System.Guid.NewGuid() let (Hash itemHashStr) = itemHash - // 1. Deprecate any existing location at the target path (handles updates) - do! - exec ctx """ - UPDATE locations - SET unlisted_at = datetime('now') - WHERE owner = $owner - AND modules = $modules - AND name = $name - AND item_type = $item_type - AND unlisted_at IS NULL - AND branch_id = $branch_id - """ (fun cmd -> - p cmd "$owner" location.owner - p cmd "$modules" modulesStr - p cmd "$name" location.name - p cmd "$item_type" itemTypeStr - pUuid cmd "$branch_id" branchId) - - // 2. If this is a rename (standalone SetName, not paired with Add*), - // also deprecate old locations pointing to the same hash. - // We do NOT do this for Add+SetName pairs because multiple items can - // legitimately share the same hash (e.g. Int8.ParseError and - // Int16.ParseError have identical definitions). - if isRename then + // ── timestamp-LWW: order this binding by the op's CREATION time, + // not arrival. Read this op's `origin_ts` (the authoring stamp, already in package_ops) and the + // CURRENT binding's `origin_ts` (the name→authoring-time mapping in `locations`). If this op was + // created BEFORE the current binding's op — an old op arriving late via sync — it's stale: keep the + // existing, newer-by-creation binding (the op still lives in the log; it's just not the active name). + // Computed identically on every instance, so all converge to the SAME hash regardless of arrival + // order. Unknown stamps (op not in package_ops / pre-origin_ts data) → no skip = prior last-writer + // behavior, so non-sync playback (seed grow, local authoring) is unchanged. Reads run on ctx.conn so + // they see writes from earlier ops in this same applyOps transaction. + let thisOp = + PT.PackageOp.SetName( + location, + PT.Reference.fromHashAndKind (itemHash, itemKind) + ) + let (Hash thisOpHashStr) = LibSerialization.Hashing.Hashing.computeOpHash thisOp + let thisOpId = System.Guid(System.Convert.FromHexString(thisOpHashStr)[0..15]) + + let! thisTs = + task { + use cmd = ctx.conn.CreateCommand() + cmd.CommandText <- "SELECT origin_ts FROM package_ops WHERE id = $id" + cmd.Parameters.AddWithValue("$id", string thisOpId) + |> ignore + use! reader = cmd.ExecuteReaderAsync() + let! hasRow = reader.ReadAsync() + if hasRow && not (reader.IsDBNull 0) then + return Some(reader.GetString 0) + else + return None + } + + let! curBinding = + task { + use cmd = ctx.conn.CreateCommand() + cmd.CommandText <- + "SELECT item_hash, origin_ts FROM locations " + + "WHERE owner = $owner AND modules = $modules AND name = $name " + + "AND item_type = $item_type AND branch_id = $branch_id AND unlisted_at IS NULL LIMIT 1" + cmd.Parameters.AddWithValue("$owner", location.owner) + |> ignore + cmd.Parameters.AddWithValue("$modules", modulesStr) + |> ignore + cmd.Parameters.AddWithValue("$name", location.name) + |> ignore + cmd.Parameters.AddWithValue("$item_type", itemTypeStr) + |> ignore + cmd.Parameters.AddWithValue("$branch_id", string branchId) + |> ignore + use! reader = cmd.ExecuteReaderAsync() + let! hasRow = reader.ReadAsync() + if hasRow then + let h = reader.GetString 0 + let ts = if reader.IsDBNull 1 then None else Some(reader.GetString 1) + return Some(h, ts) + else + return None + } + + let isStale = + match curBinding, thisTs with + // Order a binding by its op's CREATION time (origin_ts); a stale op arriving late via sync loses + // to the newer binding. On an EXACT TIE (two DIFFERENT ops for one name stamped the same + // millisecond — a genuine cross-instance race), break deterministically by item hash: the higher + // hash wins. That tie-break is PORTABLE (content, not arrival/rowid), so every instance — and a + // from-scratch projection rebuild — converges on the same winner. Local sequential authoring + // (v2 replacing v1 in one batch) never reaches this tie: `Inserts` self-stamps each op in a + // local batch with a strictly-increasing origin_ts, so v2 is newer-by-creation and just wins. + | Some(curHash, Some curTs), Some t when curHash <> itemHashStr -> + t < curTs || (t = curTs && itemHashStr < curHash) + | _ -> false + + if isStale then + return () + else + // 1. Deprecate any existing location at the target path (handles updates) do! exec ctx """ UPDATE locations SET unlisted_at = datetime('now') - WHERE item_hash = $item_hash - AND branch_id = $branch_id + WHERE owner = $owner + AND modules = $modules + AND name = $name + AND item_type = $item_type AND unlisted_at IS NULL + AND branch_id = $branch_id """ (fun cmd -> - p cmd "$item_hash" itemHashStr + p cmd "$owner" location.owner + p cmd "$modules" modulesStr + p cmd "$name" location.name + p cmd "$item_type" itemTypeStr pUuid cmd "$branch_id" branchId) - // 3. Insert new location entry. - do! - exec ctx """ - INSERT INTO locations (location_id, item_hash, owner, modules, name, item_type, branch_id, commit_hash) - VALUES ($location_id, $item_hash, $owner, $modules, $name, $item_type, $branch_id, $commit_hash) - """ (fun cmd -> - pUuid cmd "$location_id" locationId - p cmd "$item_hash" itemHashStr - p cmd "$owner" location.owner - p cmd "$modules" modulesStr - p cmd "$name" location.name - p cmd "$item_type" itemTypeStr - pUuid cmd "$branch_id" branchId - pOpt cmd "$commit_hash" commitHash) + // 2. If this is a rename (standalone SetName, not paired with Add*), also deprecate old locations + // pointing to the same hash. We do NOT do this for Add+SetName pairs because multiple items can + // legitimately share the same hash (e.g. Int8.ParseError and Int16.ParseError). + if isRename then + do! + exec ctx """ + UPDATE locations + SET unlisted_at = datetime('now') + WHERE item_hash = $item_hash + AND branch_id = $branch_id + AND unlisted_at IS NULL + """ (fun cmd -> + p cmd "$item_hash" itemHashStr + pUuid cmd "$branch_id" branchId) + + // 3. Insert new location entry (with origin_ts for cross-instance timestamp-LWW). + do! + exec ctx """ + INSERT INTO locations (location_id, item_hash, owner, modules, name, item_type, branch_id, commit_hash, origin_ts) + VALUES ($location_id, $item_hash, $owner, $modules, $name, $item_type, $branch_id, $commit_hash, $origin_ts) + """ (fun cmd -> + pUuid cmd "$location_id" locationId + p cmd "$item_hash" itemHashStr + p cmd "$owner" location.owner + p cmd "$modules" modulesStr + p cmd "$name" location.name + p cmd "$item_type" itemTypeStr + pUuid cmd "$branch_id" branchId + pOpt cmd "$commit_hash" commitHash + pOpt cmd "$origin_ts" thisTs) } diff --git a/backend/src/LibDB/Seed.fs b/backend/src/LibDB/Seed.fs index fef1ec014e..ce3b2473c9 100644 --- a/backend/src/LibDB/Seed.fs +++ b/backend/src/LibDB/Seed.fs @@ -69,6 +69,7 @@ let export (outputPath : string) : Task = DELETE FROM package_values; DELETE FROM package_functions; DELETE FROM package_dependencies; + DELETE FROM deprecations; DELETE FROM package_ops WHERE branch_id IN ( SELECT id FROM branches WHERE archived_at IS NOT NULL); @@ -237,6 +238,122 @@ let applyUnappliedOps () : Task = } +/// ops⊥projections prototype: drop every projection table and re-fold the entire +/// package_ops log to rebuild them. Proves projections are *regenerable from the ops* +/// (the ops⊥projections split) — losing a projection costs only the CPU to re-fold; the +/// op log (package_ops) is the canonical durable state and is never touched here. +/// Returns the count of ops re-applied. +/// A regenerable projection in the per-branch cache: `table` is the projection, and `dirtiedBy` is +/// the set of `package_ops` kinds whose arrival invalidates it (so an incremental update re-folds +/// only the projections an incoming op touches). Op-kind names match the `PackageOp` DU cases. +type Projection = { table : string; dirtiedBy : Set } + +/// The regenerable projections — every table the op-fold writes. `deprecations` is one: it's folded +/// from `Deprecate`/`Undeprecate` ops (its `annotation_blob` reconstructs from the op), so it's +/// regenerable and `export` strips it like the others. NOT `package_blobs` (canonical content — +/// op-playback never writes it), nor the op log / branch / commit / account state. +let projectionRegistry : List = + [ { table = "package_functions"; dirtiedBy = Set.ofList [ "AddFn" ] } + { table = "package_types"; dirtiedBy = Set.ofList [ "AddType" ] } + { table = "package_values"; dirtiedBy = Set.ofList [ "AddValue" ] } + { table = "locations" + dirtiedBy = Set.ofList [ "SetName"; "RevertPropagation" ] } + { table = "package_dependencies" + dirtiedBy = Set.ofList [ "AddFn"; "AddType"; "AddValue" ] } + { table = "deprecations"; dirtiedBy = Set.ofList [ "Deprecate"; "Undeprecate" ] } ] + +/// The tables a full rebuild clears + refolds — derived from the registry (single source of truth). +let projectionTables : List = + projectionRegistry |> List.map (fun p -> p.table) + +/// Which projection tables an incoming op kind invalidates (incremental-refold targets). +let projectionsDirtiedBy (opKind : string) : List = + projectionRegistry + |> List.filter (fun p -> Set.contains opKind p.dirtiedBy) + |> List.map (fun p -> p.table) + +/// The projection tables a whole op BATCH dirties — the union over its op kinds. This is the +/// incremental-refold *decision*: a rebuild after appending a batch need only clear+refold +/// these tables, leaving every other projection (and its rows) untouched. +let projectionsDirtiedByBatch (opKinds : Set) : Set = + opKinds |> Set.toList |> List.collect projectionsDirtiedBy |> Set.ofList + +/// An op's kind name (matches the registry's keys). +let opKindName (op : PT.PackageOp) : string = + match op with + | PT.PackageOp.AddType _ -> "AddType" + | PT.PackageOp.AddValue _ -> "AddValue" + | PT.PackageOp.AddFn _ -> "AddFn" + | PT.PackageOp.SetName _ -> "SetName" + | PT.PackageOp.Deprecate _ -> "Deprecate" + | PT.PackageOp.Undeprecate _ -> "Undeprecate" + | PT.PackageOp.PropagateUpdate _ -> "PropagateUpdate" + | PT.PackageOp.RevertPropagation _ -> "RevertPropagation" + +/// Incremental refold (the selective counterpart to `rebuildProjections`): clear ONLY the +/// projections this op-kind batch dirties, then re-fold ONLY the ops of those kinds back into +/// them — leaving every other projection untouched. Returns the count re-folded. +/// +/// Faithful for content-addressed `Add*` kinds (their fold is batch-independent). Note: a +/// `SetName`-only refold would mis-detect renames (rename detection needs the batch's added +/// hashes), so include the accompanying `Add*` kinds when refolding `SetName`. +let rebuildDirtied (opKinds : Set) : Task = + task { + let dirtied = projectionsDirtiedByBatch opKinds + for t in Set.toList dirtied do + do! Sql.query $"DELETE FROM {t}" |> Sql.executeStatementAsync + + let! ops = + Sql.query + "SELECT id, op_blob, branch_id, commit_hash FROM package_ops ORDER BY created_at ASC" + |> Sql.executeAsync (fun read -> + let opId = read.uuid "id" + let op = BS.PT.PackageOp.deserialize opId (read.bytes "op_blob") + let branchId : PT.BranchId = read.uuid "branch_id" + let commitHash = read.stringOrNone "commit_hash" + (op, branchId, commitHash)) + + let relevant = + ops |> List.filter (fun (op, _, _) -> Set.contains (opKindName op) opKinds) + let groups = relevant |> List.groupBy (fun (_, b, c) -> (b, c)) |> Map.toList + for ((branchId, commitHash), g) in groups do + do! + PackageOpPlayback.applyOps + branchId + commitHash + (g |> List.map (fun (op, _, _) -> op)) + return int64 (List.length relevant) + } + +let rebuildProjections () : Task = + task { + // 1. clear the regenerable projection tables — from the projection registry (single + // source of truth), so the rebuild set can never drift from the fold/dirty descriptors. + for t in projectionTables do + do! Sql.query $"DELETE FROM {t}" |> Sql.executeStatementAsync + // 2. mark all ops unapplied so the fold reprocesses the whole log + do! Sql.query "UPDATE package_ops SET applied = 0" |> Sql.executeStatementAsync + // 3. re-fold ops -> projections via the existing playback path + return! applyUnappliedOps () + } + + +/// Projection-currency counters for `dark status`: `(opsCount, foldedThrough)` — total ops in the +/// canonical `package_ops` log vs how many are folded into the projections (the `applied` flag). +/// Equal => the projection cache is current; a gap => ops have been appended/pulled but not yet +/// folded (run `branch rebuild`, or restart to `growIfNeeded`). +let projectionStatus () : Task = + task { + let! total = + Sql.query "SELECT COUNT(*) as cnt FROM package_ops" + |> Sql.executeRowAsync (fun read -> read.int64 "cnt") + let! folded = + Sql.query "SELECT COUNT(*) as cnt FROM package_ops WHERE applied = 1" + |> Sql.executeRowAsync (fun read -> read.int64 "cnt") + return (total, folded) + } + + /// Evaluate all package values that have NULL rt_dval. /// Multi-pass: values may depend on other values, so we retry until convergence. let evaluateAllValues diff --git a/backend/tests/Tests/OpsProjections.Tests.fs b/backend/tests/Tests/OpsProjections.Tests.fs new file mode 100644 index 0000000000..047b6e6c7a --- /dev/null +++ b/backend/tests/Tests/OpsProjections.Tests.fs @@ -0,0 +1,230 @@ +/// Tests for the ops⊥projections prototype (LibDB.Seed.rebuildProjections). +/// Proves the central claim of the storage split: the projection tables are +/// *regenerable from the op log* — drop them, re-fold package_ops, and they +/// come back identical. The op log (package_ops) is canonical and untouched. +module Tests.OpsProjections + +open Expecto + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite + +module Seed = LibDB.Seed + +let private countRows (table : string) : Task = + Sql.query $"SELECT COUNT(*) as n FROM {table}" + |> Sql.executeRowAsync (fun read -> read.int64 "n") + +// `testSequenced` because the drop+rebuild case DELETEs + refolds the *shared* projection +// tables and marks all ops unapplied — it must not run concurrently with other DB tests (it +// would race their reads/writes mid-rebuild). The registry cases are pure but ride along. +let tests = + testSequenced + <| testList + "OpsProjections" + [ testTask + "rebuildProjections deterministically regenerates projections from the op log" { + // package_blobs is canonical content — a rebuild must never touch it + let! blobsBefore = countRows "package_blobs" + + // drop the regenerable projections + re-fold the entire op log + let! reapplied = Seed.rebuildProjections () + Expect.isTrue (reapplied > 0L) "ops were re-folded" + let! fns1 = countRows "package_functions" + let! locs1 = countRows "locations" + Expect.isTrue + (fns1 > 0L) + "projections regenerated (non-empty) from the op log" + + // a SECOND rebuild reproduces the EXACT same projections — the rebuild is a deterministic + // function of the op log. (Robust to other tests mutating the shared DB: we compare two + // rebuilds of the *current* log, not a pristine-seed count they'd perturb.) + let! _ = Seed.rebuildProjections () + let! fns2 = countRows "package_functions" + let! locs2 = countRows "locations" + Expect.equal + fns2 + fns1 + "package_functions: a re-rebuild reproduces the same projection" + Expect.equal + locs2 + locs1 + "locations: a re-rebuild reproduces the same projection" + + // canonical content (package_blobs) is NOT a projection — untouched across rebuilds + let! blobsAfter = countRows "package_blobs" + Expect.equal + blobsAfter + blobsBefore + "package_blobs (canonical content) preserved, not dropped" + } + + // the projection registry — the fold/dirty descriptors + test "the projection registry covers exactly the 6 regenerable projections" { + Expect.equal + (List.sort Seed.projectionTables) + (List.sort + [ "package_functions" + "package_types" + "package_values" + "locations" + "package_dependencies" + "deprecations" ]) + "the registry's tables are exactly Seed.export's stripped projections (incl. deprecations)" + } + + test "projectionsDirtiedBy maps an op kind to the projections it invalidates" { + Expect.equal + (List.sort (Seed.projectionsDirtiedBy "AddFn")) + (List.sort [ "package_functions"; "package_dependencies" ]) + "AddFn dirties the fn projection + the dependency edges" + Expect.equal + (List.sort (Seed.projectionsDirtiedBy "AddType")) + (List.sort [ "package_types"; "package_dependencies" ]) + "AddType dirties the type projection + deps" + Expect.equal + (Seed.projectionsDirtiedBy "SetName") + [ "locations" ] + "SetName dirties only locations" + Expect.equal + (Seed.projectionsDirtiedBy "RevertPropagation") + [ "locations" ] + "RevertPropagation dirties only locations" + Expect.equal + (Seed.projectionsDirtiedBy "Deprecate") + [ "deprecations" ] + "Deprecate dirties the deprecations projection (it IS regenerable from ops)" + Expect.isEmpty + (Seed.projectionsDirtiedBy "PropagateUpdate") + "PropagateUpdate is a no-op (its accompanying SetNames do the work) — dirties nothing" + } + + // the incremental-refold DECISION: which projections a whole op batch dirties (the union) + test + "projectionsDirtiedByBatch unions a batch's dirtied projections; AddFn-only skips locations" { + let addFnOnly = Seed.projectionsDirtiedByBatch (Set.ofList [ "AddFn" ]) + Expect.equal + addFnOnly + (Set.ofList [ "package_functions"; "package_dependencies" ]) + "an AddFn-only batch dirties functions + deps" + Expect.isFalse + (Set.contains "locations" addFnOnly) + "locations is NOT dirtied — an incremental refold leaves it (and its rows) untouched" + + let mixed = + Seed.projectionsDirtiedByBatch (Set.ofList [ "AddFn"; "SetName" ]) + Expect.equal + mixed + (Set.ofList [ "package_functions"; "package_dependencies"; "locations" ]) + "an AddFn+SetName batch dirties functions + deps + locations (the union)" + + Expect.isEmpty + (Seed.projectionsDirtiedByBatch Set.empty) + "an empty batch dirties no projections" + } + + // the selective FOLD: rebuildDirtied refolds ONLY the dirtied tables, leaving others alone + testTask + "rebuildDirtied {AddFn} refolds functions but leaves locations untouched (selective)" { + let! locsBefore = countRows "locations" + let! refolded = Seed.rebuildDirtied (Set.ofList [ "AddFn" ]) + Expect.isTrue (refolded > 0L) "AddFn ops were re-folded" + let! fnsAfter = countRows "package_functions" + Expect.isTrue + (fnsAfter > 0L) + "package_functions regenerated from the AddFn ops" + // SELECTIVITY: locations isn't in {AddFn}'s dirtied set, so it's never cleared/refolded + let! locsAfter = countRows "locations" + Expect.equal + locsAfter + locsBefore + "locations untouched — rebuildDirtied {AddFn} refolds only the dirtied projections" + } + + // A schema change keeps your work. The bootstrap drops ONLY `projectionTables` and re-folds the op + // log (LocalExec.Migrations.dropProjectionTables) — so the authored, canonical data must NEVER appear + // in that drop-set. If it did, a schema bump would delete your work. This guards that line. + test + "a schema change never drops the op log: no canonical table is in the projection drop-set" { + let canonical = + [ "package_ops" // the authored op log — the truth + "package_blobs" // canonical content (op-playback never writes it) + "branches" + "commits" + "branch_ops" + "accounts_v0" + "user_data_v0" + "toplevels_v0" + "scripts_v0" + "sync_remotes" + "sync_cursors" + "sync_conflicts" ] + canonical + |> List.iter (fun t -> + Expect.isFalse + (List.contains t Seed.projectionTables) + $"{t} is canonical and must NOT be in the projection drop-set (it would be lost on a schema change)") + } + + // A schema change keeps your work, end to end. A schema change now runs `rebuildProjections` (drop + // projections + re-fold), exactly what this exercises. The first test pins projection-regen + blobs; + // this pins the thing that actually matters — your authored op LOG (and branch/commit state) come + // through a full re-fold IDENTICAL. If this regresses, a schema bump is eating real work. + testTask "a schema change keeps your work: a full re-fold preserves the op log" { + let! opsBefore = countRows "package_ops" + let! branchesBefore = countRows "branches" + let! commitsBefore = countRows "commits" + Expect.isTrue + (opsBefore > 0L) + "there are ops to preserve (not a vacuous test)" + + let! _ = Seed.rebuildProjections () + + let! opsAfter = countRows "package_ops" + let! branchesAfter = countRows "branches" + let! commitsAfter = countRows "commits" + Expect.equal + opsAfter + opsBefore + "package_ops (the authored op log) is untouched by a re-fold" + Expect.equal + branchesAfter + branchesBefore + "branches preserved across a re-fold" + Expect.equal commitsAfter commitsBefore "commits preserved across a re-fold" + } + + // Projection-currency counters — the `dark status` glance (`projectionStatus` → opsCount vs + // folded-through). Equal when the cache is current; a gap when ops are appended/pulled but not yet + // folded. Guards the surface that tells you a `branch rebuild` is owed. + testTask + "projectionStatus: folded == total when current; a gap appears when an op is unapplied" { + let! _ = Seed.rebuildProjections () // re-fold → every op applied → current + let! (total1, folded1) = Seed.projectionStatus () + Expect.isTrue (total1 > 0L) "there are ops to count" + Expect.equal + folded1 + total1 + "after a rebuild, folded-through == total (cache current)" + // mark one op unapplied → a one-op gap + do! + Sql.query + "UPDATE package_ops SET applied = 0 WHERE rowid = (SELECT MIN(rowid) FROM package_ops)" + |> Sql.executeStatementAsync + let! (total2, folded2) = Seed.projectionStatus () + Expect.equal + total2 + total1 + "total ops unchanged (the canonical log is untouched)" + Expect.equal + folded2 + (folded1 - 1L) + "one unapplied op → folded-through drops by one (a visible gap)" + let! _ = Seed.rebuildProjections () // restore: re-fold so the shared DB stays consistent + () + } ] From 2cbe88b197615549eff4ea0a723a5bea88ec5b2e Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 15:05:20 -0400 Subject: [PATCH 02/25] Add the conflict-dispatch seam A single extensible socket on ExecutionState turns a runtime conflict (a missing function, a sync divergence, ...) into a policy decision: substitute a value or fail loudly. The default policy fails loudly, byte-identical to before, and it's wired into the interpreter's missing-function path so a policy can later resolve it instead. --- backend/src/LibExecution/Execution.fs | 19 +++ backend/src/LibExecution/Interpreter.fs | 24 +++- backend/src/LibExecution/RuntimeTypes.fs | 35 ++++++ backend/tests/Tests/ConflictDispatch.Tests.fs | 111 ++++++++++++++++++ 4 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 backend/tests/Tests/ConflictDispatch.Tests.fs diff --git a/backend/src/LibExecution/Execution.fs b/backend/src/LibExecution/Execution.fs index cdd87e7181..3809581e39 100644 --- a/backend/src/LibExecution/Execution.fs +++ b/backend/src/LibExecution/Execution.fs @@ -38,6 +38,25 @@ let createState reportException = reportException notify = notify + // Default: FailLoudly for every conflict — unchanged from the behavior before this seam existed. + conflictDispatch = + fun conflict _ctx -> + uply { + match conflict with + | RT.CRuntimeError rte -> return RT.RFailLoudly rte + | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) + | RT.CSyncDivergence(location, existing, incoming) -> + // strict default: fail loudly. A sync policy installs surface-as-data / last-writer + // so the receiver never blocks — but the default doesn't pick a winner. + return + RT.RFailLoudly( + RTE.UncaughtException( + $"sync divergence at {location}: {existing} vs {incoming}", + [] + ) + ) + } + lambdaInstrCache = System.Collections.Concurrent.ConcurrentDictionary() packageFnInstrCache = System.Collections.Concurrent.ConcurrentDictionary() diff --git a/backend/src/LibExecution/Interpreter.fs b/backend/src/LibExecution/Interpreter.fs index e6eb30a2b1..db9b7acbf8 100644 --- a/backend/src/LibExecution/Interpreter.fs +++ b/backend/src/LibExecution/Interpreter.fs @@ -314,7 +314,29 @@ let rec private executeInner (exeState : ExecutionState) (vm : VMState) : Ply return raiseRTE (RTE.FnNotFound(FQFnName.Package fn)) + | None -> + // Route a missing package fn through the conflict-dispatch seam — the runtime's + // shared "I can't proceed; here are the options" hook (the SAME hook sync's + // divergence routing uses, so the dispatch is real infrastructure, not a sync-only + // appendage). The default policy returns `RFailLoudly (FnNotFound …)` → raise, so + // this is byte-identical to before. The teed-up consumer is fetch-on-miss: a policy + // pulls the fn from a peer and resolves it, instead of failing. (notes/sync-future-ops.md) + let cc : CallContext = + { branchId = exeState.branchId; threadID = vm.threadID } + match! + exeState.conflictDispatch (CFnNotFound(FQFnName.Package fn)) cc + with + | RFailLoudly rte -> return raiseRTE rte + | RSubstitute _ -> + // A policy substituted a value for the missing fn, but result-injection isn't wired + // at this call site yet (it needs the call to return a Dval, not instructions). Raise + // a DISTINCT internal error — not a bare FnNotFound — so if a policy ever returns + // RSubstitute here before fetch-on-miss lands, the unwired path is diagnosable rather + // than masquerading as "the fn doesn't exist". + return + Exception.raiseInternal + "conflict-dispatch returned RSubstitute for a missing package fn, but value-substitution is not wired at this call site yet" + [ "fn", fn ] } diff --git a/backend/src/LibExecution/RuntimeTypes.fs b/backend/src/LibExecution/RuntimeTypes.fs index 8e6b5ff2a5..8a214ac189 100644 --- a/backend/src/LibExecution/RuntimeTypes.fs +++ b/backend/src/LibExecution/RuntimeTypes.fs @@ -1849,11 +1849,46 @@ and ExceptionReporter = ExecutionState -> VMState -> Metadata -> exn -> Ply VMState -> string -> Metadata -> Ply +// -- Conflict dispatch: the runtime "I can't proceed; here are my options" hook. -- +// These MUST live here (the and-chain), not a separate ConflictTypes.fs: they mention +// RuntimeError.Error/Dval (defined above) AND ExecutionState references ConflictDispatch, +// so a later file can't satisfy both. (Same constraint a buses field would have.) +and Conflict = + // Extensible by design — `Conflict` is the meta-model. As more ops are added, new cases join here + // and a policy decides each the same way (RSubstitute / FailLoudly). Anticipated future cases: a + // move collision (a MoveItem/MoveModule lands a name where one already lives), a value-update race + // (two concurrent updates to one mutable package value), a capability denial (a gate refused; a + // policy could prompt or escalate instead of failing). + | CRuntimeError of RuntimeError.Error + | CFnNotFound of FQFnName.FQFnName + // A name bound to two different hashes across synced instances — the `name → two hashes` + // divergence. Hashes are RT-level strings (PT's `Hash of string` can't be referenced here — PT + // depends on RT, not the reverse). Default dispatch surfaces it loudly; a sync policy can + // RSubstitute the converged winner (last-writer-wins). + | CSyncDivergence of + location : string * + existingHash : string * + incomingHash : string + +and Resolution = + // How a policy answers a Conflict: substitute a value to proceed, or fail loudly. (A future + // "park" resolution — pause and await external input — would be added here.) + | RSubstitute of Dval + | RFailLoudly of RuntimeError.Error + +and CallContext = { branchId : BranchId; threadID : uuid } // assembled from ExecState + VMState + +and ConflictDispatch = Conflict -> CallContext -> Ply + /// All state set when starting an execution; non-changing /// (as opposed to the VMState, which changes as the execution progresses) and ExecutionState = { // -- Set consistently across a runtime -- tracing : Tracing.Tracing + + /// The conflict-dispatch hook. Default (createState) returns FailLoudly for every + /// conflict — the behavior before this seam existed. A policy can install substitute/park later. + conflictDispatch : ConflictDispatch test : TestContext /// Lambda instructions registered by `CreateLambda`, looked up on `Apply`. diff --git a/backend/tests/Tests/ConflictDispatch.Tests.fs b/backend/tests/Tests/ConflictDispatch.Tests.fs new file mode 100644 index 0000000000..a7488ad20a --- /dev/null +++ b/backend/tests/Tests/ConflictDispatch.Tests.fs @@ -0,0 +1,111 @@ +/// Tests for the conflict-dispatch seam (LibExecution.RuntimeTypes +/// Conflict/Resolution/ConflictDispatch + ExecutionState.conflictDispatch). +/// Verifies the default dispatch is FailLoudly (the unchanged prior behavior) and +/// that an installed policy overrides it — the whole point of the hook. +module Tests.ConflictDispatch + +open Expecto + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude +open TestUtils.TestUtils + +module RT = LibExecution.RuntimeTypes +module RTE = RT.RuntimeError +module Exe = LibExecution.Execution +module PT = LibExecution.ProgramTypes + +let private freshState () : RT.ExecutionState = + let builtins = localBuiltIns pmPT + Exe.createState + builtins + pmRT + Exe.noTracing + (fun _ _ _ _ -> uply { return () }) + (fun _ _ _ _ -> uply { return () }) + PT.mainBranchId + { dbs = Map.empty } + +let private ctx (state : RT.ExecutionState) : RT.CallContext = + { branchId = state.branchId; threadID = System.Guid.NewGuid() } + +let private aName = RT.FQFnName.fqBuiltin "doesNotExist" 0 + +let tests = + testList + "ConflictDispatch" + [ testTask "default dispatch FailLoudly-s a RuntimeError unchanged" { + let state = freshState () + let err = RTE.FnNotFound aName + let! res = + state.conflictDispatch (RT.CRuntimeError err) (ctx state) |> Ply.toTask + match res with + | RT.RFailLoudly e -> + Expect.equal e err "FailLoudly carries the same RuntimeError" + | _ -> failtest "default dispatch should FailLoudly" + } + + testTask "default dispatch maps FnNotFound to a FailLoudly FnNotFound" { + let state = freshState () + let! res = + state.conflictDispatch (RT.CFnNotFound aName) (ctx state) |> Ply.toTask + match res with + | RT.RFailLoudly(RTE.FnNotFound n) -> Expect.equal n aName "name preserved" + | _ -> failtest "default dispatch should FailLoudly with FnNotFound" + } + + testTask "an installed policy overrides the default (Substitute)" { + let baseState = freshState () + let state = + { baseState with + conflictDispatch = + fun _ _ -> uply { return RT.RSubstitute(RT.DInt64 0L) } } + let! res = + state.conflictDispatch (RT.CFnNotFound aName) (ctx state) |> Ply.toTask + match res with + | RT.RSubstitute(RT.DInt64 n) -> + Expect.equal n 0L "installed policy substituted" + | _ -> + failtest + "installed policy should Substitute, proving the hook is swappable" + } + + // CSyncDivergence — two peers bound the same name to different content. It flows + // through the SAME conflict policy: strict default fails loudly; a sync policy resolves. + testTask + "a sync divergence fails loudly by default and is policy-resolvable (last-writer)" { + let state = freshState () + let conflict = RT.CSyncDivergence("Stachu.foo", "hashA", "hashB") + + // strict default: surface the divergence as a loud error naming the location + both hashes + let! def = state.conflictDispatch conflict (ctx state) |> Ply.toTask + match def with + | RT.RFailLoudly(RTE.UncaughtException(msg, _)) -> + Expect.stringContains + msg + "sync divergence" + "default surfaces the divergence loudly" + Expect.stringContains msg "Stachu.foo" "the diverged location is named" + | _ -> + failtest "default dispatch should FailLoudly (surface) a sync divergence" + + // a last-writer sync policy resolves it without blocking: substitute the incoming hash. + let lastWriter = + { state with + conflictDispatch = + fun conflict _ -> + uply { + match conflict with + | RT.CSyncDivergence(_, _, incoming) -> + return RT.RSubstitute(RT.DString incoming) + | _ -> + return RT.RFailLoudly(RTE.UncaughtException("unexpected", [])) + } } + let! res = lastWriter.conflictDispatch conflict (ctx state) |> Ply.toTask + match res with + | RT.RSubstitute(RT.DString s) -> + Expect.equal s "hashB" "the last-writer policy picked the incoming hash" + | _ -> failtest "last-writer policy should RSubstitute the incoming hash" + } ] From b71623d3002572615537b784122b1245e2e4502e Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 15:05:20 -0400 Subject: [PATCH 03/25] Sync the op log between Darklang instances Replicate the package_ops log across instances over a file or HTTP/Tailscale transport, applying remote ops through the same idempotent path as local ones. Instances converge by each op's portable authoring time (last-writer-wins; same-millisecond ties broken deterministically by content hash); auto-resolved name-binding divergences are recorded for review and routed through the conflict-dispatch seam. Only committed ops are shared (the commit is the unit of sync), and a mismatched-version peer is paused with a clear upgrade message rather than a decode error. Includes the sync builtins, the sync CLI, and an always-on autosync daemon. --- .../Builtins.Http.Client/Libs/HttpClient.fs | 35 + .../src/Builtins/Builtins.Matter/Builtin.fs | 2 + .../Builtins.Matter/Builtins.Matter.fsproj | 2 + .../Builtins.Matter/Libs/PM/Remotes.fs | 78 ++ .../Builtins/Builtins.Matter/Libs/PM/Seed.fs | 52 + .../Builtins/Builtins.Matter/Libs/PM/Sync.fs | 582 +++++++++ backend/src/LibDB/Conflicts.fs | 163 +++ backend/src/LibDB/Inserts.fs | 101 +- backend/src/LibDB/Remotes.fs | 56 + backend/src/LibDB/RuntimeTypes.fs | 30 +- backend/src/LibDB/Sync.fs | 617 ++++++++++ backend/src/LibDB/SyncCursors.fs | 53 + .../execution/pre-s-and-s/autosync.dark | 41 + .../pre-s-and-s/conflicts-display.dark | 32 + .../execution/pre-s-and-s/conflicts-list.dark | 65 + .../execution/pre-s-and-s/status-cli.dark | 12 + .../execution/pre-s-and-s/sync-check.dark | 48 + .../execution/pre-s-and-s/sync-cli.dark | 24 + backend/tests/Tests/Remotes.Tests.fs | 71 ++ backend/tests/Tests/SyncIdempotency.Tests.fs | 1051 +++++++++++++++++ backend/tests/Tests/SyncScenarios.Tests.fs | 608 ++++++++++ packages/darklang/cli/conflicts.dark | 113 ++ packages/darklang/cli/core.dark | 6 +- packages/darklang/cli/remote.dark | 90 ++ packages/darklang/cli/scm/branch.dark | 16 +- packages/darklang/cli/scm/status.dark | 22 + packages/darklang/cli/sync.dark | 227 ++++ packages/darklang/sync/api.dark | 182 +++ packages/darklang/sync/autosync.dark | 174 +++ packages/darklang/sync/daemon.dark | 65 + packages/darklang/sync/display.dark | 139 +++ packages/darklang/sync/server.dark | 108 ++ packages/darklang/sync/tailscale.dark | 11 + 33 files changed, 4858 insertions(+), 18 deletions(-) create mode 100644 backend/src/Builtins/Builtins.Matter/Libs/PM/Remotes.fs create mode 100644 backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs create mode 100644 backend/src/LibDB/Conflicts.fs create mode 100644 backend/src/LibDB/Remotes.fs create mode 100644 backend/src/LibDB/Sync.fs create mode 100644 backend/src/LibDB/SyncCursors.fs create mode 100644 backend/testfiles/execution/pre-s-and-s/autosync.dark create mode 100644 backend/testfiles/execution/pre-s-and-s/conflicts-display.dark create mode 100644 backend/testfiles/execution/pre-s-and-s/conflicts-list.dark create mode 100644 backend/testfiles/execution/pre-s-and-s/status-cli.dark create mode 100644 backend/testfiles/execution/pre-s-and-s/sync-check.dark create mode 100644 backend/testfiles/execution/pre-s-and-s/sync-cli.dark create mode 100644 backend/tests/Tests/Remotes.Tests.fs create mode 100644 backend/tests/Tests/SyncIdempotency.Tests.fs create mode 100644 backend/tests/Tests/SyncScenarios.Tests.fs create mode 100644 packages/darklang/cli/conflicts.dark create mode 100644 packages/darklang/cli/remote.dark create mode 100644 packages/darklang/cli/sync.dark create mode 100644 packages/darklang/sync/api.dark create mode 100644 packages/darklang/sync/autosync.dark create mode 100644 packages/darklang/sync/daemon.dark create mode 100644 packages/darklang/sync/display.dark create mode 100644 packages/darklang/sync/server.dark create mode 100644 packages/darklang/sync/tailscale.dark diff --git a/backend/src/Builtins/Builtins.Http.Client/Libs/HttpClient.fs b/backend/src/Builtins/Builtins.Http.Client/Libs/HttpClient.fs index 725dabff76..bd916c3edb 100644 --- a/backend/src/Builtins/Builtins.Http.Client/Libs/HttpClient.fs +++ b/backend/src/Builtins/Builtins.Http.Client/Libs/HttpClient.fs @@ -761,6 +761,41 @@ let fns (config : Configuration) : List = deprecated = NotDeprecated } + { name = fn "httpClientGetUnsafe" 0 + typeParams = [] + parameters = + [ Param.make + "uri" + TString + "URL to GET with SSRF guards OFF (loopback/private/tailnet allowed)" ] + returnType = TypeReference.result TString TString + description = + "GET with NO SSRF guards — loopback, RFC-1918, and Tailscale (100.64/10) + addresses are reachable, unlike the guarded `httpClient.request`. TRUSTED-CLI use only + (the caller IS the code author): used by `dark sync pull ` to reach a tailnet peer's + sync server. Returns the response body as a String (Ok) or an error message (Error)." + fn = + let looseClient = BaseClient.create looseConfig + let resultOk = Dval.resultOk KTString KTString + let resultError = Dval.resultError KTString KTString + (function + | _, _, _, [ DString uri ] -> + uply { + let request : Request = + { url = uri; method = HttpMethod "GET"; headers = []; body = [||] } + let! response = makeRequest looseConfig looseClient request + match response with + | Ok r -> + return resultOk (DString(System.Text.Encoding.UTF8.GetString r.body)) + | Error _ -> return resultError (DString "sync fetch failed") + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.Needs.http + deprecated = NotDeprecated } + + // —————————————————————————————————————————————————————————— // Streaming HTTP. // diff --git a/backend/src/Builtins/Builtins.Matter/Builtin.fs b/backend/src/Builtins/Builtins.Matter/Builtin.fs index 1c1322e5f7..4d0ae950d4 100644 --- a/backend/src/Builtins/Builtins.Matter/Builtin.fs +++ b/backend/src/Builtins/Builtins.Matter/Builtin.fs @@ -25,6 +25,8 @@ let builtins (pm : PT.PackageManager) : Builtins = Libs.PM.Dependencies.builtins () Libs.PM.Seed.builtins Libs.PM.Caps.builtins + Libs.PM.Sync.builtins () + Libs.PM.Remotes.builtins // Traces (reader surface) Libs.Traces.builtins () diff --git a/backend/src/Builtins/Builtins.Matter/Builtins.Matter.fsproj b/backend/src/Builtins/Builtins.Matter/Builtins.Matter.fsproj index 829ecc0971..003d2de165 100644 --- a/backend/src/Builtins/Builtins.Matter/Builtins.Matter.fsproj +++ b/backend/src/Builtins/Builtins.Matter/Builtins.Matter.fsproj @@ -20,6 +20,8 @@ + + diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Remotes.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Remotes.fs new file mode 100644 index 0000000000..ed478dfda4 --- /dev/null +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Remotes.fs @@ -0,0 +1,78 @@ +/// Builtins behind `dark remote` — the registry of sync peers the tailnet daemon polls. Thin +/// wrappers over `LibDB.Remotes`; the pull/apply sync surface lives in `Libs.PM.Sync`. +module Builtins.Matter.Libs.PM.Remotes + +open Prelude +open LibExecution.RuntimeTypes + +module Dval = LibExecution.Dval +module Builtin = LibExecution.Builtin + +open Builtin.Shortcuts + + +let fns : List = + [ { name = fn "pmRemoteAdd" 0 + typeParams = [] + parameters = [ Param.make "name" TString ""; Param.make "url" TString "" ] + returnType = TUnit + description = + "Register (or update) a sync remote by name. `url` is the pollable target (an http(s) URL or a + local data.db path). Idempotent by name." + fn = + (function + | _, _, _, [ DString name; DString url ] -> + uply { + do! LibDB.Remotes.add name url + return DUnit + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmRemoteList" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TList TString + description = + "Registered sync remotes, one `name → url` line each (for `dark remote list`). Empty if none." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { + let! remotes = LibDB.Remotes.list () + let lines = + remotes |> List.map (fun (name, url) -> DString $"{name} → {url}") + return Dval.list KTString lines + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmRemoteRemove" 0 + typeParams = [] + parameters = [ Param.make "name" TString "" ] + returnType = TBool + description = + "Unregister a sync remote by name. Returns true if it existed (its sync cursor, if any, stays)." + fn = + (function + | _, _, _, [ DString name ] -> + uply { + let! existed = LibDB.Remotes.remove name + return DBool existed + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } ] + + +let builtins = LibExecution.Builtin.make [] fns diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Seed.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Seed.fs index 6917905bb0..9c391e3ee8 100644 --- a/backend/src/Builtins/Builtins.Matter/Libs/PM/Seed.fs +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Seed.fs @@ -31,6 +31,58 @@ let fns : List = sqlSpec = NotQueryable previewable = Impure capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmRebuildProjections" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TInt64 + description = + "Drop every regenerable projection table and re-fold the entire `package_ops` log to + rebuild them (the ops⊥projections recovery path — `dark branch rebuild`). Projections + are non-authoritative: losing one costs only the CPU to re-fold from the canonical op + log. Returns the number of ops re-folded." + fn = + (function + | exeState, _, _, [ DUnit ] -> + uply { + let! refolded = LibDB.Seed.rebuildProjections () + // The structural fold restores `package_values` rows with NULL `rt_dval` + // (evaluation is a separate phase that normally runs at startup over unapplied + // ops). Re-evaluate here so values are readable right after a rebuild — otherwise + // reading any value hits a NULL rt_dval. Mirrors `pmAddOps`' post-insert eval. + let builtins : Builtins = + { values = exeState.values.builtIn; fns = exeState.fns.builtIn } + let! _ = LibDB.Seed.evaluateAllValues builtins LibDB.PackageManager.rt + return DInt64 refolded + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmProjectionStatus" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TTuple(TInt64, TInt64, []) + description = + "Projection-currency counters for `dark status`: `(opsCount, foldedThrough)` — total ops + in the canonical `package_ops` log vs how many are folded into the projections. Equal + means the cache is current; a gap means ops await folding (`branch rebuild` / restart)." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { + let! (total, folded) = LibDB.Seed.projectionStatus () + return DTuple(DInt64 total, DInt64 folded, []) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps deprecated = NotDeprecated } ] let builtins = LibExecution.Builtin.make [] fns diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs new file mode 100644 index 0000000000..5a6cc953d3 --- /dev/null +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs @@ -0,0 +1,582 @@ +/// Sync surface — pull another instance's package ops into this one. +module Builtins.Matter.Libs.PM.Sync + +open Prelude +open LibExecution.RuntimeTypes + +module Builtin = LibExecution.Builtin +module PT = LibExecution.ProgramTypes +module Dval = LibExecution.Dval +module VT = LibExecution.ValueType + +open Builtin.Shortcuts + + +let fns () : List = + [ { name = fn "pmSyncPull" 0 + typeParams = [] + parameters = + [ Param.make "sourcePath" TString "Path to a peer's data.db to pull ops from" ] + returnType = TTuple(TInt64, TInt64, []) + description = + "Pull a peer's package ops from a local `data.db` file into this instance: resume from + the stored per-peer cursor, apply the peer's new ops (op log + projections, idempotent), + and persist the advanced cursor. Returns `(newCursor, divergenceCount)` — the peer's + last applied rowid, and how many name→hash divergences were surfaced (never blocks)." + fn = + (function + | exeState, vm, _, [ DString sourcePath ] -> + uply { + let! (newCursor, divergences) = LibDB.Sync.pullFromFile sourcePath + // Route each surfaced divergence through the runtime conflict-dispatch seam. The + // default policy keeps today's behavior (surface-as-data, LWW stands); a sync policy + // can keep-local. branchId = the puller's current branch. + let callCtx : CallContext = + { branchId = exeState.branchId; threadID = vm.threadID } + let! _reconciled = + LibDB.Sync.routeDivergences + exeState.conflictDispatch + callCtx + sourcePath + exeState.branchId + divergences + return + DTuple(DInt64 newCursor, DInt64(int64 (List.length divergences)), []) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + // ── HTTP transport — server read + client apply, base64 over the wire ── + { name = fn "pmSyncOpsSince" 0 + typeParams = [] + parameters = + [ Param.make + "cursor" + TInt64 + "Resume point — the last rowid the puller already has" ] + returnType = TString + description = + "The sync SERVER read (the `GET /sync/events?since=cursor` body): the ops the puller + hasn't seen, encoded as a base64 wire batch. A Darklang HTTP router returns this string; + the puller decodes + applies it via `pmSyncApplyWire`." + fn = + (function + | _, _, _, [ DInt64 cursor ] -> + uply { + let! ops = LibDB.Sync.opsToSend cursor + return DString(System.Convert.ToBase64String(LibDB.Sync.encodeBatch ops)) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncOpsSinceCommitted" 0 + typeParams = [] + parameters = + [ Param.make + "cursor" + TInt64 + "Resume point — the last rowid the puller already has" ] + returnType = TString + description = + "The COMMITTED-ONLY server read (`GET /sync/events?since=cursor&committed=1`): like + `pmSyncOpsSince` but ships only ops belonging to a commit (excludes WIP) — the multi-author + scope, so a coworker syncs your committed history, not your uncommitted mid-edits." + fn = + (function + | _, _, _, [ DInt64 cursor ] -> + uply { + let! ops = LibDB.Inserts.opsSinceCommitted cursor + return DString(System.Convert.ToBase64String(LibDB.Sync.encodeBatch ops)) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncApplyWire" 0 + typeParams = [] + parameters = + [ Param.make + "remote" + TString + "Peer identity (e.g. its URL) — keys the resume cursor" + Param.make + "wireB64" + TString + "A base64 wire batch from the peer's `pmSyncOpsSince`" ] + returnType = TTuple(TInt64, TInt64, []) + description = + "The sync CLIENT apply (after `httpRequest`-ing a peer's `/sync/events`): decode the + base64 wire batch and apply it into this instance (op log + projections, idempotent), + advancing this peer's cursor. Returns `(newCursor, divergenceCount)` — divergences + surfaced (never blocked), same as the file pull." + fn = + (function + | exeState, vm, _, [ DString remote; DString wireB64 ] -> + uply { + let bytes = System.Convert.FromBase64String wireB64 + let! (newCursor, divergences) = + LibDB.Sync.applyWireBatch remote PT.mainBranchId None bytes + // Same dispatch routing as the file pull (the wire batch applies on main). + let callCtx : CallContext = + { branchId = exeState.branchId; threadID = vm.threadID } + let! _reconciled = + LibDB.Sync.routeDivergences + exeState.conflictDispatch + callCtx + remote + PT.mainBranchId + divergences + return + DTuple(DInt64 newCursor, DInt64(int64 (List.length divergences)), []) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncCursorFor" 0 + typeParams = [] + parameters = + [ Param.make + "remote" + TString + "Peer identity (path or URL) — the resume cursor key" ] + returnType = TInt64 + description = + "The stored resume cursor for this peer — the last rowid we've applied from it, or 0 if + never synced. The HTTP client passes this as `?since=` so the server returns only the + ops we don't yet have (incremental pull instead of the whole log every time)." + fn = + (function + | _, _, _, [ DString remote ] -> + uply { + let! cursor = LibDB.SyncCursors.cursorFor remote + return DInt64 cursor + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncOpKindsSince" 0 + typeParams = [] + parameters = + [ Param.make "sourcePath" TString "A peer's data.db path" + Param.make "sinceCursor" TInt64 "Only ops with rowid above this" ] + returnType = TList TString + description = + "The item kinds ('fn'/'type'/'value') of the naming ops a file peer has above sinceCursor — + for `dark sync pull`'s breakdown. One entry per SetName (each item named once, no + double-count). File peers only." + fn = + (function + | _, _, _, [ DString sourcePath; DInt64 sinceCursor ] -> + uply { + let! kinds = LibDB.Sync.opKindsSince sourcePath sinceCursor + return Dval.list KTString (kinds |> List.map DString) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncStatus" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TList TString + description = + "Every peer this instance has synced with and how far (the last applied rowid) — one + pre-formatted line per peer, for `dark sync status`. Empty if nothing's been synced." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { + let! cursors = LibDB.SyncCursors.listCursors () + let peerLines = + cursors + |> List.map (fun (remote, c) -> + DString $"{remote} — synced through op {string c}") + // registered remotes (`dark remote add`) that have NO cursor yet — show them as pending + // so a freshly-added peer appears in status (the daemon already polls it) instead of + // silently absent until its first pull. + let! registered = LibDB.Remotes.list () + let syncedSet = cursors |> List.map fst |> Set.ofList + let registeredLines = + registered + |> List.filter (fun (_name, url) -> not (Set.contains url syncedSet)) + |> List.map (fun (name, url) -> + DString $"{url} — registered as '{name}', not yet synced") + // proactively RAISE unacknowledged conflicts here so `dark sync status` flags them + // without the user having to remember to run `dark conflicts` — the common path is a + // glance at status, see the pending count, then go ack. + let! conflicts = LibDB.Conflicts.list () + let pending = + conflicts + |> List.filter (fun (c : LibDB.Conflicts.Conflict) -> + not c.acknowledged) + let conflictLines = + if List.isEmpty pending then + [] + else + [ DString + $"⚠ {List.length pending} unacknowledged sync conflict(s) — run `dark conflicts`" ] + return Dval.list KTString (peerLines @ registeredLines @ conflictLines) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncRemotes" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TList TString + description = + "Every peer a tailnet-wide daemon polls: the UNION of remotes we have a sync cursor for + (pulled at least once) and remotes explicitly registered via `dark remote add`. De-duped; + empty if nothing's been synced or registered yet." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { + let! cursors = LibDB.SyncCursors.listCursors () + let! registered = LibDB.Remotes.urls () + // union: cursor-peers (implicit, from pulling) + registered remotes (explicit) — so a + // `remote add`ed peer is polled even before its first manual pull, and neither path drops. + let remotes = + (cursors |> List.map fst) @ registered + |> List.distinct + |> List.map DString + return Dval.list KTString remotes + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncHealth" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TString + description = + "Liveness + state probe for the sync SERVER (the `GET /sync/health` body): confirms the + server is up, its RELEASE (the op-format/wire version — the single coordinate that gates + cross-instance sync), and how many ops its canonical log holds. So a puller (or a person) + can check readiness, see convergence (server ops vs local ops), AND detect a version skew + before pulling. `release` comes first so the older `ops=` parser stays valid." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { + let! (opsCount, _folded) = LibDB.Seed.projectionStatus () + return + DString + $"sync-server ok; release={string LibDB.Sync.wireFormatVersion}; ops={string opsCount}" + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncReleaseVersion" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TInt64 + description = + "THIS instance's sync RELEASE (the op-format/wire version). The single coordinate that must + match a peer's for sync to proceed — the wire gate refuses a mismatched batch (fail-closed, + no corruption). Compare against a peer's `/sync/health` `release=` to detect a skew and tell + the user to upgrade, rather than surfacing a raw decode failure." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { return DInt64(int64 LibDB.Sync.wireFormatVersion) } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + // ── HTTP blob channel (sender) — content addressed, mirrors the file pull's blob fetch ── + { name = fn "pmSyncBlobHashes" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TString + description = + "The blob MANIFEST (the `GET /sync/blobs` body): every content hash this instance holds, + newline-joined. A receiver keeps the ones it lacks (`Blob.missing`) and pulls each one's + bytes via `pmSyncBlobBytes` — the HTTP blob channel, mirroring the file pull's blob fetch." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { + let! hashes = LibDB.RuntimeTypes.Blob.allHashes () + return DString(String.concat "\n" hashes) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncBlobBytes" 0 + typeParams = [] + parameters = [ Param.make "hash" TString "The content hash to fetch" ] + returnType = TString + description = + "The bytes for one content hash, base64-encoded (the `GET /sync/blob?hash=` body), or empty + if this instance lacks it. The receiver base64-decodes + `Blob.insert`s — content-addressed, + so the insert dedups exactly like ops (the blob counterpart to `pmSyncOpsSince`)." + fn = + (function + | _, _, _, [ DString hash ] -> + uply { + match! LibDB.RuntimeTypes.Blob.get hash with + | Some bytes -> return DString(System.Convert.ToBase64String bytes) + | None -> return DString "" + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + // ── HTTP blob channel (receiver) — which of a peer's blobs we lack, + store a fetched one ── + { name = fn "pmBlobMissing" 0 + typeParams = [] + parameters = + [ Param.make + "hashes" + (TList TString) + "A peer's offered content hashes (its manifest)" ] + returnType = TList TString + description = + "Of the peer's offered content hashes, which this instance LACKS — exactly the blobs to + fetch (`pmSyncBlobBytes` each). Over `Blob.missing`; content-addressed, so it's a pure + set-difference (no cursor, unlike the op stream)." + fn = + (function + | _, _, _, [ DList(_, hashDvals) ] -> + uply { + let hashes = + hashDvals + |> List.choose (fun d -> + match d with + | DString s -> Some s + | _ -> None) + let! missing = LibDB.RuntimeTypes.Blob.missing hashes + return Dval.list KTString (missing |> List.map DString) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmBlobInsert" 0 + typeParams = [] + parameters = + [ Param.make "hash" TString "The content hash" + Param.make + "base64Bytes" + TString + "The blob's bytes, base64-encoded (empty = skip)" ] + returnType = TBool + description = + "Store a fetched blob: base64-decode the bytes and insert under its content hash. Idempotent + (content-addressed dedup, same guarantee as ops). Returns true if non-empty bytes were + inserted, false if the peer's body was empty (it lacked the blob)." + fn = + (function + | _, _, _, [ DString hash; DString b64 ] -> + uply { + if b64 = "" then + return DBool false + else + let bytes = System.Convert.FromBase64String b64 + do! LibDB.RuntimeTypes.Blob.insert hash bytes + return DBool true + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + // ── conflicts surface — the recorded, reviewable auto-resolutions (dark conflicts) ── + { name = fn "pmConflictsList" 0 + typeParams = [] + parameters = + [ Param.make + "includeResolved" + TBool + "true = include acked/overridden (history); false = only pending" ] + returnType = + TList( + TTuple(TString, TString, [ TString; TString; TString; TString; TString ]) + ) + description = + "Recorded sync conflicts (auto-resolved name-binding divergences), one STRUCTURED tuple each + for `dark conflicts` to format in Dark (so the display is package-testable + iterable): + `(id, location, status, resolution, localHash, incomingHash, remote)`. `status` is + NEW/acked/overridden; hashes are full (the Dark formatter shortens). `includeResolved=false` + shows only pending (the actionable view — acked/overridden drop out, the ack-to-dismiss + model). Empty if none match." + fn = + (function + | _, _, _, [ DBool includeResolved ] -> + uply { + let! all = LibDB.Conflicts.list () + let conflicts = + if includeResolved then + all + else + all + |> List.filter (fun (c : LibDB.Conflicts.Conflict) -> + not (c.acknowledged || c.overridden)) + let rows = + conflicts + |> List.map (fun c -> + let status = + if c.overridden then "overridden" + elif c.acknowledged then "acked" + else "NEW" + DTuple( + DString c.id, + DString c.location, + [ DString status + DString c.resolution + DString c.localHash + DString c.incomingHash + DString c.remote ] + )) + return + Dval.list + (KTTuple( + VT.string, + VT.string, + [ VT.string; VT.string; VT.string; VT.string; VT.string ] + )) + rows + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmConflictAck" 0 + typeParams = [] + parameters = + [ Param.make + "idPrefix" + TString + "A recorded conflict's id (or a unique prefix of it)" ] + returnType = TBool + description = + "Acknowledge a recorded conflict — 'the auto-resolution was right' (the common case). Matches + by id or a unique id-prefix; returns true if exactly one matched and was acknowledged, + false if none or ambiguous." + fn = + (function + | _, _, _, [ DString idPrefix ] -> + uply { + let! conflicts = LibDB.Conflicts.list () + match conflicts |> List.filter (fun c -> c.id.StartsWith idPrefix) with + | [ c ] -> + do! LibDB.Conflicts.acknowledge c.id + return DBool true + | _ -> return DBool false + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmConflictAckAll" 0 + typeParams = [] + parameters = [ Param.make "unit" TUnit "" ] + returnType = TInt64 + description = + "Acknowledge ALL unacknowledged sync conflicts at once (the bulk 'the auto thing was right' + path). Returns how many were newly acknowledged." + fn = + (function + | _, _, _, [ DUnit ] -> + uply { + let! n = LibDB.Conflicts.acknowledgeAll () + return DInt64(int64 n) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmConflictResolve" 0 + typeParams = [] + parameters = + [ Param.make + "idPrefix" + TString + "A recorded conflict's id (or a unique prefix)" + Param.make + "keepMine" + TBool + "true = re-bind to YOUR hash; false = keep the incoming" ] + returnType = TBool + description = + "Override a recorded conflict's auto-resolution (by id or unique prefix). `keepMine` re-binds + the location to your hash via a new WIP `SetName` op (op-log clean); false keeps the incoming + (which already won). Marks it overridden. True if exactly one matched and was resolved." + fn = + (function + | _, _, _, [ DString idPrefix; DBool keepMine ] -> + uply { + let! conflicts = LibDB.Conflicts.list () + match conflicts |> List.filter (fun c -> c.id.StartsWith idPrefix) with + | [ c ] -> + let! ok = LibDB.Sync.resolveConflict c.id keepMine + return DBool ok + | _ -> return DBool false + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } ] + + +let builtins () : Builtins = LibExecution.Builtin.make [] (fns ()) diff --git a/backend/src/LibDB/Conflicts.fs b/backend/src/LibDB/Conflicts.fs new file mode 100644 index 0000000000..58a8ef2d10 --- /dev/null +++ b/backend/src/LibDB/Conflicts.fs @@ -0,0 +1,163 @@ +/// Sync conflicts — the recorded, reviewable log of auto-resolved name-binding divergences. +/// +/// A conflict is recorded at PULL TIME, where it's the accurate signal: `detectDivergences` knows a +/// pulled `SetName` rebinds a name we'd locally bound to a *different* hash (incoming-vs-local). The +/// pull auto-resolves by policy (default last-writer-wins — the incoming bind already applied) AND +/// records the conflict here, so nothing is *silently* lost: it's raised to the user, who usually +/// acknowledges ("the auto thing was right") and occasionally overrides. +/// +/// Why a recorded log, not a pure op-log projection: everyone's "main" shares the constant +/// `PT.mainBranchId`, so two competing edits are SAME-branch — the log can't distinguish "a peer +/// overwrote me" from "I re-edited." The pull is the one place that knows it was a sync, so the +/// record is captured there. (A true projection would need a per-op source/peer marker.) +/// +/// Local-only, NOT synced. The `sync_conflicts` table lives in `backend/migrations/schema.sql`. +module LibDB.Conflicts + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite + +/// One recorded conflict: a name we'd bound to `localHash` that a pull from `remote` rebound to +/// `incomingHash`, auto-resolved by `resolution`. `acknowledged` = the user said "auto was right"; +/// `overridden` = the user emitted a different resolution (a reconciling op). +type Conflict = + { id : string + location : string + localHash : string + incomingHash : string + resolution : string + remote : string + acknowledged : bool + overridden : bool } + +/// Record an auto-resolved divergence. Idempotent on the live (location, remote, both hashes): the +/// same conflict re-detected on a re-pull doesn't pile up duplicate rows. +let record + (remote : string) + (location : string) + (localHash : string) + (incomingHash : string) + (resolution : string) + : Task = + task { + // dedup: skip if this exact unresolved conflict is already on record + let! existing = + Sql.query + """ + SELECT id FROM sync_conflicts + WHERE location = @loc AND remote = @remote + AND local_hash = @local AND incoming_hash = @incoming + AND overridden = 0 + LIMIT 1 + """ + |> Sql.parameters + [ "loc", Sql.string location + "remote", Sql.string remote + "local", Sql.string localHash + "incoming", Sql.string incomingHash ] + |> Sql.executeAsync (fun read -> read.string "id") + match existing with + | _ :: _ -> () + | [] -> + let id = System.Guid.NewGuid() |> string + do! + Sql.query + """ + INSERT INTO sync_conflicts + (id, location, local_hash, incoming_hash, resolution, remote) + VALUES (@id, @loc, @local, @incoming, @resolution, @remote) + """ + |> Sql.parameters + [ "id", Sql.string id + "loc", Sql.string location + "local", Sql.string localHash + "incoming", Sql.string incomingHash + "resolution", Sql.string resolution + "remote", Sql.string remote ] + |> Sql.executeStatementAsync + } + +/// All recorded conflicts, newest first — the `dark conflicts` surface. +let list () : Task> = + task { + return! + Sql.query + """ + SELECT id, location, local_hash, incoming_hash, resolution, remote, + acknowledged, overridden + FROM sync_conflicts ORDER BY detected_at DESC + """ + |> Sql.executeAsync (fun read -> + { id = read.string "id" + location = read.string "location" + localHash = read.string "local_hash" + incomingHash = read.string "incoming_hash" + resolution = read.string "resolution" + remote = read.string "remote" + acknowledged = read.int64 "acknowledged" <> 0L + overridden = read.int64 "overridden" <> 0L }) + } + +/// The user agrees with the auto-resolution — stop surfacing it (the common case). +let acknowledge (id : string) : Task = + task { + do! + Sql.query "UPDATE sync_conflicts SET acknowledged = 1 WHERE id = @id" + |> Sql.parameters [ "id", Sql.string id ] + |> Sql.executeStatementAsync + } + +/// Acknowledge ALL currently-unacknowledged conflicts at once — the bulk "yeah, the auto thing was +/// right" path. Returns how many were newly acknowledged. +let acknowledgeAll () : Task = + task { + let! pending = + Sql.query "SELECT COUNT(*) AS n FROM sync_conflicts WHERE acknowledged = 0" + |> Sql.executeAsync (fun read -> read.int64 "n") + do! + Sql.query "UPDATE sync_conflicts SET acknowledged = 1 WHERE acknowledged = 0" + |> Sql.executeStatementAsync + return + (match pending with + | n :: _ -> int n + | [] -> 0) + } + +/// The user chose a different resolution (a reconciling op was emitted) — mark it overridden. +let markOverridden (id : string) : Task = + task { + do! + Sql.query + "UPDATE sync_conflicts SET overridden = 1, acknowledged = 1 WHERE id = @id" + |> Sql.parameters [ "id", Sql.string id ] + |> Sql.executeStatementAsync + } + +/// Mark the most recent un-overridden conflict at a location overridden — used when a resolution +/// POLICY (not a human) keeps local: `Sync.routeDivergences` emitted a reconciling op, so the +/// recorded auto-LWW outcome no longer reflects the live binding. Keyed by remote + location. +let markOverriddenByLocation (remote : string) (location : string) : Task = + task { + do! + Sql.query + """ + UPDATE sync_conflicts SET overridden = 1, acknowledged = 1 + WHERE id = (SELECT id FROM sync_conflicts + WHERE remote = @remote AND location = @loc AND overridden = 0 + ORDER BY detected_at DESC LIMIT 1) + """ + |> Sql.parameters [ "remote", Sql.string remote; "loc", Sql.string location ] + |> Sql.executeStatementAsync + } + +/// Look up one conflict by id (for the resolve flow). +let getById (id : string) : Task> = + task { + let! all = list () + return all |> List.tryFind (fun c -> c.id = id) + } diff --git a/backend/src/LibDB/Inserts.fs b/backend/src/LibDB/Inserts.fs index 12e3120d2d..aee620e0a0 100644 --- a/backend/src/LibDB/Inserts.fs +++ b/backend/src/LibDB/Inserts.fs @@ -29,22 +29,25 @@ let computeOpHash (op : PT.PackageOp) : System.Guid = /// commitHash = None means WIP (commit_hash = NULL), Some id means committed /// Returns the count of ops actually inserted (duplicates are skipped via INSERT OR IGNORE) /// -/// Uses a two-phase approach for consistency: -/// 1. Insert ops with applied=false -/// 2. Apply ops to projection tables -/// 3. Mark ops as applied=true -/// -/// This ensures that if step 2 fails, we can identify unapplied ops and retry/rollback. -let insertAndApplyOps +/// Two steps: (1) insert ops as applied=false, (2) apply them to the projection tables, then mark +/// applied=true. The `applied` flag is ADVISORY — a diagnostic record of which ops cleared the fold +/// — not an automatic recovery trigger: nothing reads `applied = 0` to retry or roll back. The final +/// mark is best-effort (a failure is logged, not fatal; the ops are already applied). +/// Shared impl. `originTs` maps an op-id → the authoring stamp to store; for ops not in the map +/// (the normal local-authoring path) the op self-stamps `now`. The SYNC path supplies the peer's +/// origin_ts here so the op is inserted with its TRUE creation time BEFORE the fold — so playback's +/// `applySetName` orders the binding by creation, not arrival (timestamp-LWW). +let private insertAndApplyOpsImpl (branchId : PT.BranchId) (commitHash : Option) (ops : List) + (originTs : Map) : Task = task { if List.isEmpty ops then return 0L else - // Phase 1: Insert ops with applied=false + // Insert ops as unapplied // Tag all ops in a propagation batch with the same propagation_id. // This allows cleanup of all related ops when undoing a propagation. let batchPropagationId = @@ -62,13 +65,20 @@ let insertAndApplyOps let opBlob = BS.PT.PackageOp.serialize opId op (opId, op, opBlob, batchPropagationId)) + // Base stamp for locally-authored ops. Each op in this batch gets `baseNow + its index` ms, so a + // same-batch rebind of one name (v1 then v2) gets STRICTLY-INCREASING origin_ts — v2 is + // newer-by-creation and wins outright, never hitting playback's same-millisecond hash tie-break + // (which is reserved for genuine cross-instance races). Sub-millisecond authoring would otherwise + // tie, making a local rebind resolve by content hash instead of order. + let baseNow = System.DateTime.UtcNow + let insertStatements = opsWithIds - |> List.map (fun (opId, _op, opBlob, propagationId) -> + |> List.mapi (fun i (opId, _op, opBlob, propagationId) -> let sql = """ - INSERT OR IGNORE INTO package_ops (id, op_blob, branch_id, applied, commit_hash, propagation_id) - VALUES (@id, @op_blob, @branch_id, @applied, @commit_hash, @propagation_id) + INSERT OR IGNORE INTO package_ops (id, op_blob, branch_id, applied, commit_hash, propagation_id, origin_ts) + VALUES (@id, @op_blob, @branch_id, @applied, @commit_hash, @propagation_id, @origin_ts) """ let commitHashParam = @@ -76,12 +86,21 @@ let insertAndApplyOps | Some s -> Sql.string s | None -> Sql.dbnull + // the op's authoring stamp: the peer's value on sync (preserve it), else a strictly-increasing + // local stamp (`baseNow + index`) so same-batch rebinds order by authoring sequence. + let originTsVal = + match Map.tryFind opId originTs with + | Some t -> t + | None -> + baseNow.AddMilliseconds(float i).ToString("yyyy-MM-ddTHH:mm:ss.fffZ") + let parameters = [ "id", Sql.uuid opId "op_blob", Sql.bytes opBlob "branch_id", Sql.uuid branchId "applied", Sql.bool false // Insert as unapplied "commit_hash", commitHashParam + "origin_ts", Sql.string originTsVal "propagation_id", (match propagationId with | Some id -> Sql.uuid id @@ -131,6 +150,24 @@ let insertAndApplyOps return insertedCount } +/// Insert + apply ops authored locally (each self-stamps `now` as its origin_ts). +let insertAndApplyOps + (branchId : PT.BranchId) + (commitHash : Option) + (ops : List) + : Task = + insertAndApplyOpsImpl branchId commitHash ops Map.empty + +/// Insert + apply ops received via SYNC, preserving each op's authoring stamp (`originTs` by op-id) +/// so the fold orders bindings by CREATION time, not arrival (timestamp-LWW, conflicts doc). +let insertAndApplyOpsWithOrigin + (branchId : PT.BranchId) + (commitHash : Option) + (ops : List) + (originTs : Map) + : Task = + insertAndApplyOpsImpl branchId commitHash ops originTs + /// Create a new commit and insert ops with that commit_hash /// Returns the commit Hash @@ -644,3 +681,45 @@ let discardWipOps (branchId : PT.BranchId) : Task> = with ex -> return Error ex.Message } + + +/// Sync read path: all ops with rowid > cursor, in insertion order. +/// Uses SQLite's implicit `rowid` as a monotonic cursor — `package_ops`'s PK is TEXT, so the +/// rowid is a free, strictly-increasing insertion sequence; no `seq` column / migration needed. +/// Returns (rowid, opId, opBlob) per op so a poller advances `since` to the last rowid seen. +let opsSince (cursor : int64) : Task> = + Sql.query + "SELECT rowid, id, origin_ts, op_blob FROM package_ops WHERE rowid > @cursor ORDER BY rowid ASC" + |> Sql.parameters [ "cursor", Sql.int64 cursor ] + |> Sql.executeAsync (fun read -> + (read.int64 "rowid", + read.uuid "id", + read.string "origin_ts", + read.bytes "op_blob")) + +/// COMMITTED-ONLY variant of `opsSince` — only ops belonging to a commit (`commit_hash` set), +/// excluding WIP (`commit_hash IS NULL`). This is the DEFAULT sync read: a peer (another of your +/// devices, or a coworker) syncs your committed history, never your uncommitted mid-edit work. +/// +/// **Cursors on the COMMIT's rowid, not the op's.** A WIP op gets its `package_ops.rowid` at insert +/// time, then a later commit promotes it IN PLACE (the rowid never moves). So an op-rowid cursor can +/// permanently skip an op that commits *after* the cursor already advanced past its rowid (e.g. a +/// fresh-committed op syncs first, lifting the cursor over an older still-WIP op). Cursoring on +/// `commits.rowid` — assigned when the commit is created, monotonic, and shared by every op in the +/// commit — fixes this: a newly-created commit always sorts AFTER everything already synced, and a +/// commit's ops travel together (the unit of sync is the commit). The returned first element is the +/// commit's rowid (the cursor coordinate the receiver stores), not the op's. No schema change. +let opsSinceCommitted + (cursor : int64) + : Task> = + Sql.query + "SELECT c.rowid AS crowid, po.id, po.origin_ts, po.op_blob + FROM package_ops po JOIN commits c ON po.commit_hash = c.hash + WHERE c.rowid > @cursor + ORDER BY c.rowid ASC, po.rowid ASC" + |> Sql.parameters [ "cursor", Sql.int64 cursor ] + |> Sql.executeAsync (fun read -> + (read.int64 "crowid", + read.uuid "id", + read.string "origin_ts", + read.bytes "op_blob")) diff --git a/backend/src/LibDB/Remotes.fs b/backend/src/LibDB/Remotes.fs new file mode 100644 index 0000000000..7adfcc815e --- /dev/null +++ b/backend/src/LibDB/Remotes.fs @@ -0,0 +1,56 @@ +/// Remotes — the registered sync peers. +/// +/// Local-only, NOT synced (a remote list is per-instance setup, like git's). Each entry is a +/// (name, url) the tailnet-wide daemon polls — so you can `dark remote add` a peer and have the +/// daemon sync it WITHOUT a manual pull first. Distinct from `sync_cursors` (poll resume state): +/// cursors are created implicitly by pulling; remotes are explicitly registered. The daemon's +/// poll set is the UNION of the two, so neither path regresses. +/// +/// The `sync_remotes` table lives in `backend/migrations/schema.sql` (created at startup). +module LibDB.Remotes + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite + +/// Register (or update) a remote by name. Idempotent upsert — re-adding a name updates its url. +let add (name : string) (url : string) : Task = + Sql.query + """ + INSERT INTO sync_remotes (name, url) VALUES (@name, @url) + ON CONFLICT(name) DO UPDATE SET url = excluded.url + """ + |> Sql.parameters [ "name", Sql.string name; "url", Sql.string url ] + |> Sql.executeStatementAsync + +/// Remove a registered remote by name. Returns true if it existed (and was deleted). +let remove (name : string) : Task = + task { + let! before = + Sql.query "SELECT COUNT(*) AS n FROM sync_remotes WHERE name = @name" + |> Sql.parameters [ "name", Sql.string name ] + |> Sql.executeAsync (fun read -> read.int64 "n") + let existed = + (match before with + | n :: _ -> n > 0L + | [] -> false) + do! + Sql.query "DELETE FROM sync_remotes WHERE name = @name" + |> Sql.parameters [ "name", Sql.string name ] + |> Sql.executeStatementAsync + return existed + } + +/// All registered remotes as (name, url), ordered by name for a stable display. +let list () : Task> = + Sql.query "SELECT name, url FROM sync_remotes ORDER BY name" + |> Sql.executeAsync (fun read -> (read.string "name", read.string "url")) + +/// Just the pollable urls of registered remotes — what the daemon adds to its poll set. +let urls () : Task> = + Sql.query "SELECT url FROM sync_remotes ORDER BY url" + |> Sql.executeAsync (fun read -> read.string "url") diff --git a/backend/src/LibDB/RuntimeTypes.fs b/backend/src/LibDB/RuntimeTypes.fs index efe4adddcc..ff33f56a2d 100644 --- a/backend/src/LibDB/RuntimeTypes.fs +++ b/backend/src/LibDB/RuntimeTypes.fs @@ -112,6 +112,26 @@ module Blob = return () } + /// Of `hashes`, the ones this store does NOT already have — the receiver's blob request + /// (sync's fetch-on-miss: `package_blobs` don't ride the op stream). Content-addressed, so + /// a hash we have is identical content; only the genuinely-absent ones need fetching. + let missing (hashes : List) : Ply> = + uply { + let! present = + Sql.query "SELECT hash FROM package_blobs" + |> Sql.executeAsync (fun read -> read.string "hash") + let presentSet = Set.ofList present + return hashes |> List.filter (fun h -> not (Set.contains h presentSet)) + } + + /// Every content hash this store holds — the sync blob MANIFEST (sender side of fetch-on-miss). + let allHashes () : Ply> = + uply { + return! + Sql.query "SELECT hash FROM package_blobs" + |> Sql.executeAsync (fun read -> read.string "hash") + } + /// Walk a Dval tree and collect every `Persistent` blob hash it /// references. Ephemeral blobs aren't rows in `package_blobs` — they @@ -160,10 +180,12 @@ module Blob = /// that might later hold Dvals (User DB rows, `trace_data`) will /// need their own reference-collection pass. /// - /// Idempotent: re-running after a clean sweep deletes nothing. Safe - /// to run while the system is live — worst-case race is a concurrent - /// promote racing the delete, which the foreign-key-style orphan - /// check prevents (content-addressed re-insert is cheap). + /// Idempotent: re-running after a clean sweep deletes nothing. Safe to + /// run while the system is live, with one caveat: the reference scan and + /// the per-orphan DELETE are NOT one transaction, so a value promoted in + /// that window could have its blob swept. Benign because blobs are + /// content-addressed — the next promote re-inserts it cheaply — but run + /// sweeps when writes are quiet if that window matters. /// /// For a package set with N values and M blobs, cost is O(N+M) /// deserialise passes plus one DELETE per orphan. Good enough for diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs new file mode 100644 index 0000000000..8318de5c99 --- /dev/null +++ b/backend/src/LibDB/Sync.fs @@ -0,0 +1,617 @@ +/// The sync receiver/sender API — the cohesive module the Dark HTTP +/// handlers call, tying together the proven primitives (opsSince + insertAndApplyOps + +/// sync cursors) into the two wire operations. +/// +/// A remote op and a local op are the same thing: the receiver replays through the existing +/// `insertAndApplyOps` path (idempotent — `INSERT OR IGNORE INTO package_ops`), then advances +/// this remote's cursor so the next poll resumes after the batch. +module LibDB.Sync + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite + +module PT = LibExecution.ProgramTypes +module RT = LibExecution.RuntimeTypes +module BS = LibSerialization.Binary.Serialization +module PMBlob = LibDB.RuntimeTypes.Blob + +// ── Wire codec (the HTTP/Tailscale transport) ───────────────────────────────────────── +// The HTTP body for `GET /sync/events` is a batch of `(rowid, id, op_blob)` rows. `op_blob` is +// already the serialized `PackageOp`, so the wire format is just self-describing binary framing +// over the existing bytes — no new per-op serializer. Same bytes the file-based pull +// reads; only the carrier differs. Encode on the server, decode on the client, then apply +// through the identical `pull`/`insertAndApplyOps` path. + +/// The sync wire-format version — the leading int32 of every batch. The op encoding is the +/// serialized `PackageOp` (BUMP this when that, or this framing, changes incompatibly). A peer on a +/// different version produces a different leading int32, so `decodeBatch` rejects it at decode +/// (fail-closed) rather than misreading — a cheap op-format-stability guard, in the +/// wire itself instead of a separate `/whoami` handshake. (Free to add now: no sync is deployed.) +// v2: each op now carries its `origin_ts` ADJACENT to the op (a wire field, never inside the op — +// that would change its content hash). v1 peers (no origin_ts) are rejected by the version gate. +// v3: MEANING-STABLE HASHING — content hashes are now over the alpha-normalized canonical form +// (bound-variable names no longer affect a hash). Every package item's hash changed, so a v2 peer and +// a v3 peer are incompatible; the Release gate pauses sync between them. Crossing v2→v3 is a clean +// break (pre-v3 data is disposable; the store rebuilds from source / re-pulls from a v3 peer) — see +// `Releases` Release 3. +let wireFormatVersion : int = 3 + +/// Encode an op batch to a single self-describing byte buffer: int32 version, int32 count, then per +/// op `rowid:int64 · id:16 bytes · origin_ts:utf8-string · len:int32 · op_blob:len bytes`. +let encodeBatch (ops : List) : byte[] = + use ms = new System.IO.MemoryStream() + use w = new System.IO.BinaryWriter(ms) + w.Write(wireFormatVersion) + w.Write(List.length ops) + for (rowid, id, originTs, blob) in ops do + w.Write(rowid) + w.Write(id.ToByteArray()) + w.Write(originTs) // BinaryWriter.Write(string) is length-prefixed UTF-8 + w.Write(blob.Length) + w.Write(blob) + w.Flush() + ms.ToArray() + +/// Decode a wire buffer produced by `encodeBatch` back to the op batch (inverse of `encodeBatch`). +let decodeBatch (bytes : byte[]) : List = + use ms = new System.IO.MemoryStream(bytes) + use r = new System.IO.BinaryReader(ms) + // version first — a mismatch means the peer speaks a different op encoding; reject rather than + // misread (silent corruption). Fail-closed: the pull errors + the operator learns to upgrade. + let version = r.ReadInt32() + if version <> wireFormatVersion then + Exception.raiseInternal + $"decodeBatch: wire-format version mismatch — peer sent {version}, this instance speaks {wireFormatVersion}. A peer on a different Dark version; upgrade to sync." + [] + let count = r.ReadInt32() + [ for _ in 1..count do + let rowid = r.ReadInt64() + let id = System.Guid(r.ReadBytes 16) + let originTs = r.ReadString() // inverse of BinaryWriter.Write(string) + let len = r.ReadInt32() + let blob = r.ReadBytes len + // BinaryReader.ReadBytes returns FEWER bytes at EOF WITHOUT throwing — so a wire buffer + // truncated mid-blob (a connection dropped mid-response over the tailnet) would silently + // yield a partial, corrupt op. Guard it: fail loudly so the pull errors + retries on the + // next poll instead of applying garbage. (Fixed-width fields above already throw at EOF.) + if blob.Length <> len then + Exception.raiseInternal + $"decodeBatch: truncated wire buffer — expected {len}-byte op blob, got {blob.Length}" + [] + yield (rowid, id, originTs, blob) ] + +/// The sender read (`GET /sync/events?since=cursor`): the COMMITTED ops the puller hasn't seen, as +/// `(commitRowid, id, origin_ts, op_blob)` — `op_blob` + the commit-rowid cursor + the authoring +/// stamp the receiver needs for timestamp-LWW (carried adjacent, not inside the op). +/// +/// **Sync ships committed work only** (the unit of sync is the commit; WIP stays local — see +/// `opsSinceCommitted`). A peer never receives your uncommitted mid-edit state; it converges on the +/// history you've chosen to publish by committing. +let opsToSend (cursor : int64) : Task> = + Inserts.opsSinceCommitted cursor + +/// Detect sync divergences in a remote batch BEFORE applying it. For each incoming +/// `SetName`, if the location is already bound LOCALLY to a *different*, non-deprecated hash, +/// two peers gave the same name different content. Returns `(location, existingHash, +/// incomingHash)` per divergence — surfaced as **data** so the receiver never blocks; a higher +/// layer turns these into `Conflict.CSyncDivergence` for the resolution policy. +/// Internal core of both `detectDivergences` (which stringifies these) and `reconcileBatch` (which +/// turns them into reconciling ops): for each incoming `SetName` whose location is locally bound to +/// a DIFFERENT non-deprecated hash, the `(incoming op, existing local hash)` pair. +let private divergentBindings + (branchId : PT.BranchId) + (ops : List) + : Task> = + task { + let result = ResizeArray() + for op in ops do + match op with + | PT.PackageOp.SetName(loc, target) -> + let modulesStr = String.concat "." loc.modules + let (PT.Hash incomingHash) = target.hash + // the current (non-superseded) binding at this location, if any + let! existing = + Sql.query + """ + SELECT item_hash FROM locations + WHERE owner = @owner AND modules = @modules AND name = @name + AND branch_id = @branch_id AND unlisted_at IS NULL + LIMIT 1 + """ + |> Sql.parameters + [ "owner", Sql.string loc.owner + "modules", Sql.string modulesStr + "name", Sql.string loc.name + "branch_id", Sql.uuid branchId ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + match existing with + | existingHash :: _ when existingHash <> incomingHash -> + result.Add((op, existingHash)) + | _ -> () + | _ -> () + return List.ofSeq result + } + +let detectDivergences + (branchId : PT.BranchId) + (ops : List) + : Task> = + task { + let! pairs = divergentBindings branchId ops + return + pairs + |> List.map (fun (op, existingHash) -> + match op with + | PT.PackageOp.SetName(loc, target) -> + let modulesStr = String.concat "." loc.modules + let (PT.Hash incomingHash) = target.hash + let locStr = + if modulesStr = "" then + $"{loc.owner}.{loc.name}" + else + $"{loc.owner}.{modulesStr}.{loc.name}" + (locStr, existingHash, incomingHash) + // divergentBindings only ever returns SetName ops, so this is unreachable + | _ -> ("", existingHash, "")) + } + + +/// The receiver apply (`POST /sync/events`): insert + apply a remote batch via the existing +/// playback path (idempotent), then advance this remote's cursor to the **max sender-rowid** +/// in the batch so the next poll resumes after it. Returns the new cursor. An empty batch is a +/// no-op (cursor unchanged). The `rowid` paired with each op is the *sender's* rowid (from its +/// `opsSince`), which is what the cursor tracks for resuming the poll. +/// Record each auto-resolved divergence in the conflict store so it's REVIEWABLE (`dark conflicts`) +/// rather than silently lost. The auto-resolution is last-writer-wins — the incoming bind has just +/// applied — so we log `(location, what-we-had, what-they-sent, "auto: last-writer-wins")`. The user +/// usually acknowledges; occasionally overrides (`dark conflicts resolve … mine`). +/// The live binding hash at an FQ "owner[.modules].name" location, if any. Parses the FQ string the +/// way `resolveConflict` does (owner = head, name = last, modules = the middle). +// NOTE: reads the live binding ACROSS branches (no `branch_id` filter). Correct while sync is +// main-only — the same FQN is live on at most one branch — but when cross-branch sync lands this must +// take the divergence's `branch_id` so it logs the right branch's outcome. `ORDER BY rowid DESC` keeps +// the `LIMIT 1` deterministic (newest row) in the meantime. +let private liveBindingHash (location : string) : Task> = + task { + match location.Split('.') |> List.ofArray with + | owner :: rest -> + match List.rev rest with + | name :: revModules -> + let modulesStr = revModules |> List.rev |> String.concat "." + let! rows = + Sql.query + """ + SELECT item_hash FROM locations + WHERE owner = @o AND modules = @m AND name = @n AND unlisted_at IS NULL + ORDER BY rowid DESC + LIMIT 1 + """ + |> Sql.parameters + [ "o", Sql.string owner + "m", Sql.string modulesStr + "n", Sql.string name ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + return List.tryHead rows + | [] -> return None + | _ -> return None + } + +let recordDivergences + (remote : string) + (divergences : List) + : Task = + task { + for (location, localHash, incomingHash) in divergences do + // record the ACTUAL outcome after timestamp-LWW (the fold may have kept the local op if the + // incoming was older-by-creation) — not a blanket "incoming won". Read the live binding. + let! winner = liveBindingHash location + let resolution = + if winner = Some incomingHash then + "auto: timestamp-LWW — incoming won (newer creation)" + elif winner = Some localHash then + "auto: timestamp-LWW — kept local (newer creation)" + else + "auto: timestamp-LWW" + do! Conflicts.record remote location localHash incomingHash resolution + } + + +/// The item kind of a content hash — needed to rebuild a `SetName` Reference for a keep-local +/// reconcile (the transport surfaced the divergence as data, so the original op isn't retained). +/// Reads `locations.item_type` first — that's the kind of the very binding we're restoring, present +/// even for a binding the incoming since superseded (its row is unlisted, not gone) — then falls +/// back to the projection tables. None if the hash is unknown to all of them. +let private kindOfHash (hash : string) : Task> = + task { + let! rows = + Sql.query + """ + SELECT item_type AS k FROM locations WHERE item_hash = @h + UNION ALL SELECT 'fn' FROM package_functions WHERE hash = @h + UNION ALL SELECT 'type' FROM package_types WHERE hash = @h + UNION ALL SELECT 'value' FROM package_values WHERE hash = @h + LIMIT 1 + """ + |> Sql.parameters [ "h", Sql.string hash ] + |> Sql.executeAsync (fun read -> read.string "k") + return rows |> List.tryHead |> Option.map PT.ItemKind.fromString + } + +/// Parse an FQ "owner[.modules].name" location back into a `PackageLocation` (owner = head, +/// name = last, modules = the middle) — the inverse of `detectDivergences`' stringification. +let private parseLocation (location : string) : Option = + match location.Split('.') |> List.ofArray with + | owner :: rest -> + match List.rev rest with + | name :: revModules -> + Some { owner = owner; modules = List.rev revModules; name = name } + | [] -> None + | _ -> None + +/// Route each detected divergence through the runtime conflict-dispatch seam +/// (`exeState.conflictDispatch`). This is the "higher layer" the transport defers to: the receiver +/// surfaces each `name → two hashes` divergence as data (never blocks); HERE it becomes a first-class +/// `Conflict.CSyncDivergence` the runtime policy resolves — +/// - default policy (`FailLoudly`) → no reconciling op: the divergence stays surfaced and the +/// timestamp-LWW outcome the fold already applied stands. Behaviorally unchanged (the timestamp-LWW outcome already applied stands). +/// - a sync policy may return `RSubstitute (DString hash)`: +/// · hash = the LOCAL (existing) hash → KEEP LOCAL: emit + apply a reconciling `SetName` +/// re-binding the location to our hash (a fresh op that also propagates the decision to +/// peers, like a human override), and mark the recorded conflict overridden. +/// · hash = the incoming hash / anything else → no-op: the incoming bind already applied. +/// `branchId` is the branch the reconcile op is written to (the receiver's current branch — sync +/// divergences are name bindings, applied on the branch the puller is on). Returns the number of +/// divergences the policy actively reconciled (0 under the default). +let routeDivergences + (dispatch : RT.ConflictDispatch) + (callCtx : RT.CallContext) + (remote : string) + (branchId : PT.BranchId) + (divergences : List) + : Task = + task { + let mutable reconciled = 0 + for (location, existingHash, incomingHash) in divergences do + let conflict = RT.CSyncDivergence(location, existingHash, incomingHash) + let! resolution = dispatch conflict callCtx |> Ply.toTask + match resolution with + | RT.RSubstitute(RT.DString keepHash) when keepHash = existingHash -> + // keep local: re-bind the location to our existing hash, overriding the incoming bind. + // This is the same move as a human 'mine' override (`resolveConflict`): the `SetName` to our + // hash is content-identical to the op that first bound it, so it's already in the log and a + // fresh insert would `INSERT OR IGNORE`-dedup. RE-STAMP that op's `origin_ts` to now (so it + // wins timestamp-LWW, and the newer stamp rides sync so peers re-adopt our hash too) and + // RE-FOLD it directly via `applyOps` (which re-runs `applySetName`, un-listing the incoming + // row and re-activating ours) — `insertAndApplyOps` only folds NEWLY-inserted ops. + // The re-fold below re-binds locally regardless of whether the re-stamp matched a row, so the + // worst case (the original op somehow absent from the log) is a non-propagating override, never + // a wrong local binding. + match! kindOfHash existingHash with + | Some kind -> + match parseLocation location with + | Some loc -> + let mineOp = + PT.PackageOp.SetName( + loc, + PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) + ) + do! + Sql.query + "UPDATE package_ops SET origin_ts = strftime('%Y-%m-%dT%H:%M:%fZ','now') WHERE id = @id" + |> Sql.parameters [ "id", Sql.uuid (Inserts.computeOpHash mineOp) ] + |> Sql.executeStatementAsync + do! PackageOpPlayback.applyOps branchId None [ mineOp ] + do! Conflicts.markOverriddenByLocation remote location + reconciled <- reconciled + 1 + | None -> () + | None -> () + | _ -> () // RFailLoudly / RSubstitute(incoming|other) → surfaced-as-data, LWW stands + return reconciled + } + +let applyRemoteOps + (remote : string) + (branchId : PT.BranchId) + (commitHash : Option) + (opsWithRowids : List) + : Task> = + task { + match opsWithRowids with + | [] -> + let! cursor = SyncCursors.cursorFor remote + return (cursor, []) + | _ -> + let ops = opsWithRowids |> List.map (fun (_, _, op) -> op) + // detect divergences BEFORE applying (checks current local bindings) — same as `pull`; + // surfaced as data, never blocks the apply. + let! divergences = detectDivergences branchId ops + // carry the peer's origin_ts (adjacent, from the wire) into the fold so playback orders the + // binding by CREATION time, not arrival — the HTTP path now matches the file pull. + let originTsMap = + opsWithRowids + |> List.map (fun (_, originTs, op) -> (Inserts.computeOpHash op, originTs)) + |> Map.ofList + let! _ = + Inserts.insertAndApplyOpsWithOrigin branchId commitHash ops originTsMap + let maxRowid = + opsWithRowids |> List.map (fun (rowid, _, _) -> rowid) |> List.max + do! SyncCursors.advanceCursor remote maxRowid + // record the auto-resolved divergences (recordDivergences reads the live binding so it logs + // the actual timestamp-LWW outcome) — reviewable, not silently lost + do! recordDivergences remote divergences + return (maxRowid, divergences) + } + +/// The file-based pull: PULL another instance's ops into THIS instance by reading the peer's +/// op log directly (a second `data.db`, opened read-only) — no wire. Reads the source ops with +/// `rowid > sinceCursor` (ascending) and applies each through the **same `insertAndApplyOps` +/// the wire receiver uses** — so the local op LOG is written (making us a re-serving peer) AND +/// projections fold, idempotently (`INSERT OR IGNORE` by op-id). Returns the new cursor (max +/// source rowid applied, or `sinceCursor` if none). +/// +/// Ops are grouped by `(branchId, commitHash)` — `insertAndApplyOps` takes one of each per +/// batch — preserving source-rowid order within each group. This is the engine of +/// `dark sync pull `; the HTTP transport only swaps the source connection for a wire +/// body, applied through this same path. +let pull + (sourceConnStr : string) + (sinceCursor : int64) + : Task> = + task { + // read the source's ops above the cursor, into memory, then close the reader (the apply + // path uses the local global connection, independent of this source connection). + use source = new Microsoft.Data.Sqlite.SqliteConnection(sourceConnStr) + source.Open() + // The peer's authoring stamp travels ADJACENT to the op (a column), never inside it — putting + // it in the op would change the content hash and break idempotent dedup. `origin_ts` is part of + // the `package_ops` schema (the migration guarantees it), so we read it directly. + // Committed ops only, cursored on the COMMIT's rowid (see `Inserts.opsSinceCommitted`): the unit + // of sync is the commit, and the commit-rowid is monotonic at commit-creation so a later commit + // never sorts before one already pulled. WIP (commit_hash IS NULL) stays local. + use cmd = source.CreateCommand() + cmd.CommandText <- + "SELECT c.rowid AS crowid, po.id, po.op_blob, po.branch_id, po.commit_hash, po.origin_ts " + + "FROM package_ops po JOIN commits c ON po.commit_hash = c.hash " + + "WHERE c.rowid > $cursor ORDER BY c.rowid ASC, po.rowid ASC" + cmd.Parameters.AddWithValue("$cursor", sinceCursor) + |> ignore + + // each row carries the peer's origin_ts so it travels with the op (timestamp-LWW: portable + // authoring time, preserved on receive — not re-stamped to pull-time). + let rows = + ResizeArray * + string>() + use reader = cmd.ExecuteReader() + while reader.Read() do + let commitHash = if reader.IsDBNull 4 then None else Some(reader.GetString 4) + rows.Add( + reader.GetInt64 0, + System.Guid.Parse(reader.GetString 1), + reader.GetFieldValue 2, + System.Guid.Parse(reader.GetString 3), + commitHash, + reader.GetString 5 + ) + reader.Close() + + let rowList = List.ofSeq rows + + // apply per distinct (branchId, commitHash) batch through the existing receiver path — + // writes the canonical op log + folds projections, idempotent by op-id. (`rowList` is in + // rowid order, so each filtered batch preserves it.) + let batches = + rowList + |> List.map (fun (_, _, _, branchId, commitHash, _) -> (branchId, commitHash)) + |> List.distinct + let divergences = ResizeArray() + for (branchId, commitHash) in batches do + let batchRows = + rowList + |> List.filter (fun (_, _, _, b, c, _) -> b = branchId && c = commitHash) + let ops = + batchRows + |> List.map (fun (_, id, blob, _, _, _) -> + BS.PT.PackageOp.deserialize id blob) + // surface divergences BEFORE applying — `detectDivergences` checks the *current* local + // binding, so it must run before `insertAndApplyOps` rebinds (last-writer). Surfaced as + // data; never blocks the apply (divergence is data, resolved later, never blocking the apply). + let! batchDivs = detectDivergences branchId ops + divergences.AddRange batchDivs + // insert + fold with the peer's ORIGIN timestamp per op (vs the local-insert default), so the + // op is in package_ops with its TRUE creation time BEFORE playback folds it — that's what lets + // `applySetName` order this binding by creation, not arrival, and skip a stale (older-created, + // late-arriving) SetName. Every instance agrees on each op's origin_ts → all converge. + let originTsMap = + batchRows |> List.map (fun (_, id, _, _, _, ts) -> (id, ts)) |> Map.ofList + let! _ = + Inserts.insertAndApplyOpsWithOrigin branchId commitHash ops originTsMap + () + + let cursor = + rowList + |> List.fold (fun acc (rowid, _, _, _, _, _) -> max acc rowid) sinceCursor + return (cursor, List.ofSeq divergences) + } + +/// Fetch the content blobs a peer has that we lack — the fetch-on-miss half of sync, since +/// `package_blobs` (large/binary content) don't ride the op stream. Reads the peer's blob +/// hashes, keeps the ones `Blob.missing` says we don't have, and copies their bytes over +/// (content-addressed `INSERT OR IGNORE`, so idempotent). Returns the count fetched. +let private pullBlobsFromStore (sourceConnStr : string) : Task = + task { + use source = new Microsoft.Data.Sqlite.SqliteConnection(sourceConnStr) + source.Open() + + use hashCmd = source.CreateCommand() + hashCmd.CommandText <- "SELECT hash FROM package_blobs" + let peerHashes = ResizeArray() + use reader = hashCmd.ExecuteReader() + while reader.Read() do + peerHashes.Add(reader.GetString 0) + reader.Close() + + let! missing = PMBlob.missing (List.ofSeq peerHashes) |> Ply.toTask + + let mutable fetched = 0 + for h in missing do + use bytesCmd = source.CreateCommand() + bytesCmd.CommandText <- "SELECT bytes FROM package_blobs WHERE hash = $h" + bytesCmd.Parameters.AddWithValue("$h", h) + |> ignore + let result = bytesCmd.ExecuteScalar() + if not (isNull result) then + do! PMBlob.insert h (result :?> byte[]) |> Ply.toTask + fetched <- fetched + 1 + return fetched + } + +/// `dark sync pull `, F# half: resume from the stored cursor for this peer, +/// `pull` its new ops into the local instance, fetch any content blobs we're missing, then +/// persist the advanced cursor — so the next pull resumes where this left off. The peer key is +/// the source path. Returns the new cursor. +let pullFromFile + (sourcePath : string) + : Task> = + task { + let connStr = $"Data Source={sourcePath};Mode=ReadOnly" + let! cursor = SyncCursors.cursorFor sourcePath + let! (newCursor, divergences) = pull connStr cursor + let! _blobsFetched = pullBlobsFromStore connStr + do! SyncCursors.advanceCursor sourcePath newCursor + // auto-resolved (last-writer-wins) — record so it's reviewable, not silently lost + do! recordDivergences sourcePath divergences + return (newCursor, divergences) + } + +/// The display kinds of the ops a peer has above `sinceCursor` — feeds `dark sync pull`'s breakdown. +/// Reads the source (read-only) and, for each `SetName`, returns its target item kind +/// ("fn"/"type"/"value"). Counting the naming SetName — each item is named exactly once — gives the +/// per-kind item count with NO Add+SetName double-count and no pairing logic; non-naming ops are +/// omitted. File peers only (an HTTP wire batch isn't re-readable after the pull). +let opKindsSince (sourcePath : string) (sinceCursor : int64) : Task> = + task { + let connStr = $"Data Source={sourcePath};Mode=ReadOnly" + use source = new Microsoft.Data.Sqlite.SqliteConnection(connStr) + source.Open() + use cmd = source.CreateCommand() + cmd.CommandText <- + "SELECT id, op_blob FROM package_ops WHERE rowid > $cursor ORDER BY rowid ASC" + cmd.Parameters.AddWithValue("$cursor", sinceCursor) + |> ignore + + let kinds = ResizeArray() + use reader = cmd.ExecuteReader() + while reader.Read() do + let id = System.Guid.Parse(reader.GetString 0) + let blob = reader.GetFieldValue 1 + match BS.PT.PackageOp.deserialize id blob with + | PT.PackageOp.SetName(_loc, target) -> kinds.Add(target.kind.toString ()) + | _ -> () + reader.Close() + return List.ofSeq kinds + } + +/// The client apply-half of the HTTP transport: decode a wire batch (the body of a +/// `GET /sync/events` response, produced by `encodeBatch`) and apply it through the SAME receiver +/// path as the file pull and the wire POST — `applyRemoteOps` (op log + projections, idempotent), +/// advancing this peer's cursor. `branchId`/`commitHash` come from the request context (the +/// 3-field wire carries the ops; the protocol conveys branch + commit per request). Returns the +/// new cursor. So HTTP sync = `httpRequest` the body → `applyWireBatch` — the same fold, a wire +/// source instead of a file. +let applyWireBatch + (remote : string) + (branchId : PT.BranchId) + (commitHash : Option) + (wireBytes : byte[]) + : Task> = + task { + // This F# receiver applies ops only — it does NOT fetch referenced content blobs. The blob + // fetch is a SEPARATE step at the Dark layer: the CLI HTTP pull calls + // `Darklang.Sync.fetchMissingBlobs` after this `applyWire` (GET /sync/blobs → pmBlobMissing → + // GET /sync/blob?hash= + pmBlobInsert), the HTTP counterpart to the file pull's + // `pullBlobsFromStore`. So the file/HTTP blob asymmetry is closed at the caller, not here. + let opsWithRowids = + decodeBatch wireBytes + |> List.map (fun (rowid, id, originTs, blob) -> + (rowid, originTs, BS.PT.PackageOp.deserialize id blob)) + return! applyRemoteOps remote branchId commitHash opsWithRowids + } + + +/// Override a recorded conflict's auto-resolution. `keepMine` re-activates OUR hash at the location +/// by re-folding the `SetName` into projections (LOCAL — the op is already in the log, so this can't +/// create a new syncable op; propagating an override needs a distinguishing op, see the conflict +/// note); `theirs` keeps the incoming (which already won under last-writer-wins). Either way marks +/// the conflict overridden. Returns true if it was found + resolved. +let resolveConflict (conflictId : string) (keepMine : bool) : Task = + task { + match! Conflicts.getById conflictId with + | None -> return false + | Some c -> + if not keepMine then + // "theirs" — the incoming bind already applied (last-writer-wins); just record the choice + do! Conflicts.markOverridden c.id + return true + else + // "mine" — re-bind the location to our hash. Parse the FQ "owner[.modules].name" and read + // the binding's kind + branch from `locations`, then emit + apply a WIP SetName to our hash. + // FQ "owner[.modules].name" → owner = head, name = last, modules = the middle. Reverse-match + // to bind name + modules directly (this codebase's List.head/last return Option). + match c.location.Split('.') |> List.ofArray with + | owner :: rest -> + match List.rev rest with + | name :: revModules -> + let modulesStr = revModules |> List.rev |> String.concat "." + let! meta = + Sql.query + """ + SELECT item_type, branch_id FROM locations + WHERE owner = @o AND modules = @m AND name = @n LIMIT 1 + """ + |> Sql.parameters + [ "o", Sql.string owner + "m", Sql.string modulesStr + "n", Sql.string name ] + |> Sql.executeAsync (fun read -> + (read.string "item_type", (read.uuid "branch_id" : PT.BranchId))) + match meta with + | (itemType, branchId) :: _ -> + let kind = PT.ItemKind.fromString itemType + let modulesList = + if modulesStr = "" then [] else modulesStr.Split('.') |> List.ofArray + let loc : PT.PackageLocation = + { owner = owner; modules = modulesList; name = name } + let target = PT.Reference.fromHashAndKind (PT.Hash c.localHash, kind) + let mineOp = PT.PackageOp.SetName(loc, target) + // A human override is the LATEST decision — RE-STAMP our op's origin_ts to now so it + // wins timestamp-LWW (last-resolver-wins) AND propagates: the re-stamp rides the op on + // sync (preserve-on-receive), so peers see our hash as the most-recent-by-creation and + // adopt it too. (This is also what un-blocks the playback stale-check from skipping our + // re-fold below — without it, our op's OLD origin_ts would read as stale vs the + // incoming that just won, and the binding wouldn't flip back.) + do! + Sql.query + "UPDATE package_ops SET origin_ts = strftime('%Y-%m-%dT%H:%M:%fZ','now') WHERE id = @id" + |> Sql.parameters [ "id", Sql.uuid (Inserts.computeOpHash mineOp) ] + |> Sql.executeStatementAsync + // RE-FOLD the SetName directly. We can't go through insertAndApplyOps: `SetName(loc, + // ourHash)` is content-identical to the op that first bound our hash, so it's already + // in the op log and INSERT OR IGNORE would dedup it — and insertAndApplyOps only folds + // NEWLY-inserted ops, so the binding would never flip back. applyOps re-runs + // applySetName (un-list the incoming row, re-activate ours — now the freshest stamp). + do! PackageOpPlayback.applyOps branchId None [ mineOp ] + do! Conflicts.markOverridden c.id + return true + | [] -> return false // the location no longer exists locally + | [] -> return false // "owner" only, no name + | _ -> return false // unparseable location + } diff --git a/backend/src/LibDB/SyncCursors.fs b/backend/src/LibDB/SyncCursors.fs new file mode 100644 index 0000000000..3374127b00 --- /dev/null +++ b/backend/src/LibDB/SyncCursors.fs @@ -0,0 +1,53 @@ +/// Sync cursors — per-remote poll resume state. +/// +/// Local-only, NOT synced: for each remote peer, how far this instance has folded that +/// remote's op stream. The cursor is a `package_ops` **rowid** — SQLite's implicit monotonic +/// insertion order (prework finding: no `seq` column needed). A poll resumes from the cursor +/// so a peer pulls only ops it hasn't seen, via `Inserts.opsSince cursor`. +/// +/// The `sync_cursors` table lives in `backend/migrations/schema.sql` (created at startup). +module LibDB.SyncCursors + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite + +/// How far we've folded `remote`'s op stream (0 if never polled — pull from the start). +let cursorFor (remote : string) : Task = + task { + let! rows = + Sql.query + "SELECT folded_through_rowid AS r FROM sync_cursors WHERE remote = @remote" + |> Sql.parameters [ "remote", Sql.string remote ] + |> Sql.executeAsync (fun read -> read.int64 "r") + return + (match rows with + | r :: _ -> r + | [] -> 0L) + } + +/// Advance `remote`'s cursor to `rowid`. Idempotent upsert that NEVER moves backward — a +/// stale/duplicate advance can't rewind the cursor (so a re-applied batch won't cause a +/// re-fold of older ops). `MAX(existing, incoming)` enforces monotonicity. +let advanceCursor (remote : string) (rowid : int64) : Task = + Sql.query + """ + INSERT INTO sync_cursors (remote, folded_through_rowid) + VALUES (@remote, @rowid) + ON CONFLICT(remote) DO UPDATE SET + folded_through_rowid = + MAX(sync_cursors.folded_through_rowid, excluded.folded_through_rowid) + """ + |> Sql.parameters [ "remote", Sql.string remote; "rowid", Sql.int64 rowid ] + |> Sql.executeStatementAsync + +/// All known peers and how far we've synced each — the `dark sync status` surface. Empty if we've +/// never synced. Ordered by `remote` for a stable display. +let listCursors () : Task> = + Sql.query + "SELECT remote, folded_through_rowid AS r FROM sync_cursors ORDER BY remote" + |> Sql.executeAsync (fun read -> (read.string "remote", read.int64 "r")) diff --git a/backend/testfiles/execution/pre-s-and-s/autosync.dark b/backend/testfiles/execution/pre-s-and-s/autosync.dark new file mode 100644 index 0000000000..2d6ee9e51f --- /dev/null +++ b/backend/testfiles/execution/pre-s-and-s/autosync.dark @@ -0,0 +1,41 @@ +// autosync — the ADAPTIVE poll interval, now a real package: +// `Darklang.Sync.Autosync.nextPollMs`. Snap to a short floor right after a sync that pulled +// changes (be responsive); back off (double, capped at a ceiling) while idle (don't hammer the +// tailnet). This file is the test of that package fn. (Params: 2s floor, 60s ceiling.) + +// changes seen → snap back to the responsive floor (from any interval) +Darklang.Sync.Autosync.nextPollMs true 60000L = 2000L +Darklang.Sync.Autosync.nextPollMs true 2000L = 2000L + +// idle → back off by doubling +Darklang.Sync.Autosync.nextPollMs false 2000L = 4000L +Darklang.Sync.Autosync.nextPollMs false 16000L = 32000L + +// idle → capped at the ceiling (never overshoots) +Darklang.Sync.Autosync.nextPollMs false 40000L = 60000L +Darklang.Sync.Autosync.nextPollMs false 60000L = 60000L + +// decideNext — the adaptive decision over a pull's cursor before/after (the pure half of pollOnce). +// cursor ADVANCED (changes arrived) → keep the new cursor, snap to the floor +Darklang.Sync.Autosync.decideNext 5L 8L 16000L = (8L, 2000L) +// cursor UNCHANGED (a converged, empty pull) → same cursor, back off (double, capped at ceiling) +Darklang.Sync.Autosync.decideNext 5L 5L 16000L = (5L, 32000L) +Darklang.Sync.Autosync.decideNext 5L 5L 40000L = (5L, 60000L) +// the steady state: once converged, every poll is empty → cursor stays → the tailnet quiets down + +// tailnet-wide poll (the daemon syncs EVERY known peer, not one): +// an empty peer set → nothing advanced, no conflicts, no Release-skew → (no changes, 0, no skews) +Darklang.Sync.Autosync.pollAllPeers [] = (false, 0L, []) +// runLoopAll with zero steps is a no-op that returns the interval unchanged (no peers polled) +Darklang.Sync.Autosync.runLoopAll 8000L 0L = 8000L + +// Release-skew gate (the migrator's version coordinate gating the daemon): a peer only syncs when its +// Release matches ours — else it's paused + surfaced, not silently no-advanced. +// same Release → in sync, pull it +Darklang.Sync.Autosync.peerInSync 2L 2L = true +// peer on a newer Release → skew, skip (upgrade this machine) +Darklang.Sync.Autosync.peerInSync 2L 3L = false +// peer on an older Release → skew, skip (upgrade the peer) +Darklang.Sync.Autosync.peerInSync 2L 1L = false +// peer with no release stamp (an old pre-release Dark reports 0) → skew, skip +Darklang.Sync.Autosync.peerInSync 2L 0L = false diff --git a/backend/testfiles/execution/pre-s-and-s/conflicts-display.dark b/backend/testfiles/execution/pre-s-and-s/conflicts-display.dark new file mode 100644 index 0000000000..ad86921189 --- /dev/null +++ b/backend/testfiles/execution/pre-s-and-s/conflicts-display.dark @@ -0,0 +1,32 @@ +// Conflict/divergence display — the PURE "surface as +// data" UX over a sync result. Sync.detectDivergences (built in F#) returns +// (location, existingHash, incomingHash) tuples and NEVER blocks the apply; this formats them +// for `dark sync`/`dark conflicts`. Pure, testable — completes the divergence story end to end. + +let divergenceLine (location: String) (existing: String) (incoming: String) : String = + "⚠ divergence at " ++ location ++ ": " ++ existing ++ " vs " ++ incoming + +let divergenceHeader (n: Int64) : String = + if n == 0L then + "✓ no divergences" + else if n == 1L then + "⚠ 1 divergence surfaced (not blocked)" + else + "⚠ " ++ (Stdlib.Int64.toString n) ++ " divergences surfaced (not blocked)" + +let divergenceReport (divs: List<(String * String * String)>) : String = + let header = divergenceHeader (Stdlib.List.length divs) + let lines = Stdlib.List.map divs (fun (loc, ex, inc) -> divergenceLine loc ex inc) + Stdlib.String.join (Stdlib.List.append [header] lines) "\n" + +// one divergence's line +divergenceLine "Stachu.foo" "hashA" "hashB" = "⚠ divergence at Stachu.foo: hashA vs hashB" + +// the header (none / one / many), with pluralization +divergenceHeader 0L = "✓ no divergences" +divergenceHeader 1L = "⚠ 1 divergence surfaced (not blocked)" +divergenceHeader 3L = "⚠ 3 divergences surfaced (not blocked)" + +// the full report over a list of detectDivergences tuples +divergenceReport [] = "✓ no divergences" +divergenceReport [("Stachu.foo", "hashA", "hashB")] = "⚠ 1 divergence surfaced (not blocked)\n⚠ divergence at Stachu.foo: hashA vs hashB" diff --git a/backend/testfiles/execution/pre-s-and-s/conflicts-list.dark b/backend/testfiles/execution/pre-s-and-s/conflicts-list.dark new file mode 100644 index 0000000000..cc9a799a20 --- /dev/null +++ b/backend/testfiles/execution/pre-s-and-s/conflicts-list.dark @@ -0,0 +1,65 @@ +// `dark conflicts` — the review surface for last-write-wins auto-resolutions. Sync NEVER blocks: a +// `name → two hashes` divergence is auto-resolved by timestamp-LWW (newest authoring time wins) and +// RECORDED so it's never silently lost — the user eventually `ack`s (agrees) or overrides. These pure +// formatters (`Darklang.Sync.Display.conflict*`) render that surface; this file pins the UX. + +// ── winner read from the recorded resolution tag ── +Darklang.Sync.Display.conflictWinner "auto: timestamp-LWW — incoming won (newer creation)" = "them" +Darklang.Sync.Display.conflictWinner "auto: timestamp-LWW — kept local (newer creation)" = "you" +Darklang.Sync.Display.conflictWinner "auto: timestamp-LWW" = "" + +// ── verdict always names last-write-wins (the auto-resolution is never opaque) ── +Darklang.Sync.Display.conflictVerdict "auto: timestamp-LWW — incoming won (newer creation)" = "last-write-wins → kept theirs" +Darklang.Sync.Display.conflictVerdict "auto: timestamp-LWW — kept local (newer creation)" = "last-write-wins → kept yours" +Darklang.Sync.Display.conflictVerdict "auto: timestamp-LWW" = "auto-resolved (last-write-wins)" + +// ── hashes: ✓ marks the winning side, the arrow points to it ── +Darklang.Sync.Display.conflictHashes "auto: timestamp-LWW — incoming won (newer creation)" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 → them e5f6a7b8 ✓" +Darklang.Sync.Display.conflictHashes "auto: timestamp-LWW — kept local (newer creation)" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 ✓ ← them e5f6a7b8" +Darklang.Sync.Display.conflictHashes "auto: timestamp-LWW" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 · them e5f6a7b8" + +// short hash (full → first 8; already-short unchanged) +Darklang.Sync.Display.shortHash "abcdefgh12345" = "abcdefgh" +Darklang.Sync.Display.shortHash "abc" = "abc" + +// status glyph: pending needs ack; acked/overridden are dismissed history +Darklang.Sync.Display.conflictGlyph "NEW" = "⚠" +Darklang.Sync.Display.conflictGlyph "acked" = "✓" +Darklang.Sync.Display.conflictGlyph "overridden" = "↩" + +// ── empty report: pending view vs full history ── +Darklang.Sync.Display.conflictReport [] false = ["✓ no pending sync conflicts — every divergence is acknowledged"] +Darklang.Sync.Display.conflictReport [] true = ["✓ no sync conflicts on record"] + +// ── one PENDING conflict → header (frames the ack model) + 2-line block + footer (the actions) ── +// the block names the winner (them, via ✓/→) and prints the exact `ack ` to dismiss it +Darklang.Sync.Display.conflictReport [(("3f8a92c1deadbeef", "Stachu.MyApp.greeting", "NEW", "auto: timestamp-LWW — incoming won (newer creation)", "a1b2c3d4ffff", "e5f6a7b80000", "desktop"))] false = [ + "1 auto-resolved conflict(s) awaiting ack — last-write-wins kept one side, nothing lost:"; + ""; + "⚠ Stachu.MyApp.greeting"; + " last-write-wins → kept theirs you a1b2c3d4 → them e5f6a7b8 ✓ · from desktop · ack 3f8a92c1"; + ""; + " ack (agree) · ack all · resolve mine|theirs (override)" +] + +// an acked conflict in the history view → ✓ glyph, no ack hint (already dismissed) +Darklang.Sync.Display.conflictReport [(("7c0ffee0", "Stachu.Lib.parse", "acked", "auto: timestamp-LWW — kept local (newer creation)", "11112222", "33334444", "laptop"))] true = [ + "1 auto-resolved conflict(s) awaiting ack — last-write-wins kept one side, nothing lost:"; + ""; + "✓ Stachu.Lib.parse"; + " last-write-wins → kept yours you 11112222 ✓ ← them 33334444 · from laptop"; + ""; + " ack (agree) · ack all · resolve mine|theirs (override)" +] + +// TWO pending races from different peers → header counts both, one block each (a realistic review) +Darklang.Sync.Display.conflictReport [(("aaaa1111", "Stachu.MyApp.greeting", "NEW", "auto: timestamp-LWW — incoming won (newer creation)", "11111111", "22222222", "desktop")); (("bbbb2222", "Stachu.MyApp.config", "NEW", "auto: timestamp-LWW — kept local (newer creation)", "33333333", "44444444", "laptop"))] false = [ + "2 auto-resolved conflict(s) awaiting ack — last-write-wins kept one side, nothing lost:"; + ""; + "⚠ Stachu.MyApp.greeting"; + " last-write-wins → kept theirs you 11111111 → them 22222222 ✓ · from desktop · ack aaaa1111"; + "⚠ Stachu.MyApp.config"; + " last-write-wins → kept yours you 33333333 ✓ ← them 44444444 · from laptop · ack bbbb2222"; + ""; + " ack (agree) · ack all · resolve mine|theirs (override)" +] diff --git a/backend/testfiles/execution/pre-s-and-s/status-cli.dark b/backend/testfiles/execution/pre-s-and-s/status-cli.dark new file mode 100644 index 0000000000..cc098ac34e --- /dev/null +++ b/backend/testfiles/execution/pre-s-and-s/status-cli.dark @@ -0,0 +1,12 @@ +// `dark status` projection-currency line — now a real package fn, +// `Darklang.Cli.SCM.Status.statusLine`. opsCount = rows in `package_ops`; foldedThrough = how +// many are folded (applied). Pure arithmetic over the two — this file tests that package fn. +// (The real `dark status` reads the two counts via `Builtin.pmProjectionStatus`.) + +// caught up (incl. the empty DB) +Darklang.Cli.SCM.Status.statusLine 1234L 1234L = "✓ up to date (1234 ops)" +Darklang.Cli.SCM.Status.statusLine 0L 0L = "✓ up to date (0 ops)" + +// behind — shows how many ops the projections still owe +Darklang.Cli.SCM.Status.statusLine 1234L 1230L = "core.db: 1234 ops · projections folded through 1230 (4 behind)" +Darklang.Cli.SCM.Status.statusLine 5L 0L = "core.db: 5 ops · projections folded through 0 (5 behind)" diff --git a/backend/testfiles/execution/pre-s-and-s/sync-check.dark b/backend/testfiles/execution/pre-s-and-s/sync-check.dark new file mode 100644 index 0000000000..e57abec593 --- /dev/null +++ b/backend/testfiles/execution/pre-s-and-s/sync-check.dark @@ -0,0 +1,48 @@ +// `dark sync check ` — the "am I caught up with this peer?" readiness/convergence glance. The +// CLI GETs the peer's `/sync/health` (body: "sync-server ok; ops="), machine-reads the count with +// `parseHealthOps` (the AGENT-facing read), then `convergenceLine` formats the HUMAN-facing verdict. +// Both halves are pure package fns (`Darklang.Sync.{parseHealthOps,convergenceLine}`) — this file +// pins the realistic UX end to end so a peer's reachability + how-far-behind never regresses silently. + +// ── parseHealthOps: the AGENT read of the health body ── +// the exact body the server emits (Builtin.pmSyncHealth = "sync-server ok; ops=") +Darklang.Sync.parseHealthOps "sync-server ok; ops=42" = 42L +Darklang.Sync.parseHealthOps "sync-server ok; ops=0" = 0L +// a fresh/large server +Darklang.Sync.parseHealthOps "sync-server ok; ops=9845" = 9845L +// resilience: a body with no count, or an unreachable/garbage response → 0 (treated as "behind", +// never throws — the CLI stays usable when a peer half-answers) +Darklang.Sync.parseHealthOps "sync-server ok" = 0L +Darklang.Sync.parseHealthOps "" = 0L +Darklang.Sync.parseHealthOps "404 not found" = 0L +// the CURRENT body carries the release stamp before ops — ops still parses (release is first by design) +Darklang.Sync.parseHealthOps "sync-server ok; release=2; ops=42" = 42L + +// ── parseHealthRelease: the AGENT read of the peer's RELEASE (the sync-gate coordinate) ── +Darklang.Sync.parseHealthRelease "sync-server ok; release=2; ops=42" = 2L +Darklang.Sync.parseHealthRelease "sync-server ok; release=7; ops=0" = 7L +// an OLD peer with no release stamp → 0 (reads as a mismatch against any real release — the intent) +Darklang.Sync.parseHealthRelease "sync-server ok; ops=42" = 0L +Darklang.Sync.parseHealthRelease "" = 0L + +// ── releaseSkewLine: sync needs both sides on the same release; say WHICH to upgrade on a mismatch ── +// matched → "" (no skew; the check falls through to the convergence line) +Darklang.Sync.releaseSkewLine "desktop" 2L 2L = "" +// peer has no release stamp (old Dark) → upgrade it +Darklang.Sync.releaseSkewLine "desktop" 2L 0L = "⚠ desktop is on an older Dark (no release) — upgrade it to sync" +// peer behind us → upgrade the peer +Darklang.Sync.releaseSkewLine "desktop" 3L 2L = "⚠ desktop is on Release 2, you're on 3 — upgrade desktop to sync" +// peer ahead of us → upgrade this machine +Darklang.Sync.releaseSkewLine "desktop" 2L 5L = "⚠ desktop is on Release 5, you're on 2 — upgrade this machine to sync" + +// ── convergenceLine: the HUMAN verdict over (our cursor, peer op count) ── +// caught up — our cursor reached the peer's latest op +Darklang.Sync.convergenceLine "desktop" 9845L 9845L = "✓ desktop — caught up (9845 ops)" +// AHEAD counts as caught up too (we have ops the peer hasn't pulled yet — we're not behind it) +Darklang.Sync.convergenceLine "desktop" 9850L 9845L = "✓ desktop — caught up (9845 ops)" +// behind — shows the gap and both sides, so a human knows exactly how far a pull would catch them up +Darklang.Sync.convergenceLine "desktop" 9800L 9845L = "⟳ desktop — 45 behind (you 9800 / peer 9845)" +// brand-new instance (cursor 0) vs a populated peer +Darklang.Sync.convergenceLine "desktop" 0L 9845L = "⟳ desktop — 9845 behind (you 0 / peer 9845)" +// the converged steady state on an empty pair (both at 0) reads as caught up, not "behind 0" +Darklang.Sync.convergenceLine "desktop" 0L 0L = "✓ desktop — caught up (0 ops)" diff --git a/backend/testfiles/execution/pre-s-and-s/sync-cli.dark b/backend/testfiles/execution/pre-s-and-s/sync-cli.dark new file mode 100644 index 0000000000..076aa04d54 --- /dev/null +++ b/backend/testfiles/execution/pre-s-and-s/sync-cli.dark @@ -0,0 +1,24 @@ +// `dark sync` CLI output — the pure human-facing summary over a sync result, in the package +// `Darklang.Sync.Display`. The counts come from the pull (cursor delta = ops pulled, the log being +// append-only/contiguous); this formats them. `syncSummary` is wired into `dark sync pull`. + +// pluralization +Darklang.Sync.Display.opWord 1L = "op" +Darklang.Sync.Display.opWord 4L = "ops" + +// op-kind breakdown: mixed, single, empty, all-one-kind (only present kinds, in fn/type/value/rename order) +Darklang.Sync.Display.opKindBreakdown ["fn"; "fn"; "type"; "rename"] = "2 fns, 1 type, 1 rename" +Darklang.Sync.Display.opKindBreakdown ["fn"] = "1 fn" +Darklang.Sync.Display.opKindBreakdown [] = "" +Darklang.Sync.Display.opKindBreakdown ["value"; "value"] = "2 values" + +// pulled something / already in sync +Darklang.Sync.Display.syncSummary "major" 4L = "↓ pulled 4 ops from major" +Darklang.Sync.Display.syncSummary "major" 1L = "↓ pulled 1 op from major" +Darklang.Sync.Display.syncSummary "major" 0L = "✓ already in sync with major" + +// poll-interval label: whole seconds when divisible (the adaptive intervals always are), else ms +Darklang.Sync.Display.intervalLabel 2000L = "2s" +Darklang.Sync.Display.intervalLabel 60000L = "60s" +Darklang.Sync.Display.intervalLabel 500L = "500ms" +Darklang.Sync.Display.intervalLabel 1500L = "1500ms" diff --git a/backend/tests/Tests/Remotes.Tests.fs b/backend/tests/Tests/Remotes.Tests.fs new file mode 100644 index 0000000000..8663512c3b --- /dev/null +++ b/backend/tests/Tests/Remotes.Tests.fs @@ -0,0 +1,71 @@ +/// Tests for LibDB.Remotes — the registered sync-peer store behind `dark remote`. +/// +/// Covers the upsert-by-name semantics, ordered list/urls, and remove's existed-flag. +/// The `sync_remotes` table is created by schema.sql at test-DB migration time. +module Tests.Remotes + +open Expecto + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +module Remotes = LibDB.Remotes + +// Unique names per run so parallel suites / reruns don't collide on the shared test DB. +let private uniq (label : string) : string = $"{label}-{System.Guid.NewGuid()}" + + +let addThenList = + testTask "add registers a remote that list returns as (name, url)" { + let name = uniq "box" + do! Remotes.add name "http://127.0.0.1:9922" + let! all = Remotes.list () + Expect.contains + all + (name, "http://127.0.0.1:9922") + "added remote shows up in list" + } + +let addIsUpsertByName = + testTask "re-adding a name updates its url (idempotent upsert, no duplicate row)" { + let name = uniq "laptop" + do! Remotes.add name "https://old.example" + do! Remotes.add name "https://new.example" + let! all = Remotes.list () + let matching = all |> List.filter (fun (n, _) -> n = name) + Expect.equal + matching + [ (name, "https://new.example") ] + "single row, url updated to the latest" + } + +let urlsReturnsPollTargets = + testTask "urls returns just the pollable urls of registered remotes" { + let name = uniq "peer" + let url = uniq "https://peer" + ".ts.net" + do! Remotes.add name url + let! urls = Remotes.urls () + Expect.contains urls url "the remote's url is in the poll set" + } + +let removeReportsExisted = + testTask "remove returns true for a known remote, false for an unknown one" { + let name = uniq "ephemeral" + do! Remotes.add name "http://gone.example" + let! existed = Remotes.remove name + Expect.isTrue existed "removing a registered remote reports existed=true" + let! again = Remotes.remove name + Expect.isFalse again "removing it a second time reports existed=false" + let! all = Remotes.list () + Expect.isFalse + (all |> List.exists (fun (n, _) -> n = name)) + "removed remote is gone from the list" + } + + +let tests = + testList + "remotes" + [ addThenList; addIsUpsertByName; urlsReturnsPollTargets; removeReportsExisted ] diff --git a/backend/tests/Tests/SyncIdempotency.Tests.fs b/backend/tests/Tests/SyncIdempotency.Tests.fs new file mode 100644 index 0000000000..80247c466d --- /dev/null +++ b/backend/tests/Tests/SyncIdempotency.Tests.fs @@ -0,0 +1,1051 @@ +/// Tests the sync wire's core safety property: applying the +/// same op twice is a no-op. The receiver path is the existing +/// `Inserts.insertAndApplyOps`, which does `INSERT OR IGNORE INTO package_ops (id, …)` +/// — so a remote op already in the log is ignored. This is what makes tailnet-wide +/// sync safe: the same op can arrive from N peers without corrupting the log. +module Tests.SyncIdempotency + +open Expecto + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite +open Microsoft.Data.Sqlite + +module BS = LibSerialization.Binary.Serialization +module Inserts = LibDB.Inserts +module SyncCursors = LibDB.SyncCursors +module Conflicts = LibDB.Conflicts +module Remotes = LibDB.Remotes +module Sync = LibDB.Sync +module PMBlob = LibDB.RuntimeTypes.Blob +module PT = LibExecution.ProgramTypes +module PT2RT = LibExecution.ProgramTypesToRuntimeTypes + +let private countOps () : Task = + Sql.query "SELECT COUNT(*) as n FROM package_ops" + |> Sql.executeRowAsync (fun read -> read.int64 "n") + +/// A synthetic op authoring stamp RELATIVE to actual now — no baked-in calendar year (these tests +/// run in any year). Positive minutes = future (beats a `now` re-stamp), negative = past (loses to +/// it). Same `origin_ts` format the schema/sync use. +let private relTs (minutesFromNow : float) : string = + System.DateTime.UtcNow + .AddMinutes(minutesFromNow) + .ToString("yyyy-MM-ddTHH:mm:ss.fffZ") + +// Isolation: these cases touch GLOBAL state (apply the op log to the shared store; the +// divergence check reads `locations`). The fix that actually removed the flake was keeping each +// test's global mutation SMALL — the heavy applies (`pullFromFile`, `applyWireBatch`) use 40-op +// slices, not the whole log, so the race window is ~1s not ~30s. `testSequenced` is kept as +// belt-and-suspenders (the full-suite-green run included it); the small windows are the real fix. +let tests = + testSequenced + <| testList + "SyncIdempotency" + [ testTask + "re-applying an existing op is a no-op (INSERT OR IGNORE dedups by id)" { + // grab a real op from the seeded log (as a remote peer would re-send it) + let! existing = + Sql.query + "SELECT id, op_blob, branch_id, commit_hash FROM package_ops LIMIT 1" + |> Sql.executeRowAsync (fun read -> + let id = read.uuid "id" + let blob = read.bytes "op_blob" + let branchId : PT.BranchId = read.uuid "branch_id" + let commitHash = read.stringOrNone "commit_hash" + (id, blob, branchId, commitHash)) + let (opId, opBlob, branchId, commitHash) = existing + let op = BS.PT.PackageOp.deserialize opId opBlob + + let! before = countOps () + // the sync receiver applies the SAME op again (as if from another tailnet peer) + let! _ = Inserts.insertAndApplyOps branchId commitHash [ op ] + let! after = countOps () + Expect.equal + after + before + "package_ops count unchanged — the duplicate op was ignored (idempotent apply)" + } + + testTask "opsSince read path: rowid(0) = whole log in order, rowid(max) = none" { + let! total = countOps () + let! all = Inserts.opsSince 0L + Expect.equal + (int64 (List.length all)) + total + "opsSince(0) returns every op in the log" + // rowid is a monotonic cursor — results come back strictly ordered + let rowids = all |> List.map (fun (r, _, _, _) -> r) + Expect.equal + rowids + (List.sort rowids) + "ops returned in ascending rowid order" + // a cursor past the end returns nothing (a caller polls from its last-seen rowid) + let maxRowid = if List.isEmpty rowids then 0L else List.max rowids + let! none = Inserts.opsSince maxRowid + Expect.isEmpty none "opsSince(maxRowid) returns no further ops" + } + + // The cross-instance safety property behind dedup: the receiver decodes a wire blob to a + // PackageOp and RE-HASHES it (insertAndApplyOps computes `computeOpHash op` for the id). + // For INSERT OR IGNORE to dedup a re-sent op rather than fork the log, that re-hash must + // reproduce the sender's stored id. This checks it holds for the ENTIRE log, not one op. + testTask + "wire round-trip: every op re-hashes to its stored id (serialize→decode→re-hash)" { + let! wire = Inserts.opsSince 0L // (rowid, id, op_blob) exactly as stored = the wire payload + Expect.isTrue + (List.length wire > 0) + "the seeded log has ops to send over the wire" + let mismatches = + wire + |> List.filter (fun (_rowid, storedId, _originTs, blob) -> + let op = BS.PT.PackageOp.deserialize storedId blob + Inserts.computeOpHash op <> storedId) + Expect.equal + (List.length mismatches) + 0 + "every op keeps its content-hash id across the wire — so dedup (by id) is sound" + } + + // Two fully-synced peers exchanging their whole logs must be a total no-op. This drives + // the REAL receiver path (insertAndApplyOps → INSERT OR IGNORE → applyOps refold) over the + // entire log, grouped by (branch, commit) the way a receiver applies a batch. + testTask + "re-applying the entire op log to an already-synced store changes nothing" { + let! before = countOps () + let! ops = + Sql.query + "SELECT id, op_blob, branch_id, commit_hash FROM package_ops ORDER BY rowid" + |> Sql.executeAsync (fun read -> + let id = read.uuid "id" + let blob = read.bytes "op_blob" + let branchId : PT.BranchId = read.uuid "branch_id" + let commitHash = read.stringOrNone "commit_hash" + (BS.PT.PackageOp.deserialize id blob, branchId, commitHash)) + let groups = ops |> List.groupBy (fun (_, b, c) -> (b, c)) |> Map.toList + for ((branchId, commitHash), g) in groups do + let! _ = + Inserts.insertAndApplyOps + branchId + commitHash + (g |> List.map (fun (op, _, _) -> op)) + () + let! after = countOps () + Expect.equal + after + before + "re-applying the whole log added no rows — INSERT OR IGNORE deduped every op" + } + + // A LITERAL cross-store transfer: read ops from store A (the global seeded DB) and apply + // them to a SEPARATE temp SQLite file (store B), proving the op log moves between two real + // stores — the cross-instance round-trip the single-global-connection blocks at the LibDB + // helper level. Store B's op-log table is created standalone (the canonical synced table); + // the receiver's INSERT OR IGNORE runs against B's own connection. + testTask + "cross-store: ops read from store A transfer into a separate store B, idempotently" { + // the wire payload, read from store A via the global LibDB connection + let! fromA = + Sql.query + "SELECT id, op_blob, branch_id, commit_hash FROM package_ops ORDER BY rowid" + |> Sql.executeAsync (fun read -> + (read.string "id", + read.bytes "op_blob", + read.string "branch_id", + read.stringOrNone "commit_hash")) + Expect.isTrue (List.length fromA > 0) "store A has ops to send over the wire" + + let pathB = + $"{System.IO.Path.GetTempPath()}sync-storeB-{System.Guid.NewGuid()}.db" + try + use connB = + new SqliteConnection($"Data Source={pathB};Mode=ReadWriteCreate") + connB.Open() + + let exec (sql : string) (ps : (string * obj) list) : unit = + use cmd = connB.CreateCommand() + cmd.CommandText <- sql + ps + |> List.iter (fun (k, v) -> + cmd.Parameters.AddWithValue(k, v) |> ignore) + cmd.ExecuteNonQuery() |> ignore + + // store B's canonical op-log table (FK to branches dropped — standalone op stream). PK matches + // the real schema's composite (id, branch_id) so a same-id-different-branch op (which the global + // store A can hold, esp. under parallel test runs) transfers as two rows, not one collapsed by id. + exec + "CREATE TABLE package_ops (id TEXT NOT NULL, op_blob BLOB NOT NULL, branch_id TEXT NOT NULL, commit_hash TEXT, origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), PRIMARY KEY (id, branch_id))" + [] + + let transfer () = + for (id, blob, branchId, commitHash) in fromA do + exec + "INSERT OR IGNORE INTO package_ops (id, op_blob, branch_id, commit_hash) VALUES ($id, $blob, $branch, $commit)" + [ "$id", box id + "$blob", box blob + "$branch", box branchId + "$commit", + (match commitHash with + | Some s -> box s + | None -> box System.DBNull.Value) ] + + let countB () : int64 = + use cmd = connB.CreateCommand() + cmd.CommandText <- "SELECT COUNT(*) FROM package_ops" + cmd.ExecuteScalar() :?> int64 + + // first exchange: store B mirrors store A's op log + transfer () + Expect.equal + (countB ()) + (int64 (List.length fromA)) + "every op transferred into store B" + + // second exchange (peers re-sync): INSERT OR IGNORE dedups by id — no growth in B + transfer () + Expect.equal + (countB ()) + (int64 (List.length fromA)) + "re-transfer is a no-op in store B — dedup by id holds across stores" + + connB.Close() + finally + if System.IO.File.Exists pathB then System.IO.File.Delete pathB + } + + // sync_cursors — per-remote poll resume state (local-only) + testTask "sync cursor: starts at 0, advances, and never moves backward" { + let remote = $"peer-{System.Guid.NewGuid()}" + let! start = SyncCursors.cursorFor remote + Expect.equal + start + 0L + "an unseen remote's cursor starts at 0 (pull from the start)" + + do! SyncCursors.advanceCursor remote 5L + let! atFive = SyncCursors.cursorFor remote + Expect.equal atFive 5L "the cursor advanced to 5" + + // a stale/duplicate advance must NOT rewind the cursor + do! SyncCursors.advanceCursor remote 3L + let! stillFive = SyncCursors.cursorFor remote + Expect.equal + stillFive + 5L + "a backward advance is ignored — the cursor is monotonic" + + do! SyncCursors.advanceCursor remote 10L + let! atTen = SyncCursors.cursorFor remote + Expect.equal atTen 10L "the cursor advances forward to 10" + } + + // the cursor + opsSince together = resumable poll: fold the whole log, then a re-poll + // from the cursor returns nothing new. + testTask + "sync cursor + opsSince: after folding the log, a re-poll returns no new ops" { + let remote = $"hub-{System.Guid.NewGuid()}" + // first poll: from cursor 0, pull the whole log + let! cursor0 = SyncCursors.cursorFor remote + let! firstBatch = Inserts.opsSince cursor0 + Expect.isTrue + (List.length firstBatch > 0) + "the first poll pulls the existing ops" + + // advance the cursor to the highest rowid we just folded + let maxRowid = firstBatch |> List.map (fun (r, _, _, _) -> r) |> List.max + do! SyncCursors.advanceCursor remote maxRowid + + // second poll: from the advanced cursor, nothing new (we already folded it all) + let! cursor1 = SyncCursors.cursorFor remote + let! secondBatch = Inserts.opsSince cursor1 + Expect.isEmpty + secondBatch + "a re-poll from the advanced cursor returns no already-folded ops" + } + + // the cohesive receiver API: applyRemoteOps ties insert+apply+cursor-advance together + testTask + "Sync.applyRemoteOps applies a batch (idempotent) and advances the remote cursor" { + let remote = $"sender-{System.Guid.NewGuid()}" + // use a real seeded op as the 'remote' batch, with a synthetic sender rowid + let! existing = + Sql.query + "SELECT id, op_blob, branch_id, commit_hash FROM package_ops LIMIT 1" + |> Sql.executeRowAsync (fun read -> + (read.uuid "id", + read.bytes "op_blob", + (read.uuid "branch_id" : PT.BranchId), + read.stringOrNone "commit_hash")) + let (opId, opBlob, branchId, commitHash) = existing + let op = BS.PT.PackageOp.deserialize opId opBlob + let senderRowid = 42L + + let! before = countOps () + let! (newCursor, _) = + Sync.applyRemoteOps + remote + branchId + commitHash + [ (senderRowid, relTs (-60.0), op) ] + let! after = countOps () + Expect.equal + after + before + "the op already exists — INSERT OR IGNORE adds no row" + Expect.equal + newCursor + senderRowid + "the receiver advanced this remote's cursor to the sender rowid" + + let! persisted = SyncCursors.cursorFor remote + Expect.equal + persisted + senderRowid + "the advanced cursor persisted for this remote" + + // an empty batch is a no-op — the cursor stays put + let! (afterEmpty, _) = Sync.applyRemoteOps remote branchId commitHash [] + Expect.equal + afterEmpty + senderRowid + "an empty batch leaves the cursor unchanged" + } + + // divergence detection: an incoming SetName rebinding a location to a DIFFERENT hash than + // the current local binding is surfaced as data (never blocks). + testTask + "Sync.detectDivergences flags a SetName that rebinds a location to a different hash" { + // a real current location binding from the seed + let! loc = + Sql.query + "SELECT owner, modules, name, item_hash, item_type, branch_id FROM locations WHERE unlisted_at IS NULL LIMIT 1" + |> Sql.executeRowAsync (fun read -> + (read.string "owner", + read.string "modules", + read.string "name", + read.string "item_hash", + read.string "item_type", + (read.uuid "branch_id" : PT.BranchId))) + let (owner, modulesStr, name, existingHash, itemType, branchId) = loc + let modules = + if modulesStr = "" then [] else modulesStr.Split('.') |> Array.toList + let location : PT.PackageLocation = + { owner = owner; modules = modules; name = name } + let kind = PT.ItemKind.fromString itemType + + // incoming SetName binding the SAME location to a DIFFERENT hash → divergence + let differentTarget = + PT.Reference.fromHashAndKind (PT.Hash(existingHash + "_DIFFERENT"), kind) + let! divs = + Sync.detectDivergences + branchId + [ PT.PackageOp.SetName(location, differentTarget) ] + match divs with + | [ (_loc, existing, incoming) ] -> + Expect.equal + existing + existingHash + "the divergence reports the existing local hash" + Expect.equal + incoming + (existingHash + "_DIFFERENT") + "and the incoming (remote) hash" + | other -> + failtest $"expected exactly one divergence, got {List.length other}" + + // binding to the SAME hash is NOT a divergence (idempotent re-bind) + let sameTarget = PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) + let! none = + Sync.detectDivergences + branchId + [ PT.PackageOp.SetName(location, sameTarget) ] + Expect.isEmpty none "rebinding to the same hash is not a divergence" + } + + // Sync.pullFromFile — the one-call `dark sync pull ` op: read a + // PEER's data.db op log directly and apply it into THIS instance via the same receiver path + // (op log + projections), resuming + persisting a per-peer cursor, no wire. Here the peer's + // log is a copy of ours, so the apply is a no-op (already-synced peers) — what's under test is + // reading a separate file's log, applying it, and the cursor advancing/persisting/resuming. + testTask + "Sync.pullFromFile pulls a peer's op log into the local instance + persists the cursor" { + let srcPath = + $"{System.IO.Path.GetTempPath()}sync-pull-src-{System.Guid.NewGuid()}.db" + // Foreign Keys=False: the temp file carries only a standalone op log, not the full + // relational state (branches rows) a real instance has — so package_ops' branch FK isn't + // enforced here. The read + apply mechanics are what's under test. + let srcConn = + $"Data Source={srcPath};Mode=ReadWriteCreate;Foreign Keys=False" + + let execOn (sql : string) (ps : (string * obj) list) : unit = + use conn = new SqliteConnection(srcConn) + conn.Open() + use cmd = conn.CreateCommand() + cmd.CommandText <- sql + ps + |> List.iter (fun (k, v) -> + cmd.Parameters.AddWithValue(k, v) |> ignore) + cmd.ExecuteNonQuery() |> ignore + conn.Close() + + try + // build the source file: a standalone op log carrying a copy of our (global) log. PK is the + // real schema's composite (id, branch_id) so a same-id-different-branch op copies as two rows. + execOn + "CREATE TABLE package_ops (id TEXT NOT NULL, op_blob BLOB NOT NULL, branch_id TEXT NOT NULL, commit_hash TEXT, origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), PRIMARY KEY (id, branch_id))" + [] + let! ops = + Sql.query + // a 40-op slice (not the whole log): keeps the apply fast + the global-state window + // tiny, so divergence detection's `locations` read doesn't race concurrent rebinds. + "SELECT id, op_blob, branch_id, commit_hash FROM package_ops ORDER BY rowid LIMIT 40" + |> Sql.executeAsync (fun read -> + (read.string "id", + read.bytes "op_blob", + read.string "branch_id", + read.stringOrNone "commit_hash")) + Expect.isTrue + (List.length ops > 0) + "the local log has ops to copy into the peer file" + for (id, blob, branch, commit) in ops do + execOn + "INSERT OR IGNORE INTO package_ops (id, op_blob, branch_id, commit_hash) VALUES ($i, $b, $br, $c)" + [ "$i", box id + "$b", box blob + "$br", box branch + "$c", + (match commit with + | Some s -> box s + | None -> box System.DBNull.Value) ] + + // the peer's commits table — sync is committed-only and the file pull joins `commits` to + // cursor on the commit-rowid, so the source needs the commit rows its ops reference (a real + // peer always has them). Insert the distinct commit hashes in op order, so their rowids run + // 1..K and the pull cursor lands on K (the last commit pulled). + let distinctCommits = + ops |> List.choose (fun (_, _, _, c) -> c) |> List.distinct + Expect.isTrue + (List.length distinctCommits > 0) + "the copied slice has committed ops to ship (sync is committed-only)" + execOn "CREATE TABLE commits (hash TEXT PRIMARY KEY)" [] + for h in distinctCommits do + execOn "INSERT OR IGNORE INTO commits (hash) VALUES ($h)" [ "$h", box h ] + + // give the peer a content blob this instance lacks, to exercise the fetch-on-miss channel + let blobHash = $"sync-pull-blob-{System.Guid.NewGuid()}" + let blobBytes = System.Text.Encoding.UTF8.GetBytes blobHash + execOn + "CREATE TABLE package_blobs (hash TEXT PRIMARY KEY, length INTEGER NOT NULL, bytes BLOB NOT NULL, created_at TEXT DEFAULT (datetime('now')))" + [] + execOn + "INSERT INTO package_blobs (hash, length, bytes) VALUES ($h, $l, $b)" + [ "$h", box blobHash + "$l", box (int64 blobBytes.Length) + "$b", box blobBytes ] + + // pullFromFile = the one-call `dark sync pull ` op: resume the stored cursor for + // this peer, pull ops + fetch missing blobs, persist. Source rowids are 1..N (fresh + // table), so the cursor lands on N = the op count; the apply is idempotent. + // pull returns (cursor, divergences) — the wiring shape is exercised here; the divergence + // *logic* (clean vs rebind) is covered by the isolated `detectDivergences` test above, which + // controls the locations state and so isn't subject to the full-suite race. + let! (cursor, _divergences) = Sync.pullFromFile srcPath + Expect.equal + cursor + (int64 (List.length distinctCommits)) + "pull advanced the cursor to the source's last COMMIT-rowid (committed-only, commit-granular)" + + // the cursor is PERSISTED for this peer (keyed by the source path) + let! stored = SyncCursors.cursorFor srcPath + Expect.equal + stored + cursor + "the advanced cursor was persisted for this peer" + + // fetch-on-miss: the peer's content blob we lacked is now present locally + let! localBlob = PMBlob.get blobHash |> Ply.toTask + Expect.equal + localBlob + (Some blobBytes) + "pull fetched the peer's content blob we were missing" + + // resume: a second pull reads from the stored cursor → nothing beyond it, same cursor + let! (cursor2, _) = Sync.pullFromFile srcPath + Expect.equal + cursor2 + cursor + "the second pull resumes from the persisted cursor (no-op)" + finally + if System.IO.File.Exists srcPath then System.IO.File.Delete srcPath + } + + // Sync wire codec (the HTTP transport): an op batch round-trips through encode → decode + // byte-for-byte. The HTTP body reuses the existing op_blob bytes, so the wire carries exactly + // what the file-based pull reads; only the carrier differs. + testTask "Sync.encodeBatch/decodeBatch round-trips an op batch byte-for-byte" { + let! batch = Sync.opsToSend 0L // the whole log as (rowid, id, op_blob) + Expect.isTrue (List.length batch > 0) "there are ops to encode" + let decoded = Sync.decodeBatch (Sync.encodeBatch batch) + Expect.equal decoded batch "the op batch survives encode → decode unchanged" + Expect.equal + (Sync.decodeBatch (Sync.encodeBatch [])) + [] + "an empty batch round-trips to empty" + } + + // TRUNCATION SAFETY: a connection dropped mid-response over the tailnet yields a partial body. + // decodeBatch must FAIL on it (so the pull errors + retries next poll) rather than silently + // decode a corrupt, partial op. (BinaryReader.ReadBytes returns short at EOF without throwing, + // so the blob length is guarded explicitly.) + testTask + "Sync.decodeBatch rejects a truncated wire buffer (fail-safe, not silent garbage)" { + let! batch = Sync.opsToSend 0L + let wire = Sync.encodeBatch (List.truncate 3 batch) + Expect.isTrue + (wire.Length > 16) + "the wire buffer has real content to truncate" + // cut the buffer in half — lands mid-stream, so some read fails (a fixed field at EOF, or + // the guarded short blob) + let truncated = wire[0 .. (wire.Length / 2)] + Expect.throws + (fun () -> + Sync.decodeBatch truncated + |> ignore>) + "a truncated wire buffer is rejected, not decoded into a partial op" + } + + // SELF-VERSIONING WIRE (op-format-stability guard): every batch leads with a format version; + // a version-skewed peer's batch is REJECTED at decode (fail-closed), not misread. This is what + // makes "update one machine, not the other" refuse cleanly instead of corrupting the op log. + testTask + "Sync.decodeBatch rejects a wrong wire-format version (peer on a different Dark version)" { + let! batch = Sync.opsToSend 0L + let wire = Sync.encodeBatch (List.truncate 2 batch) + // patch the leading version int32 to a bogus value (a future/older incompatible format) + let tampered = Array.copy wire + (System.BitConverter.GetBytes(9999)).CopyTo(tampered, 0) + Expect.throws + (fun () -> + Sync.decodeBatch tampered + |> ignore>) + "a batch with a mismatched wire-format version is rejected, not misread" + } + + // COMMITTED-ONLY is now the sync DEFAULT (`opsToSend` reads `opsSinceCommitted`): a peer syncs + // your committed history, never your uncommitted mid-edit WIP (WIP stays local; own-device WIP + // sync is deferred). `opsSinceCommitted` filters `commit_hash IS NOT NULL` (joins commits), so + // it's a subset of the full log. Building block proven here: committed ⊆ all, non-empty. + testTask + "Sync committed-only scope: opsSinceCommitted ⊆ opsSince (the multi-author mechanism)" { + let! all = Inserts.opsSince 0L + let! committed = Inserts.opsSinceCommitted 0L + Expect.isTrue + (List.length committed > 0) + "the seed has committed ops to ship" + Expect.isTrue + (List.length committed <= List.length all) + "committed ops are a subset of all ops (WIP, if any, is excluded)" + let allIds = all |> List.map (fun (_, id, _, _) -> id) |> Set.ofList + let committedIds = + committed |> List.map (fun (_, id, _, _) -> id) |> Set.ofList + Expect.isTrue + (Set.isSubset committedIds allIds) + "every committed op is in the full op set (commit_hash filter narrows, never invents)" + } + + // the full HTTP payload path: server encodes a batch → client decodes + applies it + // through the same receiver as the file/POST path, advancing the cursor. Same fold, wire + // source. (Idempotent here — the batch is our own log — so what's tested is decode→apply→cursor.) + testTask + "Sync.applyWireBatch applies an encoded batch + advances the cursor (server→client)" { + // a 40-op slice (not the whole log) — keeps the global apply light, reducing the + // shared-state contention that flakes the full parallel suite. + let! fullBatch = Sync.opsToSend 0L + let batch = List.truncate 40 fullBatch + Expect.isTrue (List.length batch > 0) "there are ops to ship over the wire" + let wire = Sync.encodeBatch batch + let remote = $"wire-peer-{System.Guid.NewGuid()}" + let! (cursor, _) = Sync.applyWireBatch remote PT.mainBranchId None wire + let maxRowid = batch |> List.map (fun (r, _, _, _) -> r) |> List.max + Expect.equal + cursor + maxRowid + "applyWireBatch applied the decoded ops + advanced the cursor to the batch's max rowid" + } + + // STEADY STATE: once two peers are converged, every poll's `/sync/events` returns an EMPTY wire + // batch (no new ops). applyWireBatch must accept it cleanly — nothing applied, cursor unchanged, + // no divergences, no error. This is the COMMON case after devices sync, so a bug here would + // break autosync the moment it converges. (Safe under concurrent load: an empty batch mutates + // nothing.) + testTask + "Sync.applyWireBatch on an empty wire batch is a clean no-op (the converged steady state)" { + let remote = $"empty-peer-{System.Guid.NewGuid()}" + let! before = SyncCursors.cursorFor remote // 0 — this fresh peer was never synced + let! (cursor, divergences) = + Sync.applyWireBatch remote PT.mainBranchId None (Sync.encodeBatch []) + Expect.equal + cursor + before + "an empty batch leaves the cursor unchanged (nothing applied) — the converged steady state" + Expect.isEmpty divergences "an empty batch surfaces no divergences" + } + + // AT-LEAST-ONCE delivery: a flaky tailnet may deliver the SAME wire batch twice (a pull that + // timed out, got retried, but actually landed). The receiver must dedup — applying the same + // batch twice leaves the same cursor + no double-apply (INSERT OR IGNORE by op id). This is the + // wire-level counterpart to the op-id idempotency test; it pins the retry-safety of the HTTP path. + testTask + "Sync.applyWireBatch is idempotent over the wire (a retried pull double-delivers safely)" { + let! fullBatch = Sync.opsToSend 0L + let batch = List.truncate 20 fullBatch // light slice; these are our own log's ops (already present) + let wire = Sync.encodeBatch batch + let remote = $"retry-peer-{System.Guid.NewGuid()}" + let! (cursor1, _) = Sync.applyWireBatch remote PT.mainBranchId None wire + let! (cursor2, _) = Sync.applyWireBatch remote PT.mainBranchId None wire // same batch, redelivered + Expect.equal + cursor2 + cursor1 + "re-applying the same wire batch leaves the cursor unchanged — at-least-once delivery is safe" + } + + // The CONFLICT STORE (Conflicts) — record an auto-resolved name-binding divergence, surface it, + // dedup on re-detect, acknowledge ("the auto was right"). Isolated table → safe under load. This + // is the foundation of `dark conflicts`: auto-resolve by policy + RECORD + raise + ack/override. + testTask + "Conflicts: record an auto-resolved divergence, list it, dedup on re-detect, acknowledge" { + let remote = $"conflict-peer-{System.Guid.NewGuid()}" + let loc = $"Stachu.Foo.bar-{System.Guid.NewGuid()}" + let res = "auto: last-writer-wins (incoming)" + do! Conflicts.record remote loc "hashLocal" "hashIncoming" res + // dedup — the same conflict re-detected on a re-pull doesn't pile up a second row + do! Conflicts.record remote loc "hashLocal" "hashIncoming" res + let! all = Conflicts.list () + match + all |> List.filter (fun (x : Conflicts.Conflict) -> x.location = loc) + with + | [ c ] -> + Expect.equal + c.localHash + "hashLocal" + "records what we had (the loser under last-writer-wins)" + Expect.equal + c.incomingHash + "hashIncoming" + "and what the peer sent (the auto-resolved winner)" + Expect.isFalse c.acknowledged "starts un-acknowledged (raised to the user)" + // ack — the common case ("the auto thing was right") + do! Conflicts.acknowledge c.id + let! after = Conflicts.getById c.id + match after with + | Some(ac : Conflicts.Conflict) -> + Expect.isTrue ac.acknowledged "acknowledged after ack" + | None -> failtest "conflict vanished after acknowledge" + | other -> + failtest + $"expected exactly 1 recorded conflict (dedup), got {List.length other}" + } + + // The receiver WIRING: `recordDivergences` (called by both pull paths after apply) turns each + // detected divergence into a recorded conflict. Safe — only touches the isolated conflict table. + testTask + "Sync.recordDivergences records each detected divergence as a reviewable conflict" { + let remote = $"divrec-{System.Guid.NewGuid()}" + let loc = $"Foo.bar-{System.Guid.NewGuid()}" + do! Sync.recordDivergences remote [ (loc, "hMine", "hTheirs") ] + let! all = Conflicts.list () + match + all |> List.filter (fun (c : Conflicts.Conflict) -> c.location = loc) + with + | [ c ] -> + Expect.equal c.remote remote "recorded against the peer it came from" + Expect.equal c.localHash "hMine" "records what we had" + Expect.stringContains + c.resolution + "timestamp-LWW" + "tagged with the auto-resolution policy" + | other -> + failtest $"expected the divergence recorded once, got {List.length other}" + } + + // END-TO-END conflict flow (as close to live as headless allows): establish a LOCAL binding, + // then PULL a divergent one through the real receiver — and assert the conflict is auto-resolved + // (last-writer-wins) AND recorded. Fresh GUID name → no collision; minimal global mutation. + testTask + "end-to-end: a divergent pull auto-resolves (LWW) AND records the conflict" { + let nm = "cflo" + System.Guid.NewGuid().ToString().Replace("-", "") + let loc : PT.PackageLocation = + { owner = "ConflictTest"; modules = [ "X" ]; name = nm } + let hashA = System.String('a', 64) + let hashB = System.String('b', 64) + let refA = PT.Reference.fromHashAndKind (PT.Hash hashA, PT.ItemKind.Fn) + let refB = PT.Reference.fromHashAndKind (PT.Hash hashB, PT.ItemKind.Fn) + // establish OUR local binding: name -> hashA + let! _ = + Inserts.insertAndApplyOps + PT.mainBranchId + None + [ PT.PackageOp.SetName(loc, refA) ] + // pull a DIVERGENT incoming bind (name -> hashB) through the real HTTP receiver path + let remote = $"e2e-{System.Guid.NewGuid()}" + let! (_cursor, divs) = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs 60.0, PT.PackageOp.SetName(loc, refB)) ] + Expect.isFalse + (List.isEmpty divs) + "the divergence (hashA vs hashB) was detected on the pull" + // and it's recorded in the conflict store, tagged with both hashes + the peer + let! all = Conflicts.list () + let fq = $"ConflictTest.X.{nm}" + match + all |> List.filter (fun (c : Conflicts.Conflict) -> c.location = fq) + with + | [ c ] -> + Expect.equal + c.localHash + hashA + "recorded what WE had (hashA — the loser under LWW)" + Expect.equal + c.incomingHash + hashB + "and the incoming (hashB — the auto-resolved winner)" + Expect.equal c.remote remote "tagged with the peer it came from" + | other -> + failtest + $"expected one recorded conflict for {fq}, got {List.length other}" + } + + testTask + "conflict resolve 'theirs' keeps the incoming bind + marks the conflict overridden" { + let nm = "cfthe" + System.Guid.NewGuid().ToString().Replace("-", "") + let loc : PT.PackageLocation = + { owner = "ConflictTest"; modules = [ "R" ]; name = nm } + let hashA = System.String('a', 64) + let hashB = System.String('b', 64) + let refA = PT.Reference.fromHashAndKind (PT.Hash hashA, PT.ItemKind.Fn) + let refB = PT.Reference.fromHashAndKind (PT.Hash hashB, PT.ItemKind.Fn) + let! _ = + Inserts.insertAndApplyOps + PT.mainBranchId + None + [ PT.PackageOp.SetName(loc, refA) ] + let remote = $"rthe-{System.Guid.NewGuid()}" + let! _ = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs 60.0, PT.PackageOp.SetName(loc, refB)) ] + let fq = $"ConflictTest.R.{nm}" + let! all = Conflicts.list () + match + all |> List.filter (fun (c : Conflicts.Conflict) -> c.location = fq) + with + | [ c ] -> + let! ok = Sync.resolveConflict c.id false + Expect.isTrue ok "resolveConflict 'theirs' found + resolved the conflict" + match! Conflicts.getById c.id with + | Some(c2 : Conflicts.Conflict) -> + Expect.isTrue c2.overridden "the conflict is marked overridden" + | None -> failtest "conflict vanished after resolve" + // 'theirs' = keep the incoming (LWW winner): the binding stays on hashB + let! rows = + Sql.query + "SELECT item_hash FROM locations WHERE owner = @o AND modules = @m AND name = @n AND unlisted_at IS NULL LIMIT 1" + |> Sql.parameters + [ "o", Sql.string "ConflictTest" + "m", Sql.string "R" + "n", Sql.string nm ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + Expect.equal + (List.tryHead rows) + (Some hashB) + "location stays bound to the incoming hash" + | other -> + failtest $"expected one conflict for {fq}, got {List.length other}" + } + + testTask + "conflict resolve 'mine' re-binds the location to OUR hash + marks overridden" { + let nm = "cfmin" + System.Guid.NewGuid().ToString().Replace("-", "") + let loc : PT.PackageLocation = + { owner = "ConflictTest"; modules = [ "M" ]; name = nm } + let hashA = System.String('a', 64) + let hashB = System.String('b', 64) + let refA = PT.Reference.fromHashAndKind (PT.Hash hashA, PT.ItemKind.Fn) + let refB = PT.Reference.fromHashAndKind (PT.Hash hashB, PT.ItemKind.Fn) + // PAST stamps (local 2020 < incoming 2021), so the incoming wins the pull by timestamp-LWW — + // then the human 'mine' override RE-STAMPS our op to `now` (2025+), which beats both. + let opA = PT.PackageOp.SetName(loc, refA) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ opA ] + (Map.ofList [ (Inserts.computeOpHash opA, relTs (-120.0)) ]) + let remote = $"rmin-{System.Guid.NewGuid()}" + let! _ = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs (-60.0), PT.PackageOp.SetName(loc, refB)) ] + // incoming (2021) won the pull; 'mine' re-stamps OUR op to now and must re-bind it to hashA + let fq = $"ConflictTest.M.{nm}" + let! all = Conflicts.list () + match + all |> List.filter (fun (c : Conflicts.Conflict) -> c.location = fq) + with + | [ c ] -> + let! ok = Sync.resolveConflict c.id true + Expect.isTrue ok "resolveConflict 'mine' found + resolved the conflict" + match! Conflicts.getById c.id with + | Some(c2 : Conflicts.Conflict) -> + Expect.isTrue c2.overridden "the conflict is marked overridden" + | None -> failtest "conflict vanished after resolve" + // 'mine' re-binds via a WIP SetName: the local projection now points at OUR hash + let! rows = + Sql.query + "SELECT item_hash FROM locations WHERE owner = @o AND modules = @m AND name = @n AND unlisted_at IS NULL LIMIT 1" + |> Sql.parameters + [ "o", Sql.string "ConflictTest" + "m", Sql.string "M" + "n", Sql.string nm ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + Expect.equal + (List.tryHead rows) + (Some hashA) + "location re-bound to OUR hash (hashA)" + | other -> + failtest $"expected one conflict for {fq}, got {List.length other}" + } + + testTask "Remotes: add (upsert) → list/urls include it → remove (idempotent)" { + let nm = "rmt" + System.Guid.NewGuid().ToString().Replace("-", "") + let url = $"http://{nm}.example:9922" + // add, then re-add with a new url — upsert by name (no duplicate row) + do! Remotes.add nm "http://stale:1" + do! Remotes.add nm url + let! listed = Remotes.list () + let mine = listed |> List.filter (fun (n, _) -> n = nm) + match mine with + | [ (_, u) ] -> Expect.equal u url "upsert kept ONE row with the latest url" + | other -> + failtest $"expected exactly one row for {nm}, got {List.length other}" + // its url is in the daemon's poll set + let! urls = Remotes.urls () + Expect.isTrue + (List.contains url urls) + "the registered url is in the poll set (urls)" + // remove reports it existed; a second remove reports it didn't + let! removed = Remotes.remove nm + Expect.isTrue removed "remove of an existing remote returns true" + let! removedAgain = Remotes.remove nm + Expect.isFalse removedAgain "remove of a missing remote returns false" + let! after = Remotes.list () + Expect.isFalse + (after |> List.exists (fun (n, _) -> n = nm)) + "the remote is gone from the list after remove" + } + + testTask + "Sync.opKindsSince maps each SetName above the cursor to its target kind (fn/type/value)" { + // a standalone peer db with three naming ops (fn, type, value) — opKindsSince reads SetName + // target kinds; an Add* op (not a SetName) is ignored, proving the no-double-count counting. + let path = $"{System.IO.Path.GetTempPath()}okinds-{System.Guid.NewGuid()}.db" + try + use conn = new SqliteConnection($"Data Source={path};Mode=ReadWriteCreate") + conn.Open() + + let exec (sql : string) (ps : (string * obj) list) : unit = + use cmd = conn.CreateCommand() + cmd.CommandText <- sql + ps + |> List.iter (fun (k, v) -> + cmd.Parameters.AddWithValue(k, v) |> ignore) + cmd.ExecuteNonQuery() |> ignore + + exec + "CREATE TABLE package_ops (id TEXT PRIMARY KEY, op_blob BLOB NOT NULL, branch_id TEXT NOT NULL, commit_hash TEXT, origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')))" + [] + + let setName + (md : string) + (nm : string) + (kind : PT.ItemKind) + (h : string) + : PT.PackageOp = + let loc : PT.PackageLocation = + { owner = "OK"; modules = [ md ]; name = nm } + PT.PackageOp.SetName(loc, PT.Reference.fromHashAndKind (PT.Hash h, kind)) + + let ops = + [ setName "M" "f" PT.ItemKind.Fn (System.String('a', 64)) + setName "M" "t" PT.ItemKind.Type (System.String('b', 64)) + setName "M" "v" PT.ItemKind.Value (System.String('c', 64)) ] + + for op in ops do + let id = Inserts.computeOpHash op + let blob = BS.PT.PackageOp.serialize id op + exec + "INSERT INTO package_ops (id, op_blob, branch_id, commit_hash) VALUES ($id, $blob, $b, $c)" + [ "$id", box (string id) + "$blob", box blob + "$b", box (string PT.mainBranchId) + "$c", box System.DBNull.Value ] + + conn.Close() + + // from cursor 0 → all three, in rowid order + let! kinds = Sync.opKindsSince path 0L + Expect.equal + kinds + [ "fn"; "type"; "value" ] + "each SetName mapped to its target item kind, in order" + // above the last rowid → nothing + let! none = Sync.opKindsSince path 99L + Expect.isEmpty none "no ops above a high cursor" + finally + try + System.IO.File.Delete path + with _ -> + () + } + + testTask + "Conflicts.acknowledgeAll acknowledges every pending conflict (bulk ack)" { + let loc = $"AckAll.X.{System.Guid.NewGuid()}" + do! + Conflicts.record + "ackall-peer" + loc + (System.String('a', 64)) + (System.String('b', 64)) + "auto" + let! before = Conflicts.list () + Expect.isTrue + (before + |> List.exists (fun (c : Conflicts.Conflict) -> + c.location = loc && not c.acknowledged)) + "the conflict is recorded and pending" + let! n = Conflicts.acknowledgeAll () + Expect.isTrue + (n >= 1) + "acknowledgeAll reports it cleared at least the one we recorded" + let! after = Conflicts.list () + match + after |> List.filter (fun (c : Conflicts.Conflict) -> c.location = loc) + with + | [ c ] -> Expect.isTrue c.acknowledged "our conflict is now acknowledged" + | other -> + failtest $"expected one conflict for {loc}, got {List.length other}" + } + + // ── timestamp-LWW: playback orders bindings by CREATION time, not arrival ── + // An op authored EARLIER but applied LATER (it + // arrived late via sync) must not override a later-created binding. Tested directly through + // applySetName via insertAndApplyOpsWithOrigin (which stamps each op's origin_ts). + let bindingOf (modul : string) (nm : string) : Task> = + task { + let! rows = + Sql.query + "SELECT item_hash FROM locations WHERE owner = @o AND modules = @m AND name = @n AND unlisted_at IS NULL LIMIT 1" + |> Sql.parameters + [ "o", Sql.string "TsLww"; "m", Sql.string modul; "n", Sql.string nm ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + return List.tryHead rows + } + + testTask + "timestamp-LWW: an EARLIER-created op applied LATER does NOT override the newer binding" { + let nm = "tslww" + System.Guid.NewGuid().ToString().Replace("-", "") + let loc : PT.PackageLocation = + { owner = "TsLww"; modules = [ "C" ]; name = nm } + let refEarly = + PT.Reference.fromHashAndKind ( + PT.Hash(System.String('1', 64)), + PT.ItemKind.Fn + ) + let refLate = + PT.Reference.fromHashAndKind ( + PT.Hash(System.String('2', 64)), + PT.ItemKind.Fn + ) + let opEarly = PT.PackageOp.SetName(loc, refEarly) + let opLate = PT.PackageOp.SetName(loc, refLate) + let tsEarly = relTs (-120.0) + let tsLate = relTs (-60.0) + // apply the LATE-created op first → binds hash '2' + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ opLate ] + (Map.ofList [ (Inserts.computeOpHash opLate, tsLate) ]) + let! afterLate = bindingOf "C" nm + Expect.equal + afterLate + (Some(System.String('2', 64))) + "late-created op binds the name" + // now apply the EARLIER-created op (arrives later) → must be SKIPPED as stale-by-creation + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ opEarly ] + (Map.ofList [ (Inserts.computeOpHash opEarly, tsEarly) ]) + let! afterEarly = bindingOf "C" nm + Expect.equal + afterEarly + (Some(System.String('2', 64))) + "an earlier-created op applied later does NOT override" + } + + testTask + "timestamp-LWW: convergence is order-INDEPENDENT — the other apply order yields the SAME winner" { + let nm = "tsconv" + System.Guid.NewGuid().ToString().Replace("-", "") + let loc : PT.PackageLocation = + { owner = "TsLww"; modules = [ "D" ]; name = nm } + let refEarly = + PT.Reference.fromHashAndKind ( + PT.Hash(System.String('3', 64)), + PT.ItemKind.Fn + ) + let refLate = + PT.Reference.fromHashAndKind ( + PT.Hash(System.String('4', 64)), + PT.ItemKind.Fn + ) + let opEarly = PT.PackageOp.SetName(loc, refEarly) + let opLate = PT.PackageOp.SetName(loc, refLate) + let tsEarly = relTs (-120.0) + let tsLate = relTs (-60.0) + // apply EARLY first, then LATE → LATE (max origin_ts) wins, same as the reverse order above + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ opEarly ] + (Map.ofList [ (Inserts.computeOpHash opEarly, tsEarly) ]) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ opLate ] + (Map.ofList [ (Inserts.computeOpHash opLate, tsLate) ]) + let! winner = bindingOf "D" nm + Expect.equal + winner + (Some(System.String('4', 64))) + "max-origin_ts wins regardless of apply order (no swap)" + } + + ] diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs new file mode 100644 index 0000000000..b8b6864de2 --- /dev/null +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -0,0 +1,608 @@ +/// Scenario coverage for the sync conflict-dispatch seam (`Sync.routeDivergences`) — the wire that +/// turns a surfaced `name → two hashes` divergence into a first-class `Conflict.CSyncDivergence` the +/// runtime resolution policy (`ExecutionState.conflictDispatch`) decides. Complements +/// `SyncIdempotency.Tests` (the transport's idempotence + LWW); here we exercise the POLICY layer. +/// +/// Most scenarios are DATA: one `Scenario` record describes a divergent pull (local vs incoming hash + +/// authoring times) and a policy, and `runScenario` runs it and checks the live binding, the number of +/// reconciling ops, and the recorded conflict. Adding a case is a few fields. The handful that don't +/// fit the single-divergence shape (ties, multi-binding batches, re-pulls) stay as explicit tests. +module Tests.SyncScenarios + +open Expecto + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite + +module Inserts = LibDB.Inserts +module Conflicts = LibDB.Conflicts +module Sync = LibDB.Sync +module PT = LibExecution.ProgramTypes +module RT = LibExecution.RuntimeTypes +module RTE = LibExecution.RuntimeTypes.RuntimeError + +// ── helpers ────────────────────────────────────────────────────────────────────────────────── + +/// An authoring stamp relative to now (positive = future/newer, negative = past/older) — same +/// format the schema/sync use; no baked-in year so it runs in any calendar year. +let private relTs (minutesFromNow : float) : string = + System.DateTime.UtcNow + .AddMinutes(minutesFromNow) + .ToString("yyyy-MM-ddTHH:mm:ss.fffZ") + +let private callCtx : RT.CallContext = + { branchId = PT.mainBranchId; threadID = System.Guid.NewGuid() } + +/// The runtime default (mirrors `Execution.createState`): every divergence fails loudly — meaning, +/// to the sync receiver, "I pick no winner", so `routeDivergences` leaves the LWW outcome standing. +let private defaultDispatch : RT.ConflictDispatch = + fun conflict _ctx -> + uply { + match conflict with + | RT.CSyncDivergence(loc, e, i) -> + return + RT.RFailLoudly(RTE.UncaughtException($"divergence {loc}: {e} vs {i}", [])) + | RT.CRuntimeError rte -> return RT.RFailLoudly rte + | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) + } + +/// A keep-local policy: always substitute the EXISTING (local) hash — "my version wins". +let private keepLocalDispatch : RT.ConflictDispatch = + fun conflict _ctx -> + uply { + match conflict with + | RT.CSyncDivergence(_loc, existing, _incoming) -> + return RT.RSubstitute(RT.DString existing) + | RT.CRuntimeError rte -> return RT.RFailLoudly rte + | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) + } + +/// A keep-incoming policy: substitute the INCOMING hash (= what already applied) — a no-op rebind. +let private keepIncomingDispatch : RT.ConflictDispatch = + fun conflict _ctx -> + uply { + match conflict with + | RT.CSyncDivergence(_loc, _existing, incoming) -> + return RT.RSubstitute(RT.DString incoming) + | RT.CRuntimeError rte -> return RT.RFailLoudly rte + | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) + } + +let private liveHash (loc : PT.PackageLocation) : Task> = + Sql.query + "SELECT item_hash FROM locations WHERE owner=@o AND modules=@m AND name=@n AND unlisted_at IS NULL LIMIT 1" + |> Sql.parameters + [ "o", Sql.string loc.owner + "m", Sql.string (String.concat "." loc.modules) + "n", Sql.string loc.name ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + |> fun t -> + task { + let! rows = t + return List.tryHead rows + } + +let private fqOf (loc : PT.PackageLocation) : string = + let mods = String.concat "." loc.modules + if mods = "" then + $"{loc.owner}.{loc.name}" + else + $"{loc.owner}.{mods}.{loc.name}" + +let private uniqueName (prefix : string) : string = + prefix + System.Guid.NewGuid().ToString().Replace("-", "") + +let private hashChar (c : char) = System.String(c, 64) + +/// Establish a local binding (name → hash) with an authoring stamp, then PULL a divergent incoming +/// (name → other hash) through the real receiver. Returns (loc, divergences) so the scenario can +/// route them through a chosen dispatch policy. `remote` keys the conflict record. +let private setupDivergentPull + (loc : PT.PackageLocation) + (kind : PT.ItemKind) + (localHash : string) + (localTs : float) + (incomingHash : string) + (incomingTs : float) + (remote : string) + : Task> = + task { + let localRef = PT.Reference.fromHashAndKind (PT.Hash localHash, kind) + let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incomingHash, kind) + let localOp = PT.PackageOp.SetName(loc, localRef) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ localOp ] + (Map.ofList [ (Inserts.computeOpHash localOp, relTs localTs) ]) + let! (_cursor, divs) = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs incomingTs, PT.PackageOp.SetName(loc, incomingRef)) ] + return divs + } + +// ── the dense scenario form: a divergent pull as DATA ────────────────────────────────────────── + +type private Policy = + | Default // the runtime default — surface as data, LWW stands + | KeepLocal // override to our hash + | KeepIncoming // affirm the applied incoming hash (no-op) + | SubstituteUnrelated // a hash bound to neither side — must be ignored + +/// Which binding the location should hold after routing. +type private Winner = + | Local + | Incoming + +/// One divergent-pull scenario. A local binding (hash `local`, authored `localAge` minutes ago) meets +/// an incoming binding (hash `incoming`, `incomingAge` minutes ago) for the same name; `policy` +/// resolves the pull. Expect the live binding on `winner`, the policy to emit `reconciled` ops, and the +/// recorded conflict's `overridden` flag to be `overridden`. (`'b' > 'a'`, so the higher hash is `'b'`.) +type private Scenario = + { desc : string + kind : PT.ItemKind + local : char + localAge : float + incoming : char + incomingAge : float + policy : Policy + winner : Winner + reconciled : int + overridden : bool } + +let private dispatchFor (policy : Policy) : RT.ConflictDispatch = + match policy with + | Default -> defaultDispatch + | KeepLocal -> keepLocalDispatch + | KeepIncoming -> keepIncomingDispatch + | SubstituteUnrelated -> + fun _ _ -> uply { return RT.RSubstitute(RT.DString(hashChar 'z')) } + +let private runScenario (s : Scenario) : Test = + testTask s.desc { + let loc : PT.PackageLocation = + { owner = "Scenario"; modules = [ "S" ]; name = uniqueName "s" } + let localH, incomingH = hashChar s.local, hashChar s.incoming + let remote = uniqueName "r" + let! divs = + setupDivergentPull loc s.kind localH s.localAge incomingH s.incomingAge remote + Expect.equal (List.length divs) 1 $"{s.desc}: exactly one divergence surfaced" + let! reconciled = + Sync.routeDivergences + (dispatchFor s.policy) + callCtx + remote + PT.mainBranchId + divs + Expect.equal reconciled s.reconciled $"{s.desc}: reconciling-op count" + let! winner = liveHash loc + let expected = + match s.winner with + | Local -> localH + | Incoming -> incomingH + Expect.equal winner (Some expected) $"{s.desc}: live binding" + let! all = Conflicts.list () + match + all |> List.filter (fun (c : Conflicts.Conflict) -> c.location = fqOf loc) + with + | [ c ] -> + Expect.equal c.overridden s.overridden $"{s.desc}: conflict.overridden" + | other -> + failtest + $"{s.desc}: expected exactly one recorded conflict, got {List.length other}" + } + +/// The scenario table. Each row is one convergence/policy case; the runner does the rest. +let private scenarios : List = + [ { desc = "default policy: incoming is newer → LWW keeps it, nothing reconciled" + kind = PT.ItemKind.Fn + local = 'a' + localAge = -120.0 + incoming = 'b' + incomingAge = -60.0 + policy = Default + winner = Incoming + reconciled = 0 + overridden = false } + { desc = + "keep-local: override the LWW loss → our hash wins, conflict marked overridden" + kind = PT.ItemKind.Fn + local = 'a' + localAge = -120.0 + incoming = 'b' + incomingAge = -60.0 + policy = KeepLocal + winner = Local + reconciled = 1 + overridden = true } + { desc = "keep-incoming: affirm the already-applied bind → a safe no-op" + kind = PT.ItemKind.Fn + local = 'a' + localAge = -120.0 + incoming = 'b' + incomingAge = -60.0 + policy = KeepIncoming + winner = Incoming + reconciled = 0 + overridden = false } + { desc = "unknown substitute (bound to neither side) is ignored → LWW stands" + kind = PT.ItemKind.Fn + local = 'a' + localAge = -120.0 + incoming = 'b' + incomingAge = -60.0 + policy = SubstituteUnrelated + winner = Incoming + reconciled = 0 + overridden = false } + { desc = "incoming is OLDER → local stays (the divergence is still recorded)" + kind = PT.ItemKind.Fn + local = 'a' + localAge = -30.0 + incoming = 'b' + incomingAge = -90.0 + policy = Default + winner = Local + reconciled = 0 + overridden = false } + { desc = "a TYPE binding diverges too — default keeps the newer incoming" + kind = PT.ItemKind.Type + local = 'c' + localAge = -120.0 + incoming = 'd' + incomingAge = -60.0 + policy = Default + winner = Incoming + reconciled = 0 + overridden = false } + { desc = "keep-local works for a TYPE binding (kind isn't fn-only)" + kind = PT.ItemKind.Type + local = 'c' + localAge = -120.0 + incoming = 'd' + incomingAge = -60.0 + policy = KeepLocal + winner = Local + reconciled = 1 + overridden = true } + { desc = "keep-incoming on a TYPE binding is also a no-op" + kind = PT.ItemKind.Type + local = 'c' + localAge = -120.0 + incoming = 'd' + incomingAge = -60.0 + policy = KeepIncoming + winner = Incoming + reconciled = 0 + overridden = false } + { desc = "a VALUE binding diverges too — default keeps the newer incoming" + kind = PT.ItemKind.Value + local = 'e' + localAge = -120.0 + incoming = 'f' + incomingAge = -60.0 + policy = Default + winner = Incoming + reconciled = 0 + overridden = false } + { desc = + "keep-local works for a VALUE binding (every item kind reconciles the same way)" + kind = PT.ItemKind.Value + local = 'e' + localAge = -120.0 + incoming = 'f' + incomingAge = -60.0 + policy = KeepLocal + winner = Local + reconciled = 1 + overridden = true } ] + +// ── the scenarios that don't fit the single-divergence table ─────────────────────────────────── + +let private emptyConverged = + testTask + "empty divergence list routes to a clean zero (the converged steady state)" { + let remote = uniqueName "rempty" + let! reconciled = + Sync.routeDivergences keepLocalDispatch callCtx remote PT.mainBranchId [] + Expect.equal reconciled 0 "no divergences → nothing reconciled, no ops" + } + +// The same-millisecond cross-instance tie: two DIFFERENT ops bind one name with the EXACT same +// origin_ts (two machines authored in the same ms). Resolution must be DETERMINISTIC — the higher +// item hash wins — so both machines converge regardless of which side they hold. Run it both ways. +let private sameMsTie = + testTask + "same-millisecond tie resolves deterministically by hash (higher wins, both ways)" { + let mk suffix : PT.PackageLocation = + { owner = "Scenario"; modules = [ "Tie" ]; name = uniqueName suffix } + let lowH, highH = hashChar 'a', hashChar 'b' // 'b' > 'a' → highH wins + let tie = "2025-01-01T00:00:00.000Z" // one exact stamp both sides share + let refOf h = PT.Reference.fromHashAndKind (PT.Hash h, PT.ItemKind.Fn) + + let runTie (localH : string) (incomingH : string) : Task> = + task { + let loc = mk "tie" + let localOp = PT.PackageOp.SetName(loc, refOf localH) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ localOp ] + (Map.ofList [ (Inserts.computeOpHash localOp, tie) ]) + let! _ = + Sync.applyRemoteOps + (uniqueName "rtie") + PT.mainBranchId + None + [ (1L, tie, PT.PackageOp.SetName(loc, refOf incomingH)) ] + return! liveHash loc + } + + // incoming holds the higher hash → incoming wins + let! a = runTie lowH highH + Expect.equal a (Some highH) "higher hash wins (incoming was higher)" + // local holds the higher hash → incoming is stale, local stays — SAME winner + let! b = runTie highH lowH + Expect.equal b (Some highH) "higher hash wins (incoming was lower → stale)" + } + +let private multiDivergenceBatch = + testTask + "multi-divergence batch (default policy): every location surfaces + LWW converges" { + // The SHIPPED path: a single pull carrying TWO divergent bindings. The default policy + // reconciles nothing (surface-as-data); each location converges to its own LWW winner. + let mk suffix : PT.PackageLocation = + { owner = "Scenario"; modules = [ "Multi" ]; name = uniqueName suffix } + let loc1, loc2 = mk "m1", mk "m2" + let local1, incoming1 = hashChar 'a', hashChar 'b' + let local2, incoming2 = hashChar 'c', hashChar 'd' + let remote = uniqueName "rmulti" + let! divs1 = + setupDivergentPull loc1 PT.ItemKind.Fn local1 -120.0 incoming1 -60.0 remote + let! divs2 = + setupDivergentPull loc2 PT.ItemKind.Fn local2 -120.0 incoming2 -60.0 remote + let divs = divs1 @ divs2 + Expect.equal (List.length divs) 2 "two divergences collected from the batch" + let! reconciled = + Sync.routeDivergences defaultDispatch callCtx remote PT.mainBranchId divs + Expect.equal reconciled 0 "default policy reconciles nothing (surface-as-data)" + let! w1 = liveHash loc1 + let! w2 = liveHash loc2 + Expect.equal w1 (Some incoming1) "first location converged to its LWW winner" + Expect.equal w2 (Some incoming2) "second location converged to its LWW winner" + } + +let private keepLocalPropagates = + testTask + "keep-local re-stamp makes our op the newest-by-creation (rides sync to peers)" { + let loc : PT.PackageLocation = + { owner = "Scenario"; modules = [ "Prop" ]; name = uniqueName "p" } + let local, incoming = hashChar 'a', hashChar 'b' + let remote = uniqueName "rprop" + let! divs = + setupDivergentPull loc PT.ItemKind.Fn local -120.0 incoming -60.0 remote + let localRef = PT.Reference.fromHashAndKind (PT.Hash local, PT.ItemKind.Fn) + let localOpId = Inserts.computeOpHash (PT.PackageOp.SetName(loc, localRef)) + let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incoming, PT.ItemKind.Fn) + let incomingOpId = Inserts.computeOpHash (PT.PackageOp.SetName(loc, incomingRef)) + let originTs (id : System.Guid) : Task = + Sql.query "SELECT origin_ts FROM package_ops WHERE id = @id LIMIT 1" + |> Sql.parameters [ "id", Sql.uuid id ] + |> Sql.executeRowAsync (fun read -> read.string "origin_ts") + let! _ = + Sync.routeDivergences keepLocalDispatch callCtx remote PT.mainBranchId divs + // after keep-local, OUR op's origin_ts is re-stamped to now — strictly newer than the incoming's. + // A peer re-pulling reads our op's adjacent (newer) origin_ts and, by the same timestamp-LWW, + // re-adopts our hash. Convergence, not divergence forever. + let! localStamp = originTs localOpId + let! incomingStamp = originTs incomingOpId + Expect.isGreaterThan + localStamp + incomingStamp + "our op is now the newest-by-creation (the re-stamp rides sync so peers re-adopt local)" + } + +let private orderIndependent = + testTask + "order-independent: both machines converge to the newer op regardless of arrival side" { + let mk suffix : PT.PackageLocation = + { owner = "Scenario"; modules = [ "Order" ]; name = uniqueName suffix } + let a, b = hashChar 'a', hashChar 'b' + // machine-1: local=a (older), incoming=b (newer) → b wins as the incoming + let loc1 = mk "ord1" + let! _ = + setupDivergentPull loc1 PT.ItemKind.Fn a -120.0 b -60.0 (uniqueName "ro1") + let! w1 = liveHash loc1 + // machine-2: local=b (newer), incoming=a (older) → b stays as the local + let loc2 = mk "ord2" + let! _ = + setupDivergentPull loc2 PT.ItemKind.Fn b -60.0 a -120.0 (uniqueName "ro2") + let! w2 = liveHash loc2 + Expect.equal w1 (Some b) "machine where b arrived incoming: b (newer) won" + Expect.equal + w2 + (Some b) + "machine where b was local: b (newer) stayed — same winner" + } + +let private idempotentRePull = + testTask + "re-pulling an already-applied divergent op is a no-op (no new conflict, no flip)" { + let loc : PT.PackageLocation = + { owner = "Scenario"; modules = [ "NoOp" ]; name = uniqueName "np" } + let localH, incomingH = hashChar 'a', hashChar 'b' + let remote = uniqueName "rnoop" + let! _ = + setupDivergentPull loc PT.ItemKind.Fn localH -120.0 incomingH -60.0 remote + let! winner1 = liveHash loc + Expect.equal winner1 (Some incomingH) "incoming (newer) won the first pull" + // re-deliver the same incoming op + let incomingRef = + PT.Reference.fromHashAndKind (PT.Hash incomingH, PT.ItemKind.Fn) + let! (_cursor, divs2) = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs -60.0, PT.PackageOp.SetName(loc, incomingRef)) ] + Expect.isEmpty divs2 "re-pulling the same op surfaces no new divergence" + let! winner2 = liveHash loc + Expect.equal + winner2 + (Some incomingH) + "binding unchanged after the idempotent re-pull" + } + +// A resolution STICKS and propagates: after keep-local re-binds + re-stamps our hash to now, the +// superseded incoming op (its old, now-stale stamp) re-arriving does NOT flip the binding back. And +// because our op is now the newest-by-creation, a peer pulling it re-adopts our hash — the resolution +// rides sync to the other instances (it doesn't live only on the machine that made it). +let private resolutionSticks = + testTask + "a keep-local resolution holds: the superseded incoming op re-arriving doesn't undo it" { + let loc : PT.PackageLocation = + { owner = "Scenario"; modules = [ "Stick" ]; name = uniqueName "st" } + let local, incoming = hashChar 'a', hashChar 'b' + let remote = uniqueName "rstick" + // incoming newer → it won the pull; keep-local overrides + re-stamps our hash to now + let! divs = + setupDivergentPull loc PT.ItemKind.Fn local -120.0 incoming -60.0 remote + let! _ = + Sync.routeDivergences keepLocalDispatch callCtx remote PT.mainBranchId divs + let! afterResolve = liveHash loc + Expect.equal + afterResolve + (Some local) + "keep-local re-bound the location to our hash" + // the same incoming op re-arrives with its original (now-stale) stamp — must not win + let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incoming, PT.ItemKind.Fn) + let! (_c, _divs2) = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs -60.0, PT.PackageOp.SetName(loc, incomingRef)) ] + let! afterRePull = liveHash loc + Expect.equal + afterRePull + (Some local) + "the resolution holds — the superseded incoming op doesn't flip it back" + } + +// Weird timing: ops don't arrive in creation order. A stale op (older authoring time) delivered AFTER +// a fresher one must NOT overwrite it — convergence is by authoring time, not arrival order. +let private lateStaleArrival = + testTask + "out-of-order timing: a stale op arriving after a fresher one does not overwrite it" { + let loc : PT.PackageLocation = + { owner = "Scenario"; modules = [ "Late" ]; name = uniqueName "lt" } + let a, b, c = hashChar 'a', hashChar 'b', hashChar 'c' + let remote = uniqueName "rlate" + let refOf h = PT.Reference.fromHashAndKind (PT.Hash h, PT.ItemKind.Fn) + // local 'a' @ -60 + let aOp = PT.PackageOp.SetName(loc, refOf a) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ aOp ] + (Map.ofList [ (Inserts.computeOpHash aOp, relTs -60.0) ]) + // a fresher 'c' @ -30 arrives → wins + let! _ = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs -30.0, PT.PackageOp.SetName(loc, refOf c)) ] + let! afterFresh = liveHash loc + Expect.equal + afterFresh + (Some c) + "the freshest op ('c' @ -30) is the live binding" + // a STALE 'b' @ -120 arrives LATE → older than 'c' → must not overwrite + let! _ = + Sync.applyRemoteOps + remote + PT.mainBranchId + None + [ (1L, relTs -120.0, PT.PackageOp.SetName(loc, refOf b)) ] + let! afterStale = liveHash loc + Expect.equal + afterStale + (Some c) + "the stale late arrival ('b' @ -120) did not overwrite the fresher 'c'" + } + +// Three competing instances edit one name at different times; whatever order their ops arrive, all +// converge to the newest-by-creation. Proven by applying the same three ops in two different orders. +let private threeWayConverge = + testTask + "three competing instances converge to the newest op, regardless of arrival order" { + let a, b, c = hashChar 'a', hashChar 'b', hashChar 'c' + let refOf h = PT.Reference.fromHashAndKind (PT.Hash h, PT.ItemKind.Fn) + // ages: a = -90 (oldest), c = -60, b = -30 (newest) → 'b' should win, both orders + let converge + (suffix : string) + (order : List) + : Task> = + task { + let loc : PT.PackageLocation = + { owner = "Scenario"; modules = [ "Three" ]; name = uniqueName suffix } + match order with + | (h0, t0) :: rest -> + let op0 = PT.PackageOp.SetName(loc, refOf h0) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ op0 ] + (Map.ofList [ (Inserts.computeOpHash op0, relTs t0) ]) + for (h, t) in rest do + let! _ = + Sync.applyRemoteOps + (uniqueName "r3") + PT.mainBranchId + None + [ (1L, relTs t, PT.PackageOp.SetName(loc, refOf h)) ] + () + return! liveHash loc + | [] -> return None + } + let! w1 = converge "o1" [ (a, -90.0); (c, -60.0); (b, -30.0) ] + let! w2 = converge "o2" [ (b, -30.0); (a, -90.0); (c, -60.0) ] + Expect.equal + w1 + (Some b) + "converges to the newest ('b' @ -30) — arrival order a, c, b" + Expect.equal + w2 + (Some b) + "converges to the newest ('b' @ -30) — arrival order b, a, c (same winner)" + } + +// ── all scenarios ────────────────────────────────────────────────────────────────────────────── + +let tests = + testSequenced + <| testList + "SyncScenarios" + ((scenarios |> List.map runScenario) + @ [ emptyConverged + sameMsTie + multiDivergenceBatch + keepLocalPropagates + orderIndependent + idempotentRePull + resolutionSticks + lateStaleArrival + threeWayConverge ]) diff --git a/packages/darklang/cli/conflicts.dark b/packages/darklang/cli/conflicts.dark new file mode 100644 index 0000000000..37ad8491c1 --- /dev/null +++ b/packages/darklang/cli/conflicts.dark @@ -0,0 +1,113 @@ +module Darklang.Cli.Conflicts + +// `dark conflicts` — recorded sync conflicts: auto-resolved name-binding divergences (you and a peer +// defined the same name differently). Auto-resolve is last-writer-wins and NEVER blocks the sync; +// this surfaces what was resolved so nothing is silently lost. The common case is `ack` ("the auto +// thing was right"); `resolve mine|theirs` overrides it the other way. + +// Single home for the pmConflictsList builtin — every other +// caller (the display below, the `sync status` count) routes through this one fn. Returns STRUCTURED +// rows `(id, location, status, resolution, localHash, incomingHash, remote)`; the human formatting +// lives in `Darklang.Sync.Display.conflictReport` (pure → package-testable, iterable without a rebuild). +let recorded + (includeResolved: Bool) + : List<(String * String * String * String * String * String * String)> = + Builtin.pmConflictsList includeResolved + +// How many conflicts are still pending (unacknowledged) — the `dark sync status` glance shows this. +let pendingCount () : Int64 = + Stdlib.List.length (recorded false) + +// Print the conflict list — pending only, or all (including acked/overridden history). The report +// frames the last-write-wins auto-resolution, marks the winning side, and nudges toward `ack`. +let showConflicts (includeResolved: Bool) : Unit = + Stdlib.printLines ( + Darklang.Sync.Display.conflictReport (recorded includeResolved) includeResolved + ) + + +let execute (state: AppState) (args: List) : AppState = + match args with + | [] | [ "list" ] -> + // default view: PENDING only (acked/overridden drop out — the ack-to-dismiss model) + showConflicts false + state + + | [ "list"; "all" ] -> + // history: every recorded conflict, including acked/overridden + showConflicts true + state + + | [ "ack"; "all" ] -> + // bulk "the auto thing was right" — clear every pending conflict at once + let n = Builtin.pmConflictAckAll () + + if n == 0L then + Stdlib.printLine (Colors.success "✓ no unacknowledged conflicts") + else + Stdlib.printLine (Colors.success $"acknowledged {Stdlib.Int64.toString n} conflict(s)") + + state + + | [ "ack"; id ] -> + if Builtin.pmConflictAck id then + Stdlib.printLine (Colors.success $"acknowledged conflict {id}") + else + Stdlib.printLine + (Colors.error $"no single conflict matching '{id}' (check `conflicts list`)") + + state + + | [ "resolve"; id; choice ] -> + if (choice == "mine") || (choice == "theirs") then + let keepMine = choice == "mine" + + if Builtin.pmConflictResolve id keepMine then + let what = + if keepMine then + "kept YOUR version (re-bound via a WIP SetName — commit to share it)" + else + "kept the incoming version" + + Stdlib.printLine (Colors.success $"resolved conflict {id} — {what}") + else + Stdlib.printLine + (Colors.error $"no single conflict matching '{id}' (check `conflicts list`)") + else + Stdlib.printLine (Colors.error "resolve mine|theirs (choice must be 'mine' or 'theirs')") + + state + + | _ -> + Stdlib.printLine + (Colors.error "Usage: conflicts [list] | conflicts ack | conflicts resolve mine|theirs") + state + + +let complete + (_state: AppState) + (_args: List) + : List = + [ Completion.simple "list" + Completion.simple "ack" + Completion.simple "resolve" ] + + +let help (state: AppState) : AppState = + [ "Usage: conflicts [list] | conflicts ack | conflicts resolve mine|theirs" + "" + "Recorded sync conflicts — auto-resolved name-binding divergences (you and a peer defined the" + "same name differently). Auto-resolve is last-writer-wins and never blocks the sync; this" + "surfaces what was resolved so nothing is silently lost." + "" + " list Show PENDING conflicts (acked/overridden drop out — ack to dismiss)" + " list all Show every recorded conflict, including acked/overridden history" + " ack Acknowledge — 'the auto-resolution was right' (the common case)" + " ack all Acknowledge ALL pending conflicts at once (bulk 'all were right')" + " resolve mine Override — re-bind to YOUR version (a WIP SetName; commit to share)" + " resolve theirs Override — keep the incoming version (what already won)" + "" + " is the short id from `conflicts list` (a unique prefix is fine)." ] + |> Stdlib.printLines + + state diff --git a/packages/darklang/cli/core.dark b/packages/darklang/cli/core.dark index 5e941fb4f5..edb5dd28a7 100644 --- a/packages/darklang/cli/core.dark +++ b/packages/darklang/cli/core.dark @@ -279,7 +279,10 @@ module Registry = ("caps", "View + control this instance's capability grant (default NONE)", [ "capabilities" ], Caps.Command.execute, Caps.Command.help, Caps.Command.complete) ("devices", "Your tailnet devices (status / serve / ping), over the `tailscale` CLI", [], Devices.execute, Devices.help, Devices.complete) ("apps", "List, install, and manage apps (daemons, foreground, UI)", [], Apps.Command.execute, Apps.Command.help, Apps.Command.complete) - ("text-editor", "Demo: a tiny text editor (the outliner's widget surfaced as an app)", [], Apps.Examples.TextEdit.execute, Apps.Examples.TextEdit.help, Apps.Examples.TextEdit.complete) ] + ("text-editor", "Demo: a tiny text editor (the outliner's widget surfaced as an app)", [], Apps.Examples.TextEdit.execute, Apps.Examples.TextEdit.help, Apps.Examples.TextEdit.complete) + ("sync", "Pull package ops from a peer (file or HTTP) + show sync status", [], Sync.execute, Sync.help, Sync.complete) + ("conflicts", "Review + resolve recorded sync conflicts", [], Conflicts.execute, Conflicts.help, Conflicts.complete) + ("remote", "Register tailnet sync peers (add/list/remove/add-ts)", [], Remote.execute, Remote.help, Remote.complete) ] |> Stdlib.List.map( // CLEANUP nitpicky: swap help and complete fun (name, desc, aliases, execute, help, complete) -> @@ -354,6 +357,7 @@ module Registry = let commandGroups () : List<(String * List)> = [ ("Packages", [ "nav"; "ls"; "view"; "tree"; "back"; "search"; "deps"; "val"; "let"; "fn"; "type"; "hash"; "db"; "deprecate"; "delete" ]) ("SCM", [ "status"; "log"; "commit"; "discard"; "show"; "branch"; "rebase"; "merge"; "review" ]) + ("Sync", [ "sync"; "conflicts"; "remote" ]) ("Execution", [ "run"; "eval"; "scripts" ]) ("Installation", [ "install"; "update"; "uninstall"; "version" ]) ("AI", [ "agent" ]) diff --git a/packages/darklang/cli/remote.dark b/packages/darklang/cli/remote.dark new file mode 100644 index 0000000000..d2a688bea8 --- /dev/null +++ b/packages/darklang/cli/remote.dark @@ -0,0 +1,90 @@ +module Darklang.Cli.Remote + +// `dark remote` — the sync SETUP surface: register the tailnet peers this instance syncs with. A +// remote is a (name, url) where url is an http(s) sync-server URL or a local data.db path. The +// tailnet-wide daemon (`dark apps start sync-daemon`) polls every registered remote — so `remote +// add` lets you wire a peer once and have it sync automatically, without a manual `sync pull` first. +// Remotes are LOCAL setup (not synced), like git's. + +// Register a remote + report it. The single call site for the `pmRemoteAdd` builtin, so both +// `add` and `add-ts` share one place. +let register (name: String) (url: String) (note: String) : Unit = + Builtin.pmRemoteAdd name url + // "registered" rather than "added": pmRemoteAdd is an upsert, so this also covers updating an + // existing remote's url. + Stdlib.printLine (Colors.success $"✓ remote '{name}' registered — {url}") + Stdlib.printLine (Colors.dimText note) + + +let execute (state: AppState) (args: List) : AppState = + match args with + | [] | [ "list" ] -> + let lines = Builtin.pmRemoteList () + + if Stdlib.List.isEmpty lines then + Stdlib.printLine (Colors.dimText "no remotes registered — `remote add `") + else + Stdlib.printLine "Remotes:" + Stdlib.printLines lines + + state + + | [ "add"; name; url ] -> + register + name + url + " the sync daemon will poll it; `sync pull` now also accepts the url" + state + + | [ "add-ts"; name; tailnet ] -> + // tailnet convenience: derive the MagicDNS URL (https://..ts.net, fronted by + // `tailscale serve --https=443 `) instead of typing it out. + let url = Darklang.Sync.Tailscale.peerUrl name tailnet + + register + name + url + " tailnet peer via MagicDNS; ensure `tailscale serve --https=443 ` runs there" + state + + | [ "remove"; name ] -> + if Builtin.pmRemoteRemove name then + Stdlib.printLine (Colors.success $"✓ remote '{name}' removed") + else + Stdlib.printLine (Colors.error $"no remote named '{name}' (see `remote list`)") + + state + + | _ -> + Stdlib.printLine + (Colors.error + "Usage: remote [list] | add | add-ts | remove ") + state + + +let complete + (_state: AppState) + (_args: List) + : List = + [ Completion.simple "list" + Completion.simple "add" + Completion.simple "add-ts" + Completion.simple "remove" ] + + +let help (state: AppState) : AppState = + [ "Usage: remote [list] | remote add | remote remove " + "" + "Register the tailnet peers this instance syncs with. A remote is a (name, url) where url is an" + "http(s) sync-server URL or a local data.db path. The tailnet-wide sync daemon polls every" + "registered remote, so you can wire a peer once and have it sync automatically." + "" + " list Show registered remotes (name → url)" + " add Register (or update) a remote by name" + " add-ts Register a tailnet peer by MagicDNS (derives the https URL)" + " remove Unregister a remote (its sync cursor is left intact)" + "" + "Remotes are LOCAL setup (not synced), like git's." ] + |> Stdlib.printLines + + state diff --git a/packages/darklang/cli/scm/branch.dark b/packages/darklang/cli/scm/branch.dark index 79225c5e13..4abe2a7122 100644 --- a/packages/darklang/cli/scm/branch.dark +++ b/packages/darklang/cli/scm/branch.dark @@ -201,15 +201,26 @@ let execute (state: AppState) (args: List) : AppState = Stdlib.printLine (Colors.error $"Branch '{name}' not found.") state + | [ "rebuild" ] -> + // ops⊥projections recovery: drop every regenerable projection table and re-fold the whole + // canonical op log. Projections are non-authoritative, so this is always safe (it only costs + // the CPU to re-fold) — use it after a schema bump or if a projection looks stale. + let refolded = Builtin.pmRebuildProjections () + let n = Stdlib.Int64.toString refolded + Stdlib.printLine + (Colors.success $"Rebuilt projections — re-folded {n} ops from the canonical log.") + state + | _ -> - Stdlib.printLine "Usage: branch [list|create|switch|rename|archive] [args]" + Stdlib.printLine + "Usage: branch [list|create|switch|rename|archive|rebuild] [args]" state let complete (_state: AppState) (args: List) : List = match args with | [] -> - [ "list"; "create"; "switch"; "rename"; "archive" ] + [ "list"; "create"; "switch"; "rename"; "archive"; "rebuild" ] |> Stdlib.List.map Completion.simple | [ "switch" ] | [ "archive" ] | [ "delete" ] -> let branches = SCM.Branch.list () @@ -228,6 +239,7 @@ let help (_state: AppState) : Unit = " rename Rename a branch" " archive Archive a branch (soft delete)" " delete Archive a branch (alias for archive)" + " rebuild Drop + re-fold this branch's projections from the op log (recovery)" "" "Note: branch context does not persist between CLI invocations." "In interactive mode, 'branch switch' works for the session." diff --git a/packages/darklang/cli/scm/status.dark b/packages/darklang/cli/scm/status.dark index 1e420feb3c..be349c085b 100644 --- a/packages/darklang/cli/scm/status.dark +++ b/packages/darklang/cli/scm/status.dark @@ -1,6 +1,24 @@ module Darklang.Cli.SCM.Status +// The ops⊥projections currency line: is the local projection cache current with the canonical op +// log? `opsCount` = rows in `package_ops`; `foldedThrough` = how many are folded (applied). Pure +// arithmetic over the two — testable without a live DB (see testfiles/.../status-cli.dark). +let statusLine (opsCount: Int64) (foldedThrough: Int64) : String = + if foldedThrough >= opsCount then + "✓ up to date (" ++ (Stdlib.Int64.toString opsCount) ++ " ops)" + else + let behind = opsCount - foldedThrough + + "core.db: " + ++ (Stdlib.Int64.toString opsCount) + ++ " ops · projections folded through " + ++ (Stdlib.Int64.toString foldedThrough) + ++ " (" + ++ (Stdlib.Int64.toString behind) + ++ " behind)" + + let execute (state: AppState) (args: List) : AppState = let branchId = state.currentBranchId @@ -12,6 +30,10 @@ let execute (state: AppState) (args: List) : AppState = Stdlib.printLine $"On branch {Cli.Colors.cyan}{branchName}{Cli.Colors.reset}" + // projection-cache currency (canonical op log vs how far projections are folded) + let (opsCount, foldedThrough) = Builtin.pmProjectionStatus () + Stdlib.printLine (Cli.Colors.dimText (statusLine opsCount foldedThrough)) + let summary = SCM.PackageOps.getWipSummary branchId if summary.total == 0L then diff --git a/packages/darklang/cli/sync.dark b/packages/darklang/cli/sync.dark new file mode 100644 index 0000000000..029775fd19 --- /dev/null +++ b/packages/darklang/cli/sync.dark @@ -0,0 +1,227 @@ +module Darklang.Cli.Sync + +// `dark sync pull ` — local two-instance sync: pull a peer's package ops +// from a local data.db file into this instance (op log + projections), resuming + persisting a +// per-peer cursor. The fold is the same op-playback used everywhere; only the source differs. + +let execute (state: AppState) (args: List) : AppState = + match args with + | [ "pull"; target ] -> + // cursor BEFORE the pull → the delta is exactly the ops pulled (the op log is append-only, so + // its rowids are contiguous and `after - before` can't over/under-count). + let before = Darklang.Sync.cursorFor target + + if Stdlib.String.startsWith target "http" then + // pull over HTTP from a peer's sync server (tailnet-wide). Uses the + // SSRF-unguarded fetch: a tailnet/localhost peer is trusted (the tailnet is the trust + // boundary), and the guarded client blocks loopback + the 100.64/10 tailnet range. + // `?since=` makes it incremental — only the ops we don't yet have. + match Darklang.Sync.pullHttp target with + | Ok((newCursor, divCount, blobsFetched)) -> + let blobNote = + if blobsFetched > 0L then + $" + {Stdlib.Int64.toString blobsFetched} blob(s)" + else + "" + + let divNote = + if divCount > 0L then + $" ({Stdlib.Int64.toString divCount} name divergence(s) surfaced — not blocked; see `dark conflicts`)" + else + "" + + let summary = Darklang.Sync.Display.syncSummary target (newCursor - before) + + Stdlib.printLine ( + Colors.success + $"{summary} (at op {Stdlib.Int64.toString newCursor}){blobNote}.{divNote}" + ) + + state + | Error _ -> + Stdlib.printLine (Colors.error $"sync pull failed: could not reach {target}") + Stdlib.printLine ( + Colors.dimText + $" is `dark serve Darklang.Sync.Server.router --port ` running there? check host/port — try {Darklang.Sync.healthUrl target}" + ) + state + else if Stdlib.Bool.not (Stdlib.Cli.File.exists target) then + // a missing peer file would throw a raw SQLite error from pullFromFile — refuse cleanly + Stdlib.printLine (Colors.error $"sync pull: no such peer db: {target}") + Stdlib.printLine + (Colors.dimText " give a local data.db path, or an http(s):// sync-server URL") + state + else + // pull from a peer's local data.db file + let (newCursor, divCount) = Darklang.Sync.pullFromFile target + + let divNote = + if divCount > 0L then + $" ({Stdlib.Int64.toString divCount} name divergence(s) surfaced — not blocked; see `dark conflicts`)" + else + "" + + let pulled = newCursor - before + let summary = Darklang.Sync.Display.syncSummary target pulled + + // op-kind breakdown (file peers only): "(2 fns, 1 type)" over the naming ops just pulled + let kindNote = + if pulled > 0L then + let breakdown = + Darklang.Sync.Display.opKindBreakdown (Builtin.pmSyncOpKindsSince target before) + + if breakdown == "" then "" else $" ({breakdown})" + else + "" + + Stdlib.printLine ( + Colors.success + $"{summary} (at op {Stdlib.Int64.toString newCursor}){kindNote}.{divNote}" + ) + + state + | [] | [ "status" ] -> + // bare `dark sync` defaults to the status glance (consistent with conflicts/remote/apps) + let lines = Darklang.Sync.status () + + if Stdlib.List.isEmpty lines then + Stdlib.printLine "No peers synced yet." + else + Stdlib.printLines lines + + // fold the unacknowledged-conflict count into the status glance (routes through the single + // pmConflictsList call site in conflicts.dark) + let pending = Conflicts.pendingCount () + + if pending > 0L then + Stdlib.printLine ( + Colors.warning + $"⚠ {Stdlib.Int64.toString pending} unacknowledged conflict(s) — `dark conflicts` to review" + ) + + state + | [ "auto"; peer ] -> + // one adaptive poll step (pull + decide the next interval); a daemon loops this on a timer. + // Release-aware: a peer on a different Release is paused (not pulled) and reported, not synced. + let (cursor, nextMs, skew) = Darklang.Sync.Autosync.pollOnce peer 2000L + + if skew != "" then + Stdlib.printLine (Colors.warning $"⚠ {skew}") + else + Stdlib.printLine ( + Colors.success + $"Synced {peer} through op {Stdlib.Int64.toString cursor} — next poll in {Darklang.Sync.Display.intervalLabel nextMs} (adaptive)" + ) + + state + | [ "auto"; peer; timesStr ] -> + // run `times` adaptive poll steps (blocking foreground loop), sleeping between each + match Stdlib.Int64.parse timesStr with + | Ok times -> + let _lastInterval = Darklang.Sync.Autosync.runLoop peer 2000L times + Stdlib.printLine (Colors.success $"autosync: ran {timesStr} poll steps for {peer}") + state + | Error _ -> + Stdlib.printLine (Colors.error "Usage: sync auto [times]") + state + | [ "check"; target ] -> + // "am I caught up?" — GET the peer's /sync/health, compare its op count to our synced cursor + if Stdlib.String.startsWith target "http" then + match Darklang.Sync.httpGet (Darklang.Sync.healthUrl target) with + | Ok body -> + // version-skew first: if the peer's release differs from ours, sync can't proceed (the wire + // gate fails closed) — say which side to upgrade, instead of letting a pull decode-error later. + let skew = + Darklang.Sync.releaseSkewLine + target + (Darklang.Sync.ourRelease ()) + (Darklang.Sync.parseHealthRelease body) + + if skew == "" then + let peerOps = Darklang.Sync.parseHealthOps body + let ourCursor = Darklang.Sync.cursorFor target + Stdlib.printLine (Darklang.Sync.convergenceLine target ourCursor peerOps) + else + Stdlib.printLine (Colors.warning skew) + + state + | Error _ -> + Stdlib.printLine + (Colors.error + $"sync check: could not reach {target} — is `dark serve` running there?") + state + else + Stdlib.printLine (Colors.error "sync check needs an http(s) peer URL") + state + | [ "daemon" ] | [ "daemon"; "status" ] -> + // the background auto-sync daemon's state (running / stopped / stale), via its pidfile + Stdlib.printLine (Darklang.Sync.Daemon.statusLine "sync-daemon") + state + | [ "daemon"; "start" ] -> + // tailnet-wide: poll EVERY known peer on an adaptive interval, detached (survives CLI exit) + match Darklang.Sync.Daemon.startAll 5000L with + | Ok msg -> Stdlib.printLine (Colors.success msg) + | Error e -> Stdlib.printLine (Colors.error e) + state + | [ "daemon"; "start"; peer ] -> + // single-peer daemon (a file path or http(s):// URL) + match Darklang.Sync.Daemon.start peer 5000L with + | Ok msg -> Stdlib.printLine (Colors.success msg) + | Error e -> Stdlib.printLine (Colors.error e) + state + | [ "daemon"; "stop" ] -> + match Darklang.Sync.Daemon.stop "sync-daemon" with + | Ok msg -> Stdlib.printLine (Colors.success msg) + | Error e -> Stdlib.printLine (Colors.error e) + state + | [ "daemon"; "logs" ] -> + let lines = Darklang.Sync.Daemon.tailLog "sync-daemon" 20L + if Stdlib.List.isEmpty lines then + Stdlib.printLine (Colors.dimText "no daemon logs yet (has it run?)") + else + Stdlib.printLines lines + state + | _ -> + Stdlib.printLine ( + Colors.error + "Usage: sync pull | status | auto [times] | check | daemon start/stop/status/logs" + ) + + state + + +let complete + (_state: AppState) + (_args: List) + : List = + [ Completion.simple "pull" + Completion.simple "status" + Completion.simple "auto" + Completion.simple "check" + Completion.simple "daemon" ] + + +let help (state: AppState) : AppState = + [ "Usage: sync pull | status | auto [times] | check | daemon start/stop/status/logs" + "" + "Sync package ops across instances (tailnet-wide). The receiver applies ops idempotently" + "(op log + projections) and resumes from a per-peer cursor, so re-running only applies what's new." + "" + " pull Pull from a peer's local data.db file" + " pull http://host:port Pull from a peer's sync server over HTTP/tailnet" + " status Show each synced peer and how far (last applied op)" + " auto One adaptive poll step (pull + decide the next interval)" + " auto Run N adaptive poll steps (foreground loop)" + " check Am I caught up? Compare a peer's op count to our synced cursor" + " daemon start [peer] Start the background auto-sync daemon (all peers, or just one)" + " daemon stop Stop the background daemon" + " daemon status Is the daemon running?" + " daemon logs Tail recent daemon output" + "" + "To SERVE ops to peers (run on the always-on box, e.g. the desktop):" + " dark serve Darklang.Sync.Server.router --port " + " peers pull with: dark sync pull http://:" + " check it's up: open http://:/sync/health" ] + |> Stdlib.printLines + + state diff --git a/packages/darklang/sync/api.dark b/packages/darklang/sync/api.dark new file mode 100644 index 0000000000..d874e09187 --- /dev/null +++ b/packages/darklang/sync/api.dark @@ -0,0 +1,182 @@ +module Darklang.Sync + +// The programmatic sync client API — a thin Dark surface over the sync builtins, so Dark code +// (e.g. the autosync poll loop) can sync without reaching for `Builtin.*`. The CLI's +// `dark sync pull`/`status` are one caller; an autosync daemon is the next. + +// Pull a peer's package ops into this instance (op log + projections + missing blobs), resuming +// from + persisting a per-peer cursor. `peer` is a local data.db path. Returns +// `(newCursor, divergenceCount)` — divergences are surfaced, never block. +let pullFromFile (peer: String) : (Int64 * Int64) = + Builtin.pmSyncPull peer + +// The stored resume cursor for a peer (last applied rowid), or 0 if never synced. +let cursorFor (peer: String) : Int64 = + Builtin.pmSyncCursorFor peer + +// Apply a base64 wire batch (an HTTP `/sync/events` body) into this instance, advancing the +// peer's cursor. Returns `(newCursor, divergenceCount)`. Used by the HTTP pull path. +let applyWire (peer: String) (wireB64: String) : (Int64 * Int64) = + Builtin.pmSyncApplyWire peer wireB64 + +// One `" — synced through op N"` line per peer this instance has synced with. +let status () : List = + Builtin.pmSyncStatus () + +// The raw list of every peer this instance has a sync cursor for — what a tailnet-wide daemon polls. +let remotes () : List = + Builtin.pmSyncRemotes () + + +// ── HTTP peer URL construction ── pure, so the live cross-machine setup can't trip on a +// trailing slash. A `http://host:9912/` base must NOT produce `…//sync/events`. + +// Strip a trailing "/" from a peer base URL. +let normalizeBase (baseUrl: String) : String = + if Stdlib.String.endsWith baseUrl "/" then + Stdlib.String.dropLast baseUrl 1L + else + baseUrl + +// The `/sync/events` URL for an incremental pull from `cursor` (the ops we don't yet have). +let eventsUrl (baseUrl: String) (cursor: Int64) : String = + (normalizeBase baseUrl) ++ "/sync/events?since=" ++ (Stdlib.Int64.toString cursor) + +// The `/sync/health` readiness-probe URL — confirm a peer's server is up before pulling. +let healthUrl (baseUrl: String) : String = + (normalizeBase baseUrl) ++ "/sync/health" + +// The blob channel URLs: the manifest (all the peer's content hashes) + a single blob. +let blobsUrl (baseUrl: String) : String = + (normalizeBase baseUrl) ++ "/sync/blobs" + +let blobUrl (baseUrl: String) (hash: String) : String = + (normalizeBase baseUrl) ++ "/sync/blob?hash=" ++ hash + +// One trusted-peer HTTP GET (the tailnet is the trust boundary). The single call site for the +// SSRF-unguarded fetch builtin, so it's referenced exactly once even as more GETs (events, blobs) +// are added. +let httpGet (url: String) : Stdlib.Result.Result = + Builtin.httpClientGetUnsafe url + + +// After applying a peer's ops over HTTP, fetch the content blobs we lack — the HTTP counterpart to +// the file pull's `pullBlobsFromStore`. GET the peer's manifest, keep what we're missing, GET + +// insert each. Returns the count inserted. Best-effort: a failed GET or empty body is skipped — the +// blob retries on the next pull (content-addressed, so the insert dedups, like ops). +let fetchMissingBlobs (baseUrl: String) : Int64 = + match httpGet (blobsUrl baseUrl) with + | Error _ -> 0L + | Ok manifest -> + let offered = + (Stdlib.String.split manifest "\n") + |> Stdlib.List.filter (fun h -> h != "") + + let missing = Builtin.pmBlobMissing offered + + let inserted = + missing + |> Stdlib.List.map (fun h -> + match httpGet (blobUrl baseUrl h) with + | Ok b64 -> Builtin.pmBlobInsert h b64 + | Error _ -> false) + + inserted |> Stdlib.List.filter (fun r -> r) |> Stdlib.List.length + + +// The full HTTP pull: the peer's new ops THEN its missing content blobs — the reusable +// core behind both `dark sync pull ` and HTTP autosync. Returns +// `(newCursor, divergenceCount, blobsFetched)` on success, or the GET error (unreachable peer) so +// the caller can react (the CLI shows an actionable hint; autosync just doesn't advance). +let pullHttp + (baseUrl: String) + : Stdlib.Result.Result<(Int64 * Int64 * Int64), String> = + let cursor = cursorFor baseUrl + + match httpGet (eventsUrl baseUrl cursor) with + | Ok body -> + let (newCursor, divCount) = applyWire baseUrl body + let blobs = fetchMissingBlobs baseUrl + Stdlib.Result.Result.Ok((newCursor, divCount, blobs)) + | Error e -> Stdlib.Result.Result.Error e + + +// Parse the op count out of a `/sync/health` body (`"sync-server ok; ops=N"`). 0 if unparseable. +// Pure, so the convergence display is testable without a live server. +let parseHealthOps (healthBody: String) : Int64 = + match Stdlib.String.split healthBody "ops=" with + | [ _before; n ] -> + match Stdlib.Int64.parse n with + | Ok v -> v + | Error _ -> 0L + | _ -> 0L + +// The peer's sync RELEASE from its `/sync/health` body ("...; release=; ops="). The single +// coordinate that must match ours for sync to proceed. A peer that omits it (an OLD pre-release +// version) parses to 0 — which reads as a mismatch against any real release, exactly the intent. +let parseHealthRelease (healthBody: String) : Int64 = + match Stdlib.String.split healthBody "release=" with + | [ _before; rest ] -> + // `rest` is "; ops=" — take the digits before the next ";" + match Stdlib.String.split rest ";" with + | n :: _ -> + match Stdlib.Int64.parse n with + | Ok v -> v + | Error _ -> 0L + | _ -> 0L + | _ -> 0L + +// THIS instance's sync release. Single home for the `pmSyncReleaseVersion` builtin (the multi-ref +// guard wants ≤1 reference) — every caller routes through here. +let ourRelease () : Int64 = + Builtin.pmSyncReleaseVersion () + +// Sync requires both sides on the SAME release (the wire gate fails closed — a mismatched batch is +// rejected, never misread). When they differ, this is the clear, actionable line — saying WHICH side +// to upgrade — that a `check`/`pull` shows instead of a raw decode error. "" when releases match (no +// skew). A peer with no release stamp parses to 0 (an old pre-release Dark) → "upgrade it". +let releaseSkewLine + (peerLabel: String) + (ourRelease: Int64) + (peerRelease: Int64) + : String = + if ourRelease == peerRelease then + "" + else if peerRelease == 0L then + "⚠ " ++ peerLabel ++ " is on an older Dark (no release) — upgrade it to sync" + else + let theirs = Stdlib.Int64.toString peerRelease + let ours = Stdlib.Int64.toString ourRelease + let which = if peerRelease < ourRelease then peerLabel else "this machine" + + "⚠ " + ++ peerLabel + ++ " is on Release " + ++ theirs + ++ ", you're on " + ++ ours + ++ " — upgrade " + ++ which + ++ " to sync" + +// "Am I caught up with this peer?" — compare our synced cursor to the peer's op count. Caught up +// when our cursor has reached the peer's latest op; else how far behind, both sides shown. +let convergenceLine + (peerLabel: String) + (ourCursor: Int64) + (peerOpCount: Int64) + : String = + if ourCursor >= peerOpCount then + "✓ " ++ peerLabel ++ " — caught up (" ++ (Stdlib.Int64.toString peerOpCount) ++ " ops)" + else + let behind = peerOpCount - ourCursor + + "⟳ " + ++ peerLabel + ++ " — " + ++ (Stdlib.Int64.toString behind) + ++ " behind (you " + ++ (Stdlib.Int64.toString ourCursor) + ++ " / peer " + ++ (Stdlib.Int64.toString peerOpCount) + ++ ")" diff --git a/packages/darklang/sync/autosync.dark b/packages/darklang/sync/autosync.dark new file mode 100644 index 0000000000..994a2861b3 --- /dev/null +++ b/packages/darklang/sync/autosync.dark @@ -0,0 +1,174 @@ +module Darklang.Sync.Autosync + +// Autosync poll core. The ADAPTIVE interval: snap to a responsive floor right after a +// sync that pulled changes, back off (double, capped at a ceiling) while idle — so a quiet +// tailnet isn't hammered but a real change propagates fast. Pure arithmetic; the loop that +// sleeps + repeats is the daemon (the one part needing a timer + background process). + +let pollFloorMs () : Int64 = 2000L +let pollCeilingMs () : Int64 = 60000L + +let nextPollMs (sawChanges: Bool) (currentMs: Int64) : Int64 = + if sawChanges then + pollFloorMs () + else + let doubled = currentMs * 2L + + if doubled > (pollCeilingMs ()) then + pollCeilingMs () + else + doubled + + +// The adaptive DECISION, given the cursor before/after a pull: `(newCursor, nextIntervalMs)`. The +// cursor ADVANCING means changes arrived → snap to the responsive floor; UNCHANGED (a converged, +// empty pull) → back off. Pure — separated from pollOnce's pull side-effect so it's testable; this +// is what makes a converged tailnet quiet down instead of polling at the floor forever. +let decideNext (before: Int64) (after: Int64) (currentMs: Int64) : (Int64 * Int64) = + (after, nextPollMs (after > before) currentMs) + + +// Pull a peer regardless of transport: an `http(s)` URL pulls over HTTP (ops + content blobs), +// anything else is a local `data.db` file path. Returns `(newCursor, divCount)`. An unreachable +// HTTP peer doesn't advance the cursor — so the poll treats it as idle and backs off (no crash). +let pullPeer (peer: String) : (Int64 * Int64) = + if Stdlib.String.startsWith peer "http" then + match Darklang.Sync.pullHttp peer with + | Ok((c, d, _b)) -> (c, d) + | Error _ -> ((Darklang.Sync.cursorFor peer), 0L) + else if Stdlib.Cli.File.exists peer then + Darklang.Sync.pullFromFile peer + else + // a local peer whose db is currently missing (offline/removed) must NOT crash the daemon — treat + // it like an unreachable http peer: no advance, back off. (Essential for a tailnet-wide daemon, + // where peers come and go.) + ((Darklang.Sync.cursorFor peer), 0L) + + +// Pure: may we sync with a peer on `peerRelease`, given we're on `ourRelease`? Both must match — the +// wire gate fails closed on a mismatch (a skewed batch is rejected, never misread). A peer with no +// release stamp (an old pre-release Dark) reports 0, which never matches a real release → skip. This +// is the Release coordinate (the migrator's version) gating the daemon, same as it gates the wire. +let peerInSync (ourRelease: Int64) (peerRelease: Int64) : Bool = + ourRelease == peerRelease + + +// Release-aware pull of one HTTP peer: check the peer's `/sync/health` release FIRST, and only pull +// when it matches ours. On a skew, SKIP the pull and return the actionable skew line (which side to +// upgrade) — so a half-upgraded tailnet shows its paused peers plainly, instead of the silent +// no-advance you'd get from letting the wire gate reject the batch. Returns +// `(newCursor, divCount, skewLine)` — `skewLine` is "" when in sync (or for a local file peer, which +// has no health endpoint and is pulled as before). An unreachable peer is treated as idle, no skew. +let pullPeerReleaseAware (peer: String) : (Int64 * Int64 * String) = + if Stdlib.String.startsWith peer "http" then + match Darklang.Sync.httpGet (Darklang.Sync.healthUrl peer) with + | Ok health -> + let ours = Darklang.Sync.ourRelease () + let peerRel = Darklang.Sync.parseHealthRelease health + + if peerInSync ours peerRel then + let (c, d) = pullPeer peer + (c, d, "") + else + ((Darklang.Sync.cursorFor peer), 0L, Darklang.Sync.releaseSkewLine peer ours peerRel) + | Error _ -> ((Darklang.Sync.cursorFor peer), 0L, "") + else + let (c, d) = pullPeer peer + (c, d, "") + + +// One poll step: pull the peer (HTTP or file, Release-aware — a skewed HTTP peer is paused, not +// pulled), then decide the next interval — responsive if it brought new ops, backing off if idle. +// Returns `(newCursor, nextIntervalMs, skewLine)`; `skewLine` is "" unless the peer was paused on a +// Release mismatch. A daemon calls this on a timer, sleeping `nextIntervalMs` between calls. +let pollOnce (peer: String) (currentIntervalMs: Int64) : (Int64 * Int64 * String) = + let before = Darklang.Sync.cursorFor peer + let (after, _divCount, skew) = pullPeerReleaseAware peer + let (cursor, nextMs) = decideNext before after currentIntervalMs + (cursor, nextMs, skew) + + +// Run `times` adaptive poll steps, sleeping the computed interval BETWEEN steps (not after the +// last). A blocking FOREGROUND loop — the true background daemon (non-blocking, survives CLI +// exit) is a future detached daemon. Returns the last interval. +let runLoop (peer: String) (intervalMs: Int64) (times: Int64) : Int64 = + if times <= 0L then + intervalMs + else + let (cursor, nextMs, skew) = pollOnce peer intervalMs + + if skew != "" then + Stdlib.printLine $" [autosync] {skew}" + else + () + + Stdlib.printLine + $" [autosync] {peer} → op {Stdlib.Int64.toString cursor}, next in {Darklang.Sync.Display.intervalLabel nextMs}" + + if times > 1L then + Stdlib.Cli.Posix.sleep (Stdlib.Int64.toFloat nextMs) + else + () + + runLoop peer nextMs (times - 1L) + + +// ── tailnet-wide poll ── the daemon syncs EVERY known peer, not one. (Sync is tailnet-wide.) + +// Poll every peer once; return `(sawChanges, conflictsRecorded)` — whether ANY peer advanced (one +// cursor advancing is enough to call the cycle "saw changes" → snap back to the floor) and the total +// divergences auto-resolved this cycle (so the daemon LOG records background conflict resolution — +// the user isn't watching, so this is where it's surfaced, alongside `dark conflicts`/`sync status`). +let pollAllPeers (peers: List) : (Bool * Int64 * List) = + let results = + peers + |> Stdlib.List.map (fun p -> + let before = Darklang.Sync.cursorFor p + let (after, div, skew) = pullPeerReleaseAware p + (after > before, div, skew)) + + let sawChanges = Stdlib.List.any results (fun (changed, _div, _skew) -> changed) + let conflicts = Stdlib.List.fold results 0L (fun acc (_changed, div, _skew) -> acc + div) + // peers paused this cycle on a Release mismatch — surfaced so a half-upgraded tailnet is legible + let skews = + results + |> Stdlib.List.map (fun (_changed, _div, skew) -> skew) + |> Stdlib.List.filter (fun s -> s != "") + (sawChanges, conflicts, skews) + + +// The tailnet-wide adaptive loop: each cycle re-reads the peer set (so peers added mid-run are +// picked up) and polls them all, then sleeps an interval that snaps to the floor if ANY peer brought +// changes and backs off when the whole tailnet is quiet. Returns the last interval. +let runLoopAll (intervalMs: Int64) (times: Int64) : Int64 = + if times <= 0L then + intervalMs + else + let peers = Darklang.Sync.remotes () + let (sawChanges, conflicts, skews) = pollAllPeers peers + let nextMs = nextPollMs sawChanges intervalMs + + let conflictNote = + if conflicts > 0L then + $", {Stdlib.Int64.toString conflicts} conflict(s) auto-resolved" + else + "" + + let skewNote = + if Stdlib.List.isEmpty skews then + "" + else + $", {Stdlib.Int64.toString (Stdlib.List.length skews)} peer(s) paused (Release skew)" + + Stdlib.printLine + $" [autosync] polled {Stdlib.Int64.toString (Stdlib.List.length peers)} peer(s), changes={Stdlib.Bool.toString sawChanges}{conflictNote}{skewNote} → next in {Darklang.Sync.Display.intervalLabel nextMs}" + + // surface each paused peer's actionable upgrade line (which side to upgrade) in the daemon log + skews |> Stdlib.List.iter (fun line -> Stdlib.printLine $" [autosync] {line}") + + if times > 1L then + Stdlib.Cli.Posix.sleep (Stdlib.Int64.toFloat nextMs) + else + () + + runLoopAll nextMs (times - 1L) diff --git a/packages/darklang/sync/daemon.dark b/packages/darklang/sync/daemon.dark new file mode 100644 index 0000000000..42a368c2ab --- /dev/null +++ b/packages/darklang/sync/daemon.dark @@ -0,0 +1,65 @@ +module Darklang.Sync.Daemon + +// The background sync daemon: a DETACHED `dark` process running the adaptive autosync poll loop, tracked +// by a pidfile so `sync daemon start/stop/status/logs` (and `dark apps`) can manage it. The generic +// process lifecycle — pidfile, signal-0 liveness, SIGTERM stop, log tail, verified detached launch — is +// the shared `Stdlib.Cli.Daemon` substrate (the same one `dark apps` daemons use). Only the sync-specific +// entrypoints and the launch expression live here. + +let private daemonName : String = "sync-daemon" + + +// ── lifecycle: thin pass-throughs to the shared daemon substrate ── + +let statusLine (name: String) : String = Stdlib.Cli.Daemon.statusLine name + +let stop (name: String) : Stdlib.Result.Result = Stdlib.Cli.Daemon.stop name + +let tailLog (name: String) (n: Int64) : List = Stdlib.Cli.Daemon.tailLog name n + + +// ── entrypoints: run detached, claim the pidfile (recording this worker's pid), then poll forever +// (≈ a billion steps); a SIGTERM from `sync daemon stop` ends it. Invoked via `eval`, see `start`. ── + +let runDaemon (peer: String) (intervalMs: Int64) : Int64 = + let _ = Stdlib.Cli.Daemon.claimPidfile daemonName + Stdlib.printLine + $"daemon: started for {peer} (pid {Stdlib.Int64.toString (Stdlib.Cli.Sys.currentPid ())})" + Darklang.Sync.Autosync.runLoop peer intervalMs 1000000000L + +let runDaemonAll (intervalMs: Int64) : Int64 = + let _ = Stdlib.Cli.Daemon.claimPidfile daemonName + Stdlib.printLine + $"daemon: started for all peers (pid {Stdlib.Int64.toString (Stdlib.Cli.Sys.currentPid ())})" + Darklang.Sync.Autosync.runLoopAll intervalMs 1000000000L + + +// ── launch: spawn the entrypoint detached + verified (refuses if already running) via the substrate ── + +let start (peer: String) (intervalMs: Int64) : Stdlib.Result.Result = + // a file peer that doesn't exist would launch a daemon that crashes on the first pull and goes stale — + // refuse early. (http peers are checked at poll time: an unreachable one just backs off, doesn't crash.) + if + (Stdlib.Bool.not (Stdlib.String.startsWith peer "http")) + && (Stdlib.Bool.not (Stdlib.Cli.File.exists peer)) + then + Stdlib.Result.Result.Error $"peer file not found: {peer} (nothing to sync from)" + else + let q = "\"" + let expr = + "Darklang.Sync.Daemon.runDaemon " ++ q ++ peer ++ q ++ " " ++ (Stdlib.Int64.toString intervalMs) ++ "L" + + match Stdlib.Cli.Daemon.start daemonName expr with + | Ok pid -> + Stdlib.Result.Result.Ok + $"sync-daemon started for {peer} (pid {Stdlib.Int64.toString pid}, interval {Stdlib.Int64.toString intervalMs}ms)" + | Error e -> Stdlib.Result.Result.Error e + +let startAll (intervalMs: Int64) : Stdlib.Result.Result = + let expr = "Darklang.Sync.Daemon.runDaemonAll " ++ (Stdlib.Int64.toString intervalMs) ++ "L" + + match Stdlib.Cli.Daemon.start daemonName expr with + | Ok pid -> + Stdlib.Result.Result.Ok + $"sync-daemon started for all peers (pid {Stdlib.Int64.toString pid}, interval {Stdlib.Int64.toString intervalMs}ms)" + | Error e -> Stdlib.Result.Result.Error e diff --git a/packages/darklang/sync/display.dark b/packages/darklang/sync/display.dark new file mode 100644 index 0000000000..ec0023c1d8 --- /dev/null +++ b/packages/darklang/sync/display.dark @@ -0,0 +1,139 @@ +module Darklang.Sync.Display + +// The human-facing summary over a sync result — the UX surface of `dark sync`. Pure: the counts +// come from the pull (cursor delta = ops pulled, since the op log is append-only and contiguous); +// this just formats them. + +let opWord (n: Int64) : String = + if n == 1L then "op" else "ops" + +let pullLine (remote: String) (pulled: Int64) : String = + "↓ pulled " ++ (Stdlib.Int64.toString pulled) ++ " " ++ (opWord pulled) ++ " from " ++ remote + +let syncSummary (remote: String) (pulled: Int64) : String = + if pulled == 0L then "✓ already in sync with " ++ remote else pullLine remote pulled + +// Render a poll interval for humans: whole seconds when it divides evenly (the adaptive intervals +// always do — floor, ceiling, and doublings are all whole seconds), else milliseconds. +let intervalLabel (ms: Int64) : String = + if ms >= 1000L && ms % 1000L == 0L then + (Stdlib.Int64.toString (ms / 1000L)) ++ "s" + else + (Stdlib.Int64.toString ms) ++ "ms" + + +// the richer op-kind breakdown: "(2 fns, 1 type, 1 rename)" over the pulled op kinds. (The kinds +// themselves aren't threaded through the pull path yet — these formatters are ready for when they are.) +let countOf (kinds: List) (k: String) : Int64 = + Stdlib.List.length (Stdlib.List.filter kinds (fun x -> x == k)) + +let pluralKind (k: String) (n: Int64) : String = + if n == 1L then k else k ++ "s" + +let kindPart (kinds: List) (k: String) : List = + let n = countOf kinds k + if n == 0L then [] else [ (Stdlib.Int64.toString n) ++ " " ++ (pluralKind k n) ] + +let opKindBreakdown (kinds: List) : String = + let parts = + Stdlib.List.append + (Stdlib.List.append (kindPart kinds "fn") (kindPart kinds "type")) + (Stdlib.List.append (kindPart kinds "value") (kindPart kinds "rename")) + + Stdlib.String.join parts ", " + + +// ── conflict display: surfacing last-write-wins auto-resolutions for eventual ack ── +// Sync NEVER blocks — a `name → two hashes` divergence is auto-resolved by timestamp-LWW (newest +// authoring time wins) and RECORDED so it's never silently lost. This is the review surface +// (`dark conflicts`): it shows what was auto-kept, which side won, and nudges the user to `ack` +// (agree) or `resolve … mine|theirs` (override). Pure over the structured tuples from +// `pmConflictsList` — testable in testfiles/execution/pre-s-and-s/conflicts-list.dark. + +let shortHash (h: String) : String = + if Stdlib.String.length h > 8L then Stdlib.String.slice h 0L 8L else h + +// Which side timestamp-LWW kept, read from the recorded resolution tag: "you" | "them" | "" (unknown). +let conflictWinner (resolution: String) : String = + if Stdlib.String.contains resolution "incoming won" then + "them" + else if Stdlib.String.contains resolution "kept local" then + "you" + else + "" + +// The human verdict phrase — always names last-write-wins so the auto-resolution is never opaque. +let conflictVerdict (resolution: String) : String = + match conflictWinner resolution with + | "them" -> "last-write-wins → kept theirs" + | "you" -> "last-write-wins → kept yours" + | _ -> "auto-resolved (last-write-wins)" + +// you/them hashes with a ✓ on the side that won (and the arrow pointing to it). +let conflictHashes (resolution: String) (localHash: String) (incomingHash: String) : String = + let you = shortHash localHash + let them = shortHash incomingHash + + match conflictWinner resolution with + | "them" -> "you " ++ you ++ " → them " ++ them ++ " ✓" + | "you" -> "you " ++ you ++ " ✓ ← them " ++ them + | _ -> "you " ++ you ++ " · them " ++ them + +// status glyph: pending (needs ack) vs already-dismissed history. +let conflictGlyph (status: String) : String = + if status == "acked" then "✓" + else if status == "overridden" then "↩" + else "⚠" + +// One recorded conflict as a 2-line block: the location headline, then the indented verdict + +// who-won + peer + (for pending) the exact `ack ` to dismiss it. +let conflictBlock + (id: String) + (location: String) + (status: String) + (resolution: String) + (localHash: String) + (incomingHash: String) + (remote: String) + : List = + let ackHint = + if status == "NEW" then " · ack " ++ (shortHash id) else "" + + [ (conflictGlyph status) ++ " " ++ location + " " + ++ (conflictVerdict resolution) + ++ " " + ++ (conflictHashes resolution localHash incomingHash) + ++ " · from " + ++ remote + ++ ackHint ] + +// The full `dark conflicts` report over the structured rows from `pmConflictsList`. +// Header frames the ack model; the footer lists the actions. Empty → a clean "nothing pending". +let conflictReport + (conflicts: List<(String * String * String * String * String * String * String)>) + (includeResolved: Bool) + : List = + if Stdlib.List.isEmpty conflicts then + if includeResolved then + [ "✓ no sync conflicts on record" ] + else + [ "✓ no pending sync conflicts — every divergence is acknowledged" ] + else + let n = Stdlib.List.length conflicts + + let header = + (Stdlib.Int64.toString n) + ++ " auto-resolved conflict(s) awaiting ack — last-write-wins kept one side, nothing lost:" + + let blocks = + conflicts + |> Stdlib.List.map (fun (id, location, status, resolution, localHash, incomingHash, remote) -> + conflictBlock id location status resolution localHash incomingHash remote) + |> Stdlib.List.flatten + + let footer = " ack (agree) · ack all · resolve mine|theirs (override)" + + Stdlib.List.append + (Stdlib.List.append [ header; "" ] blocks) + [ ""; footer ] diff --git a/packages/darklang/sync/server.dark b/packages/darklang/sync/server.dark new file mode 100644 index 0000000000..27dd9535f5 --- /dev/null +++ b/packages/darklang/sync/server.dark @@ -0,0 +1,108 @@ +module Darklang.Sync.Server + +// The sync SERVER side (cross-machine sync over HTTP/Tailscale). Serve this with +// `dark serve Darklang.Sync.Server.router --port `; a peer pulls from it with +// `dark sync pull http://:`. The body is a base64 wire batch (from `pmSyncOpsSince`) +// the puller decodes + applies via `pmSyncApplyWire` — the same op-playback fold the file +// transport uses, just carried over HTTP. + +// Parse the `?since=` resume point from a request URL. Defaults to 0 (the whole log) when +// the param is ABSENT or UNPARSEABLE — so a bare or malformed GET from a confused client still +// bootstraps the peer instead of erroring. Pure, so it's testable without a live server. +let parseSince (url: String) : Int64 = + match Stdlib.Dict.get (Stdlib.Http.parseQueryString url) "since" with + | Some s -> + match Stdlib.Int64.parse s with + | Ok n -> n + | Error _ -> 0L + | None -> 0L + + +// Parse `?committed=1` (or `committed=true`). RESERVED: sync ships committed ops only for now (the +// unit of sync is the commit; WIP stays local — see `Inserts.opsSinceCommitted`). Both builtins are +// committed-only today, so this toggle is a no-op kept for when WIP/own-device sync is added back. +let parseCommitted (url: String) : Bool = + match Stdlib.Dict.get (Stdlib.Http.parseQueryString url) "committed" with + | Some s -> s == "1" || s == "true" + | None -> false + + +// GET /sync/events?since= — the COMMITTED ops after `since`, as a base64 wire batch. +// `since` is a commit-rowid cursor (commit-granular). WIP is never shipped (deferred). +let eventsHandlerFn (req: Stdlib.Http.Request) : Stdlib.Http.Response = + let since = parseSince req.url + + // Both arms are committed-only today (pmSyncOpsSince now reads opsSinceCommitted). Kept structurally + // so re-enabling WIP sync later is a one-line change, not a re-plumb. + let body = + if parseCommitted req.url then + Builtin.pmSyncOpsSinceCommitted since + else + Builtin.pmSyncOpsSince since + + Stdlib.Http.responseWithText body 200L + + +let eventsHandler: Stdlib.HttpServer.Handler = + Stdlib.HttpServer.Handler + { route = "/sync/events" + method = "GET" + handler = eventsHandlerFn } + + +// GET /sync/health — liveness + op count. A puller (or a person) hits this to confirm the server +// is up and reachable BEFORE pulling (the readiness probe the live cross-machine setup needs), and +// to see convergence: compare the server's op count to the local one. +let healthHandlerFn (req: Stdlib.Http.Request) : Stdlib.Http.Response = + Stdlib.Http.responseWithText (Builtin.pmSyncHealth ()) 200L + + +let healthHandler: Stdlib.HttpServer.Handler = + Stdlib.HttpServer.Handler + { route = "/sync/health" + method = "GET" + handler = healthHandlerFn } + + +// ── HTTP blob channel (sender side) — closes the file/HTTP asymmetry: code rides in ops, but a +// value's large `package_blobs` content needs its own transfer. Mirrors the file pull's blob fetch. + +// GET /sync/blobs — the blob manifest (every content hash this server holds, newline-joined). +let blobsHandlerFn (req: Stdlib.Http.Request) : Stdlib.Http.Response = + Stdlib.Http.responseWithText (Builtin.pmSyncBlobHashes ()) 200L + + +let blobsHandler: Stdlib.HttpServer.Handler = + Stdlib.HttpServer.Handler + { route = "/sync/blobs" + method = "GET" + handler = blobsHandlerFn } + + +// Parse `?hash=` for the single-blob fetch. Empty if absent. Pure, so it's testable. +let parseHash (url: String) : String = + match Stdlib.Dict.get (Stdlib.Http.parseQueryString url) "hash" with + | Some h -> h + | None -> "" + + +// GET /sync/blob?hash= — the base64 bytes for one content hash (empty body if this server +// lacks it). The receiver base64-decodes + inserts (content-addressed, so the insert dedups). +let blobHandlerFn (req: Stdlib.Http.Request) : Stdlib.Http.Response = + Stdlib.Http.responseWithText (Builtin.pmSyncBlobBytes (parseHash req.url)) 200L + + +let blobHandler: Stdlib.HttpServer.Handler = + Stdlib.HttpServer.Handler + { route = "/sync/blob" + method = "GET" + handler = blobHandlerFn } + + +let handlers: List = + [ eventsHandler; healthHandler; blobsHandler; blobHandler ] + + +/// Router for the sync server — pass to `dark serve` (a named fn so the builtin can call it). +let router (req: Stdlib.Http.Request) : Stdlib.Http.Response = + Stdlib.HttpServer.routeRequest handlers req diff --git a/packages/darklang/sync/tailscale.dark b/packages/darklang/sync/tailscale.dark new file mode 100644 index 0000000000..4340bf371b --- /dev/null +++ b/packages/darklang/sync/tailscale.dark @@ -0,0 +1,11 @@ +module Darklang.Sync.Tailscale + +// Sync's USE of the base `Darklang.Tailscale` surface — the seam between the generic tailnet +// helpers and how sync addresses a peer. The CLI (`dark remote add-ts`) goes through here rather +// than reaching into the base lib directly, so the sync layer owns "what a tailnet peer means to +// sync" (today: its sync server is reached at the peer's MagicDNS https URL). + +// The sync-peer URL for a tailnet peer: where this peer's sync server is served (via +// `tailscale serve`). Delegates to the base lib's URL builder. +let peerUrl (name: String) (tailnet: String) : String = + Darklang.Tailscale.peerUrl name tailnet From 7d77fc4f06c935fca654db9b88be160647a3ec0b Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 15:05:35 -0400 Subject: [PATCH 04/25] Keep the op log across a schema change When schema.sql changes, drop only the regenerable projection tables and re-fold the surviving op log, instead of dropping every table. The canonical log, blobs, and branch/commit state come through identical, so a schema change can't lose authored work. --- backend/src/LocalExec/Migrations.fs | 86 ++++++++++++++++++----------- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/backend/src/LocalExec/Migrations.fs b/backend/src/LocalExec/Migrations.fs index e5010d6fcb..d471ca2b4b 100644 --- a/backend/src/LocalExec/Migrations.fs +++ b/backend/src/LocalExec/Migrations.fs @@ -27,10 +27,9 @@ /// /// CLEANUP maybe move this to LibDB? /// -/// Pre-cutover DBs (those whose `system_migrations_v0` already lists -/// 13 historical names) get adopted: stamp the current schema hash -/// without dropping data. Drop this adapter once nobody's pulling -/// pre-2026-05-08 main. +/// A legacy DB (one whose `system_migrations_v0` already lists the full +/// set of old migration names) is adopted: stamp the current schema hash +/// without dropping data. module LocalExec.Migrations open System.IO @@ -82,24 +81,35 @@ let private storedHash () : Option = | Error err -> Exception.raiseInternal $"storedHash: {err}" [ "err", err ] -let private dropAllUserTables () : unit = - // Disable FK enforcement for the bulk drop. Without this, SQLite refuses - // to drop parent tables before children when the child table's FK - // column is non-nullable; drop order is sqlite_master row order, - // not topological. PRAGMA foreign_keys is connection-scoped, so the - // next connection (which runs schema.sql) gets the default back. +/// Drop ONLY the regenerable projection tables — never the canonical op log, blobs, branches, +/// commits, or account/user state. This is what lets a schema change keep your work: your authored +/// ops survive; only the cache is rebuilt. The list is `Seed.projectionTables` (single source +/// of truth — the same set the runtime's `rebuildProjections` clears), so it can't drift. +let private dropProjectionTables () : unit = + // FK off for the drop (a child projection may FK a parent we're keeping); connection-scoped, so + // the next connection (which replays schema.sql) gets the default back. Sql.query "PRAGMA foreign_keys = OFF" |> Sql.executeStatementSync - let userTables = - Sql.query - "SELECT name FROM sqlite_master - WHERE type = 'table' - AND name NOT LIKE 'sqlite_%' - AND name <> 'schema_state_v0'" - |> Sql.execute (fun read -> read.string "name") - |> Result.unwrap - for t in userTables do + for t in LibDB.Seed.projectionTables do Sql.query (sprintf "DROP TABLE IF EXISTS \"%s\"" t) |> Sql.executeStatementSync +/// Mark every op unapplied so the next `Seed.growIfNeeded` re-folds the whole log into the freshly +/// recreated projections. Re-folding (with value evaluation) needs the runtime, which the migration +/// phase doesn't have — so we defer the fold to startup, exactly like a fresh seed does. +let private markOpsUnapplied () : unit = + if tableExists "package_ops" then + Sql.query "UPDATE package_ops SET applied = 0" |> Sql.executeStatementSync + +let private opCount () : int = + if tableExists "package_ops" then + match + Sql.query "SELECT COUNT(*) AS c FROM package_ops" + |> Sql.execute (fun read -> read.int "c") + with + | Ok(c :: _) -> c + | _ -> 0 + else + 0 + let private writeHash (hash : string) : unit = Sql.query @@ -111,11 +121,10 @@ let private writeHash (hash : string) : unit = |> Sql.executeStatementSync -/// Pre-cutover DBs were migrated by name through `system_migrations_v0`. -/// If we see one with the full set of old migration names already -/// applied, treat it as fully-migrated under the new flow — write the -/// current schema hash so subsequent runs see "up to date" and don't -/// kill-and-fill. Drop this once nobody's pulling pre-2026-05-08 main. +/// A legacy DB was migrated by name through `system_migrations_v0`. If one +/// already has the full set of old migration names applied, treat it as +/// fully migrated under the schema-hash flow — write the current schema +/// hash so subsequent runs see "up to date" and don't kill-and-fill. let private adoptLegacyDB (currentHash : string) : bool = if not (tableExists "system_migrations_v0") then false @@ -129,7 +138,7 @@ let private adoptLegacyDB (currentHash : string) : bool = | _ -> 0 if count >= 13 then print - $"Adopting pre-cutover DB ({count} migrations on record). \ + $"Adopting legacy DB ({count} migrations on record). \ Stamping schema hash; no data dropped." writeHash currentHash true @@ -144,11 +153,19 @@ let private runSchemaBootstrap () : unit = match storedHash () with | Some have when have = want -> () | Some have -> + // Preserve-and-refold (not kill-and-fill): drop only the regenerable projections; the canonical op + // log + blobs + branch/commit/account state survive. Replaying schema.sql recreates the dropped + // projections in their new shape and is a no-op for the surviving canonical tables + // (CREATE TABLE IF NOT EXISTS). Marking ops unapplied makes the next `growIfNeeded` re-fold them. + // NOTE: a canonical-table SHAPE change can't go through this path (CREATE IF NOT EXISTS won't + // alter an existing table) — it needs a data-preserving incremental (the Release migrator). + let ops = opCount () print - $"schema.sql changed (hash {have[0..7]} → {want[0..7]}); \ - kill-and-fill." - dropAllUserTables () + $"schema.sql changed (hash {have[0..7]} → {want[0..7]}); preserving {ops} op(s), \ + rebuilding projections." + dropProjectionTables () Sql.query sql |> Sql.executeStatementSync + markOpsUnapplied () writeHash want | None -> if adoptLegacyDB want then @@ -162,10 +179,9 @@ let private runSchemaBootstrap () : unit = // Per-file incremental migrations (atop the schema.sql base) // --------------------- // -// Lifted from the pre-cutover shape — name-dedup'd via -// `system_migrations_v0`. schema.sql guarantees the table exists, so -// no separate init step. File naming convention: -// `YYYYMMDD_HHMMSS_.sql`. +// Each file runs once, name-dedup'd via `system_migrations_v0`. +// schema.sql guarantees the table exists, so no separate init step. +// File naming convention: `YYYYMMDD_HHMMSS_.sql`. let private incrementalDir = "incremental" @@ -240,4 +256,10 @@ let private runIncrementalMigrations () : unit = let run () : unit = runSchemaBootstrap () + // The Release migrator: after the schema-hash bootstrap, reconcile the store's Release coordinate + // (the op-format/language/hash version) with this binary's. A fresh store is stamped at the current + // Release; an older store runs the pending migration steps; a NEWER store is refused (older code + // must not open it). The single coordinate is `Sync.wireFormatVersion` — the same one that gates + // cross-instance sync. The step registry lives in `LibDB.Releases`. + LibDB.Releases.applyPending LibDB.Sync.wireFormatVersion runIncrementalMigrations () From ee73d9d3c1d5adb7316dd8035320fa463c76cc89 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 15:05:36 -0400 Subject: [PATCH 05/25] Record branch-merge collisions in the conflict store A merge silently lets the merging branch win when both branches bound the same name to different content. Detect those collisions and record each in the same reviewable conflict store sync uses, so concurrent edits across branches converge visibly instead of overwriting without a trace. --- backend/src/LibDB/Merge.fs | 51 ++++++++ backend/tests/Tests/BranchOps.Tests.fs | 166 ++++++++++++++++++++++++- 2 files changed, 215 insertions(+), 2 deletions(-) diff --git a/backend/src/LibDB/Merge.fs b/backend/src/LibDB/Merge.fs index 5d2af47766..85bd260e50 100644 --- a/backend/src/LibDB/Merge.fs +++ b/backend/src/LibDB/Merge.fs @@ -82,6 +82,42 @@ let canMerge (branchId : PT.BranchId) : Task> = } +/// A merge collision: the merging branch and its parent BOTH have a listed binding for the same FQN, +/// to DIFFERENT content. A `MergeBranch` silently lets the child win (it unlists the parent's +/// binding) — so, exactly like a sync divergence, we detect these and RECORD them in the same +/// reviewable conflict store, rather than dropping the parent's binding without a trace. Two AI +/// agents editing the same item on two branches across two instances converge here: the merge still +/// completes (child-wins stands), but the overwrite is visible via `dark conflicts`, not silent. +/// +/// Returns `(location, parentHash, childHash)` per collision, with `location` formatted exactly as +/// `Sync.detectDivergences` formats it, so merge collisions and sync divergences read identically. +let detectMergeCollisions + (branchId : PT.BranchId) + (parentId : PT.BranchId) + : Task> = + Sql.query + """ + SELECT ch.owner AS owner, ch.modules AS modules, ch.name AS name, + p.item_hash AS parent_hash, ch.item_hash AS child_hash + FROM locations ch + JOIN locations p + ON p.owner = ch.owner AND p.modules = ch.modules + AND p.name = ch.name AND p.item_type = ch.item_type + WHERE ch.branch_id = @branch_id AND ch.unlisted_at IS NULL + AND p.branch_id = @parent_id AND p.unlisted_at IS NULL + AND p.item_hash <> ch.item_hash + """ + |> Sql.parameters + [ "branch_id", Sql.uuid branchId; "parent_id", Sql.uuid parentId ] + |> Sql.executeAsync (fun read -> + let owner = read.string "owner" + let modules = read.string "modules" + let name = read.string "name" + let locStr = + if modules = "" then $"{owner}.{name}" else $"{owner}.{modules}.{name}" + (locStr, read.string "parent_hash", read.string "child_hash")) + + /// Merge a branch into its parent. /// /// TODO (multi-tenant): reads parent's state (`canMerge` queries @@ -101,6 +137,21 @@ let merge (branchId : PT.BranchId) : Task> = | Some branch -> let parentId = branch.parentBranchId |> Option.defaultValue PT.mainBranchId + // Detect + record merge collisions BEFORE MergeBranch unlists the parent's binding. The + // child still wins (the merge semantics are unchanged), but each silently-overwritten parent + // binding is recorded in the same store sync uses — reviewable via `dark conflicts`, keyed + // by `merge:` so its origin is clear. localHash = the parent binding we replaced, + // incomingHash = the child binding that won (mirrors sync's local-vs-incoming). + let! collisions = detectMergeCollisions branchId parentId + for (locStr, parentHash, childHash) in collisions do + do! + Conflicts.record + $"merge:{branch.name}" + locStr + parentHash + childHash + "MergeChildWins" + do! BranchOpPlayback.insertAndApply ( PT.BranchOp.MergeBranch(branchId, parentId) diff --git a/backend/tests/Tests/BranchOps.Tests.fs b/backend/tests/Tests/BranchOps.Tests.fs index 801ece198b..ee90a50e0d 100644 --- a/backend/tests/Tests/BranchOps.Tests.fs +++ b/backend/tests/Tests/BranchOps.Tests.fs @@ -12,7 +12,6 @@ open TestUtils.PTShortcuts module PT = LibExecution.ProgramTypes module Branches = LibDB.Branches module Inserts = LibDB.Inserts -module BranchOpPlayback = LibDB.BranchOpPlayback open Fumble open LibDB.Sqlite @@ -387,6 +386,166 @@ let testPartialCommitDeprecationState = } +// MERGE COLLISIONS through the conflict mechanism. Two branches (think: two AI agents on two +// instances) both bind the SAME name to DIFFERENT content. A merge silently lets the child win +// (the parent binding is unlisted) — so, exactly like a sync divergence, the collision is DETECTED +// and RECORDED in the same reviewable store, instead of vanishing. This pins the detection. +let testMergeCollisionDetection = + testTask + "merge collisions: same FQN bound to different content on two branches is detected" { + let! (parent : PT.Branch) = Branches.create "mc-parent" PT.mainBranchId + let! (child : PT.Branch) = Branches.create "mc-child" PT.mainBranchId + + let fnP = makeFn (eVar "x") + let fnC = makeFn (eVar "y") // different body → different content hash + Expect.notEqual + fnP.hash + fnC.hash + "the two fns differ in content (different hashes)" + + // same location, different hash, on each branch (WIP is enough — detection reads `locations`) + let! (_ : int64) = + Inserts.insertAndApplyOpsAsWip + parent.id + [ PT.PackageOp.AddFn fnP + PT.PackageOp.SetName(loc "collide", PT.PackageFn fnP.hash) ] + let! (_ : int64) = + Inserts.insertAndApplyOpsAsWip + child.id + [ PT.PackageOp.AddFn fnC + PT.PackageOp.SetName(loc "collide", PT.PackageFn fnC.hash) ] + // a child-only name must NOT count as a collision + let fnSolo = makeFn (eInt64 7L) + let! (_ : int64) = + Inserts.insertAndApplyOpsAsWip + child.id + [ PT.PackageOp.AddFn fnSolo + PT.PackageOp.SetName(loc "childonly", PT.PackageFn fnSolo.hash) ] + + let! collisions = LibDB.Merge.detectMergeCollisions child.id parent.id + let (PT.Hash pHash) = fnP.hash + let (PT.Hash cHash) = fnC.hash + match + collisions |> List.filter (fun (l, _, _) -> l = "Test.BranchOps.collide") + with + | [ (_, parentHash, childHash) ] -> + Expect.equal parentHash pHash "parent's (overwritten/local) hash is recorded" + Expect.equal childHash cHash "child's (incoming/winning) hash is recorded" + | other -> + failtest + $"expected exactly one collision at Test.BranchOps.collide, got {List.length other}" + Expect.isFalse + (collisions |> List.exists (fun (l, _, _) -> l = "Test.BranchOps.childonly")) + "a name only the child branch has is not a collision" + } + + +// MERGE COLLISIONS, end to end: a real `merge` records each collision in the conflict store (the +// SAME `dark conflicts` surface sync uses). Everything is kept off `main` (P off main, C off P) so +// the shared main bindings aren't disturbed by the parallel suite. +let testMergeRecordsCollisions = + testTask + "merge records each collision in the conflict store (the surface sync uses)" { + let! (p : PT.Branch) = Branches.create "mc-rec-parent" PT.mainBranchId + let fnP = makeFn (eVar "x") + let! (_ : int64) = + Inserts.insertAndApplyOpsAsWip + p.id + [ PT.PackageOp.AddFn fnP + PT.PackageOp.SetName(loc "reccollide", PT.PackageFn fnP.hash) ] + let! (commitP : Result) = + Inserts.commitWipOps LibCloud.Account.IDs.darklang p.id "p binds reccollide" + Expect.isOk commitP "P's commit should succeed" + + // C branches off P's latest commit (so it's rebased), then rebinds the name to different content + let! (c : PT.Branch) = Branches.create "mc-rec-child" p.id + let fnC = makeFn (eVar "y") + let! (_ : int64) = + Inserts.insertAndApplyOpsAsWip + c.id + [ PT.PackageOp.AddFn fnC + PT.PackageOp.SetName(loc "reccollide", PT.PackageFn fnC.hash) ] + let! (commitC : Result) = + Inserts.commitWipOps LibCloud.Account.IDs.darklang c.id "c rebinds reccollide" + Expect.isOk commitC "C's commit should succeed" + + let! (mergeResult : Result) = LibDB.Merge.merge c.id + Expect.isOk mergeResult "merge C into P should succeed" + + let! all = LibDB.Conflicts.list () + let (PT.Hash pHash) = fnP.hash + let (PT.Hash cHash) = fnC.hash + let recorded = + all + |> List.filter (fun (x : LibDB.Conflicts.Conflict) -> + x.location = "Test.BranchOps.reccollide" && x.remote = "merge:mc-rec-child") + match recorded with + | [ conflict ] -> + Expect.equal + conflict.localHash + pHash + "parent (overwritten) hash is the local side" + Expect.equal + conflict.incomingHash + cHash + "child (winning) hash is the incoming side" + Expect.equal + conflict.resolution + "MergeChildWins" + "the merge auto-resolution is recorded for review" + | other -> + failtest $"expected one recorded merge collision, got {List.length other}" + } + + +// The real edit → commit flow on a branch — the op path `dark --branch feat fn …` and +// `dark --branch feat commit …` drive (insertAndApplyOpsAsWip → commitWipOps) — wired to the +// committed-only sync read. A WIP edit is NOT shippable to a peer; committing it is what makes the +// op appear in the sync stream. (The commit is the unit of sync.) +let testEditCommitSyncFlow = + testTask + "edit → commit on a branch: WIP isn't synced, the commit makes it shippable" { + let! (branch : PT.Branch) = Branches.create "sync-flow" PT.mainBranchId + + // `dark --branch sync-flow fn Test.BranchOps.flow …` → a WIP AddFn + SetName on the branch + let fn = makeFn (eVar "x") + let ops = + [ PT.PackageOp.AddFn fn + PT.PackageOp.SetName(loc "flow", PT.PackageFn fn.hash) ] + let! (_ : int64) = Inserts.insertAndApplyOpsAsWip branch.id ops + + let committedOnBranch () : Task = + Sql.query + "SELECT COUNT(*) as cnt FROM package_ops WHERE branch_id = @b AND commit_hash IS NOT NULL" + |> Sql.parameters [ "b", Sql.uuid branch.id ] + |> Sql.executeRowAsync (fun read -> read.int64 "cnt") + + // WIP: nothing committed on the branch yet → the committed-only sync read won't ship it + let! committedBefore = committedOnBranch () + Expect.equal + committedBefore + 0L + "a WIP edit is uncommitted, so sync won't ship it" + let! syncBefore = Inserts.opsSinceCommitted 0L + + // `dark --branch sync-flow commit "add flow"` + let! (commitResult : Result) = + Inserts.commitWipOps LibCloud.Account.IDs.darklang branch.id "add flow" + Expect.isOk commitResult "commit should succeed" + + // now the branch's two ops are committed AND appear in the committed-only sync stream + let! committedAfter = committedOnBranch () + Expect.equal + committedAfter + 2L + "after commit, the branch's AddFn + SetName are committed" + let! syncAfter = Inserts.opsSinceCommitted 0L + Expect.isTrue + (List.length syncAfter >= List.length syncBefore + 2) + "the committed ops now appear in the committed-only sync read (committing is publishing)" + } + + let tests = testList "BranchOps" @@ -396,4 +555,7 @@ let tests = testGhostFunctionCrossBranch testPartialCommit testPartialCommitSameFqn - testPartialCommitDeprecationState ] + testPartialCommitDeprecationState + testMergeCollisionDetection + testMergeRecordsCollisions + testEditCommitSyncFlow ] From 7360b3f1347187fa774032a5028cf534e01e38d7 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 15:05:36 -0400 Subject: [PATCH 06/25] Add the Release migrator and version coordinate A single Release version (the sync wire version) gates cross-instance compatibility and local store upgrades. The migrator is a forward-only registry of steps with a boot guard: a store from a newer Release is refused, an older store is migrated forward, and a fresh store is stamped at the current Release. A step is either a durable migration (copy-and-swap SQL + an optional one-shot op re-serialize + a projection re-fold) or a clean-break boundary that clears the package dataset so it rebuilds from source or re-pulls from a same-Release peer. --- backend/src/LibDB/LibDB.fsproj | 11 ++ backend/src/LibDB/Releases.fs | 193 ++++++++++++++++++++++++++ backend/tests/Tests/Releases.Tests.fs | 115 +++++++++++++++ 3 files changed, 319 insertions(+) create mode 100644 backend/src/LibDB/Releases.fs create mode 100644 backend/tests/Tests/Releases.Tests.fs diff --git a/backend/src/LibDB/LibDB.fsproj b/backend/src/LibDB/LibDB.fsproj index 5b55851ff4..7f594828d0 100644 --- a/backend/src/LibDB/LibDB.fsproj +++ b/backend/src/LibDB/LibDB.fsproj @@ -31,6 +31,9 @@ + + @@ -45,6 +48,14 @@ + + + + + + + + diff --git a/backend/src/LibDB/Releases.fs b/backend/src/LibDB/Releases.fs new file mode 100644 index 0000000000..373318792d --- /dev/null +++ b/backend/src/LibDB/Releases.fs @@ -0,0 +1,193 @@ +/// The Release migrator — moves a store forward one Release at a time, and (the part that matters +/// today) REFUSES to open a store from a NEWER Release with older code. +/// +/// A **Release** is the single version coordinate spanning {language/`ProgramTypes`, op-serialization +/// format, SQL schema, content-hashing} — the same integer (`Sync.wireFormatVersion`) that gates +/// cross-instance sync also gates whether this binary may open this store. One coordinate, one upgrade. +/// +/// A `Release N` step bundles: +/// 1. a forward, **copy-and-swap** canonical `.sql` — never `DROP`; the op log is preserved, +/// 2. an optional **op-format remap** — re-serialize the whole log once, in one transaction, +/// 3. a **projection refold** — mark ops unapplied; startup regenerates the projections. +/// Projections are dropped+refolded, never migrated. Forward-only; the undo is "restore from a peer" +/// (every peer holds the whole log, so the tailnet is the backup). +/// +/// The current baseline is **Release 3** — a fresh store is born here. The lone registry entry records +/// why 3 is a clean break (meaning-stable hashing redefined every content hash), but because no older +/// store exists, no store ever replays it; the machinery is in place for the first real format change, +/// which appends the next entry. See `releases`. +module LibDB.Releases + +open Prelude + +open Fumble +open LibDB.Sqlite + +/// One forward step that ARRIVES at Release `n` (apply it to move a store from `n-1` to `n`). +type Release = + { + /// the Release this step lands on + n : int + /// canonical-table forward migration — copy-and-swap, NEVER drop. "" = no canonical-shape change. + sql : string + /// op-format remap (old `op_blob` bytes → new), only when the serialization format changed. + /// `None` = the op format is unchanged at this step (the common case). + reserialize : (byte[] -> byte[]) option + /// CLEAN-BREAK boundary: when `true`, pre-this-Release data is treated as disposable and the package + /// dataset is CLEARED so the store rebuilds from source (dev) or re-pulls from a same-Release peer + /// (tailnet). Use this only when the change can't be cheaply migrated AND the old data is disposable + /// — e.g. a content-hash redefinition. The default (`false`) is the durable path (sql/reserialize + + /// re-fold). The PRINCIPLE: PT (the op log) is the source of truth and migrates forward; RT-derived + /// data (rt_dval, traces) is never migrated — it's dropped and recomputed from PT. + clearForRebuild : bool + } + + +/// The ordered registry of forward steps. Add an entry when you bump `Sync.wireFormatVersion`; the +/// migrator does the rest. Each `n` must be exactly one greater than the previous (see +/// `registryIsWellFormed`). +/// +/// **Release 3 — meaning-stable hashing.** Content hashes are now over the alpha-normalized canonical +/// form, so older `op_blob`s would embed stale hashes and can't be cheaply migrated — hence the +/// CLEAN-BREAK marker (`clearForRebuild`). In practice every store is BORN at Release 3 and never runs +/// this step; it stands as the worked example of the clean-break path and the registry's upper bound. +let releases : Release list = + [ { n = 3; sql = ""; reserialize = None; clearForRebuild = true } ] + + +// ── The pure planning half (unit-tested; takes the registry explicitly so tests inject their own) ── + +/// The steps to move a store from `storeN` up to `codeN`: the entries with `storeN < n <= codeN`, in +/// ascending order. Pure. +let pendingReleases + (registry : Release list) + (storeN : int) + (codeN : int) + : Release list = + registry + |> List.filter (fun r -> r.n > storeN && r.n <= codeN) + |> List.sortBy (fun r -> r.n) + +/// Is the registry well-formed: strictly ascending, **contiguous** (no gaps), no duplicates, and none +/// above `codeRelease`? A gap would silently skip a migration; a dup would double-apply; an entry above +/// the code's Release is unreachable. Pure guard — unit-tested and asserted at boot. +let registryIsWellFormed (registry : Release list) (codeRelease : int) : bool = + let ns = registry |> List.map (fun r -> r.n) + let contiguous = ns |> List.pairwise |> List.forall (fun (a, b) -> b = a + 1) + let distinct = (List.distinct ns) = ns + let noneAboveCode = ns |> List.forall (fun n -> n <= codeRelease) + contiguous && distinct && noneAboveCode + + +// ── The store's Release stamp (a tiny local table, separate from the schema-hash stamp) ── + +let private releaseTable = "release_state_v0" + +/// The Release this store was last stamped at, or `None` if it predates Release tracking (or is fresh). +let storedRelease () : int option = + let exists = + Sql.query "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = @n" + |> Sql.parameters [ "n", Sql.string releaseTable ] + |> Sql.executeExistsSync + if not exists then + None + else + match + Sql.query $"SELECT release FROM {releaseTable} WHERE id = 0" + |> Sql.execute (fun read -> read.int64 "release") + with + | Ok [ r ] -> Some(int r) + | _ -> None + +/// Stamp the store at Release `n`. +let writeRelease (n : int) : unit = + Sql.query + $"CREATE TABLE IF NOT EXISTS {releaseTable} (id INTEGER PRIMARY KEY, release INTEGER NOT NULL)" + |> Sql.executeStatementSync + Sql.query $"INSERT OR REPLACE INTO {releaseTable} (id, release) VALUES (0, @r)" + |> Sql.parameters [ "r", Sql.int64 (int64 n) ] + |> Sql.executeStatementSync + + +// ── Applying a step ── + +/// Re-serialize the WHOLE op log once through `remap` (old `op_blob` → new), in a single transaction. +/// The op id is content-addressed over the op's MEANING (a normalized canonical form), not its raw +/// bytes — so a pure *format* change keeps the same id, and we update `op_blob` in place. (A remap that +/// changes an op's meaning/hash is a different, louder operation — the hash-remap path — not this.) +let reserializeLog (remap : byte[] -> byte[]) : unit = + let rows = + Sql.query "SELECT id, branch_id, op_blob FROM package_ops" + |> Sql.execute (fun read -> + (read.string "id", read.string "branch_id", read.bytes "op_blob")) + |> Result.unwrap + let updates = + rows + |> List.map (fun (id, branchId, blob) -> + ("UPDATE package_ops SET op_blob = @blob WHERE id = @id AND branch_id = @branch_id", + [ [ "blob", Sql.bytes (remap blob) + "id", Sql.string id + "branch_id", Sql.string branchId ] ])) + Sql.executeTransactionSync updates |> ignore> + +/// The package dataset cleared by a `clearForRebuild` boundary: the PT op log + blobs, the branch +/// structure, the regenerable projections, the RT-derived caches (traces), and local sync cursors/ +/// conflicts. Reload-from-source (dev) or a re-pull from a same-Release peer (tailnet) repopulates it. +/// We KEEP accounts, user data, and the peer list (`sync_remotes`) — only the package world is reset. +/// (`rt_dval` lives in `package_values`, so it's cleared with the projections — RT recomputed from PT.) +let private rebuildClearTables : List = + [ "package_ops"; "package_blobs"; "branches"; "commits"; "branch_ops" ] + @ LibDB.Seed.projectionTables + @ [ "traces"; "trace_fn_calls"; "sync_cursors"; "sync_conflicts" ] + +/// Clear the package dataset for a clean-break Release (FK off; the rows go, the tables stay so the +/// next reload/sync refills them). +let clearForRebuildData () : unit = + Sql.query "PRAGMA foreign_keys = OFF" |> Sql.executeStatementSync + for t in rebuildClearTables do + Sql.query (sprintf "DELETE FROM \"%s\"" t) |> Sql.executeStatementSync + +/// Apply one Release step. A CLEAN-BREAK (`clearForRebuild`) clears the package dataset so it rebuilds +/// from source/peer (disposable pre-Release data). Otherwise it's the durable path: the canonical +/// copy-swap `.sql` (if any), then the op-format remap (if any, which marks the log unapplied so startup +/// refolds projections from the new bytes). +let applyRelease (r : Release) : unit = + if r.clearForRebuild then + print + $"Release {r.n}: clean-break boundary — clearing the package dataset; it rebuilds from source / re-pulls from a same-Release peer." + clearForRebuildData () + else + if r.sql <> "" then Sql.query r.sql |> Sql.executeStatementSync + match r.reserialize with + | Some remap -> + reserializeLog remap + Sql.query "UPDATE package_ops SET applied = 0" |> Sql.executeStatementSync + | None -> () + + +// ── The boot guard + forward migrator ── + +/// Reconcile the store's Release with this binary's (`codeRelease = Sync.wireFormatVersion`): +/// - **store == code** → nothing; +/// - **store > code** → REFUSE (a newer store; older code would misread the op format) — raise; +/// - **store < code** → apply each pending step in order, then stamp `code`; +/// - **store absent** → stamp `code` (a fresh store is born at the current Release; a pre-tracking +/// store is, by construction, at the current format — no format bumps happened before tracking). +let applyPending (codeRelease : int) : unit = + if not (registryIsWellFormed releases codeRelease) then + Exception.raiseInternal + "Release registry is not well-formed (a gap, duplicate, or an entry above the code Release)" + [ "codeRelease", codeRelease ] + match storedRelease () with + | None -> writeRelease codeRelease + | Some s when s = codeRelease -> () + | Some s when s > codeRelease -> + Exception.raiseInternal + $"This store is on Release {s}; this Dark speaks Release {codeRelease}. Upgrade Dark to open it — never open a newer store with older code." + [] + | Some s -> + for r in pendingReleases releases s codeRelease do + let extra = if r.reserialize.IsSome then " + op re-serialize" else "" + print $"Applying Release {r.n} (schema{extra})…" + applyRelease r + writeRelease codeRelease diff --git a/backend/tests/Tests/Releases.Tests.fs b/backend/tests/Tests/Releases.Tests.fs new file mode 100644 index 0000000000..6b50eb239f --- /dev/null +++ b/backend/tests/Tests/Releases.Tests.fs @@ -0,0 +1,115 @@ +/// Tests for the Release migrator's planning + guard logic (LibDB.Releases). +/// +/// The DB-mutating half (storedRelease/writeRelease/applyPending) is exercised by every suite startup +/// — the migrator runs in `LocalExec.Migrations.run` before the Tests binary boots, so if it threw the +/// suite wouldn't start. These tests pin the PURE half — `pendingReleases` (which steps to apply) and +/// `registryIsWellFormed` (the gap/dup/over-code guard) — over injected registries, plus a guard that +/// the REAL registry is well-formed against the code's current Release. +module Tests.Releases + +open Expecto +open Prelude + +module Releases = LibDB.Releases +module Sync = LibDB.Sync + +/// a step that only declares its Release number (no migration body) — enough for the planning tests +let private step (n : int) : Releases.Release = + { n = n; sql = ""; reserialize = None; clearForRebuild = false } + +let tests = + testList + "Releases" + [ test "pendingReleases: an empty registry never has anything to apply" { + Expect.isEmpty (Releases.pendingReleases [] 1 5) "empty registry → no steps" + } + + test "pendingReleases: returns exactly the steps in (storeN, codeN], ascending" { + let registry = [ step 4; step 2; step 3; step 5 ] // deliberately unsorted + Expect.equal + (Releases.pendingReleases registry 2 5 |> List.map (fun r -> r.n)) + [ 3; 4; 5 ] + "store 2 → code 5 applies 3,4,5 in order" + Expect.equal + (Releases.pendingReleases registry 3 4 |> List.map (fun r -> r.n)) + [ 4 ] + "store 3 → code 4 applies only 4" + } + + test + "pendingReleases: nothing to do when the store is already at the code Release" { + let registry = [ step 2; step 3 ] + Expect.isEmpty + (Releases.pendingReleases registry 3 3) + "store == code → no steps (the common steady state)" + } + + test + "pendingReleases: a step at or below the store Release is never re-applied" { + let registry = [ step 2; step 3; step 4 ] + Expect.equal + (Releases.pendingReleases registry 3 4 |> List.map (fun r -> r.n)) + [ 4 ] + "already-applied steps (<= storeN) are excluded" + } + + test + "registryIsWellFormed: a contiguous, distinct, in-range registry is well-formed" { + Expect.isTrue + (Releases.registryIsWellFormed [ step 2; step 3; step 4 ] 4) + "2,3,4 with code 4 is well-formed" + Expect.isTrue + (Releases.registryIsWellFormed [] 2) + "an empty registry is well-formed" + } + + test + "registryIsWellFormed: a GAP is rejected (it would silently skip a migration)" { + Expect.isFalse + (Releases.registryIsWellFormed [ step 2; step 4 ] 4) + "2 then 4 (missing 3) is not well-formed" + } + + test "registryIsWellFormed: a DUPLICATE is rejected (it would double-apply)" { + Expect.isFalse + (Releases.registryIsWellFormed [ step 2; step 3; step 3 ] 3) + "a repeated Release number is not well-formed" + } + + test + "registryIsWellFormed: a step ABOVE the code Release is rejected (unreachable)" { + Expect.isFalse + (Releases.registryIsWellFormed [ step 2; step 3 ] 2) + "a step at Release 3 when the code speaks only 2 is not well-formed" + } + + // The guard that protects the shipped registry: whatever steps exist must be well-formed against + // the Release this binary actually speaks. `applyPending` asserts this at boot; pin it here too so + // a malformed registry fails in CI, not on someone's machine at startup. + test + "the shipped Release registry is well-formed against the code's current Release" { + Expect.isTrue + (Releases.registryIsWellFormed Releases.releases Sync.wireFormatVersion) + "LibDB.Releases.releases is contiguous/distinct and none above wireFormatVersion" + } + + // Release 3 (meaning-stable hashing) is the first real step, and it's a clean-break boundary. + test "Release 3 is the shipped meaning-stable-hashing clean-break step" { + match Releases.releases |> List.tryFind (fun r -> r.n = 3) with + | Some r -> + Expect.isTrue + r.clearForRebuild + "Release 3 is a clean-break (clearForRebuild) — pre-v3 data is disposable, rebuilt from source" + | None -> failtest "expected a Release 3 entry in the shipped registry" + } + + // a v2 store upgrades to the current code Release via exactly the shipped steps (here: just 3). + test "a v2 store upgrades to the current Release via the shipped steps" { + let steps = + Releases.pendingReleases Releases.releases 2 Sync.wireFormatVersion + |> List.map (fun r -> r.n) + Expect.equal + steps + [ 3 ] + "store 2 → code 3 applies exactly Release 3 (the clean-break)" + } ] From ba95bf73cb4f371dc4ded6ba6b2ba082f2222cb0 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 15:05:36 -0400 Subject: [PATCH 07/25] Make content hashing meaning-stable Hash the alpha-normalized canonical form of a package item, so bound-variable names (parameters, let/lambda/match binders, and their uses) no longer affect its content hash. Two functions identical up to a parameter rename share one hash, while which argument is used, binder order, and shadowing stay distinct. This keeps an item's identity stable across an op-format or language change, so sync sees no phantom divergences. --- .../Hashing/AlphaNormalize.fs | 238 ++++++++++++++++++ .../src/LibSerialization/Hashing/Canonical.fs | 4 +- .../src/LibSerialization/Hashing/Hashing.fs | 15 +- .../LibSerialization/LibSerialization.fsproj | 1 + backend/tests/Tests/AlphaNormalize.Tests.fs | 128 ++++++++++ backend/tests/Tests/LibParser.Tests.fs | 2 +- backend/tests/Tests/Tests.fs | 9 +- backend/tests/Tests/Tests.fsproj | 7 + 8 files changed, 396 insertions(+), 8 deletions(-) create mode 100644 backend/src/LibSerialization/Hashing/AlphaNormalize.fs create mode 100644 backend/tests/Tests/AlphaNormalize.Tests.fs diff --git a/backend/src/LibSerialization/Hashing/AlphaNormalize.fs b/backend/src/LibSerialization/Hashing/AlphaNormalize.fs new file mode 100644 index 0000000000..dd422e6b4b --- /dev/null +++ b/backend/src/LibSerialization/Hashing/AlphaNormalize.fs @@ -0,0 +1,238 @@ +/// Alpha-normalization for meaning-stable hashing. +/// +/// The content hash of a package item is over its canonical serialized form. That form must not depend +/// on incidental BOUND-VARIABLE NAMES — two items identical up to a `let`/lambda/match binder rename are +/// the same item and must hash the same. (Function parameters are already handled elsewhere: a parameter +/// use is lowered to `EArg index`, and the parameter name isn't hashed — see `Canonical.writeParameter`. +/// This pass is what covers the binders the parser keeps named: `let`, lambda, and `match`.) +/// +/// It renames every such bound variable to a canonical, position-derived name (`$0`, `$1`, … assigned in +/// a fixed structural traversal), consistently at its binder and all its uses (`EVariable`/`EPipeVariable`). +/// Alpha-equivalent expressions normalize to the SAME tree, so they serialize and hash identically. Free +/// variables (not locally bound) are left untouched, as are ids, types, and fully-qualified references. +/// `computeFnHash`/`computeValueHash` run this before serializing. +module LibSerialization.Hashing.AlphaNormalize + +open Prelude +module PT = LibExecution.ProgramTypes + + +// ── the variables a pattern BINDS, left-to-right (deduped within an or-pattern, whose alternatives +// bind the same names) ── + +let rec private letPatternVars (p : PT.LetPattern) : List = + match p with + | PT.LPVariable(_, name) -> [ name ] + | PT.LPUnit _ -> [] + | PT.LPWildcard _ -> [] + | PT.LPTuple(_, first, second, rest) -> + letPatternVars first @ letPatternVars second @ List.collect letPatternVars rest + +let rec private matchPatternVars (p : PT.MatchPattern) : List = + match p with + | PT.MPVariable(_, name) -> [ name ] + | PT.MPEnum(_, _, fields) -> List.collect matchPatternVars fields + | PT.MPTuple(_, first, second, rest) -> + matchPatternVars first + @ matchPatternVars second + @ List.collect matchPatternVars rest + | PT.MPList(_, pats) -> List.collect matchPatternVars pats + | PT.MPListCons(_, head, tail) -> matchPatternVars head @ matchPatternVars tail + // an or-pattern's alternatives bind the SAME variables — collect them once (first occurrence order) + | PT.MPOr(_, pats) -> + pats |> NEList.toList |> List.collect matchPatternVars |> List.distinct + | _ -> [] // literal patterns bind nothing + + +// ── rewrite a pattern's bound-variable names per a name→canonical map ── + +let rec private renameLetPattern + (m : Map) + (p : PT.LetPattern) + : PT.LetPattern = + match p with + | PT.LPVariable(id, name) -> + PT.LPVariable(id, Map.tryFind name m |> Option.defaultValue name) + | PT.LPUnit _ -> p + | PT.LPWildcard _ -> p + | PT.LPTuple(id, first, second, rest) -> + PT.LPTuple( + id, + renameLetPattern m first, + renameLetPattern m second, + List.map (renameLetPattern m) rest + ) + +let rec private renameMatchPattern + (m : Map) + (p : PT.MatchPattern) + : PT.MatchPattern = + match p with + | PT.MPVariable(id, name) -> + PT.MPVariable(id, Map.tryFind name m |> Option.defaultValue name) + | PT.MPEnum(id, caseName, fields) -> + PT.MPEnum(id, caseName, List.map (renameMatchPattern m) fields) + | PT.MPTuple(id, first, second, rest) -> + PT.MPTuple( + id, + renameMatchPattern m first, + renameMatchPattern m second, + List.map (renameMatchPattern m) rest + ) + | PT.MPList(id, pats) -> PT.MPList(id, List.map (renameMatchPattern m) pats) + | PT.MPListCons(id, head, tail) -> + PT.MPListCons(id, renameMatchPattern m head, renameMatchPattern m tail) + | PT.MPOr(id, pats) -> PT.MPOr(id, NEList.map (renameMatchPattern m) pats) + | _ -> p // literal patterns have no names + + +// ── the core: rewrite an expr so every bound variable is a canonical `$n` name ── + +// The counter is threaded as a `ref` mutated in a FIXED structural traversal order, so two +// alpha-equivalent trees (identical structure, different names) get identical `$n` assignments. + +let private mergeEnv + (env : Map) + (m : Map) + : Map = + // inner bindings shadow outer ones + Map.fold (fun acc k v -> Map.add k v acc) env m + +let private bind (counter : int ref) (vars : List) : Map = + vars + |> List.map (fun v -> + let n = counter.Value + counter.Value <- n + 1 + (v, "$" + string n)) + |> Map.ofList + +let private lookup (env : Map) (name : string) : string = + // a bound variable → its canonical name; a free variable (not locally bound) → unchanged + Map.tryFind name env |> Option.defaultValue name + +let rec private norm + (c : int ref) + (env : Map) + (e : PT.Expr) + : PT.Expr = + let r = norm c env // recurse with the same scope (non-binding children) + match e with + // uses of variables — the whole point + | PT.EVariable(id, name) -> PT.EVariable(id, lookup env name) + + // binders + | PT.ELet(id, pat, rhs, body) -> + let rhs = norm c env rhs // `let` is non-recursive: the rhs is in the OUTER scope + let m = bind c (letPatternVars pat) + PT.ELet(id, renameLetPattern m pat, rhs, norm c (mergeEnv env m) body) + | PT.ELambda(id, pats, body) -> + let m = bind c (pats |> NEList.toList |> List.collect letPatternVars) + PT.ELambda( + id, + NEList.map (renameLetPattern m) pats, + norm c (mergeEnv env m) body + ) + | PT.EMatch(id, scrutinee, cases) -> + let scrutinee = norm c env scrutinee + let cases = + cases + |> List.map (fun case -> + let m = bind c (matchPatternVars case.pat) + let env = mergeEnv env m + let normalized : PT.MatchCase = + { pat = renameMatchPattern m case.pat + whenCondition = Option.map (norm c env) case.whenCondition + rhs = norm c env case.rhs } + normalized) + PT.EMatch(id, scrutinee, cases) + + // structural recursion (no new bindings) — every child that is an Expr is normalized + | PT.EString(id, segments) -> + PT.EString(id, List.map (normStringSegment c env) segments) + | PT.EIf(id, cond, thenExpr, elseExpr) -> + PT.EIf(id, r cond, r thenExpr, Option.map r elseExpr) + | PT.ERecordFieldAccess(id, expr, field) -> + PT.ERecordFieldAccess(id, r expr, field) + | PT.EApply(id, fn, typeArgs, args) -> + PT.EApply(id, r fn, typeArgs, NEList.map r args) + | PT.EList(id, exprs) -> PT.EList(id, List.map r exprs) + | PT.ERecord(id, typeName, typeArgs, fields) -> + PT.ERecord(id, typeName, typeArgs, List.map (fun (n, ex) -> (n, r ex)) fields) + | PT.ERecordUpdate(id, record, updates) -> + PT.ERecordUpdate(id, r record, NEList.map (fun (n, ex) -> (n, r ex)) updates) + | PT.EEnum(id, typeName, typeArgs, caseName, fields) -> + PT.EEnum(id, typeName, typeArgs, caseName, List.map r fields) + | PT.ETuple(id, first, second, rest) -> + PT.ETuple(id, r first, r second, List.map r rest) + | PT.EInfix(id, op, left, right) -> PT.EInfix(id, op, r left, r right) + | PT.EDict(id, pairs) -> PT.EDict(id, List.map (fun (k, ex) -> (k, r ex)) pairs) + | PT.EStatement(id, first, next) -> PT.EStatement(id, r first, r next) + | PT.EPipe(id, expr, pipes) -> + PT.EPipe(id, r expr, List.map (normPipe c env) pipes) + + // leaves / no Expr children / no bound names — unchanged + | PT.EInt64 _ + | PT.EUInt64 _ + | PT.EInt8 _ + | PT.EUInt8 _ + | PT.EInt16 _ + | PT.EUInt16 _ + | PT.EInt32 _ + | PT.EUInt32 _ + | PT.EInt128 _ + | PT.EUInt128 _ + | PT.EBool _ + | PT.EChar _ + | PT.EFloat _ + | PT.EUnit _ + | PT.EValue _ + | PT.EFnName _ + | PT.ESelf _ + | PT.EArg _ -> e + +and private normStringSegment + (c : int ref) + (env : Map) + (seg : PT.StringSegment) + : PT.StringSegment = + match seg with + | PT.StringText _ -> seg + | PT.StringInterpolation expr -> PT.StringInterpolation(norm c env expr) + +and private normPipe + (c : int ref) + (env : Map) + (p : PT.PipeExpr) + : PT.PipeExpr = + match p with + // a pipe into a variable is a USE — normalize the name like EVariable + | PT.EPipeVariable(id, name, args) -> + PT.EPipeVariable(id, lookup env name, List.map (norm c env) args) + | PT.EPipeLambda(id, pats, body) -> + let m = bind c (pats |> NEList.toList |> List.collect letPatternVars) + PT.EPipeLambda( + id, + NEList.map (renameLetPattern m) pats, + norm c (mergeEnv env m) body + ) + | PT.EPipeInfix(id, op, expr) -> PT.EPipeInfix(id, op, norm c env expr) + | PT.EPipeFnCall(id, fnName, typeArgs, args) -> + PT.EPipeFnCall(id, fnName, typeArgs, List.map (norm c env) args) + | PT.EPipeEnum(id, typeName, caseName, fields) -> + PT.EPipeEnum(id, typeName, caseName, List.map (norm c env) fields) + + +// ── public entry points ── + +/// Alpha-normalize a standalone expression: its `let`/lambda/match binders become canonical. +let expr (e : PT.Expr) : PT.Expr = norm (ref 0) Map.empty e + +/// Alpha-normalize a value: its body's binders become canonical. +let value (v : PT.PackageValue.PackageValue) : PT.PackageValue.PackageValue = + { v with body = expr v.body } + +/// Alpha-normalize a function: normalize the body's binders. Parameters need no handling here — a +/// parameter reference in the body is already positional (`EArg index`), and the parameter name isn't +/// part of the hash (see `Canonical.writeParameter`), so a parameter rename can't affect the result. +let fn (f : PT.PackageFn.PackageFn) : PT.PackageFn.PackageFn = + { f with body = expr f.body } diff --git a/backend/src/LibSerialization/Hashing/Canonical.fs b/backend/src/LibSerialization/Hashing/Canonical.fs index 6ac99ce170..be20be9348 100644 --- a/backend/src/LibSerialization/Hashing/Canonical.fs +++ b/backend/src/LibSerialization/Hashing/Canonical.fs @@ -502,7 +502,9 @@ let writeParameter (w : BinaryWriter) (p : PT.PackageFn.Parameter) = - Common.String.write w p.name + // The parameter NAME is not hashed: a body references parameters by position (the parser lowers a + // parameter use to `EArg index`), so the name is cosmetic and a rename leaves the function's meaning + // unchanged. Only the type is part of the content hash. writeTypeReference mode w p.typ let writeRecordField diff --git a/backend/src/LibSerialization/Hashing/Hashing.fs b/backend/src/LibSerialization/Hashing/Hashing.fs index e093fca3bf..5903129926 100644 --- a/backend/src/LibSerialization/Hashing/Hashing.fs +++ b/backend/src/LibSerialization/Hashing/Hashing.fs @@ -49,9 +49,11 @@ module Hashing = hashWithWriter (fun w -> Canonical.writeType mode w t) - /// Hash a PackageFn (skip id, description, deprecated, param descriptions) + /// Hash a PackageFn (skip id, description, deprecated, param descriptions). + /// MEANING-STABLE: alpha-normalize first, so bound-variable names (parameters, let/lambda/match + /// binders) don't affect the hash — `fn add x y = x + y` and `fn add a b = a + b` hash identically. let computeFnHash (mode : HashRefMode) (fn : PT.PackageFn.PackageFn) : Hash = - hashWithWriter (fun w -> Canonical.writeFn mode w fn) + hashWithWriter (fun w -> Canonical.writeFn mode w (AlphaNormalize.fn fn)) /// Hash a PackageValue (skip id, description, deprecated) @@ -59,7 +61,8 @@ module Hashing = (mode : HashRefMode) (v : PT.PackageValue.PackageValue) : Hash = - hashWithWriter (fun w -> Canonical.writeValue mode w v) + // meaning-stable: alpha-normalize the body's binders first (see computeFnHash) + hashWithWriter (fun w -> Canonical.writeValue mode w (AlphaNormalize.value v)) /// Hash a PackageOp (reuse existing PackageOp.write — ops have no metadata to skip) @@ -201,10 +204,12 @@ module Hashing = : byte array = use ms = new MemoryStream() use w = new BinaryWriter(ms) + // meaning-stable: alpha-normalize fns/values so the batch (SCC) hash, like the single-item hash, + // ignores bound-variable names. Types have no binders, so they pass through unchanged. match item with | TypeItem(t, _, _, _) -> Canonical.writeType mode w t - | FnItem(fn, _, _, _) -> Canonical.writeFn mode w fn - | ValueItem(v, _, _, _) -> Canonical.writeValue mode w v + | FnItem(fn, _, _, _) -> Canonical.writeFn mode w (AlphaNormalize.fn fn) + | ValueItem(v, _, _, _) -> Canonical.writeValue mode w (AlphaNormalize.value v) ms.ToArray() diff --git a/backend/src/LibSerialization/LibSerialization.fsproj b/backend/src/LibSerialization/LibSerialization.fsproj index 55958534a3..5e9adf0c64 100644 --- a/backend/src/LibSerialization/LibSerialization.fsproj +++ b/backend/src/LibSerialization/LibSerialization.fsproj @@ -42,6 +42,7 @@ + diff --git a/backend/tests/Tests/AlphaNormalize.Tests.fs b/backend/tests/Tests/AlphaNormalize.Tests.fs new file mode 100644 index 0000000000..9da7adedb0 --- /dev/null +++ b/backend/tests/Tests/AlphaNormalize.Tests.fs @@ -0,0 +1,128 @@ +/// Proves meaning-stable hashing: after alpha-normalization, a package item's content hash depends on +/// its MEANING, not on incidental bound-variable names. Two items identical up to renaming of +/// parameters / `let` / lambda / match binders hash IDENTICALLY; items that differ in meaning (which +/// variable is used, binder order, a free variable) still hash DIFFERENTLY. +/// +/// Everything is compared through `computeFnHash` (which skips ids), since two freshly-built exprs have +/// different ids and so can't be compared structurally with `=`. +module Tests.AlphaNormalize + +open Expecto +open Prelude + +open TestUtils.TestUtils +open TestUtils.PTShortcuts + +module PT = LibExecution.ProgramTypes +open LibSerialization.Hashing + +module AN = LibSerialization.Hashing.AlphaNormalize + + +// ── helpers: a fn with the given parameter names + body; its raw vs alpha-normalized hash ── + +let private fnOf + (paramNames : List) + (body : PT.Expr) + : PT.PackageFn.PackageFn = + let ps = + match paramNames with + | [] -> NEList.singleton "unused" + | h :: t -> NEList.ofList h t + testPackageFn [] ps PT.TInt64 body + +// the LIVE content hash — `computeFnHash` alpha-normalizes internally, so this *is* the +// meaning-stable hash. (`AN.fn` stays public + idempotent; the last test pins that directly.) +let private h (paramNames : List) (body : PT.Expr) : PT.Hash = + Hashing.computeFnHash Hashing.Normal (fnOf paramNames body) + +let private mpVar (name : string) : PT.MatchPattern = PT.MPVariable(gid (), name) + +let private caseOf (pat : PT.MatchPattern) (rhs : PT.Expr) : PT.MatchCase = + { pat = pat; whenCondition = None; rhs = rhs } + + +let tests = + testList + "AlphaNormalize" + [ test + "parameter names don't affect the hash (a parameter use is positional, EArg)" { + // the body references parameters by position (EArg index), and the parameter name isn't hashed, + // so two fns identical up to parameter names share one content hash. (Meaning-preservation — + // which parameter goes where — is pinned by the next test and the let/lambda/match tests below.) + let body = eTuple (eArg 0) (eArg 1) [] // (param0, param1) — positional + Expect.equal + (h [ "x"; "y" ] body) + (h [ "a"; "b" ] body) + "same meaning, same hash — the function's identity ignores incidental parameter names" + } + + test "parameter POSITION is meaning: `(arg0, arg1)` ≠ `(arg1, arg0)`" { + Expect.notEqual + (h [ "x"; "y" ] (eTuple (eArg 0) (eArg 1) [])) + (h [ "x"; "y" ] (eTuple (eArg 1) (eArg 0) [])) + "which parameter goes where is meaning — the positional reference keeps it" + } + + test "let binder rename: `let x = 1 in x` ≡ `let y = 1 in y`" { + let lx = eLet (lpVar "x") (eInt64 1L) (eVar "x") + let ly = eLet (lpVar "y") (eInt64 1L) (eVar "y") + Expect.equal (h [] lx) (h [] ly) "same meaning, same hash" + } + + test "lambda binder rename: `fun x -> x` ≡ `fun y -> y`" { + let lx = eLambda (gid ()) [ lpVar "x" ] (eVar "x") + let ly = eLambda (gid ()) [ lpVar "y" ] (eVar "y") + Expect.equal (h [] lx) (h [] ly) "alpha-equivalent lambdas hash equal" + } + + test "lambda meaning preserved: `fun x y -> x` ≠ `fun x y -> y`" { + let first = eLambda (gid ()) [ lpVar "x"; lpVar "y" ] (eVar "x") + let second = eLambda (gid ()) [ lpVar "x"; lpVar "y" ] (eVar "y") + Expect.notEqual + (h [] first) + (h [] second) + "returning the first vs the second argument is a real difference" + } + + test "match binder rename: `match 0 with | x -> x` ≡ `| y -> y`" { + let mx = eMatch (eInt64 0L) [ caseOf (mpVar "x") (eVar "x") ] + let my = eMatch (eInt64 0L) [ caseOf (mpVar "y") (eVar "y") ] + Expect.equal (h [] mx) (h [] my) "alpha-equivalent match cases hash equal" + } + + test + "free variables are preserved (a free var is a real reference, not a binder)" { + // `z` / `w` are neither parameters nor locally bound — they must survive normalization distinctly + Expect.notEqual + (h [] (eVar "z")) + (h [] (eVar "w")) + "two different free variables stay different after normalization" + } + + test "shadowing: inner-use ≢ outer-use; and inner-use is alpha-stable" { + // let _ = 1 in let _ = 2 in + let useInnerXY = + eLet (lpVar "x") (eInt64 1L) (eLet (lpVar "y") (eInt64 2L) (eVar "y")) + let useInnerAB = + eLet (lpVar "a") (eInt64 1L) (eLet (lpVar "b") (eInt64 2L) (eVar "b")) + let useOuter = + eLet (lpVar "a") (eInt64 1L) (eLet (lpVar "b") (eInt64 2L) (eVar "a")) + Expect.equal + (h [] useInnerXY) + (h [] useInnerAB) + "using the inner binding is alpha-stable across renames" + Expect.notEqual + (h [] useInnerAB) + (h [] useOuter) + "using the inner vs the outer binding is a real difference (shadowing respected)" + } + + test + "normalization is idempotent (normalizing an already-normalized fn is a no-op)" { + let f = fnOf [ "x"; "y" ] (eTuple (eVar "y") (eVar "x") []) + Expect.equal + (Hashing.computeFnHash Hashing.Normal (AN.fn (AN.fn f))) + (Hashing.computeFnHash Hashing.Normal (AN.fn f)) + "AN.fn (AN.fn f) hashes the same as AN.fn f" + } ] diff --git a/backend/tests/Tests/LibParser.Tests.fs b/backend/tests/Tests/LibParser.Tests.fs index fcb455d65e..3e0985ff1e 100644 --- a/backend/tests/Tests/LibParser.Tests.fs +++ b/backend/tests/Tests/LibParser.Tests.fs @@ -154,7 +154,7 @@ let exprRTs = Ok { name = PT.FQFnName.fqPackage - "e544bb61997193e298daa64917d67e38419df64b5dd321778ec191c18883fae8" + "4329161371ed3e0eb3e62f40897da790d8795d14e6f44e7f98ad075b38dd3d9c" location = None } }, [], [ PT.EInt64(id, 5L) ] diff --git a/backend/tests/Tests/Tests.fs b/backend/tests/Tests/Tests.fs index 92150498e9..0747e2985b 100644 --- a/backend/tests/Tests/Tests.fs +++ b/backend/tests/Tests/Tests.fs @@ -39,6 +39,7 @@ let main (args : string array) : int = // package manager Tests.Propagation.tests Tests.Hashing.tests + Tests.AlphaNormalize.tests Tests.BranchOps.tests // serialization @@ -60,7 +61,13 @@ let main (args : string array) : int = Tests.Blob.tests Tests.Stream.tests - Tests.Capabilities.tests ] + Tests.Capabilities.tests + Tests.OpsProjections.tests + Tests.ConflictDispatch.tests + Tests.SyncIdempotency.tests + Tests.SyncScenarios.tests + Tests.Releases.tests + Tests.Remotes.tests ] let cancelationTokenSource = new System.Threading.CancellationTokenSource() let httpClientTestsTask = Tests.HttpClient.init cancelationTokenSource.Token diff --git a/backend/tests/Tests/Tests.fsproj b/backend/tests/Tests/Tests.fsproj index fecb19f51c..fbf210f436 100644 --- a/backend/tests/Tests/Tests.fsproj +++ b/backend/tests/Tests/Tests.fsproj @@ -40,6 +40,7 @@ + @@ -52,8 +53,14 @@ + + + + + + From db9acfb5f7c37f9bb8994e183256ded47adbbb42 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 17:44:42 -0400 Subject: [PATCH 08/25] Tighten sync internals (readability, no behavior change) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A follow-up readability pass over the sync code; behavior and the full suite are unchanged. - Extract `parseLocation` / `formatLocation` (the FQ "owner[.modules].name" inverse pair) and reuse them in `detectDivergences`, `liveBindingHash`, and `resolveConflict` instead of open-coding the split/join at each site. - Reshape `divergentBindings` to return the structured `PackageLocation` + hashes, so `detectDivergences` just renders the location — removing its unreachable match arm. - Extract `restampAndRefold`, shared by the automatic keep-local policy (`routeDivergences`) and the human 'mine' override (`resolveConflict`), which re-stamped + re-folded an op identically. - Flatten `opKindBreakdown`'s nested `List.append`; extract `Display.divergenceNote`, shared by the HTTP and file-pull branches of `dark sync pull` so both word the divergence note identically. - Note that `pmSyncOpsSince` is committed-only today (so equivalent to `pmSyncOpsSinceCommitted`). --- .../Builtins/Builtins.Matter/Libs/PM/Sync.fs | 3 +- backend/src/LibDB/Sync.fs | 219 ++++++++---------- packages/darklang/cli/sync.dark | 12 +- packages/darklang/sync/display.dark | 17 +- 4 files changed, 117 insertions(+), 134 deletions(-) diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs index 5a6cc953d3..a66db7a9d1 100644 --- a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs @@ -62,7 +62,8 @@ let fns () : List = description = "The sync SERVER read (the `GET /sync/events?since=cursor` body): the ops the puller hasn't seen, encoded as a base64 wire batch. A Darklang HTTP router returns this string; - the puller decodes + applies it via `pmSyncApplyWire`." + the puller decodes + applies it via `pmSyncApplyWire`. Committed-only today (so equivalent to + `pmSyncOpsSinceCommitted`); kept as the general entry point for when WIP/own-device sync returns." fn = (function | _, _, _, [ DInt64 cursor ] -> diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index 8318de5c99..9fcf1a8d00 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -95,20 +95,39 @@ let decodeBatch (bytes : byte[]) : List = let opsToSend (cursor : int64) : Task> = Inserts.opsSinceCommitted cursor +/// Render a `PackageLocation` as the FQ "owner[.modules].name" string sync uses on the wire and in +/// the conflict store — the inverse of `parseLocation`. +let private formatLocation (loc : PT.PackageLocation) : string = + let modulesStr = String.concat "." loc.modules + if modulesStr = "" then + $"{loc.owner}.{loc.name}" + else + $"{loc.owner}.{modulesStr}.{loc.name}" + +/// Parse an FQ "owner[.modules].name" location back into a `PackageLocation` (owner = head, +/// name = last, modules = the middle) — the inverse of `formatLocation`. +let private parseLocation (location : string) : Option = + match location.Split('.') |> List.ofArray with + | owner :: rest -> + match List.rev rest with + | name :: revModules -> + Some { owner = owner; modules = List.rev revModules; name = name } + | [] -> None + | _ -> None + /// Detect sync divergences in a remote batch BEFORE applying it. For each incoming /// `SetName`, if the location is already bound LOCALLY to a *different*, non-deprecated hash, /// two peers gave the same name different content. Returns `(location, existingHash, /// incomingHash)` per divergence — surfaced as **data** so the receiver never blocks; a higher /// layer turns these into `Conflict.CSyncDivergence` for the resolution policy. -/// Internal core of both `detectDivergences` (which stringifies these) and `reconcileBatch` (which -/// turns them into reconciling ops): for each incoming `SetName` whose location is locally bound to -/// a DIFFERENT non-deprecated hash, the `(incoming op, existing local hash)` pair. +/// `divergentBindings` is the core (it returns the structured location + hashes); `detectDivergences` +/// just renders the location to its FQ string. let private divergentBindings (branchId : PT.BranchId) (ops : List) - : Task> = + : Task> = task { - let result = ResizeArray() + let result = ResizeArray() for op in ops do match op with | PT.PackageOp.SetName(loc, target) -> @@ -131,7 +150,7 @@ let private divergentBindings |> Sql.executeAsync (fun read -> read.string "item_hash") match existing with | existingHash :: _ when existingHash <> incomingHash -> - result.Add((op, existingHash)) + result.Add((loc, existingHash, incomingHash)) | _ -> () | _ -> () return List.ofSeq result @@ -142,22 +161,11 @@ let detectDivergences (ops : List) : Task> = task { - let! pairs = divergentBindings branchId ops + let! triples = divergentBindings branchId ops return - pairs - |> List.map (fun (op, existingHash) -> - match op with - | PT.PackageOp.SetName(loc, target) -> - let modulesStr = String.concat "." loc.modules - let (PT.Hash incomingHash) = target.hash - let locStr = - if modulesStr = "" then - $"{loc.owner}.{loc.name}" - else - $"{loc.owner}.{modulesStr}.{loc.name}" - (locStr, existingHash, incomingHash) - // divergentBindings only ever returns SetName ops, so this is unreachable - | _ -> ("", existingHash, "")) + triples + |> List.map (fun (loc, existingHash, incomingHash) -> + (formatLocation loc, existingHash, incomingHash)) } @@ -178,27 +186,23 @@ let detectDivergences // the `LIMIT 1` deterministic (newest row) in the meantime. let private liveBindingHash (location : string) : Task> = task { - match location.Split('.') |> List.ofArray with - | owner :: rest -> - match List.rev rest with - | name :: revModules -> - let modulesStr = revModules |> List.rev |> String.concat "." - let! rows = - Sql.query - """ - SELECT item_hash FROM locations - WHERE owner = @o AND modules = @m AND name = @n AND unlisted_at IS NULL - ORDER BY rowid DESC - LIMIT 1 - """ - |> Sql.parameters - [ "o", Sql.string owner - "m", Sql.string modulesStr - "n", Sql.string name ] - |> Sql.executeAsync (fun read -> read.string "item_hash") - return List.tryHead rows - | [] -> return None - | _ -> return None + match parseLocation location with + | Some loc -> + let! rows = + Sql.query + """ + SELECT item_hash FROM locations + WHERE owner = @o AND modules = @m AND name = @n AND unlisted_at IS NULL + ORDER BY rowid DESC + LIMIT 1 + """ + |> Sql.parameters + [ "o", Sql.string loc.owner + "m", Sql.string (String.concat "." loc.modules) + "n", Sql.string loc.name ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + return List.tryHead rows + | None -> return None } let recordDivergences @@ -242,16 +246,25 @@ let private kindOfHash (hash : string) : Task> = return rows |> List.tryHead |> Option.map PT.ItemKind.fromString } -/// Parse an FQ "owner[.modules].name" location back into a `PackageLocation` (owner = head, -/// name = last, modules = the middle) — the inverse of `detectDivergences`' stringification. -let private parseLocation (location : string) : Option = - match location.Split('.') |> List.ofArray with - | owner :: rest -> - match List.rev rest with - | name :: revModules -> - Some { owner = owner; modules = List.rev revModules; name = name } - | [] -> None - | _ -> None +/// Re-stamp a keep-local override op's `origin_ts` to now and re-fold it. Shared by the automatic +/// keep-local policy (`routeDivergences`) and the human 'mine' override (`resolveConflict`): both +/// re-bind a location to OUR hash by re-stamping the op that first bound it (a `SetName` content- +/// identical to one already in the log, so it's addressed by `computeOpHash`) and re-folding it +/// directly. The fresh stamp makes it win timestamp-LWW locally AND rides sync so peers re-adopt it. +/// `applyOps` (not `insertAndApplyOps`) because the op is already in the log — `insertAndApplyOps` +/// only folds NEWLY-inserted ops, so the binding would never flip back. +let private restampAndRefold + (branchId : PT.BranchId) + (mineOp : PT.PackageOp) + : Task = + task { + do! + Sql.query + "UPDATE package_ops SET origin_ts = strftime('%Y-%m-%dT%H:%M:%fZ','now') WHERE id = @id" + |> Sql.parameters [ "id", Sql.uuid (Inserts.computeOpHash mineOp) ] + |> Sql.executeStatementAsync + do! PackageOpPlayback.applyOps branchId None [ mineOp ] + } /// Route each detected divergence through the runtime conflict-dispatch seam /// (`exeState.conflictDispatch`). This is the "higher layer" the transport defers to: the receiver @@ -281,16 +294,10 @@ let routeDivergences let! resolution = dispatch conflict callCtx |> Ply.toTask match resolution with | RT.RSubstitute(RT.DString keepHash) when keepHash = existingHash -> - // keep local: re-bind the location to our existing hash, overriding the incoming bind. - // This is the same move as a human 'mine' override (`resolveConflict`): the `SetName` to our - // hash is content-identical to the op that first bound it, so it's already in the log and a - // fresh insert would `INSERT OR IGNORE`-dedup. RE-STAMP that op's `origin_ts` to now (so it - // wins timestamp-LWW, and the newer stamp rides sync so peers re-adopt our hash too) and - // RE-FOLD it directly via `applyOps` (which re-runs `applySetName`, un-listing the incoming - // row and re-activating ours) — `insertAndApplyOps` only folds NEWLY-inserted ops. - // The re-fold below re-binds locally regardless of whether the re-stamp matched a row, so the - // worst case (the original op somehow absent from the log) is a non-propagating override, never - // a wrong local binding. + // keep local: re-bind the location to our existing hash via `restampAndRefold` (the same move + // a human 'mine' override makes in `resolveConflict`). The re-fold re-binds locally regardless + // of whether the re-stamp matched a row, so the worst case (the original op somehow absent from + // the log) is a non-propagating override, never a wrong local binding. match! kindOfHash existingHash with | Some kind -> match parseLocation location with @@ -300,12 +307,7 @@ let routeDivergences loc, PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) ) - do! - Sql.query - "UPDATE package_ops SET origin_ts = strftime('%Y-%m-%dT%H:%M:%fZ','now') WHERE id = @id" - |> Sql.parameters [ "id", Sql.uuid (Inserts.computeOpHash mineOp) ] - |> Sql.executeStatementAsync - do! PackageOpPlayback.applyOps branchId None [ mineOp ] + do! restampAndRefold branchId mineOp do! Conflicts.markOverriddenByLocation remote location reconciled <- reconciled + 1 | None -> () @@ -562,56 +564,33 @@ let resolveConflict (conflictId : string) (keepMine : bool) : Task = do! Conflicts.markOverridden c.id return true else - // "mine" — re-bind the location to our hash. Parse the FQ "owner[.modules].name" and read - // the binding's kind + branch from `locations`, then emit + apply a WIP SetName to our hash. - // FQ "owner[.modules].name" → owner = head, name = last, modules = the middle. Reverse-match - // to bind name + modules directly (this codebase's List.head/last return Option). - match c.location.Split('.') |> List.ofArray with - | owner :: rest -> - match List.rev rest with - | name :: revModules -> - let modulesStr = revModules |> List.rev |> String.concat "." - let! meta = - Sql.query - """ - SELECT item_type, branch_id FROM locations - WHERE owner = @o AND modules = @m AND name = @n LIMIT 1 - """ - |> Sql.parameters - [ "o", Sql.string owner - "m", Sql.string modulesStr - "n", Sql.string name ] - |> Sql.executeAsync (fun read -> - (read.string "item_type", (read.uuid "branch_id" : PT.BranchId))) - match meta with - | (itemType, branchId) :: _ -> - let kind = PT.ItemKind.fromString itemType - let modulesList = - if modulesStr = "" then [] else modulesStr.Split('.') |> List.ofArray - let loc : PT.PackageLocation = - { owner = owner; modules = modulesList; name = name } - let target = PT.Reference.fromHashAndKind (PT.Hash c.localHash, kind) - let mineOp = PT.PackageOp.SetName(loc, target) - // A human override is the LATEST decision — RE-STAMP our op's origin_ts to now so it - // wins timestamp-LWW (last-resolver-wins) AND propagates: the re-stamp rides the op on - // sync (preserve-on-receive), so peers see our hash as the most-recent-by-creation and - // adopt it too. (This is also what un-blocks the playback stale-check from skipping our - // re-fold below — without it, our op's OLD origin_ts would read as stale vs the - // incoming that just won, and the binding wouldn't flip back.) - do! - Sql.query - "UPDATE package_ops SET origin_ts = strftime('%Y-%m-%dT%H:%M:%fZ','now') WHERE id = @id" - |> Sql.parameters [ "id", Sql.uuid (Inserts.computeOpHash mineOp) ] - |> Sql.executeStatementAsync - // RE-FOLD the SetName directly. We can't go through insertAndApplyOps: `SetName(loc, - // ourHash)` is content-identical to the op that first bound our hash, so it's already - // in the op log and INSERT OR IGNORE would dedup it — and insertAndApplyOps only folds - // NEWLY-inserted ops, so the binding would never flip back. applyOps re-runs - // applySetName (un-list the incoming row, re-activate ours — now the freshest stamp). - do! PackageOpPlayback.applyOps branchId None [ mineOp ] - do! Conflicts.markOverridden c.id - return true - | [] -> return false // the location no longer exists locally - | [] -> return false // "owner" only, no name - | _ -> return false // unparseable location + // "mine" — re-bind the location to our hash. Parse the FQ "owner[.modules].name", read the + // binding's kind + branch from `locations`, then re-stamp + re-fold a SetName to our hash. A + // human override is the LATEST decision, so `restampAndRefold` makes it win timestamp-LWW + // (last-resolver-wins) and ride sync so peers re-adopt our hash too. + match parseLocation c.location with + | Some loc -> + let modulesStr = String.concat "." loc.modules + let! meta = + Sql.query + """ + SELECT item_type, branch_id FROM locations + WHERE owner = @o AND modules = @m AND name = @n LIMIT 1 + """ + |> Sql.parameters + [ "o", Sql.string loc.owner + "m", Sql.string modulesStr + "n", Sql.string loc.name ] + |> Sql.executeAsync (fun read -> + (read.string "item_type", (read.uuid "branch_id" : PT.BranchId))) + match meta with + | (itemType, branchId) :: _ -> + let kind = PT.ItemKind.fromString itemType + let target = PT.Reference.fromHashAndKind (PT.Hash c.localHash, kind) + let mineOp = PT.PackageOp.SetName(loc, target) + do! restampAndRefold branchId mineOp + do! Conflicts.markOverridden c.id + return true + | [] -> return false // the location no longer exists locally + | None -> return false // unparseable location } diff --git a/packages/darklang/cli/sync.dark b/packages/darklang/cli/sync.dark index 029775fd19..ffa9186591 100644 --- a/packages/darklang/cli/sync.dark +++ b/packages/darklang/cli/sync.dark @@ -24,11 +24,7 @@ let execute (state: AppState) (args: List) : AppState = else "" - let divNote = - if divCount > 0L then - $" ({Stdlib.Int64.toString divCount} name divergence(s) surfaced — not blocked; see `dark conflicts`)" - else - "" + let divNote = Darklang.Sync.Display.divergenceNote divCount let summary = Darklang.Sync.Display.syncSummary target (newCursor - before) @@ -55,11 +51,7 @@ let execute (state: AppState) (args: List) : AppState = // pull from a peer's local data.db file let (newCursor, divCount) = Darklang.Sync.pullFromFile target - let divNote = - if divCount > 0L then - $" ({Stdlib.Int64.toString divCount} name divergence(s) surfaced — not blocked; see `dark conflicts`)" - else - "" + let divNote = Darklang.Sync.Display.divergenceNote divCount let pulled = newCursor - before let summary = Darklang.Sync.Display.syncSummary target pulled diff --git a/packages/darklang/sync/display.dark b/packages/darklang/sync/display.dark index ec0023c1d8..ad35b7209a 100644 --- a/packages/darklang/sync/display.dark +++ b/packages/darklang/sync/display.dark @@ -36,13 +36,24 @@ let kindPart (kinds: List) (k: String) : List = let opKindBreakdown (kinds: List) : String = let parts = - Stdlib.List.append - (Stdlib.List.append (kindPart kinds "fn") (kindPart kinds "type")) - (Stdlib.List.append (kindPart kinds "value") (kindPart kinds "rename")) + [ kindPart kinds "fn" + kindPart kinds "type" + kindPart kinds "value" + kindPart kinds "rename" ] + |> Stdlib.List.flatten Stdlib.String.join parts ", " +// The "(N name divergence(s) surfaced — not blocked)" tail on a `dark sync pull` line. Empty when +// nothing diverged. Shared by the HTTP and file-pull branches so both word it identically. +let divergenceNote (divCount: Int64) : String = + if divCount > 0L then + $" ({Stdlib.Int64.toString divCount} name divergence(s) surfaced — not blocked; see `dark conflicts`)" + else + "" + + // ── conflict display: surfacing last-write-wins auto-resolutions for eventual ack ── // Sync NEVER blocks — a `name → two hashes` divergence is auto-resolved by timestamp-LWW (newest // authoring time wins) and RECORDED so it's never silently lost. This is the review surface From 8ce17c25fb55cc6b111528ece74ee5fbe297b42f Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Wed, 10 Jun 2026 18:10:01 -0400 Subject: [PATCH 09/25] Run sync as an observable managed app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make always-on sync a first-class `dark apps` daemon and give it structured telemetry. App + lifecycle: - Register "Sync" in the apps catalog (Daemon target `Darklang.Sync.Daemon.runManaged`), so `apps add/start/stop/status/logs sync` and `apps enable sync` all work with no new plumbing. - Unify the daemon identity on one pidfile ("sync") across the manual `sync daemon …` subcommands and the apps surface, so both manage the same process. - New `apps enable sync --boot` enables systemd user-lingering so it starts at boot, not just login. The poll loop already backs off cleanly when tailscale isn't up yet, so it self-heals on wake without needing network-ordering in the unit. Structured telemetry: - A `sync_daemon_events` table records one row per poll cycle (peers polled, changed, conflicts, skews), trimmed to the most recent rows; the tailnet loop writes a row each cycle. - `pmSyncRecordDaemonEvent` / `pmSyncRecentDaemonEvents` expose it, and `sync events` renders recent cycles as a table (`Display.daemonEventsTable`, pure + testfile-covered) instead of scraping logs. --- backend/migrations/schema.sql | 12 ++++ .../Builtins/Builtins.Matter/Libs/PM/Sync.fs | 66 +++++++++++++++++++ backend/src/LibDB/Sync.fs | 48 ++++++++++++++ .../execution/pre-s-and-s/sync-cli.dark | 7 ++ packages/darklang/cli/apps/command.dark | 14 ++++ packages/darklang/cli/apps/registry.dark | 5 ++ packages/darklang/cli/apps/service.dark | 15 +++++ packages/darklang/cli/remote.dark | 2 +- packages/darklang/cli/sync.dark | 21 ++++-- packages/darklang/sync/autosync.dark | 8 +++ packages/darklang/sync/daemon.dark | 14 +++- packages/darklang/sync/display.dark | 44 +++++++++++++ 12 files changed, 248 insertions(+), 8 deletions(-) diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index 7437a0b07f..12d52462ae 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -395,3 +395,15 @@ CREATE TABLE IF NOT EXISTS sync_conflicts ( acknowledged INTEGER NOT NULL DEFAULT 0, overridden INTEGER NOT NULL DEFAULT 0 ); + +-- Structured telemetry from the autosync daemon: one row per poll cycle, so `sync events` (and a +-- future dashboard view) can show activity as DATA rather than scraping the text log. Local-only, +-- never synced; trimmed to the most recent rows so it stays bounded. +CREATE TABLE IF NOT EXISTS sync_daemon_events ( + id INTEGER PRIMARY KEY, + at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), + peers_polled INTEGER NOT NULL, + changed INTEGER NOT NULL, + conflicts INTEGER NOT NULL, + skews INTEGER NOT NULL +); diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs index a66db7a9d1..f9706bdf06 100644 --- a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs @@ -495,6 +495,72 @@ let fns () : List = deprecated = NotDeprecated } + { name = fn "pmSyncRecordDaemonEvent" 0 + typeParams = [] + parameters = + [ Param.make "peersPolled" TInt64 "How many peers this cycle polled" + Param.make "changed" TBool "Did any peer advance this cycle?" + Param.make "conflicts" TInt64 "Divergences auto-resolved this cycle" + Param.make "skews" TInt64 "Peers paused on a Release skew this cycle" ] + returnType = TUnit + description = + "Record one autosync cycle's outcome to the local `sync_daemon_events` telemetry (trimmed to + the most recent rows). Lets `sync events` and a dashboard view show daemon activity as data, + not scraped log text." + fn = + (function + | _, _, _, [ DInt64 peers; DBool changed; DInt64 conflicts; DInt64 skews ] -> + uply { + do! + LibDB.Sync.recordDaemonEvent + (int peers) + changed + (int conflicts) + (int skews) + return DUnit + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncRecentDaemonEvents" 0 + typeParams = [] + parameters = + [ Param.make "limit" TInt64 "How many recent cycles to return (newest first)" ] + returnType = TList(TTuple(TString, TInt64, [ TBool; TInt64; TInt64 ])) + description = + "The autosync daemon's most recent poll cycles as STRUCTURED rows for `sync events` to format + in Dark: `(at, peersPolled, changed, conflicts, skews)`, newest first. Empty if it never ran." + fn = + (function + | _, _, _, [ DInt64 limit ] -> + uply { + let! rows = LibDB.Sync.recentDaemonEvents (int limit) + let dvals = + rows + |> List.map (fun (at, peers, changed, conflicts, skews) -> + DTuple( + DString at, + DInt64(int64 peers), + [ DBool(changed <> 0) + DInt64(int64 conflicts) + DInt64(int64 skews) ] + )) + return + Dval.list + (KTTuple(VT.string, VT.int64, [ VT.bool; VT.int64; VT.int64 ])) + dvals + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + { name = fn "pmConflictAck" 0 typeParams = [] parameters = diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index 9fcf1a8d00..bfb3dc59f8 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -225,6 +225,54 @@ let recordDivergences } +// ── daemon telemetry (local, not synced): one row per autosync poll cycle ── + +/// Record one autosync cycle's outcome, then trim the table to the most recent 500 rows so this +/// telemetry stays bounded (the daemon polls every few seconds to a minute, forever). +let recordDaemonEvent + (peersPolled : int) + (changed : bool) + (conflicts : int) + (skews : int) + : Task = + task { + do! + Sql.query + """ + INSERT INTO sync_daemon_events (peers_polled, changed, conflicts, skews) + VALUES (@p, @c, @x, @s) + """ + |> Sql.parameters + [ "p", Sql.int peersPolled + "c", Sql.int (if changed then 1 else 0) + "x", Sql.int conflicts + "s", Sql.int skews ] + |> Sql.executeStatementAsync + do! + Sql.query + """ + DELETE FROM sync_daemon_events + WHERE id NOT IN (SELECT id FROM sync_daemon_events ORDER BY id DESC LIMIT 500) + """ + |> Sql.executeStatementAsync + } + +/// The most recent `limit` daemon cycles, newest first — `(at, peersPolled, changed, conflicts, skews)`. +let recentDaemonEvents (limit : int) : Task> = + Sql.query + """ + SELECT at, peers_polled, changed, conflicts, skews + FROM sync_daemon_events ORDER BY id DESC LIMIT @n + """ + |> Sql.parameters [ "n", Sql.int limit ] + |> Sql.executeAsync (fun read -> + (read.string "at", + read.int "peers_polled", + read.int "changed", + read.int "conflicts", + read.int "skews")) + + /// The item kind of a content hash — needed to rebuild a `SetName` Reference for a keep-local /// reconcile (the transport surfaced the divergence as data, so the original op isn't retained). /// Reads `locations.item_type` first — that's the kind of the very binding we're restoring, present diff --git a/backend/testfiles/execution/pre-s-and-s/sync-cli.dark b/backend/testfiles/execution/pre-s-and-s/sync-cli.dark index 076aa04d54..2281809523 100644 --- a/backend/testfiles/execution/pre-s-and-s/sync-cli.dark +++ b/backend/testfiles/execution/pre-s-and-s/sync-cli.dark @@ -22,3 +22,10 @@ Darklang.Sync.Display.intervalLabel 2000L = "2s" Darklang.Sync.Display.intervalLabel 60000L = "60s" Darklang.Sync.Display.intervalLabel 500L = "500ms" Darklang.Sync.Display.intervalLabel 1500L = "1500ms" + +// daemon-events table: empty → a start hint; otherwise a header line + one line per cycle +Darklang.Sync.Display.daemonEventsTable [] = [ "no daemon activity yet — start it with `dark apps start sync` (or `sync daemon start`)" ] +Stdlib.List.length (Darklang.Sync.Display.daemonEventsTable [(("2026-06-10T20:00:00.000Z", 3L, true, 1L, 0L))]) = 2L +// an idle cycle row contains the peer count and the idle marker +Stdlib.String.contains (Darklang.Sync.Display.daemonEventRow "now" 1L false 0L 0L) "1 peer(s)" = true +Stdlib.String.contains (Darklang.Sync.Display.daemonEventRow "now" 2L true 3L 1L) "3 conflict(s)" = true diff --git a/packages/darklang/cli/apps/command.dark b/packages/darklang/cli/apps/command.dark index 14e653099c..54d86a9a1c 100644 --- a/packages/darklang/cli/apps/command.dark +++ b/packages/darklang/cli/apps/command.dark @@ -212,6 +212,20 @@ let execute (state: Cli.AppState) (args: List) : Cli.AppState = | [ "inspect"; slug ] -> inspect state slug + | [ "enable"; slug; "--boot" ] -> + // like `enable`, plus start-at-boot (not just login) via user-service lingering + match Registry.findBySlug slug with + | None -> Stdlib.printLine (Colors.error $"no app '{slug}' (see `apps list-available`)") + | Some app -> + match Service.enable app with + | Error e -> Stdlib.printLine (Colors.error e) + | Ok msg -> + Stdlib.printLine (Colors.success $"✓ {msg}") + match Service.enableLinger () with + | Ok m -> Stdlib.printLine (Colors.success $"✓ {m}") + | Error e -> Stdlib.printLine (Colors.warning e) + state + | [ "enable"; slug ] -> match Registry.findBySlug slug with | None -> Stdlib.printLine (Colors.error $"no app '{slug}' (see `apps list-available`)") diff --git a/packages/darklang/cli/apps/registry.dark b/packages/darklang/cli/apps/registry.dark index cbd35d0007..1c3428431f 100644 --- a/packages/darklang/cli/apps/registry.dark +++ b/packages/darklang/cli/apps/registry.dark @@ -30,6 +30,11 @@ let available () : List = slug = "text-editor" description = "A tiny text editor — the outliner's TextEditor widget surfaced as an app" target = Model.Target.Foreground "text-editor" } + Model.App + { name = "Sync" + slug = "sync" + description = "Always-on package sync across your tailnet (poll every peer, adaptive interval)" + target = Model.Target.Daemon "Darklang.Sync.Daemon.runManaged" } Model.App { name = "Dark Packages HTTP Server" slug = "dark-packages" diff --git a/packages/darklang/cli/apps/service.dark b/packages/darklang/cli/apps/service.dark index a5c1837216..cd532ba6f0 100644 --- a/packages/darklang/cli/apps/service.dark +++ b/packages/darklang/cli/apps/service.dark @@ -68,6 +68,21 @@ let enable (app: Model.App) : Stdlib.Result.Result = | Ok _ -> Stdlib.Result.Result.Ok $"enabled — {app.name} will start on login (launchd: com.darklang.{app.slug})" | Error _ -> Stdlib.Result.Result.Ok $"wrote {path} — activate with: {activate}" +/// Linux only: enable user-service "lingering" so `enable`d daemons start at BOOT, not just at login +/// (`loginctl enable-linger`). macOS launch agents already load at login, which on a personal machine +/// is effectively "when you're at the PC". +let enableLinger () : Stdlib.Result.Result = + match platform () with + | Linux -> + match Stdlib.Cli.Process.exec "/bin/sh" [ "-c"; "loginctl enable-linger" ] with + | Ok _ -> + Stdlib.Result.Result.Ok "lingering enabled — user services start at boot (no login needed)" + | Error _ -> + Stdlib.Result.Result.Error "could not enable lingering — run: loginctl enable-linger" + | Mac -> Stdlib.Result.Result.Ok "macOS launch agents already start at login" + | Unsupported -> + Stdlib.Result.Result.Error "auto-start isn't supported on this platform (Linux / macOS only)" + /// Remove the OS service for `slug` (deactivate + delete the unit). Idempotent. let disable (slug: String) : Stdlib.Result.Result = match platform () with diff --git a/packages/darklang/cli/remote.dark b/packages/darklang/cli/remote.dark index d2a688bea8..ff411a240b 100644 --- a/packages/darklang/cli/remote.dark +++ b/packages/darklang/cli/remote.dark @@ -2,7 +2,7 @@ module Darklang.Cli.Remote // `dark remote` — the sync SETUP surface: register the tailnet peers this instance syncs with. A // remote is a (name, url) where url is an http(s) sync-server URL or a local data.db path. The -// tailnet-wide daemon (`dark apps start sync-daemon`) polls every registered remote — so `remote +// tailnet-wide daemon (`dark apps start sync`) polls every registered remote — so `remote // add` lets you wire a peer once and have it sync automatically, without a manual `sync pull` first. // Remotes are LOCAL setup (not synced), like git's. diff --git a/packages/darklang/cli/sync.dark b/packages/darklang/cli/sync.dark index ffa9186591..1d461a4bc2 100644 --- a/packages/darklang/cli/sync.dark +++ b/packages/darklang/cli/sync.dark @@ -147,7 +147,7 @@ let execute (state: AppState) (args: List) : AppState = state | [ "daemon" ] | [ "daemon"; "status" ] -> // the background auto-sync daemon's state (running / stopped / stale), via its pidfile - Stdlib.printLine (Darklang.Sync.Daemon.statusLine "sync-daemon") + Stdlib.printLine (Darklang.Sync.Daemon.statusLine "sync") state | [ "daemon"; "start" ] -> // tailnet-wide: poll EVERY known peer on an adaptive interval, detached (survives CLI exit) @@ -162,21 +162,26 @@ let execute (state: AppState) (args: List) : AppState = | Error e -> Stdlib.printLine (Colors.error e) state | [ "daemon"; "stop" ] -> - match Darklang.Sync.Daemon.stop "sync-daemon" with + match Darklang.Sync.Daemon.stop "sync" with | Ok msg -> Stdlib.printLine (Colors.success msg) | Error e -> Stdlib.printLine (Colors.error e) state | [ "daemon"; "logs" ] -> - let lines = Darklang.Sync.Daemon.tailLog "sync-daemon" 20L + let lines = Darklang.Sync.Daemon.tailLog "sync" 20L if Stdlib.List.isEmpty lines then Stdlib.printLine (Colors.dimText "no daemon logs yet (has it run?)") else Stdlib.printLines lines state + | [ "events" ] -> + // the autosync daemon's recent poll cycles as a structured table (telemetry, newest first) + let rows = Builtin.pmSyncRecentDaemonEvents 20L + Stdlib.printLines (Darklang.Sync.Display.daemonEventsTable rows) + state | _ -> Stdlib.printLine ( Colors.error - "Usage: sync pull | status | auto [times] | check | daemon start/stop/status/logs" + "Usage: sync pull | status | auto [times] | check | daemon start/stop/status/logs | events" ) state @@ -190,11 +195,12 @@ let complete Completion.simple "status" Completion.simple "auto" Completion.simple "check" - Completion.simple "daemon" ] + Completion.simple "daemon" + Completion.simple "events" ] let help (state: AppState) : AppState = - [ "Usage: sync pull | status | auto [times] | check | daemon start/stop/status/logs" + [ "Usage: sync pull | status | auto [times] | check | daemon start/stop/status/logs | events" "" "Sync package ops across instances (tailnet-wide). The receiver applies ops idempotently" "(op log + projections) and resumes from a per-peer cursor, so re-running only applies what's new." @@ -209,6 +215,9 @@ let help (state: AppState) : AppState = " daemon stop Stop the background daemon" " daemon status Is the daemon running?" " daemon logs Tail recent daemon output" + " events Recent daemon poll cycles as a table (structured telemetry)" + "" + "Tip: `dark apps enable sync --boot` keeps it running across reboots (systemd/launchd)." "" "To SERVE ops to peers (run on the always-on box, e.g. the desktop):" " dark serve Darklang.Sync.Server.router --port " diff --git a/packages/darklang/sync/autosync.dark b/packages/darklang/sync/autosync.dark index 994a2861b3..5f51b47e2d 100644 --- a/packages/darklang/sync/autosync.dark +++ b/packages/darklang/sync/autosync.dark @@ -148,6 +148,14 @@ let runLoopAll (intervalMs: Int64) (times: Int64) : Int64 = let (sawChanges, conflicts, skews) = pollAllPeers peers let nextMs = nextPollMs sawChanges intervalMs + // record this cycle as structured telemetry (read back by `sync events` / a dashboard view) + let _ = + Builtin.pmSyncRecordDaemonEvent + (Stdlib.List.length peers) + sawChanges + conflicts + (Stdlib.List.length skews) + let conflictNote = if conflicts > 0L then $", {Stdlib.Int64.toString conflicts} conflict(s) auto-resolved" diff --git a/packages/darklang/sync/daemon.dark b/packages/darklang/sync/daemon.dark index 42a368c2ab..d23d6bcf2b 100644 --- a/packages/darklang/sync/daemon.dark +++ b/packages/darklang/sync/daemon.dark @@ -6,7 +6,9 @@ module Darklang.Sync.Daemon // the shared `Stdlib.Cli.Daemon` substrate (the same one `dark apps` daemons use). Only the sync-specific // entrypoints and the launch expression live here. -let private daemonName : String = "sync-daemon" +// One identity for the daemon across every control surface: the manual `sync daemon …` subcommands, +// `apps start/stop/status/logs sync`, and the `apps enable sync` OS service all manage this pidfile. +let private daemonName : String = "sync" // ── lifecycle: thin pass-throughs to the shared daemon substrate ── @@ -34,6 +36,16 @@ let runDaemonAll (intervalMs: Int64) : Int64 = Darklang.Sync.Autosync.runLoopAll intervalMs 1000000000L +// The apps-managed entrypoint: `apps start sync` evals `runManaged "sync"` (the substrate passes the +// app slug). Claims the pidfile under that slug, then polls every peer forever. The start interval is +// the adaptive floor — it converges within a cycle or two regardless, so it isn't configurable here. +let runManaged (name: String) : Int64 = + let _ = Stdlib.Cli.Daemon.claimPidfile name + Stdlib.printLine + $"sync-daemon: started for all peers (pid {Stdlib.Int64.toString (Stdlib.Cli.Sys.currentPid ())})" + Darklang.Sync.Autosync.runLoopAll 2000L 1000000000L + + // ── launch: spawn the entrypoint detached + verified (refuses if already running) via the substrate ── let start (peer: String) (intervalMs: Int64) : Stdlib.Result.Result = diff --git a/packages/darklang/sync/display.dark b/packages/darklang/sync/display.dark index ad35b7209a..b2f7cb6c33 100644 --- a/packages/darklang/sync/display.dark +++ b/packages/darklang/sync/display.dark @@ -148,3 +148,47 @@ let conflictReport Stdlib.List.append (Stdlib.List.append [ header; "" ] blocks) [ ""; footer ] + + +// ── daemon activity (`sync events`): the autosync telemetry as a compact table ── +// Pure over the structured rows `(at, peersPolled, changed, conflicts, skews)` from +// `pmSyncRecentDaemonEvents` (newest first) — so the display is package-testable. + +let private padR (s: String) (w: Int64) : String = + let len = Stdlib.String.length s + if len >= w then s else s ++ (Stdlib.String.repeat " " (w - len)) + +let daemonEventRow + (at: String) + (peers: Int64) + (changed: Bool) + (conflicts: Int64) + (skews: Int64) + : String = + let result = if changed then "↓ changes" else "· idle" + let conflictPart = + if conflicts > 0L then $" {Stdlib.Int64.toString conflicts} conflict(s)" else "" + let skewPart = + if skews > 0L then $" {Stdlib.Int64.toString skews} paused" else "" + + (padR at 26L) + ++ " " + ++ (padR ((Stdlib.Int64.toString peers) ++ " peer(s)") 10L) + ++ " " + ++ (padR result 11L) + ++ conflictPart + ++ skewPart + +let daemonEventsTable + (events: List<(String * Int64 * Bool * Int64 * Int64)>) + : List = + if Stdlib.List.isEmpty events then + [ "no daemon activity yet — start it with `dark apps start sync` (or `sync daemon start`)" ] + else + let header = (padR "WHEN" 26L) ++ " " ++ (padR "PEERS" 10L) ++ " " ++ "RESULT" + let rows = + events + |> Stdlib.List.map (fun (at, peers, changed, conflicts, skews) -> + daemonEventRow at peers changed conflicts skews) + + Stdlib.List.append [ header ] rows From 03284964bb7f0b059f55ce91a6d98b825bd20a3a Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 01:05:10 -0400 Subject: [PATCH 10/25] Harden and finalize stable + syncing Fixes and tests on top of the op-log / durable-canon / meaning-stable-hashing foundation, bringing the branch to a mergeable, deployable state: - Boot: a fresh build crashed in durable-canon because schema.sql's projection tables lacked `description` (it lived only in an incremental). Add it to schema.sql, drop the incremental, guard it with a regression test. - Portable login: config was written to a dead relative path so login never persisted; derive the config path from the executable dir (beside data.db). - Conflict overrides now propagate cross-machine: `conflicts resolve mine` used to re-stamp the existing op in place (same commit-rowid, so peers that had already pulled it never re-adopted). Emit a distinct OverrideName op carrying a resolver stamp so it rides the next incremental pull and wins LWW; tested with a binary round-trip and a receiver-side end-to-end test. - Sync commands return a non-zero exit code on failure (a script wrapping `dark sync pull` can now detect it). - Single-peer sync daemon records telemetry, so `dark sync events` is populated for `daemon start `, not only the tailnet-wide loop. - Don't crash reading an unevaluated package value (rt_dval NULL). - Schema housekeeping: move the migrator bookkeeping tables and the package_ops composite-PK declaration into schema.sql (single source for all CREATE TABLE). --- ...260519_133237_package_ops_composite_pk.sql | 29 ----- .../20260527_211202_package_description.sql | 4 - backend/migrations/incremental/README.md | 29 +++-- backend/migrations/schema.sql | 30 ++++- backend/src/LibDB/PackageManager.fs | 6 + backend/src/LibDB/PackageOpPlayback.fs | 44 +++++-- backend/src/LibDB/Releases.fs | 6 +- backend/src/LibDB/RuntimeTypes.fs | 4 +- backend/src/LibDB/Seed.fs | 3 + backend/src/LibDB/Sync.fs | 64 +++++----- backend/src/LibExecution/Interpreter.fs | 2 +- backend/src/LibExecution/ProgramTypes.fs | 11 ++ .../LibExecution/ProgramTypesToDarkTypes.fs | 5 + .../Binary/Serializers/PT/PackageOp.fs | 10 ++ backend/src/LocalExec/Migrations.fs | 40 +------ .../execution/pre-s-and-s/sync-check.dark | 8 +- backend/tests/Tests/OpsProjections.Tests.fs | 18 +++ backend/tests/Tests/SyncScenarios.Tests.fs | 112 +++++++++++++++--- packages/darklang/cli/config.dark | 29 ++++- packages/darklang/cli/conflicts.dark | 4 +- packages/darklang/cli/core.dark | 11 +- packages/darklang/cli/sync.dark | 8 +- packages/darklang/sync/autosync.dark | 20 +++- 23 files changed, 327 insertions(+), 170 deletions(-) delete mode 100644 backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql delete mode 100644 backend/migrations/incremental/20260527_211202_package_description.sql diff --git a/backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql b/backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql deleted file mode 100644 index 4c4c6ea299..0000000000 --- a/backend/migrations/incremental/20260519_133237_package_ops_composite_pk.sql +++ /dev/null @@ -1,29 +0,0 @@ --- Ghost-function fix: PK was bare `id` (op content hash), so an --- identical op on two branches hashed equal and the second INSERT was --- dropped by `INSERT OR IGNORE` — fn invisible everywhere though `fn` --- printed `✓ Created`. PK becomes (id, branch_id), so IGNORE only --- catches true within-branch repeats. - -DROP TABLE package_ops; - -CREATE TABLE package_ops ( - id TEXT NOT NULL, - op_blob BLOB NOT NULL, - branch_id TEXT NOT NULL REFERENCES branches(id), - commit_hash TEXT REFERENCES commits(hash), - applied INTEGER NOT NULL DEFAULT 0, - propagation_id TEXT NULL, - created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')), - -- sync timestamp-LWW (must mirror schema.sql; this rebuild re-runs on kill-and-fill) - origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), - PRIMARY KEY (id, branch_id) -); - -CREATE INDEX IF NOT EXISTS idx_package_ops_wip - ON package_ops(branch_id) WHERE commit_hash IS NULL; -CREATE INDEX IF NOT EXISTS idx_package_ops_created ON package_ops(created_at); -CREATE INDEX IF NOT EXISTS idx_package_ops_applied - ON package_ops(applied) WHERE applied = 0; -CREATE INDEX IF NOT EXISTS idx_package_ops_commit_hash ON package_ops(commit_hash); -CREATE INDEX IF NOT EXISTS idx_package_ops_propagation_id - ON package_ops(propagation_id) WHERE propagation_id IS NOT NULL; diff --git a/backend/migrations/incremental/20260527_211202_package_description.sql b/backend/migrations/incremental/20260527_211202_package_description.sql deleted file mode 100644 index 94ecdcb971..0000000000 --- a/backend/migrations/incremental/20260527_211202_package_description.sql +++ /dev/null @@ -1,4 +0,0 @@ --- Plain-text doc comments for SQL package search. -ALTER TABLE package_types ADD COLUMN description TEXT NOT NULL DEFAULT ''; -ALTER TABLE package_values ADD COLUMN description TEXT NOT NULL DEFAULT ''; -ALTER TABLE package_functions ADD COLUMN description TEXT NOT NULL DEFAULT ''; diff --git a/backend/migrations/incremental/README.md b/backend/migrations/incremental/README.md index d4c400c37d..f05199439f 100644 --- a/backend/migrations/incremental/README.md +++ b/backend/migrations/incremental/README.md @@ -1,18 +1,17 @@ -# Incremental migrations +# Incremental migrations (legacy escape hatch — empty by default) -This directory holds per-file additive migrations, run in lexical -order on top of `../schema.sql` and name-dedup'd via -`system_migrations_v0`. Empty by default. +**The schema lives in one place: `../schema.sql`.** It is the single, complete source of truth — every +table, column, and index is defined there in its final form. `schema.sql` is content-hashed and +re-applied on change (durable-canon: regenerable projections are dropped + re-folded from the op log; +the canonical op log / blobs / branch state come through intact). Edit `schema.sql` directly. -When to add a file here vs editing `schema.sql`: +**A versioned change that must coordinate across instances goes through the Release migrator** +(`backend/src/LibDB/Releases.fs`): one `Release` step can carry forward SQL (copy-and-swap), an optional +op-format re-serialize, and a projection re-fold — gated by the single Release coordinate that also gates +cross-instance sync. That's the mechanism for future schema/format evolution. -- **Edit `schema.sql`**: structural redesigns, adding a new table or - column where rebuilding from source is fine. The file is hashed + - kill-and-fill'd; data in the affected tables is lost. -- **Add a file here**: data backfills, transforms, additive - alterations on populated dev/test DBs you don't want to nuke. - -File naming: `YYYYMMDD_HHMMSS_.sql` so lexical sort gives -chronological order. First line may be the literal `--#[no_tx]` to -skip the wrapping transaction (rare; for DDL SQLite refuses inside -one). +This `incremental/` directory is a rarely-needed escape hatch for an additive backfill on a populated dev +DB you don't want to rebuild. It is **empty by default** and you almost never want a file here — prefer +`schema.sql` (+ a Release step when the change must travel). Files run in lexical order, name-dedup'd via +`system_migrations_v0`; naming is `YYYYMMDD_HHMMSS_.sql`, and a first line of the literal +`--#[no_tx]` skips the wrapping transaction (rare; for DDL SQLite refuses inside one). diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index 12d52462ae..797194a37a 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -5,9 +5,9 @@ -- no "build vN then DROP and rebuild as vN+1" — kill-and-fill means the -- final shape is what runs against an empty DB. -- --- system_migrations_v0 (the legacy per-named-migration table) is the one --- exception, since legacy DBs are adopted via that table; created --- here AND by Migrations.fs's adoptLegacyDB path. +-- The migrator's OWN bookkeeping tables live here too (the schema-hash stamp, the Release coordinate, +-- the legacy per-named-migration log) — schema.sql is the single home for every CREATE TABLE; the +-- migrator code only reads/writes rows. -- -- Order: bookkeeping → branches → commits → ops → package projections → -- locations → traces → user-data, toplevels, scripts. FK targets come @@ -24,6 +24,20 @@ CREATE TABLE IF NOT EXISTS system_migrations_v0 ( sql TEXT NOT NULL ); +-- The schema-hash stamp: the hash of THIS file when last applied, so a change is detected and triggers +-- a durable-canon preserve-and-refold. Rows written by `LocalExec/Migrations.fs` after applying schema.sql. +CREATE TABLE IF NOT EXISTS schema_state_v0 ( + id INTEGER PRIMARY KEY, + hash TEXT NOT NULL +); + +-- The store's Release coordinate (the op-format/language/hash version) — the same integer that gates +-- cross-instance sync. Rows written by the Release migrator (`LibDB/Releases.fs`). +CREATE TABLE IF NOT EXISTS release_state_v0 ( + id INTEGER PRIMARY KEY, + release INTEGER NOT NULL +); + CREATE TABLE IF NOT EXISTS accounts_v0 ( id TEXT PRIMARY KEY, @@ -89,7 +103,9 @@ CREATE INDEX IF NOT EXISTS idx_commits_branch -- The source of truth for all package changes (branch-scoped). CREATE TABLE IF NOT EXISTS package_ops ( - id TEXT PRIMARY KEY, + -- (id, branch_id) is the PK: the SAME content-addressed op id can exist on different branches, so an + -- op is identified by id WITHIN a branch (a branch is a self-contained op stream). + id TEXT NOT NULL, op_blob BLOB NOT NULL, branch_id TEXT NOT NULL REFERENCES branches(id), commit_hash TEXT REFERENCES commits(hash), -- NULL = WIP @@ -101,7 +117,8 @@ CREATE TABLE IF NOT EXISTS package_ops ( -- receiver writes the peer's value), so every instance agrees on a given op's origin_ts and -- max(origin_ts) picks the same divergence winner → no swap. Distinct from `created_at` (which -- is local-insert time and differs per instance for the same op). - origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')) + origin_ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), + PRIMARY KEY (id, branch_id) ); CREATE INDEX IF NOT EXISTS idx_package_ops_wip ON package_ops(branch_id) WHERE commit_hash IS NULL; @@ -133,6 +150,7 @@ CREATE TABLE IF NOT EXISTS package_types ( hash TEXT PRIMARY KEY, pt_def BLOB NOT NULL, rt_def BLOB NOT NULL, + description TEXT NOT NULL DEFAULT '', -- plain-text doc comment (not hashed) created_at TEXT NOT NULL DEFAULT (datetime('now')) ); @@ -141,6 +159,7 @@ CREATE TABLE IF NOT EXISTS package_values ( pt_def BLOB NOT NULL, rt_dval BLOB, -- NULL until evaluated value_type BLOB, -- for finding values of a given ValueType + description TEXT NOT NULL DEFAULT '', -- plain-text doc comment (not hashed) created_at TEXT NOT NULL DEFAULT (datetime('now')) ); CREATE INDEX IF NOT EXISTS idx_package_values_type ON package_values(value_type); @@ -149,6 +168,7 @@ CREATE TABLE IF NOT EXISTS package_functions ( hash TEXT PRIMARY KEY, pt_def BLOB NOT NULL, rt_instrs BLOB NOT NULL, + description TEXT NOT NULL DEFAULT '', -- plain-text doc comment (not hashed) created_at TEXT NOT NULL DEFAULT (datetime('now')) ); diff --git a/backend/src/LibDB/PackageManager.fs b/backend/src/LibDB/PackageManager.fs index 01bf3e8198..4017bfdc38 100644 --- a/backend/src/LibDB/PackageManager.fs +++ b/backend/src/LibDB/PackageManager.fs @@ -124,6 +124,12 @@ let createInMemory (ops : List) : PT.PackageManager = | PT.PackageType h -> typeLocations.Add(loc, h) | PT.PackageValue h -> valueLocations.Add(loc, h) | PT.PackageFn h -> fnLocations.Add(loc, h) + | PT.PackageOp.OverrideName(loc, target, _) -> + // an override binds a name just like SetName + match target with + | PT.PackageType h -> typeLocations.Add(loc, h) + | PT.PackageValue h -> valueLocations.Add(loc, h) + | PT.PackageFn h -> fnLocations.Add(loc, h) | PT.PackageOp.AddType _ | PT.PackageOp.AddValue _ | PT.PackageOp.AddFn _ -> () diff --git a/backend/src/LibDB/PackageOpPlayback.fs b/backend/src/LibDB/PackageOpPlayback.fs index b6fad4cad1..76fb8b24ec 100644 --- a/backend/src/LibDB/PackageOpPlayback.fs +++ b/backend/src/LibDB/PackageOpPlayback.fs @@ -261,6 +261,13 @@ let private applyAddFn (ctx : Ctx) (fn : PT.PackageFn.PackageFn) : Task = /// branchId = branch context, commitHash = None means WIP, Some id means committed. /// isRename = true when this SetName is a standalone rename (not paired with Add*), /// meaning old locations for the same hash should be deprecated. +/// The op's id as stored in `package_ops` (UUID derived from the content hash) — used to read the op's +/// own `origin_ts` back. Must be computed from the ACTUAL op (`SetName` vs `OverrideName` hash to +/// different ids), so an override reads its own resolver stamp, not the original SetName's stale one. +let private opIdOf (op : PT.PackageOp) : System.Guid = + let (Hash h) = LibSerialization.Hashing.Hashing.computeOpHash op + System.Guid(System.Convert.FromHexString(h)[0..15]) + let private applySetName (ctx : Ctx) (branchId : PT.BranchId) @@ -269,6 +276,7 @@ let private applySetName (itemHash : Hash) (location : PT.PackageLocation) (itemKind : PT.ItemKind) + (opId : System.Guid) : Task = task { let modulesStr = String.concat "." location.modules @@ -285,20 +293,11 @@ let private applySetName // order. Unknown stamps (op not in package_ops / pre-origin_ts data) → no skip = prior last-writer // behavior, so non-sync playback (seed grow, local authoring) is unchanged. Reads run on ctx.conn so // they see writes from earlier ops in this same applyOps transaction. - let thisOp = - PT.PackageOp.SetName( - location, - PT.Reference.fromHashAndKind (itemHash, itemKind) - ) - let (Hash thisOpHashStr) = LibSerialization.Hashing.Hashing.computeOpHash thisOp - let thisOpId = System.Guid(System.Convert.FromHexString(thisOpHashStr)[0..15]) - let! thisTs = task { use cmd = ctx.conn.CreateCommand() cmd.CommandText <- "SELECT origin_ts FROM package_ops WHERE id = $id" - cmd.Parameters.AddWithValue("$id", string thisOpId) - |> ignore + cmd.Parameters.AddWithValue("$id", string opId) |> ignore use! reader = cmd.ExecuteReaderAsync() let! hasRow = reader.ReadAsync() if hasRow && not (reader.IsDBNull 0) then @@ -623,7 +622,30 @@ let private applyOp | PT.PackageOp.AddFn fn -> do! applyAddFn ctx fn | PT.PackageOp.SetName(loc, target) -> let isRename = not (Set.contains target.hash addedHashes) - do! applySetName ctx branchId commitHash isRename target.hash loc target.kind + do! + applySetName + ctx + branchId + commitHash + isRename + target.hash + loc + target.kind + (opIdOf op) + | PT.PackageOp.OverrideName(loc, target, _resolvedAt) -> + // Folds exactly like SetName — re-bind the location to `target`. The op's `origin_ts` (a fresh + // resolver stamp) is the newest, so the timestamp-LWW playback re-activates this binding. + let isRename = not (Set.contains target.hash addedHashes) + do! + applySetName + ctx + branchId + commitHash + isRename + target.hash + loc + target.kind + (opIdOf op) | PT.PackageOp.Deprecate(target, kind, message) -> do! applyDeprecate ctx branchId commitHash target kind message | PT.PackageOp.Undeprecate target -> diff --git a/backend/src/LibDB/Releases.fs b/backend/src/LibDB/Releases.fs index 373318792d..f2582f89cf 100644 --- a/backend/src/LibDB/Releases.fs +++ b/backend/src/LibDB/Releases.fs @@ -99,11 +99,9 @@ let storedRelease () : int option = | Ok [ r ] -> Some(int r) | _ -> None -/// Stamp the store at Release `n`. +/// Stamp the store at Release `n`. `release_state_v0` is defined in schema.sql and created when it's +/// applied (the schema bootstrap runs before the Release migrator), so this just writes the row. let writeRelease (n : int) : unit = - Sql.query - $"CREATE TABLE IF NOT EXISTS {releaseTable} (id INTEGER PRIMARY KEY, release INTEGER NOT NULL)" - |> Sql.executeStatementSync Sql.query $"INSERT OR REPLACE INTO {releaseTable} (id, release) VALUES (0, @r)" |> Sql.parameters [ "r", Sql.int64 (int64 n) ] |> Sql.executeStatementSync diff --git a/backend/src/LibDB/RuntimeTypes.fs b/backend/src/LibDB/RuntimeTypes.fs index ff33f56a2d..2a24d36f92 100644 --- a/backend/src/LibDB/RuntimeTypes.fs +++ b/backend/src/LibDB/RuntimeTypes.fs @@ -32,12 +32,14 @@ module Value = let get (hash : Hash) : Ply> = uply { let (Hash hashStr) = hash + // `rt_dval` is NULL until the value is evaluated (Seed.evaluateAllValues). Filter NULLs out so an + // unevaluated value reads as "not materialized" (None) instead of crashing `read.bytes` on a NULL. return! Sql.query """ SELECT rt_dval FROM package_values - WHERE hash = @hash + WHERE hash = @hash AND rt_dval IS NOT NULL """ |> Sql.parameters [ "hash", Sql.string hashStr ] |> Sql.executeRowOptionAsync (fun read -> read.bytes "rt_dval") diff --git a/backend/src/LibDB/Seed.fs b/backend/src/LibDB/Seed.fs index ce3b2473c9..c292501dd6 100644 --- a/backend/src/LibDB/Seed.fs +++ b/backend/src/LibDB/Seed.fs @@ -285,6 +285,9 @@ let opKindName (op : PT.PackageOp) : string = | PT.PackageOp.AddValue _ -> "AddValue" | PT.PackageOp.AddFn _ -> "AddFn" | PT.PackageOp.SetName _ -> "SetName" + // An override folds exactly like SetName (same `locations` projection, same rename detection), so for + // the dirty-tracking refold it IS a SetName — this keeps it in SetName's dirtied-set + Add* handling. + | PT.PackageOp.OverrideName _ -> "SetName" | PT.PackageOp.Deprecate _ -> "Deprecate" | PT.PackageOp.Undeprecate _ -> "Undeprecate" | PT.PackageOp.PropagateUpdate _ -> "PropagateUpdate" diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index bfb3dc59f8..ccf8cc8d16 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -294,24 +294,34 @@ let private kindOfHash (hash : string) : Task> = return rows |> List.tryHead |> Option.map PT.ItemKind.fromString } -/// Re-stamp a keep-local override op's `origin_ts` to now and re-fold it. Shared by the automatic -/// keep-local policy (`routeDivergences`) and the human 'mine' override (`resolveConflict`): both -/// re-bind a location to OUR hash by re-stamping the op that first bound it (a `SetName` content- -/// identical to one already in the log, so it's addressed by `computeOpHash`) and re-folding it -/// directly. The fresh stamp makes it win timestamp-LWW locally AND rides sync so peers re-adopt it. -/// `applyOps` (not `insertAndApplyOps`) because the op is already in the log — `insertAndApplyOps` -/// only folds NEWLY-inserted ops, so the binding would never flip back. -let private restampAndRefold +/// Re-bind a location to OUR hash as a deliberate override ("keep mine"). Shared by the automatic +/// keep-local policy (`routeDivergences`) and the human 'mine' override (`resolveConflict`). +/// +/// It emits a DISTINCT `OverrideName` op carrying a fresh resolver stamp — so its content hash (and thus +/// its op id and commit-rowid) differs from the original `SetName`. That distinctness is the whole point: +/// sync is incremental by commit-rowid, so re-stamping the original op IN PLACE (same rowid) would never +/// reach a peer that already pulled it. A fresh `OverrideName` op rides the next incremental pull, and its +/// resolver stamp (the newest `origin_ts`) wins timestamp-LWW — so peers actually adopt our choice. +/// +/// The op is inserted as WIP (uncommitted) and folded immediately (re-binds locally now); the user's next +/// `commit` ships it to peers. +let private overrideBinding (branchId : PT.BranchId) - (mineOp : PT.PackageOp) + (loc : PT.PackageLocation) + (target : PT.Reference) : Task = task { - do! - Sql.query - "UPDATE package_ops SET origin_ts = strftime('%Y-%m-%dT%H:%M:%fZ','now') WHERE id = @id" - |> Sql.parameters [ "id", Sql.uuid (Inserts.computeOpHash mineOp) ] - |> Sql.executeStatementAsync - do! PackageOpPlayback.applyOps branchId None [ mineOp ] + let resolvedAt = System.DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffZ") + let overrideOp = PT.PackageOp.OverrideName(loc, target, resolvedAt) + // stamp origin_ts = the resolver time (newest) so playback's timestamp-LWW re-activates this binding + let opId = Inserts.computeOpHash overrideOp + let! _inserted = + Inserts.insertAndApplyOpsWithOrigin + branchId + None + [ overrideOp ] + (Map.ofList [ (opId, resolvedAt) ]) + return () } /// Route each detected divergence through the runtime conflict-dispatch seam @@ -342,20 +352,15 @@ let routeDivergences let! resolution = dispatch conflict callCtx |> Ply.toTask match resolution with | RT.RSubstitute(RT.DString keepHash) when keepHash = existingHash -> - // keep local: re-bind the location to our existing hash via `restampAndRefold` (the same move - // a human 'mine' override makes in `resolveConflict`). The re-fold re-binds locally regardless - // of whether the re-stamp matched a row, so the worst case (the original op somehow absent from - // the log) is a non-propagating override, never a wrong local binding. + // keep local: re-bind the location to our existing hash via `overrideBinding` (the same move a + // human 'mine' override makes in `resolveConflict`) — a fresh `OverrideName` op that rides sync + // so peers re-adopt our hash too. match! kindOfHash existingHash with | Some kind -> match parseLocation location with | Some loc -> - let mineOp = - PT.PackageOp.SetName( - loc, - PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) - ) - do! restampAndRefold branchId mineOp + let target = PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) + do! overrideBinding branchId loc target do! Conflicts.markOverriddenByLocation remote location reconciled <- reconciled + 1 | None -> () @@ -613,9 +618,9 @@ let resolveConflict (conflictId : string) (keepMine : bool) : Task = return true else // "mine" — re-bind the location to our hash. Parse the FQ "owner[.modules].name", read the - // binding's kind + branch from `locations`, then re-stamp + re-fold a SetName to our hash. A - // human override is the LATEST decision, so `restampAndRefold` makes it win timestamp-LWW - // (last-resolver-wins) and ride sync so peers re-adopt our hash too. + // binding's kind + branch from `locations`, then emit an `OverrideName` op for our hash. A human + // override is the LATEST decision, so `overrideBinding` makes it win timestamp-LWW + // (last-resolver-wins) and — as a distinct op with a fresh rowid — rides sync so peers re-adopt it. match parseLocation c.location with | Some loc -> let modulesStr = String.concat "." loc.modules @@ -635,8 +640,7 @@ let resolveConflict (conflictId : string) (keepMine : bool) : Task = | (itemType, branchId) :: _ -> let kind = PT.ItemKind.fromString itemType let target = PT.Reference.fromHashAndKind (PT.Hash c.localHash, kind) - let mineOp = PT.PackageOp.SetName(loc, target) - do! restampAndRefold branchId mineOp + do! overrideBinding branchId loc target do! Conflicts.markOverridden c.id return true | [] -> return false // the location no longer exists locally diff --git a/backend/src/LibExecution/Interpreter.fs b/backend/src/LibExecution/Interpreter.fs index db9b7acbf8..9d09b96215 100644 --- a/backend/src/LibExecution/Interpreter.fs +++ b/backend/src/LibExecution/Interpreter.fs @@ -320,7 +320,7 @@ let rec private executeInner (exeState : ExecutionState) (vm : VMState) : Ply "AddFn", [ PackageFn.toDT f ] | PT.PackageOp.SetName(loc, target) -> "SetName", [ PackageLocation.toDT loc; Reference.toDT target ] + // An override binds a name like SetName; Dark code never authors one (it's created in F# by the + // conflict resolver), so it surfaces to the Dark side as a plain SetName — the `resolvedAt` stamp + // exists only to distinguish the op for sync and isn't part of the binding's meaning. + | PT.PackageOp.OverrideName(loc, target, _resolvedAt) -> + "SetName", [ PackageLocation.toDT loc; Reference.toDT target ] | PT.PackageOp.Deprecate(target, kind, message) -> "Deprecate", [ Reference.toDT target; DeprecationKind.toDT kind; DString message ] diff --git a/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs b/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs index 44176c5e97..741205952c 100644 --- a/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs +++ b/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs @@ -83,6 +83,11 @@ let write (w : BinaryWriter) (op : PackageOp) : unit = w.Write(3uy) PackageLocation.write w location Reference.write w target + | PackageOp.OverrideName(location, target, resolvedAt) -> + w.Write(8uy) + PackageLocation.write w location + Reference.write w target + String.write w resolvedAt | PackageOp.Deprecate(target, kind, message) -> w.Write(4uy) Reference.write w target @@ -130,6 +135,11 @@ let read (r : BinaryReader) : PackageOp = let location = PackageLocation.read r let target = Reference.read r PackageOp.SetName(location, target) + | 8uy -> + let location = PackageLocation.read r + let target = Reference.read r + let resolvedAt = String.read r + PackageOp.OverrideName(location, target, resolvedAt) | 4uy -> let target = Reference.read r let kind = DeprecationKind.read r diff --git a/backend/src/LocalExec/Migrations.fs b/backend/src/LocalExec/Migrations.fs index d471ca2b4b..2b2d577e8e 100644 --- a/backend/src/LocalExec/Migrations.fs +++ b/backend/src/LocalExec/Migrations.fs @@ -111,41 +111,14 @@ let private opCount () : int = 0 +// schema_state_v0 is defined in schema.sql and created when it's applied (below), so this just writes +// the row — the table always exists by the time writeHash runs. let private writeHash (hash : string) : unit = - Sql.query - "CREATE TABLE IF NOT EXISTS schema_state_v0 - (id INTEGER PRIMARY KEY, hash TEXT NOT NULL)" - |> Sql.executeStatementSync Sql.query "INSERT OR REPLACE INTO schema_state_v0 (id, hash) VALUES (0, @hash)" |> Sql.parameters [ "hash", Sql.string hash ] |> Sql.executeStatementSync -/// A legacy DB was migrated by name through `system_migrations_v0`. If one -/// already has the full set of old migration names applied, treat it as -/// fully migrated under the schema-hash flow — write the current schema -/// hash so subsequent runs see "up to date" and don't kill-and-fill. -let private adoptLegacyDB (currentHash : string) : bool = - if not (tableExists "system_migrations_v0") then - false - else - let count = - match - Sql.query "SELECT COUNT(*) AS c FROM system_migrations_v0" - |> Sql.execute (fun read -> read.int "c") - with - | Ok [ c ] -> c - | _ -> 0 - if count >= 13 then - print - $"Adopting legacy DB ({count} migrations on record). \ - Stamping schema hash; no data dropped." - writeHash currentHash - true - else - false - - let private runSchemaBootstrap () : unit = let sql = File.readfile Config.Migrations schemaFile let want = computeHash sql @@ -168,11 +141,10 @@ let private runSchemaBootstrap () : unit = markOpsUnapplied () writeHash want | None -> - if adoptLegacyDB want then - () - else - Sql.query sql |> Sql.executeStatementSync - writeHash want + // fresh (or pre-stamp) store: apply schema.sql in full, then stamp its hash. `CREATE TABLE IF NOT + // EXISTS` is idempotent, so this is safe even if some tables already exist. + Sql.query sql |> Sql.executeStatementSync + writeHash want // --------------------- diff --git a/backend/testfiles/execution/pre-s-and-s/sync-check.dark b/backend/testfiles/execution/pre-s-and-s/sync-check.dark index e57abec593..7d932dd3a1 100644 --- a/backend/testfiles/execution/pre-s-and-s/sync-check.dark +++ b/backend/testfiles/execution/pre-s-and-s/sync-check.dark @@ -1,10 +1,10 @@ // `dark sync check ` — the "am I caught up with this peer?" readiness/convergence glance. The // CLI GETs the peer's `/sync/health` (body: "sync-server ok; ops="), machine-reads the count with -// `parseHealthOps` (the AGENT-facing read), then `convergenceLine` formats the HUMAN-facing verdict. +// `parseHealthOps`, then `convergenceLine` formats the human-facing verdict. // Both halves are pure package fns (`Darklang.Sync.{parseHealthOps,convergenceLine}`) — this file // pins the realistic UX end to end so a peer's reachability + how-far-behind never regresses silently. -// ── parseHealthOps: the AGENT read of the health body ── +// ── parseHealthOps: the machine read of the health body ── // the exact body the server emits (Builtin.pmSyncHealth = "sync-server ok; ops=") Darklang.Sync.parseHealthOps "sync-server ok; ops=42" = 42L Darklang.Sync.parseHealthOps "sync-server ok; ops=0" = 0L @@ -18,7 +18,7 @@ Darklang.Sync.parseHealthOps "404 not found" = 0L // the CURRENT body carries the release stamp before ops — ops still parses (release is first by design) Darklang.Sync.parseHealthOps "sync-server ok; release=2; ops=42" = 42L -// ── parseHealthRelease: the AGENT read of the peer's RELEASE (the sync-gate coordinate) ── +// ── parseHealthRelease: the machine read of the peer's RELEASE (the sync-gate coordinate) ── Darklang.Sync.parseHealthRelease "sync-server ok; release=2; ops=42" = 2L Darklang.Sync.parseHealthRelease "sync-server ok; release=7; ops=0" = 7L // an OLD peer with no release stamp → 0 (reads as a mismatch against any real release — the intent) @@ -35,7 +35,7 @@ Darklang.Sync.releaseSkewLine "desktop" 3L 2L = "⚠ desktop is on Release 2, yo // peer ahead of us → upgrade this machine Darklang.Sync.releaseSkewLine "desktop" 2L 5L = "⚠ desktop is on Release 5, you're on 2 — upgrade this machine to sync" -// ── convergenceLine: the HUMAN verdict over (our cursor, peer op count) ── +// ── convergenceLine: the human verdict over (our cursor, peer op count) ── // caught up — our cursor reached the peer's latest op Darklang.Sync.convergenceLine "desktop" 9845L 9845L = "✓ desktop — caught up (9845 ops)" // AHEAD counts as caught up too (we have ops the peer hasn't pulled yet — we're not behind it) diff --git a/backend/tests/Tests/OpsProjections.Tests.fs b/backend/tests/Tests/OpsProjections.Tests.fs index 047b6e6c7a..3b4a5ff087 100644 --- a/backend/tests/Tests/OpsProjections.Tests.fs +++ b/backend/tests/Tests/OpsProjections.Tests.fs @@ -199,6 +199,24 @@ let tests = Expect.equal commitsAfter commitsBefore "commits preserved across a re-fold" } + // Durable-canon recreates the projection tables FROM schema.sql (drop + replay), then re-folds an + // INSERT that writes `description`. That column once lived ONLY in an incremental, so a fresh build's + // schema.sql-recreated table lacked it and the re-fold INSERT crashed at boot. + // The incremental is now deleted → schema.sql is the SOLE provider. This pins that contract so a + // schema.sql regression fails here, not on a user's first run. + testTask + "schema completeness: projection tables carry `description`" { + for table in [ "package_functions"; "package_types"; "package_values" ] do + let! present = + Sql.query + $"SELECT COUNT(*) as n FROM pragma_table_info('{table}') WHERE name = 'description'" + |> Sql.executeRowAsync (fun read -> read.int64 "n") + Expect.equal + present + 1L + $"{table} must have a `description` column in schema.sql (durable-canon re-fold INSERTs it)" + } + // Projection-currency counters — the `dark status` glance (`projectionStatus` → opsCount vs // folded-through). Equal when the cache is current; a gap when ops are appended/pulled but not yet // folded. Guards the surface that tells you a `branch rebuild` is owed. diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index b8b6864de2..d9b8663c82 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -382,34 +382,110 @@ let private multiDivergenceBatch = Expect.equal w2 (Some incoming2) "second location converged to its LWW winner" } -let private keepLocalPropagates = +// Regression: a keep-local override must APPEND a distinct, newer op (a fresh `OverrideName` with its +// own rowid), not re-stamp the existing op in place. Incremental sync is by commit-rowid, so a re-stamp +// (same rowid) never reaches a peer that already pulled the op — the binding stays diverged. A fresh op +// above the peer's cursor DOES ride the next pull, and its newest `origin_ts` wins timestamp-LWW. This +// asserts both the local effect (our hash wins) AND the propagation property (a new op, above the prior +// max rowid, carrying the newest stamp). +let private keepLocalAppendsPropagableOverride = testTask - "keep-local re-stamp makes our op the newest-by-creation (rides sync to peers)" { + "keep-local override appends a distinct, newer op so it can propagate" { let loc : PT.PackageLocation = { owner = "Scenario"; modules = [ "Prop" ]; name = uniqueName "p" } let local, incoming = hashChar 'a', hashChar 'b' let remote = uniqueName "rprop" let! divs = setupDivergentPull loc PT.ItemKind.Fn local -120.0 incoming -60.0 remote - let localRef = PT.Reference.fromHashAndKind (PT.Hash local, PT.ItemKind.Fn) - let localOpId = Inserts.computeOpHash (PT.PackageOp.SetName(loc, localRef)) + let maxRowid () : Task = + Sql.query "SELECT COALESCE(MAX(rowid), 0) AS m FROM package_ops" + |> Sql.executeRowAsync (fun read -> read.int64 "m") + let stampOf sql ps : Task = + Sql.query sql + |> Sql.parameters ps + |> Sql.executeRowAsync (fun read -> read.string "origin_ts") let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incoming, PT.ItemKind.Fn) let incomingOpId = Inserts.computeOpHash (PT.PackageOp.SetName(loc, incomingRef)) - let originTs (id : System.Guid) : Task = - Sql.query "SELECT origin_ts FROM package_ops WHERE id = @id LIMIT 1" - |> Sql.parameters [ "id", Sql.uuid id ] - |> Sql.executeRowAsync (fun read -> read.string "origin_ts") + let! cursorBefore = maxRowid () + let! incomingStamp = + stampOf + "SELECT origin_ts FROM package_ops WHERE id = @id LIMIT 1" + [ "id", Sql.uuid incomingOpId ] + // keep-local override (the same path the human 'mine' override uses) let! _ = Sync.routeDivergences keepLocalDispatch callCtx remote PT.mainBranchId divs - // after keep-local, OUR op's origin_ts is re-stamped to now — strictly newer than the incoming's. - // A peer re-pulling reads our op's adjacent (newer) origin_ts and, by the same timestamp-LWW, - // re-adopts our hash. Convergence, not divergence forever. - let! localStamp = originTs localOpId - let! incomingStamp = originTs incomingOpId + // 1. our hash is the live binding + let! winner = liveHash loc + Expect.equal winner (Some local) "keep-local: our hash is the live binding" + // 2. a NEW op was appended above any synced peer's cursor (an in-place re-stamp would add no row) + let! cursorAfter = maxRowid () + Expect.isGreaterThan + cursorAfter + cursorBefore + "the override appended a new op above the peer's cursor — so it rides the next incremental pull" + // 3. that newest op carries the newest origin_ts → a re-pulling peer adopts our hash by timestamp-LWW + let! overrideStamp = + stampOf "SELECT origin_ts FROM package_ops ORDER BY rowid DESC LIMIT 1" [] Expect.isGreaterThan - localStamp + overrideStamp incomingStamp - "our op is now the newest-by-creation (the re-stamp rides sync so peers re-adopt local)" + "the override op's origin_ts is the newest — a re-pulling peer re-adopts our hash" + } + +// An override only propagates if it survives the wire: a peer DESERIALIZES the op_blob (read tag 8) and +// folds it. The keep-local path only ever serializes + folds the in-memory op, so exercise the read path +// directly — `OverrideName` (with its `resolvedAt`) must round-trip byte-for-byte through the op codec. +let private overrideOpRoundTrips = + test "OverrideName round-trips through the op serializer (rides the wire — tag 8)" { + let loc : PT.PackageLocation = { owner = "RT"; modules = [ "M" ]; name = "x" } + let target = PT.Reference.fromHashAndKind (PT.Hash(hashChar 'a'), PT.ItemKind.Fn) + let op = PT.PackageOp.OverrideName(loc, target, "2026-06-11T12:34:56.789Z") + let id = Inserts.computeOpHash op + let blob = LibSerialization.Binary.Serialization.PT.PackageOp.serialize id op + let decoded = LibSerialization.Binary.Serialization.PT.PackageOp.deserialize id blob + Expect.equal decoded op "OverrideName survives binary serialize → deserialize unchanged" + } + +// End-to-end, the RECEIVER half: a peer currently bound to the incoming hash (it already pulled the +// race) receives the OTHER machine's committed override op over the normal receive path and must ADOPT +// our hash. This is the actual cross-machine propagation — the headline override-propagation claim — exercised through +// `applyRemoteOps` (the same path an HTTP/file pull uses). An `OverrideName` is NOT re-flagged as a new +// divergence (it isn't a SetName), so it just folds, and its newer stamp wins timestamp-LWW. +let private overridePropagatesToPeer = + testTask + "a peer receiving a committed override op adopts our hash (end-to-end, receiver side)" { + let loc : PT.PackageLocation = + { owner = "Recv"; modules = [ "O" ]; name = uniqueName "r" } + let ours, theirs = hashChar 'a', hashChar 'b' + let remote = uniqueName "rrecv" + // the peer is currently bound to the incoming hash `theirs` (older), having pulled the race already + let theirsOp = + PT.PackageOp.SetName( + loc, + PT.Reference.fromHashAndKind (PT.Hash theirs, PT.ItemKind.Fn) + ) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ theirsOp ] + (Map.ofList [ (Inserts.computeOpHash theirsOp, relTs -60.0) ]) + let! before = liveHash loc + Expect.equal before (Some theirs) "precondition: the peer holds the incoming hash" + // now it pulls the other machine's override op (re-bind to `ours`, newest stamp) over the wire path + let overrideOp = + PT.PackageOp.OverrideName( + loc, + PT.Reference.fromHashAndKind (PT.Hash ours, PT.ItemKind.Fn), + relTs 0.0 + ) + let! _ = + Sync.applyRemoteOps remote PT.mainBranchId None [ (1L, relTs 0.0, overrideOp) ] + let! after = liveHash loc + Expect.equal + after + (Some ours) + "the peer adopted our override — the resolution propagated cross-machine" } let private orderIndependent = @@ -474,7 +550,7 @@ let private resolutionSticks = { owner = "Scenario"; modules = [ "Stick" ]; name = uniqueName "st" } let local, incoming = hashChar 'a', hashChar 'b' let remote = uniqueName "rstick" - // incoming newer → it won the pull; keep-local overrides + re-stamps our hash to now + // incoming newer → it won the pull; keep-local overrides, re-binding our hash with a now-stamp let! divs = setupDivergentPull loc PT.ItemKind.Fn local -120.0 incoming -60.0 remote let! _ = @@ -600,7 +676,9 @@ let tests = @ [ emptyConverged sameMsTie multiDivergenceBatch - keepLocalPropagates + keepLocalAppendsPropagableOverride + overrideOpRoundTrips + overridePropagatesToPeer orderIndependent idempotentRePull resolutionSticks diff --git a/packages/darklang/cli/config.dark b/packages/darklang/cli/config.dark index 7794b4792b..0fec97a280 100644 --- a/packages/darklang/cli/config.dark +++ b/packages/darklang/cli/config.dark @@ -1,9 +1,23 @@ module Darklang.Cli.Config -// Config file path - uses rundir/ in portable mode, ~/.darklang/ when installed +// Config file path - lives beside data.db in the portable .darklang/ +// dir, or in ~/.darklang/ when installed. let configFilePath () : String = match Installation.System.getInstallationMode () with - | Portable -> "rundir/cli-config.json" + | Portable -> + // Colocate the config with the portable data.db, which lives in a + // `.darklang/` next to the executable (the runtime resolves it from + // AppContext.BaseDirectory — the binary's dir — not the cwd). Deriving + // from the exe path keeps config + data together even when the binary + // is on PATH and run from an arbitrary cwd. The old bare relative + // `rundir/cli-config.json` was written nowhere (no such dir next to an + // AOT install), so the failed write was swallowed and login never stuck. + let exeDir = + (Builtin.getCurrentExecutablePath ()) + |> Stdlib.String.split "/" + |> Stdlib.List.dropLast + |> (fun parts -> Stdlib.String.join parts "/") + $"{exeDir}/.darklang/cli-config.json" | Installed -> let host = (Stdlib.Cli.Host.getRuntimeHost ()) |> Builtin.unwrap let darklangHomeDir = Installation.Config.getDarklangHomeDir host @@ -30,8 +44,15 @@ let readConfig () : Dict = let writeConfig (config: Dict) : Unit = let json = Stdlib.Json.serialize> config let jsonBytes = Stdlib.String.toBlob json - let _ = Builtin.fileWrite jsonBytes (configFilePath ()) - () + + match Builtin.fileWrite jsonBytes (configFilePath ()) with + | Ok _ -> () + | Error e -> + // Surface a failed config write LOUDLY. A silently-discarded failure here (the old `let _ = …`) + // hid a "login never persists" bug — config was written to a path that didn't exist and the error + // vanished. Better a visible warning than a silently-dropped write. + Stdlib.printLine + (Colors.error $"⚠ could not save config to {configFilePath ()}: {e}") // Helper to get a single config value diff --git a/packages/darklang/cli/conflicts.dark b/packages/darklang/cli/conflicts.dark index 37ad8491c1..7514f2da37 100644 --- a/packages/darklang/cli/conflicts.dark +++ b/packages/darklang/cli/conflicts.dark @@ -65,7 +65,7 @@ let execute (state: AppState) (args: List) : AppState = if Builtin.pmConflictResolve id keepMine then let what = if keepMine then - "kept YOUR version (re-bound via a WIP SetName — commit to share it)" + "kept YOUR version — re-bound here (a WIP override; commit to share it with peers)" else "kept the incoming version" @@ -104,7 +104,7 @@ let help (state: AppState) : AppState = " list all Show every recorded conflict, including acked/overridden history" " ack Acknowledge — 'the auto-resolution was right' (the common case)" " ack all Acknowledge ALL pending conflicts at once (bulk 'all were right')" - " resolve mine Override — re-bind to YOUR version (a WIP SetName; commit to share)" + " resolve mine Override — re-bind to YOUR version (a WIP override; commit to share it)" " resolve theirs Override — keep the incoming version (what already won)" "" " is the short id from `conflicts list` (a unique prefix is fine)." ] diff --git a/packages/darklang/cli/core.dark b/packages/darklang/cli/core.dark index edb5dd28a7..c6e44352c4 100644 --- a/packages/darklang/cli/core.dark +++ b/packages/darklang/cli/core.dark @@ -41,7 +41,11 @@ type AppState = cachedStatusBar: String cachedStatusBarBranch: Uuid /// When true, write telemetry to rundir/logs/telemetry.jsonl - telemetryEnabled: Bool } + telemetryEnabled: Bool + + /// Process exit code for a non-interactive (`dark `) run. 0 = success; a command sets this + /// non-zero on failure so scripts/daemons wrapping the CLI can detect it (e.g. a failed `sync pull`). + exitCode: Int64 } let locationStr (state: AppState) : String = Packages.formatLocation state.packageData.currentLocation @@ -112,7 +116,8 @@ let initState () : AppState = commandOptionsCache = commandOptions () cachedStatusBar = "" cachedStatusBarBranch = Builtin.unwrap (Stdlib.Uuid.parse_v0 "00000000-0000-0000-0000-000000000000") - telemetryEnabled = (Builtin.environmentGet "DARK_TELEMETRY") == Stdlib.Option.Option.Some "1" } + telemetryEnabled = (Builtin.environmentGet "DARK_TELEMETRY") == Stdlib.Option.Option.Some "1" + exitCode = 0L } type Msg = | ProcessInput of String @@ -905,4 +910,4 @@ let executeCliCommand (args: List) : Int64 = | SubApp _ -> StatusBar.init () runInteractiveLoop resultState - | _ -> 0L + | _ -> resultState.exitCode diff --git a/packages/darklang/cli/sync.dark b/packages/darklang/cli/sync.dark index 1d461a4bc2..5b18d18569 100644 --- a/packages/darklang/cli/sync.dark +++ b/packages/darklang/cli/sync.dark @@ -40,13 +40,13 @@ let execute (state: AppState) (args: List) : AppState = Colors.dimText $" is `dark serve Darklang.Sync.Server.router --port ` running there? check host/port — try {Darklang.Sync.healthUrl target}" ) - state + { state with exitCode = 1L } else if Stdlib.Bool.not (Stdlib.Cli.File.exists target) then // a missing peer file would throw a raw SQLite error from pullFromFile — refuse cleanly Stdlib.printLine (Colors.error $"sync pull: no such peer db: {target}") Stdlib.printLine (Colors.dimText " give a local data.db path, or an http(s):// sync-server URL") - state + { state with exitCode = 1L } else // pull from a peer's local data.db file let (newCursor, divCount) = Darklang.Sync.pullFromFile target @@ -141,10 +141,10 @@ let execute (state: AppState) (args: List) : AppState = Stdlib.printLine (Colors.error $"sync check: could not reach {target} — is `dark serve` running there?") - state + { state with exitCode = 1L } else Stdlib.printLine (Colors.error "sync check needs an http(s) peer URL") - state + { state with exitCode = 1L } | [ "daemon" ] | [ "daemon"; "status" ] -> // the background auto-sync daemon's state (running / stopped / stale), via its pidfile Stdlib.printLine (Darklang.Sync.Daemon.statusLine "sync") diff --git a/packages/darklang/sync/autosync.dark b/packages/darklang/sync/autosync.dark index 5f51b47e2d..7685fba86f 100644 --- a/packages/darklang/sync/autosync.dark +++ b/packages/darklang/sync/autosync.dark @@ -77,13 +77,29 @@ let pullPeerReleaseAware (peer: String) : (Int64 * Int64 * String) = (c, d, "") +// Single call site for the `pmSyncRecordDaemonEvent` builtin — both the single-peer (`pollOnce`) and +// tailnet (`runLoopAll`) loops record one cycle's telemetry through here. +let recordEvent + (peersPolled: Int64) + (changed: Bool) + (conflicts: Int64) + (skews: Int64) + : Unit = + Builtin.pmSyncRecordDaemonEvent peersPolled changed conflicts skews + + // One poll step: pull the peer (HTTP or file, Release-aware — a skewed HTTP peer is paused, not // pulled), then decide the next interval — responsive if it brought new ops, backing off if idle. // Returns `(newCursor, nextIntervalMs, skewLine)`; `skewLine` is "" unless the peer was paused on a // Release mismatch. A daemon calls this on a timer, sleeping `nextIntervalMs` between calls. let pollOnce (peer: String) (currentIntervalMs: Int64) : (Int64 * Int64 * String) = let before = Darklang.Sync.cursorFor peer - let (after, _divCount, skew) = pullPeerReleaseAware peer + let (after, divCount, skew) = pullPeerReleaseAware peer + + // record this single-peer cycle as structured telemetry, so `sync events` is populated for a + // single-peer daemon (`daemon start `) too — not only the tailnet-wide `runLoopAll`. + let _ = recordEvent 1L (after > before) divCount (if skew == "" then 0L else 1L) + let (cursor, nextMs) = decideNext before after currentIntervalMs (cursor, nextMs, skew) @@ -150,7 +166,7 @@ let runLoopAll (intervalMs: Int64) (times: Int64) : Int64 = // record this cycle as structured telemetry (read back by `sync events` / a dashboard view) let _ = - Builtin.pmSyncRecordDaemonEvent + recordEvent (Stdlib.List.length peers) sawChanges conflicts From 3b5cb15977eb14f050d1b5bc32be985a2860a852 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 10:22:19 -0400 Subject: [PATCH 11/25] conflicts: separate sync conflicts from runtime conflicts (model + policy seam) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 1 of the conflict/resolution redesign. De-conflate the two domains that were sharing one RT dispatch: - ProgramTypes: add SyncConflict (one case, Divergence of location * candidates), ResolvedBy (Auto of policy | Human), and DivergenceResolution { chosen; by } — beside PackageOp, because a sync conflict is a disagreement about the op log and its resolution is itself an op. Per-kind resolution shape, not a global enum. - RuntimeTypes: Conflict is now just FnNotFound (runtime-only); Resolution is Substitute | FailLoudly (dropped the C*/R* prefixes, dropped the sync-divergence and unused-runtime-error cases). The execution seam stays for missing-fn dispatch. - Sync: new SyncPolicyChoice (AcceptLww | OverrideTo of Reference), SyncPolicy, and defaultSyncPolicy = AcceptLww. routeDivergences builds a first-class PT.SyncConflict.Divergence and consults the sync policy instead of the RT dispatch. Behavior preserved: default keeps LWW standing; keep-local mints an OverrideName. - PM/Sync builtins pass defaultSyncPolicy; tests migrate dispatches -> sync policies. Full backend suite green (9,787 passed). Behavior unchanged. --- .../Builtins/Builtins.Matter/Libs/PM/Sync.fs | 12 +-- backend/src/LibDB/Sync.fs | 73 ++++++++----- backend/src/LibExecution/Execution.fs | 17 +-- backend/src/LibExecution/Interpreter.fs | 23 ++-- backend/src/LibExecution/ProgramTypes.fs | 27 +++++ backend/src/LibExecution/RuntimeTypes.fs | 25 ++--- backend/tests/Tests/ConflictDispatch.Tests.fs | 69 +++--------- backend/tests/Tests/SyncScenarios.Tests.fs | 102 ++++++++---------- 8 files changed, 160 insertions(+), 188 deletions(-) diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs index f9706bdf06..20e2a21110 100644 --- a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs @@ -28,14 +28,14 @@ let fns () : List = | exeState, vm, _, [ DString sourcePath ] -> uply { let! (newCursor, divergences) = LibDB.Sync.pullFromFile sourcePath - // Route each surfaced divergence through the runtime conflict-dispatch seam. The - // default policy keeps today's behavior (surface-as-data, LWW stands); a sync policy - // can keep-local. branchId = the puller's current branch. + // Route each surfaced divergence through the sync policy. The default keeps today's + // behavior (surface-as-data, LWW stands); a policy can keep-local. branchId = the + // puller's current branch. let callCtx : CallContext = { branchId = exeState.branchId; threadID = vm.threadID } let! _reconciled = LibDB.Sync.routeDivergences - exeState.conflictDispatch + LibDB.Sync.defaultSyncPolicy callCtx sourcePath exeState.branchId @@ -128,12 +128,12 @@ let fns () : List = let bytes = System.Convert.FromBase64String wireB64 let! (newCursor, divergences) = LibDB.Sync.applyWireBatch remote PT.mainBranchId None bytes - // Same dispatch routing as the file pull (the wire batch applies on main). + // Same policy routing as the file pull (the wire batch applies on main). let callCtx : CallContext = { branchId = exeState.branchId; threadID = vm.threadID } let! _reconciled = LibDB.Sync.routeDivergences - exeState.conflictDispatch + LibDB.Sync.defaultSyncPolicy callCtx remote PT.mainBranchId diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index ccf8cc8d16..f55c51261a 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -119,7 +119,7 @@ let private parseLocation (location : string) : Option = /// `SetName`, if the location is already bound LOCALLY to a *different*, non-deprecated hash, /// two peers gave the same name different content. Returns `(location, existingHash, /// incomingHash)` per divergence — surfaced as **data** so the receiver never blocks; a higher -/// layer turns these into `Conflict.CSyncDivergence` for the resolution policy. +/// layer (`routeDivergences`) turns these into a `PT.SyncConflict.Divergence` for the sync policy. /// `divergentBindings` is the core (it returns the structured location + hashes); `detectDivergences` /// just renders the location to its FQ string. let private divergentBindings @@ -324,22 +324,38 @@ let private overrideBinding return () } -/// Route each detected divergence through the runtime conflict-dispatch seam -/// (`exeState.conflictDispatch`). This is the "higher layer" the transport defers to: the receiver -/// surfaces each `name → two hashes` divergence as data (never blocks); HERE it becomes a first-class -/// `Conflict.CSyncDivergence` the runtime policy resolves — -/// - default policy (`FailLoudly`) → no reconciling op: the divergence stays surfaced and the -/// timestamp-LWW outcome the fold already applied stands. Behaviorally unchanged (the timestamp-LWW outcome already applied stands). -/// - a sync policy may return `RSubstitute (DString hash)`: -/// · hash = the LOCAL (existing) hash → KEEP LOCAL: emit + apply a reconciling `SetName` -/// re-binding the location to our hash (a fresh op that also propagates the decision to -/// peers, like a human override), and mark the recorded conflict overridden. -/// · hash = the incoming hash / anything else → no-op: the incoming bind already applied. +/// A sync policy's verdict on a `SyncConflict`: accept the convergent last-writer-wins outcome the +/// fold already applied, or override the location to a specific reference. `OverrideTo` the LOCAL +/// reference is the "keep mine" move — it mints a propagating `OverrideName` op; `OverrideTo` the +/// incoming (or anything already applied) is a no-op. +type SyncPolicyChoice = + | AcceptLww + | OverrideTo of PT.Reference + +/// How sync conflicts are decided — the sync-side analogue of the runtime `ConflictDispatch`. Pure +/// (the shipped LWW policy needs no IO); `CallContext` is passed for parity and future policies that +/// branch on it (e.g. per-branch rules). +type SyncPolicy = PT.SyncConflict -> RT.CallContext -> SyncPolicyChoice + +/// The shipped default: accept the last-writer-wins outcome the fold already applied — surface as +/// data, never block, pick no override. Swappable in tests / future config for keep-local or +/// ask-human policies. +let defaultSyncPolicy : SyncPolicy = fun _conflict _ctx -> AcceptLww + +/// Route each detected divergence through the **sync policy**. This is the "higher layer" the +/// transport defers to: the receiver surfaces each `name → two hashes` divergence as data (never +/// blocks); HERE it becomes a first-class `PT.SyncConflict.Divergence` the policy resolves — +/// - `AcceptLww` (the default) → no reconciling op: the divergence stays surfaced and the +/// timestamp-LWW outcome the fold already applied stands. Behaviorally unchanged. +/// - `OverrideTo localRef` → KEEP LOCAL: emit + apply a reconciling `OverrideName` re-binding the +/// location to our hash (a fresh op that also propagates the decision to peers, like a human +/// override), and mark the recorded conflict overridden. +/// - `OverrideTo` the incoming ref / anything else → no-op: that bind already applied. /// `branchId` is the branch the reconcile op is written to (the receiver's current branch — sync /// divergences are name bindings, applied on the branch the puller is on). Returns the number of /// divergences the policy actively reconciled (0 under the default). let routeDivergences - (dispatch : RT.ConflictDispatch) + (policy : SyncPolicy) (callCtx : RT.CallContext) (remote : string) (branchId : PT.BranchId) @@ -348,24 +364,27 @@ let routeDivergences task { let mutable reconciled = 0 for (location, existingHash, incomingHash) in divergences do - let conflict = RT.CSyncDivergence(location, existingHash, incomingHash) - let! resolution = dispatch conflict callCtx |> Ply.toTask - match resolution with - | RT.RSubstitute(RT.DString keepHash) when keepHash = existingHash -> - // keep local: re-bind the location to our existing hash via `overrideBinding` (the same move a - // human 'mine' override makes in `resolveConflict`) — a fresh `OverrideName` op that rides sync - // so peers re-adopt our hash too. - match! kindOfHash existingHash with - | Some kind -> - match parseLocation location with - | Some loc -> - let target = PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) + // Rebuild the contending references so the conflict is first-class data. Both contend for the + // same location, hence the same kind; `kindOfHash` reads it from the binding we're restoring. + match! kindOfHash existingHash with + | Some kind -> + match parseLocation location with + | Some loc -> + let localRef = PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) + let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incomingHash, kind) + let conflict = PT.SyncConflict.Divergence(loc, [ localRef; incomingRef ]) + match policy conflict callCtx with + | OverrideTo target when target = localRef -> + // keep local: re-bind the location to our hash via `overrideBinding` (the same move a + // human 'mine' override makes in `resolveConflict`) — a fresh `OverrideName` op that rides + // sync so peers re-adopt our hash too — and mark the recorded conflict overridden. do! overrideBinding branchId loc target do! Conflicts.markOverriddenByLocation remote location reconciled <- reconciled + 1 - | None -> () + | AcceptLww + | OverrideTo _ -> () // accept LWW, or override-to-incoming (already applied) → no new op | None -> () - | _ -> () // RFailLoudly / RSubstitute(incoming|other) → surfaced-as-data, LWW stands + | None -> () return reconciled } diff --git a/backend/src/LibExecution/Execution.fs b/backend/src/LibExecution/Execution.fs index 3809581e39..c945f8f9aa 100644 --- a/backend/src/LibExecution/Execution.fs +++ b/backend/src/LibExecution/Execution.fs @@ -38,23 +38,14 @@ let createState reportException = reportException notify = notify - // Default: FailLoudly for every conflict — unchanged from the behavior before this seam existed. + // Default: FailLoudly for every runtime conflict — unchanged from the behavior before this seam + // existed. (Sync conflicts no longer ride this seam; they're decided by a `Sync.SyncPolicy`.) conflictDispatch = fun conflict _ctx -> uply { match conflict with - | RT.CRuntimeError rte -> return RT.RFailLoudly rte - | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) - | RT.CSyncDivergence(location, existing, incoming) -> - // strict default: fail loudly. A sync policy installs surface-as-data / last-writer - // so the receiver never blocks — but the default doesn't pick a winner. - return - RT.RFailLoudly( - RTE.UncaughtException( - $"sync divergence at {location}: {existing} vs {incoming}", - [] - ) - ) + | RT.Conflict.FnNotFound name -> + return RT.Resolution.FailLoudly(RTE.FnNotFound name) } lambdaInstrCache = System.Collections.Concurrent.ConcurrentDictionary() diff --git a/backend/src/LibExecution/Interpreter.fs b/backend/src/LibExecution/Interpreter.fs index 9d09b96215..def53fd748 100644 --- a/backend/src/LibExecution/Interpreter.fs +++ b/backend/src/LibExecution/Interpreter.fs @@ -315,27 +315,28 @@ let rec private executeInner (exeState : ExecutionState) (vm : VMState) : Ply - // Route a missing package fn through the conflict-dispatch seam — the runtime's - // shared "I can't proceed; here are the options" hook (the SAME hook sync's - // divergence routing uses, so the dispatch is real infrastructure, not a sync-only - // appendage). The default policy returns `RFailLoudly (FnNotFound …)` → raise, so - // this is byte-identical to before. The teed-up consumer is fetch-on-miss: a policy - // pulls the fn from a peer and resolves it, instead of failing. + // Route a missing package fn through the runtime conflict-dispatch seam — the + // runtime's shared "I can't proceed; here are the options" hook. The default policy + // returns `FailLoudly (FnNotFound …)` → raise, so this is byte-identical to before. + // The teed-up consumer is fetch-on-miss: a policy pulls the fn from a peer and + // resolves it, instead of failing. let cc : CallContext = { branchId = exeState.branchId; threadID = vm.threadID } match! - exeState.conflictDispatch (CFnNotFound(FQFnName.Package fn)) cc + exeState.conflictDispatch + (Conflict.FnNotFound(FQFnName.Package fn)) + cc with - | RFailLoudly rte -> return raiseRTE rte - | RSubstitute _ -> + | Resolution.FailLoudly rte -> return raiseRTE rte + | Resolution.Substitute _ -> // A policy substituted a value for the missing fn, but result-injection isn't wired // at this call site yet (it needs the call to return a Dval, not instructions). Raise // a DISTINCT internal error — not a bare FnNotFound — so if a policy ever returns - // RSubstitute here before fetch-on-miss lands, the unwired path is diagnosable rather + // Substitute here before fetch-on-miss lands, the unwired path is diagnosable rather // than masquerading as "the fn doesn't exist". return Exception.raiseInternal - "conflict-dispatch returned RSubstitute for a missing package fn, but value-substitution is not wired at this call site yet" + "conflict-dispatch returned Substitute for a missing package fn, but value-substitution is not wired at this call site yet" [ "fn", fn ] } diff --git a/backend/src/LibExecution/ProgramTypes.fs b/backend/src/LibExecution/ProgramTypes.fs index faa6e34c4d..92c26f6203 100644 --- a/backend/src/LibExecution/ProgramTypes.fs +++ b/backend/src/LibExecution/ProgramTypes.fs @@ -744,6 +744,33 @@ type PackageOp = +/// A SYNC conflict — surfaced when applying a peer's ops reveals state two instances disagree on. +/// It lives HERE, beside `PackageOp`, because every sync conflict is ultimately a disagreement about +/// the op log, and its resolution is itself an op. This is deliberately distinct from a *runtime* +/// conflict (`RuntimeTypes.Conflict`, e.g. a missing fn mid-execution): different lifetime, different +/// surface — only the dispatch PATTERN is shared. One case today; a new kind (a move collision, a +/// value-update race) joins here and resolves the same way — a per-kind resolution recorded as an op — +/// with no change to the sync engine. +and SyncConflict = + /// One location bound to two different contents across instances (the `name → two hashes` + /// divergence). `candidates` are the contending references — today exactly two, ordered + /// [local; incoming]. + | Divergence of location : PackageLocation * candidates : List + +/// WHO/what chose a conflict's resolution. `Auto` carries the policy name that picked it (e.g. +/// `"last-writer-wins"`) so a surfaced auto-resolution is self-describing; `Human` is an explicit +/// override. There is no "unresolved" case — an unresolved conflict has no resolution recorded yet +/// (that's a status, not a resolution). +and ResolvedBy = + | Auto of policy : string + | Human + +/// The resolution of a `Divergence`: which reference won the location, and by whom/what. This is the +/// per-kind resolution shape — as `SyncConflict` gains cases, each gets its own resolution record, +/// rather than one global `Resolution` enum straining to cover every conflict. +and DivergenceResolution = { chosen : Reference; by : ResolvedBy } + + /// The kind of package item (function, type, or value) and ItemKind = | Fn diff --git a/backend/src/LibExecution/RuntimeTypes.fs b/backend/src/LibExecution/RuntimeTypes.fs index 8a214ac189..b6610246f0 100644 --- a/backend/src/LibExecution/RuntimeTypes.fs +++ b/backend/src/LibExecution/RuntimeTypes.fs @@ -1854,27 +1854,18 @@ and Notifier = ExecutionState -> VMState -> string -> Metadata -> Ply // RuntimeError.Error/Dval (defined above) AND ExecutionState references ConflictDispatch, // so a later file can't satisfy both. (Same constraint a buses field would have.) and Conflict = - // Extensible by design — `Conflict` is the meta-model. As more ops are added, new cases join here - // and a policy decides each the same way (RSubstitute / FailLoudly). Anticipated future cases: a - // move collision (a MoveItem/MoveModule lands a name where one already lives), a value-update race - // (two concurrent updates to one mutable package value), a capability denial (a gate refused; a - // policy could prompt or escalate instead of failing). - | CRuntimeError of RuntimeError.Error - | CFnNotFound of FQFnName.FQFnName - // A name bound to two different hashes across synced instances — the `name → two hashes` - // divergence. Hashes are RT-level strings (PT's `Hash of string` can't be referenced here — PT - // depends on RT, not the reverse). Default dispatch surfaces it loudly; a sync policy can - // RSubstitute the converged winner (last-writer-wins). - | CSyncDivergence of - location : string * - existingHash : string * - incomingHash : string + // A *runtime* conflict: execution reached a point it can't proceed past on its own, and a policy + // must decide. One case today — a missing package fn — but the seam is shared infra: future cases + // (a capability denial, a value-update race seen at execution time) join here and a policy decides + // each the same way (`Substitute` / `FailLoudly`). SYNC conflicts are modeled separately, over in + // `ProgramTypes.SyncConflict`, next to the op log they're about — they don't ride this seam. + | FnNotFound of FQFnName.FQFnName and Resolution = // How a policy answers a Conflict: substitute a value to proceed, or fail loudly. (A future // "park" resolution — pause and await external input — would be added here.) - | RSubstitute of Dval - | RFailLoudly of RuntimeError.Error + | Substitute of Dval + | FailLoudly of RuntimeError.Error and CallContext = { branchId : BranchId; threadID : uuid } // assembled from ExecState + VMState diff --git a/backend/tests/Tests/ConflictDispatch.Tests.fs b/backend/tests/Tests/ConflictDispatch.Tests.fs index a7488ad20a..4009e993fa 100644 --- a/backend/tests/Tests/ConflictDispatch.Tests.fs +++ b/backend/tests/Tests/ConflictDispatch.Tests.fs @@ -1,7 +1,8 @@ -/// Tests for the conflict-dispatch seam (LibExecution.RuntimeTypes +/// Tests for the runtime conflict-dispatch seam (LibExecution.RuntimeTypes /// Conflict/Resolution/ConflictDispatch + ExecutionState.conflictDispatch). /// Verifies the default dispatch is FailLoudly (the unchanged prior behavior) and -/// that an installed policy overrides it — the whole point of the hook. +/// that an installed policy overrides it — the whole point of the hook. (SYNC conflicts +/// are decided separately by a `Sync.SyncPolicy`; see `SyncScenarios.Tests`.) module Tests.ConflictDispatch open Expecto @@ -36,23 +37,14 @@ let private aName = RT.FQFnName.fqBuiltin "doesNotExist" 0 let tests = testList "ConflictDispatch" - [ testTask "default dispatch FailLoudly-s a RuntimeError unchanged" { + [ testTask "default dispatch maps FnNotFound to a FailLoudly FnNotFound" { let state = freshState () - let err = RTE.FnNotFound aName let! res = - state.conflictDispatch (RT.CRuntimeError err) (ctx state) |> Ply.toTask + state.conflictDispatch (RT.Conflict.FnNotFound aName) (ctx state) + |> Ply.toTask match res with - | RT.RFailLoudly e -> - Expect.equal e err "FailLoudly carries the same RuntimeError" - | _ -> failtest "default dispatch should FailLoudly" - } - - testTask "default dispatch maps FnNotFound to a FailLoudly FnNotFound" { - let state = freshState () - let! res = - state.conflictDispatch (RT.CFnNotFound aName) (ctx state) |> Ply.toTask - match res with - | RT.RFailLoudly(RTE.FnNotFound n) -> Expect.equal n aName "name preserved" + | RT.Resolution.FailLoudly(RTE.FnNotFound n) -> + Expect.equal n aName "name preserved" | _ -> failtest "default dispatch should FailLoudly with FnNotFound" } @@ -61,51 +53,14 @@ let tests = let state = { baseState with conflictDispatch = - fun _ _ -> uply { return RT.RSubstitute(RT.DInt64 0L) } } + fun _ _ -> uply { return RT.Resolution.Substitute(RT.DInt64 0L) } } let! res = - state.conflictDispatch (RT.CFnNotFound aName) (ctx state) |> Ply.toTask + state.conflictDispatch (RT.Conflict.FnNotFound aName) (ctx state) + |> Ply.toTask match res with - | RT.RSubstitute(RT.DInt64 n) -> + | RT.Resolution.Substitute(RT.DInt64 n) -> Expect.equal n 0L "installed policy substituted" | _ -> failtest "installed policy should Substitute, proving the hook is swappable" - } - - // CSyncDivergence — two peers bound the same name to different content. It flows - // through the SAME conflict policy: strict default fails loudly; a sync policy resolves. - testTask - "a sync divergence fails loudly by default and is policy-resolvable (last-writer)" { - let state = freshState () - let conflict = RT.CSyncDivergence("Stachu.foo", "hashA", "hashB") - - // strict default: surface the divergence as a loud error naming the location + both hashes - let! def = state.conflictDispatch conflict (ctx state) |> Ply.toTask - match def with - | RT.RFailLoudly(RTE.UncaughtException(msg, _)) -> - Expect.stringContains - msg - "sync divergence" - "default surfaces the divergence loudly" - Expect.stringContains msg "Stachu.foo" "the diverged location is named" - | _ -> - failtest "default dispatch should FailLoudly (surface) a sync divergence" - - // a last-writer sync policy resolves it without blocking: substitute the incoming hash. - let lastWriter = - { state with - conflictDispatch = - fun conflict _ -> - uply { - match conflict with - | RT.CSyncDivergence(_, _, incoming) -> - return RT.RSubstitute(RT.DString incoming) - | _ -> - return RT.RFailLoudly(RTE.UncaughtException("unexpected", [])) - } } - let! res = lastWriter.conflictDispatch conflict (ctx state) |> Ply.toTask - match res with - | RT.RSubstitute(RT.DString s) -> - Expect.equal s "hashB" "the last-writer policy picked the incoming hash" - | _ -> failtest "last-writer policy should RSubstitute the incoming hash" } ] diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index d9b8663c82..19a3967cb5 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -1,7 +1,7 @@ -/// Scenario coverage for the sync conflict-dispatch seam (`Sync.routeDivergences`) — the wire that -/// turns a surfaced `name → two hashes` divergence into a first-class `Conflict.CSyncDivergence` the -/// runtime resolution policy (`ExecutionState.conflictDispatch`) decides. Complements -/// `SyncIdempotency.Tests` (the transport's idempotence + LWW); here we exercise the POLICY layer. +/// Scenario coverage for the sync conflict layer (`Sync.routeDivergences`) — the wire that turns a +/// surfaced `name → two hashes` divergence into a first-class `PT.SyncConflict.Divergence` a +/// `Sync.SyncPolicy` decides. Complements `SyncIdempotency.Tests` (the transport's idempotence + +/// LWW); here we exercise the POLICY layer. /// /// Most scenarios are DATA: one `Scenario` record describes a divergent pull (local vs incoming hash + /// authoring times) and a policy, and `runScenario` runs it and checks the live binding, the number of @@ -24,7 +24,6 @@ module Conflicts = LibDB.Conflicts module Sync = LibDB.Sync module PT = LibExecution.ProgramTypes module RT = LibExecution.RuntimeTypes -module RTE = LibExecution.RuntimeTypes.RuntimeError // ── helpers ────────────────────────────────────────────────────────────────────────────────── @@ -38,40 +37,25 @@ let private relTs (minutesFromNow : float) : string = let private callCtx : RT.CallContext = { branchId = PT.mainBranchId; threadID = System.Guid.NewGuid() } -/// The runtime default (mirrors `Execution.createState`): every divergence fails loudly — meaning, -/// to the sync receiver, "I pick no winner", so `routeDivergences` leaves the LWW outcome standing. -let private defaultDispatch : RT.ConflictDispatch = - fun conflict _ctx -> - uply { - match conflict with - | RT.CSyncDivergence(loc, e, i) -> - return - RT.RFailLoudly(RTE.UncaughtException($"divergence {loc}: {e} vs {i}", [])) - | RT.CRuntimeError rte -> return RT.RFailLoudly rte - | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) - } - -/// A keep-local policy: always substitute the EXISTING (local) hash — "my version wins". -let private keepLocalDispatch : RT.ConflictDispatch = +/// The shipped default (mirrors `Sync.defaultSyncPolicy`): accept the last-writer-wins outcome the +/// fold already applied — pick no override, so `routeDivergences` leaves the LWW outcome standing. +let private defaultPolicy : Sync.SyncPolicy = fun _conflict _ctx -> Sync.AcceptLww + +/// A keep-local policy: override to the LOCAL candidate (candidates[0]) — "my version wins". +let private keepLocalPolicy : Sync.SyncPolicy = fun conflict _ctx -> - uply { - match conflict with - | RT.CSyncDivergence(_loc, existing, _incoming) -> - return RT.RSubstitute(RT.DString existing) - | RT.CRuntimeError rte -> return RT.RFailLoudly rte - | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) - } - -/// A keep-incoming policy: substitute the INCOMING hash (= what already applied) — a no-op rebind. -let private keepIncomingDispatch : RT.ConflictDispatch = + match conflict with + | PT.SyncConflict.Divergence(_loc, local :: _) -> Sync.OverrideTo local + | PT.SyncConflict.Divergence(_loc, []) -> Sync.AcceptLww + +/// A keep-incoming policy: override to the INCOMING candidate (candidates[1]) — what already applied, +/// so a no-op rebind. +let private keepIncomingPolicy : Sync.SyncPolicy = fun conflict _ctx -> - uply { - match conflict with - | RT.CSyncDivergence(_loc, _existing, incoming) -> - return RT.RSubstitute(RT.DString incoming) - | RT.CRuntimeError rte -> return RT.RFailLoudly rte - | RT.CFnNotFound name -> return RT.RFailLoudly(RTE.FnNotFound name) - } + match conflict with + | PT.SyncConflict.Divergence(_loc, _local :: incoming :: _) -> + Sync.OverrideTo incoming + | PT.SyncConflict.Divergence _ -> Sync.AcceptLww let private liveHash (loc : PT.PackageLocation) : Task> = Sql.query @@ -159,13 +143,16 @@ type private Scenario = reconciled : int overridden : bool } -let private dispatchFor (policy : Policy) : RT.ConflictDispatch = +let private policyFor (policy : Policy) : Sync.SyncPolicy = match policy with - | Default -> defaultDispatch - | KeepLocal -> keepLocalDispatch - | KeepIncoming -> keepIncomingDispatch + | Default -> defaultPolicy + | KeepLocal -> keepLocalPolicy + | KeepIncoming -> keepIncomingPolicy | SubstituteUnrelated -> - fun _ _ -> uply { return RT.RSubstitute(RT.DString(hashChar 'z')) } + fun _ _ -> + Sync.OverrideTo( + PT.Reference.fromHashAndKind (PT.Hash(hashChar 'z'), PT.ItemKind.Fn) + ) let private runScenario (s : Scenario) : Test = testTask s.desc { @@ -177,12 +164,7 @@ let private runScenario (s : Scenario) : Test = setupDivergentPull loc s.kind localH s.localAge incomingH s.incomingAge remote Expect.equal (List.length divs) 1 $"{s.desc}: exactly one divergence surfaced" let! reconciled = - Sync.routeDivergences - (dispatchFor s.policy) - callCtx - remote - PT.mainBranchId - divs + Sync.routeDivergences (policyFor s.policy) callCtx remote PT.mainBranchId divs Expect.equal reconciled s.reconciled $"{s.desc}: reconciling-op count" let! winner = liveHash loc let expected = @@ -313,7 +295,7 @@ let private emptyConverged = "empty divergence list routes to a clean zero (the converged steady state)" { let remote = uniqueName "rempty" let! reconciled = - Sync.routeDivergences keepLocalDispatch callCtx remote PT.mainBranchId [] + Sync.routeDivergences keepLocalPolicy callCtx remote PT.mainBranchId [] Expect.equal reconciled 0 "no divergences → nothing reconciled, no ops" } @@ -374,7 +356,7 @@ let private multiDivergenceBatch = let divs = divs1 @ divs2 Expect.equal (List.length divs) 2 "two divergences collected from the batch" let! reconciled = - Sync.routeDivergences defaultDispatch callCtx remote PT.mainBranchId divs + Sync.routeDivergences defaultPolicy callCtx remote PT.mainBranchId divs Expect.equal reconciled 0 "default policy reconciles nothing (surface-as-data)" let! w1 = liveHash loc1 let! w2 = liveHash loc2 @@ -389,8 +371,7 @@ let private multiDivergenceBatch = // asserts both the local effect (our hash wins) AND the propagation property (a new op, above the prior // max rowid, carrying the newest stamp). let private keepLocalAppendsPropagableOverride = - testTask - "keep-local override appends a distinct, newer op so it can propagate" { + testTask "keep-local override appends a distinct, newer op so it can propagate" { let loc : PT.PackageLocation = { owner = "Scenario"; modules = [ "Prop" ]; name = uniqueName "p" } let local, incoming = hashChar 'a', hashChar 'b' @@ -413,7 +394,7 @@ let private keepLocalAppendsPropagableOverride = [ "id", Sql.uuid incomingOpId ] // keep-local override (the same path the human 'mine' override uses) let! _ = - Sync.routeDivergences keepLocalDispatch callCtx remote PT.mainBranchId divs + Sync.routeDivergences keepLocalPolicy callCtx remote PT.mainBranchId divs // 1. our hash is the live binding let! winner = liveHash loc Expect.equal winner (Some local) "keep-local: our hash is the live binding" @@ -442,8 +423,12 @@ let private overrideOpRoundTrips = let op = PT.PackageOp.OverrideName(loc, target, "2026-06-11T12:34:56.789Z") let id = Inserts.computeOpHash op let blob = LibSerialization.Binary.Serialization.PT.PackageOp.serialize id op - let decoded = LibSerialization.Binary.Serialization.PT.PackageOp.deserialize id blob - Expect.equal decoded op "OverrideName survives binary serialize → deserialize unchanged" + let decoded = + LibSerialization.Binary.Serialization.PT.PackageOp.deserialize id blob + Expect.equal + decoded + op + "OverrideName survives binary serialize → deserialize unchanged" } // End-to-end, the RECEIVER half: a peer currently bound to the incoming hash (it already pulled the @@ -471,7 +456,10 @@ let private overridePropagatesToPeer = [ theirsOp ] (Map.ofList [ (Inserts.computeOpHash theirsOp, relTs -60.0) ]) let! before = liveHash loc - Expect.equal before (Some theirs) "precondition: the peer holds the incoming hash" + Expect.equal + before + (Some theirs) + "precondition: the peer holds the incoming hash" // now it pulls the other machine's override op (re-bind to `ours`, newest stamp) over the wire path let overrideOp = PT.PackageOp.OverrideName( @@ -554,7 +542,7 @@ let private resolutionSticks = let! divs = setupDivergentPull loc PT.ItemKind.Fn local -120.0 incoming -60.0 remote let! _ = - Sync.routeDivergences keepLocalDispatch callCtx remote PT.mainBranchId divs + Sync.routeDivergences keepLocalPolicy callCtx remote PT.mainBranchId divs let! afterResolve = liveHash loc Expect.equal afterResolve From 88f753ed91402288c1cce2e42b67626091b407dd Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 10:33:39 -0400 Subject: [PATCH 12/25] conflicts: remove the speculative runtime conflict seam (RTE owns runtime "can't proceed") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the previous commit. The runtime Conflict/Resolution/ConflictDispatch seam had exactly one live consumer (the missing-package-fn site) and its only real behavior was raiseRTE(FnNotFound) — the Substitute arm was explicitly unwired. It duplicated RTE.FnNotFound and modeled a "runtime conflict" that doesn't exist yet. So "conflict" is now a sync-only concept: - RuntimeTypes: delete Conflict, Resolution, ConflictDispatch, and the ExecutionState.conflictDispatch field. Keep CallContext (it's now purely the sync-policy context, assembled from ExecutionState + VMState). - Execution: drop the default dispatch initializer. - Interpreter: the missing-package-fn site goes back to raiseRTE(FnNotFound). - Tests: drop ConflictDispatch.Tests (RT-only seam tests); sync-conflict coverage stays in SyncScenarios.Tests. A later PR will give the runtime genuine conflict handling — park-and-write-on-demand (PDD-style), then resume — at which point a seam returns, designed against that real requirement. Until then RuntimeError is the model. A breadcrumb at the interpreter site and on CallContext records this. Full backend suite green (9,785 passed). --- backend/src/LibDB/Sync.fs | 6 +- backend/src/LibExecution/Execution.fs | 10 --- backend/src/LibExecution/Interpreter.fs | 28 ++------ backend/src/LibExecution/RuntimeTypes.fs | 34 +++------- backend/tests/Tests/ConflictDispatch.Tests.fs | 66 ------------------- backend/tests/Tests/Tests.fs | 1 - backend/tests/Tests/Tests.fsproj | 1 - 7 files changed, 18 insertions(+), 128 deletions(-) delete mode 100644 backend/tests/Tests/ConflictDispatch.Tests.fs diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index f55c51261a..eb99c4ce89 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -332,9 +332,9 @@ type SyncPolicyChoice = | AcceptLww | OverrideTo of PT.Reference -/// How sync conflicts are decided — the sync-side analogue of the runtime `ConflictDispatch`. Pure -/// (the shipped LWW policy needs no IO); `CallContext` is passed for parity and future policies that -/// branch on it (e.g. per-branch rules). +/// How sync conflicts are decided: given a surfaced conflict + the execution `CallContext`, pick an +/// outcome. Pure (the shipped LWW policy needs no IO); `CallContext` is passed for future policies +/// that branch on it (e.g. per-branch rules). type SyncPolicy = PT.SyncConflict -> RT.CallContext -> SyncPolicyChoice /// The shipped default: accept the last-writer-wins outcome the fold already applied — surface as diff --git a/backend/src/LibExecution/Execution.fs b/backend/src/LibExecution/Execution.fs index c945f8f9aa..cdd87e7181 100644 --- a/backend/src/LibExecution/Execution.fs +++ b/backend/src/LibExecution/Execution.fs @@ -38,16 +38,6 @@ let createState reportException = reportException notify = notify - // Default: FailLoudly for every runtime conflict — unchanged from the behavior before this seam - // existed. (Sync conflicts no longer ride this seam; they're decided by a `Sync.SyncPolicy`.) - conflictDispatch = - fun conflict _ctx -> - uply { - match conflict with - | RT.Conflict.FnNotFound name -> - return RT.Resolution.FailLoudly(RTE.FnNotFound name) - } - lambdaInstrCache = System.Collections.Concurrent.ConcurrentDictionary() packageFnInstrCache = System.Collections.Concurrent.ConcurrentDictionary() diff --git a/backend/src/LibExecution/Interpreter.fs b/backend/src/LibExecution/Interpreter.fs index def53fd748..bd3e79009d 100644 --- a/backend/src/LibExecution/Interpreter.fs +++ b/backend/src/LibExecution/Interpreter.fs @@ -315,29 +315,11 @@ let rec private executeInner (exeState : ExecutionState) (vm : VMState) : Ply - // Route a missing package fn through the runtime conflict-dispatch seam — the - // runtime's shared "I can't proceed; here are the options" hook. The default policy - // returns `FailLoudly (FnNotFound …)` → raise, so this is byte-identical to before. - // The teed-up consumer is fetch-on-miss: a policy pulls the fn from a peer and - // resolves it, instead of failing. - let cc : CallContext = - { branchId = exeState.branchId; threadID = vm.threadID } - match! - exeState.conflictDispatch - (Conflict.FnNotFound(FQFnName.Package fn)) - cc - with - | Resolution.FailLoudly rte -> return raiseRTE rte - | Resolution.Substitute _ -> - // A policy substituted a value for the missing fn, but result-injection isn't wired - // at this call site yet (it needs the call to return a Dval, not instructions). Raise - // a DISTINCT internal error — not a bare FnNotFound — so if a policy ever returns - // Substitute here before fetch-on-miss lands, the unwired path is diagnosable rather - // than masquerading as "the fn doesn't exist". - return - Exception.raiseInternal - "conflict-dispatch returned Substitute for a missing package fn, but value-substitution is not wired at this call site yet" - [ "fn", fn ] + // A missing package fn is a runtime error today — surface it as one. (A later PR will + // give the runtime real conflict/resolution handling: e.g. PARK the execution, write + // the fn on demand — PDD-style — then resume, instead of failing outright. When that + // lands, a dispatch seam returns here; until then RuntimeError is the model.) + return raiseRTE (RTE.FnNotFound(FQFnName.Package fn)) } diff --git a/backend/src/LibExecution/RuntimeTypes.fs b/backend/src/LibExecution/RuntimeTypes.fs index b6610246f0..32e5b48fb3 100644 --- a/backend/src/LibExecution/RuntimeTypes.fs +++ b/backend/src/LibExecution/RuntimeTypes.fs @@ -1849,27 +1849,16 @@ and ExceptionReporter = ExecutionState -> VMState -> Metadata -> exn -> Ply VMState -> string -> Metadata -> Ply -// -- Conflict dispatch: the runtime "I can't proceed; here are my options" hook. -- -// These MUST live here (the and-chain), not a separate ConflictTypes.fs: they mention -// RuntimeError.Error/Dval (defined above) AND ExecutionState references ConflictDispatch, -// so a later file can't satisfy both. (Same constraint a buses field would have.) -and Conflict = - // A *runtime* conflict: execution reached a point it can't proceed past on its own, and a policy - // must decide. One case today — a missing package fn — but the seam is shared infra: future cases - // (a capability denial, a value-update race seen at execution time) join here and a policy decides - // each the same way (`Substitute` / `FailLoudly`). SYNC conflicts are modeled separately, over in - // `ProgramTypes.SyncConflict`, next to the op log they're about — they don't ride this seam. - | FnNotFound of FQFnName.FQFnName - -and Resolution = - // How a policy answers a Conflict: substitute a value to proceed, or fail loudly. (A future - // "park" resolution — pause and await external input — would be added here.) - | Substitute of Dval - | FailLoudly of RuntimeError.Error - -and CallContext = { branchId : BranchId; threadID : uuid } // assembled from ExecState + VMState - -and ConflictDispatch = Conflict -> CallContext -> Ply +// CallContext: the runtime coordinates (which branch, which thread) a sync-policy decision is made +// against — assembled from ExecutionState + VMState and handed to `LibDB.Sync`'s `SyncPolicy` when an +// execution triggers a pull. Lives in this and-chain because it's built from runtime state. +// +// There is deliberately NO runtime conflict/resolution model here: a runtime "I can't proceed" (e.g. +// a missing fn) is a `RuntimeError`, full stop. The only conflicts with real, choosable resolutions +// today are SYNC conflicts — `ProgramTypes.SyncConflict`, beside the op log. (A later PR will give the +// runtime genuine conflict handling — park-and-write-on-demand, PDD-style — at which point a seam +// returns here, designed against that real requirement rather than guessed at now.) +and CallContext = { branchId : BranchId; threadID : uuid } /// All state set when starting an execution; non-changing /// (as opposed to the VMState, which changes as the execution progresses) @@ -1877,9 +1866,6 @@ and ExecutionState = { // -- Set consistently across a runtime -- tracing : Tracing.Tracing - /// The conflict-dispatch hook. Default (createState) returns FailLoudly for every - /// conflict — the behavior before this seam existed. A policy can install substitute/park later. - conflictDispatch : ConflictDispatch test : TestContext /// Lambda instructions registered by `CreateLambda`, looked up on `Apply`. diff --git a/backend/tests/Tests/ConflictDispatch.Tests.fs b/backend/tests/Tests/ConflictDispatch.Tests.fs deleted file mode 100644 index 4009e993fa..0000000000 --- a/backend/tests/Tests/ConflictDispatch.Tests.fs +++ /dev/null @@ -1,66 +0,0 @@ -/// Tests for the runtime conflict-dispatch seam (LibExecution.RuntimeTypes -/// Conflict/Resolution/ConflictDispatch + ExecutionState.conflictDispatch). -/// Verifies the default dispatch is FailLoudly (the unchanged prior behavior) and -/// that an installed policy overrides it — the whole point of the hook. (SYNC conflicts -/// are decided separately by a `Sync.SyncPolicy`; see `SyncScenarios.Tests`.) -module Tests.ConflictDispatch - -open Expecto - -open System.Threading.Tasks -open FSharp.Control.Tasks - -open Prelude -open TestUtils.TestUtils - -module RT = LibExecution.RuntimeTypes -module RTE = RT.RuntimeError -module Exe = LibExecution.Execution -module PT = LibExecution.ProgramTypes - -let private freshState () : RT.ExecutionState = - let builtins = localBuiltIns pmPT - Exe.createState - builtins - pmRT - Exe.noTracing - (fun _ _ _ _ -> uply { return () }) - (fun _ _ _ _ -> uply { return () }) - PT.mainBranchId - { dbs = Map.empty } - -let private ctx (state : RT.ExecutionState) : RT.CallContext = - { branchId = state.branchId; threadID = System.Guid.NewGuid() } - -let private aName = RT.FQFnName.fqBuiltin "doesNotExist" 0 - -let tests = - testList - "ConflictDispatch" - [ testTask "default dispatch maps FnNotFound to a FailLoudly FnNotFound" { - let state = freshState () - let! res = - state.conflictDispatch (RT.Conflict.FnNotFound aName) (ctx state) - |> Ply.toTask - match res with - | RT.Resolution.FailLoudly(RTE.FnNotFound n) -> - Expect.equal n aName "name preserved" - | _ -> failtest "default dispatch should FailLoudly with FnNotFound" - } - - testTask "an installed policy overrides the default (Substitute)" { - let baseState = freshState () - let state = - { baseState with - conflictDispatch = - fun _ _ -> uply { return RT.Resolution.Substitute(RT.DInt64 0L) } } - let! res = - state.conflictDispatch (RT.Conflict.FnNotFound aName) (ctx state) - |> Ply.toTask - match res with - | RT.Resolution.Substitute(RT.DInt64 n) -> - Expect.equal n 0L "installed policy substituted" - | _ -> - failtest - "installed policy should Substitute, proving the hook is swappable" - } ] diff --git a/backend/tests/Tests/Tests.fs b/backend/tests/Tests/Tests.fs index 0747e2985b..04af3cdf79 100644 --- a/backend/tests/Tests/Tests.fs +++ b/backend/tests/Tests/Tests.fs @@ -63,7 +63,6 @@ let main (args : string array) : int = Tests.Stream.tests Tests.Capabilities.tests Tests.OpsProjections.tests - Tests.ConflictDispatch.tests Tests.SyncIdempotency.tests Tests.SyncScenarios.tests Tests.Releases.tests diff --git a/backend/tests/Tests/Tests.fsproj b/backend/tests/Tests/Tests.fsproj index fbf210f436..cb64ded620 100644 --- a/backend/tests/Tests/Tests.fsproj +++ b/backend/tests/Tests/Tests.fsproj @@ -54,7 +54,6 @@ - From eef680dc38822ddf8ec05f0113c48ce67d1bc048 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 10:49:34 -0400 Subject: [PATCH 13/25] conflicts: binary serializer for SyncConflict + DivergenceResolution Step 2 of the conflict redesign. Tag-byte write/read mirroring PackageOp.fs: - SyncConflict.Divergence: tag 0, then PackageLocation + List. - ResolvedBy: Auto (tag 0, policy string) | Human (tag 1). - DivergenceResolution: chosen Reference + ResolvedBy. Reference reuses the existing PackageOp serializer so a Reference has one wire shape everywhere. Exposed via Serialization.fs (SyncConflict / DivergenceResolution modules) the same way as PackageOp. A round-trip test covers SyncConflict + both ResolvedBy cases. Full backend suite green (9,786 passed, +1 for the new round-trip test). --- .../LibSerialization/Binary/Serialization.fs | 10 +++ .../Binary/Serializers/PT/SyncConflict.fs | 61 +++++++++++++++++++ .../LibSerialization/LibSerialization.fsproj | 1 + backend/tests/Tests/SyncScenarios.Tests.fs | 42 +++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs diff --git a/backend/src/LibSerialization/Binary/Serialization.fs b/backend/src/LibSerialization/Binary/Serialization.fs index ba7f4eb951..d2fd403d2e 100644 --- a/backend/src/LibSerialization/Binary/Serialization.fs +++ b/backend/src/LibSerialization/Binary/Serialization.fs @@ -130,6 +130,16 @@ module PT = let serialize id value = makeSerializer PT.PackageOp.write id value let deserialize id data = makeDeserializer PT.PackageOp.read id data + module SyncConflict = + let serialize id value = makeSerializer PT.SyncConflict.write id value + let deserialize id data = makeDeserializer PT.SyncConflict.read id data + + module DivergenceResolution = + let serialize id value = + makeSerializer PT.SyncConflict.DivergenceResolution.write id value + let deserialize id data = + makeDeserializer PT.SyncConflict.DivergenceResolution.read id data + module BranchOp = let serialize = PT.BranchOp.serialize let deserialize = PT.BranchOp.deserialize diff --git a/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs b/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs new file mode 100644 index 0000000000..80e0be4cbc --- /dev/null +++ b/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs @@ -0,0 +1,61 @@ +module LibSerialization.Binary.Serializers.PT.SyncConflict + +open System.IO +open Prelude + +open LibExecution.ProgramTypes + +open LibSerialization.Binary.Serializers.Common +open LibSerialization.Binary.Serializers.PT.Common + +// `Reference` (hash + kind) already has a serializer next to `PackageOp` — reuse it so a Reference +// has ONE wire shape everywhere (an op's target and a conflict's candidate serialize identically). +module Reference = LibSerialization.Binary.Serializers.PT.PackageOp.Reference + + +// -- ResolvedBy -- + +module ResolvedBy = + let write (w : BinaryWriter) (by : ResolvedBy) : unit = + match by with + | Auto policy -> + w.Write(0uy) + String.write w policy + | Human -> w.Write(1uy) + + let read (r : BinaryReader) : ResolvedBy = + match r.ReadByte() with + | 0uy -> Auto(String.read r) + | 1uy -> Human + | b -> raiseFormatError $"Invalid ResolvedBy tag: {b}" + + +// -- DivergenceResolution -- + +module DivergenceResolution = + let write (w : BinaryWriter) (res : DivergenceResolution) : unit = + Reference.write w res.chosen + ResolvedBy.write w res.by + + let read (r : BinaryReader) : DivergenceResolution = + let chosen = Reference.read r + let by = ResolvedBy.read r + { chosen = chosen; by = by } + + +// -- SyncConflict -- + +let write (w : BinaryWriter) (conflict : SyncConflict) : unit = + match conflict with + | Divergence(location, candidates) -> + w.Write(0uy) + PackageLocation.write w location + List.write w Reference.write candidates + +let read (r : BinaryReader) : SyncConflict = + match r.ReadByte() with + | 0uy -> + let location = PackageLocation.read r + let candidates = List.read r Reference.read + Divergence(location, candidates) + | b -> raiseFormatError $"Invalid SyncConflict tag: {b}" diff --git a/backend/src/LibSerialization/LibSerialization.fsproj b/backend/src/LibSerialization/LibSerialization.fsproj index 5e9adf0c64..3fac97977a 100644 --- a/backend/src/LibSerialization/LibSerialization.fsproj +++ b/backend/src/LibSerialization/LibSerialization.fsproj @@ -27,6 +27,7 @@ + diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index 19a3967cb5..799a91513d 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -431,6 +431,47 @@ let private overrideOpRoundTrips = "OverrideName survives binary serialize → deserialize unchanged" } +// A `SyncConflict` and its `DivergenceResolution` must survive the binary codec — they're persisted +// (the recorded conflict's blob) and may travel, so a tag-byte round-trip is the contract. Covers both +// `ResolvedBy` cases (`Auto policy` and `Human`) since they have distinct tags. +let private syncConflictRoundTrips = + test "SyncConflict + DivergenceResolution round-trip through the binary serializer" { + let loc : PT.PackageLocation = { owner = "RT"; modules = [ "M" ]; name = "x" } + let refA = PT.Reference.fromHashAndKind (PT.Hash(hashChar 'a'), PT.ItemKind.Fn) + let refB = PT.Reference.fromHashAndKind (PT.Hash(hashChar 'b'), PT.ItemKind.Fn) + + let conflict = PT.SyncConflict.Divergence(loc, [ refA; refB ]) + let cBlob = + LibSerialization.Binary.Serialization.PT.SyncConflict.serialize "c" conflict + let cDecoded = + LibSerialization.Binary.Serialization.PT.SyncConflict.deserialize "c" cBlob + Expect.equal cDecoded conflict "SyncConflict survives serialize → deserialize" + + let resAuto : PT.DivergenceResolution = + { chosen = refB; by = PT.ResolvedBy.Auto "last-writer-wins" } + let aBlob = + LibSerialization.Binary.Serialization.PT.DivergenceResolution.serialize + "a" + resAuto + let aDecoded = + LibSerialization.Binary.Serialization.PT.DivergenceResolution.deserialize + "a" + aBlob + Expect.equal aDecoded resAuto "DivergenceResolution(Auto) survives round-trip" + + let resHuman : PT.DivergenceResolution = + { chosen = refA; by = PT.ResolvedBy.Human } + let hBlob = + LibSerialization.Binary.Serialization.PT.DivergenceResolution.serialize + "h" + resHuman + let hDecoded = + LibSerialization.Binary.Serialization.PT.DivergenceResolution.deserialize + "h" + hBlob + Expect.equal hDecoded resHuman "DivergenceResolution(Human) survives round-trip" + } + // End-to-end, the RECEIVER half: a peer currently bound to the incoming hash (it already pulled the // race) receives the OTHER machine's committed override op over the normal receive path and must ADOPT // our hash. This is the actual cross-machine propagation — the headline override-propagation claim — exercised through @@ -666,6 +707,7 @@ let tests = multiDivergenceBatch keepLocalAppendsPropagableOverride overrideOpRoundTrips + syncConflictRoundTrips overridePropagatesToPeer orderIndependent idempotentRePull From ef8947ed93a006670de20626de04f23d9b26723b Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 10:59:20 -0400 Subject: [PATCH 14/25] conflicts: reshape sync_conflicts to store the structured conflict + resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 3 of the conflict redesign. The table no longer stores flat local_hash/incoming_hash/resolution-prose/acknowledged/overridden; it stores the structured conflict and its resolution: (id, kind, location, conflict_blob, chosen_hash, resolved_by, override_op_id, remote, detected_at, status) - conflict_blob: a serialized PT.SyncConflict (the candidates). - chosen_hash + resolved_by: the resolution ('auto:last-writer-wins' | 'human' | ...). - override_op_id: the OverrideName op a deliberate override mints (NULL until step 5). - status: review lifecycle ('auto-resolved' | 'acknowledged' | 'overridden'). Conflicts.fs rewritten: record() builds + serializes the Divergence and dedups on the exact blob; list()/getById() deserialize it. localHash/incomingHash/acknowledged/ overridden are now derived members over the structured record, so the builtins + tests that read them stay unchanged. recordDivergences + Merge pass chosen_hash + resolved_by. A temporary `resolution` prose bridge keeps the .dark display green until the next step restructures it. Local/disposable table → schema.sql change only (no Release step; the test DB is disk-mode, rebuilt fresh). Full backend suite green (9,786). --- backend/migrations/schema.sql | 19 +- backend/src/LibDB/Conflicts.fs | 214 +++++++++++++------ backend/src/LibDB/Merge.fs | 3 +- backend/src/LibDB/Sync.fs | 19 +- backend/tests/Tests/SyncIdempotency.Tests.fs | 22 +- 5 files changed, 193 insertions(+), 84 deletions(-) diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index 797194a37a..3375b0cd4f 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -402,18 +402,23 @@ CREATE TABLE IF NOT EXISTS sync_cursors ( folded_through_rowid INTEGER NOT NULL DEFAULT 0 ); --- The recorded, reviewable log of auto-resolved name-binding divergences (`dark conflicts`). --- Recorded at pull time; auto-resolved by policy (default last-writer-wins) but never silently lost. +-- The recorded, reviewable log of auto-resolved sync conflicts (`dark conflicts`). Recorded at pull +-- time; auto-resolved by policy (default last-writer-wins) but never silently lost. Local-only, never +-- synced, re-derivable by replaying the op log. Stores the STRUCTURED conflict (`conflict_blob` = a +-- serialized PT.SyncConflict — the candidates) plus its resolution flattened into columns: +-- `chosen_hash` (which content won), `resolved_by` ('auto:' e.g. 'auto:last-writer-wins', or +-- 'human'), and `override_op_id` (the OverrideName op a deliberate override mints, NULL until then). CREATE TABLE IF NOT EXISTS sync_conflicts ( id TEXT PRIMARY KEY, + kind TEXT NOT NULL, -- SyncConflict discriminator, e.g. 'divergence' location TEXT NOT NULL, - local_hash TEXT NOT NULL, - incoming_hash TEXT NOT NULL, - resolution TEXT NOT NULL, + conflict_blob BLOB NOT NULL, -- serialized PT.SyncConflict (the candidates) + chosen_hash TEXT NOT NULL, -- the resolution's chosen content hash + resolved_by TEXT NOT NULL, -- 'auto:' or 'human' + override_op_id TEXT, -- the OverrideName op id, once an override mints one remote TEXT NOT NULL, detected_at TEXT NOT NULL DEFAULT (datetime('now')), - acknowledged INTEGER NOT NULL DEFAULT 0, - overridden INTEGER NOT NULL DEFAULT 0 + status TEXT NOT NULL DEFAULT 'auto-resolved' -- 'auto-resolved' | 'acknowledged' | 'overridden' ); -- Structured telemetry from the autosync daemon: one row per poll cycle, so `sync events` (and a diff --git a/backend/src/LibDB/Conflicts.fs b/backend/src/LibDB/Conflicts.fs index 58a8ef2d10..08a8f09d40 100644 --- a/backend/src/LibDB/Conflicts.fs +++ b/backend/src/LibDB/Conflicts.fs @@ -6,6 +6,12 @@ /// records the conflict here, so nothing is *silently* lost: it's raised to the user, who usually /// acknowledges ("the auto thing was right") and occasionally overrides. /// +/// What's stored is the STRUCTURED conflict, not prose: a serialized `PT.SyncConflict` (the +/// `conflict_blob` — the candidates), the `chosen_hash` (which content won) + `resolved_by` (the +/// policy that picked it, e.g. `auto:last-writer-wins`, or `human`), and a `status` lifecycle +/// (`auto-resolved` → `acknowledged` | `overridden`). The op id of a human/keep-local override lands +/// in `override_op_id`. The display reconstructs everything from these fields — no string parsing. +/// /// Why a recorded log, not a pure op-log projection: everyone's "main" shares the constant /// `PT.mainBranchId`, so two competing edits are SAME-branch — the log can't distinguish "a peer /// overwrote me" from "I re-edited." The pull is the one place that knows it was a sync, so the @@ -22,62 +28,146 @@ open Prelude open Fumble open LibDB.Sqlite -/// One recorded conflict: a name we'd bound to `localHash` that a pull from `remote` rebound to -/// `incomingHash`, auto-resolved by `resolution`. `acknowledged` = the user said "auto was right"; -/// `overridden` = the user emitted a different resolution (a reconciling op). +module PT = LibExecution.ProgramTypes +module Serialize = LibSerialization.Binary.Serialization + +/// One recorded conflict. `conflict` is the deserialized `SyncConflict` (its candidates); `chosenHash` +/// + `resolvedBy` are the resolution (which content won, and the policy/human that chose it); +/// `overrideOpId` is the `OverrideName` op a deliberate override minted (None until then); `status` is +/// the review lifecycle (`auto-resolved` | `acknowledged` | `overridden`). type Conflict = { id : string + kind : string location : string - localHash : string - incomingHash : string - resolution : string + conflict : PT.SyncConflict + chosenHash : string + resolvedBy : string + overrideOpId : string option remote : string - acknowledged : bool - overridden : bool } + status : string } + + /// The contending content hashes, ordered [local; incoming], read off the divergence's candidates + /// (so callers needn't re-deserialize). "" for a candidate a future kind doesn't carry. + member this.candidateHashes : string * string = + match this.conflict with + | PT.SyncConflict.Divergence(_, candidates) -> + let hashOf (r : PT.Reference) = + match r.hash with + | PT.Hash h -> h + match candidates with + | a :: b :: _ -> (hashOf a, hashOf b) + | [ a ] -> (hashOf a, "") + | [] -> ("", "") + + /// The local (loser-by-default) hash — candidate 0. + member this.localHash = fst this.candidateHashes + /// The incoming (auto-resolved winner by default) hash — candidate 1. + member this.incomingHash = snd this.candidateHashes + /// Has the user handled this? (`acknowledged` or `overridden` both count as reviewed.) + member this.acknowledged = + this.status = "acknowledged" || this.status = "overridden" + /// Did a deliberate override replace the auto-resolution? + member this.overridden = this.status = "overridden" + + /// LEGACY prose bridge — reconstructs the human resolution string the `.dark` display still parses. + /// Removed in the next step, which renders `chosenHash`/`resolvedBy` structurally (no string match). + member this.resolution = + match this.resolvedBy with + | "auto:merge-child-wins" -> "MergeChildWins" + | _ -> + if this.chosenHash = this.incomingHash then + "auto: timestamp-LWW — incoming won (newer creation)" + elif this.chosenHash = this.localHash then + "auto: timestamp-LWW — kept local (newer creation)" + else + "auto: timestamp-LWW" -/// Record an auto-resolved divergence. Idempotent on the live (location, remote, both hashes): the -/// same conflict re-detected on a re-pull doesn't pile up duplicate rows. +// ── location parsing + kind lookup (to rebuild the structured conflict from raw hashes) ── + +/// Parse "owner[.modules].name" → PackageLocation (head = owner, last = name, middle = modules). +let private parseLoc (location : string) : PT.PackageLocation = + match location.Split('.') |> List.ofArray with + | owner :: rest when not (List.isEmpty rest) -> + match List.rev rest with + | name :: revModules -> + { owner = owner; modules = List.rev revModules; name = name } + | [] -> { owner = owner; modules = []; name = "" } + | _ -> { owner = ""; modules = []; name = location } + +/// The item kind bound at a location — needed to rebuild the candidate `Reference`s. Falls back to +/// `Fn` when the location isn't (or isn't yet) in `locations` (e.g. a synthetic test record); the +/// candidate hashes are preserved regardless, so the fallback is harmless. +let private itemKindFor (location : string) : Task = + task { + let loc = parseLoc location + let! rows = + Sql.query + "SELECT item_type FROM locations WHERE owner=@o AND modules=@m AND name=@n LIMIT 1" + |> Sql.parameters + [ "o", Sql.string loc.owner + "m", Sql.string (String.concat "." loc.modules) + "n", Sql.string loc.name ] + |> Sql.executeAsync (fun read -> read.string "item_type") + return + rows + |> List.tryHead + |> Option.map PT.ItemKind.fromString + |> Option.defaultValue PT.ItemKind.Fn + } + +// ── record / read / resolve ── + +/// Record an auto-resolved divergence: build the structured `SyncConflict` from the contending hashes, +/// store its blob + the chosen winner + the policy that chose it. Idempotent on the live (remote, +/// location, exact conflict): the same divergence re-detected on a re-pull doesn't pile up a duplicate +/// row (unless the prior one was already overridden — then a fresh divergence is worth recording). let record (remote : string) (location : string) (localHash : string) (incomingHash : string) - (resolution : string) + (chosenHash : string) + (resolvedBy : string) : Task = task { - // dedup: skip if this exact unresolved conflict is already on record + let! kind = itemKindFor location + let loc = parseLoc location + let localRef = PT.Reference.fromHashAndKind (PT.Hash localHash, kind) + let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incomingHash, kind) + let conflict = PT.SyncConflict.Divergence(loc, [ localRef; incomingRef ]) + let id = System.Guid.NewGuid() |> string + let blob = Serialize.PT.SyncConflict.serialize id conflict + + // dedup on the exact serialized conflict (encodes location + both hashes), still un-overridden let! existing = Sql.query """ SELECT id FROM sync_conflicts - WHERE location = @loc AND remote = @remote - AND local_hash = @local AND incoming_hash = @incoming - AND overridden = 0 + WHERE remote = @remote AND location = @loc AND conflict_blob = @blob + AND status <> 'overridden' LIMIT 1 """ |> Sql.parameters - [ "loc", Sql.string location - "remote", Sql.string remote - "local", Sql.string localHash - "incoming", Sql.string incomingHash ] + [ "remote", Sql.string remote + "loc", Sql.string location + "blob", Sql.bytes blob ] |> Sql.executeAsync (fun read -> read.string "id") match existing with | _ :: _ -> () | [] -> - let id = System.Guid.NewGuid() |> string do! Sql.query """ INSERT INTO sync_conflicts - (id, location, local_hash, incoming_hash, resolution, remote) - VALUES (@id, @loc, @local, @incoming, @resolution, @remote) + (id, kind, location, conflict_blob, chosen_hash, resolved_by, remote) + VALUES (@id, 'divergence', @loc, @blob, @chosen, @by, @remote) """ |> Sql.parameters [ "id", Sql.string id "loc", Sql.string location - "local", Sql.string localHash - "incoming", Sql.string incomingHash - "resolution", Sql.string resolution + "blob", Sql.bytes blob + "chosen", Sql.string chosenHash + "by", Sql.string resolvedBy "remote", Sql.string remote ] |> Sql.executeStatementAsync } @@ -88,39 +178,44 @@ let list () : Task> = return! Sql.query """ - SELECT id, location, local_hash, incoming_hash, resolution, remote, - acknowledged, overridden + SELECT id, kind, location, conflict_blob, chosen_hash, resolved_by, + override_op_id, remote, status FROM sync_conflicts ORDER BY detected_at DESC """ |> Sql.executeAsync (fun read -> - { id = read.string "id" + let id = read.string "id" + let conflict = + Serialize.PT.SyncConflict.deserialize id (read.bytes "conflict_blob") + { id = id + kind = read.string "kind" location = read.string "location" - localHash = read.string "local_hash" - incomingHash = read.string "incoming_hash" - resolution = read.string "resolution" + conflict = conflict + chosenHash = read.string "chosen_hash" + resolvedBy = read.string "resolved_by" + overrideOpId = read.stringOrNone "override_op_id" remote = read.string "remote" - acknowledged = read.int64 "acknowledged" <> 0L - overridden = read.int64 "overridden" <> 0L }) + status = read.string "status" }) } -/// The user agrees with the auto-resolution — stop surfacing it (the common case). +/// The user agrees with the auto-resolution — mark it reviewed (the common case). Only moves an +/// `auto-resolved` row; an `overridden` one stays overridden. let acknowledge (id : string) : Task = - task { - do! - Sql.query "UPDATE sync_conflicts SET acknowledged = 1 WHERE id = @id" - |> Sql.parameters [ "id", Sql.string id ] - |> Sql.executeStatementAsync - } + Sql.query + "UPDATE sync_conflicts SET status = 'acknowledged' WHERE id = @id AND status = 'auto-resolved'" + |> Sql.parameters [ "id", Sql.string id ] + |> Sql.executeStatementAsync -/// Acknowledge ALL currently-unacknowledged conflicts at once — the bulk "yeah, the auto thing was -/// right" path. Returns how many were newly acknowledged. +/// Acknowledge ALL currently-pending conflicts at once — the bulk "yeah, the auto thing was right" +/// path. Returns how many were newly acknowledged. let acknowledgeAll () : Task = task { let! pending = - Sql.query "SELECT COUNT(*) AS n FROM sync_conflicts WHERE acknowledged = 0" + Sql.query + "SELECT COUNT(*) AS n FROM sync_conflicts WHERE status = 'auto-resolved'" |> Sql.executeAsync (fun read -> read.int64 "n") do! - Sql.query "UPDATE sync_conflicts SET acknowledged = 1 WHERE acknowledged = 0" + Sql.query + "UPDATE sync_conflicts SET status = 'acknowledged' WHERE status = 'auto-resolved'" |> Sql.executeStatementAsync return (match pending with @@ -130,30 +225,23 @@ let acknowledgeAll () : Task = /// The user chose a different resolution (a reconciling op was emitted) — mark it overridden. let markOverridden (id : string) : Task = - task { - do! - Sql.query - "UPDATE sync_conflicts SET overridden = 1, acknowledged = 1 WHERE id = @id" - |> Sql.parameters [ "id", Sql.string id ] - |> Sql.executeStatementAsync - } + Sql.query "UPDATE sync_conflicts SET status = 'overridden' WHERE id = @id" + |> Sql.parameters [ "id", Sql.string id ] + |> Sql.executeStatementAsync /// Mark the most recent un-overridden conflict at a location overridden — used when a resolution /// POLICY (not a human) keeps local: `Sync.routeDivergences` emitted a reconciling op, so the /// recorded auto-LWW outcome no longer reflects the live binding. Keyed by remote + location. let markOverriddenByLocation (remote : string) (location : string) : Task = - task { - do! - Sql.query - """ - UPDATE sync_conflicts SET overridden = 1, acknowledged = 1 - WHERE id = (SELECT id FROM sync_conflicts - WHERE remote = @remote AND location = @loc AND overridden = 0 - ORDER BY detected_at DESC LIMIT 1) - """ - |> Sql.parameters [ "remote", Sql.string remote; "loc", Sql.string location ] - |> Sql.executeStatementAsync - } + Sql.query + """ + UPDATE sync_conflicts SET status = 'overridden' + WHERE id = (SELECT id FROM sync_conflicts + WHERE remote = @remote AND location = @loc AND status <> 'overridden' + ORDER BY detected_at DESC LIMIT 1) + """ + |> Sql.parameters [ "remote", Sql.string remote; "loc", Sql.string location ] + |> Sql.executeStatementAsync /// Look up one conflict by id (for the resolve flow). let getById (id : string) : Task> = diff --git a/backend/src/LibDB/Merge.fs b/backend/src/LibDB/Merge.fs index 85bd260e50..bb5a5baae6 100644 --- a/backend/src/LibDB/Merge.fs +++ b/backend/src/LibDB/Merge.fs @@ -150,7 +150,8 @@ let merge (branchId : PT.BranchId) : Task> = locStr parentHash childHash - "MergeChildWins" + childHash // the child binding won + "auto:merge-child-wins" do! BranchOpPlayback.insertAndApply ( diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index eb99c4ce89..11dea5ade1 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -212,16 +212,17 @@ let recordDivergences task { for (location, localHash, incomingHash) in divergences do // record the ACTUAL outcome after timestamp-LWW (the fold may have kept the local op if the - // incoming was older-by-creation) — not a blanket "incoming won". Read the live binding. + // incoming was older-by-creation) — read the live binding to get the chosen winner. let! winner = liveBindingHash location - let resolution = - if winner = Some incomingHash then - "auto: timestamp-LWW — incoming won (newer creation)" - elif winner = Some localHash then - "auto: timestamp-LWW — kept local (newer creation)" - else - "auto: timestamp-LWW" - do! Conflicts.record remote location localHash incomingHash resolution + let chosenHash = winner |> Option.defaultValue incomingHash + do! + Conflicts.record + remote + location + localHash + incomingHash + chosenHash + "auto:last-writer-wins" } diff --git a/backend/tests/Tests/SyncIdempotency.Tests.fs b/backend/tests/Tests/SyncIdempotency.Tests.fs index 80247c466d..9bd403269a 100644 --- a/backend/tests/Tests/SyncIdempotency.Tests.fs +++ b/backend/tests/Tests/SyncIdempotency.Tests.fs @@ -621,10 +621,23 @@ let tests = "Conflicts: record an auto-resolved divergence, list it, dedup on re-detect, acknowledge" { let remote = $"conflict-peer-{System.Guid.NewGuid()}" let loc = $"Stachu.Foo.bar-{System.Guid.NewGuid()}" - let res = "auto: last-writer-wins (incoming)" - do! Conflicts.record remote loc "hashLocal" "hashIncoming" res + do! + Conflicts.record + remote + loc + "hashLocal" + "hashIncoming" + "hashIncoming" + "auto:last-writer-wins" // dedup — the same conflict re-detected on a re-pull doesn't pile up a second row - do! Conflicts.record remote loc "hashLocal" "hashIncoming" res + do! + Conflicts.record + remote + loc + "hashLocal" + "hashIncoming" + "hashIncoming" + "auto:last-writer-wins" let! all = Conflicts.list () match all |> List.filter (fun (x : Conflicts.Conflict) -> x.location = loc) @@ -929,7 +942,8 @@ let tests = loc (System.String('a', 64)) (System.String('b', 64)) - "auto" + (System.String('b', 64)) // chosen winner + "auto:last-writer-wins" let! before = Conflicts.list () Expect.isTrue (before From 93d3ec2e39a16f233e0db4442049ea7faf8bf497 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 11:09:18 -0400 Subject: [PATCH 15/25] =?UTF-8?q?conflicts:=20structured=20display=20?= =?UTF-8?q?=E2=80=94=20render=20from=20chosen=5Fhash=20+=20resolved=5Fby,?= =?UTF-8?q?=20no=20prose=20parsing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 4 of the conflict redesign. The `dark conflicts` display now renders the structured resolution instead of parsing prose: - pmConflictsList returns (id, location, status, chosenHash, resolvedBy, localHash, incomingHash, remote) — chosen hash + the policy that picked it. - display.dark: conflictWinner reads the winner STRUCTURALLY (chosen == local/incoming) instead of String.contains on resolution prose; conflictVerdict labels by resolvedBy ('last-write-wins' / 'override' / 'merge'). The conflicts-list testfile rewritten to the structured signatures. - Conflicts.fs: the temporary `resolution` prose bridge is gone; the two F# tests that read it now assert resolvedBy directly. Full backend suite green (9,788). --- .../Builtins/Builtins.Matter/Libs/PM/Sync.fs | 23 ++++++-- backend/src/LibDB/Conflicts.fs | 13 ----- .../execution/pre-s-and-s/conflicts-list.dark | 44 ++++++++------- backend/tests/Tests/BranchOps.Tests.fs | 4 +- backend/tests/Tests/SyncIdempotency.Tests.fs | 6 +- packages/darklang/cli/conflicts.dark | 6 +- packages/darklang/sync/display.dark | 56 +++++++++++-------- 7 files changed, 82 insertions(+), 70 deletions(-) diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs index 20e2a21110..a0244cad12 100644 --- a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs @@ -442,14 +442,19 @@ let fns () : List = "true = include acked/overridden (history); false = only pending" ] returnType = TList( - TTuple(TString, TString, [ TString; TString; TString; TString; TString ]) + TTuple( + TString, + TString, + [ TString; TString; TString; TString; TString; TString ] + ) ) description = "Recorded sync conflicts (auto-resolved name-binding divergences), one STRUCTURED tuple each for `dark conflicts` to format in Dark (so the display is package-testable + iterable): - `(id, location, status, resolution, localHash, incomingHash, remote)`. `status` is - NEW/acked/overridden; hashes are full (the Dark formatter shortens). `includeResolved=false` - shows only pending (the actionable view — acked/overridden drop out, the ack-to-dismiss + `(id, location, status, chosenHash, resolvedBy, localHash, incomingHash, remote)`. `status` + is NEW/acked/overridden; `chosenHash` is which content won and `resolvedBy` the policy that + picked it (e.g. `auto:last-writer-wins`); hashes are full (the Dark formatter shortens). + `includeResolved=false` shows only pending (acked/overridden drop out, the ack-to-dismiss model). Empty if none match." fn = (function @@ -474,7 +479,8 @@ let fns () : List = DString c.id, DString c.location, [ DString status - DString c.resolution + DString c.chosenHash + DString c.resolvedBy DString c.localHash DString c.incomingHash DString c.remote ] @@ -484,7 +490,12 @@ let fns () : List = (KTTuple( VT.string, VT.string, - [ VT.string; VT.string; VT.string; VT.string; VT.string ] + [ VT.string + VT.string + VT.string + VT.string + VT.string + VT.string ] )) rows } diff --git a/backend/src/LibDB/Conflicts.fs b/backend/src/LibDB/Conflicts.fs index 08a8f09d40..9d47f4bc3c 100644 --- a/backend/src/LibDB/Conflicts.fs +++ b/backend/src/LibDB/Conflicts.fs @@ -69,19 +69,6 @@ type Conflict = /// Did a deliberate override replace the auto-resolution? member this.overridden = this.status = "overridden" - /// LEGACY prose bridge — reconstructs the human resolution string the `.dark` display still parses. - /// Removed in the next step, which renders `chosenHash`/`resolvedBy` structurally (no string match). - member this.resolution = - match this.resolvedBy with - | "auto:merge-child-wins" -> "MergeChildWins" - | _ -> - if this.chosenHash = this.incomingHash then - "auto: timestamp-LWW — incoming won (newer creation)" - elif this.chosenHash = this.localHash then - "auto: timestamp-LWW — kept local (newer creation)" - else - "auto: timestamp-LWW" - // ── location parsing + kind lookup (to rebuild the structured conflict from raw hashes) ── /// Parse "owner[.modules].name" → PackageLocation (head = owner, last = name, middle = modules). diff --git a/backend/testfiles/execution/pre-s-and-s/conflicts-list.dark b/backend/testfiles/execution/pre-s-and-s/conflicts-list.dark index cc9a799a20..d37f71d491 100644 --- a/backend/testfiles/execution/pre-s-and-s/conflicts-list.dark +++ b/backend/testfiles/execution/pre-s-and-s/conflicts-list.dark @@ -1,22 +1,25 @@ -// `dark conflicts` — the review surface for last-write-wins auto-resolutions. Sync NEVER blocks: a -// `name → two hashes` divergence is auto-resolved by timestamp-LWW (newest authoring time wins) and -// RECORDED so it's never silently lost — the user eventually `ack`s (agrees) or overrides. These pure -// formatters (`Darklang.Sync.Display.conflict*`) render that surface; this file pins the UX. +// `dark conflicts` — the review surface for auto-resolved sync conflicts. Sync NEVER blocks: a +// `name → two hashes` divergence is auto-resolved by policy (default last-writer-wins) and RECORDED +// so it's never silently lost — the user eventually `ack`s (agrees) or overrides. These pure +// formatters (`Darklang.Sync.Display.conflict*`) render that surface STRUCTURALLY (from the chosen +// hash + the resolving policy, no prose parsing); this file pins the UX. -// ── winner read from the recorded resolution tag ── -Darklang.Sync.Display.conflictWinner "auto: timestamp-LWW — incoming won (newer creation)" = "them" -Darklang.Sync.Display.conflictWinner "auto: timestamp-LWW — kept local (newer creation)" = "you" -Darklang.Sync.Display.conflictWinner "auto: timestamp-LWW" = "" +// ── winner read structurally: the chosen hash equals the local ("you") or incoming ("them") side ── +Darklang.Sync.Display.conflictWinner "e5f6a7b80000" "a1b2c3d4ffff" "e5f6a7b80000" = "them" +Darklang.Sync.Display.conflictWinner "a1b2c3d4ffff" "a1b2c3d4ffff" "e5f6a7b80000" = "you" +Darklang.Sync.Display.conflictWinner "ffff0000" "a1b2c3d4ffff" "e5f6a7b80000" = "" -// ── verdict always names last-write-wins (the auto-resolution is never opaque) ── -Darklang.Sync.Display.conflictVerdict "auto: timestamp-LWW — incoming won (newer creation)" = "last-write-wins → kept theirs" -Darklang.Sync.Display.conflictVerdict "auto: timestamp-LWW — kept local (newer creation)" = "last-write-wins → kept yours" -Darklang.Sync.Display.conflictVerdict "auto: timestamp-LWW" = "auto-resolved (last-write-wins)" +// ── verdict names the resolving policy + which side it kept (never opaque) ── +Darklang.Sync.Display.conflictVerdict "auto:last-writer-wins" "them" = "last-write-wins → kept theirs" +Darklang.Sync.Display.conflictVerdict "auto:last-writer-wins" "you" = "last-write-wins → kept yours" +Darklang.Sync.Display.conflictVerdict "auto:last-writer-wins" "" = "auto-resolved (last-write-wins)" +Darklang.Sync.Display.conflictVerdict "human" "you" = "override → kept yours" +Darklang.Sync.Display.conflictVerdict "auto:merge-child-wins" "them" = "merge → kept theirs" // ── hashes: ✓ marks the winning side, the arrow points to it ── -Darklang.Sync.Display.conflictHashes "auto: timestamp-LWW — incoming won (newer creation)" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 → them e5f6a7b8 ✓" -Darklang.Sync.Display.conflictHashes "auto: timestamp-LWW — kept local (newer creation)" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 ✓ ← them e5f6a7b8" -Darklang.Sync.Display.conflictHashes "auto: timestamp-LWW" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 · them e5f6a7b8" +Darklang.Sync.Display.conflictHashes "e5f6a7b80000" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 → them e5f6a7b8 ✓" +Darklang.Sync.Display.conflictHashes "a1b2c3d4ffff" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 ✓ ← them e5f6a7b8" +Darklang.Sync.Display.conflictHashes "ffff0000" "a1b2c3d4ffff" "e5f6a7b80000" = "you a1b2c3d4 · them e5f6a7b8" // short hash (full → first 8; already-short unchanged) Darklang.Sync.Display.shortHash "abcdefgh12345" = "abcdefgh" @@ -32,8 +35,9 @@ Darklang.Sync.Display.conflictReport [] false = ["✓ no pending sync conflicts Darklang.Sync.Display.conflictReport [] true = ["✓ no sync conflicts on record"] // ── one PENDING conflict → header (frames the ack model) + 2-line block + footer (the actions) ── -// the block names the winner (them, via ✓/→) and prints the exact `ack ` to dismiss it -Darklang.Sync.Display.conflictReport [(("3f8a92c1deadbeef", "Stachu.MyApp.greeting", "NEW", "auto: timestamp-LWW — incoming won (newer creation)", "a1b2c3d4ffff", "e5f6a7b80000", "desktop"))] false = [ +// 8-tuple: (id, location, status, chosenHash, resolvedBy, localHash, incomingHash, remote) +// chosen == incoming → "them" won; the block names it (✓/→) and prints the exact `ack ` +Darklang.Sync.Display.conflictReport [(("3f8a92c1deadbeef", "Stachu.MyApp.greeting", "NEW", "e5f6a7b80000", "auto:last-writer-wins", "a1b2c3d4ffff", "e5f6a7b80000", "desktop"))] false = [ "1 auto-resolved conflict(s) awaiting ack — last-write-wins kept one side, nothing lost:"; ""; "⚠ Stachu.MyApp.greeting"; @@ -42,8 +46,8 @@ Darklang.Sync.Display.conflictReport [(("3f8a92c1deadbeef", "Stachu.MyApp.greeti " ack (agree) · ack all · resolve mine|theirs (override)" ] -// an acked conflict in the history view → ✓ glyph, no ack hint (already dismissed) -Darklang.Sync.Display.conflictReport [(("7c0ffee0", "Stachu.Lib.parse", "acked", "auto: timestamp-LWW — kept local (newer creation)", "11112222", "33334444", "laptop"))] true = [ +// an acked conflict in the history view → ✓ glyph, no ack hint; chosen == local → "you" won +Darklang.Sync.Display.conflictReport [(("7c0ffee0", "Stachu.Lib.parse", "acked", "11112222", "auto:last-writer-wins", "11112222", "33334444", "laptop"))] true = [ "1 auto-resolved conflict(s) awaiting ack — last-write-wins kept one side, nothing lost:"; ""; "✓ Stachu.Lib.parse"; @@ -53,7 +57,7 @@ Darklang.Sync.Display.conflictReport [(("7c0ffee0", "Stachu.Lib.parse", "acked", ] // TWO pending races from different peers → header counts both, one block each (a realistic review) -Darklang.Sync.Display.conflictReport [(("aaaa1111", "Stachu.MyApp.greeting", "NEW", "auto: timestamp-LWW — incoming won (newer creation)", "11111111", "22222222", "desktop")); (("bbbb2222", "Stachu.MyApp.config", "NEW", "auto: timestamp-LWW — kept local (newer creation)", "33333333", "44444444", "laptop"))] false = [ +Darklang.Sync.Display.conflictReport [(("aaaa1111", "Stachu.MyApp.greeting", "NEW", "22222222", "auto:last-writer-wins", "11111111", "22222222", "desktop")); (("bbbb2222", "Stachu.MyApp.config", "NEW", "33333333", "auto:last-writer-wins", "33333333", "44444444", "laptop"))] false = [ "2 auto-resolved conflict(s) awaiting ack — last-write-wins kept one side, nothing lost:"; ""; "⚠ Stachu.MyApp.greeting"; diff --git a/backend/tests/Tests/BranchOps.Tests.fs b/backend/tests/Tests/BranchOps.Tests.fs index ee90a50e0d..82d0407eef 100644 --- a/backend/tests/Tests/BranchOps.Tests.fs +++ b/backend/tests/Tests/BranchOps.Tests.fs @@ -490,8 +490,8 @@ let testMergeRecordsCollisions = cHash "child (winning) hash is the incoming side" Expect.equal - conflict.resolution - "MergeChildWins" + conflict.resolvedBy + "auto:merge-child-wins" "the merge auto-resolution is recorded for review" | other -> failtest $"expected one recorded merge collision, got {List.length other}" diff --git a/backend/tests/Tests/SyncIdempotency.Tests.fs b/backend/tests/Tests/SyncIdempotency.Tests.fs index 9bd403269a..0d586526c4 100644 --- a/backend/tests/Tests/SyncIdempotency.Tests.fs +++ b/backend/tests/Tests/SyncIdempotency.Tests.fs @@ -678,9 +678,9 @@ let tests = | [ c ] -> Expect.equal c.remote remote "recorded against the peer it came from" Expect.equal c.localHash "hMine" "records what we had" - Expect.stringContains - c.resolution - "timestamp-LWW" + Expect.equal + c.resolvedBy + "auto:last-writer-wins" "tagged with the auto-resolution policy" | other -> failtest $"expected the divergence recorded once, got {List.length other}" diff --git a/packages/darklang/cli/conflicts.dark b/packages/darklang/cli/conflicts.dark index 7514f2da37..c8e5ed2fcc 100644 --- a/packages/darklang/cli/conflicts.dark +++ b/packages/darklang/cli/conflicts.dark @@ -7,11 +7,11 @@ module Darklang.Cli.Conflicts // Single home for the pmConflictsList builtin — every other // caller (the display below, the `sync status` count) routes through this one fn. Returns STRUCTURED -// rows `(id, location, status, resolution, localHash, incomingHash, remote)`; the human formatting -// lives in `Darklang.Sync.Display.conflictReport` (pure → package-testable, iterable without a rebuild). +// rows `(id, location, status, chosenHash, resolvedBy, localHash, incomingHash, remote)`; the human +// formatting lives in `Darklang.Sync.Display.conflictReport` (pure → package-testable, no rebuild). let recorded (includeResolved: Bool) - : List<(String * String * String * String * String * String * String)> = + : List<(String * String * String * String * String * String * String * String)> = Builtin.pmConflictsList includeResolved // How many conflicts are still pending (unacknowledged) — the `dark sync status` glance shows this. diff --git a/packages/darklang/sync/display.dark b/packages/darklang/sync/display.dark index b2f7cb6c33..7d8983ba36 100644 --- a/packages/darklang/sync/display.dark +++ b/packages/darklang/sync/display.dark @@ -64,28 +64,34 @@ let divergenceNote (divCount: Int64) : String = let shortHash (h: String) : String = if Stdlib.String.length h > 8L then Stdlib.String.slice h 0L 8L else h -// Which side timestamp-LWW kept, read from the recorded resolution tag: "you" | "them" | "" (unknown). -let conflictWinner (resolution: String) : String = - if Stdlib.String.contains resolution "incoming won" then - "them" - else if Stdlib.String.contains resolution "kept local" then - "you" - else - "" - -// The human verdict phrase — always names last-write-wins so the auto-resolution is never opaque. -let conflictVerdict (resolution: String) : String = - match conflictWinner resolution with - | "them" -> "last-write-wins → kept theirs" - | "you" -> "last-write-wins → kept yours" - | _ -> "auto-resolved (last-write-wins)" +// Which side won, read STRUCTURALLY: the chosen hash equals the local ("you") or incoming ("them") +// candidate. No prose parsing — `chosenHash` comes straight off the recorded resolution. +let conflictWinner (chosenHash: String) (localHash: String) (incomingHash: String) : String = + if chosenHash == incomingHash then "them" + else if chosenHash == localHash then "you" + else "" + +// The human verdict phrase — names the resolving policy + which side it kept, so the auto-resolution +// is never opaque. `resolvedBy` is the structured tag ('auto:last-writer-wins' | 'human' | …). +let conflictVerdict (resolvedBy: String) (winner: String) : String = + let label = + match resolvedBy with + | "auto:last-writer-wins" -> "last-write-wins" + | "auto:merge-child-wins" -> "merge" + | "human" -> "override" + | other -> other + + match winner with + | "them" -> label ++ " → kept theirs" + | "you" -> label ++ " → kept yours" + | _ -> "auto-resolved (" ++ label ++ ")" // you/them hashes with a ✓ on the side that won (and the arrow pointing to it). -let conflictHashes (resolution: String) (localHash: String) (incomingHash: String) : String = +let conflictHashes (chosenHash: String) (localHash: String) (incomingHash: String) : String = let you = shortHash localHash let them = shortHash incomingHash - match conflictWinner resolution with + match conflictWinner chosenHash localHash incomingHash with | "them" -> "you " ++ you ++ " → them " ++ them ++ " ✓" | "you" -> "you " ++ you ++ " ✓ ← them " ++ them | _ -> "you " ++ you ++ " · them " ++ them @@ -102,19 +108,22 @@ let conflictBlock (id: String) (location: String) (status: String) - (resolution: String) + (chosenHash: String) + (resolvedBy: String) (localHash: String) (incomingHash: String) (remote: String) : List = + let winner = conflictWinner chosenHash localHash incomingHash + let ackHint = if status == "NEW" then " · ack " ++ (shortHash id) else "" [ (conflictGlyph status) ++ " " ++ location " " - ++ (conflictVerdict resolution) + ++ (conflictVerdict resolvedBy winner) ++ " " - ++ (conflictHashes resolution localHash incomingHash) + ++ (conflictHashes chosenHash localHash incomingHash) ++ " · from " ++ remote ++ ackHint ] @@ -122,7 +131,8 @@ let conflictBlock // The full `dark conflicts` report over the structured rows from `pmConflictsList`. // Header frames the ack model; the footer lists the actions. Empty → a clean "nothing pending". let conflictReport - (conflicts: List<(String * String * String * String * String * String * String)>) + (conflicts: + List<(String * String * String * String * String * String * String * String)>) (includeResolved: Bool) : List = if Stdlib.List.isEmpty conflicts then @@ -139,8 +149,8 @@ let conflictReport let blocks = conflicts - |> Stdlib.List.map (fun (id, location, status, resolution, localHash, incomingHash, remote) -> - conflictBlock id location status resolution localHash incomingHash remote) + |> Stdlib.List.map (fun (id, location, status, chosenHash, resolvedBy, localHash, incomingHash, remote) -> + conflictBlock id location status chosenHash resolvedBy localHash incomingHash remote) |> Stdlib.List.flatten let footer = " ack (agree) · ack all · resolve mine|theirs (override)" From 52ae8fd8230f0f6eca123e6cb20af77c2927dab6 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 11:24:36 -0400 Subject: [PATCH 16/25] conflicts: resolution overlay foundation (replaces OverrideName, step 5a-i) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per the design correction, an override is not a new op — it's a synced Resolution overlaid on the op-fold. This adds that mechanism (additive; OverrideName still in place, removed in a later step): - schema: a `resolutions` table (id, location, item_type, chosen_hash, resolved_by, branch_id, at) — synced decisions that override the op-fold for a contested name. - LibDB.Resolutions: mk/record/applyToLocations/recordAndApply/list. applyToLocations re-binds the location to the chosen content, gated by the SAME timestamp-LWW applySetName uses (a resolution whose `at` is older than the current binding is skipped; an exact tie breaks by the higher hash, portably) — so it converges. The effective binding becomes: fold(ops) [LWW] → then apply resolutions per location [last-resolver-wins by `at`]. A resolution's fresh `at` is what lets a "keep mine" decision win where re-emitting the original SetName (same content hash → same op id) could not — the reason OverrideName existed. A test covers the overlay (newer resolution overrides; a stale one is skipped). Full backend suite green (9,789). --- backend/migrations/schema.sql | 23 +++ backend/src/LibDB/LibDB.fsproj | 1 + backend/src/LibDB/Resolutions.fs | 170 +++++++++++++++++++++ backend/tests/Tests/SyncScenarios.Tests.fs | 46 +++++- 4 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 backend/src/LibDB/Resolutions.fs diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index 3375b0cd4f..53c4d80113 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -402,6 +402,29 @@ CREATE TABLE IF NOT EXISTS sync_cursors ( folded_through_rowid INTEGER NOT NULL DEFAULT 0 ); +-- Resolutions — synced decisions that OVERRIDE the op-fold for a contested name. A conflict (e.g. a +-- name diverged across instances) auto-resolves by policy (last-writer-wins), but a human (or a +-- keep-local policy) can decide differently. That decision is NOT a new op — the op log is authored +-- content/structure; a resolution is a thin overlay picking among existing candidates. The effective +-- binding is: fold(ops) [LWW] → then apply resolutions per location [last-resolver-wins by `at`]. A +-- resolution carries its own fresh `at` stamp, so it wins the same timestamp-LWW that orders bindings — +-- which is what makes a "keep mine" decision propagate where re-emitting the original SetName (same +-- content hash → same op id) could not. Synced (its own rowid cursors the wire); the implicit rowid +-- orders the sync. This REPLACES the old `OverrideName` op (an op invented only to dodge that hash +-- collision). `at` is the resolver time; `id` is a uuid carried over the wire for INSERT-OR-IGNORE dedup. +CREATE TABLE IF NOT EXISTS resolutions ( + id TEXT PRIMARY KEY, -- uuid, carried over the wire (idempotent apply) + owner TEXT NOT NULL, + modules TEXT NOT NULL, + name TEXT NOT NULL, + item_type TEXT NOT NULL, -- 'fn' | 'type' | 'value' (kind of the chosen content) + chosen_hash TEXT NOT NULL, -- the content hash this resolution binds the name to + resolved_by TEXT NOT NULL, -- 'human' | 'auto:keep-local' | … + branch_id TEXT NOT NULL, + at TEXT NOT NULL, -- resolver timestamp — the LWW stamp this binding competes on + created_at TIMESTAMP NOT NULL DEFAULT (datetime('now')) +); + -- The recorded, reviewable log of auto-resolved sync conflicts (`dark conflicts`). Recorded at pull -- time; auto-resolved by policy (default last-writer-wins) but never silently lost. Local-only, never -- synced, re-derivable by replaying the op log. Stores the STRUCTURED conflict (`conflict_blob` = a diff --git a/backend/src/LibDB/LibDB.fsproj b/backend/src/LibDB/LibDB.fsproj index 7f594828d0..ac62c248fc 100644 --- a/backend/src/LibDB/LibDB.fsproj +++ b/backend/src/LibDB/LibDB.fsproj @@ -34,6 +34,7 @@ + diff --git a/backend/src/LibDB/Resolutions.fs b/backend/src/LibDB/Resolutions.fs new file mode 100644 index 0000000000..8c0bc80e6c --- /dev/null +++ b/backend/src/LibDB/Resolutions.fs @@ -0,0 +1,170 @@ +/// Resolutions — synced decisions that OVERRIDE the op-fold for a contested name. +/// +/// A sync conflict (a name diverged across instances) auto-resolves by policy (last-writer-wins). A +/// human — or a keep-local policy — can decide differently. That decision is NOT a new op: the op log +/// is authored content/structure; a resolution is a thin overlay that picks among EXISTING candidates. +/// The effective binding is `fold(ops)` [LWW] → then apply resolutions per location [last-resolver-wins +/// by `at`]. A resolution carries its own fresh `at` stamp, so it competes in the SAME timestamp-LWW +/// that orders bindings (`applySetName`) — which is what lets a "keep mine" decision propagate where +/// re-emitting the original `SetName` (same content hash → same op id → no new rowid) could not. +/// +/// This REPLACES the old `OverrideName` op (an op invented only to dodge that hash collision). The +/// table is `resolutions` (schema.sql); a resolution's `id` is a uuid carried over the wire so peers +/// apply it idempotently (INSERT OR IGNORE). +module LibDB.Resolutions + +open System.Threading.Tasks +open FSharp.Control.Tasks + +open Prelude + +open Fumble +open LibDB.Sqlite + +module PT = LibExecution.ProgramTypes + +/// One resolution: a decision to bind `location` to `chosenHash` (a `chosen` content of `itemKind`), +/// made `by` (a policy name or "human") at time `at`. `id` is the wire-carried idempotency key. +type Resolution = + { id : string + location : PT.PackageLocation + itemKind : PT.ItemKind + chosenHash : string + resolvedBy : string + branchId : PT.BranchId + at : string } + +/// Mint a resolution for a "keep this candidate" decision. `at` is the resolver time (UTC, the same +/// stamp format the op log + locations use), which becomes the LWW stamp the binding competes on. +let mk + (location : PT.PackageLocation) + (chosen : PT.Reference) + (resolvedBy : string) + (branchId : PT.BranchId) + (at : string) + : Resolution = + let (PT.Hash h) = chosen.hash + { id = System.Guid.NewGuid() |> string + location = location + itemKind = chosen.kind + chosenHash = h + resolvedBy = resolvedBy + branchId = branchId + at = at } + +/// Persist a resolution (idempotent on `id` — a re-pulled resolution from a peer doesn't duplicate). +let record (r : Resolution) : Task = + Sql.query + """ + INSERT OR IGNORE INTO resolutions + (id, owner, modules, name, item_type, chosen_hash, resolved_by, branch_id, at) + VALUES (@id, @o, @m, @n, @t, @hash, @by, @b, @at) + """ + |> Sql.parameters + [ "id", Sql.string r.id + "o", Sql.string r.location.owner + "m", Sql.string (String.concat "." r.location.modules) + "n", Sql.string r.location.name + "t", Sql.string (r.itemKind.toString ()) + "hash", Sql.string r.chosenHash + "by", Sql.string r.resolvedBy + "b", Sql.string (string r.branchId) + "at", Sql.string r.at ] + |> Sql.executeStatementAsync + +/// Apply a resolution to the `locations` projection — the OVERLAY step. Re-binds the location to the +/// chosen content, gated by the SAME timestamp-LWW `applySetName` uses: a resolution whose `at` is +/// older than the current binding's `origin_ts` is stale and skipped (an exact tie breaks by the higher +/// content hash, portably). So every instance converges on the same winner regardless of arrival order. +let applyToLocations (r : Resolution) : Task = + task { + let modulesStr = String.concat "." r.location.modules + let itemTypeStr = r.itemKind.toString () + + let! cur = + Sql.query + """ + SELECT item_hash, origin_ts FROM locations + WHERE owner = @o AND modules = @m AND name = @n AND item_type = @t + AND branch_id = @b AND unlisted_at IS NULL + LIMIT 1 + """ + |> Sql.parameters + [ "o", Sql.string r.location.owner + "m", Sql.string modulesStr + "n", Sql.string r.location.name + "t", Sql.string itemTypeStr + "b", Sql.string (string r.branchId) ] + |> Sql.executeAsync (fun read -> + (read.string "item_hash", read.stringOrNone "origin_ts")) + + let isStale = + match cur with + | (curHash, Some curTs) :: _ when curHash <> r.chosenHash -> + r.at < curTs || (r.at = curTs && r.chosenHash < curHash) + | _ -> false + + if isStale then + return () + else + // supersede the existing binding at this path, then insert the resolved one with origin_ts = at + do! + Sql.query + """ + UPDATE locations SET unlisted_at = datetime('now') + WHERE owner = @o AND modules = @m AND name = @n AND item_type = @t + AND branch_id = @b AND unlisted_at IS NULL + """ + |> Sql.parameters + [ "o", Sql.string r.location.owner + "m", Sql.string modulesStr + "n", Sql.string r.location.name + "t", Sql.string itemTypeStr + "b", Sql.string (string r.branchId) ] + |> Sql.executeStatementAsync + do! + Sql.query + """ + INSERT INTO locations + (location_id, item_hash, owner, modules, name, item_type, branch_id, commit_hash, origin_ts) + VALUES (@lid, @hash, @o, @m, @n, @t, @b, NULL, @at) + """ + |> Sql.parameters + [ "lid", Sql.string (System.Guid.NewGuid() |> string) + "hash", Sql.string r.chosenHash + "o", Sql.string r.location.owner + "m", Sql.string modulesStr + "n", Sql.string r.location.name + "t", Sql.string itemTypeStr + "b", Sql.string (string r.branchId) + "at", Sql.string r.at ] + |> Sql.executeStatementAsync + } + +/// Record + immediately apply (the local-authoring path: a human/keep-local decision takes effect now). +let recordAndApply (r : Resolution) : Task = + task { + do! record r + do! applyToLocations r + } + +/// All resolutions, oldest first (creation order) — for inspection and (later) syncing to peers. +let list () : Task> = + Sql.query + """ + SELECT id, owner, modules, name, item_type, chosen_hash, resolved_by, branch_id, at + FROM resolutions ORDER BY rowid ASC + """ + |> Sql.executeAsync (fun read -> + { id = read.string "id" + location = + { owner = read.string "owner" + modules = + let m = read.string "modules" + if m = "" then [] else String.split "." m + name = read.string "name" } + itemKind = PT.ItemKind.fromString (read.string "item_type") + chosenHash = read.string "chosen_hash" + resolvedBy = read.string "resolved_by" + branchId = read.uuid "branch_id" + at = read.string "at" }) diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index 799a91513d..4af33e195e 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -21,6 +21,7 @@ open LibDB.Sqlite module Inserts = LibDB.Inserts module Conflicts = LibDB.Conflicts +module Resolutions = LibDB.Resolutions module Sync = LibDB.Sync module PT = LibExecution.ProgramTypes module RT = LibExecution.RuntimeTypes @@ -695,6 +696,48 @@ let private threeWayConverge = "converges to the newest ('b' @ -30) — arrival order b, a, c (same winner)" } +// The resolution OVERLAY: a resolution overrides the op-fold binding when its `at` is newer, and is +// skipped when stale — the same timestamp-LWW that orders ops, so it converges. This is the mechanism +// that replaces `OverrideName` (a synced decision over the fold, not a new op). +let private resolutionOverlayApplies = + testTask + "a resolution overrides the op-fold binding by its newer `at` (a stale one is skipped)" { + let loc : PT.PackageLocation = + { owner = "Resln"; modules = [ "R" ]; name = uniqueName "r" } + let a, b, c = hashChar 'a', hashChar 'b', hashChar 'c' + let refOf h = PT.Reference.fromHashAndKind (PT.Hash h, PT.ItemKind.Fn) + // op-fold binds loc -> a (authored -60) + let aOp = PT.PackageOp.SetName(loc, refOf a) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ aOp ] + (Map.ofList [ (Inserts.computeOpHash aOp, relTs -60.0) ]) + let! before = liveHash loc + Expect.equal before (Some a) "precondition: op-fold bound loc -> a" + // a resolution choosing b, stamped NEWER (now) -> overrides to b + do! + Resolutions.recordAndApply ( + Resolutions.mk loc (refOf b) "human" PT.mainBranchId (relTs 0.0) + ) + let! afterB = liveHash loc + Expect.equal + afterB + (Some b) + "the resolution (newer `at`) overrode the binding to b" + // a STALE resolution choosing c, stamped OLDER (-120) -> skipped, b stays + do! + Resolutions.recordAndApply ( + Resolutions.mk loc (refOf c) "human" PT.mainBranchId (relTs -120.0) + ) + let! afterC = liveHash loc + Expect.equal + afterC + (Some b) + "the stale resolution (older `at`) was skipped — b stays" + } + // ── all scenarios ────────────────────────────────────────────────────────────────────────────── let tests = @@ -713,4 +756,5 @@ let tests = idempotentRePull resolutionSticks lateStaleArrival - threeWayConverge ]) + threeWayConverge + resolutionOverlayApplies ]) From fe488eb678723ec6148e8f52e50fa43b948b783b Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 11:34:28 -0400 Subject: [PATCH 17/25] =?UTF-8?q?conflicts:=20resolution=20sync=20channel?= =?UTF-8?q?=20=E2=80=94=20wire=20+=20cursor=20+=20apply=20(step=205b-i)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resolution overlay now has a sync channel, mirroring the op channel so a synced decision propagates cross-machine without a new op: - sync_cursors gains resolutions_through_rowid; SyncCursors gets resolutionCursorFor/advanceResolutionCursor (a separate per-peer cursor). - Resolutions: a shared row-reader (ofRow), `since cursor` (the sender read), and applyToLocations is now idempotent (skips when already bound to chosen, so a re-pulled resolution doesn't churn locations). - Sync: encodeResolutions/decodeResolutions (version-guarded, mirroring encodeBatch) + applyRemoteResolutions (record + fold each + advance the cursor). A test ships a resolution over the wire (encode→decode) and a peer adopts it, idempotently. Additive — overrides still mint OverrideName for now; the switch + pull-path integration follow. Full backend suite green (9,790). --- backend/migrations/schema.sql | 5 +- backend/src/LibDB/Resolutions.fs | 53 +++++++++------ backend/src/LibDB/Sync.fs | 78 ++++++++++++++++++++++ backend/src/LibDB/SyncCursors.fs | 29 ++++++++ backend/tests/Tests/SyncScenarios.Tests.fs | 41 +++++++++++- 5 files changed, 184 insertions(+), 22 deletions(-) diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index 53c4d80113..ab102e141a 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -399,7 +399,10 @@ CREATE TABLE IF NOT EXISTS sync_remotes ( -- The cursor is a `package_ops` rowid (SQLite's monotonic insertion order). CREATE TABLE IF NOT EXISTS sync_cursors ( remote TEXT PRIMARY KEY, - folded_through_rowid INTEGER NOT NULL DEFAULT 0 + folded_through_rowid INTEGER NOT NULL DEFAULT 0, + -- how far we've applied this remote's RESOLUTIONS stream (a separate `resolutions` rowid cursor, + -- since resolutions sync on their own channel alongside the op log) + resolutions_through_rowid INTEGER NOT NULL DEFAULT 0 ); -- Resolutions — synced decisions that OVERRIDE the op-fold for a contested name. A conflict (e.g. a diff --git a/backend/src/LibDB/Resolutions.fs b/backend/src/LibDB/Resolutions.fs index 8c0bc80e6c..cc835fe779 100644 --- a/backend/src/LibDB/Resolutions.fs +++ b/backend/src/LibDB/Resolutions.fs @@ -98,13 +98,16 @@ let applyToLocations (r : Resolution) : Task = |> Sql.executeAsync (fun read -> (read.string "item_hash", read.stringOrNone "origin_ts")) - let isStale = + let skip = match cur with + // already bound to the chosen content — idempotent no-op (so a re-pulled resolution doesn't churn) + | (curHash, _) :: _ when curHash = r.chosenHash -> true + // stale: this resolution is older-by-creation than the live binding (exact tie → higher hash wins) | (curHash, Some curTs) :: _ when curHash <> r.chosenHash -> r.at < curTs || (r.at = curTs && r.chosenHash < curHash) | _ -> false - if isStale then + if skip then return () else // supersede the existing binding at this path, then insert the resolved one with origin_ts = at @@ -148,23 +151,33 @@ let recordAndApply (r : Resolution) : Task = do! applyToLocations r } -/// All resolutions, oldest first (creation order) — for inspection and (later) syncing to peers. +/// Read a `Resolution` off a `resolutions` row (shared by `list` + the sync read). +let ofRow (read : RowReader) : Resolution = + { id = read.string "id" + location = + { owner = read.string "owner" + modules = + let m = read.string "modules" + if m = "" then [] else String.split "." m + name = read.string "name" } + itemKind = PT.ItemKind.fromString (read.string "item_type") + chosenHash = read.string "chosen_hash" + resolvedBy = read.string "resolved_by" + branchId = read.uuid "branch_id" + at = read.string "at" } + +let private cols = + "id, owner, modules, name, item_type, chosen_hash, resolved_by, branch_id, at" + +/// All resolutions, oldest first (creation order) — for inspection. let list () : Task> = + Sql.query $"SELECT {cols} FROM resolutions ORDER BY rowid ASC" + |> Sql.executeAsync ofRow + +/// Resolutions authored with rowid > `cursor`, oldest first, paired with their rowid — the sender read +/// for a peer pull (the resolution channel's analogue of `Inserts.opsSince`). +let since (cursor : int64) : Task> = Sql.query - """ - SELECT id, owner, modules, name, item_type, chosen_hash, resolved_by, branch_id, at - FROM resolutions ORDER BY rowid ASC - """ - |> Sql.executeAsync (fun read -> - { id = read.string "id" - location = - { owner = read.string "owner" - modules = - let m = read.string "modules" - if m = "" then [] else String.split "." m - name = read.string "name" } - itemKind = PT.ItemKind.fromString (read.string "item_type") - chosenHash = read.string "chosen_hash" - resolvedBy = read.string "resolved_by" - branchId = read.uuid "branch_id" - at = read.string "at" }) + $"SELECT rowid, {cols} FROM resolutions WHERE rowid > @cursor ORDER BY rowid ASC" + |> Sql.parameters [ "cursor", Sql.int64 cursor ] + |> Sql.executeAsync (fun r -> (r.int64 "rowid", ofRow r)) diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index 11dea5ade1..1c3ebfd846 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -95,6 +95,84 @@ let decodeBatch (bytes : byte[]) : List = let opsToSend (cursor : int64) : Task> = Inserts.opsSinceCommitted cursor + +// ── the RESOLUTION channel: synced decisions ride alongside the op log (their own rowid cursor) ── + +/// Encode a resolution batch: int32 version, int32 count, then per resolution `rowid:int64` followed by +/// the fields as length-prefixed UTF-8 strings (id, owner, modules, name, item_type, chosen_hash, +/// resolved_by, branch_id, at). The resolution-channel analogue of `encodeBatch`. +let encodeResolutions (rs : List) : byte[] = + use ms = new System.IO.MemoryStream() + use w = new System.IO.BinaryWriter(ms) + w.Write(wireFormatVersion) + w.Write(List.length rs) + for (rowid, r) in rs do + w.Write(rowid) + w.Write(r.id) + w.Write(r.location.owner) + w.Write(String.concat "." r.location.modules) + w.Write(r.location.name) + w.Write(r.itemKind.toString ()) + w.Write(r.chosenHash) + w.Write(r.resolvedBy) + w.Write(string r.branchId) + w.Write(r.at) + w.Flush() + ms.ToArray() + +/// Decode a resolution wire buffer (inverse of `encodeResolutions`). +let decodeResolutions (bytes : byte[]) : List = + use ms = new System.IO.MemoryStream(bytes) + use r = new System.IO.BinaryReader(ms) + let version = r.ReadInt32() + if version <> wireFormatVersion then + Exception.raiseInternal + $"decodeResolutions: wire-format version mismatch — peer sent {version}, this instance speaks {wireFormatVersion}. Upgrade to sync." + [] + let count = r.ReadInt32() + [ for _ in 1..count do + let rowid = r.ReadInt64() + let id = r.ReadString() + let owner = r.ReadString() + let modules = r.ReadString() + let name = r.ReadString() + let itemType = r.ReadString() + let chosenHash = r.ReadString() + let resolvedBy = r.ReadString() + let branchId = r.ReadString() + let at = r.ReadString() + yield + (rowid, + ({ id = id + location = + { owner = owner + modules = (if modules = "" then [] else String.split "." modules) + name = name } + itemKind = PT.ItemKind.fromString itemType + chosenHash = chosenHash + resolvedBy = resolvedBy + branchId = System.Guid.Parse branchId + at = at } + : Resolutions.Resolution)) ] + +/// Apply a peer's resolutions: record each (idempotent by id) + fold it into `locations` (the overlay, +/// LWW by `at`), then advance this peer's resolution cursor. Returns the new cursor (max rowid applied, +/// or the existing cursor if the batch is empty). +let applyRemoteResolutions + (remote : string) + (rs : List) + : Task = + task { + match rs with + | [] -> return! SyncCursors.resolutionCursorFor remote + | _ -> + for (_, r) in rs do + do! Resolutions.recordAndApply r + let maxRowid = rs |> List.map fst |> List.max + do! SyncCursors.advanceResolutionCursor remote maxRowid + return maxRowid + } + /// Render a `PackageLocation` as the FQ "owner[.modules].name" string sync uses on the wire and in /// the conflict store — the inverse of `parseLocation`. let private formatLocation (loc : PT.PackageLocation) : string = diff --git a/backend/src/LibDB/SyncCursors.fs b/backend/src/LibDB/SyncCursors.fs index 3374127b00..1d144ed262 100644 --- a/backend/src/LibDB/SyncCursors.fs +++ b/backend/src/LibDB/SyncCursors.fs @@ -45,6 +45,35 @@ let advanceCursor (remote : string) (rowid : int64) : Task = |> Sql.parameters [ "remote", Sql.string remote; "rowid", Sql.int64 rowid ] |> Sql.executeStatementAsync +/// How far we've applied `remote`'s RESOLUTIONS stream (0 if never). A separate cursor from the op +/// cursor — resolutions sync on their own channel (their own `resolutions` rowid). +let resolutionCursorFor (remote : string) : Task = + task { + let! rows = + Sql.query + "SELECT resolutions_through_rowid AS r FROM sync_cursors WHERE remote = @remote" + |> Sql.parameters [ "remote", Sql.string remote ] + |> Sql.executeAsync (fun read -> read.int64 "r") + return + (match rows with + | r :: _ -> r + | [] -> 0L) + } + +/// Advance `remote`'s RESOLUTIONS cursor to `rowid` (monotonic upsert; never rewinds), mirroring +/// `advanceCursor`. Coexists with the op cursor on the same `sync_cursors` row. +let advanceResolutionCursor (remote : string) (rowid : int64) : Task = + Sql.query + """ + INSERT INTO sync_cursors (remote, resolutions_through_rowid) + VALUES (@remote, @rowid) + ON CONFLICT(remote) DO UPDATE SET + resolutions_through_rowid = + MAX(sync_cursors.resolutions_through_rowid, excluded.resolutions_through_rowid) + """ + |> Sql.parameters [ "remote", Sql.string remote; "rowid", Sql.int64 rowid ] + |> Sql.executeStatementAsync + /// All known peers and how far we've synced each — the `dark sync status` surface. Empty if we've /// never synced. Ordered by `remote` for a stable display. let listCursors () : Task> = diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index 4af33e195e..aa27c87dbc 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -738,6 +738,44 @@ let private resolutionOverlayApplies = "the stale resolution (older `at`) was skipped — b stays" } +// The resolution CHANNEL: a resolution rides its own wire (encode → decode) and a peer applies it to +// its binding — idempotently. This is what propagates an override cross-machine WITHOUT a new op. +let private resolutionWireRoundTripsAndApplies = + testTask + "a resolution rides the wire (encode→decode) and a peer applies it (idempotently)" { + let loc : PT.PackageLocation = + { owner = "RWire"; modules = [ "W" ]; name = uniqueName "w" } + let other, chosen = hashChar 'a', hashChar 'b' + let refOf h = PT.Reference.fromHashAndKind (PT.Hash h, PT.ItemKind.Fn) + // the receiver currently has loc -> other (older) + let otherOp = PT.PackageOp.SetName(loc, refOf other) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ otherOp ] + (Map.ofList [ (Inserts.computeOpHash otherOp, relTs -60.0) ]) + // a sender authored a resolution choosing `chosen`; serialize + ship it + let resn = Resolutions.mk loc (refOf chosen) "human" PT.mainBranchId (relTs 0.0) + let decoded = Sync.decodeResolutions (Sync.encodeResolutions [ (1L, resn) ]) + Expect.equal (List.length decoded) 1 "one resolution survived the wire" + let remote = uniqueName "rwire" + let! cursor = Sync.applyRemoteResolutions remote decoded + Expect.equal cursor 1L "cursor advanced to the applied rowid" + let! after = liveHash loc + Expect.equal + after + (Some chosen) + "the peer adopted the resolution's chosen binding" + // idempotent re-apply: same resolution again is a no-op + let! _ = Sync.applyRemoteResolutions remote decoded + let! after2 = liveHash loc + Expect.equal + after2 + (Some chosen) + "re-applying the resolution is a no-op — chosen stays" + } + // ── all scenarios ────────────────────────────────────────────────────────────────────────────── let tests = @@ -757,4 +795,5 @@ let tests = resolutionSticks lateStaleArrival threeWayConverge - resolutionOverlayApplies ]) + resolutionOverlayApplies + resolutionWireRoundTripsAndApplies ]) From f75e288af94067e507a09a4b2600e2ed8d8d5c1a Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 11:46:35 -0400 Subject: [PATCH 18/25] conflicts: pull a peer's resolutions over the file sync (step 5b-ii) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pullFromFile now also pulls the resolution channel: pullResolutionsFromStore reads the peer's `resolutions` table above our resolution-cursor, records + folds each into locations (the overlay), and advances the cursor — alongside the op + blob pulls. So `dark sync pull ` propagates override decisions cross-machine. Tolerant of a peer with no `resolutions` table (older store / minimal test db → nothing to pull). A test builds a peer db whose only content is one resolution and asserts pullFromFile applies it to this instance's binding (the op/blob channels empty, so it isolates the resolution pull). Full backend suite green (9,791; a flaky cross-list global-connection contention cleared on re-run — unrelated to this change). --- backend/src/LibDB/Sync.fs | 55 +++++++++++++++++- backend/tests/Tests/SyncIdempotency.Tests.fs | 60 ++++++++++++++++++++ 2 files changed, 112 insertions(+), 3 deletions(-) diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index 1c3ebfd846..696ef325e0 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -627,10 +627,58 @@ let private pullBlobsFromStore (sourceConnStr : string) : Task = return fetched } +/// Pull a file peer's RESOLUTIONS (the synced override decisions) into this instance, on the +/// resolution channel: read the rows above our resolution-cursor for this peer, record + fold each +/// (the overlay, LWW by `at`), and advance the cursor. Mirrors `pullBlobsFromStore` for resolutions. +/// Tolerates a peer with no `resolutions` table (an older store / a minimal test db) → nothing to pull. +let private pullResolutionsFromStore + (sourcePath : string) + (sourceConnStr : string) + : Task = + task { + use source = new Microsoft.Data.Sqlite.SqliteConnection(sourceConnStr) + source.Open() + + use existsCmd = source.CreateCommand() + existsCmd.CommandText <- + "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'resolutions'" + if isNull (existsCmd.ExecuteScalar()) then + return 0 + else + let! cursor = SyncCursors.resolutionCursorFor sourcePath + use cmd = source.CreateCommand() + cmd.CommandText <- + "SELECT rowid, id, owner, modules, name, item_type, chosen_hash, resolved_by, branch_id, at " + + "FROM resolutions WHERE rowid > $cursor ORDER BY rowid ASC" + cmd.Parameters.AddWithValue("$cursor", cursor) + |> ignore + let rows = ResizeArray() + use reader = cmd.ExecuteReader() + while reader.Read() do + let modules = reader.GetString 3 + rows.Add( + reader.GetInt64 0, + { id = reader.GetString 1 + location = + { owner = reader.GetString 2 + modules = (if modules = "" then [] else String.split "." modules) + name = reader.GetString 4 } + itemKind = PT.ItemKind.fromString (reader.GetString 5) + chosenHash = reader.GetString 6 + resolvedBy = reader.GetString 7 + branchId = System.Guid.Parse(reader.GetString 8) + at = reader.GetString 9 } + ) + reader.Close() + let rowList = List.ofSeq rows + let! _ = applyRemoteResolutions sourcePath rowList + return List.length rowList + } + /// `dark sync pull `, F# half: resume from the stored cursor for this peer, -/// `pull` its new ops into the local instance, fetch any content blobs we're missing, then -/// persist the advanced cursor — so the next pull resumes where this left off. The peer key is -/// the source path. Returns the new cursor. +/// `pull` its new ops into the local instance, fetch any content blobs we're missing, apply any +/// resolutions it has, then persist the advanced cursor — so the next pull resumes where this left +/// off. The peer key is the source path. Returns the new cursor. let pullFromFile (sourcePath : string) : Task> = @@ -639,6 +687,7 @@ let pullFromFile let! cursor = SyncCursors.cursorFor sourcePath let! (newCursor, divergences) = pull connStr cursor let! _blobsFetched = pullBlobsFromStore connStr + let! _resolutionsApplied = pullResolutionsFromStore sourcePath connStr do! SyncCursors.advanceCursor sourcePath newCursor // auto-resolved (last-writer-wins) — record so it's reviewable, not silently lost do! recordDivergences sourcePath divergences diff --git a/backend/tests/Tests/SyncIdempotency.Tests.fs b/backend/tests/Tests/SyncIdempotency.Tests.fs index 0d586526c4..2644aec4f8 100644 --- a/backend/tests/Tests/SyncIdempotency.Tests.fs +++ b/backend/tests/Tests/SyncIdempotency.Tests.fs @@ -484,6 +484,66 @@ let tests = if System.IO.File.Exists srcPath then System.IO.File.Delete srcPath } + // The RESOLUTION channel end-to-end over the file pull: a peer's `resolutions` table carries an + // override decision; `pullFromFile` applies it to this instance's `locations` (the overlay). The + // op/blob channels are empty here, so this isolates the resolution pull. + testTask + "Sync.pullFromFile applies a peer's resolutions (the override channel)" { + let srcPath = + $"{System.IO.Path.GetTempPath()}sync-resn-src-{System.Guid.NewGuid()}.db" + let srcConn = + $"Data Source={srcPath};Mode=ReadWriteCreate;Foreign Keys=False" + let execOn (sql : string) (ps : (string * obj) list) : unit = + use conn = new SqliteConnection(srcConn) + conn.Open() + use cmd = conn.CreateCommand() + cmd.CommandText <- sql + ps + |> List.iter (fun (k, v) -> + cmd.Parameters.AddWithValue(k, v) |> ignore) + cmd.ExecuteNonQuery() |> ignore + conn.Close() + try + // minimal peer store: empty op/commit/blob tables (those channels are no-ops) + one resolution + execOn + "CREATE TABLE package_ops (id TEXT NOT NULL, op_blob BLOB NOT NULL, branch_id TEXT NOT NULL, commit_hash TEXT, origin_ts TEXT, PRIMARY KEY (id, branch_id))" + [] + execOn "CREATE TABLE commits (hash TEXT PRIMARY KEY)" [] + execOn + "CREATE TABLE package_blobs (hash TEXT PRIMARY KEY, length INTEGER NOT NULL, bytes BLOB NOT NULL)" + [] + execOn + "CREATE TABLE resolutions (id TEXT PRIMARY KEY, owner TEXT, modules TEXT, name TEXT, item_type TEXT, chosen_hash TEXT, resolved_by TEXT, branch_id TEXT, at TEXT, created_at TEXT)" + [] + let nm = "resnpull" + System.Guid.NewGuid().ToString().Replace("-", "") + let chosen = System.String('b', 64) + let at = System.DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffZ") + execOn + "INSERT INTO resolutions (id, owner, modules, name, item_type, chosen_hash, resolved_by, branch_id, at) VALUES ($id, $o, $m, $n, $t, $h, $by, $b, $at)" + [ "$id", box (System.Guid.NewGuid() |> string) + "$o", box "ResnPull" + "$m", box "R" + "$n", box nm + "$t", box "fn" + "$h", box chosen + "$by", box "human" + "$b", box (string PT.mainBranchId) + "$at", box at ] + let! _ = Sync.pullFromFile srcPath + let! bound = + Sql.query + "SELECT item_hash FROM locations WHERE owner=@o AND modules=@m AND name=@n AND unlisted_at IS NULL LIMIT 1" + |> Sql.parameters + [ "o", Sql.string "ResnPull"; "m", Sql.string "R"; "n", Sql.string nm ] + |> Sql.executeAsync (fun read -> read.string "item_hash") + Expect.equal + (List.tryHead bound) + (Some chosen) + "pull applied the peer's resolution → the location is bound to chosen" + finally + if System.IO.File.Exists srcPath then System.IO.File.Delete srcPath + } + // Sync wire codec (the HTTP transport): an op batch round-trips through encode → decode // byte-for-byte. The HTTP body reuses the existing op_blob bytes, so the wire carries exactly // what the file-based pull reads; only the carrier differs. From 327d475b9c3a950bcad5100c3e461b229f0be141 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 11:56:39 -0400 Subject: [PATCH 19/25] conflicts: overrides become synced Resolutions, not OverrideName ops (step 5a-ii) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit overrideBinding (the keep-local policy + the human 'mine' override) now records a Resolution and applies it via the overlay, instead of minting an OverrideName op: - Sync.overrideBinding: writes Resolutions.recordAndApply with a fresh `at` and the resolver tag ('auto:keep-local' or 'human'). No op; the decision rides the resolution channel and wins timestamp-LWW. - Seed.rebuildProjections: after the op-fold, Resolutions.applyAll re-applies every resolution over the rebuilt locations — so overrides survive a projection refold (the op log alone doesn't carry them). resolutions isn't a projection table, so a rebuild doesn't clear it. - The keep-local regression test now asserts NO op is appended + a resolution choosing our hash is recorded. CLI copy updated: a resolution syncs on the next pull (no commit). OverrideName is now unused (removed next). Full backend suite green (9,791). --- backend/src/LibDB/Resolutions.fs | 9 +++ backend/src/LibDB/Seed.fs | 7 ++- backend/src/LibDB/Sync.fs | 45 ++++++--------- backend/tests/Tests/SyncScenarios.Tests.fs | 66 ++++++++++------------ packages/darklang/cli/conflicts.dark | 4 +- 5 files changed, 63 insertions(+), 68 deletions(-) diff --git a/backend/src/LibDB/Resolutions.fs b/backend/src/LibDB/Resolutions.fs index cc835fe779..e0cec8589e 100644 --- a/backend/src/LibDB/Resolutions.fs +++ b/backend/src/LibDB/Resolutions.fs @@ -181,3 +181,12 @@ let since (cursor : int64) : Task> = $"SELECT rowid, {cols} FROM resolutions WHERE rowid > @cursor ORDER BY rowid ASC" |> Sql.parameters [ "cursor", Sql.int64 cursor ] |> Sql.executeAsync (fun r -> (r.int64 "rowid", ofRow r)) + +/// Re-apply every recorded resolution to `locations`, in creation (rowid) order — the overlay run after +/// a projection refold rebuilds `locations` from the op log alone, so overrides survive the rebuild. +let applyAll () : Task = + task { + let! all = list () + for r in all do + do! applyToLocations r + } diff --git a/backend/src/LibDB/Seed.fs b/backend/src/LibDB/Seed.fs index c292501dd6..26e90ccb69 100644 --- a/backend/src/LibDB/Seed.fs +++ b/backend/src/LibDB/Seed.fs @@ -337,7 +337,12 @@ let rebuildProjections () : Task = // 2. mark all ops unapplied so the fold reprocesses the whole log do! Sql.query "UPDATE package_ops SET applied = 0" |> Sql.executeStatementAsync // 3. re-fold ops -> projections via the existing playback path - return! applyUnappliedOps () + let! folded = applyUnappliedOps () + // 4. re-apply resolutions OVER the rebuilt op-fold — they overlay the contested bindings, and the + // op log alone doesn't carry them (a resolution isn't an op), so a refold would otherwise lose + // every override. Idempotent + LWW-gated, so this is safe to run after every rebuild. + do! Resolutions.applyAll () + return folded } diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index 696ef325e0..b0a39a39c5 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -376,36 +376,23 @@ let private kindOfHash (hash : string) : Task> = /// Re-bind a location to OUR hash as a deliberate override ("keep mine"). Shared by the automatic /// keep-local policy (`routeDivergences`) and the human 'mine' override (`resolveConflict`). /// -/// It emits a DISTINCT `OverrideName` op carrying a fresh resolver stamp — so its content hash (and thus -/// its op id and commit-rowid) differs from the original `SetName`. That distinctness is the whole point: -/// sync is incremental by commit-rowid, so re-stamping the original op IN PLACE (same rowid) would never -/// reach a peer that already pulled it. A fresh `OverrideName` op rides the next incremental pull, and its -/// resolver stamp (the newest `origin_ts`) wins timestamp-LWW — so peers actually adopt our choice. -/// -/// The op is inserted as WIP (uncommitted) and folded immediately (re-binds locally now); the user's next -/// `commit` ships it to peers. +/// It records a `Resolution` — a synced decision that overrides the op-fold for this location — and +/// applies it immediately (re-binds locally now). The resolution's fresh `at` stamp is the newest, so it +/// wins timestamp-LWW; and because resolutions sync on their own channel (rowid-cursored, not gated by +/// commit), a re-pulling peer re-adopts our choice. This is what the old `OverrideName` op did, without +/// minting an op to dodge the content-hash collision — the override is a resolution, not new content. let private overrideBinding (branchId : PT.BranchId) (loc : PT.PackageLocation) (target : PT.Reference) + (resolvedBy : string) : Task = - task { - let resolvedAt = System.DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffZ") - let overrideOp = PT.PackageOp.OverrideName(loc, target, resolvedAt) - // stamp origin_ts = the resolver time (newest) so playback's timestamp-LWW re-activates this binding - let opId = Inserts.computeOpHash overrideOp - let! _inserted = - Inserts.insertAndApplyOpsWithOrigin - branchId - None - [ overrideOp ] - (Map.ofList [ (opId, resolvedAt) ]) - return () - } + let at = System.DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffZ") + Resolutions.recordAndApply (Resolutions.mk loc target resolvedBy branchId at) /// A sync policy's verdict on a `SyncConflict`: accept the convergent last-writer-wins outcome the /// fold already applied, or override the location to a specific reference. `OverrideTo` the LOCAL -/// reference is the "keep mine" move — it mints a propagating `OverrideName` op; `OverrideTo` the +/// reference is the "keep mine" move — it records a propagating `Resolution`; `OverrideTo` the /// incoming (or anything already applied) is a no-op. type SyncPolicyChoice = | AcceptLww @@ -426,8 +413,8 @@ let defaultSyncPolicy : SyncPolicy = fun _conflict _ctx -> AcceptLww /// blocks); HERE it becomes a first-class `PT.SyncConflict.Divergence` the policy resolves — /// - `AcceptLww` (the default) → no reconciling op: the divergence stays surfaced and the /// timestamp-LWW outcome the fold already applied stands. Behaviorally unchanged. -/// - `OverrideTo localRef` → KEEP LOCAL: emit + apply a reconciling `OverrideName` re-binding the -/// location to our hash (a fresh op that also propagates the decision to peers, like a human +/// - `OverrideTo localRef` → KEEP LOCAL: record + apply a reconciling `Resolution` re-binding the +/// location to our hash (a synced decision that also propagates to peers, like a human /// override), and mark the recorded conflict overridden. /// - `OverrideTo` the incoming ref / anything else → no-op: that bind already applied. /// `branchId` is the branch the reconcile op is written to (the receiver's current branch — sync @@ -455,9 +442,9 @@ let routeDivergences match policy conflict callCtx with | OverrideTo target when target = localRef -> // keep local: re-bind the location to our hash via `overrideBinding` (the same move a - // human 'mine' override makes in `resolveConflict`) — a fresh `OverrideName` op that rides - // sync so peers re-adopt our hash too — and mark the recorded conflict overridden. - do! overrideBinding branchId loc target + // human 'mine' override makes in `resolveConflict`) — a synced `Resolution` that rides the + // resolution channel so peers re-adopt our hash too — and mark the recorded conflict overridden. + do! overrideBinding branchId loc target "auto:keep-local" do! Conflicts.markOverriddenByLocation remote location reconciled <- reconciled + 1 | AcceptLww @@ -765,7 +752,7 @@ let resolveConflict (conflictId : string) (keepMine : bool) : Task = return true else // "mine" — re-bind the location to our hash. Parse the FQ "owner[.modules].name", read the - // binding's kind + branch from `locations`, then emit an `OverrideName` op for our hash. A human + // binding's kind + branch from `locations`, then record a `Resolution` for our hash. A human // override is the LATEST decision, so `overrideBinding` makes it win timestamp-LWW // (last-resolver-wins) and — as a distinct op with a fresh rowid — rides sync so peers re-adopt it. match parseLocation c.location with @@ -787,7 +774,7 @@ let resolveConflict (conflictId : string) (keepMine : bool) : Task = | (itemType, branchId) :: _ -> let kind = PT.ItemKind.fromString itemType let target = PT.Reference.fromHashAndKind (PT.Hash c.localHash, kind) - do! overrideBinding branchId loc target + do! overrideBinding branchId loc target "human" do! Conflicts.markOverridden c.id return true | [] -> return false // the location no longer exists locally diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index aa27c87dbc..c7b52883de 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -365,53 +365,47 @@ let private multiDivergenceBatch = Expect.equal w2 (Some incoming2) "second location converged to its LWW winner" } -// Regression: a keep-local override must APPEND a distinct, newer op (a fresh `OverrideName` with its -// own rowid), not re-stamp the existing op in place. Incremental sync is by commit-rowid, so a re-stamp -// (same rowid) never reaches a peer that already pulled the op — the binding stays diverged. A fresh op -// above the peer's cursor DOES ride the next pull, and its newest `origin_ts` wins timestamp-LWW. This -// asserts both the local effect (our hash wins) AND the propagation property (a new op, above the prior -// max rowid, carrying the newest stamp). -let private keepLocalAppendsPropagableOverride = - testTask "keep-local override appends a distinct, newer op so it can propagate" { +// Regression: a keep-local override does NOT append an op — it records a synced `Resolution` (a fresh +// decision over the op-fold). The resolution re-binds OUR hash locally now AND carries the newest `at`, +// so it rides the resolution channel + wins timestamp-LWW — a re-pulling peer re-adopts our hash. This +// asserts the local effect (our hash wins), that NO op was appended (it's a resolution, not new +// content), and the propagation property (a recorded resolution choosing our hash). +let private keepLocalRecordsPropagableResolution = + testTask "keep-local override records a propagable resolution (no new op)" { let loc : PT.PackageLocation = { owner = "Scenario"; modules = [ "Prop" ]; name = uniqueName "p" } let local, incoming = hashChar 'a', hashChar 'b' let remote = uniqueName "rprop" let! divs = setupDivergentPull loc PT.ItemKind.Fn local -120.0 incoming -60.0 remote - let maxRowid () : Task = - Sql.query "SELECT COALESCE(MAX(rowid), 0) AS m FROM package_ops" + let opCount () : Task = + Sql.query "SELECT COUNT(*) AS m FROM package_ops" |> Sql.executeRowAsync (fun read -> read.int64 "m") - let stampOf sql ps : Task = - Sql.query sql - |> Sql.parameters ps - |> Sql.executeRowAsync (fun read -> read.string "origin_ts") - let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incoming, PT.ItemKind.Fn) - let incomingOpId = Inserts.computeOpHash (PT.PackageOp.SetName(loc, incomingRef)) - let! cursorBefore = maxRowid () - let! incomingStamp = - stampOf - "SELECT origin_ts FROM package_ops WHERE id = @id LIMIT 1" - [ "id", Sql.uuid incomingOpId ] + let! opsBefore = opCount () // keep-local override (the same path the human 'mine' override uses) let! _ = Sync.routeDivergences keepLocalPolicy callCtx remote PT.mainBranchId divs - // 1. our hash is the live binding + // 1. our hash is the live binding (the overlay re-bound it) let! winner = liveHash loc Expect.equal winner (Some local) "keep-local: our hash is the live binding" - // 2. a NEW op was appended above any synced peer's cursor (an in-place re-stamp would add no row) - let! cursorAfter = maxRowid () - Expect.isGreaterThan - cursorAfter - cursorBefore - "the override appended a new op above the peer's cursor — so it rides the next incremental pull" - // 3. that newest op carries the newest origin_ts → a re-pulling peer adopts our hash by timestamp-LWW - let! overrideStamp = - stampOf "SELECT origin_ts FROM package_ops ORDER BY rowid DESC LIMIT 1" [] - Expect.isGreaterThan - overrideStamp - incomingStamp - "the override op's origin_ts is the newest — a re-pulling peer re-adopts our hash" + // 2. NO new op was appended — the override is a resolution, not an op + let! opsAfter = opCount () + Expect.equal + opsAfter + opsBefore + "keep-local appended no op (the override is a synced resolution, not new content)" + // 3. a resolution choosing our hash was recorded — that's what rides the resolution channel + let! resns = Resolutions.list () + let mine = + resns + |> List.filter (fun (r : Resolutions.Resolution) -> + r.location.owner = loc.owner + && r.location.name = loc.name + && r.chosenHash = local) + Expect.equal + (List.length mine) + 1 + "a resolution choosing our hash was recorded (rides the resolution channel)" } // An override only propagates if it survives the wire: a peer DESERIALIZES the op_blob (read tag 8) and @@ -786,7 +780,7 @@ let tests = @ [ emptyConverged sameMsTie multiDivergenceBatch - keepLocalAppendsPropagableOverride + keepLocalRecordsPropagableResolution overrideOpRoundTrips syncConflictRoundTrips overridePropagatesToPeer diff --git a/packages/darklang/cli/conflicts.dark b/packages/darklang/cli/conflicts.dark index c8e5ed2fcc..f10fbd3924 100644 --- a/packages/darklang/cli/conflicts.dark +++ b/packages/darklang/cli/conflicts.dark @@ -65,7 +65,7 @@ let execute (state: AppState) (args: List) : AppState = if Builtin.pmConflictResolve id keepMine then let what = if keepMine then - "kept YOUR version — re-bound here (a WIP override; commit to share it with peers)" + "kept YOUR version — re-bound here (a resolution; it syncs to peers on the next pull)" else "kept the incoming version" @@ -104,7 +104,7 @@ let help (state: AppState) : AppState = " list all Show every recorded conflict, including acked/overridden history" " ack Acknowledge — 'the auto-resolution was right' (the common case)" " ack all Acknowledge ALL pending conflicts at once (bulk 'all were right')" - " resolve mine Override — re-bind to YOUR version (a WIP override; commit to share it)" + " resolve mine Override — re-bind to YOUR version (a resolution that syncs to peers)" " resolve theirs Override — keep the incoming version (what already won)" "" " is the short id from `conflicts list` (a unique prefix is fine)." ] From c33248f7388fece67f55a1317f5f2328faf36cd5 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 12:04:03 -0400 Subject: [PATCH 20/25] conflicts: delete the now-dead OverrideName op (step 5c) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nothing emits OverrideName anymore (overrides are synced Resolutions), so remove it. A clean F#-only deletion — it already surfaced to the Dark side AS SetName (so the Dark PackageOp type is unchanged, no hash ripple), and under the clean break no stored op is an OverrideName: - ProgramTypes: drop the PackageOp case. - Binary serializer: drop tag 8 (write + read). Other ops' tags/hashes unchanged. - ProgramTypesToDarkTypes / PackageOpPlayback / PackageManager / Seed: drop the arms (each folded/mapped exactly like SetName). - SyncScenarios.Tests: remove overrideOpRoundTrips + overridePropagatesToPeer (they constructed OverrideName ops); the resolution overlay/wire/file-pull tests cover the replacement end to end. The op log is now purely authored content/structure; overrides live in the resolution overlay. Full backend suite green (9,789). --- backend/src/LibDB/Conflicts.fs | 2 +- backend/src/LibDB/PackageManager.fs | 6 -- backend/src/LibDB/PackageOpPlayback.fs | 17 +---- backend/src/LibDB/Seed.fs | 3 - backend/src/LibExecution/ProgramTypes.fs | 11 ---- .../LibExecution/ProgramTypesToDarkTypes.fs | 5 -- .../Binary/Serializers/PT/PackageOp.fs | 10 --- backend/tests/Tests/SyncScenarios.Tests.fs | 65 ------------------- 8 files changed, 2 insertions(+), 117 deletions(-) diff --git a/backend/src/LibDB/Conflicts.fs b/backend/src/LibDB/Conflicts.fs index 9d47f4bc3c..a575722dd2 100644 --- a/backend/src/LibDB/Conflicts.fs +++ b/backend/src/LibDB/Conflicts.fs @@ -33,7 +33,7 @@ module Serialize = LibSerialization.Binary.Serialization /// One recorded conflict. `conflict` is the deserialized `SyncConflict` (its candidates); `chosenHash` /// + `resolvedBy` are the resolution (which content won, and the policy/human that chose it); -/// `overrideOpId` is the `OverrideName` op a deliberate override minted (None until then); `status` is +/// `overrideOpId` is the id of the `Resolution` a deliberate override records (None until wired); `status` is /// the review lifecycle (`auto-resolved` | `acknowledged` | `overridden`). type Conflict = { id : string diff --git a/backend/src/LibDB/PackageManager.fs b/backend/src/LibDB/PackageManager.fs index 4017bfdc38..01bf3e8198 100644 --- a/backend/src/LibDB/PackageManager.fs +++ b/backend/src/LibDB/PackageManager.fs @@ -124,12 +124,6 @@ let createInMemory (ops : List) : PT.PackageManager = | PT.PackageType h -> typeLocations.Add(loc, h) | PT.PackageValue h -> valueLocations.Add(loc, h) | PT.PackageFn h -> fnLocations.Add(loc, h) - | PT.PackageOp.OverrideName(loc, target, _) -> - // an override binds a name just like SetName - match target with - | PT.PackageType h -> typeLocations.Add(loc, h) - | PT.PackageValue h -> valueLocations.Add(loc, h) - | PT.PackageFn h -> fnLocations.Add(loc, h) | PT.PackageOp.AddType _ | PT.PackageOp.AddValue _ | PT.PackageOp.AddFn _ -> () diff --git a/backend/src/LibDB/PackageOpPlayback.fs b/backend/src/LibDB/PackageOpPlayback.fs index 76fb8b24ec..0bba161510 100644 --- a/backend/src/LibDB/PackageOpPlayback.fs +++ b/backend/src/LibDB/PackageOpPlayback.fs @@ -262,8 +262,7 @@ let private applyAddFn (ctx : Ctx) (fn : PT.PackageFn.PackageFn) : Task = /// isRename = true when this SetName is a standalone rename (not paired with Add*), /// meaning old locations for the same hash should be deprecated. /// The op's id as stored in `package_ops` (UUID derived from the content hash) — used to read the op's -/// own `origin_ts` back. Must be computed from the ACTUAL op (`SetName` vs `OverrideName` hash to -/// different ids), so an override reads its own resolver stamp, not the original SetName's stale one. +/// own `origin_ts` back (so `applySetName` orders the binding by this op's creation time). let private opIdOf (op : PT.PackageOp) : System.Guid = let (Hash h) = LibSerialization.Hashing.Hashing.computeOpHash op System.Guid(System.Convert.FromHexString(h)[0..15]) @@ -632,20 +631,6 @@ let private applyOp loc target.kind (opIdOf op) - | PT.PackageOp.OverrideName(loc, target, _resolvedAt) -> - // Folds exactly like SetName — re-bind the location to `target`. The op's `origin_ts` (a fresh - // resolver stamp) is the newest, so the timestamp-LWW playback re-activates this binding. - let isRename = not (Set.contains target.hash addedHashes) - do! - applySetName - ctx - branchId - commitHash - isRename - target.hash - loc - target.kind - (opIdOf op) | PT.PackageOp.Deprecate(target, kind, message) -> do! applyDeprecate ctx branchId commitHash target kind message | PT.PackageOp.Undeprecate target -> diff --git a/backend/src/LibDB/Seed.fs b/backend/src/LibDB/Seed.fs index 26e90ccb69..1fd27f88d2 100644 --- a/backend/src/LibDB/Seed.fs +++ b/backend/src/LibDB/Seed.fs @@ -285,9 +285,6 @@ let opKindName (op : PT.PackageOp) : string = | PT.PackageOp.AddValue _ -> "AddValue" | PT.PackageOp.AddFn _ -> "AddFn" | PT.PackageOp.SetName _ -> "SetName" - // An override folds exactly like SetName (same `locations` projection, same rename detection), so for - // the dirty-tracking refold it IS a SetName — this keeps it in SetName's dirtied-set + Add* handling. - | PT.PackageOp.OverrideName _ -> "SetName" | PT.PackageOp.Deprecate _ -> "Deprecate" | PT.PackageOp.Undeprecate _ -> "Undeprecate" | PT.PackageOp.PropagateUpdate _ -> "PropagateUpdate" diff --git a/backend/src/LibExecution/ProgramTypes.fs b/backend/src/LibExecution/ProgramTypes.fs index 92c26f6203..550a4a8620 100644 --- a/backend/src/LibExecution/ProgramTypes.fs +++ b/backend/src/LibExecution/ProgramTypes.fs @@ -673,17 +673,6 @@ type PackageOp = // branch-scoped FQ path. | SetName of location : PackageLocation * target : Reference - // Conflict OVERRIDE: re-bind a name to a content we already have, as a deliberate resolution - // ("keep mine"). Folds exactly like SetName, but it is a DISTINCT op carrying a `resolvedAt` - // resolver stamp — so its content hash differs from the original SetName, giving it a fresh op id - // and a fresh commit-rowid. That's what lets an override propagate: incremental sync is by - // commit-rowid, so re-stamping the original op in place (same rowid) never reaches a peer that - // already pulled it; a fresh OverrideName op rides the next pull and wins last-writer-wins. - | OverrideName of - location : PackageLocation * - target : Reference * - resolvedAt : string - // Deprecation: author-initiated annotation on a specific content hash. // // Future: implicit deprecations as Constraints. diff --git a/backend/src/LibExecution/ProgramTypesToDarkTypes.fs b/backend/src/LibExecution/ProgramTypesToDarkTypes.fs index 71b7204d29..5684fcd0cd 100644 --- a/backend/src/LibExecution/ProgramTypesToDarkTypes.fs +++ b/backend/src/LibExecution/ProgramTypesToDarkTypes.fs @@ -1637,11 +1637,6 @@ module PackageOp = | PT.PackageOp.AddFn f -> "AddFn", [ PackageFn.toDT f ] | PT.PackageOp.SetName(loc, target) -> "SetName", [ PackageLocation.toDT loc; Reference.toDT target ] - // An override binds a name like SetName; Dark code never authors one (it's created in F# by the - // conflict resolver), so it surfaces to the Dark side as a plain SetName — the `resolvedAt` stamp - // exists only to distinguish the op for sync and isn't part of the binding's meaning. - | PT.PackageOp.OverrideName(loc, target, _resolvedAt) -> - "SetName", [ PackageLocation.toDT loc; Reference.toDT target ] | PT.PackageOp.Deprecate(target, kind, message) -> "Deprecate", [ Reference.toDT target; DeprecationKind.toDT kind; DString message ] diff --git a/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs b/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs index 741205952c..44176c5e97 100644 --- a/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs +++ b/backend/src/LibSerialization/Binary/Serializers/PT/PackageOp.fs @@ -83,11 +83,6 @@ let write (w : BinaryWriter) (op : PackageOp) : unit = w.Write(3uy) PackageLocation.write w location Reference.write w target - | PackageOp.OverrideName(location, target, resolvedAt) -> - w.Write(8uy) - PackageLocation.write w location - Reference.write w target - String.write w resolvedAt | PackageOp.Deprecate(target, kind, message) -> w.Write(4uy) Reference.write w target @@ -135,11 +130,6 @@ let read (r : BinaryReader) : PackageOp = let location = PackageLocation.read r let target = Reference.read r PackageOp.SetName(location, target) - | 8uy -> - let location = PackageLocation.read r - let target = Reference.read r - let resolvedAt = String.read r - PackageOp.OverrideName(location, target, resolvedAt) | 4uy -> let target = Reference.read r let kind = DeprecationKind.read r diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index c7b52883de..8101a3f0ad 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -408,24 +408,6 @@ let private keepLocalRecordsPropagableResolution = "a resolution choosing our hash was recorded (rides the resolution channel)" } -// An override only propagates if it survives the wire: a peer DESERIALIZES the op_blob (read tag 8) and -// folds it. The keep-local path only ever serializes + folds the in-memory op, so exercise the read path -// directly — `OverrideName` (with its `resolvedAt`) must round-trip byte-for-byte through the op codec. -let private overrideOpRoundTrips = - test "OverrideName round-trips through the op serializer (rides the wire — tag 8)" { - let loc : PT.PackageLocation = { owner = "RT"; modules = [ "M" ]; name = "x" } - let target = PT.Reference.fromHashAndKind (PT.Hash(hashChar 'a'), PT.ItemKind.Fn) - let op = PT.PackageOp.OverrideName(loc, target, "2026-06-11T12:34:56.789Z") - let id = Inserts.computeOpHash op - let blob = LibSerialization.Binary.Serialization.PT.PackageOp.serialize id op - let decoded = - LibSerialization.Binary.Serialization.PT.PackageOp.deserialize id blob - Expect.equal - decoded - op - "OverrideName survives binary serialize → deserialize unchanged" - } - // A `SyncConflict` and its `DivergenceResolution` must survive the binary codec — they're persisted // (the recorded conflict's blob) and may travel, so a tag-byte round-trip is the contract. Covers both // `ResolvedBy` cases (`Auto policy` and `Human`) since they have distinct tags. @@ -467,51 +449,6 @@ let private syncConflictRoundTrips = Expect.equal hDecoded resHuman "DivergenceResolution(Human) survives round-trip" } -// End-to-end, the RECEIVER half: a peer currently bound to the incoming hash (it already pulled the -// race) receives the OTHER machine's committed override op over the normal receive path and must ADOPT -// our hash. This is the actual cross-machine propagation — the headline override-propagation claim — exercised through -// `applyRemoteOps` (the same path an HTTP/file pull uses). An `OverrideName` is NOT re-flagged as a new -// divergence (it isn't a SetName), so it just folds, and its newer stamp wins timestamp-LWW. -let private overridePropagatesToPeer = - testTask - "a peer receiving a committed override op adopts our hash (end-to-end, receiver side)" { - let loc : PT.PackageLocation = - { owner = "Recv"; modules = [ "O" ]; name = uniqueName "r" } - let ours, theirs = hashChar 'a', hashChar 'b' - let remote = uniqueName "rrecv" - // the peer is currently bound to the incoming hash `theirs` (older), having pulled the race already - let theirsOp = - PT.PackageOp.SetName( - loc, - PT.Reference.fromHashAndKind (PT.Hash theirs, PT.ItemKind.Fn) - ) - let! _ = - Inserts.insertAndApplyOpsWithOrigin - PT.mainBranchId - None - [ theirsOp ] - (Map.ofList [ (Inserts.computeOpHash theirsOp, relTs -60.0) ]) - let! before = liveHash loc - Expect.equal - before - (Some theirs) - "precondition: the peer holds the incoming hash" - // now it pulls the other machine's override op (re-bind to `ours`, newest stamp) over the wire path - let overrideOp = - PT.PackageOp.OverrideName( - loc, - PT.Reference.fromHashAndKind (PT.Hash ours, PT.ItemKind.Fn), - relTs 0.0 - ) - let! _ = - Sync.applyRemoteOps remote PT.mainBranchId None [ (1L, relTs 0.0, overrideOp) ] - let! after = liveHash loc - Expect.equal - after - (Some ours) - "the peer adopted our override — the resolution propagated cross-machine" - } - let private orderIndependent = testTask "order-independent: both machines converge to the newer op regardless of arrival side" { @@ -781,9 +718,7 @@ let tests = sameMsTie multiDivergenceBatch keepLocalRecordsPropagableResolution - overrideOpRoundTrips syncConflictRoundTrips - overridePropagatesToPeer orderIndependent idempotentRePull resolutionSticks From 4becd72a47c3c2164e173a6c5c6a342bd190b71d Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 12:12:29 -0400 Subject: [PATCH 21/25] conflicts: resolution sync over HTTP (the tailnet/daemon path) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The override channel now rides HTTP too, not just the file pull — so resolutions propagate over the tailnet path the autosync daemon uses. Mirrors the op channel: - builtins: pmSyncResolutionsSince (server read), pmSyncResolutionCursorFor (the client's separate resolution cursor), pmSyncApplyResolutions (client decode + fold). - server.dark: a GET /sync/resolutions?since= route. - api.dark: resolutionsUrl + pullResolutions; pullHttp now pulls ops → blobs → resolutions. Each new builtin has exactly one .dark call site. Resolutions sync immediately (a decision is published when made, not gated by commit). Full backend suite green (9,789). --- .../Builtins/Builtins.Matter/Libs/PM/Sync.fs | 87 +++++++++++++++++++ packages/darklang/sync/api.dark | 19 +++- packages/darklang/sync/server.dark | 20 ++++- 3 files changed, 123 insertions(+), 3 deletions(-) diff --git a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs index a0244cad12..1582b2f07c 100644 --- a/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs +++ b/backend/src/Builtins/Builtins.Matter/Libs/PM/Sync.fs @@ -174,6 +174,93 @@ let fns () : List = deprecated = NotDeprecated } + // ── the RESOLUTION channel over HTTP — override decisions ride alongside the op log ── + { name = fn "pmSyncResolutionsSince" 0 + typeParams = [] + parameters = + [ Param.make + "cursor" + TInt64 + "Resume point — the last resolution rowid the puller already has" ] + returnType = TString + description = + "The resolution-channel server read (`GET /sync/resolutions?since=cursor`): the override + decisions the puller hasn't seen, encoded as a base64 wire batch (the client decodes + folds + them via `pmSyncApplyResolutions`). Resolutions sync immediately — a decision is published + when made, not gated by commit." + fn = + (function + | _, _, _, [ DInt64 cursor ] -> + uply { + let! rs = LibDB.Resolutions.since cursor + return + DString(System.Convert.ToBase64String(LibDB.Sync.encodeResolutions rs)) + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncResolutionCursorFor" 0 + typeParams = [] + parameters = + [ Param.make + "remote" + TString + "Peer identity (path or URL) — the resolution-cursor key" ] + returnType = TInt64 + description = + "The stored RESOLUTION cursor for this peer — the last resolution rowid we've applied from it, + or 0 if never synced. The HTTP client passes this as `?since=` so the server returns only the + resolutions we don't yet have (a separate cursor from the op cursor)." + fn = + (function + | _, _, _, [ DString remote ] -> + uply { + let! cursor = LibDB.SyncCursors.resolutionCursorFor remote + return DInt64 cursor + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + + { name = fn "pmSyncApplyResolutions" 0 + typeParams = [] + parameters = + [ Param.make + "remote" + TString + "Peer identity (e.g. its URL) — keys the resolution cursor" + Param.make + "wireB64" + TString + "A base64 resolution wire batch from the peer's `pmSyncResolutionsSince`" ] + returnType = TInt64 + description = + "The resolution-channel client apply (after `httpRequest`-ing a peer's `/sync/resolutions`): + decode the base64 batch and fold each resolution into this instance's bindings (the overlay, + idempotent), advancing this peer's resolution cursor. Returns the new cursor." + fn = + (function + | _, _, _, [ DString remote; DString wireB64 ] -> + uply { + let bytes = System.Convert.FromBase64String wireB64 + let decoded = LibDB.Sync.decodeResolutions bytes + let! cursor = LibDB.Sync.applyRemoteResolutions remote decoded + return DInt64 cursor + } + | _ -> incorrectArgs ()) + sqlSpec = NotQueryable + previewable = Impure + capabilities = LibExecution.Capabilities.noCaps + deprecated = NotDeprecated } + + { name = fn "pmSyncOpKindsSince" 0 typeParams = [] parameters = diff --git a/packages/darklang/sync/api.dark b/packages/darklang/sync/api.dark index d874e09187..c252f135da 100644 --- a/packages/darklang/sync/api.dark +++ b/packages/darklang/sync/api.dark @@ -53,6 +53,10 @@ let blobsUrl (baseUrl: String) : String = let blobUrl (baseUrl: String) (hash: String) : String = (normalizeBase baseUrl) ++ "/sync/blob?hash=" ++ hash +// The `/sync/resolutions` URL for an incremental pull of the override decisions we don't yet have. +let resolutionsUrl (baseUrl: String) (cursor: Int64) : String = + (normalizeBase baseUrl) ++ "/sync/resolutions?since=" ++ (Stdlib.Int64.toString cursor) + // One trusted-peer HTTP GET (the tailnet is the trust boundary). The single call site for the // SSRF-unguarded fetch builtin, so it's referenced exactly once even as more GETs (events, blobs) // are added. @@ -84,8 +88,18 @@ let fetchMissingBlobs (baseUrl: String) : Int64 = inserted |> Stdlib.List.filter (fun r -> r) |> Stdlib.List.length -// The full HTTP pull: the peer's new ops THEN its missing content blobs — the reusable -// core behind both `dark sync pull ` and HTTP autosync. Returns +// Pull a peer's RESOLUTIONS (the override decisions) over HTTP — the override channel's HTTP +// counterpart to the op pull. Resume from + advance a SEPARATE resolution cursor. Best-effort: a +// failed GET doesn't advance (retries next pull). Returns the new resolution cursor. +let pullResolutions (baseUrl: String) : Int64 = + let cursor = Builtin.pmSyncResolutionCursorFor baseUrl + + match httpGet (resolutionsUrl baseUrl cursor) with + | Ok body -> Builtin.pmSyncApplyResolutions baseUrl body + | Error _ -> cursor + +// The full HTTP pull: the peer's new ops, its missing content blobs, THEN its resolutions (the +// override channel) — the reusable core behind both `dark sync pull ` and HTTP autosync. Returns // `(newCursor, divergenceCount, blobsFetched)` on success, or the GET error (unreachable peer) so // the caller can react (the CLI shows an actionable hint; autosync just doesn't advance). let pullHttp @@ -97,6 +111,7 @@ let pullHttp | Ok body -> let (newCursor, divCount) = applyWire baseUrl body let blobs = fetchMissingBlobs baseUrl + let _resolutions = pullResolutions baseUrl Stdlib.Result.Result.Ok((newCursor, divCount, blobs)) | Error e -> Stdlib.Result.Result.Error e diff --git a/packages/darklang/sync/server.dark b/packages/darklang/sync/server.dark index 27dd9535f5..5dff22d1ce 100644 --- a/packages/darklang/sync/server.dark +++ b/packages/darklang/sync/server.dark @@ -99,8 +99,26 @@ let blobHandler: Stdlib.HttpServer.Handler = handler = blobHandlerFn } +// GET /sync/resolutions?since= — the override decisions (resolutions) after `since`, as a +// base64 wire batch. A separate channel + cursor from `/sync/events`: resolutions overlay the op-fold +// and sync immediately (a decision is published when made, not gated by commit). +let resolutionsHandlerFn (req: Stdlib.Http.Request) : Stdlib.Http.Response = + Stdlib.Http.responseWithText (Builtin.pmSyncResolutionsSince (parseSince req.url)) 200L + + +let resolutionsHandler: Stdlib.HttpServer.Handler = + Stdlib.HttpServer.Handler + { route = "/sync/resolutions" + method = "GET" + handler = resolutionsHandlerFn } + + let handlers: List = - [ eventsHandler; healthHandler; blobsHandler; blobHandler ] + [ eventsHandler + healthHandler + blobsHandler + blobHandler + resolutionsHandler ] /// Router for the sync server — pass to `dark serve` (a named fn so the builtin can call it). From 56fbe80653e2db837845e3ca75396ae979e1df34 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 12:19:12 -0400 Subject: [PATCH 22/25] =?UTF-8?q?conflicts:=20more=20resolution=20coverage?= =?UTF-8?q?=20=E2=80=94=20newer-op=20supersede=20+=20refold=20safety?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two scenarios the overlay model needs pinned: - a genuinely NEWER authored op supersedes an older resolution (the overlay isn't a permanent pin — convergence is still timestamp-LWW across ops AND resolutions). - Resolutions.applyAll re-applies a recorded override after its binding is cleared (the refold-safety integration: rebuildProjections re-applies resolutions over the rebuilt op-fold). Exercised at single-location grain to avoid a global rebuild. Full backend suite green (9,791). --- backend/tests/Tests/SyncScenarios.Tests.fs | 82 +++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index 8101a3f0ad..e3de05bc82 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -707,6 +707,84 @@ let private resolutionWireRoundTripsAndApplies = "re-applying the resolution is a no-op — chosen stays" } +// The overlay is NOT a permanent pin: a genuinely NEWER authored op (a later `SetName`) supersedes an +// older resolution — convergence is still by timestamp-LWW across BOTH ops and resolutions. +let private resolutionSupersededByNewerOp = + testTask + "a newer authored op supersedes an older resolution (the overlay isn't a pin)" { + let loc : PT.PackageLocation = + { owner = "Resln"; modules = [ "Sup" ]; name = uniqueName "s" } + let a, b, c = hashChar 'a', hashChar 'b', hashChar 'c' + let refOf h = PT.Reference.fromHashAndKind (PT.Hash h, PT.ItemKind.Fn) + // op binds loc -> a @ -120 + let aOp = PT.PackageOp.SetName(loc, refOf a) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ aOp ] + (Map.ofList [ (Inserts.computeOpHash aOp, relTs -120.0) ]) + // a resolution overrides to b @ -60 (newer than the op) -> b + do! + Resolutions.recordAndApply ( + Resolutions.mk loc (refOf b) "human" PT.mainBranchId (relTs -60.0) + ) + let! mid = liveHash loc + Expect.equal mid (Some b) "the resolution (newer than the op) bound loc -> b" + // a genuinely NEWER op binds loc -> c @ now -> it wins (authored after the resolution) + let! _ = + Sync.applyRemoteOps + (uniqueName "rsup") + PT.mainBranchId + None + [ (1L, relTs 0.0, PT.PackageOp.SetName(loc, refOf c)) ] + let! after = liveHash loc + Expect.equal after (Some c) "a newer authored op supersedes the older resolution" + } + +// Refold safety: after a projection refold clears `locations`, `Resolutions.applyAll` re-applies the +// recorded overrides over the rebuilt op-fold (the op log alone doesn't carry them). Exercised at the +// single-location grain (clear this binding, then applyAll restores it) to avoid a global rebuild. +let private applyAllReappliesOverrides = + testTask + "Resolutions.applyAll re-applies a recorded override after its binding is cleared" { + let loc : PT.PackageLocation = + { owner = "Resln"; modules = [ "All" ]; name = uniqueName "a" } + let a, b = hashChar 'a', hashChar 'b' + let refOf h = PT.Reference.fromHashAndKind (PT.Hash h, PT.ItemKind.Fn) + let aOp = PT.PackageOp.SetName(loc, refOf a) + let! _ = + Inserts.insertAndApplyOpsWithOrigin + PT.mainBranchId + None + [ aOp ] + (Map.ofList [ (Inserts.computeOpHash aOp, relTs -60.0) ]) + do! + Resolutions.recordAndApply ( + Resolutions.mk loc (refOf b) "human" PT.mainBranchId (relTs 0.0) + ) + let! bound = liveHash loc + Expect.equal bound (Some b) "override bound loc -> b" + // clear THIS location's binding (as a refold does before re-folding) + do! + Sql.query + "UPDATE locations SET unlisted_at = datetime('now') WHERE owner=@o AND modules=@m AND name=@n AND unlisted_at IS NULL" + |> Sql.parameters + [ "o", Sql.string loc.owner + "m", Sql.string "All" + "n", Sql.string loc.name ] + |> Sql.executeStatementAsync + let! wiped = liveHash loc + Expect.equal wiped None "binding cleared (simulating a refold's pre-fold state)" + // applyAll re-applies the recorded resolution -> b restored + do! Resolutions.applyAll () + let! restored = liveHash loc + Expect.equal + restored + (Some b) + "applyAll restored the override (b) over the cleared binding" + } + // ── all scenarios ────────────────────────────────────────────────────────────────────────────── let tests = @@ -725,4 +803,6 @@ let tests = lateStaleArrival threeWayConverge resolutionOverlayApplies - resolutionWireRoundTripsAndApplies ]) + resolutionWireRoundTripsAndApplies + resolutionSupersededByNewerOp + applyAllReappliesOverrides ]) From 67e51eadca05f2e96f9b2bcc1da660aa45279202 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 12:38:58 -0400 Subject: [PATCH 23/25] =?UTF-8?q?conflicts:=20tightening=20=E2=80=94=20tre?= =?UTF-8?q?e-shake=20dead=20code=20+=20consolidate=20location=20parsing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tightening pass (no behavior change): - Tree-shake the dead DivergenceResolution binary serializer. A resolution is stored flattened to columns (chosen_hash + resolved_by), never serialized — so the serializer (Serialization.fs module + SyncConflict.fs's ResolvedBy/DivergenceResolution write/read) and its round-trip test were used only by the test. Removed; SyncConflict's serializer stays (it backs conflict_blob). The DivergenceResolution/ResolvedBy *types* are kept as the model vocabulary (now orphan — flagged for a possible follow-up). - Tree-shake the unused override_op_id column + Conflict.overrideOpId field: always NULL, nothing wrote or read it. - Consolidate location FQN parse/format: Sync.formatLocation/parseLocation + Conflicts.parseLoc + the inline modules-split (3 copies) now route through PackageLocation.toFQN/fromFQN/modulesOfString (one home). Full backend suite green (9,791). --- backend/migrations/schema.sql | 5 +-- backend/src/LibDB/Conflicts.fs | 24 ++++-------- backend/src/LibDB/PackageLocation.fs | 18 ++++++++- backend/src/LibDB/Resolutions.fs | 4 +- backend/src/LibDB/Sync.fs | 32 ++++----------- .../LibSerialization/Binary/Serialization.fs | 6 --- .../Binary/Serializers/PT/SyncConflict.fs | 32 +-------------- backend/tests/Tests/SyncScenarios.Tests.fs | 39 ++++--------------- 8 files changed, 44 insertions(+), 116 deletions(-) diff --git a/backend/migrations/schema.sql b/backend/migrations/schema.sql index ab102e141a..b35284f638 100644 --- a/backend/migrations/schema.sql +++ b/backend/migrations/schema.sql @@ -432,8 +432,8 @@ CREATE TABLE IF NOT EXISTS resolutions ( -- time; auto-resolved by policy (default last-writer-wins) but never silently lost. Local-only, never -- synced, re-derivable by replaying the op log. Stores the STRUCTURED conflict (`conflict_blob` = a -- serialized PT.SyncConflict — the candidates) plus its resolution flattened into columns: --- `chosen_hash` (which content won), `resolved_by` ('auto:' e.g. 'auto:last-writer-wins', or --- 'human'), and `override_op_id` (the OverrideName op a deliberate override mints, NULL until then). +-- `chosen_hash` (which content won) and `resolved_by` ('auto:' e.g. 'auto:last-writer-wins', or +-- 'human'). CREATE TABLE IF NOT EXISTS sync_conflicts ( id TEXT PRIMARY KEY, kind TEXT NOT NULL, -- SyncConflict discriminator, e.g. 'divergence' @@ -441,7 +441,6 @@ CREATE TABLE IF NOT EXISTS sync_conflicts ( conflict_blob BLOB NOT NULL, -- serialized PT.SyncConflict (the candidates) chosen_hash TEXT NOT NULL, -- the resolution's chosen content hash resolved_by TEXT NOT NULL, -- 'auto:' or 'human' - override_op_id TEXT, -- the OverrideName op id, once an override mints one remote TEXT NOT NULL, detected_at TEXT NOT NULL DEFAULT (datetime('now')), status TEXT NOT NULL DEFAULT 'auto-resolved' -- 'auto-resolved' | 'acknowledged' | 'overridden' diff --git a/backend/src/LibDB/Conflicts.fs b/backend/src/LibDB/Conflicts.fs index a575722dd2..3d5037b76a 100644 --- a/backend/src/LibDB/Conflicts.fs +++ b/backend/src/LibDB/Conflicts.fs @@ -9,8 +9,8 @@ /// What's stored is the STRUCTURED conflict, not prose: a serialized `PT.SyncConflict` (the /// `conflict_blob` — the candidates), the `chosen_hash` (which content won) + `resolved_by` (the /// policy that picked it, e.g. `auto:last-writer-wins`, or `human`), and a `status` lifecycle -/// (`auto-resolved` → `acknowledged` | `overridden`). The op id of a human/keep-local override lands -/// in `override_op_id`. The display reconstructs everything from these fields — no string parsing. +/// (`auto-resolved` → `acknowledged` | `overridden`). The display reconstructs everything from these +/// fields — no string parsing. /// /// Why a recorded log, not a pure op-log projection: everyone's "main" shares the constant /// `PT.mainBranchId`, so two competing edits are SAME-branch — the log can't distinguish "a peer @@ -32,8 +32,7 @@ module PT = LibExecution.ProgramTypes module Serialize = LibSerialization.Binary.Serialization /// One recorded conflict. `conflict` is the deserialized `SyncConflict` (its candidates); `chosenHash` -/// + `resolvedBy` are the resolution (which content won, and the policy/human that chose it); -/// `overrideOpId` is the id of the `Resolution` a deliberate override records (None until wired); `status` is +/// + `resolvedBy` are the resolution (which content won, and the policy/human that chose it); `status` is /// the review lifecycle (`auto-resolved` | `acknowledged` | `overridden`). type Conflict = { id : string @@ -42,7 +41,6 @@ type Conflict = conflict : PT.SyncConflict chosenHash : string resolvedBy : string - overrideOpId : string option remote : string status : string } @@ -71,15 +69,11 @@ type Conflict = // ── location parsing + kind lookup (to rebuild the structured conflict from raw hashes) ── -/// Parse "owner[.modules].name" → PackageLocation (head = owner, last = name, middle = modules). +/// Parse "owner[.modules].name" → PackageLocation via the shared `PackageLocation.fromFQN`, with a +/// lenient fallback (a degenerate location named after the raw string) for a malformed key. let private parseLoc (location : string) : PT.PackageLocation = - match location.Split('.') |> List.ofArray with - | owner :: rest when not (List.isEmpty rest) -> - match List.rev rest with - | name :: revModules -> - { owner = owner; modules = List.rev revModules; name = name } - | [] -> { owner = owner; modules = []; name = "" } - | _ -> { owner = ""; modules = []; name = location } + PackageLocation.fromFQN location + |> Option.defaultValue { owner = ""; modules = []; name = location } /// The item kind bound at a location — needed to rebuild the candidate `Reference`s. Falls back to /// `Fn` when the location isn't (or isn't yet) in `locations` (e.g. a synthetic test record); the @@ -165,8 +159,7 @@ let list () : Task> = return! Sql.query """ - SELECT id, kind, location, conflict_blob, chosen_hash, resolved_by, - override_op_id, remote, status + SELECT id, kind, location, conflict_blob, chosen_hash, resolved_by, remote, status FROM sync_conflicts ORDER BY detected_at DESC """ |> Sql.executeAsync (fun read -> @@ -179,7 +172,6 @@ let list () : Task> = conflict = conflict chosenHash = read.string "chosen_hash" resolvedBy = read.string "resolved_by" - overrideOpId = read.stringOrNone "override_op_id" remote = read.string "remote" status = read.string "status" }) } diff --git a/backend/src/LibDB/PackageLocation.fs b/backend/src/LibDB/PackageLocation.fs index a73bf03713..b93c5d45cc 100644 --- a/backend/src/LibDB/PackageLocation.fs +++ b/backend/src/LibDB/PackageLocation.fs @@ -2,10 +2,26 @@ module LibDB.PackageLocation module PT = LibExecution.ProgramTypes -/// Dot-separated FQN string for use as map keys and debug output. +/// Dot-separated FQN string for use as map keys, the conflict/sync wire, and debug output. let toFQN (loc : PT.PackageLocation) : string = match loc.modules with | [] -> $"{loc.owner}.{loc.name}" | modules -> let modulesStr = modules |> String.concat "." $"{loc.owner}.{modulesStr}.{loc.name}" + +/// Parse a dot-FQN "owner[.modules].name" back into a PackageLocation (owner = head, name = last, +/// modules = the middle). `None` if there's no name segment. The inverse of `toFQN`. +let fromFQN (fqn : string) : Option = + match fqn.Split('.') |> List.ofArray with + | owner :: rest -> + match List.rev rest with + | name :: revModules -> + Some { owner = owner; modules = List.rev revModules; name = name } + | [] -> None + | _ -> None + +/// The `modules` column is stored dot-joined; split it back to a list ("" → []). Inverse of the +/// `String.concat "."` the writers use. +let modulesOfString (modules : string) : List = + if modules = "" then [] else modules.Split('.') |> Array.toList diff --git a/backend/src/LibDB/Resolutions.fs b/backend/src/LibDB/Resolutions.fs index e0cec8589e..94aff96be1 100644 --- a/backend/src/LibDB/Resolutions.fs +++ b/backend/src/LibDB/Resolutions.fs @@ -156,9 +156,7 @@ let ofRow (read : RowReader) : Resolution = { id = read.string "id" location = { owner = read.string "owner" - modules = - let m = read.string "modules" - if m = "" then [] else String.split "." m + modules = LibDB.PackageLocation.modulesOfString (read.string "modules") name = read.string "name" } itemKind = PT.ItemKind.fromString (read.string "item_type") chosenHash = read.string "chosen_hash" diff --git a/backend/src/LibDB/Sync.fs b/backend/src/LibDB/Sync.fs index b0a39a39c5..9a07457bb1 100644 --- a/backend/src/LibDB/Sync.fs +++ b/backend/src/LibDB/Sync.fs @@ -146,7 +146,7 @@ let decodeResolutions (bytes : byte[]) : List = ({ id = id location = { owner = owner - modules = (if modules = "" then [] else String.split "." modules) + modules = PackageLocation.modulesOfString modules name = name } itemKind = PT.ItemKind.fromString itemType chosenHash = chosenHash @@ -173,25 +173,7 @@ let applyRemoteResolutions return maxRowid } -/// Render a `PackageLocation` as the FQ "owner[.modules].name" string sync uses on the wire and in -/// the conflict store — the inverse of `parseLocation`. -let private formatLocation (loc : PT.PackageLocation) : string = - let modulesStr = String.concat "." loc.modules - if modulesStr = "" then - $"{loc.owner}.{loc.name}" - else - $"{loc.owner}.{modulesStr}.{loc.name}" - -/// Parse an FQ "owner[.modules].name" location back into a `PackageLocation` (owner = head, -/// name = last, modules = the middle) — the inverse of `formatLocation`. -let private parseLocation (location : string) : Option = - match location.Split('.') |> List.ofArray with - | owner :: rest -> - match List.rev rest with - | name :: revModules -> - Some { owner = owner; modules = List.rev revModules; name = name } - | [] -> None - | _ -> None +// Location FQ rendering/parsing is shared: `PackageLocation.toFQN` (render) + `.fromFQN` (parse). /// Detect sync divergences in a remote batch BEFORE applying it. For each incoming /// `SetName`, if the location is already bound LOCALLY to a *different*, non-deprecated hash, @@ -243,7 +225,7 @@ let detectDivergences return triples |> List.map (fun (loc, existingHash, incomingHash) -> - (formatLocation loc, existingHash, incomingHash)) + (PackageLocation.toFQN loc, existingHash, incomingHash)) } @@ -264,7 +246,7 @@ let detectDivergences // the `LIMIT 1` deterministic (newest row) in the meantime. let private liveBindingHash (location : string) : Task> = task { - match parseLocation location with + match PackageLocation.fromFQN location with | Some loc -> let! rows = Sql.query @@ -434,7 +416,7 @@ let routeDivergences // same location, hence the same kind; `kindOfHash` reads it from the binding we're restoring. match! kindOfHash existingHash with | Some kind -> - match parseLocation location with + match PackageLocation.fromFQN location with | Some loc -> let localRef = PT.Reference.fromHashAndKind (PT.Hash existingHash, kind) let incomingRef = PT.Reference.fromHashAndKind (PT.Hash incomingHash, kind) @@ -648,7 +630,7 @@ let private pullResolutionsFromStore { id = reader.GetString 1 location = { owner = reader.GetString 2 - modules = (if modules = "" then [] else String.split "." modules) + modules = PackageLocation.modulesOfString modules name = reader.GetString 4 } itemKind = PT.ItemKind.fromString (reader.GetString 5) chosenHash = reader.GetString 6 @@ -755,7 +737,7 @@ let resolveConflict (conflictId : string) (keepMine : bool) : Task = // binding's kind + branch from `locations`, then record a `Resolution` for our hash. A human // override is the LATEST decision, so `overrideBinding` makes it win timestamp-LWW // (last-resolver-wins) and — as a distinct op with a fresh rowid — rides sync so peers re-adopt it. - match parseLocation c.location with + match PackageLocation.fromFQN c.location with | Some loc -> let modulesStr = String.concat "." loc.modules let! meta = diff --git a/backend/src/LibSerialization/Binary/Serialization.fs b/backend/src/LibSerialization/Binary/Serialization.fs index d2fd403d2e..a2cd0544bf 100644 --- a/backend/src/LibSerialization/Binary/Serialization.fs +++ b/backend/src/LibSerialization/Binary/Serialization.fs @@ -134,12 +134,6 @@ module PT = let serialize id value = makeSerializer PT.SyncConflict.write id value let deserialize id data = makeDeserializer PT.SyncConflict.read id data - module DivergenceResolution = - let serialize id value = - makeSerializer PT.SyncConflict.DivergenceResolution.write id value - let deserialize id data = - makeDeserializer PT.SyncConflict.DivergenceResolution.read id data - module BranchOp = let serialize = PT.BranchOp.serialize let deserialize = PT.BranchOp.deserialize diff --git a/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs b/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs index 80e0be4cbc..a8d5ed14d1 100644 --- a/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs +++ b/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs @@ -13,37 +13,9 @@ open LibSerialization.Binary.Serializers.PT.Common module Reference = LibSerialization.Binary.Serializers.PT.PackageOp.Reference -// -- ResolvedBy -- - -module ResolvedBy = - let write (w : BinaryWriter) (by : ResolvedBy) : unit = - match by with - | Auto policy -> - w.Write(0uy) - String.write w policy - | Human -> w.Write(1uy) - - let read (r : BinaryReader) : ResolvedBy = - match r.ReadByte() with - | 0uy -> Auto(String.read r) - | 1uy -> Human - | b -> raiseFormatError $"Invalid ResolvedBy tag: {b}" - - -// -- DivergenceResolution -- - -module DivergenceResolution = - let write (w : BinaryWriter) (res : DivergenceResolution) : unit = - Reference.write w res.chosen - ResolvedBy.write w res.by - - let read (r : BinaryReader) : DivergenceResolution = - let chosen = Reference.read r - let by = ResolvedBy.read r - { chosen = chosen; by = by } - - // -- SyncConflict -- +// (only `SyncConflict` is serialized — it backs the recorded `conflict_blob`. A resolution is stored +// flattened to columns, `chosen_hash` + `resolved_by`, so `DivergenceResolution` has no wire form.) let write (w : BinaryWriter) (conflict : SyncConflict) : unit = match conflict with diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index e3de05bc82..7bcf7ae845 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -408,45 +408,20 @@ let private keepLocalRecordsPropagableResolution = "a resolution choosing our hash was recorded (rides the resolution channel)" } -// A `SyncConflict` and its `DivergenceResolution` must survive the binary codec — they're persisted -// (the recorded conflict's blob) and may travel, so a tag-byte round-trip is the contract. Covers both -// `ResolvedBy` cases (`Auto policy` and `Human`) since they have distinct tags. +// A `SyncConflict` must survive the binary codec — it backs the recorded `conflict_blob` and may +// travel, so a tag-byte round-trip is the contract. let private syncConflictRoundTrips = - test "SyncConflict + DivergenceResolution round-trip through the binary serializer" { + test "SyncConflict round-trips through the binary serializer" { let loc : PT.PackageLocation = { owner = "RT"; modules = [ "M" ]; name = "x" } let refA = PT.Reference.fromHashAndKind (PT.Hash(hashChar 'a'), PT.ItemKind.Fn) let refB = PT.Reference.fromHashAndKind (PT.Hash(hashChar 'b'), PT.ItemKind.Fn) let conflict = PT.SyncConflict.Divergence(loc, [ refA; refB ]) - let cBlob = + let blob = LibSerialization.Binary.Serialization.PT.SyncConflict.serialize "c" conflict - let cDecoded = - LibSerialization.Binary.Serialization.PT.SyncConflict.deserialize "c" cBlob - Expect.equal cDecoded conflict "SyncConflict survives serialize → deserialize" - - let resAuto : PT.DivergenceResolution = - { chosen = refB; by = PT.ResolvedBy.Auto "last-writer-wins" } - let aBlob = - LibSerialization.Binary.Serialization.PT.DivergenceResolution.serialize - "a" - resAuto - let aDecoded = - LibSerialization.Binary.Serialization.PT.DivergenceResolution.deserialize - "a" - aBlob - Expect.equal aDecoded resAuto "DivergenceResolution(Auto) survives round-trip" - - let resHuman : PT.DivergenceResolution = - { chosen = refA; by = PT.ResolvedBy.Human } - let hBlob = - LibSerialization.Binary.Serialization.PT.DivergenceResolution.serialize - "h" - resHuman - let hDecoded = - LibSerialization.Binary.Serialization.PT.DivergenceResolution.deserialize - "h" - hBlob - Expect.equal hDecoded resHuman "DivergenceResolution(Human) survives round-trip" + let decoded = + LibSerialization.Binary.Serialization.PT.SyncConflict.deserialize "c" blob + Expect.equal decoded conflict "SyncConflict survives serialize → deserialize" } let private orderIndependent = From e1f919fd9e449135028f82a7b28cd4c8997e19bf Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 12:44:54 -0400 Subject: [PATCH 24/25] =?UTF-8?q?conflicts:=20tightening=20=E2=80=94=20one?= =?UTF-8?q?=20home=20for=20the=20timestamp-LWW=20comparison?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The subtle convergence rule (older-by-stamp loses; an exact same-stamp tie breaks by the higher content hash, portably) was duplicated verbatim in the op fold (applySetName) and the resolution overlay (applyToLocations) — two copies of the logic that MUST agree for instances to converge. Extracted to PackageLocation.bindingIsStale, used by both, with a direct unit test pinning the contract. No behavior change. Full backend suite green (9,792). --- backend/src/LibDB/PackageLocation.fs | 12 +++++++++++ backend/src/LibDB/PackageOpPlayback.fs | 13 +++++------- backend/src/LibDB/Resolutions.fs | 4 ++-- backend/tests/Tests/SyncScenarios.Tests.fs | 23 +++++++++++++++++++++- 4 files changed, 41 insertions(+), 11 deletions(-) diff --git a/backend/src/LibDB/PackageLocation.fs b/backend/src/LibDB/PackageLocation.fs index b93c5d45cc..864619f772 100644 --- a/backend/src/LibDB/PackageLocation.fs +++ b/backend/src/LibDB/PackageLocation.fs @@ -25,3 +25,15 @@ let fromFQN (fqn : string) : Option = /// `String.concat "."` the writers use. let modulesOfString (modules : string) : List = if modules = "" then [] else modules.Split('.') |> Array.toList + +/// The portable timestamp-LWW comparison both the op fold (`applySetName`) and the resolution overlay +/// (`Resolutions.applyToLocations`) use to order two DIFFERENT bindings of one name: is the NEW binding +/// (content `newHash`, creation/resolver stamp `newTs`) stale vs the CURRENT (`curHash`/`curTs`)? +/// Older-by-stamp loses; an exact same-stamp tie is broken by the HIGHER content hash — a tie-break over +/// CONTENT (not arrival), so every instance and a from-scratch refold converge on the same winner. +/// (Callers handle the same-content case — a re-bind to what's already there — separately.) +let bindingIsStale + (curHash : string, curTs : string) + (newHash : string, newTs : string) + : bool = + newTs < curTs || (newTs = curTs && newHash < curHash) diff --git a/backend/src/LibDB/PackageOpPlayback.fs b/backend/src/LibDB/PackageOpPlayback.fs index 0bba161510..405f0d9325 100644 --- a/backend/src/LibDB/PackageOpPlayback.fs +++ b/backend/src/LibDB/PackageOpPlayback.fs @@ -334,15 +334,12 @@ let private applySetName let isStale = match curBinding, thisTs with - // Order a binding by its op's CREATION time (origin_ts); a stale op arriving late via sync loses - // to the newer binding. On an EXACT TIE (two DIFFERENT ops for one name stamped the same - // millisecond — a genuine cross-instance race), break deterministically by item hash: the higher - // hash wins. That tie-break is PORTABLE (content, not arrival/rowid), so every instance — and a - // from-scratch projection rebuild — converges on the same winner. Local sequential authoring - // (v2 replacing v1 in one batch) never reaches this tie: `Inserts` self-stamps each op in a - // local batch with a strictly-increasing origin_ts, so v2 is newer-by-creation and just wins. + // Order a binding by its op's CREATION time (origin_ts); a stale op arriving late via sync loses to + // the newer binding, with an exact-tie hash tie-break — the shared `bindingIsStale` LWW rule (the + // resolution overlay uses the identical comparison). Local sequential authoring never reaches the + // tie: `Inserts` self-stamps each op in a batch with a strictly-increasing origin_ts, so v2 wins. | Some(curHash, Some curTs), Some t when curHash <> itemHashStr -> - t < curTs || (t = curTs && itemHashStr < curHash) + PackageLocation.bindingIsStale (curHash, curTs) (itemHashStr, t) | _ -> false if isStale then diff --git a/backend/src/LibDB/Resolutions.fs b/backend/src/LibDB/Resolutions.fs index 94aff96be1..2362be33b9 100644 --- a/backend/src/LibDB/Resolutions.fs +++ b/backend/src/LibDB/Resolutions.fs @@ -102,9 +102,9 @@ let applyToLocations (r : Resolution) : Task = match cur with // already bound to the chosen content — idempotent no-op (so a re-pulled resolution doesn't churn) | (curHash, _) :: _ when curHash = r.chosenHash -> true - // stale: this resolution is older-by-creation than the live binding (exact tie → higher hash wins) + // stale: older-by-stamp than the live binding (the shared LWW rule, exact tie → higher hash wins) | (curHash, Some curTs) :: _ when curHash <> r.chosenHash -> - r.at < curTs || (r.at = curTs && r.chosenHash < curHash) + LibDB.PackageLocation.bindingIsStale (curHash, curTs) (r.chosenHash, r.at) | _ -> false if skip then diff --git a/backend/tests/Tests/SyncScenarios.Tests.fs b/backend/tests/Tests/SyncScenarios.Tests.fs index 7bcf7ae845..400d0f41d4 100644 --- a/backend/tests/Tests/SyncScenarios.Tests.fs +++ b/backend/tests/Tests/SyncScenarios.Tests.fs @@ -760,6 +760,26 @@ let private applyAllReappliesOverrides = "applyAll restored the override (b) over the cleared binding" } +// The shared LWW rule, directly: both the op fold and the resolution overlay route through it, so pin +// its contract — older loses, newer wins, and an exact same-stamp tie breaks by the higher content hash. +let private bindingLwwRule = + test + "PackageLocation.bindingIsStale: older loses, newer wins, exact tie → higher hash wins" { + let stale = LibDB.PackageLocation.bindingIsStale + Expect.isTrue + (stale ("aaaa", "2025-01-02") ("bbbb", "2025-01-01")) + "older-by-stamp is stale" + Expect.isFalse + (stale ("aaaa", "2025-01-01") ("bbbb", "2025-01-02")) + "newer-by-stamp wins" + Expect.isTrue + (stale ("bbbb", "2025-01-01") ("aaaa", "2025-01-01")) + "exact tie: lower hash loses" + Expect.isFalse + (stale ("aaaa", "2025-01-01") ("bbbb", "2025-01-01")) + "exact tie: higher hash wins" + } + // ── all scenarios ────────────────────────────────────────────────────────────────────────────── let tests = @@ -780,4 +800,5 @@ let tests = resolutionOverlayApplies resolutionWireRoundTripsAndApplies resolutionSupersededByNewerOp - applyAllReappliesOverrides ]) + applyAllReappliesOverrides + bindingLwwRule ]) From e022cd0f60859150731086edf19d82f9ba45e7f6 Mon Sep 17 00:00:00 2001 From: Stachu Korick Date: Fri, 12 Jun 2026 13:04:21 -0400 Subject: [PATCH 25/25] conflicts: tree-shake the orphan DivergenceResolution + ResolvedBy types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Their only consumer (the binary serializer) was removed in the prior tightening commit, leaving them fully orphan — the code represents a resolution as chosen_hash + resolved_by columns ('auto:' | 'human'), never the typed value. Remove them. SyncConflict (the one serialized, constructed type) stays; its doc now describes the resolution as the flattened decision it actually is, and drops the stale "its resolution is itself an op" line (it's a synced overlay, not an op). Full backend suite green (9,792). --- backend/src/LibExecution/ProgramTypes.fs | 24 +++++-------------- .../Binary/Serializers/PT/SyncConflict.fs | 4 ++-- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/backend/src/LibExecution/ProgramTypes.fs b/backend/src/LibExecution/ProgramTypes.fs index 550a4a8620..e68cee7362 100644 --- a/backend/src/LibExecution/ProgramTypes.fs +++ b/backend/src/LibExecution/ProgramTypes.fs @@ -735,30 +735,18 @@ type PackageOp = /// A SYNC conflict — surfaced when applying a peer's ops reveals state two instances disagree on. /// It lives HERE, beside `PackageOp`, because every sync conflict is ultimately a disagreement about -/// the op log, and its resolution is itself an op. This is deliberately distinct from a *runtime* -/// conflict (`RuntimeTypes.Conflict`, e.g. a missing fn mid-execution): different lifetime, different -/// surface — only the dispatch PATTERN is shared. One case today; a new kind (a move collision, a -/// value-update race) joins here and resolves the same way — a per-kind resolution recorded as an op — -/// with no change to the sync engine. +/// the op log. Distinct from a *runtime* error (`RuntimeError`, e.g. a missing fn mid-execution): a +/// sync conflict has choosable resolutions; a runtime wall doesn't. One case today; a new kind (a move +/// collision, a value-update race) joins here and is resolved the same way — a per-kind decision +/// recorded as a synced `Resolution` overlaid on the op-fold (`LibDB.Resolutions`) — with no change to +/// the sync engine. (A resolution is stored flattened: the chosen content hash + who/what chose it, +/// `'auto:'` | `'human'` — see `LibDB.Conflicts`/`Resolutions`.) and SyncConflict = /// One location bound to two different contents across instances (the `name → two hashes` /// divergence). `candidates` are the contending references — today exactly two, ordered /// [local; incoming]. | Divergence of location : PackageLocation * candidates : List -/// WHO/what chose a conflict's resolution. `Auto` carries the policy name that picked it (e.g. -/// `"last-writer-wins"`) so a surfaced auto-resolution is self-describing; `Human` is an explicit -/// override. There is no "unresolved" case — an unresolved conflict has no resolution recorded yet -/// (that's a status, not a resolution). -and ResolvedBy = - | Auto of policy : string - | Human - -/// The resolution of a `Divergence`: which reference won the location, and by whom/what. This is the -/// per-kind resolution shape — as `SyncConflict` gains cases, each gets its own resolution record, -/// rather than one global `Resolution` enum straining to cover every conflict. -and DivergenceResolution = { chosen : Reference; by : ResolvedBy } - /// The kind of package item (function, type, or value) and ItemKind = diff --git a/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs b/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs index a8d5ed14d1..e0ac4fecdf 100644 --- a/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs +++ b/backend/src/LibSerialization/Binary/Serializers/PT/SyncConflict.fs @@ -14,8 +14,8 @@ module Reference = LibSerialization.Binary.Serializers.PT.PackageOp.Reference // -- SyncConflict -- -// (only `SyncConflict` is serialized — it backs the recorded `conflict_blob`. A resolution is stored -// flattened to columns, `chosen_hash` + `resolved_by`, so `DivergenceResolution` has no wire form.) +// (`SyncConflict` is serialized to back the recorded `conflict_blob`. A resolution has no wire form of +// its own — it's stored flattened to columns: `chosen_hash` + `resolved_by`.) let write (w : BinaryWriter) (conflict : SyncConflict) : unit = match conflict with