From 0eaaca21cfe771a0a0546504cd0b119e8595cd7b Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Fri, 12 Jun 2026 22:54:18 +0200 Subject: [PATCH 01/13] Increment versions --- android/build.gradle.kts | 4 ++-- ios/Sources/Where/Info.plist | 2 +- ios/Where.xcodeproj/project.pbxproj | 8 ++++---- ios/project.yml | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/android/build.gradle.kts b/android/build.gradle.kts index bd544a31..45780cce 100644 --- a/android/build.gradle.kts +++ b/android/build.gradle.kts @@ -61,8 +61,8 @@ android { applicationId = "net.af0.where" minSdk = 26 targetSdk = 35 - versionCode = 95 - versionName = "2026.06.10.1" + versionCode = 96 + versionName = "2026.06.12.1" manifestPlaceholders["MAPS_API_KEY"] = localProperties.getProperty("MAPS_API_KEY") ?: System.getenv("MAPS_API_KEY") ?: "" } diff --git a/ios/Sources/Where/Info.plist b/ios/Sources/Where/Info.plist index df0aad49..967a8a7a 100644 --- a/ios/Sources/Where/Info.plist +++ b/ios/Sources/Where/Info.plist @@ -21,7 +21,7 @@ CFBundleShortVersionString $(MARKETING_VERSION) CFBundleVersion - 7 + 10 NSCameraUsageDescription Where needs the camera to scan friend invite QR codes. 
NSLocationAlwaysAndWhenInUseUsageDescription diff --git a/ios/Where.xcodeproj/project.pbxproj b/ios/Where.xcodeproj/project.pbxproj index 6da37154..d3a8e1de 100644 --- a/ios/Where.xcodeproj/project.pbxproj +++ b/ios/Where.xcodeproj/project.pbxproj @@ -444,7 +444,7 @@ CODE_SIGN_ENTITLEMENTS = Sources/Where/Where.entitlements; CODE_SIGN_IDENTITY = "Apple Distribution"; CODE_SIGN_STYLE = Manual; - CURRENT_PROJECT_VERSION = 7; + CURRENT_PROJECT_VERSION = 10; DEVELOPMENT_TEAM = 5PM2V9LTHC; FRAMEWORK_SEARCH_PATHS = ( "$(inherited) $(SRCROOT)/../shared/build/XCFrameworks/$(CONFIGURATION:lower)", @@ -457,7 +457,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 1.14; + MARKETING_VERSION = 1.15; PRODUCT_BUNDLE_IDENTIFIER = net.af0.WhereApp; PROVISIONING_PROFILE_SPECIFIER = "Where AppStore"; SDKROOT = iphoneos; @@ -473,7 +473,7 @@ CODE_SIGN_ENTITLEMENTS = Sources/Where/Where.entitlements; CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_STYLE = Automatic; - CURRENT_PROJECT_VERSION = 7; + CURRENT_PROJECT_VERSION = 10; FRAMEWORK_SEARCH_PATHS = ( "$(inherited) $(SRCROOT)/../shared/build/XCFrameworks/$(CONFIGURATION:lower)", "\".\"", @@ -485,7 +485,7 @@ "$(inherited)", "@executable_path/Frameworks", ); - MARKETING_VERSION = 1.14; + MARKETING_VERSION = 1.15; PRODUCT_BUNDLE_IDENTIFIER = net.af0.WhereApp; SDKROOT = iphoneos; TARGETED_DEVICE_FAMILY = "1,2"; diff --git a/ios/project.yml b/ios/project.yml index 5994b813..4526bdd2 100644 --- a/ios/project.yml +++ b/ios/project.yml @@ -83,8 +83,8 @@ targets: FRAMEWORK_SEARCH_PATHS: $(inherited) $(SRCROOT)/../shared/build/XCFrameworks/$(CONFIGURATION:lower) CODE_SIGN_IDENTITY: Apple Development CODE_SIGN_STYLE: Automatic - MARKETING_VERSION: "1.14" - CURRENT_PROJECT_VERSION: 6 + MARKETING_VERSION: "1.15" + CURRENT_PROJECT_VERSION: 9 WhereTests: type: bundle.unit-test From c045d0ed703b5da54767e15a3f22b575246f754a Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Fri, 12 Jun 2026 23:15:23 +0200 Subject: [PATCH 
02/13] docs: correct EK_B.priv lifetime and bootstrap window security note MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit §3.1, §4.1, §4.4 incorrectly claimed Bob's ephemeral private key is deleted immediately after SK derivation. In fact it survives as localDhPriv until his first DH ratchet step, which is required to process Alice's eager-ratchet message. §5.5 was already correct. Also adds a security note in §5.5 on the bootstrap window: during the interval between Bob posting KeyExchangeInit and completing his first ratchet, recovery of device state plus the public QR payload is sufficient to reconstruct SK. Closes issue #2 in TODO_SPEC_UPDATES.md. Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 177 +++++++++++++++++++++++++++++++++++++ docs/e2ee-location-sync.md | 7 +- 2 files changed, 181 insertions(+), 3 deletions(-) create mode 100644 TODO_SPEC_UPDATES.md diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md new file mode 100644 index 00000000..7c9f495c --- /dev/null +++ b/TODO_SPEC_UPDATES.md @@ -0,0 +1,177 @@ +# TODO: Spec & Implementation Updates from E2EE Design Doc Review + +Source: review of `docs/e2ee-location-sync.md` (2026-06-12). + +## 1. Remove plaintext `token` field from `KeyExchangeInit` (breaking — two-stage rollout) + +The plaintext `T_AB_0` in `KeyExchangeInit` lets the server link the discovery +mailbox to the session's first-epoch routing token, defeating the stated purpose +of `discovery_secret` (§4.2). The field is redundant: Alice derives `T_AB_0` +independently and must use her derived value regardless. + +- [x] **Stage 1 (client tolerance):** In the `aliceProcessInit` path, parse + `token` as optional and ignore it entirely (do not conditionally verify). + Only the inviter/receiver side checks this field; sender untouched. +- [x] **Stage 1 (spec):** Removed `token` from §4.4 (step 7) and §9.3 entirely + (single implementation — no deprecation period needed in the spec). 
+- [ ] **Stage 2:** Stop sending the field in `KeyExchangeInit` (`bobProcessQr` + and `processScannedQr`). +- No protocol version bump needed: removing an ignored optional field is not a + wire break once stage 1 is deployed. Failure mode during version skew is + benign — pairing is a live, interactive event; a failed pairing is visible + and retriable, with no persisted-session corruption. + +## 2. Correct the "private keys deleted immediately" claims (doc only) + +§3.1, §4.1, §4.4 claim `EK_B.priv` is deleted immediately after `SK` +derivation, but §5.5 (correctly) copies it into `localDhPriv`, where it +persists until Bob's first DH ratchet completes. Required — Bob could not +otherwise process Alice's eager-ratchet message. + +- [x] Fix §3.1, §4.1, §4.4 to state that Bob's bootstrap private key survives + as `localDhPriv` until his first ratchet. +- [x] Add a note on the security implication: during that window, a device or + backup compromise plus the public QR payload allows reconstruction of `SK`. + +## 3. Resolve the `prev_recv_token` contradiction (doc only — pick one) + +§5.4.2 says the receiver polls exactly one token (no `prev_recv_token` +polling); §9.2 says the receiver MUST also poll `prev_recv_token` during epoch +transition; §8.2 `SessionState` carries `prev_recv_token`. + +- [ ] Determine which behavior the implementation actually has. +- [ ] Make §5.4.2, §8.2, and §9.2 agree. Note metadata impact if two-token + polling is the real behavior (doubles per-friend poll fingerprint). + +## 4. Reconcile the two KDF_CK definitions (doc only) + +§8.3: `MK = HMAC(CK, 0x01)`, `CK' = HMAC(CK, 0x02)`, nonce via separate HKDF. +§11 table: single 76-byte HKDF expand for message key + nonce. These produce +different bytes. + +- [ ] Check the implementation, fix whichever section is wrong. + +## 5. 
Specify discovery-mailbox multiple-responder behavior (doc, maybe impl) + +First-responder-wins is currently implicit; a hijacker racing Bob causes +legitimate Bob to fail *silently* (he believes pairing succeeded). Note also +that the server controls GET ordering ("up to 50 from the front of the +queue"), i.e., a malicious server picks which `KeyExchangeInit` Alice sees +first. + +- [ ] Specify behavior when multiple `KeyExchangeInit` messages are present in + the discovery mailbox (e.g., surface an error to Alice), turning a silent + hijack into a detectable event. +- [ ] If specified behavior differs from implementation, file follow-up impl work. + +## 6. Reconcile MAX_GAP (10,000) vs skipped-key cache (1,000) (doc, maybe impl) + +A near-MAX_GAP jump derives up to 10,000 skipped keys into a 1,000-entry +cache; 90% are evicted immediately and those messages are silently lost. + +- [ ] Either bound MAX_GAP to cache capacity or document that gaps beyond + cache size cause silent loss (§8.3.1). +- [ ] Note the DoS angle: a malicious server can replay a genuine high-`msg_num` + envelope to force up to MAX_GAP HMAC derivations per frame while + censoring the intervening band. (It cannot forge headers, so it cannot + pick arbitrary `msg_num` values — only replay/withhold genuine ones.) + +## 7. Document residual FS cost of failed-body key caching (doc only) + +§8.3.1(4) deviation caches `MK_n` for body-failed messages; a malicious server +can trigger this at will, keeping keys alive until age-based eviction. §5.5 +rule 2 doesn't cover the never-used case. + +- [ ] Add a sentence in §8.3.1(4) and/or §5.5 stating the bounded + forward-secrecy cost. + +## 8. Specify handling of header-undecryptable frames (doc, maybe impl) + +Frames failing `tryDecryptHeader` are "dropped" but never DELETEd; they are +re-fetched every poll for up to 7 days and can starve the 50-message GET window. 
+Also tension with §5.4.4 "duplicates MUST be ACKable": a duplicate transition +message arriving after ratchet is header-undecryptable. + +- [ ] Specify whether clients delete header-undecryptable frames (and the + replay implications) or how queue starvation is otherwise avoided. + +## 9. Soften §12.3 cross-epoch correlation claim (doc only) + +"Impossible for the server to correlate messages across epochs" is overstated: +the transition message posts to the old token, and the receiver's IP switches +polling targets in adjacent cycles. Align with §2.3/§7.3 admissions. + +- [ ] Reword §12.3 to "content-layer correlation" only. +- [ ] Optional: decorrelate the receiver's polling switch from T_old to T_new + in time (jitter, or switch on the next regular cycle) to weaken the + deterministic linkage. + +## 11. Freshness lower-bound: stale-pinning by a withholding server (NEW — HIGH) + +§2.3 treats message dropping as DoS, but for a location app withholding is an +*integrity* attack: GET is non-destructive and the server controls delivery. +Once Bob has processed up to seq=N and DELETEd, a malicious server simply never +serves seq>N. Bob's UI shows an authentic-but-stale location with a +trustworthy-looking authenticated `ts` — a confident false belief ("she's still +here"), not a visible gap. The only staleness signal is `isStale` at 7 days +(§13.4), useless against minutes-to-hours pinning. `seq` gives ordering but no +wall-clock anchor. + +- [ ] Receiver-side (no wire change): the protocol already mandates regular + Keepalives (§5.7.2), so the client knows the expected cadence. Enforce a + max-age on the last authenticated `ts` and surface "no fresh update in + >X minutes" prominently; treat a stalled seq against expected cadence as + suspicious, not benign. 
+- [ ] Optional wire-level strengthening (plaintext-schema only, rides §5.7.3 + forward-compat): sender includes an `expected_next_interval` / + heartbeat-commitment field so the receiver's max-age is sender-declared + rather than client-guessed. +- [ ] Document the attack in §2.2/§2.3 (currently mis-filed under DoS). + +## 12. Safety number does not authenticate the rest of the invite payload (NEW) + +The safety number covers only the two `ek_pub`s. `suggested_name` (and the +redundant `fingerprint`) in the QR/link are not bound to it: an attacker who +tampers the invite's name but leaves `ek_pub` intact gets no keys, yet +pre-seeds Bob's naming dialog with an attacker-chosen label while safety-number +verification still passes. Severity is capped by §3.2 (name is only a pre-fill +the user confirms), but the doc implies more coverage than exists. + +- [ ] State explicitly in §3.4 that safety-number verification authenticates + keys only, not the displayed/suggested name. +- [ ] Consider folding a hash of the full invite payload into the + safety-number/confirmation transcript (wire-compatible for the QR; + check `key_confirmation` implications). +- [ ] Drop the redundant `fingerprint` field from the invite payload, or + document it as a non-security convenience (it is derivable from `ek_pub` + and provides no integrity an adversary can't recompute). + +## 10. Minor spec corrections (doc only) + +- [ ] §5.3 step 2: `prev_chain_len` is in the encrypted *header* (§9.1.1), not + the body payload; fix wording (header must be read before body keys can + be derived). +- [ ] §8.3.1(5) vs §5.4.1: msg_num "resets to 0" vs first message is `seq == 1`. + Pick one convention; state initial `recv_msg_num`. +- [ ] §4.2: observer sees `EK_B.pub` (not `EK_A.pub`) in `KeyExchangeInit`. +- [ ] §9.1.1 vs §9.1.2 vs §9 top: PROTOCOL_VERSION is 1 byte in header, 4 bytes + in body AAD, plus a JSON `"v": 1` — three encodings of one logical field. 
+ Nail down a single canonical byte layout. +- [ ] §3.4 / §4.4: `formatSafetyNumber` is unspecified. 60 bytes rendered as + "12 groups of 5 decimal digits" doesn't define the reduction (e.g., + 5-byte chunks mod 10^5, a la Signal) — the encoding determines the real + comparison strength. Define it. +- [ ] §4.4 / §8.3: specify the full header-key schedule, not just the initial + assignment. Assign direction for the initial shared `next_header_key` + (send-next vs recv-next, given Alice's eager ratchet), AND state how the + single `new_header_key` emitted by each KDF_RK step rolls into the + current/next header-key pair per epoch. As written, a reasonable + implementer can produce a schedule that cannot decrypt out-of-order + new-epoch headers, breaking the §5.3 reliability property. +- [ ] §9.1.2: state max plaintext is 511 bytes (≥1 pad byte always required) + and the de-padding rule (scan back to last 0x80; reject if absent). +- [ ] §7.4.2: "GCM overhead" → ChaCha20-Poly1305. Also fix stale "AES-256-GCM" + claim in CLAUDE.md's E2EE summary. +- [ ] Fix stale cross-references: ToC lists 12 sections (doc has 13); §2.3 / + §12.5 quantum pointers; §5.7.2 cites §7.2 for the 7-day window (it's + §10.2/§13.4); §3.4 cites "§8.3 format" for safety-number rendering. diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index 6783cb08..bc575032 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -130,7 +130,7 @@ This protocol uses **no long-term identity keys**. There is no `IK`, no `SigIK`, This design decision is dependent upon the device management policy (§3.3): when a device is lost or the app is reinstalled, all contacts must be manually re-added. Dropping long-term keys eliminates the exposure of a stable device identifier to the server (§4.4), risking social graph leakage, and considerably simplifies the protocol. 
-Each friendship session is identified by the pair `(EK_A.pub, EK_B.pub)` — the initial bootstrap ephemeral public keys from Alice and Bob respectively. These keys are used to derive the Safety Number (§3.4) and session fingerprints (§8.3). The session's root key `SK` is derived from a single X25519 operation over these keys; both private keys are deleted immediately after derivation. +Each friendship session is identified by the pair `(EK_A.pub, EK_B.pub)` — the initial bootstrap ephemeral public keys from Alice and Bob respectively. These keys are used to derive the Safety Number (§3.4) and session fingerprints (§8.3). The session's root key `SK` is derived from a single X25519 operation over these keys. Alice's private key `EK_A.priv` is deleted immediately after derivation. Bob's private key `EK_B.priv` is **not** deleted at this point — it is copied into `localDhPriv` and persists until Bob completes his first DH ratchet step (see §4.4 and §5.5). ### 3.2 Naming and Local Aliases @@ -172,7 +172,7 @@ Each exchange requires only one ephemeral X25519 key pair per side, generated fr - **Alice:** Generates a fresh ephemeral key pair `EK_A` when displaying a QR/link. No persistent key material required. - **Bob:** Generates a fresh ephemeral key pair `EK_B` when scanning Alice's QR. -Both private keys are deleted immediately after `SK` is computed and verified. +Alice's private key `EK_A.priv` is deleted immediately after `SK` is computed and verified. Bob's private key `EK_B.priv` is retained as `localDhPriv` until his first DH ratchet step completes (see §4.4 and §5.5). ### 4.2 Option A: In-Person QR Code Exchange (Recommended) @@ -326,7 +326,7 @@ Alice receives the `KeyExchangeInit` and: 10. **Eager Ratchet (Deadlock Breaker):** To prevent the session from being stuck in the initial symmetric chain (Epoch 0), Alice immediately generates a new DH keypair (`A1`) and performs one DH ratchet step using `EK_B.pub` before returning the session. 
This ensures her very first location message is sent in Epoch 1. When Bob receives this message, he will observe the new `A1` and perform his own DH ratchet step, completing the transition to a fully ratcheted state. This eager approach is a deliberate deadlock breaker; while a Keepalive mechanism (§5.3) provides an alternative path for rotation, the implementation chooses this eager transition to ensure post-compromise security from the first message. 11. Stores the session. -Bob **deletes `EK_B.priv` immediately** after posting the `KeyExchangeInit`. +Bob **does not delete `EK_B.priv` immediately** after posting the `KeyExchangeInit`. Instead, it is copied into `localDhPriv` in the session state — Bob needs it to perform his first DH ratchet step when he receives Alice's eager-ratchet message (§4.4 step 10). The original `EK_B.priv` buffer is zeroed after the copy. The copy of `EK_B.priv` held in `localDhPriv` is deleted (zeroed) once Bob completes that first ratchet step. --- @@ -433,6 +433,7 @@ To maximize forward secrecy, implementations should adhere to the following hygi 3. **Persistence Policy:** - Full `SessionState` (including `localDhPriv`) is persisted to local storage to ensure session stability across app restarts and crashes. - **Initial State Hygiene:** Bob's initial ephemeral private key (`ekB.priv`) is copied into `localDhPriv` in the `SessionState` to enable the first DH ratchet step when Alice responds. The original buffer is zeroed immediately after session initialization. + - **Bootstrap window security implication:** Between Bob posting `KeyExchangeInit` and completing his first DH ratchet step, `localDhPriv` holds a copy of `EK_B.priv`. During this window, an attacker who recovers Bob's device state (e.g., via a backup) *and* has access to the QR payload (which contains the public key `EK_A.pub`) can reconstruct `SK = X25519(EK_B.priv, EK_A.pub)` and derive all session keys. This window closes as soon as Bob processes Alice's first message and ratchets forward. 
The QR payload should be treated as sensitive for the duration of the pairing interaction. - To mitigate backup-recovery risks, this state MUST be stored using device-local, backup-excluded security controls (e.g., `kSecAttrAccessibleAfterFirstUnlockThisDeviceOnly` on iOS, `KeyStore`-backed encryption with `allowBackup=false` on Android). - **Android 9+:** Use `setIsStrongBoxBacked(true)` if available. If hardware is unavailable, fall back to TEE-backed Keystore with `setUserAuthenticationRequired(true)` and ensure the manifest has `allowBackup=false`. - **JVM Memory Hygiene:** On the JVM, `Arrays.fill()` is inherently limited by garbage collector behavior, which may relocate byte arrays and leave stale copies in memory. For improved hygiene, consider using off-heap storage (`DirectByteBuffer`), Conscrypt's `SecretKey` wrappers, or native bindings to libsodium (which handles zeroization natively). From 97e25cb3bea96af75bec7b9cf1569f17150a25e1 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Fri, 12 Jun 2026 23:20:10 +0200 Subject: [PATCH 03/13] docs: resolve prev_recv_token contradiction (issue #3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The implementation polls exactly one recv_token per cycle (§5.4.2 was correct). There is no prevRecvToken in SessionState or polling logic. §9.2 incorrectly required polling a prev_recv_token during epoch transitions; §8.2 SessionState spec carried a non-existent field. Fix: - Remove prev_recv_token from §8.2 SessionState struct - Rewrite §9.2 to match §5.4.2 and the implementation: single-token polling; note the metadata advantage over two-token polling Closes issue #3 in TODO_SPEC_UPDATES.md. 
Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 8 ++++++-- docs/e2ee-location-sync.md | 3 +-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index 7c9f495c..cd0fb7a5 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -39,9 +39,13 @@ otherwise process Alice's eager-ratchet message. polling); §9.2 says the receiver MUST also poll `prev_recv_token` during epoch transition; §8.2 `SessionState` carries `prev_recv_token`. -- [ ] Determine which behavior the implementation actually has. -- [ ] Make §5.4.2, §8.2, and §9.2 agree. Note metadata impact if two-token +- [x] Determine which behavior the implementation actually has. + (Single-token polling per §5.4.2; `prevRecvToken` does not exist in SessionState + or polling logic; §9.2 and §8.2 were wrong.) +- [x] Make §5.4.2, §8.2, and §9.2 agree. Note metadata impact if two-token polling is the real behavior (doubles per-friend poll fingerprint). + (Removed `prev_recv_token` from §8.2 SessionState; rewrote §9.2 to match §5.4.2; + noted the metadata benefit of single-token design.) ## 4. Reconcile the two KDF_CK definitions (doc only) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index bc575032..29fc739a 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -631,7 +631,6 @@ SessionState { recv_chain_key: [32]byte // CK for incoming messages send_token: [16]byte // token for highest_peer_dh_pub_seen recv_token: [16]byte // token for current local ratchet key - prev_recv_token: [16]byte // token for previous local ratchet key (pending retirement); unset at bootstrap send_msg_num: uint64 // sender chain message number recv_msg_num: uint64 // highest received msg_num in the current epoch (for replay rejection) highest_peer_dh_pub_seen: [32]byte // highest peer DH pub key successfully processed @@ -794,7 +793,7 @@ Senders MUST emit the `"type"` field on every plaintext message. 
Receivers MUST Bob retrieves pending messages by performing a `GET` request to his pairwise receive token: `GET /inbox/{hex(recv_token_T)}`. There is no JSON payload for the poll request itself. -During an epoch transition, Bob MUST also poll `prev_recv_token` if it is set (non-zero): `GET /inbox/{hex(prev_recv_token)}`. Both polls occur within the same polling cycle. `prev_recv_token` is unset at bootstrap and after it has been retired per §5.4.3. +The receiver polls **exactly one token per cycle** — the current `recv_token`. Epoch transitions are handled entirely on the send side (§5.4.1): the sender's transition-message rule routes the first new-epoch message onto the token the receiver is already polling, so the receiver naturally observes the new `dh_pub`, ratchets forward, and switches its polling target without needing a two-token window. Polling a single token also avoids doubling the per-friend poll fingerprint that a two-token design would expose to the server. The server returns a JSON array of `MailboxPayload` objects, or an empty array `[]` if no messages are pending (§7.2). From 27a98dd382a2c586849c825be4d59ec157778c93 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Fri, 12 Jun 2026 23:25:00 +0200 Subject: [PATCH 04/13] docs: reconcile KDF_CK definition (issue #4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit §8.3 was correct: KDF_CK uses HMAC-SHA-256 for both the message key and next chain key, then a separate HKDF-SHA-256 for the nonce. The §11 primitives table incorrectly described it as a "single 76-byte HKDF expand". The file-header comment in Ratchet.kt had the same error. Fix §11 table and Ratchet.kt comment to match §8.3 and the implementation. Closes issue #4 in TODO_SPEC_UPDATES.md. 
Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 4 +++- docs/e2ee-location-sync.md | 2 +- shared/src/commonMain/kotlin/net/af0/where/e2ee/Ratchet.kt | 6 ++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index cd0fb7a5..c7fe2eb1 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -53,7 +53,9 @@ transition; §8.2 `SessionState` carries `prev_recv_token`. §11 table: single 76-byte HKDF expand for message key + nonce. These produce different bytes. -- [ ] Check the implementation, fix whichever section is wrong. +- [x] Check the implementation, fix whichever section is wrong. + (§8.3 is correct: HMAC for MK/CK', separate HKDF for nonce. Fixed §11 table + and the incorrect file-header comment in Ratchet.kt.) ## 5. Specify discovery-mailbox multiple-responder behavior (doc, maybe impl) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index 29fc739a..f502d052 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -861,7 +861,7 @@ With this design, and assuming only the advertised mailbox API and payload encry | Asymmetric key agreement | X25519 (ECDH) | Diffie-Hellman key exchange at bootstrap and each DH ratchet step | libsodium / Tink / CryptoKit | | Symmetric encryption | ChaCha20-Poly1305 (IETF) | Encrypt location payloads and control messages (AEAD) | libsodium | | Key derivation (KDF_RK) | HKDF-SHA-256 | Derive new root key and chain key from DH output | libsodium | -| Chain KDF (KDF_CK) | HKDF-SHA-256 | Advance symmetric ratchet; derive message key (32 B) and nonce (12 B) via single 76-byte HKDF expand | libsodium | +| Chain KDF (KDF_CK) | HMAC-SHA-256 + HKDF-SHA-256 | Advance symmetric ratchet: `MK = HMAC(CK, 0x01)`, `CK' = HMAC(CK, 0x02)`, `nonce = HKDF(ikm=MK, info="Where-v1-MsgNonce", length=12)` | libsodium | | Suggested name KDF | HKDF-SHA-256 | Derive name encryption key `K_name` from shared secret `SK` | libsodium | | Suggested name 
encryption | ChaCha20-Poly1305 (IETF) | Encrypt/decrypt suggested name during key exchange | libsodium | | Session auth | HMAC-SHA-256 | Authenticate `KeyExchangeInit` key confirmation | libsodium | diff --git a/shared/src/commonMain/kotlin/net/af0/where/e2ee/Ratchet.kt b/shared/src/commonMain/kotlin/net/af0/where/e2ee/Ratchet.kt index fb32d61a..79b2f427 100644 --- a/shared/src/commonMain/kotlin/net/af0/where/e2ee/Ratchet.kt +++ b/shared/src/commonMain/kotlin/net/af0/where/e2ee/Ratchet.kt @@ -6,8 +6,10 @@ package net.af0.where.e2ee * KDF_RK – DH ratchet step. Inputs the current root key as HKDF salt and a fresh DH output * as IKM. Produces 96 bytes: [new_root_key (32) || new_chain_key (32) || new_header_key (32)]. * - * KDF_CK – Symmetric chain step. A single HKDF-SHA-256 call producing 76 bytes: - * [new_chain_key (32) || message_key (32) || message_nonce (12)]. + * KDF_CK – Symmetric chain step using HMAC-SHA-256: + * message_key = HMAC(chain_key, 0x01) + * new_chain_key = HMAC(chain_key, 0x02) + * message_nonce = HKDF-SHA-256(ikm=message_key, info="Where-v1-MsgNonce", length=12) * The old chain key MUST be discarded immediately after this call. */ From 8b3e44c2a524b4db7f44199edcec7f01a26ceaf6 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Fri, 12 Jun 2026 23:48:09 +0200 Subject: [PATCH 05/13] docs: specify process-all discovery semantics, resolving issue #5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace first-responder-wins with process-all: Alice processes every KeyExchangeInit received during the discovery window, establishing one independent session per scanner. This eliminates silent hijack-via- displacement (a rogue init can no longer crowd out a legitimate one) and aligns the spec with the multi-scan FR (issue #233). 
Key spec changes in §4.2: - Discovery window closes on UI dismissal, not after first init - Multiple-init UX: surface count, prompt Safety Number per session - Security note: server ordering no longer determines who gets a session - Honest implementation status note: current client is still first- responder-wins; upgrade tracked in issue #233 Closes issue #5 in TODO_SPEC_UPDATES.md. Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 10 ++++++---- docs/e2ee-location-sync.md | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index c7fe2eb1..3b3e29e8 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -65,10 +65,12 @@ that the server controls GET ordering ("up to 50 from the front of the queue"), i.e., a malicious server picks which `KeyExchangeInit` Alice sees first. -- [ ] Specify behavior when multiple `KeyExchangeInit` messages are present in - the discovery mailbox (e.g., surface an error to Alice), turning a silent - hijack into a detectable event. -- [ ] If specified behavior differs from implementation, file follow-up impl work. +- [x] Specify behavior when multiple `KeyExchangeInit` messages are present in + the discovery mailbox: process all of them (process-all, not first-responder- + wins), surface a count to Alice, prompt Safety Number verification per session. + This eliminates silent displacement and aligns with issue #233. +- [x] Specified behavior differs from implementation (current: first-responder-wins). + Impl work tracked in https://github.com/danmarg/where/issues/233. ## 6. 
Reconcile MAX_GAP (10,000) vs skipped-key cache (1,000) (doc, maybe impl) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index f502d052..fbb24712 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -204,8 +204,10 @@ Using a random secret (rather than `EK_A.pub`) as HKDF IKM ensures that only som - Alice begins polling `GET /inbox/{hex(discovery_token_A)}` immediately. - Bob derives the same `discovery_token_A` from the scanned `discovery_secret` and POSTs his `KeyExchangeInit` there. -- Once Alice retrieves and processes the `KeyExchangeInit`, she switches to polling `recv_token` for all subsequent messages. -- The discovery token is single-use and ephemeral: implementations MUST discard it after `aliceProcessInit` completes. +- Alice processes **all** `KeyExchangeInit` messages received during the discovery window, establishing one fully independent session per scanner. Each scanner's `EK_B` is fresh and produces a distinct `SK`, so sessions are cryptographically isolated from one another. +- The discovery window closes when Alice dismisses the Add Friend UI (or an implementation-defined timeout). The discovery token MUST be discarded at that point. +- **Multiple-init UX:** When more than one `KeyExchangeInit` is processed in a single discovery window, Alice's UI SHOULD make this visible (e.g. "Added 3 friends from this QR") and SHOULD prompt Safety Number verification for each resulting session. This ensures that a rogue init — which Alice cannot distinguish cryptographically from a legitimate one — produces a visible, verifiable event rather than a silent side-session. +- **Security note:** A malicious server controlling GET response ordering cannot displace a legitimate scanner under this model, because all inits in the mailbox are processed. 
The server can still withhold a specific `KeyExchangeInit` entirely (DoS — see §2.3), but it cannot cause Alice to silently pair with an attacker *instead of* a legitimate scanner. **Implementation status:** the current client is still first-responder-wins; the upgrade to process-all is tracked in issue #233. ### 4.3 Option B: Out-of-Band (URI / Manual) From 11d570a7612373e97c3add77c88170b8a9ed76f3 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 08:41:44 +0200 Subject: [PATCH 06/13] fix: remove MAX_GAP, unify gap limit with MAX_SKIPPED_KEYS (issue #6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MAX_GAP (10,000) was dead code: the cache-capacity pre-check in Session.decryptMessage always fired first, bounding same-epoch key derivation to ~MAX_SKIPPED_KEYS (1,000) steps regardless. Remove the separate constant and use MAX_SKIPPED_KEYS directly at the one call site in Session.kt. Update §8.3.1 to: - State the real gap limit (MAX_SKIPPED_KEYS = 1,000, checked before any derivation begins) - Correctly bound the server-forced HMAC DoS to ~1,000 ops/frame - Document cross-epoch pn gap-filling silent eviction behavior Closes issue #6 in TODO_SPEC_UPDATES.md. Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 10 ++++------ docs/e2ee-location-sync.md | 6 ++++-- .../kotlin/net/af0/where/e2ee/ProtocolConstants.kt | 1 - .../commonMain/kotlin/net/af0/where/e2ee/Session.kt | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index 3b3e29e8..9c95d466 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -77,12 +77,10 @@ first. A near-MAX_GAP jump derives up to 10,000 skipped keys into a 1,000-entry cache; 90% are evicted immediately and those messages are silently lost. -- [ ] Either bound MAX_GAP to cache capacity or document that gaps beyond - cache size cause silent loss (§8.3.1). -- [ ] Note the DoS angle: a malicious server can replay a genuine high-`msg_num` - envelope to force up to MAX_GAP HMAC derivations per frame while - censoring the intervening band. 
(It cannot forge headers, so it cannot - pick arbitrary `msg_num` values — only replay/withhold genuine ones.) +- [x] Removed MAX_GAP constant; same-epoch gap limit is now MAX_SKIPPED_KEYS (1,000) + at both the coarse check and the cache pre-check. Updated §8.3.1 and Session.kt. +- [x] Noted DoS bound (≤1,000 HMACs/frame, not 10,000) and cross-epoch silent-loss + behavior in §8.3.1. ## 7. Document residual FS cost of failed-body key caching (doc only) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index fbb24712..2e832915 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -688,8 +688,10 @@ The `dh_pub` is included in the AAD to cryptographically bind the message to the Each message frame carries a `msg_num` counter. Recipients enforce: 1. **Replay rejection:** Any frame with `msg_num <= max_msg_num_received` (within the same DH epoch) is dropped, EXCEPT if the key for that message number is present in the **skipped message key cache**. -2. **Maximum gap (MAX_GAP):** recipients MUST enforce a maximum gap (default 10,000) for chain advancement to prevent resource exhaustion attacks. -3. **OutOfOrder Support:** If a message is skipped (e.g., recipient receives `msg_num=10` after `msg_num=8`), the recipient advances the symmetric ratchet to `msg_num=10` and stores the intermediate message keys in a bounded cache (1000 entries, `MAX_SKIPPED_KEYS`). The bound is sized to comfortably absorb a full `MAX_MESSAGES_PER_POLL = 500` backlog with headroom. +2. **Maximum gap:** recipients MUST reject a frame whose same-epoch gap would cause the skipped-key cache to exceed `MAX_SKIPPED_KEYS` (1,000). This pre-check fires before any key derivation begins, bounding the HMAC work a malicious server can force to ~1,000 operations per frame. The server cannot inflate `msg_num` beyond what a genuine sender transmitted (the field is inside the encrypted header), so it can only replay or withhold genuine frames. +3. 
**OutOfOrder Support:** If a message is skipped (e.g., recipient receives `msg_num=10` after `msg_num=8`), the recipient advances the symmetric ratchet to `msg_num=10` and stores the intermediate message keys in a bounded cache (1,000 entries, `MAX_SKIPPED_KEYS`). The bound is sized to comfortably absorb a full `MAX_MESSAGES_PER_POLL = 500` backlog with headroom. + + **Cross-epoch gap-filling:** when a new-DH-epoch frame arrives, the receiver also fills in skipped keys for the *previous* epoch's chain up to `prev_chain_len`. This fill is uncapped and evicts oldest cache entries if the combined cache overflows. If a peer sent more messages in the previous epoch than fit in the remaining cache, those skipped keys are silently lost and late-arriving stragglers for that epoch will be undeliverable. 4. **Transactional Commitment:** The receiving state (receiving chain, root key, skipped keys) MUST only be updated if the message AEAD authentication succeeds. The receiving state MUST not be committed earlier. *Deliberate deviation in this implementation:* if the header authenticated but the body AEAD failed, the receiver advances `recv_msg_num` and the chain key to prevent permanent DH desync (§5.5), AND caches the message key for the failed `msg_num` itself in `skipped_message_keys`. The motivation: a malicious server can deliver a bit-flipped copy of a genuine message (header is encrypted under a secret key, so the server cannot fabricate one, but it can flip body bytes). Without the cache, when the clean original later arrives in the same batch, it fails the `msg_num <= recv_msg_num` check and is permanently undeliverable. Caching the seq key lets the clean copy decrypt via the skipped-key path. This keeps the deviation inside the existing "server can drop messages" threat boundary. 
diff --git a/shared/src/commonMain/kotlin/net/af0/where/e2ee/ProtocolConstants.kt b/shared/src/commonMain/kotlin/net/af0/where/e2ee/ProtocolConstants.kt index fcb4c290..f2eec8c8 100644 --- a/shared/src/commonMain/kotlin/net/af0/where/e2ee/ProtocolConstants.kt +++ b/shared/src/commonMain/kotlin/net/af0/where/e2ee/ProtocolConstants.kt @@ -21,7 +21,6 @@ internal const val MAX_MESSAGES_PER_POLL = 500 // accepted as lost; the session may need re-pairing if they contained DH keys. // At a 30-second poll interval, 5 retries = ~2.5 minutes before force-ACK. internal const val MAX_SILENT_DROP_RETRIES = 5 -internal const val MAX_GAP = 10000 // Bound on the skipped-message-key cache. Sized to comfortably absorb a full // MAX_MESSAGES_PER_POLL backlog (500) with headroom; peer-influenceable but // bounded at ~60 bytes/entry ≈ 60 KB worst case. diff --git a/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt b/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt index 5b78dc52..6a4792a1 100644 --- a/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt +++ b/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt @@ -184,7 +184,7 @@ object Session { } val stepsNeeded = seq - speculativeState.recvSeq - if (stepsNeeded > MAX_GAP + 1) { + if (stepsNeeded > MAX_SKIPPED_KEYS + 1) { throw ProtocolGapException("gap too large: stepsNeeded $stepsNeeded") } From 4f034068432bec5229a077a97dd5ef386fc3adbc Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 09:58:59 +0200 Subject: [PATCH 07/13] fix: remove body-fail seq-key caching (issue #7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Caching MK_n on body-AEAD failure provides no robustness: a server willing to deliver a corrupted copy can equally just drop the message. The cache only extended key lifetime by up to 7 days with no benefit. Remove the caching. 
On body-fail after a valid header, the receiver still advances recvSeq and the chain key (required to prevent DH desync), but does not cache the key. The failed message is lost, equivalent to a server drop. Update §8.3.1(4) to require this behaviour and explicitly forbid the cache. Update ReceiveRatchetFailureTest to verify recvSeq advances, the key is not cached, and a subsequent clean copy is rejected as a replay. Closes issue #7 in TODO_SPEC_UPDATES.md. Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 5 ++- docs/e2ee-location-sync.md | 2 +- .../kotlin/net/af0/where/e2ee/Session.kt | 27 +++--------- .../where/e2ee/ReceiveRatchetFailureTest.kt | 44 +++++++------------ 4 files changed, 28 insertions(+), 50 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index 9c95d466..6ebc4e41 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -88,8 +88,9 @@ cache; 90% are evicted immediately and those messages are silently lost. can trigger this at will, keeping keys alive until age-based eviction. §5.5 rule 2 doesn't cover the never-used case. -- [ ] Add a sentence in §8.3.1(4) and/or §5.5 stating the bounded - forward-secrecy cost. +- [x] Removed the body-fail seq-key caching entirely (no robustness benefit; + server can drop instead). §8.3.1(4) now requires advancing state on body-fail + but explicitly forbids caching the key. Test updated accordingly. ## 8. Specify handling of header-undecryptable frames (doc, maybe impl) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index 2e832915..3fad61d5 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -694,7 +694,7 @@ Each message frame carries a `msg_num` counter. Recipients enforce: **Cross-epoch gap-filling:** when a new-DH-epoch frame arrives, the receiver also fills in skipped keys for the *previous* epoch's chain up to `prev_chain_len`. This fill is uncapped and evicts oldest cache entries if the combined cache overflows. 
If a peer sent more messages in the previous epoch than fit in the remaining cache, those skipped keys are silently lost and late-arriving stragglers for that epoch will be undeliverable. 4. **Transactional Commitment:** The receiving state (receiving chain, root key, skipped keys) MUST only be updated if the message AEAD authentication succeeds. The receiving state MUST not be committed earlier. - *Deliberate deviation in this implementation:* if the header authenticated but the body AEAD failed, the receiver advances `recv_msg_num` and the chain key to prevent permanent DH desync (§5.5), AND caches the message key for the failed `msg_num` itself in `skipped_message_keys`. The motivation: a malicious server can deliver a bit-flipped copy of a genuine message (header is encrypted under a secret key, so the server cannot fabricate one, but it can flip body bytes). Without the cache, when the clean original later arrives in the same batch, it fails the `msg_num <= recv_msg_num` check and is permanently undeliverable. Caching the seq key lets the clean copy decrypt via the skipped-key path. This keeps the deviation inside the existing "server can drop messages" threat boundary. + *Exception:* if the header authenticated but the body AEAD failed, the receiver MUST still advance `recv_msg_num` and the chain key. Without this, a server-dropped message and a server-corrupted message would leave different ratchet states, causing permanent DH desync. The failed message's key MUST NOT be cached — caching it confers no robustness benefit (a server willing to corrupt a message can equally drop it) and unnecessarily extends the key's lifetime. 5. **Epoch Transition:** When a message with a new `dh_pub` is received, the `msg_num` counter resets to 0. All skipped message keys belonging to epochs older than the *previous* valid epoch MUST be cleared. 6. 
**Across-Epoch Replay:** Recipients hold only two receive header keys at any time — the current epoch's `header_key` and the next epoch's `next_header_key`. The previous epoch's receive header key is discarded on DH ratchet (`Session.performDhRatchet`). A replayed frame from a retired epoch therefore fails `tryDecryptHeader` and is dropped before any ratchet logic runs, with no dedicated `retired_dh_pubs` set required. Within-epoch replay is caught by the `msg_num <= recv_msg_num` check plus the single-use skipped-key cache. diff --git a/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt b/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt index 6a4792a1..d793af03 100644 --- a/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt +++ b/shared/src/commonMain/kotlin/net/af0/where/e2ee/Session.kt @@ -256,24 +256,13 @@ object Session { try { aeadDecrypt(finalStep.messageKey, finalStep.messageNonce, message.ct, aad) } catch (e: Exception) { - // Decryption failure. We persist the ratcheted state if the header - // authenticated, to prevent permanent DH desync (§5.5), AND cache the - // message key for `seq` itself so that a later genuine copy of the same - // message — e.g. the clean original after a malicious server bit-flipped - // the first delivery — can still decrypt via the skipped-key cache path - // instead of failing the recvSeq replay check. - // (Deliberate deviation from spec §8.3.1(4); see spec note.) - val seqCacheKey = remoteDhPub.toHex() + ":" + seq - derivationSkippedKeys[seqCacheKey] = - finalStep.messageKey + finalStep.messageNonce + longToBeBytes(now) - if (derivationSkippedKeys.size > MAX_SKIPPED_KEYS) { - val oldestKey = derivationSkippedKeys.keys.first() - if (oldestKey != seqCacheKey) { - derivationSkippedKeys[oldestKey]?.zeroize() - derivationSkippedKeys.remove(oldestKey) - } - } - + // Body AEAD failed despite a valid header. 
Advance recvSeq and the chain + // key to prevent permanent DH desync — without this, a server that drops + // the message entirely and a server that delivers a corrupted copy would + // have different effects on session state, breaking the ratchet. + // The failed message's key is NOT cached: caching it would keep MK_n alive + // for up to 7 days with no benefit, since a server willing to deliver a + // corrupted copy can equally just drop the message. val failedState = speculativeState.deepCopy().copy( recvChainKey = chainKey.copyOf(), @@ -282,8 +271,6 @@ object Session { needsRatchet = cleanState.needsRatchet || isNewDhEpoch, ) // Wipe any speculative intermediate keys derived during this failed call. - // The seq cache entry above is intentionally NOT in addedSkippedKeys — - // failedState already holds a copy of it. addedSkippedKeys.forEach { it.zeroize() } chainKey.zeroize() throw DecryptionExceptionWithState(failedState, e) diff --git a/shared/src/commonTest/kotlin/net/af0/where/e2ee/ReceiveRatchetFailureTest.kt b/shared/src/commonTest/kotlin/net/af0/where/e2ee/ReceiveRatchetFailureTest.kt index 4e87877e..2c45da30 100644 --- a/shared/src/commonTest/kotlin/net/af0/where/e2ee/ReceiveRatchetFailureTest.kt +++ b/shared/src/commonTest/kotlin/net/af0/where/e2ee/ReceiveRatchetFailureTest.kt @@ -1,8 +1,8 @@ package net.af0.where.e2ee import kotlin.test.Test -import kotlin.test.assertEquals -import kotlin.test.assertIs +import kotlin.test.assertFailsWith +import kotlin.test.assertFalse import kotlin.test.assertTrue import kotlin.test.fail @@ -12,17 +12,13 @@ class ReceiveRatchetFailureTest { } /** - * Issue #2: a malicious server bit-flips the body of a genuine message. - * The header still authenticates (header keys are secret), so decryptMessage - * advances the receive ratchet (per §5.5 — to prevent permanent DH desync). 
- * Without the seq-key cache, when the clean original later arrives in the - * same batch, it is `seq <= recvSeq` → ReplayException and permanently lost. - * - * This test exercises the within-batch case (Bob's pre-decrypted header is - * reused across both decryption attempts, matching E2eeProtocol.decryptBatch). + * A body-AEAD failure after a valid header must advance recvSeq and the chain key + * to prevent permanent DH desync (§8.3.1(4)). The failed message's key must NOT + * be cached — there is no robustness benefit to caching it, since a server willing + * to deliver a corrupted copy can equally just drop the message. */ @Test - fun cleanCopyDecryptsAfterTamperedAdvanceCachesSeqKey() { + fun bodyFailAdvancesStateWithoutCachingSeqKey() { val (qr, aliceEkPriv) = KeyExchange.aliceCreateQrPayload("Alice") val (msg, bobSession) = KeyExchange.bobProcessQr(qr, "Bob") val aliceSession = KeyExchange.aliceProcessInit(msg, aliceEkPriv, qr.ekPub) @@ -32,9 +28,6 @@ class ReceiveRatchetFailureTest { MessagePlaintext.Location(1.0, 2.0, 3.0, 4L), ) - // Pre-decrypt the header once (matching the batch path). Both the - // tampered and the clean copy share the same envelope, so this header - // applies to both. val sessionAad = bobSession.aliceFp + bobSession.bobFp val header = try { Session.decryptHeader(bobSession.headerKey, original.envelope, sessionAad) @@ -42,7 +35,6 @@ class ReceiveRatchetFailureTest { Session.decryptHeader(bobSession.nextHeaderKey, original.envelope, sessionAad) } - // Simulate a malicious server bit-flipping the body. val tampered = original.copy( ct = original.ct.copyOf().also { it[it.size - 1] = (it.last().toInt() xor 0xFF).toByte() }, ) @@ -54,20 +46,18 @@ class ReceiveRatchetFailureTest { e.newState } + // recvSeq must advance so the ratchet state stays consistent. assertTrue(bobAfterFailure.recvSeq >= 1, "recvSeq should have advanced past the failed message") - // The clean original now arrives. 
The cached seq key must rescue it - // from the recvSeq replay rejection. - val (bobFinal, plaintext) = Session.decryptMessage(bobAfterFailure, original, header) - assertIs<MessagePlaintext.Location>(plaintext) - assertEquals(1.0, plaintext.lat) - assertEquals(2.0, plaintext.lng) - assertEquals(4L, plaintext.ts) - - // Cache entry for this seq was consumed by the successful decryption. - assertTrue( - bobFinal.skippedMessageKeys.keys.none { it.endsWith(":${header.seq}") }, - "seq=${header.seq} cache entry should have been consumed", + // The seq key must NOT be cached — the message is lost, equivalent to a drop. + assertFalse( + bobAfterFailure.skippedMessageKeys.keys.any { it.endsWith(":${header.seq}") }, + "seq=${header.seq} key must not be cached after body-fail", ) + + // A subsequent attempt to decrypt the (uncorrupted) original is rejected as a replay. + assertFailsWith<ReplayException> { + Session.decryptMessage(bobAfterFailure, original, header) + } } } From 4d05006f3cbc15f293eec7292d1c770077ecf074 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 10:13:28 +0200 Subject: [PATCH 08/13] docs: specify header-undecryptable frame handling (issue #8) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add to §5.4.4: - Header-undecryptable frames are not ACKed immediately; the client cannot distinguish garbage from a genuine future-epoch message. - Starvation prevention: clients MUST force-ACK an entire batch after MAX_SILENT_DROP_RETRIES consecutive polls with zero successes (~2.5 min at 30s interval with default of 5 retries). - Clarify the §5.4.4 post-ratchet duplicate tension: a duplicate transition message is header-undecryptable after ratchet advance and will be cleared by the force-ACK path. Closes issue #8 in TODO_SPEC_UPDATES.md.
Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 6 ++++-- docs/e2ee-location-sync.md | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index 6ebc4e41..46a7b081 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -99,8 +99,10 @@ re-fetched every poll for up to 7 days and can starve the 50-message GET window. Also tension with §5.4.4 "duplicates MUST be ACKable": a duplicate transition message arriving after ratchet is header-undecryptable. -- [ ] Specify whether clients delete header-undecryptable frames (and the - replay implications) or how queue starvation is otherwise avoided. +- [x] Specified in §5.4.4: header-undecryptable frames are not immediately deleted + (can't distinguish garbage from a future-epoch message); starvation is bounded + by force-ACK after MAX_SILENT_DROP_RETRIES consecutive failed polls (~2.5 min). + Addressed the post-ratchet duplicate tension. ## 9. Soften §12.3 cross-epoch correlation claim (doc only) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index 3fad61d5..d92f37ef 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -406,6 +406,13 @@ Implementation: `E2eeManager` clears outbox entries still targeting `prevSendTok **Duplicate Handling:** Duplicates MUST be ACKable. If a peer receives multiple copies of the same transition message, the first advances state. Subsequent duplicates must not poison the batch and should still allow the receiver to generate the authenticated ACK needed to drain the old queue. +**Header-Undecryptable Frames:** +Frames that fail header decryption (both `header_key` and `next_header_key` fail) are silently skipped — not ACKed and not deleted. They remain in the server queue and are re-fetched on subsequent polls. Clients MUST NOT delete them immediately: the client cannot distinguish a corrupted frame from a genuine future-epoch message it has not yet ratcheted to. 
+ +To prevent header-undecryptable frames from permanently filling the server's 50-message GET window, clients MUST force-ACK (delete) an entire batch after `MAX_SILENT_DROP_RETRIES` consecutive polls in which no message from that batch could be processed. At a 30-second poll interval and the default of 5 retries, the maximum starvation window is approximately 2.5 minutes. + +Note: a duplicate transition message arriving after the ratchet has already advanced is header-undecryptable (the previous epoch's receive header key has been discarded). It will be cleared by the force-ACK mechanism above. If the queue contains a mix of decryptable and undecryptable frames, successfully decryptable frames reset the retry counter, so an undecryptable duplicate may linger longer — but since it is not blocking progress it only occupies a queue slot until it is eventually force-ACKed or ages out (7 days, §10.2). + **Batch Ordering:** When processing a batch of messages already retrieved from the server, clients SHOULD process older epoch classes before newer ones, then lower `prev_chain_len`, then lower `msg_num`. From 744e71211b06ca57b693d86ed1d37bb7b7ca1cb5 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 16:52:33 +0200 Subject: [PATCH 09/13] =?UTF-8?q?docs:=20soften=20=C2=A712.3=20cross-epoch?= =?UTF-8?q?=20correlation=20claim=20(issue=20#9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Token rotation prevents content-layer correlation across epochs, but the same client IP polling T_old then T_new in adjacent cycles is an observable metadata linkage. Clarify that the "impossible to correlate" claim applies to content only, consistent with §2.3 and §7.3. Closes the required sub-item of issue #9 in TODO_SPEC_UPDATES.md. 
Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 3 ++- docs/e2ee-location-sync.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index 46a7b081..645fc7e9 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -110,7 +110,8 @@ message arriving after ratchet is header-undecryptable. the transition message posts to the old token, and the receiver's IP switches polling targets in adjacent cycles. Align with §2.3/§7.3 admissions. -- [ ] Reword §12.3 to "content-layer correlation" only. +- [x] Reworded §12.3: token rotation prevents content-layer correlation only; + same client IP polling T_old then T_new remains a metadata linkage (§2.3/§7.3). - [ ] Optional: decorrelate the receiver's polling switch from T_old to T_new in time (jitter, or switch on the next regular cycle) to weaken the deterministic linkage. diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index d92f37ef..03bf1210 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -905,7 +905,7 @@ While this protocol shares the core **Double Ratchet** design with Signal, it ma - **Signal:** Historically identified users by stable identifiers (phone numbers, now UUIDs). "Sealed Sender" was added later as an extension to hide the sender from the server. - **Where:** Metadata protection is integrated into the base protocol. - **Header Encryption:** Every message uses an **Encrypted Envelope** that hides the DH public key, message number (`msg_num`), and previous chain length (`prev_chain_len`) from the server. - - **Dynamic Routing:** Instead of stable user IDs, Where uses **Pairwise Routing Tokens** derived from the session root key. These tokens rotate automatically with the DH ratchet, making it impossible for the server to correlate messages across epochs without session state. 
+ - **Dynamic Routing:** Instead of stable user IDs, Where uses **Pairwise Routing Tokens** derived from the session root key. These tokens rotate automatically with the DH ratchet, making it impossible for the server to correlate message *content* across epochs without session state. Cross-epoch correlation at the IP level remains possible: the same client IP that was polling T_old will begin polling T_new in the next cycle. This is a metadata limitation acknowledged in §2.3 and §7.3, not a content-layer weakness. ### 12.4 Group Messaging (Sender Keys vs Per-Friend) From 6132f74c532cfb344556c462cbf97e835d9bd15b Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 17:24:03 +0200 Subject: [PATCH 10/13] =?UTF-8?q?docs:=20close=20issue=20#11=20as=20WAI,?= =?UTF-8?q?=20add=20=C2=A72.3=20withholding=20note?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Message withholding is indistinguishable from the sender going offline or staying stationary. The authenticated ts field means the receiver always displays an honest last-seen time. The stationary-flag edge case ("here since X" indefinitely) is also WAI -- indistinguishable from genuine stationarity. No protocol change warranted. Added a note to §2.3 capturing this reasoning including the stationary edge case. Closes issue #11 in TODO_SPEC_UPDATES.md. Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 25 ++++++------------------- docs/e2ee-location-sync.md | 2 +- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index 645fc7e9..ab167afd 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -118,25 +118,12 @@ polling targets in adjacent cycles. Align with §2.3/§7.3 admissions. ## 11. 
Freshness lower-bound: stale-pinning by a withholding server (NEW — HIGH) -§2.3 treats message dropping as DoS, but for a location app withholding is an -*integrity* attack: GET is non-destructive and the server controls delivery. -Once Bob has processed up to seq=N and DELETEd, a malicious server simply never -serves seq>N. Bob's UI shows an authentic-but-stale location with a -trustworthy-looking authenticated `ts` — a confident false belief ("she's still -here"), not a visible gap. The only staleness signal is `isStale` at 7 days -(§13.4), useless against minutes-to-hours pinning. `seq` gives ordering but no -wall-clock anchor. - -- [ ] Receiver-side (no wire change): the protocol already mandates regular - Keepalives (§5.7.2), so the client knows the expected cadence. Enforce a - max-age on the last authenticated `ts` and surface "no fresh update in - >X minutes" prominently; treat a stalled seq against expected cadence as - suspicious, not benign. -- [ ] Optional wire-level strengthening (plaintext-schema only, rides §5.7.3 - forward-compat): sender includes an `expected_next_interval` / - heartbeat-commitment field so the receiver's max-age is sender-declared - rather than client-guessed. -- [ ] Document the attack in §2.2/§2.3 (currently mis-filed under DoS). +- [x] Won't fix at the protocol layer. A withholding server is indistinguishable + from the sender going offline or staying stationary. The authenticated `ts` + ensures the receiver always displays an honest last-seen time — no fabricated + location or timestamp is possible. The stationary-flag edge case ("here since X" + displayed indefinitely) is WAI: it is indistinguishable from genuine stationarity + and the timestamp remains authentic. Added a note to §2.3. ## 12. 
Safety number does not authenticate the rest of the invite payload (NEW) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index 03bf1210..ff96c34d 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -117,7 +117,7 @@ To ensure robustness against network failures, the protocol employs **Server-sid - **Metadata about the social graph.** The server never sees user IDs or UUIDs — routing tokens are opaque and pseudorandom. However, the server observes which IP addresses `POST` to and `GET` from each token. If Alice's IP consistently posts to token T and Bob's IP consistently polls T, the server (or a passive network attacker) can infer they share a friendship, even without decrypting any payload. See §7 for partial mitigations (constant-rate polling, dummy tokens). - **Compromised backups revealing future epochs.** Because the current implementation persists the active ratchet private key (`localDhPriv`) to ensure session stability across app restarts (§5.5), an attacker who recovers a device backup gains access to the current `localDhPriv` and header keys. They can decrypt headers of future messages to observe the peer's new DH public keys, and use `localDhPriv` to advance the root key through each subsequent peer DH epoch — tracking all future message keys until the compromised device generates a fresh DH keypair and the peer ratchets against it. In practice, the exposure window is bounded by the peer's message cadence: the ratchet self-heals after the next complete DH exchange (roughly one location-update interval if both parties are active), but stalls if the peer is offline. Historical messages remain protected by forward secrecy: deleted chain keys cannot be recovered from the backup snapshot. See §5.6 for a detailed analysis of compromise consequences and self-healing (PCS). As mitigation, the `localDhPriv` key is excluded from cloud backups (see §5.5). 
- **Map tile server leakage.** When a recipient views a friend's location on a map, the map provider (e.g., Google Maps, Apple Maps, Mapbox) may infer the friend's location from which tiles the recipient's device requests. This can be mitigated at the application layer via tile pre-fetching or caching, but is outside the scope of this protocol. -- **Denial of service.** This protocol does not protect against a server that drops or delays messages. +- **Denial of service / message withholding.** This protocol does not protect against a server that drops or delays messages. From the receiver's perspective, a server withholding new updates is indistinguishable from the sender going offline or staying stationary. The authenticated `ts` field in every message means the receiver can always display an honest "last seen at X" timestamp; no fabricated location or timestamp is possible. The one edge case is a `stationary` flag (§5.7.1): if a server happens to withhold all messages after a stationary update, the receiver's UI may display "here since X" indefinitely — but this is indistinguishable from the sender genuinely remaining stationary, and the timestamp is still authentic. - **Quantum adversaries.** All DH operations here use X25519 (256-bit elliptic curve). A cryptographically relevant quantum computer running Shor's algorithm could break these. See §12. 
--- From b3a458947a22ca26597a207c95aeb2805613aadc Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 17:44:18 +0200 Subject: [PATCH 11/13] docs: minor spec corrections batch (issue #10) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - §5.3 step 2: prev_chain_len is in the encrypted header, not the body - §8.3.1(5): clarify counter resets to 0 so first message is seq=1; state initial recv_msg_num=0 - §4.2: fix EK_A.pub → EK_B.pub in the observer/retroactive sentence - §9: document all three PROTOCOL_VERSION encodings (JSON int, 1-byte header plaintext, 4-byte body AAD) with rationale for each width - §3.4: define formatSafetyNumber algorithm (5-byte chunks, 40-bit big-endian, mod 100,000, zero-padded, 3×4 groups); §4.4 back-ref - §4.4: specify full header-key schedule across bootstrap and each KDF_RK step, including Alice's eager-ratchet promotion - §9.1.2: add max plaintext (511 bytes) and de-padding rule (0x80 marker, scan backwards, reject if absent or non-zero before marker) - §7.4.2: "GCM overhead" → "Poly1305 tag (16 bytes)" - AGENTS.md: AES-256-GCM → ChaCha20-Poly1305 - ToC: add missing section 12 (Signal comparison), renumber to 13 - §2.3 quantum: §12 → §13 - §5.7.2: 7-day window §7.2 → §10.2 - §12.5: §13 → §13.2 Closes issue #10 in TODO_SPEC_UPDATES.md. Co-Authored-By: Claude Sonnet 4.6 --- AGENTS.md | 2 +- TODO_SPEC_UPDATES.md | 36 +++++++--------------------- docs/e2ee-location-sync.md | 48 ++++++++++++++++++++++++++++---------- 3 files changed, 46 insertions(+), 40 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index d12c9d56..c44df193 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -44,7 +44,7 @@ Trust On First Use is NOT a bug; it's a choice (and accepted risk) of the current design. ### E2EE -Uses a standard, bidirectional Double Ratchet protocol with X25519 ephemeral keys, HKDF-SHA-256 for ratcheting, and AES-256-GCM for encryption. See `docs/e2ee-location-sync.md` for the full protocol spec. 
+Uses a standard, bidirectional Double Ratchet protocol with X25519 ephemeral keys, HKDF-SHA-256 for ratcheting, and ChaCha20-Poly1305 for encryption. See `docs/e2ee-location-sync.md` for the full protocol spec. --- diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index ab167afd..3c595cf0 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -145,30 +145,12 @@ the user confirms), but the doc implies more coverage than exists. ## 10. Minor spec corrections (doc only) -- [ ] §5.3 step 2: `prev_chain_len` is in the encrypted *header* (§9.1.1), not - the body payload; fix wording (header must be read before body keys can - be derived). -- [ ] §8.3.1(5) vs §5.4.1: msg_num "resets to 0" vs first message is `seq == 1`. - Pick one convention; state initial `recv_msg_num`. -- [ ] §4.2: observer sees `EK_B.pub` (not `EK_A.pub`) in `KeyExchangeInit`. -- [ ] §9.1.1 vs §9.1.2 vs §9 top: PROTOCOL_VERSION is 1 byte in header, 4 bytes - in body AAD, plus a JSON `"v": 1` — three encodings of one logical field. - Nail down a single canonical byte layout. -- [ ] §3.4 / §4.4: `formatSafetyNumber` is unspecified. 60 bytes rendered as - "12 groups of 5 decimal digits" doesn't define the reduction (e.g., - 5-byte chunks mod 10^5, a la Signal) — the encoding determines the real - comparison strength. Define it. -- [ ] §4.4 / §8.3: specify the full header-key schedule, not just the initial - assignment. Assign direction for the initial shared `next_header_key` - (send-next vs recv-next, given Alice's eager ratchet), AND state how the - single `new_header_key` emitted by each KDF_RK step rolls into the - current/next header-key pair per epoch. As written, a reasonable - implementer can produce a schedule that cannot decrypt out-of-order - new-epoch headers, breaking the §5.3 reliability property. -- [ ] §9.1.2: state max plaintext is 511 bytes (≥1 pad byte always required) - and the de-padding rule (scan back to last 0x80; reject if absent). 
-- [ ] §7.4.2: "GCM overhead" → ChaCha20-Poly1305. Also fix stale "AES-256-GCM" - claim in CLAUDE.md's E2EE summary. -- [ ] Fix stale cross-references: ToC lists 12 sections (doc has 13); §2.3 / - §12.5 quantum pointers; §5.7.2 cites §7.2 for the 7-day window (it's - §10.2/§13.4); §3.4 cites "§8.3 format" for safety-number rendering. +- [x] §5.3 step 2: fixed — pn is in the encrypted header, not body payload. +- [x] §8.3.1(5): clarified — counter resets to 0, first message is seq=1, initial recv_msg_num=0. +- [x] §4.2: fixed EK_A.pub → EK_B.pub in observer sentence. +- [x] §9: documented all three PROTOCOL_VERSION encodings (JSON int, 1-byte header, 4-byte AAD) with rationale. +- [x] §3.4: defined formatSafetyNumber algorithm inline (5-byte chunks, mod 100,000, zero-pad); §4.4 back-references §3.4. +- [x] §4.4: specified full header-key schedule across bootstrap and DH ratchet steps. +- [x] §9.1.2: added max plaintext (511 bytes) and de-padding rule. +- [x] §7.4.2: "GCM overhead" → "Poly1305 tag (16 bytes)"; CLAUDE.md/AGENTS.md AES-256-GCM → ChaCha20-Poly1305. +- [x] Cross-references: ToC updated to 13 sections; §2.3 quantum → §13; §5.7.2 §7.2 → §10.2; §12.5 → §13.2; §3.4 formatSafetyNumber self-contained. diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index ff96c34d..4a7060e2 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -19,7 +19,8 @@ 9. [Wire Format](#9-wire-format) 10. [Server Changes](#10-server-changes) 11. [Cryptographic Primitives Summary](#11-cryptographic-primitives-summary) -12. [Open Questions and Future Work](#12-open-questions-and-future-work) +12. [Comparison with Signal Protocol (libsignal)](#12-comparison-with-signal-protocol-libsignal) +13. 
[Open Questions and Future Work](#13-open-questions-and-future-work) --- @@ -118,7 +119,7 @@ To ensure robustness against network failures, the protocol employs **Server-sid - **Compromised backups revealing future epochs.** Because the current implementation persists the active ratchet private key (`localDhPriv`) to ensure session stability across app restarts (§5.5), an attacker who recovers a device backup gains access to the current `localDhPriv` and header keys. They can decrypt headers of future messages to observe the peer's new DH public keys, and use `localDhPriv` to advance the root key through each subsequent peer DH epoch — tracking all future message keys until the compromised device generates a fresh DH keypair and the peer ratchets against it. In practice, the exposure window is bounded by the peer's message cadence: the ratchet self-heals after the next complete DH exchange (roughly one location-update interval if both parties are active), but stalls if the peer is offline. Historical messages remain protected by forward secrecy: deleted chain keys cannot be recovered from the backup snapshot. See §5.6 for a detailed analysis of compromise consequences and self-healing (PCS). As mitigation, the `localDhPriv` key is excluded from cloud backups (see §5.5). - **Map tile server leakage.** When a recipient views a friend's location on a map, the map provider (e.g., Google Maps, Apple Maps, Mapbox) may infer the friend's location from which tiles the recipient's device requests. This can be mitigated at the application layer via tile pre-fetching or caching, but is outside the scope of this protocol. - **Denial of service / message withholding.** This protocol does not protect against a server that drops or delays messages. From the receiver's perspective, a server withholding new updates is indistinguishable from the sender going offline or staying stationary. 
The authenticated `ts` field in every message means the receiver can always display an honest "last seen at X" timestamp; no fabricated location or timestamp is possible. The one edge case is a `stationary` flag (§5.7.1): if a server happens to withhold all messages after a stationary update, the receiver's UI may display "here since X" indefinitely — but this is indistinguishable from the sender genuinely remaining stationary, and the timestamp is still authentic. -- **Quantum adversaries.** All DH operations here use X25519 (256-bit elliptic curve). A cryptographically relevant quantum computer running Shor's algorithm could break these. See §12. +- **Quantum adversaries.** All DH operations here use X25519 (256-bit elliptic curve). A cryptographically relevant quantum computer running Shor's algorithm could break these. See §13. --- @@ -157,7 +158,7 @@ This protocol uses **Trust-on-First-Use (TOFU)** with local session pinning. **Safety Numbers:** Two users can optionally verify their connection by comparing a safety number fingerprint. - **Calculation:** `HKDF-SHA-256(ikm=SHA-256(lower_EK.pub || higher_EK.pub), salt=null, info="Where-v1-SafetyNumber", length=60)`. -- The result is displayed as 12 groups of 5 decimal digits (consistent with §8.3 format). +- **Rendering (`formatSafetyNumber`):** Split the 60 bytes into 12 consecutive 5-byte chunks. Interpret each chunk as a 40-bit big-endian unsigned integer, take the value modulo 100,000, and zero-pad to 5 decimal digits. Display as 3 lines of 4 space-separated groups (e.g. `"12345 67890 11111 22222"`). This encoding gives each group ~17 bits of entropy (log₂(100,000) ≈ 16.6 bits), for a total of ~199 bits across 12 groups. - This is **session-scoped**: the Safety Number is unique to the specific pairing event, not to a device. Every re-pairing after a device reset produces a new Safety Number. 
**Risk:** If the invite link (Option B, §4.3) is intercepted over an unauthenticated channel (e.g., SMS), an attacker can substitute their own key. Fingerprint verification is the primary countermeasure. @@ -200,7 +201,7 @@ discovery_token_A = HKDF-SHA-256(IKM = Alice.discovery_secret, // 32-byte ran info = "Where-v1-Discovery")[0:16] ``` -Using a random secret (rather than `EK_A.pub`) as HKDF IKM ensures that only someone who received the QR out-of-band can compute `discovery_token_A`. A network observer who later sees `EK_A.pub` in Bob's `KeyExchangeInit` message cannot retroactively map it to the discovery-phase mailbox. +Using a random secret (rather than `EK_A.pub`) as HKDF IKM ensures that only someone who received the QR out-of-band can compute `discovery_token_A`. A network observer who later sees `EK_B.pub` in Bob's `KeyExchangeInit` message cannot retroactively map it to the discovery-phase mailbox. - Alice begins polling `GET /inbox/{hex(discovery_token_A)}` immediately. - Bob derives the same `discovery_token_A` from the scanned `discovery_secret` and POSTs his `KeyExchangeInit` there. @@ -237,7 +238,7 @@ bob_fp = SHA-256(EK_B.pub) // 32 bytes // Safety Number (for out-of-band verification) safety_number_bytes = HKDF-SHA-256(ikm=SHA-256(lower_EK.pub || higher_EK.pub), salt=null, info="Where-v1-SafetyNumber", length=60) -safety_number = formatSafetyNumber(safety_number_bytes) +safety_number = formatSafetyNumber(safety_number_bytes) // encoding defined in §3.4 ``` **Key Confirmation:** @@ -272,6 +273,18 @@ Both parties initialize their Double Ratchet state (§8.2) seeded with a root ke - **Alice:** Uses `send_chain = chain_key_0`, `recv_chain = chain_key_1`, `send_header_key = header_key_0`, `recv_header_key = header_key_1`. - **Bob:** Uses `send_chain = chain_key_1`, `recv_chain = chain_key_0`, `send_header_key = header_key_1`, `recv_header_key = header_key_0`. - **Root Key:** Both start with `root_key = root_key_0`. 
+- **`next_header_key`** (bytes [160:192]) is shared by both parties as the header key for the first DH-ratchet epoch: Bob holds it as his next receive header key, and Alice promotes it to her send header key at her eager ratchet (see the schedule below). + +**Header-key schedule across DH ratchet steps:** + +After Alice's eager ratchet (§4.4 step 10), Alice promotes `next_header_key` to her `send_header_key` for Epoch 1, retaining `header_key_1` as her receive key for Bob's Epoch 0 messages. Her new `next_header_key` comes from the KDF_RK send step of the eager ratchet. + +On each subsequent DH ratchet step, two `KDF_RK` sub-steps are performed — one for the receive direction, one for the send direction — each producing a `new_header_key`. The schedule is: +- `recv_header_key` ← old `next_header_key` (the previous next becomes the current receive key) +- `send_header_key` ← `new_header_key` from the **recv** KDF_RK sub-step +- `next_header_key` ← `new_header_key` from the **send** KDF_RK sub-step + +This means a receiver always holds exactly two receive header keys at any time (`recv_header_key` and `next_header_key`), enabling decryption of both current-epoch and one-step-ahead new-epoch headers without retaining any retired keys. Initial routing tokens are also derived from `SK`: @@ -362,7 +375,7 @@ To handle out-of-order delivery across DH ratchet steps, the receiver maintains **Out-of-Order DH Epoch Transitions:** If Alice ratchets her DH key from `dh_1` to `dh_2`, Bob may receive `Msg(dh_2, seq=1)` before he receives `Msg(dh_1, seq=last)`. 1. **Speculative Ratchet:** Bob moves to the new DH epoch upon receiving `Msg(dh_2)`. -2. **Decryption:** Bob decrypts the payload of `Msg(dh_2)` to extract the **encrypted `prev_chain_len` field** (§7.4). +2. **Decryption:** Bob decrypts the *header* of `Msg(dh_2)` to extract `prev_chain_len` (`pn` in the header plaintext, §9.1.1). The header must be decrypted before body keys can be derived. 3. **Gap Filling:** The `prev_chain_len` field tells Bob exactly how many messages Alice sent in epoch `dh_1`.
Bob goes back to the old chain, derives all remaining keys up to `prev_chain_len`, and stores them in the cache. 4. **Historical Delivery:** When `Msg(dh_1, msg_num=last)` eventually arrives, Bob retrieves the key from the cache, verifies the AAD, and decrypts. @@ -492,7 +505,7 @@ A `Location` carrying `stationary: true` is a signal that the sender does not ex A `StoppedSharing` message signals the deliberate end of a share session (manual toggle-off, or expiry of a time-limited share — the protocol does not know which). Receivers SHOULD reflect a terminal state in their UI for some bounded window, then suppress the peer's pin entirely. **Critical sender rule: "stop sharing" means "stop sending Locations," not "stop talking."** -After emitting `StoppedSharing`, the sender MUST continue its normal Keepalive cadence. The 7-day inactivity window (§7.2) tears down a peer's session entirely once `lastRecvTs` exceeds the timeout; if a sender both stopped Locations *and* stopped Keepalives, every recipient would silently expire the session within a week and lose all post-compromise security guarantees on the channel. Resuming sharing later would require fresh pairing. Keepalives also continue to drive the DH ratchet forward, preserving PCS during the quiet period. +After emitting `StoppedSharing`, the sender MUST continue its normal Keepalive cadence. The 7-day inactivity window (§10.2) tears down a peer's session entirely once `lastRecvTs` exceeds the timeout; if a sender both stopped Locations *and* stopped Keepalives, every recipient would silently expire the session within a week and lose all post-compromise security guarantees on the channel. Resuming sharing later would require fresh pairing. Keepalives also continue to drive the DH ratchet forward, preserving PCS during the quiet period. 
#### 5.7.3 Forward-Compatibility — Unknown Message Types @@ -594,7 +607,7 @@ This removes session-related metadata from the server's payload view, but it doe #### 7.4.2 Payload padding -- **Payload padding (mandatory):** All payloads MUST be padded to a fixed length (512 bytes recommended) before encryption. 256 bytes is insufficient: a JSON location payload plus GCM overhead already approaches ~150 bytes, leaving little headroom for variable-length fields. 512 bytes provides comfortable clearance while remaining a small fixed multiple of a cache line. +- **Payload padding (mandatory):** All payloads MUST be padded to a fixed length (512 bytes recommended) before encryption. 256 bytes is insufficient: a JSON location payload plus the 16-byte Poly1305 tag already approaches ~150 bytes, leaving little headroom for variable-length fields. 512 bytes provides comfortable clearance while remaining a small fixed multiple of a cache line. #### 7.4.3 Polling Strategy @@ -702,7 +715,7 @@ Each message frame carries a `msg_num` counter. Recipients enforce: 4. **Transactional Commitment:** The receiving state (receiving chain, root key, skipped keys) MUST only be updated if the message AEAD authentication succeeds. The receiving state MUST not be committed earlier. *Exception:* if the header authenticated but the body AEAD failed, the receiver MUST still advance `recv_msg_num` and the chain key. Without this, a server-dropped message and a server-corrupted message would leave different ratchet states, causing permanent DH desync. The failed message's key MUST NOT be cached — caching it confers no robustness benefit (a server willing to corrupt a message can equally drop it) and unnecessarily extends the key's lifetime. -5. **Epoch Transition:** When a message with a new `dh_pub` is received, the `msg_num` counter resets to 0. All skipped message keys belonging to epochs older than the *previous* valid epoch MUST be cleared. +5. 
**Epoch Transition:** When a message with a new `dh_pub` is received, the `msg_num` counter resets to 0 (so the first message in the new epoch has `seq == 1`). All skipped message keys belonging to epochs older than the *previous* valid epoch MUST be cleared. The initial `recv_msg_num` is 0; the replay check is `seq <= recv_msg_num`, so seq=1 is accepted. 6. **Across-Epoch Replay:** Recipients hold only two receive header keys at any time — the current epoch's `header_key` and the next epoch's `next_header_key`. The previous epoch's receive header key is discarded on DH ratchet (`Session.performDhRatchet`). A replayed frame from a retired epoch therefore fails `tryDecryptHeader` and is dropped before any ratchet logic runs, with no dedicated `retired_dh_pubs` set required. Within-epoch replay is caught by the `msg_num <= recv_msg_num` check plus the single-use skipped-key cache. @@ -712,6 +725,13 @@ Each message frame carries a `msg_num` counter. Recipients enforce: All messages are JSON-encoded. Every message MUST include a top-level `"v"` field set to the current protocol version (currently `1`). This enables recipients to reject messages from incompatible future versions. +**PROTOCOL_VERSION encodings:** the version field appears in three distinct contexts with different byte widths, each serving a different purpose: +- **JSON `"v"` field** (integer `1`): outer versioning, allows a recipient to reject an incompatible message before attempting any crypto. +- **Encrypted header** (1 byte, `0x01`, offset 0 of the 82-byte header plaintext): checked after header decryption to catch version mismatches inside the sealed envelope. +- **Body AAD** (4 bytes big-endian, `0x00000001`): bound into the AEAD tag, ensuring the version is authenticated as part of the message integrity check. + +All three must be consistent for a valid message. 
The different widths are intentional: the header byte is space-constrained (fixed 82-byte plaintext); the AAD uses a 4-byte int for straightforward serialization. + ### 9.1 Encrypted Location Frame The mailbox payload for a standard location update is a JSON object with `type: "EncryptedMessage"`. Metadata (`dh_pub`, `msg_num`, `ack_remote_dh_pub`, etc.) is hidden within an encrypted `envelope` to prevent server correlation. @@ -753,7 +773,7 @@ The `ct` field contains the ChaCha20-Poly1305 ciphertext of the location payload **AAD for Ciphertext:** - `AAD_PREFIX` ("Where-v1-Message") -- `PROTOCOL_VERSION` (4 bytes, `0x01`) +- `PROTOCOL_VERSION` (4 bytes big-endian, `0x00000001`) - `sender_fp` (32 bytes) - `recipient_fp` (32 bytes) - `msg_num` (8 bytes, big-endian uint64) @@ -765,7 +785,11 @@ Implementations MUST parse the `msg_num` field as a uint64 integer and serialize Note that even though metadata is hidden from the server in the envelope, the client uses the *decrypted* values to verify the body AAD, ensuring the body and header are cryptographically bound together. **Plaintext (before encryption):** -The plaintext is a JSON object. All plaintext payloads MUST be padded with 0x80 then 0x00 bytes to a fixed 512-byte length **before** encryption. The padding is included in the plaintext and authenticated by the AEAD tag. +The plaintext is a JSON object. All plaintext payloads MUST be padded to a fixed 512-byte length **before** encryption. The padding is included in the plaintext and authenticated by the AEAD tag. + +**Padding rule:** append `0x80` immediately after the last byte of plaintext, then fill the remaining bytes with `0x00` to reach 512 bytes. The plaintext MUST be at most 511 bytes (at least one `0x80` byte is always required). Implementations MUST reject plaintexts of 512 bytes or more at encode time. + +**De-padding rule:** scan backwards from the end of the decrypted 512-byte buffer. Skip `0x00` bytes. 
The first non-zero byte MUST be `0x80`; all bytes preceding it in the buffer are the plaintext. Reject (throw) if no `0x80` marker is found, or if the backward scan encounters any byte other than `0x00` before reaching the marker (i.e. the last non-zero byte of the buffer is not `0x80`). Every plaintext object carries a string `"type"` discriminator identifying its variant. Receivers MUST dispatch on `"type"` and MUST treat any unknown value as a Keepalive (see §5.7.3 for the forward-compatibility contract). Receivers MUST also accept the legacy schemas defined below for backwards-compatibility. @@ -915,7 +939,7 @@ While this protocol shares the core **Double Ratchet** design with Signal, it ma ### 12.5 Post-Quantum Resistance - **Signal:** Recently introduced **PQXDH** and **SPQR**, incorporating Kyber into the initial handshake and ratchet to provide post-quantum confidentiality. -- **Where:** Currently **not quantum-resistant**. The protocol relies entirely on X25519 (ECDH). Quantum resistance is recognized as a future requirement but is not part of the v1 implementation (see §13). +- **Where:** Currently **not quantum-resistant**. The protocol relies entirely on X25519 (ECDH). Quantum resistance is recognized as a future requirement but is not part of the v1 implementation (see §13.2). ### 12.6 Safety Numbers From 514a398a82fd541978ff92e940ca9dc7124b8df9 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 22:25:55 +0200 Subject: [PATCH 12/13] docs: safety number scope and fingerprint field removal (issue #12) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - §3.4: state explicitly that safety-number verification authenticates keys only (EK_A.pub, EK_B.pub), not the suggested name or any other invite payload field - §4.2: remove fingerprint field from QR payload spec. QR codes have built-in Reed-Solomon error correction; the field is fully derivable from ek_pub and provides no integrity an adversary cannot recompute. Two-stage impl removal tracked in issues #313 (stop verifying) and #314 (stop sending).
- Binding suggested_name into the safety number: won't fix (name is a user-confirmed pre-fill with no cryptographic standing per §3.2) - Token-follow IP linkage (#9 optional): tracked in issue #312 Closes issue #12 in TODO_SPEC_UPDATES.md. Co-Authored-By: Claude Sonnet 4.6 --- TODO_SPEC_UPDATES.md | 20 +++++++++++--------- docs/e2ee-location-sync.md | 3 ++- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md index 3c595cf0..5527fa45 100644 --- a/TODO_SPEC_UPDATES.md +++ b/TODO_SPEC_UPDATES.md @@ -114,7 +114,7 @@ polling targets in adjacent cycles. Align with §2.3/§7.3 admissions. same client IP polling T_old then T_new remains a metadata linkage (§2.3/§7.3). - [ ] Optional: decorrelate the receiver's polling switch from T_old to T_new in time (jitter, or switch on the next regular cycle) to weaken the - deterministic linkage. + deterministic linkage. Tracked in https://github.com/danmarg/where/issues/312. ## 11. Freshness lower-bound: stale-pinning by a withholding server (NEW — HIGH) @@ -134,14 +134,16 @@ pre-seeds Bob's naming dialog with an attacker-chosen label while safety-number verification still passes. Severity is capped by §3.2 (name is only a pre-fill the user confirms), but the doc implies more coverage than exists. -- [ ] State explicitly in §3.4 that safety-number verification authenticates - keys only, not the displayed/suggested name. -- [ ] Consider folding a hash of the full invite payload into the - safety-number/confirmation transcript (wire-compatible for the QR; - check `key_confirmation` implications). -- [ ] Drop the redundant `fingerprint` field from the invite payload, or - document it as a non-security convenience (it is derivable from `ek_pub` - and provides no integrity an adversary can't recompute). +- [x] Stated explicitly in §3.4 that safety-number verification authenticates + keys only, not the suggested name. +- [x] Binding the name into the safety number: won't fix. 
The name is a + user-confirmed pre-fill with no cryptographic standing (§3.2); the attack + is social engineering, not a key-layer weakness. +- [x] Removed `fingerprint` from the spec QR payload. QR codes have built-in + Reed-Solomon error correction; the field is redundant with `ek_pub` and + provides no integrity an adversary can't recompute. Two-stage impl removal + tracked in https://github.com/danmarg/where/issues/313 (Stage 1: stop verifying) + and https://github.com/danmarg/where/issues/314 (Stage 2: stop sending). ## 10. Minor spec corrections (doc only) diff --git a/docs/e2ee-location-sync.md b/docs/e2ee-location-sync.md index 4a7060e2..0f8bfa13 100644 --- a/docs/e2ee-location-sync.md +++ b/docs/e2ee-location-sync.md @@ -161,6 +161,8 @@ This protocol uses **Trust-on-First-Use (TOFU)** with local session pinning. - **Rendering (`formatSafetyNumber`):** Split the 60 bytes into 12 consecutive 5-byte chunks. Interpret each chunk as a 40-bit big-endian unsigned integer, take the value modulo 100,000, and zero-pad to 5 decimal digits. Display as 3 lines of 4 space-separated groups (e.g. `"12345 67890 11111 22222"`). This encoding gives each group ~17 bits of entropy (log₂(100,000) ≈ 16.6 bits), for a total of ~199 bits across 12 groups. - This is **session-scoped**: the Safety Number is unique to the specific pairing event, not to a device. Every re-pairing after a device reset produces a new Safety Number. +**What safety-number verification covers:** The safety number is derived solely from the two bootstrap public keys `EK_A.pub` and `EK_B.pub`. It authenticates the key material only — it does NOT cover the `suggested_name` pre-fill or any other invite payload field (including the `fingerprint` convenience field that legacy clients may still send; it is no longer part of the §4.2 payload). An attacker who tampers with the invite's suggested name while leaving `ek_pub` intact will pass safety-number verification; the name is merely a user-confirmed pre-fill (§3.2) with no cryptographic standing.
+ **Risk:** If the invite link (Option B, §4.3) is intercepted over an unauthenticated channel (e.g., SMS), an attacker can substitute their own key. Fingerprint verification is the primary countermeasure. --- @@ -184,7 +186,6 @@ Alice opens "Add Friend" and generates a fresh ephemeral key pair `EK_A` and a f { "ek_pub": base64(Alice.EK_A.pub), // X25519 ephemeral public key (32 bytes) "suggested_name": "Alice", - "fingerprint": hex(SHA-256(EK_A.pub)[0:20]), "discovery_secret": base64(random_32_bytes) // fresh per QR; HKDF IKM for discovery token } ``` From 037efbc71925036fb76792195648ff9f9b5087a0 Mon Sep 17 00:00:00 2001 From: Dan Margolis Date: Sat, 13 Jun 2026 22:43:54 +0200 Subject: [PATCH 13/13] chore: untrack TODO_SPEC_UPDATES.md, add to .gitignore Working document; should not be in version control. Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 1 + TODO_SPEC_UPDATES.md | 158 ------------------------------------------- 2 files changed, 1 insertion(+), 158 deletions(-) delete mode 100644 TODO_SPEC_UPDATES.md diff --git a/.gitignore b/.gitignore index bc01d3a4..84637359 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ cli/where_cli_state.json *.swp *.txt *.log +TODO_SPEC_UPDATES.md diff --git a/TODO_SPEC_UPDATES.md b/TODO_SPEC_UPDATES.md deleted file mode 100644 index 5527fa45..00000000 --- a/TODO_SPEC_UPDATES.md +++ /dev/null @@ -1,158 +0,0 @@ -# TODO: Spec & Implementation Updates from E2EE Design Doc Review - -Source: review of `docs/e2ee-location-sync.md` (2026-06-12). - -## 1. Remove plaintext `token` field from `KeyExchangeInit` (breaking — two-stage rollout) - -The plaintext `T_AB_0` in `KeyExchangeInit` lets the server link the discovery -mailbox to the session's first-epoch routing token, defeating the stated purpose -of `discovery_secret` (§4.2). The field is redundant: Alice derives `T_AB_0` -independently and must use her derived value regardless. 
- -- [x] **Stage 1 (client tolerance):** In the `aliceProcessInit` path, parse - `token` as optional and ignore it entirely (do not conditionally verify). - Only the inviter/receiver side checks this field; sender untouched. -- [x] **Stage 1 (spec):** Removed `token` from §4.4 (step 7) and §9.3 entirely - (single implementation — no deprecation period needed in the spec). -- [ ] **Stage 2:** Stop sending the field in `KeyExchangeInit` (`bobProcessQr` - and `processScannedQr`). -- No protocol version bump needed: removing an ignored optional field is not a - wire break once stage 1 is deployed. Failure mode during version skew is - benign — pairing is a live, interactive event; a failed pairing is visible - and retriable, with no persisted-session corruption. - -## 2. Correct the "private keys deleted immediately" claims (doc only) - -§3.1, §4.1, §4.4 claim `EK_B.priv` is deleted immediately after `SK` -derivation, but §5.5 (correctly) copies it into `localDhPriv`, where it -persists until Bob's first DH ratchet completes. Required — Bob could not -otherwise process Alice's eager-ratchet message. - -- [x] Fix §3.1, §4.1, §4.4 to state that Bob's bootstrap private key survives - as `localDhPriv` until his first ratchet. -- [x] Add a note on the security implication: during that window, a device or - backup compromise plus the public QR payload allows reconstruction of `SK`. - -## 3. Resolve the `prev_recv_token` contradiction (doc only — pick one) - -§5.4.2 says the receiver polls exactly one token (no `prev_recv_token` -polling); §9.2 says the receiver MUST also poll `prev_recv_token` during epoch -transition; §8.2 `SessionState` carries `prev_recv_token`. - -- [x] Determine which behavior the implementation actually has. - (Single-token polling per §5.4.2; `prevRecvToken` does not exist in SessionState - or polling logic; §9.2 and §8.2 were wrong.) -- [x] Make §5.4.2, §8.2, and §9.2 agree. 
Note metadata impact if two-token - polling is the real behavior (doubles per-friend poll fingerprint). - (Removed `prev_recv_token` from §8.2 SessionState; rewrote §9.2 to match §5.4.2; - noted the metadata benefit of single-token design.) - -## 4. Reconcile the two KDF_CK definitions (doc only) - -§8.3: `MK = HMAC(CK, 0x01)`, `CK' = HMAC(CK, 0x02)`, nonce via separate HKDF. -§11 table: single 76-byte HKDF expand for message key + nonce. These produce -different bytes. - -- [x] Check the implementation, fix whichever section is wrong. - (§8.3 is correct: HMAC for MK/CK', separate HKDF for nonce. Fixed §11 table - and the incorrect file-header comment in Ratchet.kt.) - -## 5. Specify discovery-mailbox multiple-responder behavior (doc, maybe impl) - -First-responder-wins is currently implicit; a hijacker racing Bob causes -legitimate Bob to fail *silently* (he believes pairing succeeded). Note also -that the server controls GET ordering ("up to 50 from the front of the -queue"), i.e., a malicious server picks which `KeyExchangeInit` Alice sees -first. - -- [x] Specify behavior when multiple `KeyExchangeInit` messages are present in - the discovery mailbox: process all of them (process-all, not first-responder- - wins), surface a count to Alice, prompt Safety Number verification per session. - This eliminates silent displacement and aligns with issue #233. -- [x] Specified behavior differs from implementation (current: first-responder-wins). - Impl work tracked in https://github.com/danmarg/where/issues/233. - -## 6. Reconcile MAX_GAP (10,000) vs skipped-key cache (1,000) (doc, maybe impl) - -A near-MAX_GAP jump derives up to 10,000 skipped keys into a 1,000-entry -cache; 90% are evicted immediately and those messages are silently lost. - -- [x] Removed MAX_GAP constant; same-epoch gap limit is now MAX_SKIPPED_KEYS (1,000) - at both the coarse check and the cache pre-check. Updated §8.3.1 and Session.kt. 
-- [x] Noted DoS bound (≤1,000 HMACs/frame, not 10,000) and cross-epoch silent-loss - behavior in §8.3.1. - -## 7. Document residual FS cost of failed-body key caching (doc only) - -§8.3.1(4) deviation caches `MK_n` for body-failed messages; a malicious server -can trigger this at will, keeping keys alive until age-based eviction. §5.5 -rule 2 doesn't cover the never-used case. - -- [x] Removed the body-fail seq-key caching entirely (no robustness benefit; - server can drop instead). §8.3.1(4) now requires advancing state on body-fail - but explicitly forbids caching the key. Test updated accordingly. - -## 8. Specify handling of header-undecryptable frames (doc, maybe impl) - -Frames failing `tryDecryptHeader` are "dropped" but never DELETEd; they are -re-fetched every poll for up to 7 days and can starve the 50-message GET window. -Also tension with §5.4.4 "duplicates MUST be ACKable": a duplicate transition -message arriving after ratchet is header-undecryptable. - -- [x] Specified in §5.4.4: header-undecryptable frames are not immediately deleted - (can't distinguish garbage from a future-epoch message); starvation is bounded - by force-ACK after MAX_SILENT_DROP_RETRIES consecutive failed polls (~2.5 min). - Addressed the post-ratchet duplicate tension. - -## 9. Soften §12.3 cross-epoch correlation claim (doc only) - -"Impossible for the server to correlate messages across epochs" is overstated: -the transition message posts to the old token, and the receiver's IP switches -polling targets in adjacent cycles. Align with §2.3/§7.3 admissions. - -- [x] Reworded §12.3: token rotation prevents content-layer correlation only; - same client IP polling T_old then T_new remains a metadata linkage (§2.3/§7.3). -- [ ] Optional: decorrelate the receiver's polling switch from T_old to T_new - in time (jitter, or switch on the next regular cycle) to weaken the - deterministic linkage. Tracked in https://github.com/danmarg/where/issues/312. - -## 11. 
Freshness lower-bound: stale-pinning by a withholding server (NEW — HIGH) - -- [x] Won't fix at the protocol layer. A withholding server is indistinguishable - from the sender going offline or staying stationary. The authenticated `ts` - ensures the receiver always displays an honest last-seen time — no fabricated - location or timestamp is possible. The stationary-flag edge case ("here since X" - displayed indefinitely) is WAI: it is indistinguishable from genuine stationarity - and the timestamp remains authentic. Added a note to §2.3. - -## 12. Safety number does not authenticate the rest of the invite payload (NEW) - -The safety number covers only the two `ek_pub`s. `suggested_name` (and the -redundant `fingerprint`) in the QR/link are not bound to it: an attacker who -tampers the invite's name but leaves `ek_pub` intact gets no keys, yet -pre-seeds Bob's naming dialog with an attacker-chosen label while safety-number -verification still passes. Severity is capped by §3.2 (name is only a pre-fill -the user confirms), but the doc implies more coverage than exists. - -- [x] Stated explicitly in §3.4 that safety-number verification authenticates - keys only, not the suggested name. -- [x] Binding the name into the safety number: won't fix. The name is a - user-confirmed pre-fill with no cryptographic standing (§3.2); the attack - is social engineering, not a key-layer weakness. -- [x] Removed `fingerprint` from the spec QR payload. QR codes have built-in - Reed-Solomon error correction; the field is redundant with `ek_pub` and - provides no integrity an adversary can't recompute. Two-stage impl removal - tracked in https://github.com/danmarg/where/issues/313 (Stage 1: stop verifying) - and https://github.com/danmarg/where/issues/314 (Stage 2: stop sending). - -## 10. Minor spec corrections (doc only) - -- [x] §5.3 step 2: fixed — pn is in the encrypted header, not body payload. 
-- [x] §8.3.1(5): clarified — counter resets to 0, first message is seq=1, initial recv_msg_num=0. -- [x] §4.2: fixed EK_A.pub → EK_B.pub in observer sentence. -- [x] §9: documented all three PROTOCOL_VERSION encodings (JSON int, 1-byte header, 4-byte AAD) with rationale. -- [x] §3.4: defined formatSafetyNumber algorithm inline (5-byte chunks, mod 100,000, zero-pad); §4.4 back-references §3.4. -- [x] §4.4: specified full header-key schedule across bootstrap and DH ratchet steps. -- [x] §9.1.2: added max plaintext (511 bytes) and de-padding rule. -- [x] §7.4.2: "GCM overhead" → "Poly1305 tag (16 bytes)"; CLAUDE.md/AGENTS.md AES-256-GCM → ChaCha20-Poly1305. -- [x] Cross-references: ToC updated to 13 sections; §2.3 quantum → §13; §5.7.2 §7.2 → §10.2; §12.5 → §13.2; §3.4 formatSafetyNumber self-contained.