diff --git a/.github/workflows/multi_node_tests.yml b/.github/workflows/multi_node_tests.yml
new file mode 100644
index 0000000..0a557ee
--- /dev/null
+++ b/.github/workflows/multi_node_tests.yml
@@ -0,0 +1,92 @@
+name: Multi-Node Integration Tests
+
+# R-28b — multi-node Testcontainers Compose CI; runs nightly and on manual dispatch, not on every PR.
+# Spins up a 3-node OpenSearch cluster and runs the [TestCategory("MultiNode")]
+# tests that exercise behaviors single-node Testcontainers fundamentally
+# masks (GREEN-threshold, replica allocation, shard relocation under load,
+# PA-2 lock-index replicas:0 invariant).
+#
+# This workflow is intentionally separate from the shared `Run Tests`
+# workflow because:
+# - It requires Docker (the shared workflow may not).
+# - It is heavier than unit tests (3 JVMs at ~512MB each, ~30s cluster
+# formation per test class).
+# - It compiles the integration test assembly with EnableIntegrationTests
+# (which flips the `#if INTEGRATIONS` gate) — a property-driven
+# define-constants flip rather than a source-level edit.
+
+on:
+ schedule:
+ # Nightly at 03:00 UTC. Multi-node Testcontainers (3 OpenSearch JVMs)
+ # is too heavy and currently too flaky on shared `ubuntu-latest` PR
+ # runners to gate PRs (connection-reset under load on a single-endpoint
+ # connection pool, and inter-class container churn). The tests pass
+ # locally; running them nightly catches regressions without holding up
+ # PR merges. Stabilization for PR-trigger is tracked as follow-up work.
+ - cron: '0 3 * * *'
+ workflow_dispatch:
+
+permissions:
+ contents: read
+
+concurrency:
+ group: multi-node-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ multi-node:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Setup .NET
+ uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: |
+ 8.0.x
+ 9.0.x
+ 10.0.x
+
+ - name: Restore
+ run: dotnet restore tests/Hyperbee.Migrations.Integration.Tests/Hyperbee.Migrations.Integration.Tests.csproj
+
+ - name: Build (with EnableIntegrationTests)
+ run: >-
+ dotnet build
+ tests/Hyperbee.Migrations.Integration.Tests/Hyperbee.Migrations.Integration.Tests.csproj
+ -c Release
+ --no-restore
+ /p:EnableIntegrationTests=true
+
+ - name: Run multi-node tests (TestCategory=MultiNode)
+ # Tests use [TestCategory("MultiNode")] so this filter picks them up
+ # without affecting other test classes. The MultiNode test class's
+ # [ClassInitialize] spins up the 3-node cluster.
+ # HYPERBEE_TESTS_SKIP_SINGLE_NODE=true bypasses the assembly-level
+ # single-node container startup (Mongo, Postgres, Couchbase,
+ # Aerospike, single-node OpenSearch) since the MultiNode tests
+ # don't need any of them.
+ env:
+ HYPERBEE_TESTS_SKIP_SINGLE_NODE: "true"
+ run: >-
+ dotnet test
+ tests/Hyperbee.Migrations.Integration.Tests/Hyperbee.Migrations.Integration.Tests.csproj
+ -c Release
+ -f net10.0
+ --no-build
+ --filter "TestCategory=MultiNode"
+ --logger "trx;LogFileName=multinode.trx"
+ --logger "console;verbosity=normal"
+ /p:EnableIntegrationTests=true
+
+ - name: Upload test results
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: multi-node-test-results
+ path: '**/*.trx'
+ if-no-files-found: warn
diff --git a/Directory.Packages.props b/Directory.Packages.props
index 73e3fbc..b4e888c 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -41,6 +41,10 @@
+
+
+
+
diff --git a/Hyperbee.Migrations.slnx b/Hyperbee.Migrations.slnx
index c332121..1a62abc 100644
--- a/Hyperbee.Migrations.slnx
+++ b/Hyperbee.Migrations.slnx
@@ -4,12 +4,14 @@
+
+
@@ -40,6 +42,8 @@
+
+
diff --git a/README.md b/README.md
index a4d7649..bd6e64c 100644
--- a/README.md
+++ b/README.md
@@ -15,9 +15,9 @@ The Cron Helper uses HangFire Cronos.
### Features include:
* Easy integration
-* Supports **Aerospike**, **Couchbase**, **MongoDB** and **PostgreSQL**
+* Supports **Aerospike**, **Couchbase**, **MongoDB**, **OpenSearch**, and **PostgreSQL**
* Resource Migrations
- * Migrations can be defined as embedded resource files (SQL, N1QL, AQL, MongoDB commands, JSON documents) alongside code-based migrations, enabling database changes without recompilation.
+ * Migrations can be defined as embedded resource files (SQL, N1QL, AQL, MongoDB commands, OpenSearch DDL, JSON documents) alongside code-based migrations, enabling database changes without recompilation.
* Preventing simultaneous migrations
* By default, Hyperbee Migrations prevents parallel migration runner execution.
* Profiles
diff --git a/docs/decisions/0011-hybrid-parser-runtime-injection.md b/docs/decisions/0011-hybrid-parser-runtime-injection.md
new file mode 100644
index 0000000..ed2d363
--- /dev/null
+++ b/docs/decisions/0011-hybrid-parser-runtime-injection.md
@@ -0,0 +1,60 @@
+# ADR-0011: Hybrid Parser+Runtime Injection for OpenSearch Safe Defaults
+
+**Status:** Accepted
+**Date:** 2026-05-02
+
+## Context
+
+The OpenSearch provider must apply safe defaults to prevent silent data corruption. Two are load-bearing:
+
+- `op_type: create` injection on `REINDEX` request bodies (closes PM-3 from assessment 0002 — re-runs of a partially-completed reindex would otherwise double-write or skip new docs)
+- `dynamic: strict` injection on `CREATE INDEX` mappings (eliminates mapping explosion; per R-17 must be component-template-aware: skipped when body has `composed_of`)
+
+Two extreme architectures were rejected:
+
+1. **Pure runtime middleware** (Approach A in `/nop:propose` for this provider) — applied during request dispatch on fully-built JSON. Cannot satisfy R-18's parse-time syntactic detection of unsafe ops with file/line/recognized-verb error context; component-template detection requires a JSON-tree walk on every dispatch; UNSAFE/NO WAIT justification token validation must happen at parse anyway. Existing providers (Couchbase, Aerospike, MongoDB) use pure runtime patterns, but those providers don't face JSON-body-merging hazards at OpenSearch's scale.
+
+2. **Pure parser** (Approach B in propose) — AST emits a final correct payload; runtime is a thin transport. Cannot route logs through `SecretScrubber` (R-10/R-25), cannot emit structured WARN events from response paths, cannot observe Tasks API progress. Loses runtime observability entirely.
+
+The assessment 0002 meta-finding established that *"documentation as a fix for correctness hazards on the laziest path is anti-pattern."* Safe defaults must be enforced in code, not documented in samples. The Independent Review's pattern claim (Red-Blue₂ Phase 3.75) was validated 4-of-5 contested, demanding parser-level enforcement for `op_type: create`, component-template-aware `dynamic: strict`, and `ALIAS SWAP` atomic-precondition.
+
+The forces in tension: parse-time correctness (error messages, structural detection, AST-level intent) vs. runtime concerns (live request/response observation, secret scrubbing, structured event emission). Neither extreme satisfies the requirements.
+
+## Decision
+
+We will use a hybrid: parser owns *intent*, runtime owns *execution*.
+
+**Parser layer (Parlot, per ADR-0001) produces:**
+- AST nodes carrying safe-default flags (`op_type:create=true` on `REINDEX`, `dynamic:strict=auto` on `CREATE INDEX`)
+- Component-template-aware flag computation (`dynamic:strict=auto` resolves to off when AST body has `composed_of`)
+- Parse-time syntactic enumeration of unsafe operations (R-18) with file/index/recognized-verb error context
+- UNSAFE/NO WAIT justification token validation (non-empty reason required)
+- Semantic version comparison (R-15a) — parsed to `System.Version` at parse time
+- `MIGRATE INDEX` composite verb decomposition into `CREATE INDEX` + `REINDEX` + `ALIAS SWAP` AST nodes (R-30)
+
+**Runtime middleware layer applies:**
+- `SafeDefaultMergeMiddleware` — merges AST flags into the JSON tree during request build
+- `ImplicitWaitMiddleware` — issues scoped `_cluster/health` per `WaitMode` (R-12)
+- `TasksApiPollMiddleware` — handles `?wait_for_completion=false` flow (R-11)
+- `SecretScrubberSink` — wraps `ILogger`; redacts `SecretMarker` content-hashes from all output (R-10/R-25)
+
+The two layers communicate through the AST. The parser cannot dispatch HTTP; the runtime cannot reject ill-formed grammar.
+
+## Consequences
+
+**Easier:**
+- Parse-time errors carry full positional context (file, statement index, recognized-verb-so-far) — operators don't debug runtime stack traces for grammar issues
+- Component-template detection is structural (presence of `composed_of` key on the AST) — no fragile JSON-tree walking at runtime
+- Safe-default behavior changes are localized: new safe-default → new AST flag + new merge rule; observability changes are middleware-only
+- Consumers extending the grammar add AST nodes with flags; they don't write middleware
+- Unit tests against the parser are fast and don't require an OpenSearch container
+
+**Harder:**
+- Two layers must stay coordinated; the merge logic in middleware must correctly handle arbitrary user-supplied JSON bodies without losing AST flag intent
+- The riskiest assumption in this architecture: runtime middleware can correctly merge AST safe-default flags into user-supplied JSON. This must be validated via a Phase 1 spike before any other implementation work
+- Documentation must distinguish "parser-resolvable" decisions (compile-time) from "runtime-resolvable" decisions (dispatch-time) — failing to teach this distinction breeds confusion among future maintainers
+
+**Constrains:**
+- Any new safe-default behavior must declare its intent at the AST level (parser-resolvable) AND provide a runtime merge path
+- Extending grammar via consumer DI is a parser-side decision (Parlot grammar composition); extending observability is a middleware-side decision
+- Future ADRs about parser changes must consider whether the change requires a corresponding middleware update
diff --git a/docs/decisions/0012-with-production-defaults-extension.md b/docs/decisions/0012-with-production-defaults-extension.md
new file mode 100644
index 0000000..05f04c4
--- /dev/null
+++ b/docs/decisions/0012-with-production-defaults-extension.md
@@ -0,0 +1,61 @@
+# ADR-0012: WithProductionDefaults() Extension Method (Not Environment Profile Enum)
+
+**Status:** Accepted
+**Date:** 2026-05-02
+
+## Context
+
+Several requirements coordinate dev-vs-prod safety defaults that must change together:
+
+- `ClusterHealthThreshold` (R-03): Yellow / Green
+- `WaitMode` (R-12): PerStatement / PerMigration
+- `RequireUnsafeJustification` (R-18): false / true
+- `ContextResolutionPolicy` (R-15): SkipIfUnset / RequireExplicit
+
+In assessment 0002's Synthesis phase (Phase 2), the proposed solution was an `EnvironmentProfile = Development | Production` enum: one operator decision would flip all four behaviors. The synthesis explicitly flagged this as load-bearing — if the maintainer rejected the enum, the entire synthesis would collapse.
+
+Independent Review (Phase 3.5) rejected the enum on three grounds:
+
+1. **Hidden coupling** — flipping `Profile` silently flips four behaviors. The operator sees `Profile = Production` and must remember (or look up) what that implies. This is the laziest-path footgun the Mechanism Design analysis explicitly warns against.
+2. **Contradicts a stated goal** — the user goal "same migrations run unchanged across all three topologies" applies to migration *files*, not DI configuration. An environment enum in DI re-introduces environment-aware switches that consumers reasoned about *not* having.
+3. **Discoverability** — an enum value is set once at config time; an extension method shows in IntelliSense at the registration site, is grep-able in code review, and is callable as part of an audit trail.
+
+Red-Blue₂ (Phase 3.75) resolved this contested point: Red (the IR's position) won; the synthesis was modified.
+
+The forces in tension: operator ergonomics (one decision flips four defaults coherently) vs lazy-path safety (no hidden coupling); maintainer simplicity (one named noun consolidates the behaviors) vs IntelliSense-level discoverability.
+
+## Decision
+
+We will provide `services.AddOpenSearchMigrations(opts => { ... }).WithProductionDefaults();` as the single forcing function for production safety defaults.
+
+The extension method explicitly sets:
+- `ClusterHealthThreshold = Green`
+- `WaitMode = PerMigration`
+- `RequireUnsafeJustification = true`
+- `ContextResolutionPolicy = RequireExplicit`
+
+Per-option settings the operator chains AFTER `WithProductionDefaults()` win — the extension does not re-apply defaults if values were explicitly set later in the chain.
+
+We will NOT provide an `EnvironmentProfile` enum. We will NOT auto-detect production environment from `DOTNET_ENVIRONMENT` / `ASPNETCORE_ENVIRONMENT` and apply defaults silently.
+
+The startup banner (R-25) emits all resolved defaults at INFO so operators verify what's set in production logs.
+
+## Consequences
+
+**Easier:**
+- Production deployments call one discoverable extension; the call site shows what changed without operators reading documentation
+- Audit trails (git blame, code review) trivially identify which deployments use production defaults
+- Resolved defaults visible in production logs (R-25 banner) so operators verify what's actually set
+- Per-option overrides chain after the extension and win cleanly — no inheritance/override magic
+- Extension method approach generalizes: future named bundles (`.WithCanaryDefaults()`, `.WithMigrationDryRunDefaults()`) follow the same pattern
+
+**Harder:**
+- Operators must explicitly call the extension; setting an environment name alone never enables production safety defaults
+- Developers running locally with `DOTNET_ENVIRONMENT=Production` won't get prod defaults unless they call the extension explicitly — this is intentional but requires onboarding
+- The runner project (R-26) must document the extension call in its sample `Program.cs`; new adopters who skip docs may ship dev defaults to prod
+- A future regret about explicit-only opt-in cannot be reversed without superseding this ADR
+
+**Constrains:**
+- Future "named profile" requests (Staging, Canary) must justify avoiding the same hidden-coupling concern; if added, they should be additional extension methods, not enum values
+- Per-option default changes must be reflected in the extension method's body; drift between "what's documented as production-safe" and "what the extension sets" must be tested
+- The startup banner is required for completeness — without it, the extension's effects are invisible in deployed environments
diff --git a/docs/decisions/0013-always-create-indices-with-override.md b/docs/decisions/0013-always-create-indices-with-override.md
new file mode 100644
index 0000000..532bfc7
--- /dev/null
+++ b/docs/decisions/0013-always-create-indices-with-override.md
@@ -0,0 +1,56 @@
+# ADR-0013: Always-Create Lock and Ledger Indices in InitializeAsync with Explicit Override
+
+**Status:** Accepted
+**Date:** 2026-05-02
+
+## Context
+
+The OpenSearch provider's lock document (R-04) and migration ledger (R-06) must exist before `MigrationRunner.RunAsync` can do meaningful work. Three init strategies were considered during `/nop:propose`:
+
+1. **Always-create in `InitializeAsync`** (Approach A and C in propose) — provider performs idempotent `PUT` operations on both indices at startup; consistent with how Couchbase/Aerospike/MongoDB providers handle similar setup.
+
+2. **Provision-on-demand** (Approach B in propose) — lock index created on first `CreateLockAsync`, ledger created on first `WriteAsync`. `InitializeAsync` is light. Defers cluster errors until first use.
+
+3. **Explicit-only** — operator must call a separate `EnsureIndicesAsync()` or set up indices via deployment automation. Provider treats indices as preconditions.
+
+The forces in tension:
+
+- **Concurrent runner race window** — provision-on-demand introduces a race during the very first concurrent acquire attempt (the laziest CI matrix run is the worst case for race exposure; assessment 0002 R-24b lock contention test explicitly exercises this).
+- **AWS Managed OpenSearch IAM scoping** — production deployments may use IAM policies that grant migration runners read/write but deny `indices:admin/create`. Always-create breaks for these consumers.
+- **House-style consistency** — Couchbase/Aerospike/MongoDB always-create. Diverging here costs operator muscle memory.
+- **Bootstrap simplicity** — light `InitializeAsync` is easier to reason about than one that does multiple cluster mutations.
+
+Approach B's provision-on-demand was eliminated in propose because it introduces a race window in concurrent CI runs and defers errors that should fail at deploy-time, not first-acquire-time. Explicit-only was not seriously considered because it diverges from house style without compensating benefit.
+
+## Decision
+
+We will always create the lock and ledger indices in `InitializeAsync` with idempotent semantics:
+
+- `PUT /<lock-index>` with `IF NOT EXISTS` behavior; assert `number_of_replicas: 0` to eliminate replica-write coupling on the lock primary shard (PA-2 mitigation, requirement R-04)
+- `PUT /<ledger-index>` with `IF NOT EXISTS` behavior and the strict mapping defined in R-06 (including `appliedBy`, `direction`, `failedStatementIndex` forensic fields)
+
+For consumers in tightly-scoped IAM contexts where the migration runner cannot create indices, we will provide an explicit opt-out: `OpenSearchMigrationOptions.AssumeIndicesExist` (default `false`). When `true`:
+
+- `InitializeAsync` skips creation
+- `InitializeAsync` verifies both indices exist via `HEAD /<index>` and validates the mapping shape via `GET /<index>/_mapping`
+- Missing indices fail at startup with a remediation message naming the indices and the expected mapping
+- Mapping mismatches fail at startup with a diff summary
+
+## Consequences
+
+**Easier:**
+- Zero-race-window for lock acquisition; concurrent CI matrix runs converge on a single created index
+- Consistent with house-style provider initialization; operators in cross-provider deployments don't context-switch
+- Cluster errors (network, auth, missing permission) surface at deploy-time, not first-acquire-time
+- Backup/restore of the cluster automatically covers migration state (no out-of-band ledger setup)
+
+**Harder:**
+- Bootstrap path must handle `index_already_exists` (409) cleanly as success — easy in code, easy to test
+- Verification under `AssumeIndicesExist=true` requires a parallel mapping-shape check that is non-trivial; this code path is exercised in integration tests but is the lowest-traffic branch
+- Operators in IAM-scoped contexts must explicitly opt out; documentation must surface this as a first-class scenario in the runner project's README
+- Always-create wastes a small amount of cluster work on every deploy where indices already exist — measurable but not significant against R-07's `?refresh=wait_for` cost (R-24c measures both)
+
+**Constrains:**
+- Future schema changes to the lock or ledger indices cannot rely on auto-migration — they must be explicit migration steps because R-06's strict mapping is **immutable** per the Forbidden trust boundary. Adding fields after v1 release means a ledger reindex via `MIGRATE INDEX` (R-30)
+- The `AssumeIndicesExist` option is part of the public contract; once set, deprecating it requires a superseding ADR
+- Any future "ephemeral migration runner" mode (e.g., dry-run) must explicitly state its index-handling behavior
diff --git a/docs/decisions/0014-state-machine-facade-over-pipeline.md b/docs/decisions/0014-state-machine-facade-over-pipeline.md
new file mode 100644
index 0000000..b4fd24f
--- /dev/null
+++ b/docs/decisions/0014-state-machine-facade-over-pipeline.md
@@ -0,0 +1,75 @@
+# ADR-0014: State-Machine Façade over IBootstrapStep[] Pipeline
+
+**Status:** Accepted
+**Date:** 2026-05-02
+
+## Context
+
+The OpenSearch provider's bootstrapper (R-02) must orchestrate cluster readiness checks, ledger init, lock-index init, and optional warmup. Three architectures were considered during `/nop:propose`:
+
+1. **Direct port of Couchbase state machine** — `CouchbaseBootstrapper`'s 7-state design, transliterated to OpenSearch states (REST ping → cluster health → ledger ready → lock ready → sacrificial query). Verbose but battle-tested in production.
+
+2. **Pure pipeline (`IBootstrapStep[]`)** — bootstrapper composed of DI-registered steps; consumers add custom steps. Cleanly testable in isolation; loses the simple house-style public contract that operators expect when reading bootstrapper logs across providers.
+
+3. **Simpler async sequence** — flat `await` calls in `InitializeAsync`. Smallest surface area but loses both testability and consumer extension points.
+
+The forces in tension:
+
+- **House-style consistency** — Couchbase's state-machine pattern is the precedent; operators reading bootstrap logs across providers benefit from a uniform shape.
+- **Internal testability** — testing the state machine end-to-end requires a real cluster; testing individual steps in isolation against mocked clients is significantly faster.
+- **Consumer extensibility** — some consumers will want to add domain-specific bootstrap behavior (e.g., custom warmup queries); a pluggable step list accommodates this without subclassing.
+- **YAGNI risk** — if no consumer ever extends the bootstrapper, the pipeline pluggability is dead weight.
+- **Public-contract simplicity** — exposing `IBootstrapStep[]` as the public bootstrap API forces every operator to learn the pipeline concept; exposing a state machine keeps the public surface small.
+
+Assessment 0002 (Phase 1 Performance Audit, PA-12 + PA-3) flagged that bootstrap `_cluster/health` storms at rolling-deploy startup are a real concern; future optimization may want to parallelize independent steps. A pipeline structure makes that trivial; a state machine makes that surgery.
+
+## Decision
+
+We will implement the bootstrapper as a state-machine façade whose internal implementation is composed of `IBootstrapStep` instances registered in DI.
+
+**Public contract** (`OpenSearchBootstrapper`):
+
+```csharp
+public sealed class OpenSearchBootstrapper {
+ public OpenSearchBootstrapper(IEnumerable<IBootstrapStep> steps, ...);
+ public Task<BootstrapResult> RunAsync(CancellationToken ct);
+}
+
+public sealed record BootstrapResult(
+ BootstrapStatus Status,
+ IReadOnlyList<BootstrapStepResult> Steps,
+ Exception? FailedAt
+);
+```
+
+The result projects the per-step outcomes so operators see exactly which step failed without parsing log strings.
+
+**Internal pipeline** — the default registration ships these steps in order:
+- `RestPingStep` — verifies cluster reachability
+- `ClusterHealthStep` — `_cluster/health` poll per R-03 threshold
+- `EndpointCapabilityStep` — AWS endpoint loud-fail + ISM endpoint detection (R-21)
+- `LedgerIndexInitStep` — R-06 strict mapping creation/verification
+- `LockIndexInitStep` — R-04 lock index with `number_of_replicas: 0`
+- `SacrificialQueryStep` — optional warmup (skip-able by config)
+
+Consumers extend by registering an additional `IBootstrapStep` in DI; default ordering is preserved unless the consumer explicitly opts into reordering via a position attribute.
+
+## Consequences
+
+**Easier:**
+- Each step is a small unit testable in isolation against a mocked `IOpenSearchClient` — unit suite (R-24) covers all steps without Docker
+- The state-machine façade exposes `BootstrapResult.Steps` for log aggregation; operators see which step failed at a glance
+- Consumers add custom steps by registering an additional `IBootstrapStep` — no subclassing required
+- Future parallelization (PA-12 mitigation) is internal: two independent steps can declare no `DependsOn` constraint and run concurrently without changing the public API
+- Documentation can teach the state machine *as the contract*; the pipeline is implementation detail
+
+**Harder:**
+- Two layers must stay coordinated; documentation must clarify that "extending the bootstrapper" means registering an `IBootstrapStep` in DI, not subclassing the façade
+- The pipeline-with-position-attributes ordering scheme has edge cases (consumer registers a step with a position that conflicts with a built-in step) that need explicit policy
+- Per-step error wrapping must preserve exception types so callers can pattern-match on `OpenSearchNotReadyException`, `AwsSigV4NotConfiguredException`, etc. — easy to get wrong if not designed up-front
+
+**Constrains:**
+- Future bootstrapper changes must respect that pluggable steps may declare dependencies; ordering must be deterministic and documented
+- If pipeline pluggability proves YAGNI in practice, we may seal the internal pipeline (mark it `internal sealed`) without breaking the public contract — but doing so requires a superseding ADR
+- The default step list is part of the contract; adding a step that runs by default is a breaking change for consumers who registered steps with explicit positions
+- Custom consumer steps run with the same `BootstrapContext` and `CancellationToken`; they must handle cancellation correctly and must not throw unhandled exceptions
diff --git a/docs/decisions/0015-parser-offline-pure-all-io-runtime.md b/docs/decisions/0015-parser-offline-pure-all-io-runtime.md
new file mode 100644
index 0000000..7305459
--- /dev/null
+++ b/docs/decisions/0015-parser-offline-pure-all-io-runtime.md
@@ -0,0 +1,46 @@
+# ADR-0015: Parser is Offline-Pure; All I/O is Runtime Middleware
+
+**Status:** Accepted
+**Date:** 2026-05-02
+
+## Context
+
+ADR-0011 established a hybrid parser+runtime injection architecture: parser owns intent (AST flags, parse-time syntactic validation, justification token validation, semver comparison); runtime middleware owns execution (JSON tree merge, scoped implicit waits, Tasks API polling, secret scrubbing).
+
+During plan assessment 0003, the Independent Review identified an architectural commitment buried in R-30's `MIGRATE INDEX ... WITH TEMPLATE <name>` semantics that ADR-0011 did not explicitly address: the original R-30 wording suggested the parser would perform `GET /_index_template/<name>` *at parse time* to resolve the template body. This contradicts ADR-0011's intent in three ways:
+
+1. **Offline parse becomes impossible.** Parser unit tests cannot run without a live OpenSearch cluster (or extensive mocking) — parser tests should be fast and not require Docker.
+2. **Error semantics are confused.** "Template not found at parse time" surfaces as a grammar/parse error to consumers; "template not found at execute time" surfaces as an operational error. The two should not be conflated.
+3. **The parser/runtime boundary becomes ambiguous.** ADR-0011 said "parser owns intent; runtime owns execution," but did not state explicitly that the parser performs no I/O. Implementers reading R-30 in isolation could reasonably build either architecture.
+
+The forces in tension: implementer convenience (parser doing template lookup gives early feedback) vs. architectural invariants (parser purity, test speed, predictable error semantics, clear concern boundaries).
+
+## Decision
+
+The Parlot grammar and AST construction layer is **offline-pure**: it performs no network I/O, no file I/O, and no live cluster lookups. All I/O — including `GET /_index_template/<name>` lookups for `MIGRATE INDEX ... WITH TEMPLATE` — happens in runtime middleware immediately before the dispatched request executes.
+
+Specifically:
+
+- **Parser produces unresolved-reference AST nodes** for any value that requires live cluster state. `MIGRATE INDEX ... WITH TEMPLATE foo` produces an AST whose `CreateIndex` sub-node carries `BodySource = TemplateRef("foo")` rather than a resolved body.
+- **Runtime resolution middleware** materializes those unresolved references during request build, immediately before HTTP dispatch. Errors at this stage surface as `OpenSearchTemplateResolutionException` (or similar typed exception), not as parse errors.
+- **Parse-time errors** are restricted to grammar (malformed verb), syntactic (forbidden patterns per R-18), name-policy (reserved scope/identifier collisions per R-09), and value-shape (semver per R-15a).
+
+This is a clarifying corollary of ADR-0011, not a supersedure: ADR-0011's hybrid decision stands. ADR-0015 makes the parser/runtime boundary explicit so future verb additions don't drift across it.
+
+## Consequences
+
+**Easier:**
+- Parser unit tests run without Docker — fast feedback loop on grammar work
+- Parse errors and runtime errors have distinct, untangled error types
+- New verbs that need runtime context (e.g., `WHEN INDEX EXISTS`) follow a clear pattern: emit unresolved-reference AST, resolve at runtime
+- The "where does I/O happen?" question has one answer for every verb
+
+**Harder:**
+- Author who writes `MIGRATE INDEX ... WITH TEMPLATE foo` doesn't get parse-time feedback that `foo` doesn't exist — discovery is delayed to execution. Mitigated: error message at execute time names the template explicitly and links to documented alternatives
+- Implementers must resist the urge to "validate during parse for better UX" — every such case becomes a justification-required ADR amendment, not a casual decision
+- Some structural validations (e.g., "CREATE INDEX statement's $body actually exists") happen at parse, but reference resolution does not — implementers must distinguish "this name is a syntactic identifier" from "this name resolves to live state"
+
+**Constrains:**
+- All future verbs that need cluster state must use unresolved-reference AST + runtime middleware. No exceptions without a superseding ADR
+- The Parlot grammar definitions must not import OpenSearch.Client types for I/O (they may import value types like `IndexName` for parsing)
+- Runtime middleware exception types are part of the public contract — naming and behavior are stable
diff --git a/docs/decisions/0016-no-file-level-templating.md b/docs/decisions/0016-no-file-level-templating.md
new file mode 100644
index 0000000..f8085e2
--- /dev/null
+++ b/docs/decisions/0016-no-file-level-templating.md
@@ -0,0 +1,65 @@
+# ADR-0016: OpenSearch Provider Does Not Use File-Level Templating
+
+**Status:** Accepted
+**Date:** 2026-05-02
+
+## Context
+
+During Phase 0 of the OpenSearch provider implementation, requirement R-10 introduced Hyperbee.Templating as a four-scope renderer that would run before the Parlot parser. The justification was that OpenSearch resource bodies (settings + mappings + properties + analyzers + ISM policies) are larger and have env-variant pieces embedded inside JSON, not at the call site — so file-level substitution / conditionals / iteration looked attractive.
+
+After Task 0.4 landed (the Templating first-contact spike) the maintainer raised a sharper question: *no other provider uses Hyperbee.Templating; why does this one?*
+
+Audit of the existing four providers confirms the divergence:
+
+| Provider | Env-variant handling |
+|---|---|
+| Aerospike | Typed options: `Namespace`, `MigrationSet`, `LockName` resolved at runtime by the resource runner |
+| Couchbase | Typed options: bucket/scope/collection identifiers; component template bodies vary by code, not by templated text |
+| MongoDB | Typed options: `DatabaseName`, `CollectionName` |
+| Postgres | Typed options: `Schema`; raw `.sql` files use Postgres-side parameter binding |
+
+None ship a templating engine. Env-variation is handled by typed `MigrationOptions` properties + per-environment `appsettings.{Environment}.json`.
+
+The forces in tension during the original decision:
+
+- **House-style consistency** vs **OpenSearch's larger body sizes**
+- **Speculative needs** (conditional sections, iteration) vs **demonstrated needs** (string substitution)
+- **In-house engine reuse** vs **first-contact bug class** (PM-5 from assessment 0002 specifically warned about this — the spike did surface 4 real first-contact issues in Hyperbee.Templating 3.4.1)
+
+The Phase 0 spike (Task 0.4) DID validate that the engine works. But validation that *something is feasible* is not the same as *justification that it should be adopted*.
+
+Re-examination shows: the only concrete need is **string substitution** (env-variant index names, replica counts, analyzer paths). Conditional sections and iteration are speculative — no current sample, no R-30 example, and no production scenario test requires them. String substitution is exactly what typed options + runtime substitution already provide in the other four providers.
+
+## Decision
+
+The OpenSearch provider does NOT use Hyperbee.Templating or any other file-level templating engine. It matches the house pattern of the other four providers:
+
+- **Env-variant values** are typed properties on `OpenSearchMigrationOptions` (e.g., `IndexPrefix`, future `ReplicaCount`)
+- **Resource files** use bracketed identifiers or sibling JSON properties that the runtime substitutes by name (the same `WITH BODY $name` pattern from R-09)
+- **Per-environment configuration** flows through `appsettings.{Environment}.json` and `IConfiguration` binding, identical to the runner pattern of the other providers
+
+Specifically, this ADR strikes/amends:
+
+- **R-10 (Hyperbee.Templating renderer)** — struck entirely
+- **R-25 SecretScrubber routing** — amended to plain structured logging; secret redaction (if needed) is a future Serilog-config concern, not a provider design concern
+- **Phase 0 Task 0.4** — work product (Templating spike code) deleted; the validation that the engine works is preserved as a learning, not as code
+- **Phase 6 Tasks 6.1, 6.2** — removed from the plan
+- **R-30 `MIGRATE INDEX` `WITH TEMPLATE`** — runtime template-body resolution still happens (per ADR-0015) but no Hyperbee.Templating involvement; the template body is a JSON document fetched from the cluster, not a rendered text artifact
+
+## Consequences
+
+**Easier:**
+- House style consistency — operators reading code across all five providers see the same env-variation pattern
+- Zero first-contact bug risk class from Hyperbee.Templating; eliminates the four documented PM-5 quirks (`{{if}}` vs `{{#if}}`, dotted-key validator override, fat-arrow rewriter limitation, missing `each n,i` index variant)
+- Smaller dependency graph — `Hyperbee.Templating` removed from `Directory.Packages.props`
+- Smaller surface area for review and maintenance
+
+**Harder:**
+- Authors who genuinely need conditional sections or iteration in resource files must either (a) write them in code via the migration class's `UpAsync`, (b) split into multiple migrations, or (c) generate the resource file at build time with their own templating tool
+- The `WHEN VERSION`/`context` runtime conditional execution (R-15) remains the only conditional mechanism; it operates on whole statements, not on JSON-body fragments
+- If a future need for conditional bodies emerges, that's a new ADR + new design — not a quiet feature add
+
+**Constrains:**
+- Re-introducing Hyperbee.Templating (or any templating engine) requires a superseding ADR with a documented use case that typed options cannot satisfy
+- Future verbs that need env-variant pieces inside their JSON bodies must follow the typed-options + runtime-substitution pattern, not introduce templating ad hoc
+- The `SecretMarker`/`SecretScrubber` design surface is removed from the provider; option-value redaction in logs (if desired) belongs at the host Serilog/ILogger configuration level, applying uniformly across all providers
diff --git a/docs/decisions/0017-body-source-grammar.md b/docs/decisions/0017-body-source-grammar.md
new file mode 100644
index 0000000..542180c
--- /dev/null
+++ b/docs/decisions/0017-body-source-grammar.md
@@ -0,0 +1,214 @@
+# ADR-0017: Body-Source Grammar — Three Resolution Forms
+
+**Status:** Accepted
+**Date:** 2026-05-02
+
+## Context
+
+The OpenSearch provider's resource format pairs each statement with an
+optional JSON body that becomes the request payload. R-09 originally
+specified body refs as **sibling properties** on the statement object:
+
+```json
+{
+ "statement": "CREATE INDEX users WITH BODY $usersIndex",
+ "usersIndex": { "settings": {...}, "mappings": {...} }
+}
+```
+
+This shape was load-bearing for early Phase-1 development — atomic
+versioning, single-file IDE validation, no external file plumbing. After
+shipping the v1 verb set and the runner+samples projects, two design
+smells surfaced during a maintainer review of the samples:
+
+1. **Heterogeneous statement objects.** A `statements[]` entry mixes
+ one well-known field (`statement`) with arbitrary other-named keys
+ that the parser interprets. JSON Schema can't usefully describe
+ that shape; tooling can't tell which keys are bodies vs. metadata
+ vs. typos.
+
+2. **No graceful path for large or reusable bodies.** Production
+ OpenSearch index mappings routinely run 200+ lines (multi-language
+ analyzers, completion suggesters, nested types, multi-field).
+ Production ISM policies (hot/warm/cold/delete with rollover, force-
+ merge, allocation requirements) run 100+ lines. Inline-only puts
+ that mass into `statements.json`; PR review becomes "find the actual
+ change in a sea of mapping JSON." Nothing supports the natural
+ "extract to file, reference by name" pattern that
+ Couchbase/Aerospike/MongoDB use for *documents* (their analogous
+ external-resource concern).
+
+A reviewer questioned the divergence from the house pattern (folder of
+JSON files mapping to collections) and flagged the lack of a structured
+body section as a smell to fix before more migrations were written
+against the original shape. The cost of changing the format grows
+quickly with adopter count; only the OpenSearch provider has shipped
+and no external consumers exist yet, so this is the cheapest moment to
+revisit.
+
+Three forces in tension:
+
+- **Atomic versioning** — statement and body should change together
+ (R-09's original rationale).
+- **PR review ergonomics** — large bodies belong in their own files so
+ diffs are scoped to the actual change.
+- **Schema validation** — the resource format should be describable to
+ IDE tooling and JSON Schema.
+
+The original sibling-property form satisfies the first force but
+nothing else. Replacing it wholesale would break the R-09 contract
+and force a migration on hypothetical future consumers. Augmenting it
+with new forms that retain the original as a back-compat case
+satisfies all three without breaking anything.
+
+## Decision
+
+We will support **three body-source resolution forms**, ranked by
+ceremony, all coexisting:
+
+### Form 1 — Direct file reference (least ceremony)
+
+```json
+{ "statement": "CREATE INDEX users WITH BODY @bodies/users-mapping.json" }
+```
+
+The path is parsed as a `BodyFileRef` AST node. Resolution loads an
+embedded resource at the given path **relative to the migration's own
+resource folder**. The file must be marked `EmbeddedResource` in the
+project's csproj — same convention as `statements.json` itself.
+
+This is the recommended form for any body that would dominate the
+`statements.json` file when inlined.
+
+### Form 2 — Named body in the `bodies` section (inline JSON)
+
+```json
+{
+ "statement": "CREATE INDEX users WITH BODY $usersIndex",
+ "bodies": {
+ "usersIndex": { "settings": {...}, "mappings": {...} }
+ }
+}
+```
+
+The parser produces a `BodyRef("usersIndex")` AST node. Resolution
+looks up `bodies.usersIndex` and uses its value verbatim. This is the
+recommended form for tiny bodies tightly coupled to a single statement.
+
+### Form 3 — Named body in the `bodies` section pointing at a file
+
+```json
+{
+ "statement": "CREATE INDEX users WITH BODY $usersIndex",
+ "bodies": {
+ "usersIndex": "@bodies/users-mapping.json"
+ }
+}
+```
+
+When the value of a `bodies.<name>` entry is a string starting with
+`@`, the resolver treats it as a path reference and loads the
+embedded resource. Use this form when you want to address bodies by
+name (e.g., for clarity in PR review) but keep them in their own
+files. Rare in practice — form 1 covers the common case.
+
+### Back-compat (form 0) — Top-level sibling property (ADR-0009/R-09)
+
+```json
+{
+ "statement": "CREATE INDEX users WITH BODY $usersIndex",
+ "usersIndex": { "settings": {...} }
+}
+```
+
+When `bodies.<name>` is missing, the resolver falls back to a
+top-level sibling property of the same name. Preserves the
+ADR-0009/R-09 shape for migrations written before this ADR. The
+fallback is silent — no warning — because the form was the documented
+contract; migrating existing resources is optional.
+
+### Resolution order
+
+1. `BodyFileRef` (the `@path` form): load the embedded resource, parse
+ as JSON.
+2. `BodyRef` with a `bodies` section entry: structured form wins.
+3. `BodyRef` with a sibling property: ADR-0009 fallback.
+4. None of the above: throw `InvalidOperationException` with a
+ remediation message naming both the preferred form and the
+ fallback.
+
+### Path validation (parse-time)
+
+The grammar accepts characters `[a-zA-Z0-9_\-./\\]` in `@path`.
+Validation rejects at parse time:
+
+- Absolute paths (leading `/` or `\`) — body files must be inside the
+ migration's resource folder.
+- `..` segments — no parent-directory traversal; each migration's
+ body files stay self-contained.
+
+Filenames legitimately containing dots (e.g., `users.v2.json`) are not
+mistaken for parent-traversal because the validator splits on `/` and
+checks each segment.
+
+## Consequences
+
+**Easier:**
+
+- Large bodies live in their own files. PR diffs scope to one concern.
+- Schema validation describable: a `bodies` object with named
+ values that are either inline JSON or `@`-prefixed strings.
+- The most common case (single body, lives in a file) takes one line:
+ `WITH BODY @bodies/foo.json`. No `bodies` section needed.
+- Authors learning the format see the structured `bodies` section in
+ samples first; they discover the back-compat sibling form only when
+ inheriting existing migrations.
+
+**Harder:**
+
+- The resolver has more cases to maintain (3 forms + 1 fallback).
+ Mitigated by a single `ResolveBody` helper called from both Up and
+ Down dispatch paths.
+- Authors face a small "which form do I use?" decision per body. The
+ README provides clear guidance: small inline → form 2; large or
+ reusable → form 1.
+
+**Constrained:**
+
+- Embedded resources only. No filesystem-relative paths, no absolute
+ paths, no parent traversal. Keeps `dotnet publish` boundaries
+ honest and prevents migration content from depending on runtime
+ filesystem layout.
+- File extensions are open (`.json` is conventional but not enforced)
+ — the file is parsed as JSON regardless of extension.
+
+**Backwards-compatible:**
+
+- ADR-0009/R-09 sibling-property semantics preserved as the silent
+ fallback. No existing migration needs to be rewritten.
+
+## Relation to other ADRs
+
+- **ADR-0009 (Convention-Based Record ID Generation)** — unaffected.
+ This ADR addresses body-ref resolution, not record IDs.
+- **ADR-0011 (Hybrid Parser+Runtime Injection)** — preserved. The
+ parser still owns intent (BodyRef vs BodyFileRef discrimination at
+ parse time); runtime resolves the reference to a JSON tree.
+- **ADR-0015 (Parser is Offline-Pure)** — preserved. Parsing produces
+ AST nodes carrying paths/names; no resource loading or filesystem
+ access at parse time. Embedded-resource loading is a runtime concern.
+
+## Implementation
+
+- `BodySource` abstract base record with two variants: `BodyRef(Name)`
+ and `BodyFileRef(Path)`.
+- All body-bearing AST records (`CreateIndexAst`, `ReindexAst`,
+ `UpdateMappingAst`, `UpdateSettingsAst`, `CreateTemplateAst`,
+ `CreateComponentAst`, `CreatePolicyAst`) carry `BodySource? Body`.
+- Grammar's `bodyRef` parser is `OneOf(siblingBodyRef, fileBodyRef)`
+ with parse-time path validation in the `fileBodyRef` callback.
+- `OpenSearchResourceRunner.ResolveBody` is the single resolution
+ helper called from both `RunStatementsFromJsonAsync` and
+ `RollbackStatementsFromJsonAsync`.
+- Sample migrations 1, 2, 5, 6, 7, 8 use form 2; sample 3 uses form 3
+ (one body) + form 2 (others); sample 4 uses form 1.
diff --git a/docs/decisions/INDEX.md b/docs/decisions/INDEX.md
new file mode 100644
index 0000000..8aa475c
--- /dev/null
+++ b/docs/decisions/INDEX.md
@@ -0,0 +1,21 @@
+# decisions/INDEX.md
+
+| # | Title | Status | Date | Summary |
+|------|------------------------------------------------------------------------|----------|------------|------------------------------------------------------------------------------------------|
+| 0001 | [Use Parlot for Statement Parsers](0001-parlot-for-statement-parsers.md) | Accepted | 2026-04-03 | Adopt Parlot combinator parsing across providers; reject regex, ANTLR, Sprache/Pidgin |
+| 0002 | [Standardize Resource Migration Pattern for NoSQL Providers](0002-resource-migration-pattern.md) | Accepted | 2026-04-03 | StatementsFromAsync + DocumentsFromAsync pattern across NoSQL providers from JSON resources |
+| 0003 | [Provider Record Store Contract](0003-provider-record-store-contract.md) | Accepted | 2026-04-03 | Single IMigrationRecordStore interface (5 ops) abstracts provider-specific state storage |
+| 0004 | [Reflection-Based Migration Discovery with Attribute Metadata](0004-reflection-based-migration-discovery.md) | Accepted | 2026-04-03 | Discover migrations via reflection over assemblies; metadata via [Migration] attribute |
+| 0005 | [Provider-Native Distributed Locking](0005-provider-native-distributed-locking.md) | Accepted | 2026-04-03 | Each provider locks using its DB's native primitives; no external lock dependency |
+| 0006 | [Options Inheritance Hierarchy with DI Registration](0006-options-inheritance-with-di-registration.md) | Accepted | 2026-04-03 | Base MigrationOptions + per-provider subclasses; Add{Provider}Migrations DI extensions |
+| 0007 | [Lifecycle Hooks and Cron Support](0007-lifecycle-hooks-and-cron-support.md) | Accepted | 2026-04-03 | StartMethod/StopMethod hooks + Cronos-based scheduling for conditional/repeating runs |
+| 0008 | [Composable Wait/Retry Infrastructure](0008-wait-retry-infrastructure.md) | Accepted | 2026-04-03 | Strategy pattern (RetryStrategy + Backoff + Pause) for async readiness across providers |
+| 0009 | [Convention-Based Record ID Generation](0009-convention-based-record-ids.md) | Accepted | 2026-04-03 | IMigrationConventions.GetRecordId yields {version}.{normalized-name} stable identifiers |
+| 0010 | [Dual-Tier Testing Strategy (Unit + Integration with Testcontainers)](0010-dual-tier-testing-strategy.md) | Accepted | 2026-04-03 | Two-tier tests: MSTest unit + Testcontainers integration with real provider containers |
+| 0011 | [Hybrid Parser+Runtime Injection for OpenSearch Safe Defaults](0011-hybrid-parser-runtime-injection.md) | Accepted | 2026-05-02 | Parser owns intent (AST flags, parse-time detection); runtime owns execution (JSON merge, observability, secret scrub) |
+| 0012 | [WithProductionDefaults() Extension Method (Not Environment Profile Enum)](0012-with-production-defaults-extension.md) | Accepted | 2026-05-02 | Discoverable extension method replaces rejected environment-profile enum (assessment 0002 IR meta-finding) |
+| 0013 | [Always-Create Lock and Ledger Indices with Explicit Override](0013-always-create-indices-with-override.md) | Accepted | 2026-05-02 | InitializeAsync always creates indices; AssumeIndicesExist opt-out for tightly-scoped IAM contexts |
+| 0014 | [State-Machine Façade over IBootstrapStep[] Pipeline](0014-state-machine-facade-over-pipeline.md) | Accepted | 2026-05-02 | Public Couchbase-style state-machine contract; internal pluggable IBootstrapStep[] for testability and extension |
+| 0015 | [Parser is Offline-Pure; All I/O is Runtime Middleware](0015-parser-offline-pure-all-io-runtime.md) | Accepted | 2026-05-02 | Clarifying corollary of ADR-0011; resolves R-30 template lookup ambiguity by deferring all I/O (including template body resolution) to runtime middleware |
+| 0016 | [OpenSearch Provider Does Not Use File-Level Templating](0016-no-file-level-templating.md) | Accepted | 2026-05-02 | Strikes R-10; matches Aerospike/Couchbase/MongoDB/Postgres house style (typed options + runtime substitution); deletes Phase 0 Task 0.4 work; removes Hyperbee.Templating dependency |
+| 0017 | [Body-Source Grammar — Three Resolution Forms](0017-body-source-grammar.md) | Accepted | 2026-05-02 | `WITH BODY @path` direct file reference + `bodies.<name>` structured section + ADR-0009 sibling-property fallback for back-compat; parse-time path validation rejects absolute paths and `..` traversal |
diff --git a/docs/design/INDEX.md b/docs/design/INDEX.md
new file mode 100644
index 0000000..1472f98
--- /dev/null
+++ b/docs/design/INDEX.md
@@ -0,0 +1,5 @@
+# design/INDEX.md
+
+| # | Title | Status | Date | Summary |
+|--------------------|------------------------------------------------------------------------|-----------|------------|------------------------------------------------------------------------------------------|
+| opensearch-provider | [OpenSearch Provider — Pragmatic Hybrid Architecture](opensearch-provider.md) | Proposed | 2026-05-02 | Selected hybrid parser+runtime injection; state-machine façade over IBootstrapStep[] pipeline; always-create indices with override; WithProductionDefaults() extension. Recommends ADRs 0011-0014 |
diff --git a/docs/design/opensearch-provider.md b/docs/design/opensearch-provider.md
new file mode 100644
index 0000000..822905b
--- /dev/null
+++ b/docs/design/opensearch-provider.md
@@ -0,0 +1,209 @@
+# Design: OpenSearch Provider — Pragmatic Hybrid Architecture
+
+**Status:** Proposed
+**Date:** 2026-05-02
+**Requirements:** [docs/requirements/opensearch-provider.md](../requirements/opensearch-provider.md)
+**Research:** [docs/research/0001-opensearch-provider.md](../research/0001-opensearch-provider.md)
+**Assessment:** [docs/research/0002-opensearch-provider-assessment.md](../research/0002-opensearch-provider-assessment.md)
+
+## Selected Approach
+
+**Pragmatic Hybrid.** Parser owns *intent* (AST enrichment, syntactic safety detection, grammar-level safe-default flags); runtime owns *execution* (request-body merge, observability, secret scrubbing, response handling). The bootstrapper presents a Couchbase-style state-machine *façade* over an internal `IBootstrapStep[]` pipeline — simple external contract, testable internal composition. Lock and ledger indices are always-created during `InitializeAsync` with an explicit `AssumeIndicesExist` opt-out for tightly-scoped IAM contexts.
+
+## Fitness Evaluation Summary
+
+| Candidate | Req. Compliance | ADR Compliance | Temporal | Interface | Scale | Design | Overall |
+|-----------|----------------|----------------|----------|-----------|-------|--------|---------|
+| A: Couchbase-Clone (runtime middleware only, full state machine, always-create) | ~85% | ✓ all | Medium | Medium | Medium | Moderate | Moderate |
+| B: Parser-First Composition (parser-only, pipeline-only, provision-on-demand) | ~82% | ✓ all | High | Small | High | Clean | Moderate |
+| **C: Pragmatic Hybrid** | **~96%** | ✓ all | High | Small | High | Clean | **Strong** |
+
+C dominates because the requirements *force* a hybrid: R-08a (`op_type: create` injection), R-17 (component-template-aware `dynamic: strict`), and R-18 (parse-time syntactic unsafe-op detection) all require parser-level work; R-25 (structured event emission) requires runtime work. Pure runtime (A) loses parse-time error message contracts; pure parser (B) cannot observe live request/response. Hybrid is the only architecture that satisfies both classes natively.
+
+**Note (post-Phase-0):** R-10 (Hyperbee.Templating renderer) was struck per [ADR-0016](../decisions/0016-no-file-level-templating.md) — env-variation flows through typed options, matching the other four providers. The architecture below has been amended to remove the Templating Renderer block and the SecretScrubberSink that depended on it. The hybrid argument still stands on the parse-time-detection / runtime-middleware split.
+
+## Architecture
+
+### Component sketch
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│ Application │
+│ services.AddOpenSearchMigrations(opts => { ... }) │
+│ .WithProductionDefaults() ← (extension method) │
+└─────────────────────────────────────────────────────────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────────────────────────────────────────┐
+│ MigrationRunner (core, ADR-0003) │
+│ InitializeAsync → CreateLockAsync → discover → run → journal │
+└─────────────────────────────────────────────────────────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────────────────────────────────────────┐
+│ OpenSearchRecordStore : IMigrationRecordStore │
+│ ┌──────────────────────────────────────────────────────────────────┐ │
+│ │ OpenSearchBootstrapper (state-machine façade) │ │
+│ │ ┌──────────────────────────────────────────────────────┐ │ │
+│ │ │ IBootstrapStep[] pipeline (DI-registered) │ │ │
+│ │ │ • RestPingStep │ │ │
+│ │ │ • ClusterHealthStep (uses R-03 threshold) │ │ │
+│ │ │ • EndpointCapabilityStep (AWS detection — R-21) │ │ │
+│ │ │ • LedgerIndexInitStep (R-06 strict mapping) │ │ │
+│ │ │ • LockIndexInitStep (number_of_replicas: 0 — R-04) │ │ │
+│ │ │ • SacrificialQueryStep (warmup) │ │ │
+│ │ └──────────────────────────────────────────────────────┘ │ │
+│ └──────────────────────────────────────────────────────────────────┘ │
+│ ┌──────────────────────────────────────────────────────────────────┐ │
+│ │ LockHandle : IAsyncDisposable (auto-renew per R-05) │ │
+│ │ • CAS via if_seq_no/if_primary_term │ │
+│ │ • Heartbeat timer (LockRenewInterval) │ │
+│ │ • Realtime GET on takeover (NF-1, PM-1) │ │
+│ │ • CancellationToken cancelled on LockMaxLifetime (PM-12) │ │
+│ └──────────────────────────────────────────────────────────────────┘ │
+└─────────────────────────────────────────────────────────────────────────────┘
+ │
+ ▼
+┌─────────────────────────────────────────────────────────────────────────────┐
+│ Statement Pipeline │
+│ (Per ADR-0016: no file-level templating renderer — resource files are │
+│ consumed by the Parlot parser directly. Env-variation is handled by │
+│ typed OpenSearchMigrationOptions + IConfiguration.) │
+│ │
+│ ┌──────────────────────────────────────────────────────────────────┐ │
+│ │ Parlot Statement Parser (PARSE-TIME — R-08, R-09) │ │
+│ │ • Verb grammar (R-08a) │ │
+│ │ • Sibling $body resolution │ │
+│ │ • Reserved namespace policy (MD-3) │ │
+│ │ • Syntactic unsafe-op enumeration (R-18) │ │
+│ │ • UNSAFE("...") / NO WAIT("...") justification token check │ │
+│ │ • Semantic version comparator (R-15a) │ │
+│ │ • AST nodes carry safe-default flags: │ │
+│ │ - op_type:create=true (REINDEX) │ │
+│ │ - dynamic:strict=auto (CREATE INDEX, skip on composed_of) │ │
+│ │ • MIGRATE INDEX composite (R-30) decomposed at parse time │ │
+│ │ into CREATE INDEX + REINDEX + ALIAS SWAP AST nodes │ │
+│ └──────────────────────────────────────────────────────────────────┘ │
+│ │ │
+│ ▼ │
+│ ┌──────────────────────────────────────────────────────────────────┐ │
+│ │ Statement Compiler (AST → IRequest) │ │
+│ │ • Translates AST verb to OpenSearchClient request shape │ │
+│ │ • Resolves $body sibling JSON object │ │
+│ └──────────────────────────────────────────────────────────────────┘ │
+│ │ │
+│ ▼ │
+│ ┌──────────────────────────────────────────────────────────────────┐ │
+│ │ Runtime Request Middleware (RUN-TIME) │ │
+│ │ • SafeDefaultMergeMiddleware — applies AST safe-default flags │ │
+│ │ to the JSON tree (op_type, dynamic) before serialization │ │
+│ │ • ImplicitWaitMiddleware — issues scoped _cluster/health call │ │
+│ │ post-statement per WaitMode (R-12) │ │
+│ │ • TasksApiPollMiddleware — handles wait_for_completion=false │ │
+│ │ (R-11) with progress threshold logging │ │
+│ │ • (No SecretScrubberSink per ADR-0016 — host Serilog config │ │
+│ │ handles option-value redaction if needed) │ │
+│ └──────────────────────────────────────────────────────────────────┘ │
+│ │ │
+│ ▼ │
+│ OpenSearchClient │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+### Key interfaces
+
+```csharp
+// Public extension surface
+public static class OpenSearchMigrationsExtensions {
+ public static IServiceCollection AddOpenSearchMigrations(
+ this IServiceCollection services,
+ Action<OpenSearchMigrationOptions> configure);
+
+ public static IServiceCollection WithProductionDefaults(
+ this IServiceCollection services); // R-29
+}
+
+// Bootstrapper (façade)
+public sealed class OpenSearchBootstrapper {
+ public OpenSearchBootstrapper(IEnumerable<IBootstrapStep> steps, /* ... */);
+ public async Task RunAsync(CancellationToken ct);
+}
+
+// Pluggable pipeline step
+public interface IBootstrapStep {
+ string Name { get; }
+ Task ExecuteAsync(BootstrapContext ctx, CancellationToken ct);
+}
+
+// Lock handle
+public sealed class LockHandle : IAsyncDisposable {
+ public CancellationToken LockExpired { get; } // cancelled on LockMaxLifetime
+ public Task RenewLoopAsync(CancellationToken ct);
+}
+
+// AST safe-default flag carriers (parser output)
+internal abstract record StatementAst {
+ public required string Verb { get; init; }
+ public required JsonNode? Body { get; init; }
+ public required IReadOnlyDictionary SafeDefaults { get; init; }
+}
+
+// Runtime middleware contract
+internal interface IStatementMiddleware {
+ Task InvokeAsync(StatementContext ctx, StatementDelegate next);
+}
+```
+
+### Data flow (single statement, end-to-end)
+
+1. `MigrationRunner.RunAsync` → `OpenSearchRecordStore.InitializeAsync` → `OpenSearchBootstrapper.RunAsync` → each `IBootstrapStep` executes; failure on any step aborts with typed exception
+2. `MigrationRunner` discovers migration class, constructs it; calls `UpAsync`
+3. Migration loads `statements.json` resource; provider passes file content directly to the Parlot parser (no templating renderer — per ADR-0016)
+4. Parlot parser produces `StatementAst[]`; safe-default flags computed at parse; UNSAFE/NO WAIT justification tokens validated; unsafe-op detection runs; version comparators parsed semantically
+5. For each AST node: `StatementCompiler` builds an `IRequest`; runtime middleware chain processes (`SafeDefaultMergeMiddleware` merges flags into JSON tree → `ImplicitWaitMiddleware` runs scoped health check post-execute → `TasksApiPollMiddleware` polls if applicable)
+6. All logs / exceptions emit structured events; option-value redaction (if needed) is configured at the host Serilog/ILogger sink layer (per ADR-0016, not provider-specific)
+7. `MigrationRunner` calls `OpenSearchRecordStore.WriteAsync(record)` — CAS write with `?refresh=wait_for` and forensic fields (`appliedBy`, `direction`)
+8. `LockHandle.DisposeAsync` releases lock
+
+### Distribution
+
+- `src/Hyperbee.Migrations.Providers.OpenSearch/` — provider library
+- `runners/Hyperbee.MigrationRunner.OpenSearch/` — standalone runner (R-26)
+- `runners/samples/Hyperbee.Migrations.OpenSearch.Samples/` — verb showcase (R-27)
+- `tests/Hyperbee.Migrations.Integration.Tests/OpenSearch/` — integration tests; multi-node Compose harness (R-28b is now Must)
+
+## Key Decisions (recorded ADRs)
+
+These decisions cross the ADR threshold (reversal would touch multiple components):
+
+1. **[ADR-0011](../decisions/0011-hybrid-parser-runtime-injection.md): Hybrid parser+runtime injection for OpenSearch safe defaults** — parser owns intent (AST flags + parse-time enumeration), runtime owns merge (JSON tree mutation during request build). Reversal would touch every safe-default verb plus all observability hooks.
+2. **[ADR-0012](../decisions/0012-with-production-defaults-extension.md): `WithProductionDefaults()` extension method instead of `EnvironmentProfile` enum** — driven by the IR's hidden-coupling concern in assessment 0002. Reversal would change the entire DI surface for the provider.
+3. **[ADR-0013](../decisions/0013-always-create-indices-with-override.md): Always-create lock and ledger indices in `InitializeAsync` with explicit override** — `AssumeIndicesExist` option for tightly-scoped IAM contexts. Reversal would change the contract of `InitializeAsync` and affect lock-acquire path performance.
+4. **[ADR-0014](../decisions/0014-state-machine-facade-over-pipeline.md): State-machine façade over `IBootstrapStep[]` pipeline** — public API matches Couchbase house style; internal composition is testable and replaceable. Reversal would either flatten the pipeline (breaking testability) or expose the pipeline (breaking the simple public contract).
+5. **[ADR-0015](../decisions/0015-parser-offline-pure-all-io-runtime.md): Parser is offline-pure; all I/O is runtime middleware** — clarifying corollary of ADR-0011. Resolves R-30 template-lookup ambiguity. Future verbs that need cluster state must use unresolved-reference AST + runtime middleware.
+6. **[ADR-0016](../decisions/0016-no-file-level-templating.md): OpenSearch provider does not use file-level templating** — strikes R-10; matches Aerospike/Couchbase/MongoDB/Postgres house style. Re-introducing templating requires a superseding ADR.
+
+## Rejected Approaches
+
+- **Approach A — Couchbase-Clone (runtime middleware only):** Lost on requirements compliance (~85%). Pure runtime middleware sees fully-built JSON and cannot satisfy R-08a/R-17/R-18's parse-time error contracts. Component-template detection (`composed_of` presence in AST vs JSON tree walk) is harder at runtime; UNSAFE token validation must happen at parse anyway. State machine alone (no pipeline) is verbose and harder to test in isolation than the façade-over-pipeline shape C adopts.
+- **Approach B — Parser-First Composition (parser only, provision-on-demand, IBootstrapStep pipeline):** Lost on requirements compliance (~82%) and lock-init race. Pure parser cannot route logs through SecretScrubber (R-25); cannot emit structured WARN events from response paths; cannot observe Tasks API progress. Provision-on-demand for lock index introduces a race window during the very first concurrent acquire (the laziest CI matrix run becomes the worst case for race exposure). Pipeline-only public API loses the simple Couchbase-shaped contract that house-style consistency demands.
+
+## Risks and Open Questions
+
+### Riskiest assumption (validate early)
+
+**The runtime middleware can correctly merge AST safe-default flags into arbitrary user-supplied JSON bodies.** Specifically: `op_type: create` injection on `_reindex` request bodies that already contain a `dest` object; `dynamic: strict` injection into `mappings.properties` when only `mappings` is present at the top level; preservation of an existing `dynamic: true` set explicitly by the author. This must be the first integration test written — it validates the parser/runtime split before any other component is built. If the merge logic is fragile, the architecture's primary advantage collapses.
+
+### Other open questions worth surfacing
+
+- **Pipeline parallelism within bootstrapper:** the `IBootstrapStep[]` pipeline could run independent steps (ledger + lock init) in parallel. Worth doing? If yes, step dependencies must be declared (`DependsOn` attribute or topological sort). If no, the linear sequential model is simpler. Recommend **linear in v1** unless a concrete bottleneck emerges in R-24c's measured-cost test.
+- **Middleware ordering:** if a consumer adds a custom `IStatementMiddleware`, the position in the chain matters. Need a documented order convention (`Order` attribute) and a test that asserts the built-in middleware order.
+- **`AssumeIndicesExist = true` validation:** when set, `InitializeAsync` skips create but does it *verify* the indices exist with the expected mapping? Recommend yes — verification is cheap; silent acceptance of missing indices is worse than the cost.
+- ~~Hyperbee.Templating + SecretMarker integration~~ — REMOVED per ADR-0016. The first-contact bug class PM-5 worried about is fully eliminated by not adopting the engine.
+- **State-machine façade observability:** the public `BootstrapResult` should expose per-step status for log aggregation. Recommend enumerating the steps in `BootstrapResult.Steps` so operators can see exactly which step failed without parsing log strings.
+
+## Recommended next steps
+
+1. **Run `/nop:adr` four times** to materialize ADRs 0011-0014 (or run `/nop:adr derive` to mine them from this spec in one pass)
+2. **Run `/nop:plan`** to decompose into phased tasks. Suggest first phase = riskiest-assumption validation: parser AST + runtime middleware merge logic + tests against representative bodies (the validation listed above)
+3. **Optional:** `/nop:assess` on this design before planning — the design is mid-stakes (production-capable provider but with mature precedent in Couchbase). Stakes don't justify a second Full Assessment, but a `/nop:red-blue` pass on the design could catch design-level gold-plating before plan-time
diff --git a/docs/plans/active/INDEX.md b/docs/plans/active/INDEX.md
new file mode 100644
index 0000000..894c167
--- /dev/null
+++ b/docs/plans/active/INDEX.md
@@ -0,0 +1,6 @@
+# plans/active/INDEX.md
+
+| Plan | Title | Status | Created | Summary |
+|------|-------|--------|---------|---------|
+
+_No active plans. See `../archive/` for completed plans._
diff --git a/docs/plans/archive/2026-05-opensearch-provider.md b/docs/plans/archive/2026-05-opensearch-provider.md
new file mode 100644
index 0000000..a0913f8
--- /dev/null
+++ b/docs/plans/archive/2026-05-opensearch-provider.md
@@ -0,0 +1,384 @@
+# Plan: OpenSearch Provider for Hyperbee.Migrations
+
+**Status:** Completed (archived)
+**Created:** 2026-05-02 (collapsed from 8-phase to 4-phase after assessment 0003 calibration)
+**Branch:** `devs/bfarmer/provider-opensearch`
+**Inputs:**
+- Requirements: [docs/requirements/opensearch-provider.md](../../requirements/opensearch-provider.md) (31 testable requirements)
+- Design: [docs/design/opensearch-provider.md](../../design/opensearch-provider.md) (Pragmatic Hybrid)
+- Research: [0001](../../research/0001-opensearch-provider.md), [0002](../../research/0002-opensearch-provider-assessment.md), [0003](../../research/0003-opensearch-plan-assessment.md)
+- ADRs: 0001-0015 (especially 0011-0015 for this provider)
+
+## Velocity calibration
+
+This plan is sized to the maintainer's actual velocity:
+- Aerospike provider (with auto-renewing lock + Parlot grammar) shipped in **1 day**
+- Couchbase provider (most complex, 7-state bootstrapper + N1QL grammar) shipped in **under 1 week**
+
+Realistic estimate: **3-7 days of focused work** for the core provider, **1-2 days polish**. The plan structure follows that cadence.
+
+## Objective
+
+Build a production-capable OpenSearch provider satisfying all 31 requirements and complying with all 15 ADRs:
+
+- Zero data loss during reindex/alias swaps
+- No permanent lockouts from crashed runners
+- Same migrations run unchanged across single-node dev, multi-node CI, AWS Managed (scheduled)
+- Parser-level safe defaults per ADR-0011 (`op_type: create`, component-template-aware `dynamic: strict`)
+- Parser is offline-pure; all I/O in runtime middleware per ADR-0015
+- `WithProductionDefaults()` extension surface per ADR-0012
+- Always-create indices with `AssumeIndicesExist` override per ADR-0013
+- State-machine façade over `IBootstrapStep[]` pipeline per ADR-0014
+
+## Style Reference
+
+Citations across 6 patterns (≥10 file:line refs).
+
+### Pattern 1 — Auto-renewing lock with TimeProvider (R-04, R-05, ADR-0005)
+
+- **CAS acquire**: [AerospikeRecordStore.cs:53-90](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs#L53-L90) — `WritePolicy.recordExistsAction = CREATE_ONLY` is server-enforced atomicity; `KEY_EXISTS_ERROR` translates to `MigrationLockUnavailableException`. **OpenSearch analogue:** `if_seq_no`/`if_primary_term` returning 409 → `MigrationLockUnavailableException` (per ADR-0011 + R-04).
+- **Heartbeat renewal loop**: [AerospikeRecordStore.cs:92-144](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs#L92-L144) — uses `Task.Delay(interval, _timeProvider, ct)` for test-time virtualization; deadline check enforces `LockMaxLifetime`; transient errors logged but not re-thrown (TTL provides recovery buffer). **OpenSearch must extend this**: per R-05 + NF-1 from assessment 0002, OpenSearch heartbeat must use `realtime: true` GET on takeover (refresh-lag would otherwise produce false-takeovers).
+- **LockHandle disposal**: [AerospikeRecordStore.cs:199-244](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs#L199-L244) — `Interlocked.CompareExchange` for idempotent dispose; cancels renew before deleting record; logs critical on cleanup failure.
+- **Parameter validation (sample)**: [AerospikeRecordStore.cs:44-48](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs#L44-L48) — only validates `LockRenewInterval < LockExpireInterval`. **OpenSearch must add** `LockStaleAfter ≥ 2 * LockRenewInterval` per R-05.
+- **Options shape**: [AerospikeMigrationOptions.cs:17-44](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeMigrationOptions.cs#L17-L44) — `LockExpireInterval` (60s), `LockRenewInterval` (30s), `LockMaxLifetime` (1h). OpenSearch will rename `LockExpireInterval` → `LockStaleAfter` for clarity.
+
+### Pattern 2 — Multi-state bootstrapper (ADR-0014, R-02)
+
+- **State-machine façade**: [CouchbaseBootstrapper.cs:36-67](../../../src/Hyperbee.Migrations.Providers.Couchbase/CouchbaseBootstrapper.cs#L36-L67) — single public `WaitForSystemReadyAsync(TimeSpan? timeout, CancellationToken)`; uses `TimeoutTokenSource` + linked CTS; sequential `WaitForCluster` → `WaitForBuckets` → `Warmup`.
+- **6-state cluster wait**: [CouchbaseBootstrapper.cs:91-180](../../../src/Hyperbee.Migrations.Providers.Couchbase/CouchbaseBootstrapper.cs#L91-L180) — Start → WaitForUri → StateUriReady → WaitForHealthy → StateHealthy → WaitForReady; explicit 5s sleep at StateHealthy works around the SDK bootstrap race. **OpenSearch's analogue**: per ADR-0014 we wrap `IBootstrapStep[]` with this state-machine shape, exposing `BootstrapResult.Steps` for diagnostics.
+- **Notify interval pattern**: [CouchbaseBootstrapper.cs:28-34](../../../src/Hyperbee.Migrations.Providers.Couchbase/CouchbaseBootstrapper.cs#L28-L34) — bounded by `Math.Min(timeoutSeconds, reportSeconds)`; logs progress at interval without blocking actual operation timeout.
+- **Sacrificial query warmup**: [CouchbaseBootstrapper.cs:214-235](../../../src/Hyperbee.Migrations.Providers.Couchbase/CouchbaseBootstrapper.cs#L214-L235) — first `system:*` query after hard shutdown returns unpredictable results; this query primes N1QL. OpenSearch analogue: optional final step (skip-able) that primes a known system index.
+
+### Pattern 3 — Parlot grammar (ADR-0001, R-08)
+
+- **`static readonly Parser` cache**: [StatementParser.cs:35](../../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs#L35) — parser built once at class load (PA-8 already pattern-encoded; satisfies ADR-0011 spike test).
+- **Keyword definitions**: [StatementParser.cs:40-62](../../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs#L40-L62) — `Terms.Text("CREATE", caseInsensitive: true)` for SQL-style keywords. OpenSearch reuses this exactly.
+- **Identifier with backtick escape**: [StatementParser.cs:69-73](../../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs#L69-L73) — ``Between(Terms.Char('`'), pattern, Terms.Char('`')).Or(plainIdentifier)`` — OpenSearch index names with dots/dashes need this same shape.
+- **Composed reference grammars with disambiguation**: [StatementParser.cs:88-110](../../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs#L88-L110) — `keyspaceRef = OneOf(keyspaceNs3, keyspace3, ..., keyspace1)` for 1/2/3-part graceful disambiguation.
+- **Statement disambiguation order**: [StatementParser.cs:286-301](../../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs#L286-L301) — `createPrimaryIndex` BEFORE `createIndex` (both start with CREATE) — order matters in `OneOf`. OpenSearch will need similar care for `CREATE INDEX` vs `CREATE TEMPLATE` vs `CREATE COMPONENT` vs `CREATE POLICY`.
+- **Public parse entry**: [StatementParser.cs:304-314](../../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs#L304-L314) — `TryParse` + throw `NotSupportedException` with full statement. **OpenSearch must do better** per assessment 0002 — include file/index/recognized-verb in error.
+
+### Pattern 4 — DI registration (ADR-0006, ADR-0012)
+
+- **Two-overload entrypoint**: [Aerospike/ServiceCollectionExtensions.cs:12-20](../../../src/Hyperbee.Migrations.Providers.Aerospike/ServiceCollectionExtensions.cs#L12-L20) — no-config + `Action` overloads delegate to private with caller `Assembly`.
+- **Options factory closure**: [Aerospike/ServiceCollectionExtensions.cs:24-52](../../../src/Hyperbee.Migrations.Providers.Aerospike/ServiceCollectionExtensions.cs#L24-L52) — factory builds options with `DefaultMigrationActivator(provider)`, applies user config, merges `IConfiguration` `Migrations:FromAssemblies`/`FromPaths` with code assemblies, deduplicates, defaults to caller.
+- **Singleton registrations**: [Aerospike/ServiceCollectionExtensions.cs:54-62](../../../src/Hyperbee.Migrations.Providers.Aerospike/ServiceCollectionExtensions.cs#L54-L62) — `OptionsType` singleton, upcast to `MigrationOptions` for runner, `IMigrationRecordStore` singleton, `MigrationRunner` singleton, resource runner generic transient, `TryAddSingleton(TimeProvider.System)`. **OpenSearch adds**: `IBootstrapStep[]` registrations (per ADR-0014), `WithProductionDefaults()` extension that mutates options post-registration (per ADR-0012).
+- **IConfiguration helper**: [Aerospike/ServiceCollectionExtensions.cs:65-66](../../../src/Hyperbee.Migrations.Providers.Aerospike/ServiceCollectionExtensions.cs#L65-L66) — `GetEnumerable` returns empty for missing sections (defensive).
+
+### Pattern 5 — Options inheritance (ADR-0006)
+
+- **Base + provider-specific shape**: [AerospikeMigrationOptions.cs:3](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeMigrationOptions.cs#L3) — `class AerospikeMigrationOptions : MigrationOptions`.
+- **Default-named constants**: [AerospikeMigrationOptions.cs:5-7](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeMigrationOptions.cs#L5-L7) — `public const string DefaultNamespace = "test"` style.
+- **Two-constructor pattern**: [AerospikeMigrationOptions.cs:29-44](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeMigrationOptions.cs#L29-L44) — parameterless ctor delegates to activator overload; activator overload sets defaults.
+- **Deconstruct convenience**: [AerospikeMigrationOptions.cs:46-51](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeMigrationOptions.cs#L46-L51) — tuple unpacking for ergonomic access.
+
+### Pattern 6 — Project file shape (ADR-0006, R-21)
+
+- **Csproj template**: [Hyperbee.Migrations.Providers.Aerospike.csproj:21-31](../../../src/Hyperbee.Migrations.Providers.Aerospike/Hyperbee.Migrations.Providers.Aerospike.csproj#L21-L31) — central package management (versions implicit at solution level), ``, ``, `PackageId`/`Authors`/license metadata, `InternalsVisibleTo` for unit tests, `<ProjectReference>` to core, `<PackageReference>` for client SDKs + DI/Hosting/Logging abstractions + Parlot. OpenSearch project mirrors this exactly with `OpenSearch.Client` substituting for `Aerospike.Client`; AwsSigV4 NuGet is opt-in (separate package or conditional reference per ADR-0011).
+
+### Anti-patterns to avoid (extracted from audit)
+
+- **Don't dispatch network I/O from the parser** (per ADR-0015). Aerospike/Couchbase parsers don't; OpenSearch's `MIGRATE INDEX ... WITH TEMPLATE` must produce an `unresolved-reference` AST node — runtime middleware resolves the template body.
+- **Don't bare-`UNSAFE`** — Couchbase has nothing like this, but OpenSearch's `UNSAFE` and `NO WAIT` modifiers must require non-empty justification per R-18 (assessment 0002 MD-2).
+- **Don't fold safe-default injection into runtime middleware alone** — assessment 0002 PM-3, PM-4, MD-9 prove parser-level enforcement is required (per ADR-0011 hybrid).
+- **Don't return null from `IMigrationRecordStore.ReadAsync` without doc**: [AerospikeRecordStore.cs:165-166](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs#L165-L166) returns null — works today because no caller hits that path. OpenSearch should match the null return for contract consistency, but must document it.
+
+## Git workflow
+
+| Phase | Snapshot tag | When taken |
+|-------|--------------|------------|
+| 0 | `opensearch/phase-0-spike-validated` | After Phase 0 (scaffold + spike) — gate before further work |
+| 1 | `opensearch/phase-1-foundation` | After foundation + foundation verbs work end-to-end |
+| 2 | `opensearch/phase-2-atomic-composite` | After REINDEX/ALIAS/MIGRATE/templates/cross-cutting features land |
+| 3 | `opensearch/phase-3-shippable` | After distribution + multi-topology CI green |
+
+Branch: `devs/bfarmer/provider-opensearch` from `main`. Per-phase PRs.
+
+---
+
+## Phase 0: Scaffold + Risk-First Spike
+
+**Goal:** Project structure exists; harness boots; **the riskiest assumption (parser-emitted AST safe-default flags merge cleanly into arbitrary user-supplied JSON bodies) is validated against real OpenSearch.** If the spike fails, ADR-0011 needs revision and Approach A (runtime-middleware-only — see design rejected approaches) becomes the documented fallback.
+
+**Estimated effort:** Half a day to one day.
+
+**Completion Criteria:**
+- Solution builds clean across all four projects (provider, runner, samples, tests)
+- Style Reference section populated with ≥10 file:line citations across ≥4 patterns
+- Single-node Testcontainers harness boots; cluster reaches yellow
+- 10 representative spike tests pass against real OpenSearch (5 CREATE INDEX shapes + 5 REINDEX shapes — see kill criterion below)
+- Phase 0 snapshot tagged
+
+**Phase 0 kill criterion (verbatim per assessment 0003 / A8):**
+> *Merge logic cannot deterministically produce expected JSON without ambiguity for any of the 5 documented edge cases.*
+
+If this fires, escalate per `/nop:debug` and consider whether ADR-0011 needs superseding before Phase 1 starts. **Fallback architecture:** Approach A (Couchbase-Clone, runtime middleware only) per design rejected approaches. AST types and grammar (Task 0.5) remain reusable; only the merge middleware (`SafeDefaultMergeMiddleware`, also Task 0.5) becomes rework.
+
+### Tasks
+
+#### 0.1: Codebase audit + Style Reference (promoted to first task per A4)
+
+Audit existing providers; populate the Style Reference section above with concrete citations. Without this, downstream "follow existing pattern" claims are unverifiable.
+
+- [x] Read [AerospikeRecordStore.cs](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs) — auto-renewing lock pattern, TimeProvider injection
+- [x] Read [CouchbaseBootstrapper.cs](../../../src/Hyperbee.Migrations.Providers.Couchbase/CouchbaseBootstrapper.cs) — state-machine pattern
+- [x] Read [Couchbase StatementParser.cs](../../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs) — Parlot grammar shape
+- [x] Read [Aerospike/ServiceCollectionExtensions.cs](../../../src/Hyperbee.Migrations.Providers.Aerospike/ServiceCollectionExtensions.cs) — DI pattern
+- [x] Read [AerospikeMigrationOptions.cs](../../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeMigrationOptions.cs) — options inheritance
+- [x] Read [Aerospike csproj](../../../src/Hyperbee.Migrations.Providers.Aerospike/Hyperbee.Migrations.Providers.Aerospike.csproj) — project file shape
+- [x] Populate Style Reference section: 6 patterns, ≥20 file:line citations, anti-patterns extracted
+
+#### 0.2: Project scaffolding
+
+**Scope-trimmed**: only the provider library is needed for Phase 0/1 work. Runner project + Samples project are deferred to Phase 3 (Distribution) where they belong with the other distribution work. Existing test projects (`tests/Hyperbee.Migrations.Tests`, `tests/Hyperbee.Migrations.Integration.Tests`) get OpenSearch subdirectories — no new test csproj needed.
+
+- [x] Create `src/Hyperbee.Migrations.Providers.OpenSearch/` provider library — net10.0;net9.0;net8.0 (inherited from Directory.Build.props), Apache 2.0
+- [x] Add NuGet versions to `Directory.Packages.props`: `OpenSearch.Client` 1.8.0, `OpenSearch.Net` 1.8.0, `OpenSearch.Net.Auth.AwsSigV4` 1.8.0 (used in Phase 3)
+- [x] Add to `Hyperbee.Migrations.slnx`; `dotnet build` clean (provider library: 0 warnings, 0 errors across net8/9/10)
+- [x] Initial source files: `OpenSearchMigrationOptions.cs` (with WaitMode, ClusterHealthThreshold, ContextResolutionPolicy enums + lock parameters), `ServiceCollectionExtensions.cs` (`AddOpenSearchMigrations` + `WithProductionDefaults` scaffolded; full impl in Phase 1, Task 1.1), README.md
+- [x] ~~Hyperbee.Templating package reference~~ — added then removed per ADR-0016 (see Task 0.4)
+- [x] **Defer**: Testcontainers OpenSearch image setup — moved to Task 0.3
+
+#### 0.3: Single-node Testcontainers harness + hello-world
+
+- [x] `OpenSearchTestContainer.cs` mirroring Aerospike harness shape — `discovery.type=single-node`, security plugin disabled, mapped 9200, captures both `IOpenSearchClient` (high-level) and `OpenSearchLowLevelClient` (low-level for raw HTTP)
+- [x] Hello-world test (`OpenSearchHarnessTest.HelloWorld_ClusterHealthYellowOrGreen`): gated by `#if INTEGRATIONS` per ADR-0010; calls `Cluster.HealthAsync()` and asserts `status` is yellow or green
+- [x] Version-support contract documented in `OpenSearchTestContainer.cs` header (per A11/NF-6): tested 2.18.0, minimum 2.0.0, AWS Managed caveat about ISM endpoint path
+- [x] OpenSearch container added to `InitializeTestContainers.AssemblyInitialize`
+- [x] `dotnet build` clean (0 errors; 27 warnings, all pre-existing CS0618 plus 1 matching one in my code per house style)
+
+#### 0.4: ~~Hyperbee.Templating first-contact spike~~ — **REVERTED per ADR-0016**
+
+Spike was completed by a parallel sub-agent and then removed wholesale per ADR-0016 (the OpenSearch provider matches the house style of the other four providers — env-variation through typed options + `IConfiguration`, no file-level templating engine).
+
+The work product is preserved in commit `b2febba` (added) and `95825f0` (removed); see Learnings Ledger for the four PM-5 first-contact issues the spike documented in Hyperbee.Templating 3.4.1 (these findings ARE preserved as durable learnings — they prompted a separate fix to Hyperbee.Templating's README/docs).
+
+- [x] Spike validated the engine works for the use case
+- [x] Decision documented in [ADR-0016](../../decisions/0016-no-file-level-templating.md): **don't adopt** — house-style consistency outweighs speculative needs (conditional sections, iteration) that no current sample requires
+- [x] Code deleted in commit `95825f0`
+
+#### 0.5: Spike — minimal AST + grammar + SafeDefaultMergeMiddleware
+
+Smallest implementation that validates the parser/runtime split.
+
+- [x] `StatementAst` abstract record + `BodyRef` record (sibling JSON property reference); concrete `CreateIndexAst` and `ReindexAst` records carrying typed safe-default flags (`InjectDynamicStrict`, `InjectOpTypeCreate`, `UnsafeJustification`)
+- [x] Parlot grammar parsing `CREATE INDEX [IF NOT EXISTS] <name> [WITH BODY $body]` and `REINDEX [UNSAFE("<justification>")] FROM <src> TO <dst> [WITH BODY $body]` — backtick-or-plain identifiers, case-insensitive keywords, ordered `OneOf` per Style Reference Pattern 3
+- [x] `SafeDefaultMergeMiddleware` operating on `JsonNode` trees: merges `op_type: create` (REINDEX `dest` path) with idempotent + conflict detection; merges `dynamic: strict` (CREATE INDEX `mappings` path) with `composed_of` detection per R-17 / PM-4 fix; preserves user-explicit values; never mutates caller's tree (deep clone via round-trip)
+- [x] **`SafeDefaultConflictException`** surfaces conflicting `op_type` with remediation message pointing to `REINDEX UNSAFE("...")`
+- [x] **`OpenSearchParseException`** with file/recognized-verb context in message
+- [x] **36 unit tests across 3 test classes**: 6 AST equality tests, 18 grammar tests (positive/negative cases including bare-UNSAFE rejection per R-18), 12 merge middleware tests covering all 5 CREATE INDEX edge cases + all REINDEX edge cases + tree-mutation invariant
+- [x] All tests pass on net8/9/10 (36 total OpenSearch tests after Templating removal, 108 test runs, 0 failures; was 39/117 with the now-removed Templating spike)
+
+#### 0.6: Spike — 10 wire-level integration tests against real OpenSearch
+
+Captures actual HTTP request bodies via `ConnectionSettings.DisableDirectStreaming()` (set on the test harness client); asserts merge correctness via `ApiCall.RequestBodyInBytes`. Tests live in `tests/Hyperbee.Migrations.Integration.Tests/OpenSearchSpikeTests.cs`, gated by `#if INTEGRATIONS` per ADR-0010. Compiles clean both with and without `INTEGRATIONS` defined.
+
+- [x] Test: CreateIndex flat body without `mappings` → request has `mappings.dynamic: strict`
+- [x] Test: CreateIndex with explicit `mappings.dynamic: true` → preserves user value
+- [x] Test: CreateIndex with `composed_of` → injection skipped (cluster rejection acceptable; we audit the wire body)
+- [x] Test: CreateIndex with `mappings.properties` only → injection adds `dynamic: strict` alongside properties
+- [x] Test: CreateIndex with settings only → injection creates `mappings.dynamic: strict` block
+- [x] Test: Reindex without body → request has `{ "source": {...}, "dest": {..., "op_type": "create"} }`
+- [x] Test: Reindex with existing body and `dest` object → preserves user fields, adds `op_type: create`
+- [x] Test: Reindex with body specifying `op_type: index` → fails at merge time with `SafeDefaultConflictException` pointing to UNSAFE remediation per R-18
+- [x] Test: Reindex with body specifying `op_type: create` explicitly → exactly one `op_type: create` on the wire (idempotent inject)
+- [x] **Keystone test** — Reindex round-trip with `op_type: create` prevents double-write: seeds 3 docs in src, pre-seeds dst with same `_id`=2 (simulating partial prior run), runs reindex; asserts `version_conflicts: 1`, dst contains exactly 3 docs (no double-write), pre-seeded doc was NOT overwritten
+
+**Phase 0 gate:** All 10 tests must run green against real OpenSearch in user's Docker env. To run: uncomment `//#define INTEGRATIONS` at file top, then `dotnet test tests/Hyperbee.Migrations.Integration.Tests/Hyperbee.Migrations.Integration.Tests.csproj --filter "TestCategory=Spike"`. If green, tag `opensearch/phase-0-spike-validated` and proceed to Phase 1.
+
+---
+
+## Phase 1: Foundation + Foundation Verbs
+
+**Goal:** Empty migration runs end-to-end against single-node Testcontainers. Lock acquired and renewed; ledger initialized; bootstrapper completes. Foundation verbs (CREATE/DROP INDEX, UPDATE MAPPING/SETTINGS, REFRESH, WAIT) execute correctly. Lock contention and crash recovery scenarios pass.
+
+**Estimated effort:** 1-2 days.
+
+**Completion Criteria:**
+- DI surface complete: `services.AddOpenSearchMigrations(opts => {}).WithProductionDefaults()` (ADR-0012)
+- Bootstrapper façade with `IBootstrapStep[]` pipeline (ADR-0014)
+- Ledger schema with all forensic fields per R-06 (`appliedBy`, `direction`, `failedStatementIndex`)
+- LockHandle: CAS acquire + heartbeat renew + realtime-GET takeover + `LockMaxLifetime` cancellation contract (R-05)
+- Lock parameter validation at startup (`LockRenewInterval < LockStaleAfter < LockMaxLifetime` AND `LockStaleAfter ≥ 2 * LockRenewInterval`)
+- `AssumeIndicesExist` override path (ADR-0013)
+- Foundation verbs all parse, compile, execute integration-green: `CREATE INDEX [IF NOT EXISTS]`, `DROP INDEX [IF EXISTS]`, `UPDATE MAPPING ON <index>`, `UPDATE SETTINGS ON <index> [CLOSE]`, `REFRESH`, `WAIT FOR <condition> [ON <index>]`, `WAIT UNTIL TASK <task-id>`
+- `IF [NOT] EXISTS` markers check live cluster state
+- `UNSAFE("...")` and `NO WAIT("...")` justification tokens parse-validated; bare forms reject at parse
+- WaitMode enum with `PerStatement` (default), `Off`; scoped implicit waits (per-index) per R-12 (PerMigration deferred to Phase 2 since it depends on cross-statement dirty-index tracking)
+- Parse-time syntactic unsafe-op enumeration per R-18
+- $body sibling resolution + reserved namespace policy per R-09 (reserved: `$body`, `$query`, `$script`, scope names `env`, `config`, `runtime`, `secrets`)
+- R-24b lock contention + crash recovery integration tests pass
+
+### Tasks (subtasks added during execution)
+
+- **1.1** Options + DI extension + `WithProductionDefaults()` (ADR-0012); IConfiguration binding from `Migrations:OpenSearch:*`
+- **1.2** `IBootstrapStep` interface + initial steps (RestPing, ClusterHealth, LedgerInit, LockInit) + `OpenSearchBootstrapper` state-machine façade (ADR-0014)
+- **1.3** Ledger init step with strict mapping + forensic fields; `AssumeIndicesExist` verification path (ADR-0013)
+- **1.4** Lock init step with `number_of_replicas: 0` (ADR-0013, PA-2)
+- **1.5** `LockHandle` — CAS acquire, heartbeat renewal loop with TimeProvider, realtime-GET on takeover, `LockMaxLifetime` cancellation contract; lock parameter validation (R-05)
+- **1.6** `OpenSearchRecordStore : IMigrationRecordStore` (ADR-0003); ledger CAS write with `?refresh=wait_for`
+- **1.7** Full Parlot grammar for foundation verbs (extends spike grammar from 0.5); reserved namespace policy
+- **1.8** Statement compilers (AST → IRequest) for foundation verbs
+- **1.9** `IF [NOT] EXISTS` live HEAD checks
+- **1.10** `UNSAFE` + `NO WAIT` justification tokens; structured WARN log events
+- **1.11** WaitMode enum + scoped `ImplicitWaitMiddleware` (R-12)
+- **1.12** Parse-time R-18 syntactic unsafe-op enumeration
+- **1.13** Startup banner emitting all resolved configuration (R-25)
+- **1.14** Integration tests: empty migration end-to-end + R-24b lock contention/crash recovery suite (uses controllable TimeProvider for determinism)
+
+Tag `opensearch/phase-1-foundation` after completion criteria met.
+
+---
+
+## Phase 2: Atomic Operations + Composite + Cross-Cutting
+
+**Goal:** Zero-downtime alias swap reindex pattern works against multi-node cluster. `MIGRATE INDEX` composite verb decomposes correctly with **runtime template lookup** (per ADR-0015). Templates, ISM policies, partial rollback, all cross-cutting safety features land. Multi-node Testcontainers Compose CI integrated.
+
+**Estimated effort:** 2-3 days.
+
+**Completion Criteria:**
+- REINDEX with Tasks API polling (R-11); `op_type: create` auto-injection (validated against Phase 0 spike)
+- ALIAS SWAP with in-body atomic precondition (R-16, NF-2)
+- ALIAS ADD / ALIAS REMOVE
+- TEMPLATE / COMPONENT / POLICY / APPLY POLICY verbs
+- **MIGRATE INDEX composite (R-30)** — parser produces decomposed AST sequence (CREATE + REINDEX + ALIAS SWAP) with `BodySource = TemplateRef("foo")` for `WITH TEMPLATE`; runtime middleware resolves template body via `GET /_index_template/<name>` immediately before CREATE INDEX dispatch (per ADR-0015 — parser is offline-pure)
+- `WHEN VERSION` semver comparator (R-15a) — `'2.9' < '2.10'` correct
+- Component-template-aware `dynamic: strict` injection (R-17 — skipped on `composed_of`)
+- ~~Hyperbee.Templating four-scope renderer~~ — REMOVED per ADR-0016
+- ~~`SecretMarker` + `SecretScrubber` log sink wrapper~~ — REMOVED per ADR-0016 (host-level Serilog config handles option-value redaction if needed)
+- `ActiveContext` + `ContextResolutionPolicy` (R-15)
+- `WaitMode.PerMigration` implementation (dirty-index tracking + consolidated end-of-migration wait)
+- Down direction execution; partial-rollback ledger semantics (R-19) — `status: partially_rolled_back` + `failedStatementIndex`; runner exposes `--force-resume`
+- **Multi-node Testcontainers Compose harness** (per A2/A3 — built here, not in Phase 0)
+- All R-24c production scenarios pass (15 enumerated rows, 14 active — (l) removed per ADR-0016; see table below)
+
+### R-24c production scenario test table (per A11)
+
+| Test | Description | Phase introducing | Required topology |
+|------|-------------|-------------------|-------------------|
+| (a) | Zero-downtime alias swap with active background writes | Phase 2 | Multi-node |
+| (b) | ISM policy attachment to existing index (`POST /_plugins/_ism/add`) | Phase 2 | Single-node |
+| (c) | Mapping update on existing index "no reindex" gotcha + diagnostic warning | Phase 2 | Single-node |
+| (d) | Static settings update fails clearly without `CLOSE`, succeeds with it | Phase 1 | Single-node |
+| (e) | Reindex of 100K docs streams progress, doesn't time out at HTTP layer | Phase 2 | Single-node |
+| (f) | Bulk-load with simulated 429 retries | Phase 3 | Single-node |
+| (g) | `dynamic: strict` rejects unexpected fields | Phase 1 | Single-node |
+| (h) | Lock false-takeover scenario with simulated refresh-lag | Phase 1 | Single-node |
+| (i) | Reindex stale-dst recovery — `op_type:create` skips partial prior-run docs safely | Phase 2 | Single-node |
+| (j) | `LockMaxLifetime` cancellation contract — in-flight migration aborts cleanly | Phase 1 | Single-node |
+| (k) | Lock primary-shard contention — N concurrent acquires, replicas:0 verified | Phase 1 | Multi-node |
+| (l) | ~~Templating JSON-context~~ — REMOVED per ADR-0016 | — | — |
+| (m) | Ledger refresh budget — 100-migration bootstrap completes within budget | Phase 1 | Multi-node |
+| (n) | Partial-rollback ledger state — `status: partially_rolled_back` with `failedStatementIndex` | Phase 2 | Single-node |
+| (o) | `MIGRATE INDEX` composite produces identical end-state to hand-composed sequence | Phase 2 | Single-node |
+
+### Tasks (subtasks added during execution)
+
+- **2.1** REINDEX verb + Tasks API polling middleware with progress thresholds (R-11; INFO at 10/25/50/75/90%, DEBUG every poll)
+- **2.2** ALIAS SWAP with in-body atomic precondition; ALIAS ADD / ALIAS REMOVE
+- **2.3** TEMPLATE / COMPONENT / POLICY / APPLY POLICY verbs
+- **2.4** `MIGRATE INDEX` composite — parser decomposition + runtime template resolution middleware (per ADR-0015)
+- **2.5** WHEN VERSION semver parser + comparator (R-15a)
+- **2.6** Component-template-aware `dynamic: strict` injection refinement
+- **2.7** ~~Hyperbee.Templating renderer~~ — REMOVED per ADR-0016. Env-variation flows through typed `OpenSearchMigrationOptions` properties + `IConfiguration` binding (matches Aerospike/Couchbase/MongoDB/Postgres pattern)
+- **2.8** ActiveContext + ContextResolutionPolicy (R-15)
+- **2.9** WaitMode.PerMigration (dirty-index tracking)
+- **2.10** Down direction execution; partial-rollback ledger semantics; runner `--force-resume` flag
+- **2.11** Multi-node Testcontainers Compose harness (3 nodes, Compose-style)
+- **2.12** R-24c production scenario tests — full suite per table above (15 enumerated rows, 14 active; (l) removed per ADR-0016)
+
+Tag `opensearch/phase-2-atomic-composite` after completion criteria met.
+
+---
+
+## Phase 3: Distribution + Polish
+
+**Goal:** Provider is shippable. SigV4 works on AWS Managed; runner project, samples, multi-topology CI, AWS scheduled validation runbook all in place.
+
+**Estimated effort:** 1-2 days.
+
+**Completion Criteria:**
+- Auth: basic, API key, mTLS in core package; SigV4 via opt-in extension
+- AWS endpoint loud-fail (R-21); ISM endpoint capability detection
+- SigV4 per-request credential resolution (PM-2 mitigation)
+- `BulkAllObservable` wrapper with documented defaults (R-20)
+- Runner project mirrors existing pattern (R-26)
+- Samples project includes all 10 samples per R-27 — featured: `MIGRATE INDEX` composite, `UNSAFE("...")` and `NO WAIT("...")` justification idioms with explicit syntactic enumeration of operations requiring them
+- Multi-node Testcontainers Compose CI runs every PR (R-28b Must)
+- AWS Managed scheduled validation runbook in repo (R-28c Should); release-checklist line: "AWS validation status documented in README with date of last successful run, OR 'AWS unverified for this release' notice with reason."
+- Documentation: README, getting-started guide, **template-propagation FAQ** explicitly answering "how do I apply template changes to existing data?" with `MIGRATE INDEX` as the answer
+- ADR compliance audit — verify each of ADR 0001-0015 has either a passing test or doc reference
+
+### Tasks (subtasks added during execution)
+
+- **3.1** Basic auth, API key, mTLS in core package
+- **3.2** SigV4 opt-in extension; AWS endpoint loud-fail; ISM endpoint capability detection (R-21); per-request credential resolution
+- **3.3** `BulkAllObservable` wrapper with R-20 defaults
+- **3.4** `Hyperbee.MigrationRunner.OpenSearch` runner project mirroring existing runner
+- **3.5** `Hyperbee.Migrations.OpenSearch.Samples` — all 10 samples; `MIGRATE INDEX` featured
+- **3.6** Multi-node Testcontainers Compose CI integration (uses Phase 2 harness from Task 2.11)
+- **3.7** AWS Managed scheduled validation runbook (`docs/runbooks/opensearch-aws-validation.md`)
+- **3.8** Documentation: README, getting-started, template-propagation FAQ
+- **3.9** ADR compliance audit (final regression check, not first-time)
+
+Tag `opensearch/phase-3-shippable` after completion criteria met.
+
+---
+
+## Definition of Done (per phase)
+
+Before tagging a phase snapshot:
+- [ ] All phase completion criteria checked
+- [ ] All tests green (unit + integration)
+- [ ] `dotnet build` clean across all projects
+- [ ] No new warnings introduced
+- [ ] Plan checkboxes updated for completed tasks
+- [ ] Status Summary updated; Learnings appended if applicable
+- [ ] ADRs touched by this phase verified against acceptance criteria (per B1 / NF-5)
+
+## Learnings Ledger
+
+### Phase 0 Task 0.4 — Hyperbee.Templating decision (rejected → ADR-0016)
+
+After the spike landed, maintainer review surfaced that no other provider uses Hyperbee.Templating. Decision: don't adopt — see [ADR-0016](../../decisions/0016-no-file-level-templating.md). The spike code was removed in commit `95825f0`. The first-contact issues the spike documented in Hyperbee.Templating 3.4.1 are preserved here because they (a) prompted a separate fix to the templating engine's README/docs, and (b) are useful if the decision is ever revisited.
+
+PM-5 from assessment 0002 was right to worry about first-contact bugs. Background sub-agent found four:
+
+1. **README misleading on `{{#if}}` syntax**. Engine 3.4.1 does NOT accept the leading `#` for control-flow tokens (only the README says it does). Production migrations must use `{{if config.x}}{{else}}{{/if}}` — drop the `#`. Documented in test code.
+
+2. **Default `KeyHelper.ValidateKey` forbids `.` in identifiers**. Without a `Validator` override on `TemplateOptions`, scope-prefixed keys like `config.indexPrefix` fail validation. The renderer ships a custom `IsValidScopedKey` that admits a single `.` joining two letter-led segments plus the bracket-suffix indexing rule (`runtime.nodes[0]`). Future provider work that uses Templating directly must either reuse this validator or invent equivalent rules.
+
+3. **Fat-arrow rewriter cannot traverse dotted keys**. Inside `each`/`while`/`if` fat-arrow expressions, `x.config.indexPrefix` rewrites to `x["config"].indexPrefix` (string has no `.indexPrefix` member). Use the indexer form: `x["runtime.nodes"].Split(",")`. Literal token form `{{config.indexPrefix}}` works directly via the validator override (#2).
+
+4. **`each n,i:...` index variant is documented in source comments but not implemented in 3.4.1**. Workaround used in iteration test: an inline define token (`{{seen:1}}`) flipped after each body to track first-iteration sentinel. Worth checking in future Templating versions.
+
+These are documented inline in the renderer + test code so future contributors don't re-discover them.
+
+### Phase 0 Task 0.5 — Architecture validated at unit level
+
+ADR-0011 hybrid + ADR-0015 offline-pure parser holds: parser produces AST flags, runtime middleware merges into JSON tree. 36 unit tests covering all 5 CREATE INDEX edge cases + REINDEX edge cases + tree-immutability invariant pass on net8/9/10. Phase 0 kill criterion not fired at this level — live-cluster validation (Task 0.6) remains.
+
+## Status Summary
+
+| Phase | Status | Notes |
+|-------|--------|-------|
+| 0 — Scaffold + Spike | **Done** | Spike kill criterion cleared; ADR-0011 hybrid parser+runtime injection validated against real OpenSearch. |
+| 1 — Foundation + Foundation Verbs | **Done** | Bootstrapper façade + 4 default steps; auto-renewing LockHandle with realtime-GET takeover; ledger with forensic fields; OpenSearchRecordStore; foundation verb grammar (8 verbs); StatementDispatcher; OpenSearchResourceRunner; ImplicitWaitMiddleware (PerStatement). |
+| 2 — Atomic + Composite + Cross-Cutting | **Done** | All atomic/composite verbs (CREATE TEMPLATE, ALIAS SWAP, REINDEX, MIGRATE INDEX, APPLY POLICY, UPDATE SETTINGS/MAPPING, WAIT FOR/UNTIL TASK); R-15 context filter; R-12 WaitMode.PerMigration + NO WAIT justification; R-24b/c integration tests; multi-node Testcontainers harness (3-node Docker network). |
+| 3 — Distribution + Polish | **Done** | Auth (Basic, ApiKey, mTLS); SigV4 separate package per ADR-0021/option-E; AWS endpoint loud-fail; ISM capability detection; BulkAllObservable wrapper (R-20); runner project; samples project (10 samples); multi-node CI workflow; AWS validation runbook; top-level docs (provider page, FAQ, README); ADR compliance audit (0001-0017 PASS, 17/17 honored). |
+
+**Current task:** All phases complete. ADR compliance audit (0001-0017) PASS — 17/17 honored, 3 soft spots noted (none blocking). See `docs/research/0004-adr-compliance-audit.md`.
+**Next action:** Move plan to `docs/plans/archive/` after final user signoff and tag `opensearch/v1`.
+**Blockers:** None.
+
+---
+
+## Plan Self-Check
+
+- **Dependencies:** Tasks ordered with blockers first (audit before scaffolding; spike validates before foundation; foundation before composite; composite before distribution).
+- **Clarity:** Phase 0 is subtask-detailed; Phases 1-3 are task-level with subtasks expanded by `/nop:implement` at phase start.
+- **Vertical slices:** Phase 0 demoable (spike tests pass); Phase 1 demoable (empty migration runs end-to-end); Phase 2 demoable (zero-downtime alias swap test passes); Phase 3 demoable (shippable).
+- **ADRs written:** 0001-0015 in `docs/decisions/`; per-phase DoD includes ADR check.
+- **Riskiest assumption isolated:** Phase 0's spike is gated by an objective kill criterion; fallback (Approach A) documented if spike fails.
+- **R-24c enumerated:** 15-test table specifies which phase introduces each scenario and required topology.
+- **Velocity-calibrated:** estimated 3-7 days focused work (1-2 days polish), matching maintainer's actual provider-development pace.
diff --git a/docs/plans/archive/INDEX.md b/docs/plans/archive/INDEX.md
new file mode 100644
index 0000000..1ac494e
--- /dev/null
+++ b/docs/plans/archive/INDEX.md
@@ -0,0 +1,5 @@
+# plans/archive/INDEX.md
+
+| Plan | Title | Completed | Summary |
+|------|-------|-----------|---------|
+| 2026-05-opensearch-provider | [OpenSearch Provider for Hyperbee.Migrations](2026-05-opensearch-provider.md) | 2026-05-03 | All 4 phases delivered. Production-capable OpenSearch provider with Parlot-based statement grammar, ADR-0011 hybrid parser+runtime injection, ADR-0017 body-source grammar (3 forms), ADR-0014 state-machine bootstrapper façade, auto-renewing distributed lock with realtime-GET takeover, ledger with forensic fields, multi-node Testcontainers CI, AWS validation runbook. ADR compliance audit (0001-0017) PASS. |
diff --git a/docs/requirements/INDEX.md b/docs/requirements/INDEX.md
new file mode 100644
index 0000000..ae748e7
--- /dev/null
+++ b/docs/requirements/INDEX.md
@@ -0,0 +1,5 @@
+# requirements/INDEX.md
+
+| # | Title | Status | Date | Summary |
+|-----------|------------------------------------------------------------------------|--------|------------|------------------------------------------------------------------------------------------|
+| opensearch-provider | [OpenSearch Provider for Hyperbee.Migrations](opensearch-provider.md) | Draft (revised post-assessment + MIGRATE composite) | 2026-05-02 | 31 testable requirements: P0/P1 amendments from assessment 0002 (parser-level safe defaults, realtime-GET lock takeover, partial-rollback ledger, ledger forensics, atomic alias, secret scrubbing, multi-node CI Must) + R-30 MIGRATE INDEX composite verb + R-29 WithProductionDefaults + R-15a semver. Parlot non-negotiable per ADR-0001 |
diff --git a/docs/requirements/opensearch-provider.md b/docs/requirements/opensearch-provider.md
new file mode 100644
index 0000000..6e5230d
--- /dev/null
+++ b/docs/requirements/opensearch-provider.md
@@ -0,0 +1,833 @@
+# OpenSearch Provider for Hyperbee.Migrations
+
+**Status:** Draft (revised after assessment)
+**Date:** 2026-05-02
+**Research:** [docs/research/0001-opensearch-provider.md](../research/0001-opensearch-provider.md)
+**Assessment:** [docs/research/0002-opensearch-provider-assessment.md](../research/0002-opensearch-provider-assessment.md)
+**Existing ADRs constraining the design:** ADR-0001 through ADR-0010
+
+## Problem
+
+Hyperbee.Migrations ships providers for Aerospike, Couchbase, MongoDB, and Postgres but has no OpenSearch provider. Teams that use OpenSearch for search, log analytics, or vector workloads have no first-class migration story in the .NET ecosystem — the only viable options are JVM tools (elasticsearch-evolution, hubrick), the Liquibase OpenSearch extension (single `httpRequest` change type, gives up on abstraction), or hand-rolled imperative scripts. The result is undocumented schema drift, unsafe ad-hoc reindexes, and no shared lock against concurrent CI runners.
+
+A native provider closes the gap and lets the same teams that use Hyperbee.Migrations for Postgres/Couchbase use it for OpenSearch with consistent ergonomics: versioned migrations, distributed locks, JSON resource files, and a thin DSL over native APIs.
+
+## Requirements
+
+### Lifecycle & Warmup
+
+#### R-01: Provider implements the standard IMigrationRecordStore contract
+
+**Actor:** Hyperbee.Migrations runtime — invoked by application startup
+**Intention:**
+- *Immediate:* OpenSearch provider plugs into existing `MigrationRunner` without core changes
+- *Outcome:* Consumers compose providers identically across databases
+- *Metric:* `MigrationRunner` has zero OpenSearch-specific code paths
+
+**Friction today:**
+- Current: No provider exists; teams either skip migrations or hand-roll one-off scripts
+- Failure mode: Schema drift across environments; nothing tracks what's been applied
+- Frequency: Every team adopting OpenSearch hits this on first deploy
+
+**Given:** A consumer registers `services.AddOpenSearchMigrations(...)`
+**When:** `MigrationRunner.RunAsync` is invoked
+**Then:** The runner discovers, locks, applies, and journals migrations using only the existing core contract; provider supplies an `IMigrationRecordStore` implementation
+**Otherwise:** Any deviation from the contract is a defect, not an extension point
+
+**Priority:** Must — this is the contract gate
+**Confidence:** High (ADR-0003 fixes the contract)
+
+#### R-02: Cluster bootstrapper waits for cluster readiness before any migration runs
+
+**Actor:** Provider startup path — once per `MigrationRunner.RunAsync` invocation
+**Intention:**
+- *Immediate:* Migrations don't fail on transient cluster unavailability during deploy
+- *Outcome:* Pod start order doesn't matter; eventually-consistent cluster startups still succeed
+- *Metric:* Zero "cluster_not_ready"-class failures on healthy clusters
+
+**Friction today:**
+- Current: Couchbase provider already solves this with a 7-state bootstrapper; OpenSearch needs equivalent
+- Failure mode: Deploys race the cluster's startup and fail intermittently
+- Frequency: Every cold-start deploy and every CI run with a fresh container
+
+**Given:** Provider has just been initialized; cluster reachability is unknown
+**When:** `InitializeAsync` runs before any migration is applied
+**Then:** Provider polls `GET /_cluster/health?wait_for_status=<threshold>&timeout=<timeout>` with bounded retries until ready, OR fails with a clear `OpenSearchNotReadyException` after the configured global timeout
+**Otherwise:** A clear distinction is logged between "cluster unreachable" (network) and "cluster reachable but unhealthy" (status red / pending tasks)
+
+**Depends on:** R-03
+**Priority:** Must
+**Confidence:** High
+
+#### R-03: Cluster health threshold is per-environment configurable
+
+**Actor:** Operator wiring up the provider for a given environment
+**Intention:**
+- *Immediate:* Single-node dev clusters and multi-node prod clusters both work without code changes
+- *Outcome:* Same migration code runs in unit tests, dev, staging, and prod
+- *Metric:* No environment-specific forks of the migration runner config
+
+**Friction today:**
+- Current: Tools that hardcode green never run on single-node dev (replicas have nowhere to go)
+- Failure mode: Hardcoded threshold blocks dev or weakens prod
+- Frequency: Every multi-environment rollout
+
+**Given:** Provider options expose a `ClusterHealthThreshold` property accepting `Yellow` or `Green`
+**When:** Bootstrapper or implicit waits run
+**Then:** They wait for the configured threshold (SDK default `Yellow` so dev/CI single-node clusters work out of the box; production deployments call `WithProductionDefaults()` per R-29 to flip to `Green`)
+**Otherwise:** Setting an unrecognized value throws at options-binding time, not runtime; resolved value is logged at INFO via the startup banner (R-25)
+
+**Depends on:** R-29
+**Priority:** Must
+**Confidence:** High
+
+### Distributed Locking
+
+#### R-04: Lock acquired via optimistic concurrency on a singleton lock document
+
+**Actor:** Provider — once per migration run, before any migration applies
+**Intention:**
+- *Immediate:* Concurrent CI/deploy runners cannot overlap migrations
+- *Outcome:* Deterministic single-writer semantics on schema operations
+- *Metric:* Zero observed concurrent migration runs in production
+
+**Friction today:**
+- Current: OpenSearch has no native lock primitive; no .NET library implements one
+- Failure mode: Without a lock, two pods racing to apply the same migration produces partial state
+- Frequency: Every deploy with replicas > 1; every CI matrix run
+
+**Given:** Two runners attempt `CreateLockAsync` simultaneously
+**When:** Both read the lock doc, attempt to write with `if_seq_no`/`if_primary_term`
+**Then:** Exactly one succeeds; the loser receives a 409 `version_conflict_engine_exception` and surfaces `MigrationLockUnavailableException`. The lock index is created (or asserted) with `number_of_replicas: 0` to eliminate replica-write coupling on the lock primary shard (PA-2 mitigation)
+**Otherwise:** Loser does not retry implicitly; caller decides
+
+**Depends on:** R-06
+**Priority:** Must
+**Confidence:** High (ADR-0005 — provider-native locking; pattern ports from Aerospike)
+
+#### R-05: Lock auto-renews via background heartbeat with bounded lifetime, validated parameters, realtime takeover, and explicit cancellation
+
+**Actor:** Provider lock handle — runs for the duration of `MigrationRunner.RunAsync`
+**Intention:**
+- *Immediate:* Long-running migrations don't lose their lock and get crashed by takeover; misconfigured lock parameters fail loudly at startup
+- *Outcome:* Crashed runners' stale locks are reclaimable by the next runner; refresh-lag does not cause false takeovers
+- *Metric:* Zero false-takeovers during active migrations; zero permanent lock-out from crashed runners; zero "ledger written but lock was lost" silent corruptions
+
+**Friction today:**
+- Current: Aerospike provider just shipped this exact pattern; OpenSearch needs equivalent — but OpenSearch has refresh-interval visibility lag that Aerospike does not
+- Failure mode: Without renewal, a long migration loses its lock; without bounded lifetime, a crashed runner blocks indefinitely; without realtime takeover, search-staleness causes false takeover; without an explicit cancellation contract, max-lifetime can be hit while the runner blindly continues
+- Frequency: Reindexes and policy rollouts can take minutes-to-hours; crashes happen
+
+**Given:** A lock has been acquired with `Acquired_At` and `Last_Heartbeat` timestamps
+**When:** The lock handle's heartbeat timer fires every `LockRenewInterval` (default 30s)
+**Then:**
+1. Heartbeat updates `Last_Heartbeat` via CAS (`if_seq_no`/`if_primary_term`)
+2. Takeover candidates that observe staleness MUST use `GET /{lockIndex}/_doc/{id}?realtime=true` (not search) to verify the lock document's actual write recency, eliminating refresh-lag false positives
+3. Reaching `LockMaxLifetime` triggers an explicit cancellation contract: the in-flight migration's `CancellationToken` is cancelled, current statement aborts, ledger write for the in-progress migration is skipped, and `MigrationLockExpiredException` is surfaced — the runner does NOT silently continue
+4. Options are validated at startup: `LockRenewInterval < LockStaleAfter < LockMaxLifetime` AND `LockStaleAfter ≥ 2 * LockRenewInterval`; violations throw `OptionsValidationException` with the offending pair and the recommended adjustment
+
+**Otherwise:** A would-be acquirer that finds `Last_Heartbeat` older than `LockStaleAfter` (default 60s = 2x renew interval) AND confirms staleness via realtime GET overwrites the lock via CAS
+
+**Depends on:** R-04
+**Priority:** Must
+**Confidence:** High (direct port of Aerospike `LockHandle` with OpenSearch-specific realtime/cancellation additions)
+
+**Notes:**
+- Convenience presets `LockTuning.Default` / `LockTuning.LongRunningReindex` / `LockTuning.FastCi` are documented in code comments and samples (R-27), not as requirements; setting one parameter explicitly without the others uses the preset's coherent values, not framework defaults
+
+### Ledger Storage
+
+#### R-06: Migration ledger stored in a strict-mapped OpenSearch index
+
+**Actor:** Provider — read on startup, written after each migration
+**Intention:**
+- *Immediate:* Authoritative record of what's been applied lives in OpenSearch itself
+- *Outcome:* No external dependency for migration state; backups include migration state
+- *Metric:* Ledger and data live in the same cluster snapshot
+
+**Friction today:**
+- Current: Tools like elastic-migrations (PHP) split ledger into a separate DB — operationally awkward
+- Failure mode: External-DB ledger introduces a second system that must be backed up coherently with OpenSearch
+- Frequency: Every backup/restore exercise
+
+**Given:** Provider initializes for the first time
+**When:** `InitializeAsync` runs
+**Then:** Provider creates an index (default name `.migrations`, configurable) with `dynamic: strict` mapping containing typed fields:
+- `id` (keyword) — migration record id (per ADR-0009 convention)
+- `runOn` (date) — UTC timestamp
+- `direction` (keyword) — `Up` | `Down`
+- `status` (keyword) — `succeeded` | `failed` | `partially_rolled_back`
+- `appliedBy` (keyword) — runner identity: `{machineName}/{processId}[/{RunnerId}]` for postmortem forensics
+- `checksum` (keyword) — content hash of statements + body
+- `error` (text) — exception details on failure
+- `failedStatementIndex` (integer, nullable) — when `partially_rolled_back`, the index of the rollback statement that failed
+
+Creation is idempotent. Strict mapping is **immutable per the Forbidden trust boundary** — schema changes are not supported in v1; field additions must land before release.
+
+**Otherwise:** If the index exists with an incompatible mapping (missing required fields), fail at startup with a clear remediation message naming the missing fields
+
+**Priority:** Must
+**Confidence:** High
+
+#### R-07: Ledger writes use optimistic concurrency with refresh-wait
+
+**Actor:** Provider — once per migration applied
+**Intention:**
+- *Immediate:* Concurrent runners can't double-apply the same migration even if R-04 lock fails
+- *Outcome:* Defense in depth against split-brain
+- *Metric:* Re-running a journaled migration is a no-op (returns from `ExistsAsync`)
+
+**Given:** A migration has just completed `UpAsync` successfully
+**When:** Provider calls `WriteAsync(record)`
+**Then:** Write uses `if_seq_no`/`if_primary_term` and `?refresh=wait_for`; subsequent `ExistsAsync` returns true without delay
+**Otherwise:** A 409 indicates concurrent writer; surface as a typed exception so the caller can bail out cleanly
+
+**Depends on:** R-06
+**Priority:** Must
+**Confidence:** High
+
+**Performance budget:** R-24c includes a measured-cost test asserting "100-migration bootstrap completes in < N seconds" (N to be determined empirically against a 3-node Testcontainers cluster). If the budget is exceeded, the alternative is `?refresh=true` for ledger writes (the ledger is a hot single-doc index where the cost of forced refresh is bounded). Removing the refresh wait is **not** an alternative — `ExistsAsync` read-after-write would be unreliable.
+
+### Statement Grammar & Resources
+
+#### R-08: Statement grammar is a thin Parlot verb prefix over opaque JSON
+
+**Actor:** Migration author — writing JSON resource files
+**Intention:**
+- *Immediate:* Author writes one statement per logical operation in a familiar Couchbase-provider style
+- *Outcome:* Migrations are reviewable in PRs without understanding a custom format
+- *Metric:* New authors are productive within an hour of seeing a sample
+
+**Friction today:**
+- Current: Existing Couchbase, Aerospike, MongoDB providers use Parlot grammars over JSON resource files; OpenSearch should match the house style
+- Failure mode: Inventing a new file format fragments author muscle memory
+- Frequency: Every new migration
+
+**Given:** A migration ships a `statements.json` resource alongside its class
+**When:** The provider runs the migration
+**Then:** Each entry in `statements[]` is parsed by Parlot recognizing the verb set in R-08a; verb prefix is matched, remainder of payload is opaque JSON passed through to OpenSearch
+**Otherwise:** Parser failures include the file name, statement index, and the recognized verb-so-far in the error message
+
+**Priority:** Must
+**Confidence:** High (ADR-0001, ADR-0002)
+
+**Parser choice is non-negotiable.** Parlot is the house standard across all Hyperbee.Migrations providers per ADR-0001 — no alternative parser (regex, ANTLR, Sprache/Pidgin, hand-rolled state machine) is acceptable for this provider or any future grammar work. Future verb additions extend the Parlot grammar; they do not introduce a second parsing path.
+
+#### R-08a: Verb set covers index/mapping/settings/template/alias/policy/reindex/refresh/wait
+
+**Given:** R-08 grammar is in place
+**When:** A migration uses any of the v1 verb set
+**Then:** Each verb compiles to the corresponding OpenSearch REST call:
+- `CREATE INDEX <name> [IF NOT EXISTS] [WITH BODY $body]`
+- `DROP INDEX <name> [IF EXISTS]`
+- `UPDATE MAPPING ON <index> WITH BODY $body`
+- `UPDATE SETTINGS ON <index> [CLOSE] WITH BODY $body`
+- `CREATE TEMPLATE <name> WITH BODY $body`
+- `CREATE COMPONENT <name> WITH BODY $body`
+- `ALIAS SWAP <alias> FROM <old> TO <new>` / `ALIAS ADD <alias> ON <index>` / `ALIAS REMOVE <alias> ON <index>`
+- `CREATE POLICY <name> WITH BODY $body` / `APPLY POLICY <name> TO <index>`
+- `REINDEX FROM <source> TO <dest> [WITH BODY $body] [WAIT FOR COMPLETION true|false]` — **provider auto-injects `op_type: create` into the request body by default** (parser-level safe-default; closes PM-3). Authors who explicitly want re-write semantics opt out with `REINDEX UNSAFE FROM <source> TO <dest> ...` (justification required per R-18)
+- `MIGRATE INDEX <old> TO <new> [WITH TEMPLATE <name> | WITH BODY $body] [VIA ALIAS <alias>]` — composite verb encoding the canonical zero-downtime reindex-and-swap pattern (see R-30)
+- `REFRESH <index>`
+- `WAIT FOR <status> [ON <index>] [TIMEOUT <duration>]` — `WAIT FOR YELLOW` is the documented "not red" idiom; no separate `WAIT FOR not red` verb in v1
+- `WAIT UNTIL TASK <taskId> COMPLETE [TIMEOUT <duration>]`
+
+**Depends on:** R-08
+**Priority:** Must
+**Confidence:** High (verb set derived from research §2.2 / §3.4)
+
+**Safe-default principle:** Where the lazy-path call would produce silently incorrect behavior, the parser injects the safe default at compile time — same precedent as R-17's `dynamic: strict` injection. R-24c integration test asserts `op_type: create` is on the wire by default for `REINDEX`.
+
+#### R-09: JSON bodies are sibling object references, not embedded strings
+
+**Actor:** Migration author
+**Intention:**
+- *Immediate:* Mappings/settings/policies are real JSON objects in the resource file, not escaped strings
+- *Outcome:* IDE JSON tooling validates payloads; no quote-escaping bugs
+- *Metric:* Zero migrations fail in production due to JSON-string escaping errors
+
+**Given:** A statement uses `WITH BODY $name`
+**When:** Provider executes the statement
+**Then:** Provider resolves `$name` against sibling properties on the same statement object; the resolved value is sent verbatim as the request body
+**Otherwise:** Missing or undefined `$name` reference fails at parse time with file/index/name in the error
+
+**Examples:**
+```json
+{
+ "statement": "CREATE INDEX `users-v2` WITH BODY $usersIndex",
+ "usersIndex": { "settings": { "number_of_shards": 2 }, "mappings": { "properties": { ... } } }
+}
+```
+
+**Namespace policy** (closes MD-3 at parser level, not docs):
+- `$` references in statement strings (Parlot-resolved) MUST resolve against sibling JSON properties on the same statement object — no other resolution path
+- `{{<scope>.<key>}}` references in any string (templating-resolved) MUST resolve against R-10 scopes — no other resolution path
+- Reserved `$` identifiers are checked at parse time: `$body`, `$query`, `$script` are reserved keywords; sibling properties using these names without a corresponding verb consumer fail at parse
+- Reserved templating scope names (`env`, `config`, `runtime`, `secrets`) cannot be used as `$name` body references (parse-time error names the conflict)
+
+**Depends on:** R-08
+**Priority:** Must
+**Confidence:** High
+
+### Env-variation (no file-level templating)
+
+#### R-10: ~~Hyperbee.Templating renders resources before parse~~ **STRUCK per ADR-0016**
+
+This requirement was removed. The OpenSearch provider matches the house style of the other four providers (Aerospike, Couchbase, MongoDB, Postgres): env-variation is handled through typed `OpenSearchMigrationOptions` properties + `IConfiguration` binding from `appsettings.{Environment}.json`, not through a file-level templating engine. Per ADR-0016, re-introducing templating requires a superseding ADR with a documented use case that typed options cannot satisfy.
+
+The Phase 0 spike (Task 0.4) that wired Hyperbee.Templating was reverted. The validation that the engine works for this use case is preserved as a Learnings Ledger entry, not as code.
+
+### Async & Wait Semantics
+
+#### R-11: Long-running operations use the Tasks API with polling
+
+**Actor:** Provider — automatic for `REINDEX`, snapshot, restore, force-merge
+**Intention:**
+- *Immediate:* Reindexes longer than 30s don't time out at the HTTP layer
+- *Outcome:* Migrations of any duration succeed; progress is visible in logs
+- *Metric:* Successful reindex of an index with 10M+ docs without operator intervention
+
+**Given:** A statement triggers an operation that supports `wait_for_completion=false`
+**When:** Provider sends the request
+**Then:** Request includes `?wait_for_completion=false`; provider polls `GET /_tasks/{task_id}` with exponential backoff (start 500ms, cap 30s) until `completed: true`, then surfaces `response.error` if non-null; intermediate `status.created`/`status.total` is logged at **DEBUG** every poll, with INFO emitted only on percentage-progress thresholds (10%, 25%, 50%, 75%, 90%) or backoff-state transitions
+**Otherwise:** Task cancellation via `CancellationToken` calls `POST /_tasks/{id}/_cancel` and waits for confirmation before returning
+
+**Depends on:** R-08a
+**Priority:** Must
+**Confidence:** High
+
+#### R-12: Implicit cluster-health wait follows mutating structural operations, scoped and mode-controlled
+
+**Actor:** Provider — automatic after mutating statements per `WaitMode`
+**Intention:**
+- *Immediate:* Authors don't have to remember to add `WAIT FOR YELLOW` after every `CREATE INDEX`, but production deployments don't suffer N+1 health-check storms
+- *Outcome:* Migrations are robust by default; cluster master is not flooded by per-statement waits at scale
+- *Metric:* No "index_not_found_exception" failures on subsequent statements within the same migration; no observable master-task-queue pressure from health checks even at 1000-statement runs
+
+**Given:** Provider options expose a `WaitMode` enum: `PerStatement` (current behavior; SDK default), `PerMigration` (one wait at migration end gating all dirty indices touched; default in production via R-29), `Off` (only R-13 explicit waits run). A statement of type `CREATE INDEX`, `REINDEX`, `ALIAS SWAP`, `UPDATE SETTINGS`, or `APPLY POLICY` completes
+**When:** Provider moves to the next statement (PerStatement) or finishes the migration (PerMigration)
+**Then:**
+1. Implicit waits scope to the mutated index by default: `GET /_cluster/health/<index>?wait_for_status=<threshold>&timeout=<timeout>` — a permanently-yellow unrelated index (e.g., `.opendistro_security` with unallocated replicas) does NOT stall waits scoped to other indices (closes NF-3)
+2. Cluster-wide health waits are only invoked via explicit `WAIT FOR <status>` (no `ON <index>`) per R-13
+3. Under `PerMigration`, the provider tracks "dirty indices" touched during the migration and issues one consolidated health check at migration end — health is checked per-index in parallel, results aggregated
+
+**Otherwise:** Implicit wait can be skipped per-statement with `NO WAIT("<reason>")` modifier — bare `NO WAIT` fails at parse time. Justification token requires a non-empty reason string; structured WARN log `migration.no_wait{reason, statementIdx, migrationId}` emitted on every use. Under `PerMigration` mode, per-statement `NO WAIT` is parsed but no-op (logged at DEBUG)
+
+**Depends on:** R-03, R-08a, R-29
+**Priority:** Must
+**Confidence:** High (resolves prior Open Question on `NO WAIT` escape syntax; replaces previous Medium-confidence per-statement design)
+
+#### R-13: Explicit `WAIT FOR ...` verbs are first-class statements
+
+**Given:** R-12 is in place
+**When:** An author writes `WAIT FOR GREEN ON users-v2 TIMEOUT 60s` or `WAIT UNTIL TASK <taskId> COMPLETE TIMEOUT 5m`
+**Then:** The verb runs as a standalone statement (no associated mutation), with the same wait/poll semantics
+**Otherwise:** Timeout exceeded surfaces a typed exception with the operation context
+
+**Depends on:** R-08a
+**Priority:** Must
+**Confidence:** High
+
+### Idempotency & Safety
+
+#### R-14: Idempotency markers (`IF [NOT] EXISTS`) check live cluster state
+
+**Given:** A statement carries `IF NOT EXISTS` (create) or `IF EXISTS` (drop)
+**When:** Provider executes the statement
+**Then:** Provider checks the live cluster state (e.g., `HEAD /{idx}`) before issuing the mutating request; non-matching state results in a no-op with INFO log
+**Otherwise:** Race conditions between check and mutate produce a clean error, not a silent failure
+
+**Depends on:** R-08a
+**Priority:** Must
+**Confidence:** High
+
+#### R-15: Conditional execution via `WHEN VERSION` and contexts
+
+**Given:**
+- A statement carries `WHEN VERSION <op> '<version>'` (e.g., `WHEN VERSION > '2.10'`)
+- The wrapper carries `context: ["prod", "staging"]`
+- Provider options expose `ActiveContext` (string, comma-separated tags), bindable from `IConfiguration` key `Migrations:ActiveContext`
+- Provider options expose `ContextResolutionPolicy` enum: `RequireExplicit` (any migration with a `context:` block requires `ActiveContext` to be non-null; null = `MissingActiveContextException` at startup) and `SkipIfUnset` (SDK default). Production deployments call `WithProductionDefaults()` (R-29) which forces `RequireExplicit`. `RunIfUnset` is **not exposed** — silent prod-everywhere behavior is forbidden
+
+**When:** Provider evaluates the statement
+**Then:** Statement is skipped (with INFO log) if the active runtime context isn't in the wrapper's list, or if the version comparison evaluates false
+**Otherwise:** Unparseable version or context expression fails at parse time. Missing `ActiveContext` under `RequireExplicit` policy fails at startup with the exact configuration key to set
+
+**Depends on:** R-15a, R-29
+**Priority:** Must (was Should — promoted because MD-1 was Critical)
+**Confidence:** High (resolves prior Open Question on context source-of-truth)
+
+#### R-15a: `WHEN VERSION` uses semantic version comparison
+
+**Actor:** Migration author writing version-conditional statements
+**Intention:**
+- *Immediate:* `'2.9' < '2.10'` evaluates correctly (it does NOT under string comparison)
+- *Outcome:* Version-gated migrations behave consistently across normal OpenSearch 2.x version bumps
+- *Metric:* Integration test asserts `'2.9' < '2.10'`, `'2.10.0' = '2.10'`, `'2.11.0-SNAPSHOT' > '2.11.0-rc1'`
+
+**Friction today:**
+- Current: A naive string comparator returns `'2.9' > '2.10'` (lexically TRUE), flipping a guarded statement from skipped to executed on a normal point release
+- Failure mode: Silent wrong-execution on cluster version bumps
+- Frequency: Every consumer running `WHEN VERSION` against a 2.x → 2.10+ cluster
+
+**Given:** A statement carries `WHEN VERSION <op> '<version>'` where `<op>` is one of `=`, `!=`, `<`, `<=`, `>`, `>=`
+**When:** Provider parses the statement
+**Then:** Provider parses `<version>` to `System.Version` (or equivalent SemVer type) at parse time; cluster version reported by `GET /` is normalized to the same type. Suffix handling: known suffixes (`-SNAPSHOT`, `-rc`, AWS `OpenSearch_` prefix) are normalized via documented rules; unrecognized suffixes are rejected at parse time with a remediation pointing to the canonical forms
+**Otherwise:** Unparseable version literal fails at parse time with the file/index and the canonical forms in the error message
+
+**Depends on:** R-15
+**Priority:** Must (correctness)
+**Confidence:** High (parse-time validation closes the entire silent-mismatch class)
+
+#### R-16: `ALIAS SWAP` compiles to one atomic `_aliases` request body with in-body precondition
+
+**Given:** A statement `ALIAS SWAP <alias> FROM <old> TO <new>`
+**When:** Provider executes the statement
+**Then:** Provider issues a single `POST /_aliases` with both `remove` and `add` actions in one body — atomic on the cluster master; never two separate requests. The precondition (`<alias>` currently points at `<old>`) is expressed **inside the same atomic body** — the `remove` action targets `<old>` so the cluster rejects the entire body atomically if `<old>` is not the current target
+**Otherwise:** No separate precondition GET — TOCTOU windows are eliminated by relying on the cluster's atomic rejection of the multi-action body when the precondition fails. Failure surfaces as `AliasSwapPreconditionFailedException` with the actual current target named in the message
+
+**Depends on:** R-08a
+**Priority:** Must — this is the headline value-add for zero-downtime patterns
+**Confidence:** High (closes NF-2 TOCTOU)
+
+#### R-17: Component-template-aware `dynamic: strict` injection on flat `CREATE INDEX` bodies only
+
+**Given:** A `CREATE INDEX` statement omits an explicit `dynamic` setting in the body AND the body does NOT include a `composed_of` clause (component-template composition)
+**When:** Provider sends the create request
+**Then:** Provider injects `"mappings": { "dynamic": "strict" }` into the body (preserving existing properties)
+**Otherwise:**
+- If the body contains `composed_of`, injection is **skipped** — component templates layer mappings differently and silent injection at index-create time can clobber a component's `dynamic: false` (closes PM-4)
+- If `dynamic` is explicitly set in the body (`true`, `runtime`, etc.), the author's value is preserved and a structured INFO log emits `migration.dynamic_strict_skipped{reason: "explicit_value", value: "true"}` so the author can verify their value won (closes MD-9)
+- A `CREATE INDEX` body using `composed_of` should set `dynamic: strict` at the component-template level (`CREATE COMPONENT`) — sample R-27 demonstrates the pattern
+
+**Priority:** Must — eliminates the most common silent-failure migration bug (mapping explosion)
+**Confidence:** High (component-template detection is syntactic — `composed_of` key presence)
+
+#### R-18: Parse-time syntactic detection of unsafe operations + UNSAFE justification token
+
+**Given:** A statement attempts a known-unsafe operation. Syntactic enumeration covers: `DELETE INDEX` without `IF EXISTS`, `_delete_by_query`, mapping field type change in `UPDATE MAPPING` body, mapping field removal in `UPDATE MAPPING` body, static settings update without `CLOSE` flag, `REINDEX` without `op_type: create` (covered by R-08a auto-injection), `_close` without explicit pairing
+**When:** Provider parses the statement (before execution)
+**Then:** Parse fails with a remediation hint pointing to the safe alternative (reindex via alias swap; close-update-open with explicit `CLOSE` flag)
+**Otherwise:** Author can override with `UNSAFE("<reason>")` modifier — bare `UNSAFE` fails at parse time. Justification token requires a non-empty reason string. Provider emits structured WARN log `migration.unsafe_bypass{reason, statementIdx, migrationId, operation}` on every bypass. Provider options expose `RequireUnsafeJustification` (SDK default false; `WithProductionDefaults()` flips to true so dev exploration is friction-free but production runs reject bare UNSAFE). The full enumeration of UNSAFE-required operations ships in R-27 samples documentation
+
+**Depends on:** R-08
+**Priority:** Must (was Should — promoted because MD-2 visibility was Critical and the justification token closes the laziest-path bypass)
+**Confidence:** High (syntactic detection only; semantic detection — actually understanding query effects — is deferred to v1.1)
+
+### Rollback
+
+#### R-19: Optional rollback block per statement, best-effort
+
+**Actor:** Migration author writing reversible operations (alias swaps, ISM policy changes)
+**Intention:**
+- *Immediate:* Author can attach an inverse statement that runs on `DownAsync`
+- *Outcome:* Common reversible operations are reversible; irreversible ones are flagged
+- *Metric:* Authors don't try to "undo" mapping changes (which is impossible)
+
+**Given:** A statement object has a `rollback` property containing another statement string
+**When:** Migration runs in `Down` direction
+**Then:**
+1. Each rollback statement is parsed and executed in reverse order
+2. **Partial-rollback semantics (closes NF-5):** If rollback statement N fails after statements N+1..M have already rolled back successfully, the ledger entry for the migration is updated to `status: partially_rolled_back` with `failedStatementIndex: N` (per R-06 schema)
+3. Subsequent runs refuse to retry the migration in either direction without an explicit `--force-resume` operator override; the failure error lists which statements rolled back and which didn't, plus a remediation pointing to `--force-resume`
+4. `--force-resume` is an opt-in CLI flag on the runner project (R-26) that allows the operator to manually drive recovery after they have inspected and reconciled the cluster state
+
+**Otherwise:** Statements without a `rollback` block raise `RollbackNotSupportedException` on Down with the missing-rollback statement index in the message; documentation states this clearly so authors don't expect auto-inverse
+
+**Priority:** Must (was Should — promoted because partial-rollback ledger state is a correctness gap)
+**Confidence:** High (semantics now explicit; ledger state is well-defined)
+
+### Bulk Operations
+
+#### R-20: Bulk loads use `BulkAllObservable` with backoff defaults
+
+**Given:** A migration uses the bulk-load helper to seed many documents
+**When:** Provider issues bulk requests
+**Then:** Defaults are: 5MB batches, exponential backoff on 429 (1s → 2s → 4s, 5 retries), 8x parallelism, `refresh=false`; explicit `_refresh` is invoked once at end
+**Otherwise:** All defaults are overridable via options; 429 responses are logged at WARN with batch size and retry count
+
+**Priority:** Should
+**Confidence:** High
+
+### Authentication
+
+#### R-21: Auth supports basic, API key, mTLS, and AWS SigV4
+
+**Given:** Provider options include auth configuration
+**When:** Provider initializes the OpenSearch client
+**Then:**
+1. Basic auth, API key, and mTLS are supported via the core package; AWS SigV4 is supported via the optional `OpenSearch.Net.Auth.AwsSigV4` package, registered only when an opt-in extension is called
+2. **AWS endpoint loud-fail (closes MD-6, PM-2 partial):** if the configured endpoint matches `*.amazonaws.com` or `*.aoss.amazonaws.com` AND SigV4 has not been registered, provider throws `AwsSigV4NotConfiguredException` at startup with the exact one-line `services.AddAwsSigV4(...)` snippet to add. Inverse mismatch (SigV4 configured against a non-AWS endpoint) emits WARN
+3. **AWS ISM endpoint capability detection (closes PM-6):** when the AWS endpoint pattern matches, the provider probes `_plugins/_ism` capability at bootstrap. AWS Managed domains on older versions exposing ISM at `_opendistro/_ism` (or with insufficient `restapi` IAM permissions) fail loudly with the actual endpoint path tried and the IAM action required
+4. **Credential resolver lifetime (closes PM-2):** SigV4 signer is wired to an identity resolver that re-resolves credentials per request, not cached at client construction — required for IRSA / instance-profile rotation scenarios
+
+**Otherwise:** Missing required auth credentials fail at startup with a clear error indicating which auth mode was configured
+
+**Priority:** Must (basic + SigV4 + AWS endpoint detection); Should (API key, mTLS)
+**Confidence:** High
+
+### DI, Discovery & Conventions
+
+#### R-22: DI extension follows the house pattern
+
+**Given:** Consumer registers `services.AddOpenSearchMigrations(opts => { ... })`
+**When:** Service provider builds
+**Then:** Provider registers `IMigrationRecordStore`, `MigrationRunner`, options factory, and resource runner with the same lifetimes and binding patterns as Couchbase/Aerospike/MongoDB/Postgres providers; `IConfiguration` sections (`Migrations:FromAssemblies`, `Migrations:FromPaths`) merge with the lambda
+**Otherwise:** Misregistration (e.g., calling without an OpenSearchClient configured) fails at startup, not first migration
+
+**Priority:** Must
+**Confidence:** High (ADR-0006)
+
+#### R-23: Reflection-based discovery and convention-based record IDs apply unchanged
+
+**Given:** R-22 is in place
+**When:** `MigrationRunner.RunAsync` runs
+**Then:** Migrations are discovered via reflection per ADR-0004 and IDs generated per ADR-0009 — no provider-specific overrides
+**Otherwise:** Custom conventions are still pluggable via `IMigrationConventions`
+
+**Priority:** Must
+**Confidence:** High
+
+#### R-29: `WithProductionDefaults()` extension method explicitly configures production-safety defaults
+
+**Actor:** Operator wiring up the provider for a production environment
+**Intention:**
+- *Immediate:* One discoverable IntelliSense-visible call sets all production-safety defaults coherently
+- *Outcome:* No hidden coupling via an environment enum; the call site shows what changed; behavior is auditable in source
+- *Metric:* Production deployments call `.WithProductionDefaults()` exactly once, at the DI registration site
+
+**Friction today:**
+- Current: First-time-use of an environment enum risks "I set Profile=Production and forgot what that implies"; an extension method shows in IntelliSense and is grep-able in code review
+- Failure mode: Without an explicit forcing function, operators inherit dev defaults silently into production (MD-4, PM-7)
+- Frequency: Every production deployment
+
+**Given:** Consumer registers
+```csharp
+services.AddOpenSearchMigrations(opts => { ... }).WithProductionDefaults();
+```
+**When:** Service provider builds
+**Then:** Extension method explicitly sets:
+- `ClusterHealthThreshold = Green` (R-03)
+- `WaitMode = PerMigration` (R-12)
+- `RequireUnsafeJustification = true` (R-18)
+- `ContextResolutionPolicy = RequireExplicit` (R-15)
+
+Per-option settings the operator chains AFTER `WithProductionDefaults()` win (the extension does not re-apply defaults). The startup banner (R-25) emits all resolved values at INFO so the operator can verify the configuration in production logs
+
+**Otherwise:** No environment enum exists; "production" is a behavior set the operator opts into, not a profile that silently changes behavior. Calling `WithProductionDefaults()` against a single-node cluster will hit the Green-threshold ceiling — this is the intended trade and is documented
+
+**Depends on:** R-03, R-12, R-15, R-18, R-25
+**Priority:** Must
+**Confidence:** High (replaces the rejected `EnvironmentProfile` enum design — IR meta-finding)
+
+#### R-30: `MIGRATE INDEX` composite verb encodes the zero-downtime reindex-and-swap pattern
+
+**Actor:** Migration author propagating a template/mapping/settings change to existing data
+**Intention:**
+- *Immediate:* Authors who need to migrate existing data to a new index shape get one verb that does it correctly — they don't compose four statements and risk a wrong intermediate state
+- *Outcome:* The canonical pattern (create new versioned index → reindex with `op_type: create` → atomic alias swap) is encoded as the lazy path; no sample reading required
+- *Metric:* Production scenario test (R-24c) demonstrates `MIGRATE INDEX` produces identical end-state to the hand-composed four-statement equivalent
+
+**Friction today:**
+- Current: A teammate who runs `CREATE TEMPLATE` thinking it propagates to existing indices gets a silent wrong-state failure (template only matches future indices). The four-statement workaround (`CREATE INDEX new` + `REINDEX` + `ALIAS SWAP` + optional `DROP INDEX old`) requires reading samples and remembering to add `op_type: create`, the alias swap precondition, the right wait modes
+- Failure mode: Author writes `UPDATE MAPPING` on an existing index expecting analyzers to apply to existing docs (they don't); or runs `CREATE TEMPLATE` and assumes propagation; or hand-composes a reindex that loses data on retry because they forgot `op_type: create`
+- Frequency: Every time a team needs to apply a mapping/settings/template change to a populated index — the common case in mature production deployments
+
+**Given:** A statement of the form `MIGRATE INDEX <src> TO <dst> [WITH TEMPLATE <template> | WITH BODY $body] [VIA ALIAS <alias>] [TIMEOUT <duration>]`
+**When:** Provider parses and executes the statement
+**Then:** Parser decomposes the verb into a deterministic sequence of AST nodes:
+1. `CREATE INDEX <dst> [IF NOT EXISTS]` — body resolved from either `WITH TEMPLATE <template>` (provider performs `GET /_index_template/<template>` at execute-time and uses the resolved `template` block) OR `WITH BODY $body` (sibling reference per R-09). `dynamic: strict` injection per R-17 applies to the resolved body unless `composed_of` is present
+2. `REINDEX FROM <src> TO <dst>` with auto-injected `op_type: create` (per R-08a) and `WAIT FOR COMPLETION true` (per R-11 Tasks API polling)
+3. If `VIA ALIAS <alias>` is present: `ALIAS SWAP <alias> FROM <src> TO <dst>` with in-body precondition (R-16). If absent, no swap is performed — author retains responsibility for cutover (this preserves migrations that intentionally retain both indices, e.g., for read-traffic comparison)
+
+The decomposition is **performed at parse time**, producing the same AST shape as the four-statement hand-composed equivalent. Each sub-statement is subject to all standard middleware (implicit waits per R-12, secret scrubbing per R-10/R-25, observability per R-25). Failure of any sub-statement halts the composite; partial-rollback ledger semantics (R-19) record which sub-statement failed for `--force-resume` recovery.
+
+**Otherwise:**
+- `WITH TEMPLATE <template>` referencing a non-existent template fails at **execute time** (parser produces an AST node carrying the template id as an unresolved reference; runtime middleware performs `GET /_index_template/<template>` immediately before the CREATE INDEX is dispatched; missing template surfaces with the index-template name in the error). Per ADR-0015, the parser is offline-pure — no parse-time network I/O
+- `MIGRATE INDEX a TO a` (same source and destination) fails at parse time (purely syntactic check)
+- The verb does NOT support arbitrary author-provided sub-statements between create/reindex/swap. Authors who need custom intermediate logic (e.g., run a Painless script during reindex) hand-compose using the underlying verbs
+
+**Depends on:** R-08a, R-11, R-16, R-17, R-19
+**Priority:** Should — closes the template-propagation lazy-path gap; adopters with mature production data benefit immediately
+**Confidence:** High — runtime template resolution preserves offline parse, isolates I/O to middleware boundary (per ADR-0015)
+
+
+### Testing
+
+#### R-24: Unit tests cover all parser, lock, and compilation logic
+
+**Actor:** CI pipeline
+**Intention:**
+- *Immediate:* Fast feedback on grammar and lock correctness without Docker
+- *Outcome:* Most regressions caught before integration tier
+- *Metric:* Unit suite runs in under 10s; covers every verb's parse path and every lock state transition
+
+**Given:** ADR-0010 mandates unit + integration tiers
+**When:** Unit tests run
+**Then:** Unit tests cover (a) Parlot grammar for every verb in R-08a (positive and negative cases including malformed inputs and ambiguous prefixes), (b) statement compilation to OpenSearch request shapes via mocked `IConnection`, (c) lock CAS state machine including renewal, takeover-on-staleness, max-lifetime expiry, and crash mid-renewal, (d) implicit-wait insertion logic for R-12, (e) `dynamic: strict` injection (R-17), (f) parse-time unsafe-operation detection (R-18 syntactic tier)
+**Otherwise:** Each test names the requirement it validates in its DisplayName
+
+**Priority:** Must
+**Confidence:** High
+
+#### R-24a: Integration tests cover every verb against a real OpenSearch container
+
+**Actor:** CI pipeline
+**Intention:**
+- *Immediate:* Verify the provider end-to-end against real OpenSearch behavior, not mocks
+- *Outcome:* Confidence that production-representative scenarios actually work
+- *Metric:* Every verb in R-08a has at least one happy-path and one negative integration test
+
+**Friction today:**
+- Current: Existing `Hyperbee.Migrations.Integration.Tests` project uses Testcontainers for Aerospike — same pattern applies
+- Failure mode: Without a real cluster, parser/compiler bugs surface only in production
+- Frequency: Every release
+
+**Given:** Docker is available; tests run against a Testcontainers OpenSearch image **pinned by sha256 digest** (e.g., `opensearchproject/opensearch@sha256:...`); image bumps are explicit PR-level decisions, not silent CI-time drift (closes PM-11)
+**When:** Integration suite runs
+**Then:** Tests verify (a) bootstrapper waits for cluster ready and fails cleanly when not, (b) ledger index is created with strict mapping (including `appliedBy`, `direction`, `failedStatementIndex`) and survives re-init, (c) every verb in R-08a executes its OpenSearch operation correctly (CRUD round-trips assert state via `_cat`/`_search`), (d) atomic `ALIAS SWAP` is single-request and atomic with in-body precondition (R-16 / NF-2), (e) `REINDEX` polls Tasks API, surfaces progress, and asserts `op_type: create` is on the wire by default (R-08a / PM-3), (f) `dynamic: strict` injection is applied for flat bodies and SKIPPED for `composed_of` bodies (R-17 / PM-4), (g) idempotency markers no-op correctly, (h) implicit waits gate subsequent statements per `WaitMode`, (i) WHEN VERSION semver: `'2.9' < '2.10'` (R-15a / PM-9)
+**Otherwise:** Integration tests are skipped (not failed) when Docker is unavailable, with a clear `[TestCategory("RequiresDocker")]` exclusion mechanism mirroring the Aerospike pattern
+
+**Depends on:** R-08a, R-24
+**Priority:** Must
+**Confidence:** High
+
+#### R-24b: Integration tests cover lock contention, crash recovery, and concurrent runners
+
+**Actor:** CI pipeline; this is the production-safety harness
+**Intention:**
+- *Immediate:* Prove the lock actually prevents concurrent migrations and recovers from crashes
+- *Outcome:* No production incident class "two pods migrated at once"; no class "crashed migration locked us out forever"
+- *Metric:* Concurrent-runner test runs 50 iterations without false acquisition or false starvation
+
+**Friction today:**
+- Current: Aerospike provider just shipped auto-renewing locks; that test pattern transfers
+- Failure mode: Without these tests, the lock works in theory but fails under real conditions (clock skew, network blips, OpenSearch slow refresh, etc.)
+- Frequency: Every blue/green deploy
+
+**Given:** Two `MigrationRunner` instances share the same cluster and ledger
+**When:** Both invoke `RunAsync` simultaneously with conflicting migrations
+**Then:** Tests verify (a) only one acquires the lock; the other receives `MigrationLockUnavailableException`, (b) heartbeat renewal extends the lock under sustained workload (>1 renewal interval), (c) abrupt termination of the lock holder allows the next runner to take over after `LockStaleAfter` and not before, (d) `LockMaxLifetime` ceiling stops renewal and surfaces the warning, (e) version conflict on ledger write (R-07) surfaces as a typed exception, (f) lock acquisition CAS handles 409 retry semantics correctly under refresh-interval lag
+**Otherwise:** Test uses controllable `TimeProvider` (already wired via DI per the Aerospike pattern) so timing is deterministic, not wall-clock
+
+**Depends on:** R-04, R-05, R-07, R-24a
+**Priority:** Must
+**Confidence:** High (pattern is proven on Aerospike)
+
+#### R-24c: Integration tests cover production-representative scenarios
+
+**Actor:** CI pipeline; this is the soak harness for "does it really work"
+**Intention:**
+- *Immediate:* Validate scenarios that bite real teams, not just synthetic happy paths
+- *Outcome:* Provider is provably production-capable, not just feature-complete
+- *Metric:* Each named production scenario has a passing test
+
+**Given:** Realistic data shapes (10K-100K docs in a seed index)
+**When:** Integration suite runs the production-scenario subset
+**Then:** Tests verify:
+- (a) Zero-downtime alias swap pattern: create v2 → reindex from v1 with active background writes to v1 → atomic alias swap → asserts no docs lost, no docs double-written. Asserts `op_type: create` is auto-injected by R-08a even when the migration body omits it
+- (b) ISM policy attachment to existing index works (`POST /_plugins/_ism/add` after policy create)
+- (c) Mapping update on existing index produces expected "no reindex" gotcha and the provider's diagnostic warns about it
+- (d) Static settings update fails clearly without `CLOSE` flag and succeeds with it
+- (e) Reindex of 100K docs streams progress and does not time out at HTTP layer (Tasks API); progress logs at INFO only on percentage thresholds, DEBUG every poll
+- (f) Bulk-load with simulated 429 retries via toxiproxy or chaos provider
+- (g) `dynamic: strict` rejects unexpected fields with the documented error
+- (h) **Lock false-takeover scenario (PM-1, PA-5):** simulated refresh-lag during heartbeat verifies takeover candidate uses realtime GET and does NOT take over a healthy holder
+- (i) **Reindex stale-dst scenario (PM-3):** crashed prior run leaves dst with partial docs; new run with `op_type: create` (auto-injected) skips them safely, no double-write
+- (j) **LockMaxLifetime cancellation contract (PM-12):** simulated long-running migration that exceeds `LockMaxLifetime` aborts the in-flight statement, skips ledger write, surfaces `MigrationLockExpiredException`
+- (k) **Lock primary-shard contention (PA-2):** N concurrent `CreateLockAsync` invocations against the same lock index; assert lock-index settings include `number_of_replicas: 0`; assert tail latency for losers is bounded
+- (l) ~~Templating JSON-context~~ — **REMOVED** per ADR-0016. Slot reserved for a future cross-cutting test if templating is reintroduced.
+- (m) **Ledger refresh budget (R-07 / PA-1):** 100-migration bootstrap completes within budget against 3-node Testcontainers cluster
+- (n) **Partial-rollback ledger state (R-19 / NF-5):** rollback statement N fails after N+1..M succeeded → ledger has `status: partially_rolled_back` with `failedStatementIndex: N`; subsequent runs require `--force-resume`
+- (o) **`MIGRATE INDEX` composite (R-30):** end-to-end test asserts the composite verb produces identical end-state to the hand-composed `CREATE INDEX` + `REINDEX` + `ALIAS SWAP` sequence (cluster state diff is empty); also asserts `WITH TEMPLATE` resolves to the same body as the template's `template` block
+
+**Otherwise:** Each scenario has a single named test with clear assertions; failures surface the specific assertion that failed, not just "test failed"
+
+**Depends on:** R-24a
+**Priority:** Must — this is the "production-capable" gate
+**Confidence:** Medium (some scenarios like 429 simulation need infra choices made)
+
+### Distribution & Production Readiness
+
+#### R-26: Runner project follows the existing per-provider pattern
+
+**Actor:** Operator deploying migrations as a standalone executable
+**Intention:**
+- *Immediate:* Operators run migrations the same way they run Aerospike/Couchbase/MongoDB/Postgres migrations
+- *Outcome:* No special-casing in deploy pipelines per provider
+- *Metric:* The same Helm chart / Dockerfile / Octopus deploy template works for OpenSearch by swapping the package
+
+**Friction today:**
+- Current: Existing providers ship `runners/Hyperbee.MigrationRunner.<Provider>` projects; OpenSearch must match
+- Failure mode: Diverging from the runner pattern fragments operator muscle memory
+- Frequency: Every deploy
+
+**Given:** A `runners/Hyperbee.MigrationRunner.OpenSearch` project exists
+**When:** Operator runs the binary with standard configuration (appsettings.json + env overrides)
+**Then:** Runner reads connection details, profile, target version, and locking from `IConfiguration`; binds to `OpenSearchMigrationOptions` per ADR-0006; loads embedded migration assemblies; invokes `MigrationRunner.RunAsync`; exits with non-zero on failure and zero on success
+**Otherwise:** Runner produces structured JSON logs (matching the Aerospike runner) suitable for log aggregation; emits a final summary of applied/skipped/failed migrations
+
+**Depends on:** R-22
+**Priority:** Must
+**Confidence:** High
+
+#### R-27: Samples project demonstrates every v1 verb
+
+**Actor:** New adopter or PR reviewer
+**Intention:**
+- *Immediate:* Authors can copy-paste a sample for any operation
+- *Outcome:* Adoption time measured in minutes, not hours
+- *Metric:* Each verb in R-08a appears in at least one sample migration with a meaningful body
+
+**Given:** A `runners/samples/Hyperbee.Migrations.OpenSearch.Samples` project exists
+**When:** Adopter browses samples
+**Then:** Samples include (a) initial index creation with mapping and settings, (b) alias swap zero-downtime reindex (hand-composed), (c) ISM policy creation and attachment, (d) component template + composable index template pattern, (e) bulk seed of N docs, (f) conditional migration via `WHEN VERSION`, (g) rollback example for a reversible operation, (h) templating example with environment-specific values, (i) **`MIGRATE INDEX` composite verb (R-30) — the recommended pattern for propagating template/mapping changes to existing data**, (j) `UNSAFE("...")` and `NO WAIT("...")` justification idioms with the syntactic enumeration of operations requiring them
+**Otherwise:** Each sample is a runnable migration class with a comment explaining the production scenario it demonstrates. Sample (i) is featured prominently in the README as the answer to "how do I apply template changes to existing data?"
+
+**Depends on:** R-08a, R-19, R-26
+**Priority:** Should
+**Confidence:** High
+
+#### R-28: Multi-topology validation: single-node, multi-node, AWS Managed OpenSearch
+
+**Actor:** CI pipeline + manual validation cycle
+**Intention:**
+- *Immediate:* Provider works on the topologies real teams use, not just CI single-node
+- *Outcome:* Production deploys to AWS Managed OpenSearch and on-prem multi-node clusters succeed without surprises
+- *Metric:* Documented test results against each topology before each release
+
+**Friction today:**
+- Current: Tools tested only against single-node fail in subtle ways on multi-node (replica allocation, cluster state propagation, refresh timing, SigV4 auth path)
+- Failure mode: Production-only bugs (yellow vs green hardcoding; SigV4 auth misconfiguration; replica allocation timeouts)
+- Frequency: First production deploy of every release
+
+**Given:** Three target topologies are recognized: (a) single-node Testcontainers (CI default), (b) multi-node (3-node) Testcontainers Compose for replica behavior, (c) AWS Managed OpenSearch domain with SigV4 auth (scheduled CI cycle)
+**When:** Release validation runs
+**Then:**
+- Topology (a) and (b) are **fully automated in CI on every PR** — multi-node is no longer optional; OpenSearch's Docker image runs as a 3-node cluster trivially via Testcontainers `INetwork` + `discovery.seed_hosts` + `cluster.initial_master_nodes`. Topology (b) verifies: green-threshold behavior, replica allocation, shard relocation during `ALIAS SWAP`, the lock index `number_of_replicas: 0` setting prevents replica-write coupling under concurrent acquire (PA-2)
+- Topology (c) is a scheduled validation (e.g., nightly or pre-release) with a runbook covering the smoke-test verbs (R-08a), SigV4 connectivity, and ISM endpoint capability probing (R-21)
+
+**Otherwise:** When AWS Managed validation cannot be reached in scheduled CI (no AWS account credentials available), this is logged on the release checklist as "deferred"; manual validation results are recorded in the release notes
+
+**Depends on:** R-21, R-24a
+**Priority:** Must (a, b — both CI-automated); Should (c — scheduled)
+**Confidence:** High (multi-node Compose is well-supported by Testcontainers-dotnet)
+
+### Observability
+
+#### R-25: Structured logging at key state transitions, with secret scrubbing
+
+**Given:** Standard ILogger is configured
+**When:** Provider runs
+**Then:**
+- DEBUG: every statement compiled and dispatched; Tasks API per-poll progress
+- INFO: bootstrapper state transitions, lock acquired/renewed/released, each migration start/end with duration, Tasks API percentage thresholds (10/25/50/75/90%), Tasks API backoff transitions, **startup banner emitting all resolved defaults** (`Profile`, `ClusterHealthThreshold`, `WaitMode`, `RequireUnsafeJustification`, `ContextResolutionPolicy`, `ActiveContext`, rollback enabled/disabled, lock parameters)
+- WARN: 429 retries (with batch size and retry count), lock takeover events, slow waits, structured `migration.unsafe_bypass` and `migration.no_wait` events with justification reasons
+- ERROR: parse failures (with file/index/recognized-verb-so-far), lock conflicts, task errors, `MigrationLockExpiredException`
+- Correlation includes migration id and task id where applicable
+
+**Otherwise:** Per ADR-0016, the provider does not ship a `SecretScrubber` log sink. If host applications need value-coupled redaction (e.g., scrubbing connection-string passwords from logs), that is configured at the Serilog/ILogger sink level — applied uniformly across all five providers, not provider-specific. MD-15 is no longer in scope here.
+
+**Priority:** Must (was Should — promoted because the startup banner closes operator-visibility gaps)
+**Confidence:** High
+
+## Constraints
+
+- **Compatibility with ADRs 0001-0010:** Must comply or supersede explicitly. No requirement currently supersedes any ADR.
+- **Client packages:** OpenSearch.Client 1.8+ and OpenSearch.Net 1.8+; AWS SigV4 via optional package
+- **TFM:** net8.0 / net9.0 to match the rest of Hyperbee.Migrations
+- **License:** Apache 2.0 compatible
+- **Async-only API surface** (matches existing providers)
+- **Cancellation:** `CancellationToken` propagates from runner through all async paths
+- **No file-level templating** (ADR-0016) — env-variation through typed options + `IConfiguration`, matching all other providers
+- **Parser:** Parlot (ADR-0001) — non-negotiable house standard; no alternative parser permitted
+- **No external lock dependency** (Redis/etcd) — must be OpenSearch-native (ADR-0005)
+- **Minimum cluster version:** OpenSearch 2.0+ (decide on legacy ES support — see Open Questions)
+
+## Trust Boundaries
+
+**Autonomous** (provider acts without human approval):
+- Acquire and renew the migration lock; take over a stale lock that exceeds `LockStaleAfter` after **realtime GET verification** (R-05)
+- Apply migrations in version order
+- Skip statements gated by `IF [NOT] EXISTS` or `WHEN` conditions (subject to `ContextResolutionPolicy`)
+- Inject `dynamic: strict` into flat managed-index mappings (NOT into `composed_of` bodies — R-17)
+- Inject `op_type: create` into `REINDEX` request bodies by default (R-08a)
+- Poll Tasks API and surface progress
+- Atomic alias swap as a single `_aliases` request with in-body precondition (R-16)
+- Emit the startup banner with resolved configuration defaults (R-25)
+- Cancel the in-flight migration's `CancellationToken` when `LockMaxLifetime` is reached (R-05)
+
+**Escalate** (caller decides):
+- Lock contention (`MigrationLockUnavailableException`) — caller chooses retry or bail
+- Bootstrapper timeout — caller chooses to fail the deploy or retry later
+- 409 on ledger write — caller bails (concurrent runner detected)
+- `MigrationLockExpiredException` (max-lifetime hit mid-migration) — caller decides to retry after operator review
+- Partial-rollback recovery (`status: partially_rolled_back`) — operator must invoke `--force-resume` after reconciling cluster state
+
+**Forbidden** (provider never does):
+- Run migrations without acquiring the lock (when locking is enabled)
+- Bypass parse-time unsafe-operation detection silently (must require `UNSAFE("<reason>")` opt-in with non-empty reason)
+- Bypass implicit waits silently (must require `NO WAIT("<reason>")` opt-in with non-empty reason under `WaitMode = PerStatement`)
+- Auto-generate inverse operations (rollback is opt-in only)
+- Modify the migration ledger index mapping after creation (immutable per R-06)
+- Silently apply `context`-gated migrations when `ActiveContext` is unset under `ContextResolutionPolicy = RequireExplicit` (R-15)
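+- Log secret values — the provider itself must never emit secret-bearing values; per ADR-0016 it ships no scrubber, so any value-coupled redaction is configured at the host's Serilog/ILogger sink layer (R-25; R-10 struck)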
+- Run two `MigrationRunner.RunAsync` calls concurrently within a single process
+- Take over a lock based on search-staleness alone (must verify via realtime GET — R-05)
+- Execute a `REINDEX` without `op_type: create` unless explicit `REINDEX UNSAFE("<reason>") FROM ...` is used (R-08a, R-18)
+- Inject `dynamic: strict` into a body with `composed_of` (must defer to component template — R-17)
+
+## Out of Scope
+
+- **OpenSearch Dashboards saved objects** — different host/port; use Dashboards' own export API
+- **k-NN, ML connectors, anomaly detection plugin objects** — ecosystem extras for v1
+- **Remote reindex (`reindex.remote.allowlist`)** — supported as a body verbatim pass-through but no provider-level allowlist management
+- **Auto-generated rollbacks** — too dangerous; rollback is opt-in only
+- **Multi-cluster migration orchestration** — one cluster per provider instance
+- **Snapshot repository plugin installation** — repos are pre-existing; provider configures, does not install
+- **Pre-OpenSearch Elasticsearch 7.x and earlier** — see Open Questions
+- **Schema diffing or auto-generated migrations** — out of band; teams write migrations manually
+
+## Decisions & Open Questions
+
+### Decided
+
+- **Hybrid Parlot grammar over opaque JSON bodies** — *rationale:* matches Couchbase/Aerospike/MongoDB house style and ADR-0001/ADR-0002. *Influences:* R-08, R-08a, R-09
+- **Sibling `$name` body references over inline JSON strings** — *rationale:* eliminates quote-escaping; real JSON tooling can format and lint. Reserved Parlot identifiers (`$body`, `$query`, `$script`) and reserved templating scope names (`env`, `config`, `runtime`, `secrets`) cannot collide. *Influences:* R-09
+- **No file-level templating engine (ADR-0016)** — *rationale:* matches house style of all other providers; env-variation via typed `OpenSearchMigrationOptions` + `IConfiguration` is sufficient for substitution; conditional/iteration in resource files is speculative and can be added later via a superseding ADR if a real use case emerges. *Influences:* R-10 (struck), R-25 (amended)
+- **Auto-renewing lock heartbeat ported from Aerospike, with realtime-GET takeover and explicit max-lifetime cancellation contract** — *rationale:* OpenSearch refresh-lag invalidates pure search-based staleness checks; max-lifetime must abort, not warn. *Influences:* R-04, R-05
+- **Ledger lives in OpenSearch itself** — *rationale:* operational simplicity (one system to back up); ADR-0005 prefers provider-native. Strict mapping is immutable; forensic fields (`appliedBy`, `direction`, `failedStatementIndex`) MUST land before v1. *Influences:* R-06, R-07
+- **Implicit + explicit wait grammar with `WaitMode` enum (PerStatement / PerMigration / Off)** — *rationale:* default robustness without N+1 master storms; PerMigration is production default. Implicit waits scope to the mutated index by default. *Influences:* R-12, R-13
+- **Optional best-effort rollback with explicit partial-rollback ledger semantics** — *rationale:* most NoSQL operations are not safely reversible; partial-rollback failure mid-sequence requires `partially_rolled_back` state and `--force-resume` recovery. *Influences:* R-19, R-06
+- **`WithProductionDefaults()` extension method instead of an environment enum** — *rationale:* discoverable in IntelliSense, grep-able in code review, no hidden coupling. Replaces an earlier `EnvironmentProfile` proposal that was rejected during assessment for hidden-coupling concerns. *Influences:* R-03, R-12, R-15, R-18, R-29
+- **`Yellow` SDK default health threshold; `Green` via `WithProductionDefaults()`** — *rationale:* dev/CI single-node clusters cannot reach Green; safer default for SDK while production explicitly opts in. *Influences:* R-03, R-29
+- **`UNSAFE("<reason>")` and `NO WAIT("<reason>")` modifiers require non-empty reasons** — *rationale:* MD-2/MD-11 single-token bypasses are silent in PR review; justification token gives high-signal grep target. *Influences:* R-12, R-18, Trust Boundaries
+- **`op_type: create` auto-injected into `REINDEX` bodies by default (parser-level, opt-out via `REINDEX UNSAFE`)** — *rationale:* same precedent as R-17 dynamic-strict injection; fixing a laziest-path correctness hazard only through sample documentation is an anti-pattern. *Influences:* R-08a
+- **Component-template-aware `dynamic: strict` injection (skipped when body has `composed_of`)** — *rationale:* layered mappings; injection at index level clobbers component-level `dynamic: false`. *Influences:* R-17
+- **`ALIAS SWAP` precondition is in-body, not a separate GET** — *rationale:* eliminates TOCTOU window; cluster atomically rejects entire body. *Influences:* R-16
+- **Semantic version comparison for `WHEN VERSION`** — *rationale:* string compare returns wrong answer on `'2.9' < '2.10'`; correctness gap, not future concern. *Influences:* R-15a
+- **`ActiveContext` option as source-of-truth for context filter; `ContextResolutionPolicy.RequireExplicit` in production** — *rationale:* silent-skip and silent-run are both worse than fail-loud; production must require explicit context. *Influences:* R-15
+- **No provider-shipped secret scrubber (ADR-0016)** — *rationale:* with templating removed, the secret-leakage risk class shrinks dramatically (no JSON-rendering pathway). Option-value redaction in logs (if needed) belongs at the Serilog/ILogger sink layer, applied uniformly across all providers. *Influences:* R-10 (struck), R-25 (amended)
+- **Multi-node Testcontainers Compose CI is Must, not Should** — *rationale:* Green-threshold and replica-allocation behaviors are never exercised on single-node; OpenSearch image runs as 3-node cluster trivially. *Influences:* R-28
+- **Testcontainers OpenSearch image pinned by sha256 digest** — *rationale:* "2.x latest" is mutable; CI silently picks up new image, prod runs older cluster, behavior diverges. *Influences:* R-24a
+- **Lock index `number_of_replicas: 0`** — *rationale:* eliminates replica-write coupling on the lock primary shard under N concurrent runners (PA-2). *Influences:* R-04
+- **AWS endpoint loud-fail + ISM endpoint capability detection** — *rationale:* MD-6/PM-2/PM-6 are caught at startup with the exact remediation snippet, not silently in production. *Influences:* R-21
+- **Tasks API per-poll progress logged at DEBUG, INFO only on percentage thresholds** — *rationale:* PA-4 log flood for long reindexes. *Influences:* R-11, R-25
+- **`MIGRATE INDEX` composite verb encoding the canonical reindex-and-swap pattern** — *rationale:* template/mapping changes do not propagate to existing data in OpenSearch; sample-only documentation is an anti-pattern (assessment 0002 meta-finding). The composite verb makes the safe pattern the lazy path. *Influences:* R-08a, R-30, R-27
+
+### Open
+
+- **Legacy Elasticsearch 7.x support** — Status: deferred. Reason: API surface is identical to OpenSearch 1.x but the package and license differ. Leaning: NOT in v1 — keep this OpenSearch-specific; add a sibling `Elasticsearch` provider later if demand exists. Depends on: user/maintainer call. Influences: client package choice in Constraints.
+- **Snapshot/restore as v1 verbs** — Status: deferred. Reason: snapshot repos require pre-existing config; long-running operations stress the warmup model. Leaning: include `WAIT UNTIL TASK` infrastructure in v1 (R-11) and add `SNAPSHOT`/`RESTORE` verbs in v1.1. Depends on: scope decision. Influences: verb set in R-08a.
+- **Security-plugin objects (roles, role mappings) as v1 verbs** — Status: deferred. Reason: requires admin-cert auth which complicates DI; tenant model is a separate design problem. Leaning: not in v1. Depends on: scope decision. Influences: verb set in R-08a, Out of Scope.
+- **Semantic unsafe-operation detection (R-18 deep tier)** — Status: deferred. Reason: requires reading live mapping/index state at parse or pre-execute time; semantic understanding of query effects is a research project. Leaning: ship syntactic enumeration in v1; semantic detection in v1.1 if real-world incidents justify. Depends on: post-v1 incident telemetry. Influences: R-18.
+- **`WHEN VERSION` long-tail suffix support** — Status: deferred. Reason: AWS `OpenSearch_` prefix and `-rc` / `-SNAPSHOT` qualifiers will need normalization rules as they appear in real clusters. Leaning: ship clean `MAJOR.MINOR.PATCH` + documented suffix rules in v1; tighten as needed. Depends on: production diversity. Influences: R-15a.
+- **AWS Managed OpenSearch CI automation** — Status: deferred (Should). Reason: requires AWS account scaffolding and credentials in CI. Leaning: scheduled validation runbook in v1; full automation v1.1+. Depends on: project AWS account access. Influences: R-28.
+- **JSON Schema for `statements.json` (IDE help)** — Status: deferred. Reason: nice-to-have IDE ergonomics; not blocking correctness. Leaning: v1.1. Depends on: adopter feedback. Influences: R-08, R-09.
+- **Topology-aware bulk-load parallelism** — Status: deferred. Reason: PA-6 says default 8x parallelism saturates small-node thread pools and triggers self-induced 429s. Leaning: ship with conservative defaults documented; add adaptive tuning in v1.1. Depends on: real-cluster benchmarks. Influences: R-20.
+- **OpenSearch.Client v2 / cluster 3.x compatibility** — Status: monitor. Reason: PM-8 says client may go stagnant against 3.x clusters. Leaning: track upgrade cadence; canary against `next-major` Testcontainers image; bump pinned image when 3.x ships. Depends on: OpenSearch project release schedule. Influences: R-24a, Constraints.
+
+## Recommended next steps
+
+1. **`/nop:propose`** to evaluate concrete implementation strategies against these requirements as fitness criteria. The remaining tensions (Open Questions) are mostly scope decisions; the load-bearing implementation choices to evaluate are: (a) parser-level injection vs runtime middleware for `op_type: create` and `dynamic: strict`; (b) lock-index initialization (provision-on-demand vs explicit options); (c) `WithProductionDefaults()` implementation (extension method vs builder pattern); (d) bootstrapper architecture (state machine like Couchbase vs simpler async sequence).
+2. **`/nop:plan`** once propose selects a winner.
diff --git a/docs/research/0001-opensearch-provider.md b/docs/research/0001-opensearch-provider.md
new file mode 100644
index 0000000..a8b1604
--- /dev/null
+++ b/docs/research/0001-opensearch-provider.md
@@ -0,0 +1,400 @@
+# Research: OpenSearch Provider for Hyperbee.Migrations
+
+**Date:** 2026-05-02
+**Status:** Draft
+**Author:** Brenton Farmer (with research agents)
+**Related:** Future ADRs for OpenSearch provider design
+
+## Purpose
+
+Scope a new OpenSearch provider for Hyperbee.Migrations. The library currently ships providers for Aerospike, Couchbase, MongoDB, and Postgres. The user identified three concern areas requiring deep investigation before design:
+
+1. **Resource migrations** — how OpenSearch's JSON-heavy artifacts (mappings, settings, templates, ISM policies) map to the existing `statements.json` + Parlot grammar pattern
+2. **Template management** — variable substitution across environments
+3. **Async/sync and warmup concerns** — Aerospike's special index-ready polling and Couchbase's complex bootstrapper as baselines for OpenSearch's cluster-health and Tasks API
+
+This document captures the research synthesis. It does not commit to an implementation; that is the role of the follow-on `nop:propose` evaluating concrete grammar/architecture options.
+
+---
+
+## 1. Existing Provider Patterns (In-House Prior Art)
+
+### 1.1 Core contract
+
+[MigrationRunner](../../src/Hyperbee.Migrations/MigrationRunner.cs) orchestrates: `InitializeAsync` → `CreateLockAsync` (returns IDisposable) → reflection discovery → sequential `UpAsync`/`DownAsync` → journal `WriteAsync`/`DeleteAsync`.
+
+[IMigrationRecordStore](../../src/Hyperbee.Migrations/IMigrationRecordStore.cs) defines seven methods total. [MigrationRecord](../../src/Hyperbee.Migrations/MigrationRecord.cs) is minimal: `{ Id, RunOn }`. The runner is stateless; the store holds all state.
+
+All providers implement `IMigrationRecordStore` directly and inherit from `MigrationOptions`. ADR-0003 formalizes this contract; ADR-0006 formalizes the options hierarchy.
+
+### 1.2 Resource migrations
+
+The convention across NoSQL providers (ADR-0002):
+
+```json
+{ "statements": [ { "statement": "..." } ] }
+```
+
+| Provider | Statement language | Document loader | Grammar |
+|-----------|-----------------------------|-----------------------|---------|
+| Aerospike | Subset of AQL | `DocumentsFromAsync` | Parlot |
+| Couchbase | Partial N1QL | `DocumentsFromAsync` | Parlot |
+| MongoDB | Mongo shell-like commands | `DocumentsFromAsync` | Parlot |
+| Postgres | Raw SQL files (no parsing) | None (procedural) | None |
+
+Resource discovery is via embedded assembly resources, addressed by `Migration.VersionedName()` (ADR-0009).
+
+### 1.3 Templating
+
+**No provider currently uses templating.** Hyperbee.Templating exists in-house with `{{name}}`, `{{x => x.Foo()}}`, `{{#if}}`/`{{/if}}`, `{{each}}`/`{{while}}`, and `{{name:value}}` syntax — but no migration provider has wired it in. Substitution is currently done from typed options at runtime (e.g., `_options.Namespace` in Aerospike). OpenSearch will be the first provider to require true file-level templating because mappings, replica counts, analyzer chains, and ISM policy values vary across environments.
+
+### 1.4 Statement grammar
+
+Three providers use Parlot (ADR-0001) for partial DSLs. Each grammar:
+
+- **Aerospike**: `CREATE INDEX [IF NOT EXISTS] [RECREATE] [WAIT] name ON ns.set (bin) [STRING|NUMERIC|GEO2DSPHERE]`, `DROP INDEX ns indexname`, `CREATE SET`, `INSERT INTO`, `DELETE FROM`
+- **Couchbase**: `CREATE BUCKET ... TYPE ... RAMQUOTA ... FLUSH ENABLED ... REPLICAS`, `CREATE [PRIMARY] INDEX`, `CREATE SCOPE`, `CREATE COLLECTION`, `BUILD INDEX ON`, `UPDATE ... SET`, `DROP {BUCKET|SCOPE|COLLECTION}`
+- **MongoDB**: `CREATE COLLECTION`, `DROP COLLECTION`, `CREATE [UNIQUE] INDEX name ON db.collection(field, ...)`, `DROP INDEX name ON db.collection`
+
+All grammars are deliberately partial — they recognize verb prefixes; everything past that point is passed through to the database client. This is the key idea worth replicating for OpenSearch: thin shell over opaque payloads.
+
+### 1.5 Async/sync model
+
+All record store methods are `async Task`. Cancellation tokens thread through runner → store → resource runners. Timeouts use a custom [TimeoutTokenSource](../../src/Hyperbee.Migrations/Wait/TimeoutTokenSource.cs) + linked CTS pattern.
+
+### 1.6 Warmup / readiness
+
+Spectrum across the four providers:
+
+| Provider | Warmup style |
+|-----------|------------------------------------------------------------------------------------------------------------|
+| Postgres | None; `InitializeAsync` creates schema + table inline |
+| MongoDB | None; just acquires the database handle |
+| Aerospike | Per-operation: `WaitForIndexReadyAsync` polls `sindex/<ns>/<index>` info command, 500ms→5s exponential, 60s default |
+| Couchbase | 7-state bootstrapper: REST ping → cluster healthy → 5s settle → `WaitUntilReadyAsync` → bucket ready → sacrificial query |
+
+Couchbase is the most complex by a wide margin and is the closest behavioral analog for OpenSearch (multi-node cluster, eventual consistency on metadata, "ready vs healthy" distinction). [WaitHelper.WaitUntilAsync](../../src/Hyperbee.Migrations/Wait/WaitHelper.cs) + [PauseRetryStrategy](../../src/Hyperbee.Migrations/Wait/RetryStrategy.cs) (ADR-0008) are the reusable primitives.
+
+### 1.7 Distributed locking
+
+| Provider | Lock pattern |
+|-----------|-----------------------------------------------------------------------------------------------|
+| Aerospike | CAS `Put` with `RecordExistsAction.CREATE_ONLY` + TTL + background `Touch` renewal loop using `TimeProvider` |
+| Couchbase | `RequestMutexAsync` + `AutoRenew()` from `Couchbase.Extensions.Locks` |
+| MongoDB | Document with `LockedOn`/`ReleaseOn` timestamps; manual expiry check; no renewal |
+| Postgres | Dedicated `ledger_lock` row with `release_on`; manual expiry check; no renewal |
+
+The Aerospike auto-renewing lock (recently shipped) is the freshest and most robust pattern. ADR-0005 documents the provider-native locking decision. The Aerospike pattern translates directly to OpenSearch via `_seq_no`/`_primary_term` CAS — there is no native lock primitive in OpenSearch, and no .NET library provides one.
+
+### 1.8 Migration record stores
+
+| Provider | Storage | Lock storage |
+|-----------|--------------------------------------|-----------------------------------------|
+| Aerospike | Set `SchemaMigrations`, key=record id, bins `Name`/`ExecutedAt` | Same set, key `migration_lock` |
+| Couchbase | Bucket `ledger`, scope `migrations`, collection `ledger` | Same collection, doc id = lock name |
+| MongoDB | Database `migration`, collection `ledger` | Same collection, fixed id `1` |
+| Postgres | Schema `migration`, table `ledger` | Separate table `ledger_lock` |
+
+### 1.9 DI shape
+
+```csharp
+services.AddXxxMigrations( options => {
+ options.Assemblies.Add( typeof(MyMigration).Assembly );
+ options.LockingEnabled = true;
+} );
+```
+
+Options factory binds `IConfiguration` (`Migrations:FromAssemblies`, `Migrations:FromPaths`) merged with the lambda. `IMigrationRecordStore` and `MigrationRunner` register as singletons; resource runner is generic transient.
+
+### 1.10 Testing
+
+[Hyperbee.Migrations.Integration.Tests](../../tests/Hyperbee.Migrations.Integration.Tests/) uses Testcontainers per ADR-0010. Pattern: spin container, embed migrations in test assembly, run as subprocess, capture logs, assert database state. Testcontainers ships an OpenSearch image — the same pattern applies.
+
+---
+
+## 2. OpenSearch as a Migration Target
+
+### 2.1 .NET clients (state of the world, 2026)
+
+| Aspect | OpenSearch.Net (low-level) | OpenSearch.Client (high-level) |
+|-----------------|-----------------------------------|----------------------------------|
+| Forked from | Elasticsearch.Net | NEST |
+| Role | Transport, raw request/response | Strongly-typed POCOs, fluent DSL |
+| Version | 1.8.0 stable | 1.8.0 stable |
+| TFMs | netstandard2.0 + net6.0 | netstandard2.0 + net4.6.1 |
+| License | Apache 2.0 | Apache 2.0 |
+| Async | Every method has `*Async` | Every method has `*Async` |
+
+Forked from Elastic 7.10.2 in 2021. There is no v8 rewrite; `main` continues 2.0.0 development on the same surface area. API is essentially identical to NEST 7 — NEST documentation and StackOverflow knowledge transfers.
+
+Auth: basic auth, API key, mTLS, fine-grained security plugin via the high-level client. AWS SigV4 via separate package `OpenSearch.Net.Auth.AwsSigV4`.
+
+### 2.2 Migratable artifacts
+
+| Artifact | API | Idempotency | Pitfall |
+|---|---|---|---|
+| Index | `PUT /{name}` | No (errors on exists) | Static settings frozen at create |
+| Mapping update | `PUT /{idx}/_mapping` | Additive only | **Existing docs are NOT reindexed** |
+| Settings update | `PUT /{idx}/_settings` | Idempotent (dynamic only) | Static settings need close→update→open |
+| Composable index template | `PUT /_index_template/{name}` | Idempotent | Only matches future indices |
+| Component template | `PUT /_component_template/{name}` | Idempotent | Cannot delete if referenced |
+| Alias | `POST /_aliases` | Atomic across multi-action body | `is_write_index` exactly one |
+| Ingest pipeline | `PUT /_ingest/pipeline/{id}` | Idempotent | Order migrations carefully |
+| Stored script | `PUT /_scripts/{id}` | Idempotent | |
+| ISM policy | `PUT /_plugins/_ism/policies/{id}` | Update needs `if_seq_no`/`if_primary_term` | `ism_template` only matches future indices |
+| Data stream | `PUT /_data_stream/{name}` | Not idempotent | Requires backing template first |
+| Reindex | `POST /_reindex?wait_for_completion=false` | Not idempotent | 30s default sync timeout — always async |
+| Snapshot/restore | `_snapshot` APIs | Idempotent in name | Restore can't target an open index |
+| Security objects | `/_plugins/_security/api/...` | Idempotent | Requires admin role |
+| Cluster settings | `PUT /_cluster/settings` | Idempotent | Transient settings vanish on full restart |
+
+### 2.3 Async / long-running operations
+
+This is the section the user flagged as critical. **Most "structural" operations apply asynchronously inside the cluster — the HTTP call returns when the cluster master accepts the change, not when shards are allocated and ready.**
+
+Operations that return before applying:
+- `PUT /{idx}` — accepts `?wait_for_active_shards=N|all` and `?timeout=<duration>`
+- `PUT /{idx}/_settings` — dynamic instant; static needs close+update+open
+- `PUT /{idx}/_mapping` — published in cluster state; existing docs unmodified
+- `POST /_reindex` — always pass `?wait_for_completion=false` for migrations
+- `POST /{idx}/_forcemerge` — supports async
+- `_snapshot` and restore — both default async; status via `_status` and `_recovery`
+- `POST /{idx}/_close|_open` — async; triggers shard reallocation
+- `POST /{idx}/_refresh` — synchronous, cheap (listed for contrast: it completes before returning, so no follow-up wait is needed)
+
+**The three primitives every migration must use:**
+
+1. **Tasks API** — `?wait_for_completion=false` returns `task_id`; poll `GET /_tasks/{task_id}` until `completed: true`. Cancellation via `POST /_tasks/{task_id}/_cancel`.
+2. **Cluster health** — `GET /_cluster/health?wait_for_status=yellow|green&wait_for_no_relocating_shards&timeout=<duration>` is the canonical "ready" gate. Single-node clusters can never reach green when `number_of_replicas >= 1`; threshold must be configurable.
+3. **Optimistic concurrency** — `_seq_no` + `_primary_term` for the migration ledger and lock document. 409 `version_conflict_engine_exception` is the signal another runner won.
+
+### 2.4 Warmup and consistency concerns
+
+Direct mapping of Hyperbee.Migrations' existing concerns:
+
+| Concern (existing provider) | OpenSearch analog |
+|---------------------------------------------------|----------------------------------------------------------------------------------|
+| Aerospike: wait for index ready | Wait for cluster health + active shards after `PUT /{idx}` |
+| Couchbase: bucket warmup | Wait for cluster status `yellow` (or `green`) after structural changes |
+| Couchbase: sacrificial query post-warmup | Optional `_refresh` on managed indices; `wait_for` on critical writes |
+| All: index visibility post-create | 1s default refresh interval; use `?refresh=wait_for` for read-after-write tests |
+
+Specific gotchas:
+- Mapping changes do NOT reindex existing docs.
+- Static settings (`number_of_shards`, `analysis.*`, codec) require close/open — destructive to writes.
+- Alias switching during reindex is the canonical zero-downtime pattern (atomic multi-action `_aliases` body).
+- ISM policy attachment to existing indices is a separate `POST /_plugins/_ism/add` step beyond `ism_template`.
+
+### 2.5 Existing migration tools (prior art)
+
+| Tool | Lang | Format | State | Lock | Notable |
+|---|---|---|---|---|---|
+| [senacor/elasticsearch-evolution](https://github.com/senacor/elasticsearch-evolution) | Java | `.http` files | Internal index, checksum-on-replay | Lock-doc | Flyway-style; closest to "ready to use" |
+| [babenkoivan/elastic-migrations](https://github.com/babenkoivan/elastic-migrations) | PHP | PHP class up/down | Laravel migration table | Laravel | Mixes ES with external state DB |
+| [hubrick/elasticsearch-migration](https://github.com/hubrick/elasticsearch-migration) | Java | YAML with verb enum | Internal index | — | Closest prior art to a typed-statement DSL |
+| [quandoo/elasticsearch-migration](https://github.com/quandoo/elasticsearch-migration) | Java | YAML changesets | Internal index | — | |
+| [liquibase-opensearch](https://github.com/liquibase/liquibase-opensearch) | Java | Liquibase changelog with one `httpRequest` change type | Liquibase changelog table | Liquibase | Concedes abstraction; pure pass-through |
+| [zobayer1/elastic-migrate](https://github.com/zobayer1/elastic-migrate) | Python | JSON config | — | — | Small CLI |
+| [medcl/esm](https://github.com/medcl/esm) | Go | CLI flags | — | — | Pure data-mover, not schema-migration |
+
+**No widely-used .NET-native ES/OpenSearch migration library exists.** Thomas Ardal's [NEST migration pattern](https://thomasardal.com/elasticsearch-migrations-with-c-and-nest/) is a 2018 blog example, not a packaged library. This OpenSearch provider would fill a real gap.
+
+### 2.6 State / metadata index
+
+Recommended baseline:
+- One index, doc-per-migration, keyed by migration id (e.g., `1000.m1000-createindex`)
+- `dynamic: strict` mapping — typo-proof
+- Update with `if_seq_no`/`if_primary_term` — concurrent runners get clean 409
+- Index ledger writes with `?refresh=wait_for` — ledger is tiny, cost is irrelevant
+
+### 2.7 Distributed locking
+
+There is **no native lock primitive** in OpenSearch and **no .NET library** implements one. (The Java OpenDistro `LockService` is internal, used by ISM, not a public client API.) Practical options:
+
+1. **Lock-doc with explicit heartbeat** — owner periodically updates `last_heartbeat` with `if_seq_no`. Takeover requires staleness check + CAS overwrite. Mirrors Aerospike auto-renewing pattern.
+2. **Lock-doc with TTL via ISM** — ISM policy deletes docs older than N minutes. Same renewal-vs-TTL race as Aerospike.
+3. **External lock (Redis/etcd/ZooKeeper)** — clean semantically; biggest dependency cost.
+
+Option 1 (heartbeat CAS) is the recommendation. Aerospike's `LockHandle` design is directly portable.
+
+### 2.8 Resource file conventions
+
+Three live patterns in the wider ecosystem:
+- Raw HTTP method + path + JSON body (elasticsearch-evolution)
+- Typed verbs over JSON bodies (hubrick) ← closest to in-house Couchbase pattern
+- Pure C# fluent (Mongock-style)
+
+Templated mappings with `{{var}}` substitution are mandatory for any real-world tool — index names, replica counts, and analyzer chains differ across environments.
+
+---
+
+## 3. Statement Grammar Considerations
+
+### 3.1 Granularity
+
+The Couchbase pattern (one DSL statement per logical operation; multiple statements per migration class) is sound prior art:
+- One DSL block per migration would force authors to invent intra-block sequencing
+- One statement per migration would force class proliferation
+- The `statements[]` array is the unit; each element is one verb invocation
+
+### 3.2 JSON embedding
+
+OpenSearch payloads are large and almost always JSON. Strategies:
+
+| Strategy | Used by | Pros | Cons |
+|---|---|---|---|
+| Inline string in `"body"` | liquibase-opensearch | Simple, one file | Quote-escaping hell |
+| Heredoc/folded YAML | Liquibase YAML | Readable | YAML quirks |
+| `.http` file with blank-line body | elasticsearch-evolution | Best readability | Custom file format |
+| External `bodyFile` reference | (rare) | Clean | Two-file lookup |
+| **Sibling JSON object referenced by `$name`** | (proposed) | Real JSON tooling, no escaping | Slightly novel |
+
+The proposal: keep the `statements.json` wrapper; each statement object can carry inline `body` as a sibling JSON object referenced by `WITH BODY $name`. Mirrors SQL parameters; avoids quote escaping.
+
+```json
+{
+ "statement": "CREATE INDEX `users-v2` WITH BODY $usersIndex",
+ "usersIndex": { "settings": { "number_of_shards": 2 }, "mappings": { ... } }
+}
+```
+
+### 3.3 Templating
+
+Wire Hyperbee.Templating (existing in-house) for the first time. Render the entire wrapper before parse. Recommended scopes:
+- **env** — process env vars (`{{env.NODE_ENV}}`)
+- **config** — IConfiguration values
+- **runtime** — current migration name, version, timestamp, target cluster
+- **secrets** — separate scope so secrets can be redacted in logs
+
+Distinguish template-time `{{#if}}` (controls whether the statement string exists at all) from grammar-time `WHEN VERSION > '...'` (runtime check against live cluster). Both are valuable; do not conflate.
+
+### 3.4 Verb set
+
+| Verb | Maps to | Notes |
+|---|---|---|
+| `CREATE INDEX [IF NOT EXISTS] <name> WITH BODY $body` | `PUT /{name}` | Idempotency marker |
+| `DROP INDEX [IF EXISTS] <name>` | `DELETE /{name}` | |
+| `UPDATE MAPPING ON <idx> WITH BODY $body` | `PUT /{idx}/_mapping` | Reject unsafe changes at parse |
+| `UPDATE SETTINGS ON <idx> [CLOSE] WITH BODY $body` | `PUT /{idx}/_settings` | Explicit `CLOSE` for static |
+| `REINDEX FROM <src> TO <dst> [WITH BODY $body] [WAIT FOR COMPLETION true\|false]` | `POST /_reindex?wait_for_completion=false` + Tasks API poll | Always async by default |
+| `ALIAS SWAP <alias> FROM <old> TO <new>` | One atomic `POST /_aliases` body | Killer feature |
+| `ALIAS ADD <alias> ON <idx>` / `ALIAS REMOVE <alias> ON <idx>` | `POST /_aliases` | |
+| `CREATE TEMPLATE <name> WITH BODY $body` | `PUT /_index_template/{name}` | |
+| `CREATE COMPONENT <name> WITH BODY $body` | `PUT /_component_template/{name}` | |
+| `CREATE POLICY <id> WITH BODY $body` | `PUT /_plugins/_ism/policies/{id}` | |
+| `APPLY POLICY <id> TO <idx>` | `POST /_plugins/_ism/add` | |
+| `WAIT FOR <status> [ON <idx>] [TIMEOUT <duration>]` | `GET /_cluster/health?wait_for_status=...` | First-class wait |
+| `WAIT UNTIL TASK <id> COMPLETE [TIMEOUT <duration>]` | `GET /_tasks/{id}` poll | First-class wait |
+| `REFRESH <name>` | `POST /{name}/_refresh` | |
+
+### 3.5 Async/wait grammar
+
+Two models: implicit (Cassandra cqlmigrate auto-waits for schema agreement) vs explicit (`WAIT FOR ...` is its own verb). Recommendation: **both**. Default implicit `WAIT FOR YELLOW TIMEOUT 30s` after `CREATE INDEX`/`REINDEX`/`ALIAS SWAP`/`UPDATE SETTINGS`/`APPLY POLICY`, configurable. Explicit `WAIT FOR` available for stronger guarantees or async-task waits.
+
+### 3.6 Conditional execution
+
+Liquibase preconditions are gold standard. Minimum useful set:
+- `IF EXISTS <name>` / `IF NOT EXISTS <name>` — live cluster state
+- `IF VERSION > '<version>'` — cluster version
+- `IF CONTEXT IN (prod, staging)` — Liquibase-style env tags
+- Wrapper-level `context` array filters whole migration
+
+### 3.7 Rollback
+
+OpenSearch reality:
+- Index creation has clean inverse (delete)
+- Mapping changes are largely one-way
+- Reindex reversible only if source kept
+- ISM policies have inverses
+- Alias swaps trivially reversible
+
+Recommendation: optional `rollback` block per statement (Liquibase-style), documented as best-effort. Don't auto-generate rollbacks. Don't pretend mapping changes are reversible.
+
+### 3.8 Atomicity
+
+OpenSearch has no transactions. Don't pretend otherwise. Provider's contributions:
+- The framework lock (already in core)
+- Idempotency from `IF [NOT] EXISTS`
+- Compensating actions via `rollback` block
+- `ALIAS SWAP` compiles to one atomic multi-action `_aliases` body — closest thing to a transaction
+
+---
+
+## 4. Risks and Footguns
+
+1. **Yellow-vs-green hardcoding** — single-node dev clusters can't reach green; must be per-environment configurable.
+2. **Mapping changes silently no-op for existing docs** — provider should detect type/analyzer changes at parse and require explicit reindex.
+3. **Static settings require close/open** — destructive; needs explicit `CLOSE` flag.
+4. **Bulk back-pressure (429)** — must use `BulkAllObservable` with backoff; expose policy.
+5. **Reindex from remote auth** — requires cluster-side `reindex.remote.allowlist`; produce clear error.
+6. **ISM policy attachment timing** — `ism_template` only matches future indices; existing need explicit `_plugins/_ism/add`.
+7. **Lock TTL vs heartbeat race** — same gotcha already solved in Aerospike; reuse the pattern.
+8. **Composable templates not retroactive** — only future indices.
+9. **Reindex doesn't copy aliases/templates/settings** — only docs. New index must be created first.
+10. **Cluster state size** — large template counts and deep mappings make every PUT propagate slowly.
+11. **Default `dynamic: true` is dangerous** — managed indices should default `dynamic: strict`.
+12. **`op_type: create` on reindex** — eliminates double-write on re-runs.
+13. **Anti-pattern: SQL-style WHERE clauses** — OpenSearch is not relational; don't borrow concepts that don't map.
+14. **Anti-pattern: parser without escape hatch** — every typed verb must accept `WITH BODY $body` for unforeseen edge cases.
+15. **Anti-pattern: comment rules that break JSON** — comments belong in the wrapper, not the payload.
+16. **Anti-pattern: hidden waits without timeout** — implicit waits must always have a finite default.
+17. **Anti-pattern: unversioned grammar** — embed `dsl_version` in wrapper.
+
+---
+
+## 5. Top Design Implications
+
+1. **Build on OpenSearch.Client 1.8 + OpenSearch.Net 1.8.** Optional `OpenSearch.Net.Auth.AwsSigV4`. Target net8.0/net9.0 to match the rest of Hyperbee.Migrations.
+2. **Ledger lives in OpenSearch itself**, in a `dynamic: strict` index. Update with `if_seq_no`/`if_primary_term`. Index with `?refresh=wait_for`.
+3. **Reuse the Aerospike auto-renewing lock pattern** ported to `_seq_no`/`_primary_term` CAS. No native primitive; no community .NET library.
+4. **`WAIT FOR HEALTH` and `WAIT FOR TASK` as first-class statements.** Yellow-vs-green configurable per environment.
+5. **Default async for reindex/snapshot/restore/force-merge** with Tasks API polling and exponential backoff.
+6. **`BulkAllObservable` with sane defaults** (5MB batches, exponential backoff on 429, 8x parallelism). Default `refresh=false`; explicit `_refresh` at end.
+7. **Hybrid resource format**: thin verb grammar + opaque JSON bodies via `WITH BODY $name`. Mustache-style templating from per-environment variables file.
+8. **Atomic `ALIAS SWAP` as a built-in idiom**, compiling to one `_aliases` request body.
+9. **Default-strict dynamic mapping; default `op_type: create` on reindex.**
+10. **Front-load detection of unsafe operations** (type changes, field removals, static settings on open indices) at parse time with clear error messages.
+
+---
+
+## 6. Open Questions for nop:propose
+
+1. **Statement grammar shape**: hybrid Parlot verb grammar (Couchbase-style) vs pure JSON action objects (hubrick-style) vs raw HTTP files (elasticsearch-evolution-style)?
+2. **Body embedding**: sibling JSON object referenced by `$name` vs inline string vs external file reference?
+3. **Wait policy**: implicit + explicit hybrid (recommended) vs implicit-only vs explicit-only?
+4. **Ledger location**: dedicated `.migrations` index vs system index pattern vs configurable?
+5. **Lock implementation depth**: full auto-renewing port from Aerospike (recommended) vs simple TTL-only vs external lock dependency?
+6. **Templating engine wiring**: full Hyperbee.Templating integration vs simple `${var}` substitution vs none?
+7. **Bootstrapper complexity**: Couchbase-style multi-state vs simpler health-poll-only?
+
+These will be evaluated head-to-head in the follow-on `nop:propose` design exercise.
+
+---
+
+## Sources
+
+External:
+- [OpenSearch .NET clients](https://docs.opensearch.org/latest/clients/dot-net/)
+- [Cluster health API](https://docs.opensearch.org/latest/api-reference/cluster-api/cluster-health/)
+- [Reindex API](https://docs.opensearch.org/latest/api-reference/document-apis/reindex/)
+- [ISM API](https://docs.opensearch.org/latest/im-plugin/ism/api/)
+- [Index aliases](https://docs.opensearch.org/latest/im-plugin/index-alias/)
+- [senacor/elasticsearch-evolution](https://github.com/senacor/elasticsearch-evolution)
+- [hubrick/elasticsearch-migration](https://github.com/hubrick/elasticsearch-migration)
+- [liquibase/liquibase-opensearch](https://github.com/liquibase/liquibase-opensearch)
+- [Flyway concepts](https://github.com/flyway/flywaydb.org/blob/gh-pages/documentation/concepts/migrations.md)
+- [Liquibase changeSet](https://docs.liquibase.com/concepts/changelogs/changeset.html)
+- [Mongock v5](https://docs.mongock.io/v5/migration/)
+- [cqlmigrate](https://github.com/sky-uk/cqlmigrate)
+- [Hyperbee.Templating](https://github.com/Stillpoint-Software/hyperbee.templating)
+- [Parlot](https://github.com/sebastienros/parlot)
+
+In-house:
+- [src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs](../../src/Hyperbee.Migrations.Providers.Couchbase/Parsers/StatementParser.cs)
+- [src/Hyperbee.Migrations.Providers.Couchbase/CouchbaseBootstrapper.cs](../../src/Hyperbee.Migrations.Providers.Couchbase/CouchbaseBootstrapper.cs)
+- [src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs](../../src/Hyperbee.Migrations.Providers.Aerospike/AerospikeRecordStore.cs)
+- [src/Hyperbee.Migrations/Wait/WaitHelper.cs](../../src/Hyperbee.Migrations/Wait/WaitHelper.cs)
+- [docs/decisions/0001-parlot-for-statement-parsers.md](../decisions/0001-parlot-for-statement-parsers.md)
+- [docs/decisions/0002-resource-migration-pattern.md](../decisions/0002-resource-migration-pattern.md)
+- [docs/decisions/0005-provider-native-distributed-locking.md](../decisions/0005-provider-native-distributed-locking.md)
+- [docs/decisions/0008-wait-retry-infrastructure.md](../decisions/0008-wait-retry-infrastructure.md)
diff --git a/docs/research/0002-opensearch-provider-assessment.md b/docs/research/0002-opensearch-provider-assessment.md
new file mode 100644
index 0000000..a403d66
--- /dev/null
+++ b/docs/research/0002-opensearch-provider-assessment.md
@@ -0,0 +1,190 @@
+# Assessment: OpenSearch Provider Requirements
+
+**Date:** 2026-05-02
+**Status:** Final
+**Subject:** [docs/requirements/opensearch-provider.md](../requirements/opensearch-provider.md)
+**Mode:** Standard Full Assessment (Triage → 3 Discovery → Synthesis → Red-Blue → Independent Review → Red-Blue₂ → Consolidation)
+**Goals:** Production-capable OpenSearch provider; zero data loss during reindex/alias swaps; no permanent lockouts; same migrations run unchanged across single-node dev, multi-node prod, and AWS Managed OpenSearch.
+
+## Triage
+
+| Skill | Value | Selected |
+|-------|-------|----------|
+| Pre-mortem | High | Yes |
+| Mechanism Design | High | Yes |
+| Performance Audit | High | Yes |
+
+## Headline finding
+
+**The Independent Review's meta-claim was validated and is the most important takeaway:** the synthesis repeatedly defers to "samples and documentation" as fixes for correctness hazards on the *laziest* code path. This contradicts the mechanism design premise that consumers take the path of least resistance. R-17's existing `dynamic: strict` injection is the correct precedent — silent-default insertion enforced by the parser, not by docs. Apply that shape to **PM-3** (`op_type: create` injection on `REINDEX`), **MD-3** ($body namespace policy), **PA-2** (lock-index settings), **MD-9** (component-template-aware injection logic). The test: *can a competent author who ignores the samples still ship a correct migration?* If not, the parser/runtime must enforce it.
+
+## Convergence summary
+
+- **Red-Blue₁ balance:** ~55% Red / ~45% Blue. Balanced.
+- **Independent Review:** 5 disagreements + 5 new findings + 1 meta-pattern.
+- **Red-Blue₂ balance:** Red won 4 of 5 contested points; Blue conceded 4 of 5 new findings; meta-pattern validated.
+
+## Final consolidated verdicts
+
+### Synthesis amendments (revised after Red-Blue₂)
+
+| Amendment | Final Verdict | Action |
+|---|---|---|
+| 1. ~~R-29 EnvironmentProfile enum~~ → `WithProductionDefaults()` extension | **Redesign** | Replace enum with extension method `services.AddOpenSearchMigrations(...).WithProductionDefaults()` that explicitly sets the four options (ClusterHealthThreshold=Green, WaitMode=PerMigration, RequireUnsafeJustification=true, ContextResolutionPolicy=RequireExplicit). Keep the startup-log banner invariant. No hidden coupling. |
+| 2. R-03 profile-driven threshold | Keep | Production = Green via the extension; Yellow remains the SDK default for dev |
+| 3. R-10 SecretMarker + log-time SecretScrubber by hash | Keep | Ship as designed |
+| 4. R-12 WaitMode enum | **Keep + scope amendment (NF-3)** | Implicit wait is `PerMigration` by default in production. Implicit waits scope to the mutated index by default (e.g., `?index=users-v2`) so a permanently-yellow `.opendistro_security` doesn't stall unrelated migrations. Cluster-wide is explicit `WAIT FOR GREEN` with no `ON <index>` clause. `NO WAIT` requires a justification token |
+| 5. R-15 ActiveContext + RequireExplicit policy | Keep | Resolves Open Question; Production forces RequireExplicit |
+| 6. R-18 UNSAFE justification | Keep | Token requires justification string; structured WARN log; explicit syntactic enumeration in samples |
+| 7. R-21 SigV4 loud-fail + endpoint-capability detection | Keep | Detects `*.amazonaws.com` / `*.aoss.amazonaws.com` and AWS-specific ISM endpoint paths |
+| 8. R-05 lock validation + realtime GET on takeover | Keep | Validation enforces `LockRenewInterval < LockStaleAfter < LockMaxLifetime` AND `LockStaleAfter ≥ 2*LockRenewInterval`. Takeover uses `GET /{idx}/_doc/{id}?realtime=true` to avoid search-staleness false positives. LockTuning presets demoted to docs |
+| 9. R-25 logs route through SecretScrubber | Keep | Pairs with #3 |
+| 10. Trust Boundaries / startup banner | Keep | Banner shows resolved defaults including rollback enabled/disabled state |
+| 11. R-27 samples expanded | Keep | Demonstrate WaitMode, UNSAFE justification, $body namespace, op_type behavior |
+| 12. Decided list cleanup | Keep | Hygiene |
+| 13. R-17 dynamic:strict opt-in | **Redesign** | Make injection opt-in (not default), or component-template-aware (skip injection when body has `composed_of`) — apply uniform shape with new Amendment 14 |
+| **14 (new). R-08a `REINDEX SAFE` default** | **Add** | `REINDEX FROM x TO y` injects `op_type: create` by default; opt out with `REINDEX UNSAFE FROM x TO y` (with justification, per Amendment 6). Closes PM-3 at parser level. R-24c integration test asserts `op_type: create` is on the wire by default |
+| **15 (new). R-15a semantic version comparison** | **Add (Must)** | `WHEN VERSION` parses to `System.Version` / SemVer; rejects unparseable inputs at parse time; integration test asserts `'2.9' < '2.10'` (lexically false but semantically true). Documented suffix-normalization for `-SNAPSHOT`, `-rc1`, AWS `OpenSearch_2.x` prefix |
+| **16 (new). R-16 atomic precondition** | **Add (Must, correctness)** | `ALIAS SWAP` precondition is expressed inside the single `_aliases` POST body (e.g., the `remove` action targets `<old>` so the cluster rejects the body atomically if `<old>` is not the current target). Strike the separate precondition GET from the R-16 Otherwise clause |
+| **17 (new). R-06 ledger forensic fields** | **Add (Must)** | Ledger mapping includes `appliedBy` (string: machine + pid + optional `RunnerId`) and `direction` (`Up`/`Down`). Strict mapping is immutable per Forbidden list — must land before v1 |
+| **18 (new). R-19 partial rollback semantics** | **Add (Must, correctness)** | When Down rollback fails mid-sequence: ledger entry marked `status: partially_rolled_back` with failed-statement index; subsequent runs refuse to retry in either direction without explicit `--force-resume`; error lists failed + already-rolled-back statements |
+| **19 (new). R-28 multi-node CI as Must** | **Promote** | Multi-node Testcontainers Compose (3-node) is Must with CI automation; AWS Managed remains Should + scheduled |
+| **20 (new). R-07 ledger refresh budget** | **Monitor** | Keep `?refresh=wait_for` as default; R-24c adds measured-cost test ("100-migration bootstrap completes in < N seconds"). If budget breaks, alternative is `refresh=true` for ledger writes (hot single-doc index, bounded cost) |
+
+### Discovery findings (final consolidated)
+
+#### Pre-mortem
+| ID | Final Verdict | Action |
+|----|---------------|--------|
+| PM-1 heartbeat false takeover | Redesign | Amendment 8 (validation + realtime GET on takeover) |
+| PM-2 SigV4 creds caching | Redesign | Amendment 7 |
+| PM-3 reindex stale dst | **Redesign at parser level** | Amendment 14 (auto-inject `op_type: create`) |
+| PM-4 dynamic:strict clobbers | Redesign | Amendment 13 (opt-in or component-template-aware) |
+| PM-5 templating JSON-context bugs | Monitor | Add to R-24c test list |
+| PM-6 AWS ISM endpoint differences | Redesign | Amendment 7 (expanded) |
+| PM-7 yellow alias swap | Keep | Resolved by Amendment 2 + multi-node CI (Amendment 19) |
+| PM-8 stagnant 1.8 client | Defer | Track upgrade cadence; revisit when OpenSearch 3.x ships |
+| PM-9 WHEN VERSION semver | **Promoted to Must** | Amendment 15 |
+| PM-10 mapping drift via hand-edit | Monitor | Operator-discipline; no design fix |
+| PM-11 Testcontainers mutable pin | Redesign | Pin by sha; trivial |
+| PM-12 LockMaxLifetime ceiling | Redesign | Amendment 8 (explicit cancellation contract) |
+
+#### Mechanism Design
+| ID | Final Verdict | Action |
+|----|---------------|--------|
+| MD-1 context source-of-truth | Keep | Amendment 5 |
+| MD-2 UNSAFE single-token | Keep | Amendment 6 |
+| MD-3 templating $body collision | **Re-examine** | Apply meta-pattern: parser-level namespace policy + reserved name list (not just docs) |
+| MD-4 Yellow default ships | Keep | Amendment 2 + WithProductionDefaults() extension |
+| MD-5 Lock TTL coupling | Keep | Amendment 8 validation |
+| MD-6 SigV4 invisible | Keep | Amendment 7 |
+| MD-7 implicit wait scope | Keep | Amendment 4 |
+| MD-8 raw mapping JSON / no schema | Defer | Nice-to-have JSON Schema for IDE help; v1.1 |
+| MD-9 dynamic:strict copy-paste | **Re-examine** | Apply meta-pattern: component-template-aware injection at parser level (Amendment 13) |
+| MD-10 WHEN VERSION lazy strings | **Promoted to Must** | Amendment 15 |
+| MD-11 NO WAIT shape | Keep | Amendment 4 |
+| MD-12 bulk-load _refresh appears hung | Monitor | Log-line clarity fix; trivial |
+| MD-13 rollback opt-in invisible | Keep | Amendment 10 startup banner |
+| MD-14 IF NOT EXISTS omitted | Defer | Doc warning (this one IS appropriate for docs — author actively writes the verb) |
+| MD-15 secrets in config scope | Keep | Amendment 3 |
+
+#### Performance Audit
+| ID | Final Verdict | Action |
+|----|---------------|--------|
+| PA-1 ledger refresh=wait_for serial | **Promoted to Monitor** | Amendment 20 (measured-cost test) |
+| PA-2 lock shard contention | **Re-examine** | Apply meta-pattern: parser/runtime sets `number_of_replicas: 0` on lock index at create — not just doc |
+| PA-3 implicit health-wait N+1 | Keep | Amendment 4 (PerMigration default) |
+| PA-4 Tasks API INFO log flood | Redesign | Demote to DEBUG; trivial |
+| PA-5 lock false-positive | Keep | Amendment 8 (realtime GET) |
+| PA-6 bulk parallelism topology-blind | Defer | Topology-aware tuning is v1.1 |
+| PA-7 templating no caching spec | Defer | Specify if profiling shows hot path |
+| PA-8 Parlot construction cost | Defer | Per-runner caching when profiled |
+| PA-9 SigV4 signing overhead | Defer | Re-evaluate if AWS users hit limit |
+| PA-10 conn pool pins one node | Defer | Pairs with PM-8 client upgrade |
+| PA-11 WAIT UNTIL TASK 30s ceiling | Defer | Minor |
+| PA-12 bootstrap health storm | Defer | Pairs with PA-3 fix |
+
+### New findings (from Independent Review)
+
+| ID | Severity | Final Verdict | Action |
+|----|----------|---------------|--------|
+| NF-1 R-06 ledger unforensic | Medium | **Redesign** | Amendment 17 — add `appliedBy` + `direction` |
+| NF-2 R-16 ALIAS SWAP TOCTOU | High | **Redesign** | Amendment 16 — atomic precondition inside `_aliases` body |
+| NF-3 wait_for_status stalls on yellow indices | Medium | **Redesign** | Amendment 4 (scoped implicit wait) |
+| NF-4 No WAIT FOR not red verb | Low | Defer | `WAIT FOR YELLOW` covers it; v1.1 if asked |
+| NF-5 R-19 partial rollback semantics | High | **Redesign** | Amendment 18 — `partially_rolled_back` ledger state |
+
+## Convergence Analysis
+
+**Strong convergence (act now):**
+- PM-1 + PA-5 + MD-5 + Amendment 8 — lock CAS correctness reached via three independent reasoning paths (temporal: refresh lag; performance: takeover false-positive zone; mechanism design: TTL coupling)
+- MD-1 + Amendment 5 — context source-of-truth resolved by direct evidence (Open Question in artifact + lazy-path analysis)
+- PM-7 + MD-4 + Amendment 2 — Yellow default unsafe in prod confirmed by both temporal failure and consumer modeling
+
+**Weak convergence (review individually):**
+- The "documentation as fix" pattern across PM-3, MD-3, PA-2, MD-9 — all four reached the same flawed conclusion via shared prior (the framework already has docs/samples, so leveraging them feels natural). IR caught it. Re-examined.
+- Yellow vs Green threshold flagged independently by PM and MD — but these may share the surface observation (R-03's default is Yellow), not deep independent analysis. Convergence holds because the lazy-path failure (operator never reviews) is independently confirmable.
+
+**Disagreements that resolved:**
+- IR vs synthesis on R-29 EnvironmentProfile (resolved as `WithProductionDefaults()` extension)
+- Red-Blue on PA-1 perf vs correctness (resolved as Monitor with measured budget)
+- Red-Blue on R-28 multi-node CI cost (resolved by checking Testcontainers actual capability)
+
+**Shared-prior check:** "Would a developer reading the artifact for 5 minutes notice the same thing?" Yes for MD-4 (Yellow default), Yes for MD-1 (Open Question is literally flagged), No for PM-1 (refresh-lag interaction with TTL math) — that one is genuine deep analysis. Confidence high on PM-1 / Amendment 8.
+
+## Action plan (prioritized)
+
+### P0 — Must land before v1 (correctness)
+1. Amendment 14 — `REINDEX` injects `op_type: create` by default (PM-3)
+2. Amendment 15 — `WHEN VERSION` semantic comparison (PM-9, MD-10)
+3. Amendment 16 — `ALIAS SWAP` atomic precondition (NF-2)
+4. Amendment 17 — Ledger forensic fields (`appliedBy`, `direction`) (NF-1)
+5. Amendment 18 — Partial rollback ledger semantics (NF-5)
+6. Amendment 13 — `dynamic: strict` opt-in or component-template-aware (PM-4, MD-9)
+7. Amendment 8 — Lock validation + realtime GET on takeover (PM-1, PA-5, MD-5, PM-12)
+8. Amendment 2 — `WithProductionDefaults()` extension; Green threshold default (MD-4, PM-7)
+9. Amendment 5 — `ActiveContext` + RequireExplicit policy (MD-1)
+10. Amendment 7 — SigV4 + AWS endpoint loud-fail (PM-2, PM-6, MD-6)
+11. Amendment 3 + 9 — SecretMarker + log-time scrubber (MD-15)
+12. Amendment 19 — Multi-node Testcontainers Compose CI as Must (PM-7, R-28)
+
+### P1 — Land in v1 (production safety)
+13. Amendment 4 — `WaitMode` enum + scoped implicit wait (MD-7, MD-11, PA-3, NF-3)
+14. Amendment 6 — `UNSAFE` justification token (MD-2)
+15. Amendment 10 — Startup banner (MD-13)
+16. Amendment 11 — Samples (R-27 expansion)
+17. Amendment 20 — Ledger refresh budget test (PA-1)
+18. Re-examine MD-3, PA-2 with meta-pattern (parser enforcement, not docs)
+19. PM-11 — Pin Testcontainers image by sha
+20. PA-4 — Tasks API logs to DEBUG
+
+### P2 — Defer to v1.1 (perf, ergonomics)
+- PA-1, PA-6, PA-7, PA-8, PA-9, PA-10, PA-11, PA-12 (perf optimization)
+- PM-8 client upgrade tracking (when OpenSearch 3.x lands)
+- MD-8 JSON Schema for IDE help
+- MD-12 bulk `_refresh` log-line clarity
+- NF-4 `WAIT FOR not red` verb
+- AWS Managed OpenSearch CI automation
+
+### P3 — Open backlog with explicit triggers
+- PM-9 long-tail semver suffixes (revisit when AWS prefix issues reported)
+- PM-10 mapping drift detection (revisit if hand-edit incidents observed)
+- MD-14 IF NOT EXISTS lint (revisit if ledger-wipe incidents observed)
+- R-15 PRD `context` granularity beyond `RequireExplicit`/`SkipIfUnset`
+
+## Recommendations to user
+
+1. **Update the requirements doc** with all P0 and P1 amendments; promote ledger forensics, atomic precondition, semver, partial rollback, REINDEX safe-default, multi-node CI to Must.
+2. **Replace R-29 enum proposal** with `WithProductionDefaults()` extension method — document this as a Decided item; resolve the IR's "hidden coupling" concern.
+3. **Apply the meta-pattern systematically**: re-examine MD-3 (templating namespace), PA-2 (lock index settings), MD-9 (component-template injection) with parser/runtime enforcement, not docs. The test is "can a lazy path still be wrong?" — if yes, fix in code.
+4. **Run `/nop:propose` next** with the updated requirements as fitness criteria. Several decisions still require evaluation across competing implementation strategies (e.g., parser-level injection vs runtime middleware for `op_type: create`; opt-in vs component-template-aware for `dynamic: strict`).
+
+## Out of scope (confirmed during assessment)
+
+These were explicitly evaluated and rejected from v1:
+- AWS Managed OpenSearch CI automation (Should + scheduled, not Must — Amendment 19 only covers multi-node)
+- Semantic detection of unsafe ops (vs syntactic enumeration) — research project deferred
+- `WAIT FOR not red` verb — `WAIT FOR YELLOW` covers
+- JSON Schema for `statements.json` — v1.1 IDE ergonomics
+- Topology-aware bulk parallelism — v1.1 perf
+- ES 7.x legacy compatibility — separate provider if demand emerges
diff --git a/docs/research/0003-opensearch-plan-assessment.md b/docs/research/0003-opensearch-plan-assessment.md
new file mode 100644
index 0000000..1b1bf91
--- /dev/null
+++ b/docs/research/0003-opensearch-plan-assessment.md
@@ -0,0 +1,205 @@
+# Assessment: OpenSearch Provider Implementation Plan
+
+**Date:** 2026-05-02
+**Status:** Final
+**Subject:** [docs/plans/active/opensearch-provider.md](../plans/active/opensearch-provider.md)
+**Mode:** Standard Full Assessment (Triage → 3 Discovery → Synthesis-skipped → Red-Blue → Independent Review → Red-Blue₂ → Consolidation)
+**Goals:** Production-capable OpenSearch provider; same migrations across single-node dev, multi-node prod (CI-automated), AWS Managed (scheduled validation); zero data loss; no permanent lockouts.
+
+## Triage
+
+| Skill | Value | Selected |
+|-------|-------|----------|
+| Pre-mortem | High | Yes |
+| Mechanism Design | High | Yes |
+| Performance Audit (project-scale) | High | Yes |
+
+## Headline finding
+
+**The plan is structurally sound but needs four targeted amendments before Phase 1 starts.** The risk-first phasing concept survives intact; the cuts are about scoping (Phase 2 is a hidden mega-phase, the Compose scaffold rots, the ADR audit is deferred too late), not about reorganizing the architecture. The single highest-ROI mitigation is converting the Style Reference's "non-empty" test into "≥10 file:line citations across ≥4 patterns" — this one change closes a class of cascade risks across all subsequent phases.
+
+The IR identified a critical buried architectural commitment in Phase 5 task 5.3 — *"parse-time `GET /_index_template/<name>` lookup"* — that contradicts ADR-0011's intent that parsers be offline-pure. Resolution: move template-body resolution to runtime, amend ADR-0011 to state "parser is offline-pure; all I/O is runtime middleware."
+
+## Convergence summary
+
+- **Red-Blue₁:** 47% Red / 53% Blue. Balanced.
+- **Independent Review:** 5 disagreements + 6 new findings + 3 meta-patterns
+- **Red-Blue₂ after IR:** **Red 4 wins / Blue 0 wins / Synthesis 3.** All 6 new findings acknowledged actionable.
+
+## Final consolidated verdicts
+
+### Plan amendments — Must land before Phase 1 starts
+
+| # | Amendment | Source | Severity |
+|---|---|---|---|
+| **A1** | Split Phase 2 into 2a (DI + ledger + bootstrapper skeleton) and 2b (lock state machine + R-24b suite) | PM-2, PA-1, MD-2 | **High** |
+| **A2** | Delete Task 0.6 (multi-node Compose scaffold); rebuild as Phase 4 prereq subtask | PA-3 + Round 2 win | **High** |
+| **A3** | Move Task 7.7 (multi-node CI integration) into Phase 4 prereq window — Phase 4 cannot meet its own R-24c-(a) criterion otherwise | PA-7, PM-3, IR | **Critical** (ordering bug) |
+| **A4** | Promote Task 0.3 (codebase audit + Style Reference) to Task 0.1 — current Task 0.1 ("Mirror Aerospike runner exactly") cannot run before audit completes | PA-12, IR | **High** |
+| **A5** | Add Phase 1.5 gate between Phases 1 and 2 — spike must validate at least one body resolved via live template lookup OR validate the parser/runtime boundary that NF-2 will redraw | PM-1, MD-1, PA-2 | **High** |
+| **A6** | Move Hyperbee.Templating spike to Phase 0 — first-contact bugs cascade if left to Phase 6 | PM-4 + design line 201 | **High** |
+| **A7** | Style Reference test strategy → "must contain ≥10 file:line citations across ≥4 patterns (lock, bootstrapper, grammar, DI registration)" | MD-4, MD-10 | **High (highest single-mitigation ROI)** |
+| **A8** | Phase 1 kill-criterion verbatim: *"merge logic cannot deterministically produce expected JSON without ambiguity for any of the 5 documented edge cases"* | MD-11 + IR Contested 2 (Red wins) | **High** |
+| **A9** | Move parse-time template-body resolution to **runtime**; amend ADR-0011 to state "parser is offline-pure; all I/O is runtime middleware" | NF-2 (IR) | **High** (architectural) |
+| **A10** | Add Phase 1 fallback paragraph: if spike fails, Approach A (Couchbase-Clone, runtime middleware only) becomes the documented fallback architecture; AST types + grammar (Tasks 1.1-1.2) are reusable | NF-3 (IR) | **High** |
+| **A11** | Phase 0 deliverable: enumerated R-24c a-o test table (the suite is referenced 4 times but never enumerated) | NF-4 (IR) | **High** |
+
+### Plan amendments — Should land
+
+| # | Amendment | Source |
+|---|---|---|
+| **B1** | Per-phase ADR-touched checklist in Definition of Done; shrink Task 7.11 to final regression cross-check, not first-time audit | MD-12, NF-5 |
+| **B2** | R-24c forward-reference table (test → phase → covered combinations) | MD-6, NF-4 |
+| **B3** | Pair tests with implementation per task; req/ADR cross-reference per task | MD-2 |
+| **B4** | Mark each completion criterion `[CI]` or `[judgment]` | MD-9 |
+| **B5** | Phase 1 explicit "Spike Iteration 2" subtask — spikes rarely converge first try | PA-2 |
+| **B6** | Phase 6 internal ordering: Templating spike (Phase 0 already) → core state-sharing (PerMigration, partial rollback) → consumer surface (banner, samples). One mid-phase checkpoint commit between core and surface. **Not split into 6a/6b/6c.** | IR Contested 1 (Synthesis) |
+| **B7** | AWS validation Phase 7 Completion Criteria line: "AWS validation status documented in README with date of last successful run, OR an 'AWS unverified for this release' notice with reason." | IR Contested 3 (Red wins) |
+| **B8** | Plan-vs-code authoritative rule: explicit statement | MD-14 |
+| **B9** | Weekly main rebase policy stated explicitly | MD-13, PM-5 |
+| **B10** | Reflect-step entry template (no checkbox; just template) | MD-15 |
+| **B11** | Phase end DoD: append Learnings, update Status Summary, tag snapshot — single line restatement of plan intent | MD-8 (compressed) |
+| **B12** | Phase 5 Task 5.3: move template lookup to runtime per A9 | NF-2 |
+| **B13** | Task 3.9: cite reserved names from R-09 (`$body`, `$query`, `$script`, `env`, `config`, `runtime`, `secrets`) | NF-1 |
+| **B14** | Task 0.4: declare OpenSearch version-support contract (minimum supported, pinned digest, AWS Managed caveat) | NF-6 |
+| **B15** | Phase 1 add explicit context object for "tracked indices" — Phase 6's PerMigration dirty-index tracker extends it later | PM-11 |
+| **B16** | Sample authoring incremental in Phases 3-5 (one sample per verb as the verb is built) — tag "do-not-cut under deadline" | PM-12 |
+| **B17** | Project-level 18-22 week estimate (single buffer; no per-phase 20% buffers) | PA-8 |
+| **B18** | Phase 1.5 gate documentation includes family-of-shapes paragraph (folded artifact, not standalone) | MD-1 (folded) |
+
+### Cuts (verdicts the assessment proposed but Red-Blue rejected)
+
+| Cut | Rationale |
+|---|---|
+| Pre-commit hook for plan updates | Hook ceremony rots; replaced by B11 phase-end DoD |
+| Per-phase Style Reference refresh | Folded into B1 ADR-touched checklist |
+| Intra-phase tagging policy | Defer — phase + weekly rebase is enough granularity |
+| Review SLA | Defer — bus factor 1; resurface when second engineer joins |
+| Harness-validation test | Tasks 0.5 (smoke) and 1.4 (wire-level) jointly cover the gap; intermediate test is redundant (IR Contested 4, Red wins) |
+| "parallelizable: yes/no" line per phase | Bus factor 1 makes this speculative ceremony (IR Contested 5, Red wins) |
+| 20% per-phase buffer | Per-phase buffers compound to Parkinson's Law; project-level buffer instead |
+| Splitting Phase 6 into 6a/6b/6c | After moving Templating to Phase 0 (A6), Phase 6 shrinks; remaining tasks loosely coupled — internal ordering + one checkpoint commit suffice (IR Contested 1, synthesis) |
+
+### Discovery findings — final consolidated
+
+#### Pre-mortem
+| ID | Final Verdict | Action |
+|----|---------------|--------|
+| PM-1 heartbeat false takeover spike under-scope | Redesign | A5 (Phase 1.5 gate) + A8 (kill criterion) |
+| PM-2 Phase 2 packs 12 tasks | Redesign | A1 split |
+| PM-3 Compose scaffold bit-rots | Redesign | A2 delete + A3 move CI work earlier |
+| PM-4 Phase 6 nine cross-cutting features | Redesign | A6 (Templating to Phase 0) + B6 (internal ordering) |
+| PM-5 Long-lived branch + Style Reference stale | Keep | B9 weekly rebase |
+| PM-6 AWS runbook never run | Keep | B7 release checklist |
+| PM-7 living-doc under deadline | Monitor | B11 phase-end DoD; no hook |
+| PM-8 hello-world only checks cluster health | Cut | Tasks 0.5 + 1.4 cover (IR Contested 4) |
+| PM-9 ADR-0011 ages | Keep | B12 + ADR amendment per A9 |
+| PM-10 IAM-scoped AWS Managed | Monitor | B7 release checklist surfaces this |
+| PM-11 Phase 3/6 shared dirty-index state | Keep | B15 explicit context object |
+| PM-12 samples treated as docs | Keep | B16 incremental sample authoring |
+
+#### Mechanism Design
+| ID | Final Verdict | Action |
+|----|---------------|--------|
+| MD-1 family-of-shapes | Keep (folded) | B18 paragraph in Phase 1.5 gate spec |
+| MD-2 task lists missing test pairing | Keep | B3 |
+| MD-3 Phase 6 ordering arbitrary | Keep | B6 internal ordering |
+| MD-4 Style Reference subjective | Keep | A7 (highest ROI) |
+| MD-5 ADR-0002 not cited in Phase 3 | Keep | B13 covers reserved names; ADR-0002 cite to be added Task 3.1 |
+| MD-6 R-24c tests scattered | Keep | B2 forward-reference table |
+| MD-7 intra-phase tagging | Defer | — |
+| MD-8 living-doc enforcement | Keep (criterion only) | B11 |
+| MD-9 subjective vs objective criteria | Keep | B4 |
+| MD-10 audit quality | Keep (subsumed) | A7 |
+| MD-11 kill-criterion soft phrasing | Keep | A8 (Red's verbatim wording) |
+| MD-12 ADR drift end-audit only | Keep | B1 |
+| MD-13 no rebase strategy | Keep | B9 |
+| MD-14 plan-vs-code authoritative | Keep | B8 |
+| MD-15 ITRV Reflect not actionable | Keep (template only) | B10 |
+
+#### Performance Audit (project-scale)
+| ID | Final Verdict | Action |
+|----|---------------|--------|
+| PA-1 Phase 2 12 tasks | Redesign | A1 split |
+| PA-2 No spike re-spin budget | Keep | B5 explicit Iteration 2 subtask |
+| PA-3 Phase 0 Compose harness rots | Redesign | A2 delete |
+| PA-4 Phase 6 9 sub-tasks | Synthesis | B6 ordering, not split |
+| PA-5 Phase 5/6 prereq | Keep | B12 covers (move template lookup runtime) |
+| PA-6 bus factor 1 | Monitor | — |
+| PA-7 Phase 7 hidden critical path | Redesign | A3 |
+| PA-8 zero slack budget | Keep | B17 project-level buffer |
+| PA-9 no review SLA | Defer | — |
+| PA-10 ADR audit at end | Keep (subsumed) | B1 |
+| PA-11 Compose hardening before 4.6 | Keep | Subtask of A2's Phase 4 prereq |
+| PA-12 Task 0.3 buried | Redesign | A4 |
+
+### Independent Review new findings — final consolidated
+
+| ID | Severity | Verdict | Action |
+|----|----------|---------|--------|
+| NF-1 R-09 reserved namespace policy | Medium | Acknowledge | B13 — list exists in requirements; just cite it |
+| NF-2 parse-time template lookup | High | Redesign | A9 — move to runtime; amend ADR-0011 |
+| NF-3 No Phase 1 fallback strategy | High | Redesign | A10 — Approach A as documented fallback |
+| NF-4 R-24c "15 tests" never enumerated | High | Redesign | A11 — Phase 0 produces a-o table |
+| NF-5 ADR audit Phase 7 too late | Medium | Redesign | B1 — per-phase DoD |
+| NF-6 No version matrix | Medium | Acknowledge | B14 — declare in Task 0.4 |
+
+## Convergence Analysis
+
+**Strong convergence (act now):**
+- Phase 2 packs too much — flagged independently by PM (cascading failure mode), MD (test bundling), PA (calendar weeks). Three reasoning paths, same finding. Strong.
+- Compose scaffold rots — PM (bit-rot from neglect) + PA (throwaway scaffolding) reach the same conclusion. Strong.
+- Phase 7 hidden critical path — PA flagged scheduling, PM flagged ordering coincidence with Phase 4 R-24c-(a) requirement. Strong.
+
+**Weak convergence (review individually):**
+- Phase 6 grab-bag — three audits flagged but the convergence may be shared-prior (the same draft was problematic for the same reason, not three independent failure modes). IR's pushback (don't split; reorder) shows this convergence was less robust than it seemed.
+- Style Reference subjective — MD-4 + MD-10 are the same finding photographed twice.
+
+**Disagreement that resolved:**
+- IR Contested 1 (Phase 6 split): three lenses said split, IR pushed back, resolution was reorder-not-split. The convergence was real but the prescription was over-engineered.
+- IR Contested 4 (harness-validation test): Blue advocated; Red showed the gap doesn't exist between Tasks 0.5 and 1.4. Cut.
+
+**Shared-prior check:** "Would a developer reading the plan for 5 minutes notice the same thing?" Yes for MD-4 (trivially-passable test), Yes for PA-1 (12 tasks visible at a glance), No for NF-2 (parse-time GET requires careful reading of plan line 354 + design lines 158-167 cross-reference). Confidence high on NF-2 — genuine deep finding.
+
+## Action plan (prioritized)
+
+### P0 — Must land before Phase 1 starts
+1. **A1** Split Phase 2 into 2a/2b
+2. **A2** Delete Task 0.6 (Compose scaffold); rebuild in Phase 4 prereq
+3. **A3** Move Task 7.7 multi-node CI work to Phase 4 prereq window
+4. **A4** Promote Task 0.3 to Task 0.1
+5. **A5** Add Phase 1.5 gate (template lookup boundary validation)
+6. **A6** Move Hyperbee.Templating spike to Phase 0
+7. **A7** Style Reference objective criteria (≥10 citations / ≥4 patterns)
+8. **A8** Phase 1 kill-criterion verbatim wording
+9. **A9** Move parse-time template lookup to runtime; amend ADR-0011
+10. **A10** Phase 1 fallback paragraph (Approach A as fallback)
+11. **A11** Phase 0 deliverable: enumerated R-24c a-o table
+
+### P1 — Land in v1 (during execution)
+12. **B1-B18** as listed above
+
+### P2 — Defer to v1.1
+- AWS Managed CI automation (existing Open Question)
+- Multi-node performance optimization (PA-class deferrals)
+- JSON Schema for `statements.json` (MD-8 IDE help)
+
+## Recommendations
+
+1. **Apply all 11 P0 amendments to the plan now** — they're all editing-not-rewriting; ~30 minutes. The plan is otherwise sound.
+2. **Amend ADR-0011** to state "parser is offline-pure; all I/O is runtime middleware" — this resolves NF-2 and prevents the Phase 5 architectural surprise.
+3. **Project estimate: 18-22 weeks calendar for one experienced engineer at full focus.** Plan timeline must reflect this; do not understate the estimate to the user (Brenton).
+4. **Recommended order before kicking off `/nop:implement`:**
+ - Apply A1-A11 plan amendments
+ - Amend ADR-0011 per A9
+ - Re-read the plan top-to-bottom checking nothing else cascaded
+ - Tag `opensearch/plan-frozen` snapshot
+ - Run Phase 0 (Task 0.1 = audit; deliverables include R-24c a-o table)
+ - Run Phase 1 spike with the new gate language
+5. **No second `/nop:assess` recommended.** This assessment was thorough; the IR's Red-strong outcome shows the plan was modestly gold-plated but had real architectural finds (NF-2, NF-3) that are now addressed. Further assessment without intervening implementation work would surface diminishing returns.
+
+## Out of scope (confirmed during assessment)
+
+- Per-task PR strategy (per-phase PRs are right for solo-maintainer; per-task is ceremony)
+- Splitting Phase 0 into 0a (mechanical) / 0b (research) — bounded enough as one phase
+- Changing the 8-phase count itself — the count is appropriate for production library scope; the issue is *task distribution*, not phase count
diff --git a/docs/research/0004-adr-compliance-audit.md b/docs/research/0004-adr-compliance-audit.md
new file mode 100644
index 0000000..63076ff
--- /dev/null
+++ b/docs/research/0004-adr-compliance-audit.md
@@ -0,0 +1,77 @@
+# ADR Compliance Audit — OpenSearch Provider Release
+
+**Date:** 2026-05-03
+**Scope:** ADRs 0001-0017 (10 cross-provider, 7 OpenSearch-specific)
+**Method:** for each Accepted ADR, locate (a) the code path that implements the decision and (b) the passing test or doc reference that verifies it. ADRs with neither are flagged for follow-up before release.
+
+This is the regression check called for by phase Definition-of-Done item "ADRs touched by this phase verified against acceptance criteria" (per B1 / NF-5). It is intentionally NOT the first verification — each ADR was verified at the time its slice landed; this audit is the cross-cutting sweep that confirms nothing has decayed or been silently superseded.
+
+## Audit table
+
+| ADR | Title | Code | Verification |
+|-----|-------|------|--------------|
+| 0001 | Use Parlot for Statement Parsers | `src/.../Internal/Grammar/OpenSearchStatementParser.cs` (Parlot.Fluent productions); existing Aerospike statement parsers also use Parlot | `tests/.../Internal/FoundationVerbParserTests.cs` (51+ verb tests), `OpenSearchStatementParserTests`, `BodySourceParserTests`, `WhenVersionTests`, `NoWaitParserTests` |
+| 0002 | Resource Migration Pattern | `src/.../Resources/OpenSearchResourceRunner.cs` exposes `StatementsFromAsync` and `RunStatementsFromJsonAsync`; Aerospike/Couchbase/MongoDB providers mirror | `tests/.../OpenSearchResourceRunnerIntegrationTests.cs`, `OpenSearchContextFilterTests` |
+| 0003 | Provider Record Store Contract | `src/Hyperbee.Migrations/IMigrationRecordStore.cs` (5-method interface); `src/.../OpenSearchRecordStore.cs` implements | `tests/.../OpenSearchRecordStoreTests.cs` (lock tuning), `OpenSearchRecordStoreIntegrationTests`, `OpenSearchPartialRollbackIntegrationTests` |
+| 0004 | Reflection-Based Migration Discovery | `src/Hyperbee.Migrations/MigrationRunner.cs::DiscoverMigrations`; `[Migration]` attribute drives ordering | `tests/.../RunnerTests.cs` (multiple discovery + ordering scenarios) |
+| 0005 | Provider-Native Distributed Locking | `src/.../OpenSearchRecordStore.cs::CreateLockAsync` (op_type=create + realtime-GET takeover); other providers use their native primitives | `tests/.../OpenSearchLockContentionTests.cs`, `OpenSearchRecordStoreLockTuningTests` |
+| 0006 | Options Inheritance + DI Registration | `src/.../OpenSearchMigrationOptions.cs : MigrationOptions`; `services.AddOpenSearchMigrations(...)` extension; mirrors Aerospike/Couchbase/MongoDB | `tests/.../OpenSearchAuthenticationOptionsTests.cs` covers IConfiguration overload |
+| 0007 | Lifecycle Hooks + Cron | `src/Hyperbee.Migrations/IContinuousMigration.cs`; `src/Hyperbee.Migrations/Helper/MigrationCronHelper.cs` | `tests/.../RunnerTests.cs` cron + continuous-migration test cases |
+| 0008 | Composable Wait/Retry Infrastructure | `src/Hyperbee.Migrations/Wait/` (RetryStrategy, Backoff, Pause); `src/.../Internal/Dispatch/StatementDispatcher.cs::DispatchWaitUntilTaskAsync` uses exponential backoff | Existing wait infra tests + `OpenSearchTemplatePolicyIntegrationTests` exercises WAIT FOR + WAIT UNTIL TASK |
+| 0009 | Convention-Based Record IDs | `src/Hyperbee.Migrations/IMigrationConventions.cs::GetRecordId`; `DefaultMigrationConventions` returns `{version}-{type-name}` | Indirectly via `RunnerTests` (ledger writes) and `OpenSearchPartialRollbackIntegrationTests` |
+| 0010 | Dual-Tier Testing Strategy | `tests/Hyperbee.Migrations.Tests/` (MSTest unit, no Docker); `tests/Hyperbee.Migrations.Integration.Tests/` (MSTest + Testcontainers) | Self-evident from project structure; `334 unit tests pass`, integration tests gated by `#if INTEGRATIONS` and run in CI via `multi_node_tests.yml` |
+| 0011 | Hybrid Parser+Runtime Injection | Parser sets `InjectDynamicStrict` / `InjectOpTypeCreate` / `NoWaitJustification` / `UnsafeJustification` flags on AST records; `SafeDefaultMergeMiddleware` and `StatementDispatcher` consume at dispatch time | `tests/.../SafeDefaultMergeMiddlewareTests.cs` (R-17 dynamic:strict, composed_of skip); `tests/.../OpenSearchR24cGapFillIntegrationTests.cs::DynamicStrict_AutoInjected_RejectsUnmappedFields` (live-cluster R-24c (g)) |
+| 0012 | WithProductionDefaults() Extension | `src/.../ServiceCollectionExtensions.cs::WithProductionDefaults()` registers `UseProductionDefaultsMarker`; the `AddOpenSearchMigrations` options factory applies the four documented defaults before the user's configuration callback, so explicit user overrides still win | `tests/Hyperbee.Migrations.Tests/Providers/OpenSearch/WithProductionDefaultsTests.cs` (3 tests); the wiring was deferred at original audit time and closed 2026-05-03 (see soft spot 1 below) |
+| 0013 | Always-Create Indices + Override | `src/.../Internal/Bootstrap/Steps/LedgerIndexInitStep.cs` and `LockIndexInitStep.cs` honor `AssumeIndicesExist` | `tests/.../OpenSearchRecordStoreIntegrationTests.cs` covers create-on-bootstrap + verify-on-bootstrap |
+| 0014 | State-Machine Façade over Pipeline | `src/.../Internal/Bootstrap/OpenSearchBootstrapper.cs` (public `RunAsync` returning `BootstrapResult`); `IBootstrapStep[]` plug-in order | `tests/.../Bootstrap/OpenSearchBootstrapperTests.cs` (step ordering, failure surfacing) |
+| 0015 | Parser Offline-Pure; All I/O Runtime Middleware | Parser produces `TemplateBodyRef` (name only, no fetch); `TemplateResolutionMiddleware` performs `GET /_index_template/` immediately before CREATE INDEX dispatch | `tests/.../TemplateResolutionMiddlewareTests.cs` (extraction logic); `tests/.../OpenSearchMigrateIndexIntegrationTests.cs::MigrateIndex_ProducesIdenticalEndState_ToHandComposedSequence` (R-24c (o)) |
+| 0016 | No File-Level Templating | OpenSearch provider has no Hyperbee.Templating dependency (verified via `grep` over the project file); typed options + IConfiguration binding handle env-variation per the house pattern | Code search; no positive test (absence of a feature is the point) |
+| 0017 | Body-Source Grammar (Three Forms) | `src/.../Internal/Ast/StatementAst.cs` defines `BodySource`, `BodyRef`, `BodyFileRef`; `src/.../Internal/Grammar/OpenSearchStatementParser.cs` produces both via `OneOf`; `src/.../Resources/OpenSearchResourceRunner.cs::ResolveBody` resolves with `bodies` first, sibling fallback, file load | `tests/.../Internal/BodySourceParserTests.cs` (14 grammar tests); `tests/.../OpenSearchBodySourceIntegrationTests.cs` (5 live resolver tests including bodies-section beats sibling, missing-ref remediation) |
+
+## Findings
+
+### Compliant (17 of 17)
+
+Every Accepted ADR has both a code implementation path and a verification mechanism. No ADR is dangling.
+
+### Soft spots noted for follow-up
+
+These are NOT compliance failures — the ADRs are honored. They are areas where the verification could be tighter:
+
+1. ~~**ADR-0012 (WithProductionDefaults)**~~ — **CLOSED 2026-05-03**. Options-factory wiring landed in `ServiceCollectionExtensions.AddOpenSearchMigrations`: when the `UseProductionDefaultsMarker` is registered, the factory flips the four documented defaults (Green threshold, PerMigration waits, RequireUnsafeJustification, RequireExplicit context) on the `OpenSearchMigrationOptions` instance BEFORE invoking the user's configuration callback, so explicit user overrides still win. Coverage: `tests/Hyperbee.Migrations.Tests/Providers/OpenSearch/WithProductionDefaultsTests.cs` (3 tests).
+
+2. ~~**ADR-0009 (Convention-Based Record IDs)**~~ — **CLOSED 2026-05-03 (commit 163196f)**. Focused convention test added at `tests/Hyperbee.Migrations.Tests/DefaultMigrationConventionsTests.cs` covering the documented `record.<version>.<name>` format and the missing-attribute throw path.
+
+3. ~~**ADR-0016 (No File-Level Templating)**~~ — **CLOSED 2026-05-03 (commit 163196f)**. Dependency-scan unit test added at `tests/Hyperbee.Migrations.Tests/Providers/OpenSearch/OpenSearchProviderDependencyTests.cs` that asserts the OpenSearch provider assembly references no `Hyperbee.Templating*` package. CI fails if a future contributor adds the dependency.
+
+### Hardening landed alongside the audit
+
+Items addressed in commits 163196f and the follow-up:
+
+- **EOF-anchored parser** — the OpenSearch statement parser now applies `.Eof()` to the top-level Parlot parser, so trailing tokens after a successful prefix-match are reported as parse errors instead of silently dropped. Closes the documented `NO WAIT` UX gap (bare `NO WAIT` without parens-and-justification used to parse as the preceding statement + trailing garbage; now correctly fails). Four parse-time-rejection tests previously deferred are now passing.
+- **Domain-exception wrapping** — grammar-level `InvalidOperationException` (raised inside Parlot `.Then(...)` callbacks for empty-justification and malformed version-literal validation) is now wrapped into `OpenSearchParseException` at the `Parse()` boundary. Callers handle one exception type.
+- **R-24c (f) bulk-load 429 retry coverage** — the OpenSearch.Net library owns the actual 429-retry mechanism (configured via `BulkAll`'s `BackOffRetries` / `BackOffTime` options, threaded through from `BulkLoadOptions` per R-20). The provider-owned behavior is the `BulkAllObserver`'s WARN-logging path when `response.Retries > 0`. Coverage: `tests/Hyperbee.Migrations.Tests/Providers/OpenSearch/BulkAllObserverRetryTests.cs` (4 unit tests driving the observer with synthetic responses) plus the joint cluster-level scenario added as Step 4 of `docs/runbooks/opensearch-aws-validation.md` (chaos via cluster-saturation against an undersized AWS instance).
+
+### Open Questions during the audit
+
+None. All ADRs cleanly map to code + tests; all soft spots noted in the original audit have been closed.
+
+## Release readiness
+
+The OpenSearch provider's ADR set (0011-0017) plus the cross-provider ADRs (0001-0010) are all honored by the v1 implementation. No ADR has been silently superseded, deferred-without-record, or violated. The provider clears the ADR-compliance gate for release.
+
+The DoD line on the release checklist:
+
+> 2026-05-03 ADR compliance audit (0001-0017): PASS (17/17 honored; all soft spots closed). See docs/research/0004-adr-compliance-audit.md
+
+## Method
+
+This audit was performed by:
+
+1. Listing all Accepted ADRs (17) from `docs/decisions/INDEX.md`.
+2. For each ADR, reading the Decision and Consequences sections.
+3. Locating the code path or paths where the decision is implemented (file + symbol).
+4. Locating the test class or classes that exercise the decision, OR identifying the doc artifact that documents the verification approach if no automated test applies (ADR-0010 self-evidence; ADR-0016 absence-of-feature).
+5. Flagging anything that doesn't fit either bucket as a soft spot.
+
+The audit document itself is durable and version-controlled; future drift will surface in the diff against this baseline.
diff --git a/docs/research/INDEX.md b/docs/research/INDEX.md
new file mode 100644
index 0000000..447c2eb
--- /dev/null
+++ b/docs/research/INDEX.md
@@ -0,0 +1,8 @@
+# research/INDEX.md
+
+| # | Title | Status | Date | Summary |
+|------|--------------------------------------------------------------------------------------|--------|------------|------------------------------------------------------------------------------------------|
+| 0001 | [OpenSearch Provider for Hyperbee.Migrations](0001-opensearch-provider.md) | Draft | 2026-05-02 | Scopes a new OpenSearch provider; surveys existing providers, OpenSearch APIs, prior-art DSLs |
+| 0002 | [OpenSearch Provider Requirements Assessment](0002-opensearch-provider-assessment.md) | Final | 2026-05-02 | Full Assessment (PM/MD/PA + Synthesis + Red-Blue + IR + Red-Blue₂); 39 findings → 20 amendments; meta-pattern: docs as fix for correctness hazards is anti-pattern; 12 P0 / 7 P1 amendments |
+| 0003 | [OpenSearch Provider Plan Assessment](0003-opensearch-plan-assessment.md) | Final | 2026-05-02 | Full Assessment of the implementation plan (PM/MD/PA + Red-Blue + IR + Red-Blue₂; Synthesis skipped); 11 P0 amendments + 18 P1 mitigations + ADR-0011 architectural amendment (parse-time template lookup → runtime); IR Red-strong outcome (4-0-3); 18-22 week project estimate |
+| 0004 | [ADR Compliance Audit — OpenSearch Provider Release](0004-adr-compliance-audit.md) | Final | 2026-05-03 | Cross-cutting audit of ADRs 0001-0017 against code + tests; 17/17 honored; 3 soft spots noted, all closed 2026-05-03; release-readiness PASS |
diff --git a/docs/runbooks/INDEX.md b/docs/runbooks/INDEX.md
new file mode 100644
index 0000000..ecf7e22
--- /dev/null
+++ b/docs/runbooks/INDEX.md
@@ -0,0 +1,5 @@
+# runbooks/INDEX.md
+
+| File | Purpose | Cadence |
+|------|---------|---------|
+| [opensearch-aws-validation.md](opensearch-aws-validation.md) | Manual / scheduled validation of AWS-specific behaviors (SigV4, endpoint loud-fail, ISM capability detection, credential rotation) for the OpenSearch provider. | Pre-release; nightly when AWS credentials available. |
diff --git a/docs/runbooks/opensearch-aws-validation.md b/docs/runbooks/opensearch-aws-validation.md
new file mode 100644
index 0000000..62c49ee
--- /dev/null
+++ b/docs/runbooks/opensearch-aws-validation.md
@@ -0,0 +1,191 @@
+# AWS Managed OpenSearch — Scheduled Validation Runbook
+
+**Status:** Draft v1
+**Owner:** Hyperbee Migrations maintainers
+**Cadence:** pre-release; nightly when AWS credentials are available in CI
+**Per:** R-28c (scheduled validation), R-21 (auth), R-24c (production scenarios)
+
+## Purpose
+
+Single-node Testcontainers (every PR) and 3-node multi-node Testcontainers (nightly and on demand via [`multi_node_tests.yml`](../../.github/workflows/multi_node_tests.yml)) cover the in-cluster correctness behaviors. Neither exercises the AWS-specific surface:
+
+- **SigV4 request signing** (transport-replacing auth, separate `.Aws` extension package)
+- **AWS endpoint loud-fail** behavior at startup against a real domain hostname
+- **ISM endpoint capability detection** against AWS Managed domains, which historically expose the legacy `/_opendistro/_ism` surface on older versions
+- **IRSA / instance-profile credential rotation** — credentials resolve per request via `AWSCredentials.GetCredentials()`; only a real AWS environment exercises that lifecycle
+
+This runbook is the manual-or-scheduled equivalent of `multi_node_tests.yml` for AWS-specific behaviors. Run it before each release, and as often as account access permits in between.
+
+## Prerequisites
+
+- An AWS Managed OpenSearch domain in a region you have permissions in. Free-tier `t3.small` is sufficient for smoke testing; a `t3.medium` two-AZ domain better mirrors production replica behavior.
+- IAM identity (user, role, or assumed role via STS) with at least `es:ESHttp*` against `<domain-arn>/*`. For the ISM scenario, `es:ESHttp*` against `/_plugins/_ism/*` is also required (or `_opendistro_*` on older domains).
+- AWS credentials resolvable via the standard chain — env vars (`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` / optional `AWS_SESSION_TOKEN`), instance profile, IRSA, or `aws configure` profile.
+- The runner project's published binary (or `dotnet run`-able source). The `Hyperbee.Migrations.Providers.OpenSearch.Aws` package must be referenced.
+
+## Runner configuration
+
+```jsonc
+// runners/Hyperbee.MigrationRunner.OpenSearch/appsettings.aws-validation.json
+{
+ "OpenSearch": {
+ "ConnectionString": "https://..es.amazonaws.com",
+ "Authentication": {
+ "Mode": "AwsSigV4",
+ "Region": "us-east-1",
+ "Service": "es"
+ }
+ },
+ "Migrations": {
+ "LedgerIndex": ".migrations-aws-validation",
+ "LockIndex": ".migrations-aws-validation-lock",
+ "LockName": "validation-lock",
+ "Lock": { "Enabled": true },
+ "FromPaths": [
+ "..\\..\\..\\..\\..\\runners\\samples\\Hyperbee.Migrations.OpenSearch.Samples\\bin\\Debug\\net10.0\\Hyperbee.Migrations.OpenSearch.Samples.dll"
+ ]
+ }
+}
+```
+
+For OpenSearch Serverless, use `Service: "aoss"` and a `<collection-id>.<region>.aoss.amazonaws.com` endpoint.
+
+## Validation steps
+
+### 1 — Loud-fail check (negative test)
+
+Confirms the core `AddOpenSearchClient` path correctly rejects an AWS endpoint when the `.Aws` extension wasn't wired (R-21 #2).
+
+```bash
+# Run with AwsSigV4 mode but DON'T reference the .Aws extension — this won't
+# happen against a runner that depends on the extension package, but the
+# core's URL guard is the safety net for misconfigured deployments. To
+# exercise it, point a non-AWS-aware host at an AWS URL:
+DOTNET_ENVIRONMENT=aws-validation \
+ ./Hyperbee.MigrationRunner.OpenSearch \
+ --connection https://..es.amazonaws.com \
+ --auth-mode Anonymous
+```
+
+**Expected:** `AwsSigV4NotConfiguredException` at startup with the `services.AddOpenSearchAwsClient(...)` snippet in the message. Process exits non-zero before any wire request.
+
+**Pass criterion:** the exception message includes both `amazonaws.com` and `AddOpenSearchAwsClient`.
+
+### 2 — Smoke test (positive path, all v1 verbs)
+
+Run the samples against the AWS domain. Each sample exercises a different verb family.
+
+```bash
+DOTNET_ENVIRONMENT=aws-validation \
+ ./Hyperbee.MigrationRunner.OpenSearch
+```
+
+**Expected:** all 8 samples (1000–8000) complete successfully. The runner's exit code is 0.
+
+**Verify on the cluster:**
+
+```bash
+# All sample indices created
+aws es-http GET --domain /_cat/indices/sample_*?format=json
+
+# Ledger entries written, with forensic fields populated (R-06)
+aws es-http GET --domain /.migrations-aws-validation/_search?pretty
+```
+
+Each ledger entry should show:
+- `direction: "Up"`
+- `status: "succeeded"`
+- `appliedBy: "/"`
+
+If `appliedBy` shows a stable hostname (e.g., the EC2 instance id or k8s pod name), credential resolution is working through IRSA/instance profile (R-21 #4).
+
+### 3 — ISM endpoint detection
+
+Confirms the bootstrap step correctly resolves to the modern or legacy ISM surface depending on the AWS domain's version.
+
+```bash
+# Examine the bootstrapper's log output. The IsmEndpointDetectStep
+# emits an INFO log on success:
+# "ism-detect resolved to `_plugins/_ism` (modern OpenSearch ISM surface)"
+# OR
+# "ism-detect resolved to `_opendistro/_ism` (legacy opendistro ISM surface — common on older AWS Managed domains)"
+grep "ism-detect" runner.log
+```
+
+**Expected:** exactly one `ism-detect resolved` line per bootstrap. The resolved prefix matches what `aws es-http HEAD --domain /_plugins/_ism/policies` returns (200 → modern; 404 → check legacy).
+
+**If neither prefix works**, the runbook surfaces the IAM-permission failure: the bootstrap step fails with `OpenSearchProviderException` naming `es:ESHttp*` against the ISM resource ARN. Add the IAM action to the deploy role and rerun.
+
+### 4 — Bulk-load 429 chaos injection (R-24c (f))
+
+Verifies end-to-end that the bulk-load wrapper retries on 429 against a real cluster. The unit suite covers the observer's WARN-logging path (BulkAllObserverRetryTests); this step exercises the joint behavior under load.
+
+The simplest reproducible path uses a small AWS instance type (`t3.small.search`) and bursts a 50K-document bulk into the cluster. The cluster's request queue saturates, OpenSearch returns 429, the OpenSearch.Net library backs off per `BulkLoadOptions.InitialBackOff`, the wrapper logs `Bulk load: page N succeeded after R retries`, and the bulk eventually completes.
+
+```bash
+# Run the bulk-seed sample (sample 5 in the runner) against the AWS domain.
+# 50K docs at default 1000-doc batches and 8x parallelism is enough to
+# induce 429s on t3.small.search.
+DOTNET_ENVIRONMENT=aws-validation \
+ ./Hyperbee.MigrationRunner.OpenSearch \
+ --target 5000
+
+# Watch for the WARN log line:
+grep "Bulk load: page" runner.log
+```
+
+**Expected:** at least one `Bulk load: page N succeeded after R retries` line in the log (R > 0). The bulk completes; the runner exits 0.
+
+**Pass criterion:** retries observed AND bulk completes successfully. Zero retries observed on a single run is acceptable on larger instance types — record the instance type alongside the validation result.
+
+**If the bulk fails** with `RejectedExecutionException` after exhausting retries, the cluster is undersized OR `BackOffRetries` is too aggressive for the workload. Increase the instance type for validation; production deployments should size for the steady-state bulk rate, not a worst-case validation burst.
+
+### 5 — Credential rotation (long-running)
+
+Optional. If the validation runs for ≥1 hour against an IRSA-authenticated workload, the IAM session token should rotate at least once during the run without runner restart.
+
+```bash
+# Start a long-running migration scenario (e.g., bulk-load 100K docs)
+# while watching for credential refresh in the AWS SDK debug log.
+DOTNET_ENVIRONMENT=aws-validation AWS_SDK_DEBUG=true \
+ ./Hyperbee.MigrationRunner.OpenSearch &
+sleep 3700 # > 1 hour
+```
+
+**Expected:** the migration completes successfully. AWS SDK debug log shows multiple credential resolution events (one per request, with the same identity but potentially different session tokens after rotation).
+
+**Pass criterion:** no 403 / signature-mismatch errors during the run. R-21 #4 spec: "credential resolver lifetime — SigV4 signer is wired to an identity resolver that re-resolves credentials per request, not cached at client construction."
+
+## Reporting
+
+Add a single line to the release checklist after each run:
+
+```
+2026-05-XX AWS Managed OpenSearch validation: PASS (us-east-1 / domain-X / runbook v1)
+```
+
+If validation can't be performed for a release (no account access in CI; account locked; etc.), add the deferral notice instead:
+
+```
+2026-05-XX AWS Managed OpenSearch validation: DEFERRED (reason: <why validation could not be run>)
+```
+
+The release process MUST include either a PASS or a DEFERRED line — never just silently skip the validation.
+
+## When validation fails
+
+Failure during step 1 (loud-fail) → core's `AddOpenSearchClient` URL guard regressed. Check the AWS-pattern matcher in `ServiceCollectionExtensions.ThrowIfAwsEndpoint`.
+
+Failure during step 2 (smoke) → look at the FIRST failing sample and which verb it tests. Compare to single-node Testcontainers behavior; AWS-specific failures usually involve auth, region mismatch, or IAM permissions on a specific endpoint (e.g., `_index_template` on older domains).
+
+Failure during step 3 (ISM detection) → the `IsmEndpointDetectStep`'s probe path is failing for non-404 reasons. Common causes: the IAM role lacks `es:ESHttp*` against `_plugins/_ism/*` (or `_opendistro_*` for older domains). The exception message names the IAM action required.
+
+Failure during step 4 (bulk 429 chaos) → if no retries are observed across multiple runs against a small instance, either the cluster has more headroom than the burst exercises (record the instance type and consider a larger burst) or the BackOffRetries config did not propagate (the unit-test suite's `BulkAllObserverRetryTests` and `BulkLoadOptionsTests` would have caught this — check that they're passing on the same commit).
+
+Failure during step 5 (rotation) → uncommon. Check the AWS SDK version pinned by the OpenSearch.Net.Auth.AwsSigV4 package; older AWSSDK.Core versions had IRSA refresh bugs. Workaround: explicit `Credentials = new InstanceProfileAWSCredentials()` with a refresh interval rather than the default chain.
+
+## Out of scope
+
+- **Full CI automation of this runbook** — deferred to v1.1 per the requirements doc Open Questions section. Requires AWS account scaffolding in CI plus credential management; not blocking v1.
+- **OpenSearch Serverless validation against a `_plugins/_ism` endpoint** — Serverless doesn't support ISM. The runbook's step 3 is skipped for `aoss` deployments.
+- **Cross-region failover testing** — out of scope for migration tooling; that's a deployment-architecture concern.
diff --git a/docs/site/aerospike.md b/docs/site/aerospike.md
index 9992064..ef4ddf6 100644
--- a/docs/site/aerospike.md
+++ b/docs/site/aerospike.md
@@ -6,9 +6,7 @@ nav_order: 8
# Aerospike Provider
-The `Hyperbee.Migrations.Providers.Aerospike` package provides Aerospike support for Hyperbee Migrations.
-It handles schema changes, index management, and data seeding through both code and resource-based migrations.
-For cross-cutting concepts like profiles, cron, and journaling, see [Concepts](concepts.md).
+The `Hyperbee.Migrations.Providers.Aerospike` package provides Aerospike support for Hyperbee Migrations. It handles schema changes, index management, and data seeding through both code and resource-based migrations. For cross-cutting concepts like profiles, cron, and journaling, see [Concepts](concepts.md).
## Installation
@@ -26,25 +24,165 @@ services.AddSingleton( sp => sp.GetRequiredService
{
- options.Namespace = "test"; // Aerospike namespace
- options.MigrationSet = "SchemaMigrations"; // set for journal records
-});
+ options.Namespace = "test"; // Aerospike namespace
+ options.MigrationSet = "SchemaMigrations"; // set for journal records
+} );
```
-## Locking
+### Provider options
+
+| Option | Type | Default |
+|--------|------|---------|
+| Namespace | string | "test" |
+| MigrationSet | string | "SchemaMigrations" |
+| LockName | string | "migration_lock" |
+| LockMaxLifetime | TimeSpan | 1 hour |
+| LockingEnabled | bool | true |
+
+### Locking
The provider uses a distributed lock stored as an Aerospike record to prevent simultaneous migration runners.
```csharp
services.AddAerospikeMigrations( options =>
{
- options.LockingEnabled = true; // default
- options.LockName = "migration_lock"; // lock record key
- options.LockMaxLifetime = TimeSpan.FromHours( 1 ); // max time-to-live
-});
+ options.LockingEnabled = true; // default
+ options.LockName = "migration_lock"; // lock record key
+ options.LockMaxLifetime = TimeSpan.FromHours( 1 ); // max time-to-live
+} );
+```
+
+## Resource layout
+
+A migration's resources live in a folder named after the migration class (or version). Statements live in `statements.json`; seed documents (optional) live as `<key>.json` files in `<namespace>/<set>/` subfolders.
+
+```
+Resources/
+ 1000-CreateInitialSchema/
+ statements.json
+ test/
+ users/
+ admin.json
+ user1.json
+ user2.json
+ 2000-AddSecondaryIndexes/
+ statements.json
+```
+
+Mark each file `EmbeddedResource` in the project file:
+
+```xml
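+<ItemGroup>
+  <EmbeddedResource Include="Resources\1000-CreateInitialSchema\statements.json" />
+  <EmbeddedResource Include="Resources\1000-CreateInitialSchema\test\users\*.json" />
+  <EmbeddedResource Include="Resources\2000-AddSecondaryIndexes\statements.json" />
+</ItemGroup>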
+```
+
+## Statement grammar
+
+Statements use AQL-flavored syntax inside a JSON wrapper. Statement keywords are case-insensitive. Identifiers may be plain (`users`, `idx_users_email`) or backtick-quoted (`` `users.archive` ``) for names containing characters the plain-form parser does not accept.
+
+The grammar is a subset of AQL focused on the operations that make sense as migrations -- index lifecycle and intent-only declarations for set creation and bulk record I/O.
+
+### Statement summary
+
+| Family | Form |
+|--------|------|
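+| Index lifecycle | `CREATE INDEX [IF NOT EXISTS] [RECREATE] [WAIT] <index_name> ON <namespace>.<set> (<bin>) [STRING|NUMERIC|GEO2DSPHERE]` |
+| | `DROP INDEX <namespace> <index_name>` |
+| Set lifecycle | `CREATE SET <namespace>.<set>` |
+| Records | `INSERT INTO <namespace>.<set> (<bins>) VALUES (<values>)` |
+| | `DELETE FROM <namespace>.<set> WHERE PK = '<key>'` |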
+
+## Statement reference
+
+### CREATE INDEX
+
+```
+CREATE INDEX [IF NOT EXISTS] [RECREATE] [WAIT] <index_name> ON <namespace>.<set> (<bin>) [STRING|NUMERIC|GEO2DSPHERE]
```
-## Code Migration Example
+Creates a secondary index on a bin. Aerospike indexes are async by default (the cluster builds them in the background); use the `WAIT` flag to block the migration until the index is ready.
+
+| Flag | Meaning |
+|------|---------|
+| `IF NOT EXISTS` | Parsed for AQL-familiarity. `CREATE INDEX` is already idempotent at the Aerospike API level, so the flag is accepted but does not change behavior. |
+| `RECREATE` | Drop the index first if it already exists, then create it. Use when you need to change the bin or index type for an existing index name. |
+| `WAIT` | Block until the index is fully built across the cluster before continuing. Without `WAIT`, the statement returns as soon as the index creation request is accepted. |
+
+The index type defaults to `STRING` when omitted. Supported types:
+
+- `STRING` -- secondary index on a string bin
+- `NUMERIC` -- secondary index on an integer bin
+- `GEO2DSPHERE` -- secondary index on a GeoJSON bin (point or region)
+
+```json
+{
+ "statements": [
+ { "statement": "CREATE INDEX WAIT idx_users_email ON test.users (email) STRING" },
+ { "statement": "CREATE INDEX WAIT idx_users_active ON test.users (active) NUMERIC" },
+ { "statement": "CREATE INDEX WAIT idx_stores_location ON test.stores (location) GEO2DSPHERE" }
+ ]
+}
+```
+
+Replace an existing index in place:
+
+```json
+{
+ "statement": "CREATE INDEX RECREATE WAIT idx_users_role ON test.users (role) STRING"
+}
+```
+
+### DROP INDEX
+
+```
+DROP INDEX <namespace> <index_name>
+```
+
+Removes a secondary index. Note that AQL's `DROP INDEX` shape uses a space (not a dot, not `ON`) between namespace and index name -- the parser follows that convention exactly.
+
+```json
+{ "statement": "DROP INDEX test idx_users_active" }
+```
+
+### CREATE SET
+
+```
+CREATE SET <namespace>.<set>
+```
+
+Declarative intent-only statement. Aerospike creates sets implicitly on first write, so no explicit set-creation API exists at the protocol level. The provider logs an INFO message when this statement is encountered and proceeds. Use it to make set ownership explicit at the migration level, e.g., to record that a particular migration introduced a particular set.
+
+```json
+{ "statement": "CREATE SET test.audit_log" }
+```
+
+### INSERT INTO / DELETE FROM
+
+```
+INSERT INTO <namespace>.<set> (<bins>) VALUES (<values>)
+DELETE FROM <namespace>.<set> WHERE PK = '<key>'
+```
+
+Both statements are intent-only: the parser captures the namespace and set names but does not perform the actual I/O. For seeding records, use the resource runner's `DocumentsFromAsync` method instead (see "Seed documents" below). For surgical record edits, inject `IAsyncClient` and use the client API directly from a code migration.
+
+```json
+{
+ "statements": [
+ { "statement": "INSERT INTO test.users (PK, name, email) VALUES ('user-001', 'Alice', 'a@x.com')" },
+ { "statement": "DELETE FROM test.users WHERE PK = 'user-orphan'" }
+ ]
+}
+```
+
+The provider logs each as INFO with a pointer to the supported alternative. Choose the supported path for production migrations:
+
+- Bulk seed -> `DocumentsFromAsync` (resource files)
+- Surgical edit -> code migration with `IAsyncClient` injection
+
+## Code migration example
Inject `IAsyncClient` to interact with Aerospike directly:
@@ -58,54 +196,34 @@ public class SeedData( IAsyncClient asyncClient, ILogger logger ) : Mi
await asyncClient.Put( null, cancellationToken,
new Key( "test", "users", "user-003" ),
- new Bin( "name", "Bob Johnson" ),
- new Bin( "email", "bob@example.com" ),
+ new Bin( "name", "Bob Johnson" ),
+ new Bin( "email", "bob@example.com" ),
new Bin( "active", 1 )
).ConfigureAwait( false );
}
}
```
-## Resource Migration Example
+## Resource migration example
-Use `AerospikeResourceRunner` to execute embedded resource files:
+Use `AerospikeResourceRunner` to execute embedded resource files. `StatementsFromAsync` runs the AQL statements; `DocumentsFromAsync` writes seed records.
```csharp
[Migration( 1000 )]
-public class CreateInitialSchema( AerospikeResourceRunner resourceRunner ) : Migration
+public class CreateInitialSchema( AerospikeResourceRunner runner ) : Migration
{
public override async Task UpAsync( CancellationToken cancellationToken = default )
{
- await resourceRunner.StatementsFromAsync( [
- "statements.json"
- ], cancellationToken );
+ await runner.StatementsFromAsync( ["statements.json"], cancellationToken );
- await resourceRunner.DocumentsFromAsync( [
- "test/users"
- ], cancellationToken );
+ await runner.DocumentsFromAsync( ["test/users"], cancellationToken );
}
}
```
-## Statement Syntax
+## Seed documents
-Statements use AQL syntax inside a JSON wrapper. The `WAIT` keyword blocks until the index is built.
-
-```json
-{
- "statements": [
- { "statement": "CREATE INDEX WAIT idx_users_email ON test.users (email) STRING" },
- { "statement": "CREATE INDEX WAIT idx_users_active ON test.users (active) NUMERIC" }
- ]
-}
-```
-
-Supported index types: `STRING`, `NUMERIC`, `GEO2DSPHERE`.
-
-## Document Format
-
-Documents are JSON files stored at `namespace/set/key.json`. Each file must contain an `id` or `PK`
-field that becomes the Aerospike record key. All other properties are stored as bins.
+Seed documents are JSON files stored at `<namespace>/<set>/<key>.json`. Each file must contain an `id` (or `PK`) field -- this becomes the Aerospike record key. All other top-level properties are stored as bins.
```
Resources/1000-CreateInitialSchema/
@@ -119,19 +237,27 @@ Example document (`test/users/admin.json`):
```json
{
- "id": "user-admin",
- "name": "Admin User",
- "email": "admin@example.com",
+ "id": "user-admin",
+ "name": "Admin User",
+ "email": "admin@example.com",
"active": 1
}
```
-## Provider Options Reference
+The resource runner discovers documents by walking the `<namespace>/<set>` path passed to `DocumentsFromAsync`. Each `.json` file becomes one record; the `id`/`PK` field is removed from the bin set and used as the record key.
+
+## Locking semantics
+
+The provider uses a single Aerospike record as a distributed lock. Acquisition uses a generation-aware put so two runners cannot both claim the lock; the holder's heartbeat refreshes the record TTL. `LockMaxLifetime` caps total wall-clock hold so a hung migration cannot lock forever -- when reached, the in-flight migration is canceled cleanly via the cancellation token.
+
+## Production deployment
+
+The companion runner project (`runners/Hyperbee.MigrationRunner.Aerospike`) is the recommended deployment shape. See [Runners](runners.md) for CLI flags and configuration.
+
+## Samples
+
+`runners/samples/Hyperbee.Migrations.Aerospike.Samples` ships sample migrations covering the full statement surface plus seed-document patterns:
-| Option | Type | Default |
-|--------------------|------------|----------------------|
-| Namespace | string | "test" |
-| MigrationSet | string | "SchemaMigrations" |
-| LockName | string | "migration_lock" |
-| LockMaxLifetime | TimeSpan | 1 hour |
-| LockingEnabled | bool | true |
+- `1000-CreateInitialSchema` -- `CREATE INDEX WAIT` for users; `DocumentsFromAsync` for seeded users
+- `2000-AddSecondaryIndexes` -- additional `CREATE INDEX WAIT` statements for products
+- `3000-SeedData` -- code-migration pattern using `IAsyncClient.Put` directly
diff --git a/docs/site/getting-started.md b/docs/site/getting-started.md
index a15c634..88f8b86 100644
--- a/docs/site/getting-started.md
+++ b/docs/site/getting-started.md
@@ -16,10 +16,11 @@ Install the NuGet package for your database provider:
dotnet add package Hyperbee.Migrations.Providers.Aerospike
dotnet add package Hyperbee.Migrations.Providers.Couchbase
dotnet add package Hyperbee.Migrations.Providers.MongoDB
+dotnet add package Hyperbee.Migrations.Providers.OpenSearch
dotnet add package Hyperbee.Migrations.Providers.Postgres
```
-You only need the package for the provider you are using.
+You only need the package for the provider you are using. For AWS Managed OpenSearch (SigV4 request signing), also reference the optional `Hyperbee.Migrations.Providers.OpenSearch.Aws` extension package.
## Create Your First Migration
diff --git a/docs/site/index.md b/docs/site/index.md
index 606e368..752970d 100644
--- a/docs/site/index.md
+++ b/docs/site/index.md
@@ -14,9 +14,9 @@ are discovered, ordered, and executed automatically.
## Key Features
-- Supports **Aerospike**, **Couchbase**, **MongoDB**, and **PostgreSQL**
+- Supports **Aerospike**, **Couchbase**, **MongoDB**, **OpenSearch**, and **PostgreSQL**
- Code migrations with full dependency injection
-- Resource migrations with embedded SQL, N1QL, AQL, and MongoDB commands
+- Resource migrations with embedded SQL, N1QL, AQL, MongoDB commands, and OpenSearch DDL
- Document seeding from JSON files
- Distributed locking to prevent concurrent migrations
- Profile-based environment scoping
diff --git a/docs/site/opensearch-template-propagation-faq.md b/docs/site/opensearch-template-propagation-faq.md
new file mode 100644
index 0000000..16c58e0
--- /dev/null
+++ b/docs/site/opensearch-template-propagation-faq.md
@@ -0,0 +1,143 @@
+---
+layout: default
+title: OpenSearch FAQ - Template Propagation
+parent: OpenSearch Provider
+nav_order: 1
+---
+
+# Template Propagation FAQ - OpenSearch
+
+The single most common production question on OpenSearch migrations is some form of:
+
+> I changed my mapping (or template, or settings, or analyzer). Why isn't existing data seeing the change?
+
+This page is the canonical answer.
+
+## Why this surprises people
+
+If you're coming from a relational database, you probably expect "alter the schema, the data conforms." OpenSearch doesn't work that way. Each document is indexed against the mapping that existed at the time of write. Changing the mapping changes how new documents get indexed; it does NOT reindex existing ones.
+
+The same applies to:
+
+- **Index templates and component templates.** Templates apply at index-creation time. Existing indices that matched a previous template aren't retroactively rewritten when you update the template.
+- **Static index settings.** number_of_shards, codec, analysis chain - any setting marked "static" is fixed at creation. UPDATE SETTINGS without CLOSE rejects them; UPDATE SETTINGS with CLOSE applies them only to the index in question, not to historic data.
+- **Analyzers.** Changing an analyzer changes how new tokens get produced for new documents. Existing documents still carry the tokens they were indexed with.
+
+The provider's UPDATE MAPPING dispatcher emits a diagnostic INFO log on every successful mapping update naming this gotcha and pointing at the answer (below). If you're seeing that log, the diagnostic is working as intended.
+
+## The answer: MIGRATE INDEX
+
+```
+MIGRATE INDEX users-v1 TO users-v2 WITH TEMPLATE users-template VIA ALIAS users-current
+```
+
+That one line is the canonical mapping-propagation pattern. It expands at parse time into:
+
+1. `CREATE INDEX users-v2` with the body fetched from the live `users-template`.
+2. `REINDEX FROM users-v1 TO users-v2` with `op_type: create` auto-injected (so retries don't double-write).
+3. `ALIAS SWAP users-current FROM users-v1 TO users-v2` atomically.
+
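+Application reads come through the alias `users-current`. After the swap, the alias points at v2, so reads cut over with zero downtime and the mapping changes are now applied to the data. Writes that reach v1 while the reindex is running are not copied automatically; see "When the source has active writes during the migration" below.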
+
+## Step-by-step walkthrough
+
+### Before
+
+You have an index `users-v1` with the old mapping, and your application reads from the alias `users-current`:
+
+```
+users-v1 <-- users-current (alias)
+```
+
+### Author the new shape
+
+Update the template to reflect the new mapping:
+
+```
+CREATE TEMPLATE users-template WITH BODY @users-template-v2.json
+```
+
+The template file holds the new mapping. New indices matching the template's `index_patterns` will pick it up.
+
+But existing data is still on v1 with the old shape. UPDATE MAPPING ON users-v1 won't retroactively rewrite anything.
+
+### Run the migration
+
+```
+MIGRATE INDEX users-v1 TO users-v2 WITH TEMPLATE users-template VIA ALIAS users-current
+```
+
+What happens at dispatch time:
+
+1. **CREATE INDEX users-v2** - the provider fetches the live template body and uses it as the new index shape.
+2. **REINDEX FROM users-v1 TO users-v2** - the cluster bulk-copies documents from v1 to v2. New mapping applies; documents that don't fit the new mapping fail explicitly (rather than silently mis-typing).
+3. **ALIAS SWAP users-current FROM users-v1 TO users-v2** - one atomic _aliases body containing both the remove-from-v1 and add-to-v2 actions. The cluster atomically rejects the whole body if v1 is no longer where the alias points (no TOCTOU window).
+
+### After
+
+```
+users-v1 (still exists, no alias)
+users-v2 <-- users-current (alias)
+```
+
+Application reads through the alias now hit v2. v1 is still around for safety; you can drop it in a follow-up migration once you're confident.
+
+## Common variations
+
+### Inline body instead of a template
+
+If you don't want to manage the new shape via an index template:
+
+```
+MIGRATE INDEX users-v1 TO users-v2 WITH BODY $newShape VIA ALIAS users-current
+```
+
+with the new mapping in the `bodies` section of the same statement.
+
+### Without the alias swap
+
+```
+MIGRATE INDEX users-v1 TO users-v2 WITH TEMPLATE users-template
+```
+
+Creates v2 and reindexes, but leaves the alias alone. Use this if your application doesn't read through an alias, or if you intend to retain both indices for read-traffic comparison before cutover.
+
+### When the source has active writes during the migration
+
+The standard reindex captures only documents present at the time it starts. Writes against v1 during the reindex do NOT make it into v2 automatically. For write-during-migration scenarios, two patterns:
+
+- **Dual write**: application writes to both v1 and v2 during the migration window, then reads switch over.
+- **Post-swap delta reindex**: rerun the reindex from a saved checkpoint after the swap to catch v1 writes that arrived during the window.
+
+The composite verb explicitly does NOT solve the dual-write problem - that's an application concern, not a migration tool concern.
+
+## Why not just UPDATE MAPPING?
+
+You can use UPDATE MAPPING to add fields to an existing index. New documents will have the new fields available; queries that filter on the new field will work for those new documents.
+
+You CANNOT use UPDATE MAPPING to:
+
+- Change the type of an existing field (string -> integer, keyword -> text, etc.)
+- Remove a field
+- Change an analyzer's output for existing documents
+- Apply a new dynamic-mapping policy to historic data
+
+Those changes require a reindex. MIGRATE INDEX is the canonical way to do that reindex safely.
+
+## Why not just reindex by hand?
+
+You can. The OpenSearch provider's REINDEX verb is a first-class statement; you can write CREATE + REINDEX + ALIAS SWAP as three separate statements. Sample 2 (`AliasSwapReindexHandComposed`) shows that long-form pattern.
+
+The reasons MIGRATE INDEX is the recommended pattern:
+
+- **Safe defaults are baked in.** `op_type: create` is auto-injected on REINDEX so retried runs don't double-write. The ALIAS SWAP precondition is in-body so there's no TOCTOU window.
+- **Atomicity is explicit.** The sub-statements run as a halting sequence; failure of any sub-statement halts the rest and feeds R-19 partial-rollback ledger semantics.
+- **The intent is readable.** "MIGRATE INDEX users-v1 TO users-v2" reads as the operation it is. Three separate statements bury the intent across multiple lines.
+- **Template resolution is deferred, so parsing stays offline-pure.** The parser carries the template name unresolved; the runtime fetches the live template body just before CREATE INDEX dispatch (ADR-0015). Authors can update the template independently of the migration that uses it.
+
+## Related
+
+- [OpenSearch Provider](opensearch.md) - main provider page
+- [Resource Migrations](resource-migrations.md) - file-based migration patterns
+- [Concepts](concepts.md) - cross-cutting concepts (profiles, contexts, journaling, locking)
+- Sample 6 in `runners/samples/Hyperbee.Migrations.OpenSearch.Samples` - working demonstration of the full pattern
diff --git a/docs/site/opensearch.md b/docs/site/opensearch.md
new file mode 100644
index 0000000..4bc2a8c
--- /dev/null
+++ b/docs/site/opensearch.md
@@ -0,0 +1,684 @@
+---
+layout: default
+title: OpenSearch Provider
+nav_order: 11
+---
+
+# OpenSearch Provider
+
+The `Hyperbee.Migrations.Providers.OpenSearch` package provides OpenSearch support for Hyperbee Migrations. It manages indices, mappings, settings, aliases, templates, ISM policies, and reindex orchestration through resource-based migrations using a Parlot-parsed statement grammar. AWS Managed OpenSearch Service is supported via the optional `Hyperbee.Migrations.Providers.OpenSearch.Aws` extension package. For cross-cutting concepts, see [Concepts](concepts.md).
+
+## Installation
+
+```shell
+dotnet add package Hyperbee.Migrations.Providers.OpenSearch
+```
+
+For AWS Managed OpenSearch (SigV4 request signing):
+
+```shell
+dotnet add package Hyperbee.Migrations.Providers.OpenSearch.Aws
+```
+
+## Configuration
+
+Register the OpenSearch client and migration services with the DI container. The two registration paths are mutually exclusive: call `AddOpenSearchClient` for header-based auth (Basic, ApiKey, mTLS, Anonymous) OR `AddOpenSearchAwsClient` for AWS SigV4. Each guards against the other being called first.
+
+```csharp
+// Local dev, on-prem, or any non-AWS deployment
+services.AddOpenSearchClient( new Uri( "http://localhost:9200" ), auth =>
+{
+ auth.Mode = OpenSearchAuthenticationMode.Basic;
+ auth.UserName = "admin";
+ auth.Password = "password";
+} );
+
+services.AddOpenSearchMigrations( options =>
+{
+ options.LedgerIndex = ".migrations"; // default
+ options.LockIndex = ".migrations-lock"; // default
+ options.LockingEnabled = true;
+} );
+```
+
+For AWS Managed OpenSearch:
+
+```csharp
+services.AddOpenSearchAwsClient( new Uri( "https://my-domain.us-east-1.es.amazonaws.com" ), aws =>
+{
+ aws.Region = "us-east-1";
+ aws.Service = "es"; // "aoss" for OpenSearch Serverless
+} );
+
+services.AddOpenSearchMigrations( /* migration options */ );
+```
+
+### Provider options
+
+| Option | Type | Default |
+|--------|------|---------|
+| LedgerIndex | string | ".migrations" |
+| LockIndex | string | ".migrations-lock" |
+| LockName | string | "migration_lock" |
+| LockingEnabled | bool | false |
+| ClusterHealthThreshold | enum | Yellow |
+| WaitMode | enum | PerStatement |
+| RequireUnsafeJustification | bool | false |
+| ContextResolutionPolicy | enum | SkipIfUnset |
+| ActiveContext | string | null |
+| ImplicitWaitTimeout | TimeSpan | 30 seconds |
+| LockRenewInterval | TimeSpan | 30 seconds |
+| LockStaleAfter | TimeSpan | 60 seconds |
+| LockMaxLifetime | TimeSpan | 1 hour |
+| AssumeIndicesExist | bool | false |
+| ForceResume | bool | false |
+
+### WithProductionDefaults
+
+`WithProductionDefaults()` flips four options to production-safe values BEFORE the user's configuration callback runs, so explicit overrides still win:
+
+| Option | Library default | Production default |
+|--------|-----------------|--------------------|
+| ClusterHealthThreshold | Yellow | Green |
+| WaitMode | PerStatement | PerMigration |
+| RequireUnsafeJustification | false | true |
+| ContextResolutionPolicy | SkipIfUnset | RequireExplicit |
+
+```csharp
+services
+ .WithProductionDefaults()
+ .AddOpenSearchMigrations( options =>
+ {
+ // Per-option overrides win over the production defaults above.
+ options.WaitMode = WaitMode.Off;
+ } );
+```
+
+## Resource layout
+
+A migration's resources live in a folder named after the migration class (or version). The folder ships as embedded resources in the migration project's csproj.
+
+```
+Resources/
+ 1000-CreateInitialIndex/
+ statements.json
+ 3000-ComponentAndIndexTemplate/
+ statements.json
+ bodies/
+ common-mappings-component.json
+ 4000-IsmPolicyAndApply/
+ statements.json
+ hot-warm-cold-policy.json
+```
+
+Mark each file `EmbeddedResource` in the project file:
+
+```xml
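+<ItemGroup>
+  <EmbeddedResource Include="Resources\1000-CreateInitialIndex\**\*.json" />
+  <EmbeddedResource Include="Resources\3000-ComponentAndIndexTemplate\**\*.json" />
+  <EmbeddedResource Include="Resources\4000-IsmPolicyAndApply\**\*.json" />
+</ItemGroup>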
+```
+
+The migration class loads its resources via `OpenSearchResourceRunner`:
+
+```csharp
+[Migration( 1000 )]
+public class CreateInitialIndex( OpenSearchResourceRunner runner ) : Migration
+{
+ public override Task UpAsync( CancellationToken ct = default )
+ => runner.StatementsFromAsync( "statements.json", ct );
+}
+```
+
+## Statement grammar
+
+The grammar is a small SQL-flavored DSL. Each statement is one line; one or more statements live inside a `statements.json` resource. Statement keywords are case-insensitive. Identifiers may be plain (`users`, `users-v1`, `users.archive`) or backtick-quoted (`` `users.v2` ``) for names containing characters the plain-form parser does not accept. The grammar is offline-pure (ADR-0015) -- no network I/O at parse time. Anything that needs the live cluster (template resolution, version checks) happens at dispatch time.
+
+Durations use `<integer><unit>` (e.g., `30s`, `5m`, `2h`). Pure integers are rejected -- the suffix is required.
+
+### Statement summary
+
+| Family | Form |
+|--------|------|
+| Index lifecycle | `CREATE INDEX [IF NOT EXISTS] [WITH BODY $body] [NO WAIT("")]` |
+| | `DROP INDEX [IF EXISTS]` |
+| | `UPDATE MAPPING ON [WITH BODY $body]` |
+| | `UPDATE SETTINGS ON [CLOSE] [WITH BODY $body] [NO WAIT("")]` |
+| | `REFRESH ` |
+| Alias | `ALIAS SWAP FROM TO [NO WAIT("")]` |
+| | `ALIAS ADD ON ` |
+| | `ALIAS REMOVE ON ` |
+| Reindex | `REINDEX [UNSAFE("")] FROM TO [WITH BODY $body] [NO WAIT("")]` |
+| Composite | `MIGRATE INDEX <source> TO <target> [WITH TEMPLATE <template> | WITH BODY $body] [VIA ALIAS <alias>] [TIMEOUT <duration>]` |
+| Templates | `CREATE TEMPLATE [WITH BODY $body]` |
+| | `CREATE COMPONENT [WITH BODY $body]` |
+| | `DROP TEMPLATE [IF EXISTS]` |
+| | `DROP COMPONENT [IF EXISTS]` |
+| ISM | `CREATE POLICY [WITH BODY $body]` |
+| | `APPLY POLICY TO [NO WAIT("")]` |
+| Cluster waits | `WAIT FOR [ON ] [TIMEOUT ]` |
+| | `WAIT UNTIL TASK COMPLETE [TIMEOUT