diff --git a/.github/workflows/terraform-apply-production.yml b/.github/workflows/terraform-apply-production.yml
deleted file mode 100644
index e4322b4..0000000
--- a/.github/workflows/terraform-apply-production.yml
+++ /dev/null
@@ -1,154 +0,0 @@
----
-# infra — gated Terraform apply for the PRODUCTION Cloudflare workspace.
-#
-# APPROVAL MODEL: workflow_dispatch + GitHub Environment "production"
-# with required reviewers. No push trigger. No "promote from staging"
-# trigger. Every production apply is a separate, deliberate decision
-# made by a human reviewer on a human-triggered run.
-#
-# Confirm phrase is stricter than staging — operator must type a
-# matching staging RUN_ID so they cannot apply prod without having
-# first applied + observed the same change in staging.
-#
-# Security note: every GHA expression consumed in a run: block is
-# wrapped through env: to prevent script injection.
-
-name: terraform-apply-production
-
-on:
- workflow_dispatch:
- inputs:
- confirm:
- description: 'Type APPLY-PRODUCTION to confirm'
- required: true
- type: string
- staging_run_id:
- description: 'GH Actions run_id of the matching staging apply (must be a numeric id)'
- required: true
- type: string
-
-permissions:
- contents: read
-
-concurrency:
- group: terraform-apply-production
- cancel-in-progress: false # never cancel an in-flight apply
-
-env:
- TF_VERSION: '1.9.8'
- TF_IN_AUTOMATION: 'true'
- TF_ENV: 'production'
- CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }}
- CLOUDFLARE_API_KEY: ${{ secrets.CLOUDFLARE_API_KEY }}
- AWS_ACCESS_KEY_ID: ${{ secrets.TF_STATE_R2_ACCESS_KEY_ID }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.TF_STATE_R2_SECRET_ACCESS_KEY }}
- AWS_REGION: 'auto'
- CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
-
-jobs:
- guard:
- name: confirm-input + staging-precedent guard
- runs-on: ubuntu-latest
- env:
- CONFIRM_INPUT: ${{ inputs.confirm }}
- STAGING_RUN_ID: ${{ inputs.staging_run_id }}
- steps:
- - name: Reject if confirm phrase wrong
- run: |
- if [ "${CONFIRM_INPUT}" != "APPLY-PRODUCTION" ]; then
- echo "::error::confirm input must be exactly 'APPLY-PRODUCTION'"
- exit 1
- fi
-
- - name: Reject if staging_run_id is not numeric
- run: |
- # ref-injection mitigation: validate strictly before any use.
- case "${STAGING_RUN_ID}" in
- ''|*[!0-9]*)
- echo "::error::staging_run_id must be a numeric GH Actions run id (got '${STAGING_RUN_ID}')"
- exit 1
- ;;
- esac
-
- - name: Verify staging run exists + succeeded
- env:
- GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- # STAGING_RUN_ID already validated as numeric above; safe to use.
- run: |
- conclusion=$(gh run view "${STAGING_RUN_ID}" --repo "${GITHUB_REPOSITORY}" --json conclusion --jq '.conclusion')
- name=$(gh run view "${STAGING_RUN_ID}" --repo "${GITHUB_REPOSITORY}" --json name --jq '.name')
- if [ "${name}" != "terraform-apply-staging" ]; then
- echo "::error::staging_run_id ${STAGING_RUN_ID} is not a terraform-apply-staging run (got: ${name})"
- exit 1
- fi
- if [ "${conclusion}" != "success" ]; then
- echo "::error::staging_run_id ${STAGING_RUN_ID} did not succeed (conclusion: ${conclusion})"
- exit 1
- fi
- echo "staging precedent ✓ (run ${STAGING_RUN_ID} = success)"
-
- apply:
- name: apply production
- needs: guard
- runs-on: ubuntu-latest
- # GitHub Environment "production" must be configured with Required
- # Reviewers — operator sets this up at repo Settings → Environments →
- # production → Deployment protection rules. This is the second gate
- # on top of the confirm input + staging-precedent checks above.
- environment: production
- defaults:
- run:
- working-directory: terraform/cloudflare
- steps:
- - uses: actions/checkout@v6
-
- - uses: hashicorp/setup-terraform@v3
- with:
- terraform_version: ${{ env.TF_VERSION }}
-
- - name: Verify operator secrets are set
- run: |
- missing=""
- [ -z "${CLOUDFLARE_EMAIL}" ] && missing="${missing} CLOUDFLARE_EMAIL"
- [ -z "${CLOUDFLARE_API_KEY}" ] && missing="${missing} CLOUDFLARE_API_KEY"
- [ -z "${AWS_ACCESS_KEY_ID}" ] && missing="${missing} TF_STATE_R2_ACCESS_KEY_ID"
- [ -z "${AWS_SECRET_ACCESS_KEY}" ] && missing="${missing} TF_STATE_R2_SECRET_ACCESS_KEY"
- [ -z "${CF_ACCOUNT_ID}" ] && missing="${missing} CF_ACCOUNT_ID"
- if [ -n "${missing}" ]; then
- echo "::error::Operator action required — these repo secrets are not set:${missing}"
- echo "::error::See https://github.com/InstaNode-dev/infra/blob/master/terraform/cloudflare/README.md#bootstrap-one-time"
- exit 1
- fi
-
- - name: terraform init
- run: |
- terraform init \
- -backend-config="endpoints={s3=\"https://${CF_ACCOUNT_ID}.r2.cloudflarestorage.com\"}" \
- -backend-config="workspace_key_prefix=${TF_ENV}"
-
- - name: terraform workspace select
- run: terraform workspace select "${TF_ENV}"
-
- - name: terraform plan
- run: |
- terraform plan \
- -var-file="${TF_ENV}.auto.tfvars" \
- -no-color \
- -out=tfplan.bin
-
- - name: terraform apply
- run: terraform apply -no-color tfplan.bin
-
- - name: Surface non-sensitive outputs (ids only, NO token values)
- run: |
- terraform output -no-color account_id || true
- terraform output -no-color zone_id || true
- terraform output -no-color deploy_token_id || true
- terraform output -no-color admin_tunnel_token_id || true
-
- - name: Reminder
- run: |
- echo "::notice::PRODUCTION APPLY COMPLETE."
- echo "::notice::If tokens were created or rotated, run on an operator workstation:"
- echo "::notice:: make install-secrets ENV=production"
- echo "::notice::Confirm the CF dashboard audit log shows the change before revoking the prior token."
diff --git a/.github/workflows/terraform-apply-staging.yml b/.github/workflows/terraform-apply-staging.yml
deleted file mode 100644
index bceb50b..0000000
--- a/.github/workflows/terraform-apply-staging.yml
+++ /dev/null
@@ -1,120 +0,0 @@
----
-# infra — gated Terraform apply for the STAGING Cloudflare workspace.
-#
-# APPROVAL MODEL: workflow_dispatch ONLY. Never on push, never on merge,
-# never auto-promoted from a previous apply. Operator deliberately
-# triggers this from the Actions tab.
-#
-# Why split per env: staging and production must not share an apply
-# trigger. Splitting prevents a "promote-on-success" pipeline from
-# ever existing for production — every prod apply is a separate human
-# decision (see terraform-apply-production.yml).
-#
-# Security note: every GHA expression consumed in a run: block is
-# wrapped through env: to prevent script injection.
-
-name: terraform-apply-staging
-
-on:
- workflow_dispatch:
- inputs:
- confirm:
- description: 'Type APPLY-STAGING to confirm'
- required: true
- type: string
-
-permissions:
- contents: read
-
-concurrency:
- group: terraform-apply-staging
- cancel-in-progress: false # never cancel an in-flight apply
-
-env:
- TF_VERSION: '1.9.8'
- TF_IN_AUTOMATION: 'true'
- TF_ENV: 'staging'
- # Global Key via EMAIL+KEY env vars (CLOUDFLARE_API_TOKEN forces Bearer
- # which Global Keys fail on Rulesets/R2/account-scoped endpoints — 9106).
- CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }}
- CLOUDFLARE_API_KEY: ${{ secrets.CLOUDFLARE_API_KEY }}
- AWS_ACCESS_KEY_ID: ${{ secrets.TF_STATE_R2_ACCESS_KEY_ID }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.TF_STATE_R2_SECRET_ACCESS_KEY }}
- AWS_REGION: 'auto'
- CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
-
-jobs:
- guard:
- name: confirm-input guard
- runs-on: ubuntu-latest
- env:
- CONFIRM_INPUT: ${{ inputs.confirm }}
- steps:
- - name: Reject if confirm phrase wrong
- run: |
- if [ "${CONFIRM_INPUT}" != "APPLY-STAGING" ]; then
- echo "::error::confirm input must be exactly 'APPLY-STAGING'"
- exit 1
- fi
-
- apply:
- name: apply staging
- needs: guard
- runs-on: ubuntu-latest
- environment: staging
- defaults:
- run:
- working-directory: terraform/cloudflare
- steps:
- - uses: actions/checkout@v6
-
- - uses: hashicorp/setup-terraform@v3
- with:
- terraform_version: ${{ env.TF_VERSION }}
-
- - name: Verify operator secrets are set
- run: |
- missing=""
- [ -z "${CLOUDFLARE_EMAIL}" ] && missing="${missing} CLOUDFLARE_EMAIL"
- [ -z "${CLOUDFLARE_API_KEY}" ] && missing="${missing} CLOUDFLARE_API_KEY"
- [ -z "${AWS_ACCESS_KEY_ID}" ] && missing="${missing} TF_STATE_R2_ACCESS_KEY_ID"
- [ -z "${AWS_SECRET_ACCESS_KEY}" ] && missing="${missing} TF_STATE_R2_SECRET_ACCESS_KEY"
- [ -z "${CF_ACCOUNT_ID}" ] && missing="${missing} CF_ACCOUNT_ID"
- if [ -n "${missing}" ]; then
- echo "::error::Operator action required — these repo secrets are not set:${missing}"
- echo "::error::See https://github.com/InstaNode-dev/infra/blob/master/terraform/cloudflare/README.md#bootstrap-one-time"
- exit 1
- fi
-
- - name: terraform init
- run: |
- terraform init \
- -backend-config="endpoints={s3=\"https://${CF_ACCOUNT_ID}.r2.cloudflarestorage.com\"}" \
- -backend-config="workspace_key_prefix=${TF_ENV}"
-
- - name: terraform workspace select
- run: terraform workspace select "${TF_ENV}"
-
- - name: terraform plan
- run: |
- terraform plan \
- -var-file="${TF_ENV}.auto.tfvars" \
- -no-color \
- -out=tfplan.bin
-
- - name: terraform apply
- run: terraform apply -no-color tfplan.bin
-
- - name: Surface non-sensitive outputs (ids only, NO token values)
- run: |
- terraform output -no-color account_id || true
- terraform output -no-color zone_id || true
- terraform output -no-color deploy_token_id || true
- terraform output -no-color admin_tunnel_token_id || true
-
- - name: Reminder
- run: |
- echo "::notice::STAGING APPLY COMPLETE."
- echo "::notice::If tokens were created or rotated, run on an operator workstation:"
- echo "::notice:: make install-secrets ENV=staging"
- echo "::notice::Promoting to production is a SEPARATE manual decision via terraform-apply-production.yml."
diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml
deleted file mode 100644
index f301985..0000000
--- a/.github/workflows/terraform.yml
+++ /dev/null
@@ -1,179 +0,0 @@
----
-# infra — Terraform fmt + validate + plan for CF resources.
-#
-# Runs on every push to master and on PRs touching terraform/**.
-# Plan is read-only. Apply is split into per-env manual workflows
-# (terraform-apply-staging.yml, terraform-apply-production.yml).
-# This file NEVER applies — see those workflows for the apply path.
-#
-# Posts the plan diff as a PR comment so reviewers see what apply
-# would do without granting CI apply rights.
-#
-# Security note: all GHA expressions consumed in run: blocks are
-# referenced through env vars to prevent script injection.
-
-name: terraform
-
-on:
- push:
- branches: [master]
- paths:
- - 'terraform/**'
- - '.github/workflows/terraform*.yml'
- pull_request:
- paths:
- - 'terraform/**'
- - '.github/workflows/terraform*.yml'
- workflow_dispatch:
-
-permissions:
- contents: read
- pull-requests: write # for the plan comment
-
-concurrency:
- group: terraform-plan-${{ github.ref }}
- cancel-in-progress: true
-
-env:
- TF_VERSION: '1.9.8'
- TF_IN_AUTOMATION: 'true'
-
-jobs:
- fmt-validate:
- name: fmt + validate
- runs-on: ubuntu-latest
- defaults:
- run:
- working-directory: terraform/cloudflare
- steps:
- - uses: actions/checkout@v6
-
- - uses: hashicorp/setup-terraform@v3
- with:
- terraform_version: ${{ env.TF_VERSION }}
-
- - name: terraform fmt -check
- run: terraform fmt -check -recursive
-
- - name: terraform init (backend-bypassed)
- run: terraform init -backend=false
-
- - name: terraform validate
- run: terraform validate -no-color
-
- plan:
- name: plan (${{ matrix.env }})
- needs: fmt-validate
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- env: [staging, production]
- defaults:
- run:
- working-directory: terraform/cloudflare
- # CF creds + state-backend creds passed in via env, not inlined in run:.
- env:
- # Global Key via EMAIL+KEY env vars (provider uses X-Auth-* headers).
- # NOT CLOUDFLARE_API_TOKEN — that's Bearer-only and Global Keys fail
- # Bearer auth on Rulesets / R2 / account-scoped endpoints (9106).
- CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }}
- CLOUDFLARE_API_KEY: ${{ secrets.CLOUDFLARE_API_KEY }}
- AWS_ACCESS_KEY_ID: ${{ secrets.TF_STATE_R2_ACCESS_KEY_ID }}
- AWS_SECRET_ACCESS_KEY: ${{ secrets.TF_STATE_R2_SECRET_ACCESS_KEY }}
- AWS_REGION: 'auto'
- CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
- TF_ENV: ${{ matrix.env }}
- steps:
- - uses: actions/checkout@v6
-
- - uses: hashicorp/setup-terraform@v3
- with:
- terraform_version: ${{ env.TF_VERSION }}
-
- - name: Verify operator secrets are set
- # Bootstrap chicken-and-egg: plan needs CF + R2-HMAC creds, but
- # those are operator-only one-time setup (see README §Bootstrap).
- # Without this guard the failure mode is a cryptic AWS-IAM stack
- # trace from `terraform init`. With it, the error is one line
- # pointing at the README and the exact missing variable names.
- run: |
- missing=""
- [ -z "${CLOUDFLARE_EMAIL}" ] && missing="${missing} CLOUDFLARE_EMAIL"
- [ -z "${CLOUDFLARE_API_KEY}" ] && missing="${missing} CLOUDFLARE_API_KEY"
- [ -z "${AWS_ACCESS_KEY_ID}" ] && missing="${missing} TF_STATE_R2_ACCESS_KEY_ID"
- [ -z "${AWS_SECRET_ACCESS_KEY}" ] && missing="${missing} TF_STATE_R2_SECRET_ACCESS_KEY"
- [ -z "${CF_ACCOUNT_ID}" ] && missing="${missing} CF_ACCOUNT_ID"
- if [ -n "${missing}" ]; then
- echo "::error::Operator action required — these repo secrets are not set:${missing}"
- echo "::error::See https://github.com/InstaNode-dev/infra/blob/master/terraform/cloudflare/README.md#bootstrap-one-time"
- exit 1
- fi
- echo "all 5 operator secrets present"
-
- - name: terraform init
- run: |
- terraform init \
- -backend-config="endpoints={s3=\"https://${CF_ACCOUNT_ID}.r2.cloudflarestorage.com\"}" \
- -backend-config="workspace_key_prefix=${TF_ENV}"
-
- - name: terraform workspace select-or-create
- run: terraform workspace select "${TF_ENV}" 2>/dev/null || terraform workspace new "${TF_ENV}"
-
- - name: terraform plan
- id: plan
- run: |
- set +e
- terraform plan \
- -var-file="${TF_ENV}.auto.tfvars" \
- -no-color \
- -out=tfplan.bin \
- -detailed-exitcode 2>&1 | tee /tmp/plan.out
- ec=${PIPESTATUS[0]}
- echo "exitcode=${ec}" >> "$GITHUB_OUTPUT"
- # 0 = no changes, 2 = changes, 1 = error
- [ "${ec}" -eq 1 ] && exit 1 || exit 0
-
- - name: Comment plan on PR
- if: github.event_name == 'pull_request'
- uses: actions/github-script@v7
- env:
- PLAN_ENV: ${{ matrix.env }}
- PLAN_CODE: ${{ steps.plan.outputs.exitcode }}
- RUN_ID: ${{ github.run_id }}
- with:
- script: |
- const fs = require('fs');
- let plan = fs.readFileSync('/tmp/plan.out', 'utf8');
- if (plan.length > 60000) {
- plan = plan.slice(0, 60000) + '\n\n... (truncated; full plan in job log)';
- }
- const env = process.env.PLAN_ENV;
- const code = process.env.PLAN_CODE;
- const verdict = code === '0' ? '✅ no changes'
- : code === '2' ? '🟡 changes present — review before manual apply'
- : '❌ plan failed';
- const body = [
- `### Terraform plan — \`${env}\``,
- verdict,
- '',
- '<details><summary>plan output</summary>',
- '',
- '```hcl',
- plan,
- '```',
- '',
- '</details>',
- '',
- `_Posted by terraform.yml run ${process.env.RUN_ID}. Apply requires manual trigger of terraform-apply-${env}.yml._`,
- ].join('\n');
- await github.rest.issues.createComment({
- issue_number: context.issue.number,
- owner: context.repo.owner,
- repo: context.repo.repo,
- body,
- });
-
-# (cf/staging-drop-cache touched this file so the Lint + schema-check
-# k8s manifests required check on the infra repo fires for this PR.
-# No semantic change.)
diff --git a/.github/workflows/wrangler-build-staging-images.yml b/.github/workflows/wrangler-build-staging-images.yml
deleted file mode 100644
index c53ce26..0000000
--- a/.github/workflows/wrangler-build-staging-images.yml
+++ /dev/null
@@ -1,203 +0,0 @@
----
-# infra — Build custom Docker images for CF Containers (staging only).
-#
-# Builds images that don't ship a usable upstream:
-# - pg-platform: postgres + pgvector + all 63 platform migrations baked in
-#
-# api / worker / provisioner images are built by their own repos' deploy.yml
-# (which now also pushes :staging — see api/.github/workflows/deploy.yml).
-# This workflow handles only the "wrapped upstream image" cases.
-#
-# Triggers:
-# - workflow_dispatch (with service input)
-# - daily cron 09:00 UTC (to pick up migrations merged in api repo)
-# - push to master touching infra/wrangler/pg-platform/**
-# - repository_dispatch event "migrations-changed" from the api repo
-#
-# Security: all GHA expressions consumed in run: blocks are wrapped
-# through env: to prevent script injection.
-
-name: wrangler-build-staging-images
-
-on:
- workflow_dispatch:
- inputs:
- service:
- description: 'Which custom image to build (or "all")'
- required: true
- type: choice
- default: 'all'
- options:
- - all
- - pg-platform
- - mongodb
- - redis-provision
- - nats
- push:
- branches: [master]
- paths:
- - 'wrangler/pg-platform/**'
- - 'wrangler/mongodb/**'
- - 'wrangler/redis-provision/**'
- - 'wrangler/nats/**'
- - '.github/workflows/wrangler-build-staging-images.yml'
- schedule:
- - cron: '0 9 * * *' # daily 09:00 UTC
- repository_dispatch:
- types: [migrations-changed]
-
-permissions:
- contents: read
- packages: write
-
-concurrency:
- group: wrangler-build-staging-${{ github.event.inputs.service || 'all' }}
- cancel-in-progress: false
-
-env:
- REGISTRY: ghcr.io
- ORG: instanode-dev
-
-jobs:
- pg-platform:
- name: build pg-platform :staging
- if: |
- github.event_name == 'schedule' ||
- github.event_name == 'push' ||
- github.event_name == 'repository_dispatch' ||
- (github.event_name == 'workflow_dispatch' && (github.event.inputs.service == 'all' || github.event.inputs.service == 'pg-platform'))
- runs-on: ubuntu-latest
- env:
- SERVICE: pg-platform
- steps:
- - name: Checkout infra repo
- uses: actions/checkout@v6
- with:
- path: infra
-
- - name: Checkout api repo (for the migrations)
- uses: actions/checkout@v6
- with:
- repository: ${{ vars.API_REPO || format('{0}/api', github.repository_owner) }}
- token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }}
- path: api
-
- - name: Verify migrations dir exists + count
- env:
- MIGRATIONS_DIR: api/internal/db/migrations
- run: |
- if [ ! -d "$MIGRATIONS_DIR" ]; then
- echo "::error::expected migrations dir $MIGRATIONS_DIR not found"
- exit 1
- fi
- count=$(find "$MIGRATIONS_DIR" -name '*.sql' | wc -l | tr -d ' ')
- echo "migrations found: $count"
- if [ "$count" -lt 50 ]; then
- echo "::warning::only $count migration files — expected ≥50 (live count was 63 as of 2026-05-30)"
- fi
-
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v4
-
- - name: Log in to GHCR
- uses: docker/login-action@v4
- with:
- registry: ${{ env.REGISTRY }}
- username: ${{ github.actor }}
- # GHCR_PUSH_TOKEN is a classic PAT with write:packages, same
- # pattern as the api/worker/provisioner deploy.yml workflows.
- password: ${{ secrets.GHCR_PUSH_TOKEN || secrets.GITHUB_TOKEN }}
-
- - name: Build and push
- env:
- IMAGE: ${{ env.REGISTRY }}/${{ env.ORG }}/instant-pg-platform
- run: |
- docker buildx build \
- --platform linux/amd64 \
- -f infra/wrangler/pg-platform/Dockerfile \
- -t "${IMAGE}:staging" \
- -t "${IMAGE}:staging-$(date -u +%Y%m%d)" \
- --push \
- .
-
- - name: Reminder
- run: |
- echo "::notice::pg-platform :staging image rebuilt with current migrations."
- echo "::notice::Next CF Container cold start will re-apply them from the new image."
- echo "::notice::Trigger a rolling restart with: wrangler deployments tail --env staging"
-
- # ---------------------------------------------------------------------------
- # mongodb / redis-provision / nats — small wrapped images.
- #
- # These don't need cross-repo migration sync (the wrapping config is fully
- # self-contained under infra/wrangler/<svc>/). Single-repo checkout +
- # build + push to GHCR. Same SERVICE-input gating as pg-platform.
- # ---------------------------------------------------------------------------
-
- small-images:
- name: build ${{ matrix.svc }} :staging
- if: |
- github.event_name == 'schedule' ||
- github.event_name == 'push' ||
- github.event_name == 'repository_dispatch' ||
- (github.event_name == 'workflow_dispatch' && (github.event.inputs.service == 'all' || github.event.inputs.service == 'mongodb' || github.event.inputs.service == 'redis-provision' || github.event.inputs.service == 'nats'))
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- svc: [mongodb, redis-provision, nats]
- env:
- SVC: ${{ matrix.svc }}
- steps:
- - name: Checkout infra repo
- uses: actions/checkout@v6
- with:
- path: infra
-
- - name: Skip if matrix svc doesn't match workflow_dispatch input
- # Avoids spurious matrix entries when operator selected a single
- # svc via workflow_dispatch. push / cron / dispatch run all 3.
- id: gate
- run: |
- if [ "${{ github.event_name }}" != "workflow_dispatch" ]; then
- echo "skip=false" >> "$GITHUB_OUTPUT"
- exit 0
- fi
- INPUT="${{ github.event.inputs.service }}"
- if [ "$INPUT" = "all" ] || [ "$INPUT" = "$SVC" ]; then
- echo "skip=false" >> "$GITHUB_OUTPUT"
- else
- echo "skip=true" >> "$GITHUB_OUTPUT"
- echo "::notice::skipping $SVC (workflow_dispatch input was '$INPUT')"
- fi
-
- - name: Set up Docker Buildx
- if: steps.gate.outputs.skip == 'false'
- uses: docker/setup-buildx-action@v4
-
- - name: Log in to GHCR
- if: steps.gate.outputs.skip == 'false'
- uses: docker/login-action@v4
- with:
- registry: ${{ env.REGISTRY }}
- username: ${{ github.actor }}
- password: ${{ secrets.GHCR_PUSH_TOKEN || secrets.GITHUB_TOKEN }}
-
- - name: Build and push
- if: steps.gate.outputs.skip == 'false'
- env:
- IMAGE: ${{ env.REGISTRY }}/${{ env.ORG }}/instant-${{ matrix.svc }}
- run: |
- docker buildx build \
- --platform linux/amd64 \
- -f "infra/wrangler/${SVC}/Dockerfile" \
- -t "${IMAGE}:staging" \
- -t "${IMAGE}:staging-$(date -u +%Y%m%d)" \
- --push \
- .
-
- - name: Reminder
- if: steps.gate.outputs.skip == 'false'
- run: |
- echo "::notice::${SVC} :staging image rebuilt."
- echo "::notice::Trigger a rolling restart with: wrangler containers deploy --env staging"
diff --git a/.github/workflows/wrangler-deploy-staging.yml b/.github/workflows/wrangler-deploy-staging.yml
deleted file mode 100644
index 69c63cb..0000000
--- a/.github/workflows/wrangler-deploy-staging.yml
+++ /dev/null
@@ -1,110 +0,0 @@
----
-# infra — CF Containers deploy for staging via wrangler.
-#
-# APPROVAL MODEL: workflow_dispatch ONLY for the first ~10 runs (manual
-# verification). After staging stabilizes, can be promoted to auto-run on
-# merge to master (controlled by the `auto_deploy` input).
-#
-# Production does NOT use this workflow — see the eventual
-# production-deploy.yml when the prod target is settled.
-#
-# Security: all GHA expressions consumed in run: blocks are wrapped
-# through env: to prevent script injection.
-
-name: wrangler-deploy-staging
-
-on:
- workflow_dispatch:
- inputs:
- service:
- description: 'Which service to deploy (or "all")'
- required: true
- type: choice
- options:
- - all
- - api
- - worker
- - provisioner
- - pg-platform
- - pg-customers
- - mongodb
- - redis-provision
- - nats
- confirm:
- description: 'Type DEPLOY-STAGING to confirm'
- required: true
- type: string
-
-permissions:
- contents: read
-
-concurrency:
- group: wrangler-deploy-staging-${{ inputs.service }}
- cancel-in-progress: false
-
-jobs:
- guard:
- name: confirm-input guard
- runs-on: ubuntu-latest
- env:
- CONFIRM_INPUT: ${{ inputs.confirm }}
- steps:
- - name: Reject if confirm phrase wrong
- run: |
- if [ "${CONFIRM_INPUT}" != "DEPLOY-STAGING" ]; then
- echo "::error::confirm must be exactly 'DEPLOY-STAGING'"
- exit 1
- fi
-
- deploy:
- name: deploy ${{ inputs.service }}
- needs: guard
- runs-on: ubuntu-latest
- environment: staging
- env:
- CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
- CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
- SERVICE_INPUT: ${{ inputs.service }}
- steps:
- - uses: actions/checkout@v6
-
- - uses: actions/setup-node@v4
- with:
- node-version: '20'
-
- - name: Install wrangler
- run: npm install -g wrangler@latest
-
- - name: Validate service name
- run: |
- # Whitelist enforced — never embed user input into shell paths
- # without validating it matches a known service.
- case "${SERVICE_INPUT}" in
- all|api|worker|provisioner|pg-platform|pg-customers|mongodb|redis-provision|nats) : ;;
- *)
- echo "::error::Unknown service: ${SERVICE_INPUT}"
- exit 1
- ;;
- esac
-
- - name: Deploy
- run: |
- set -euo pipefail
- if [ "${SERVICE_INPUT}" = "all" ]; then
- SERVICES="api worker provisioner pg-platform pg-customers mongodb redis-provision nats"
- else
- SERVICES="${SERVICE_INPUT}"
- fi
- for svc in $SERVICES; do
- echo "::group::deploying $svc"
- cd "infra/wrangler/$svc"
- wrangler deploy --env staging
- cd - >/dev/null
- echo "::endgroup::"
- done
-
- - name: Reminder
- run: |
- echo "::notice::STAGING DEPLOY COMPLETE."
- echo "::notice::Verify with: curl https://api.staging.instanode.dev/healthz"
- echo "::notice::Note: stateful containers (pg-*/mongodb/redis-*/nats) have ephemeral disk."
diff --git a/terraform/cloudflare/.gitignore b/terraform/cloudflare/.gitignore
deleted file mode 100644
index 343dfcf..0000000
--- a/terraform/cloudflare/.gitignore
+++ /dev/null
@@ -1,27 +0,0 @@
-# TF state — lives in R2 backend, never in repo.
-*.tfstate
-*.tfstate.*
-*.tfstate.backup
-.terraform/
-.terraform.lock.hcl
-
-# Per-environment variable files — committable ONLY if they contain
-# no secrets. As of bootstrap there are no secrets in any tfvars (auth
-# is via env vars), so we DO commit the .auto.tfvars files. Below
-# excludes only the local ad-hoc ones.
-*.local.tfvars
-*.local.auto.tfvars
-
-# Operator-local overrides
-override.tf
-override.tf.json
-*_override.tf
-*_override.tf.json
-
-# Plan outputs (often contain post-apply secret values)
-*.tfplan
-*.tfplan.bin
-
-# crash logs from the provider
-crash.log
-crash.*.log
diff --git a/terraform/cloudflare/Makefile b/terraform/cloudflare/Makefile
deleted file mode 100644
index 23593ff..0000000
--- a/terraform/cloudflare/Makefile
+++ /dev/null
@@ -1,101 +0,0 @@
-# Terraform helpers for the CF migration. Run from this dir.
-#
-# Required env vars (export before any target):
-# CLOUDFLARE_API_TOKEN — Token A (deploy) for plan/apply
-# AWS_ACCESS_KEY_ID — R2 HMAC for TF state bucket
-# AWS_SECRET_ACCESS_KEY — R2 HMAC secret for TF state bucket
-# CF_ACCOUNT_ID — for backend endpoint URL
-#
-# ENV defaults to staging; pass ENV=production for prod.
-
-ENV ?= staging
-TF ?= terraform
-
-ifneq ($(filter $(ENV),staging production),$(ENV))
-$(error ENV must be 'staging' or 'production' (got '$(ENV)'))
-endif
-
-.PHONY: help init fmt validate plan apply destroy install-secrets rotate-tokens clean
-
-help:
- @echo "Targets:"
- @echo " init — terraform init with R2 backend (one-time per workspace)"
- @echo " fmt — terraform fmt -check (CI also enforces)"
- @echo " validate — terraform validate (offline)"
- @echo " plan — terraform plan (writes tfplan.bin)"
- @echo " apply — terraform apply (reads tfplan.bin from plan target)"
- @echo " install-secrets — pull token outputs and push to k8s + GH org secrets"
- @echo " rotate-tokens — bump expiry, plan, apply, install"
- @echo " destroy — DANGEROUS, only for tearing down ephemeral staging"
- @echo
- @echo "Env: ENV=$(ENV) (override with ENV=production)"
-
-init:
- @: $${CF_ACCOUNT_ID?CF_ACCOUNT_ID must be set}
- $(TF) init \
- -backend-config="endpoints={s3=\"https://$$CF_ACCOUNT_ID.r2.cloudflarestorage.com\"}" \
- -backend-config="workspace_key_prefix=$(ENV)"
- $(TF) workspace select $(ENV) 2>/dev/null || $(TF) workspace new $(ENV)
-
-fmt:
- $(TF) fmt -check -recursive
-
-validate:
- $(TF) validate -no-color
-
-plan:
- $(TF) plan -var-file=$(ENV).auto.tfvars -out=tfplan.bin
-
-apply:
- $(TF) apply tfplan.bin
- @echo
- @echo "==> Apply complete. If tokens were created/rotated, run:"
- @echo " make install-secrets ENV=$(ENV)"
-
-# Pull sensitive token outputs (one-shot, never written to disk) and
-# install them as k8s + GH secrets across all consuming repos. Token
-# VALUES are scrubbed from the env on exit.
-install-secrets:
- @: $${GH_TOKEN?GH_TOKEN must be set for 'gh secret set' calls}
- @DEPLOY_TOKEN="$$($(TF) output -raw deploy_token)"; \
- if [ -z "$$DEPLOY_TOKEN" ]; then echo "no deploy_token in state — apply first"; exit 1; fi; \
- echo "==> k8s: writing CLOUDFLARE_API_TOKEN to instant-secrets-cf in instant-$(ENV)"; \
- kubectl create secret generic instant-secrets-cf \
- -n instant-$(ENV) \
- --from-literal=CLOUDFLARE_API_TOKEN="$$DEPLOY_TOKEN" \
- --dry-run=client -o yaml | kubectl apply -f -; \
- echo "==> GH org secrets: CLOUDFLARE_API_TOKEN across instanodedev/{api,worker,provisioner,instanode-web,dashboard,infra,cli,mcp}"; \
- for repo in instanodedev/api instanodedev/worker instanodedev/provisioner \
- instanodedev/instanode-web instanodedev/dashboard \
- instanodedev/infra instanodedev/cli instanodedev/mcp; do \
- gh secret set CLOUDFLARE_API_TOKEN -b"$$DEPLOY_TOKEN" -R "$$repo" >/dev/null \
- && echo " ✓ $$repo" \
- || echo " ✗ $$repo (skipped — repo missing or not authorized)"; \
- done; \
- unset DEPLOY_TOKEN
- @echo
- @echo "==> Admin/tunnel token (Token B) is operator-only — NOT pushed to CI."
- @echo " To install into your local 1Password vault:"
- @echo " $(TF) output -raw admin_tunnel_token | op item create --category=ApiCredential --title='cf-admin-tunnel-$(ENV)' credential=-"
-
-# Bump expiry by 180d (deploy) / 90d (admin) — operator edits the .auto.tfvars
-# to set new dates, then this target runs the plan/apply/install loop.
-rotate-tokens:
- @echo "==> Edit $(ENV).auto.tfvars to set new *_expires_on dates, then:"
- @echo " make plan ENV=$(ENV)"
- @echo " make apply ENV=$(ENV)"
- @echo " make install-secrets ENV=$(ENV)"
- @echo " Confirm the rotation in the CF dashboard audit log before"
- @echo " revoking the previous token version."
-
-# Tearing down staging is OK (Phase 1 acceptance allows it). NEVER
-# run against production — D-3 cutover keeps state on DO throughout.
-destroy:
- @if [ "$(ENV)" = "production" ]; then \
- echo "ABORTING — destroy against production is forbidden (D-1/D-3)."; \
- exit 1; \
- fi
- $(TF) destroy -var-file=$(ENV).auto.tfvars
-
-clean:
- rm -f tfplan.bin
diff --git a/terraform/cloudflare/README.md b/terraform/cloudflare/README.md
deleted file mode 100644
index 8f29db0..0000000
--- a/terraform/cloudflare/README.md
+++ /dev/null
@@ -1,161 +0,0 @@
-# Cloudflare resources — Terraform
-
-Source of truth for everything we declare in Cloudflare for the InstaNode
-migration: API tokens (deploy + admin/tunnel), DNS records, R2 buckets,
-Pages projects, and (later) Workers + Load Balancers + Page Rules.
-
-> **k8s is NOT in scope here.** k8s manifests stay under `../../k8s/`,
-> managed by `kubectl set image` + the existing per-service auto-deploy
-> per CLAUDE.md rule 15. This dir is for Cloudflare-managed resources only.
-
-## Decision references
-
-This module implements:
-- **D-1** (scope — R2, Pages, CF proxy on api, staging-only Tunnel)
-- **D-2** (staging on full CF stack)
-- **D-3** (per-service DNS-weighted cutover; TTL 60s ≥48h)
-- **D-4** (separate `instant-staging-data` ns — k8s-side, not here, but the staging Pages project + R2 bucket parallel it)
-- **D-7** (NS delegation is CF; already verified)
-- **D-8** (R2 env-var canonical names: `R2_HMAC_KEY_ID` / `R2_HMAC_SECRET`)
-- **D-14** (operator credentials — outputs from `tokens.tf` install via `make install-secrets`)
-
-Source: `/tmp/cf-migration/shared/DECISIONS.md`.
-
-## Bootstrap (one-time)
-
-The TF state lives in R2, which means the R2 bucket for state and the
-HMAC creds to write to it must exist BEFORE `terraform init`. Manual
-chicken-and-egg step:
-
-```bash
-# 1. Create the state bucket via wrangler (operator-side, one time).
-wrangler r2 bucket create instanode-tf-state --location wnam
-
-# 2. Create R2 HMAC for state access only (scope: instanode-tf-state).
-# Dashboard → R2 → Manage R2 API Tokens → Create:
-# - Name: "tf-state-rw"
-# - Permission: Object Read & Write
-# - Specify buckets: instanode-tf-state
-# Save the Access Key ID + Secret + Endpoint.
-
-# 3. Export the state-backend creds + CF auth token for terraform.
-export AWS_ACCESS_KEY_ID=""
-export AWS_SECRET_ACCESS_KEY=""
-export CLOUDFLARE_API_TOKEN=""
-
-# 4. Init the backend with the env-specific account endpoint.
-terraform init \
- -backend-config="endpoints={s3=\"https://${CF_ACCOUNT_ID}.r2.cloudflarestorage.com\"}"
-
-# 5. Pick a workspace (staging first).
-terraform workspace new staging
-terraform workspace select staging
-
-# 6. Plan + apply.
-terraform plan -out=staging.tfplan
-terraform apply staging.tfplan
-```
-
-After `apply` succeeds you have:
-- Two CF API tokens in TF state (deploy + admin_tunnel).
-- The staging Pages project + R2 bucket + DNS records.
-- Output values for token secrets (sensitive — see next section).
-
-## Installing token secrets into k8s + GH
-
-Tokens are SENSITIVE outputs — they are stored in TF state and printed in
-clear text when `terraform output -raw <name>` is run. To install:
-
-```bash
-# Read the tokens (do NOT redirect to a file you'll commit).
-DEPLOY_TOKEN="$(terraform output -raw deploy_token)"
-ADMIN_TUNNEL_TOKEN="$(terraform output -raw admin_tunnel_token)"
-
-# k8s — staging namespace.
-kubectl create secret generic instant-secrets-cf \
- -n instant-staging \
- --from-literal=CLOUDFLARE_API_TOKEN="$DEPLOY_TOKEN" \
- --dry-run=client -o yaml | kubectl apply -f -
-
-# GH org / repo secrets — for CI auto-deploys.
-for repo in instanodedev/api instanodedev/worker instanodedev/provisioner \
- instanodedev/instanode-web instanodedev/dashboard \
- instanodedev/infra; do
- gh secret set CLOUDFLARE_API_TOKEN -b"$DEPLOY_TOKEN" -R "$repo"
-done
-
-# Admin/tunnel token: ONLY into a separate operator-local Vault, never
-# into CI. Used break-glass for Tunnel/Access changes.
-op item create --category=ApiCredential --title="cf-admin-tunnel-staging" \
- --vault="instanode-prod" credential="$ADMIN_TUNNEL_TOKEN"
-
-unset DEPLOY_TOKEN ADMIN_TUNNEL_TOKEN
-```
-
-## Workflow during the migration
-
-1. **Plan-on-PR.** Every PR that changes a `.tf` file under this dir
- triggers `terraform plan` in CI; diff posted as PR comment.
-2. **Apply-on-merge.** Merge to `main` triggers `terraform apply` via
- the workflow (gated on approval — `instanodedev/infra` already has
- manual-apply discipline; rule 15 doesn't auto-deploy `infra`).
-3. **Per-PR contract checklist (rule 22)** still applies. A TF PR that
- adds a new host or changes the API base URL ALSO needs the
- synchronized code edits in `api/internal/handlers/openapi.go` +
- `content/llms.txt` + the dashboard/cli/mcp/sdk-go base-URL constants.
-4. **Per-PR observability checklist (rule 25)** still applies. New
- resources that emit metrics need an `instant_*` Prom rule + NR alert
- JSON + dashboard tile + METRICS-CATALOG row in the same PR.
-
-## Workspace conventions
-
-- `terraform workspace new staging` / `terraform workspace new production`
-- `terraform workspace select <name>` before any plan/apply
-- `var.environment` comes from the `*.auto.tfvars` files. Terraform
- auto-loads EVERY `*.auto.tfvars` in this dir regardless of the selected
- workspace (lexical order, later files win), so always pass
- `-var-file=<env>.auto.tfvars` explicitly — in CI and interactively —
- because `-var-file` values override the auto-loaded ones.
-
-## File layout
-
-| File | Purpose |
-|---|---|
-| `versions.tf` | TF + provider pinning, R2 backend config |
-| `providers.tf` | CF provider (reads `CLOUDFLARE_API_TOKEN` env) |
-| `variables.tf` | account_id, zone_id, environment, token expiries |
-| `tokens.tf` | `cloudflare_account_token.deploy` + `.admin_tunnel` |
-| `r2.tf` | R2 bucket + 24h-TTL lifecycle rule on `anon/` prefix |
-| `dns.tf` | DNS records (apex / www / api / staging) with TTL 60s |
-| `pages.tf` | Pages project for `instanode-web` (Phase 2) |
-| `outputs.tf` | Sensitive token outputs (consumed by `make install-secrets`) |
-| `staging.auto.tfvars` | Workspace-scoped vars for staging |
-| `production.auto.tfvars` | Workspace-scoped vars for production |
-
-## What's NOT here (yet)
-
-- **Workers** — CEO D-1 deferred until measured TTFB benefit shows up.
-- **Hyperdrive** — same; api and DO Managed PG are same-region, no win today.
-- **D1** — KILLED per D-1.
-- **CF Email Routing** — DEFERRED; outbound stays on Brevo.
-- **Tunnels** — Phase 5 staging-only; add `tunnels.tf` when that PR ships, scoped to admin_tunnel token.
-- **Load Balancers** — pending the CF Startups operator ticket (D-6, 5–10 day lead). Once enabled, add `lb.tf`.
-- **Page Rules / Cache Rules** — Phase 4 only (api orange-cloud cut). Per D-12, the rule is an explicit path-allowlist for `/healthz`, `/openapi.json`, `/llms.txt`; NEVER Authorization-header-based.
-
-## R2 HMAC keys (NOT here)
-
-The R2 HMAC Access Key ID / Secret used by `common/storageprovider/r2/`
-are SEPARATE from the CF API token and are generated via the R2 dashboard
-"Manage R2 API Tokens" UI (NOT this Terraform). Reason: the
-`cloudflare_r2_bucket` resource doesn't issue per-bucket HMAC pairs;
-that's a one-off operator action, scoped to the specific bucket.
-
-After Phase 0 creates the staging bucket, the operator runs:
-1. Dashboard → R2 → Manage R2 API Tokens → Create
-2. Permissions: Object Read & Write
-3. Specify buckets: `instant-shared-staging` (NOT *Apply to all buckets*)
-4. TTL: 180 days
-5. Save the resulting `Access Key ID` + `Secret Access Key` into
- `instant-secrets` as `R2_HMAC_KEY_ID` + `R2_HMAC_SECRET` (D-8 names).
-
-Repeat for `instant-shared` (prod) after staging passes 48h green (D-9).
diff --git a/terraform/cloudflare/dns.tf b/terraform/cloudflare/dns.tf
deleted file mode 100644
index 9f3d9c4..0000000
--- a/terraform/cloudflare/dns.tf
+++ /dev/null
@@ -1,57 +0,0 @@
-# DNS records under management.
-#
-# Pre-cutover ritual (D-3): TTL must be 60s for ≥48h BEFORE any cut.
-# That ONLY applies to grey-cloud (proxied=false) records — CF requires
-# proxied=true records to have ttl=1 (CF manages TTL internally; setting
-# 60 returns a 400 "ttl must be set to 1 when `proxied` is true").
-#
-# `proxied = true` = CF orange-cloud (ttl=1); `false` = grey-cloud,
-# DNS only (ttl=60 for cutover ramp). Today: marketing apex is orange
-# (Phase 0 baseline), api is grey (becomes orange in Phase 4 — flip
-# both the proxied flag AND ttl=60→1 in that phase's PR).
-
-locals {
- marketing_origin = "instanode-web.pages.dev" # hard-coded for every environment: a local cannot be overridden from *.tfvars — promote to a variable if it must differ per environment
- api_origin = "152.42.154.144" # DigitalOcean LB; replaced with LB pool resource in Phase 4
-}
-
-resource "cloudflare_dns_record" "apex" {
- zone_id = var.zone_id # NOTE(review): no environment gate here (unlike the count-gated cloudflare_dns_record.staging), so each workspace that applies this file manages the same apex record — confirm intended
- name = var.zone_name
- type = "CNAME"
- content = local.marketing_origin
- ttl = 1
- proxied = true
- comment = "marketing apex; CNAME-flattened to Pages project"
-}
-
-resource "cloudflare_dns_record" "www" {
- zone_id = var.zone_id
- name = "www.${var.zone_name}"
- type = "CNAME"
- content = var.zone_name
- ttl = 1
- proxied = true
- comment = "www → apex redirect handled by CF page rule"
-}
-
-resource "cloudflare_dns_record" "api" {
- zone_id = var.zone_id
- name = "api.${var.zone_name}"
- type = "A"
- content = local.api_origin
- ttl = 60
- proxied = false # Phase 4 flips this to true after CF orange-cloud cache rules are applied
- comment = "api; grey-cloud today, orange-cloud per Phase 4 cut (D-3)"
-}
-
-resource "cloudflare_dns_record" "staging" {
- count = var.environment == "staging" ? 1 : 0
- zone_id = var.zone_id
- name = "staging.${var.zone_name}"
- type = "CNAME"
- content = "instant-staging.${var.zone_name}.cdn.cloudflare.net" # Pages preview hostname; replaced after Pages project is up
- ttl = 1
- proxied = true
- comment = "staging mirror per D-2"
-}
diff --git a/terraform/cloudflare/outputs.tf b/terraform/cloudflare/outputs.tf
deleted file mode 100644
index 3b123f3..0000000
--- a/terraform/cloudflare/outputs.tf
+++ /dev/null
@@ -1,34 +0,0 @@
-# Token VALUES are sensitive — operator must `terraform output -raw deploy_token`
-# and immediately pipe into `kubectl create secret` / `gh secret set`. Never
-# `terraform output` (no -raw) in a CI log: the redacted form ("(sensitive)")
-# is still a footgun if anyone removes `sensitive = true`.
-
-output "deploy_token_id" {
- value = cloudflare_account_token.deploy.id
- description = "Token A id (non-sensitive; safe in CI logs)."
-}
-
-output "deploy_token" {
- value = cloudflare_account_token.deploy.value
- description = "Token A secret. Pipe directly into k8s/GH secret; never log."
- sensitive = true
-}
-
-output "admin_tunnel_token_id" {
- value = cloudflare_account_token.admin_tunnel.id
- description = "Token B id (non-sensitive)."
-}
-
-output "admin_tunnel_token" {
- value = cloudflare_account_token.admin_tunnel.value
- description = "Token B secret. Operator-only; never put into CI."
- sensitive = true
-}
-
-output "account_id" {
- value = var.account_id
-}
-
-output "zone_id" {
- value = var.zone_id
-}
diff --git a/terraform/cloudflare/pages.tf b/terraform/cloudflare/pages.tf
deleted file mode 100644
index 49f0037..0000000
--- a/terraform/cloudflare/pages.tf
+++ /dev/null
@@ -1,61 +0,0 @@
-# Cloudflare Pages project for instanode-web (marketing site).
-# Phase 2 in FINAL-PLAN.md. Dashboard-on-Pages is KILLED per D-5;
-# do NOT add a second `cloudflare_pages_project` for dashboard here.
-
-resource "cloudflare_pages_project" "instanode_web" {
- account_id = var.account_id
- name = var.environment == "production" ? "instanode-web" : "instanode-web-staging"
- production_branch = "main"
-
- build_config = {
- build_command = "npm run build"
- destination_dir = "dist"
- root_dir = ""
- web_analytics_tag = null
- web_analytics_token = null
- }
-
- source = {
- type = "github"
- config = {
- owner = "instanodedev"
- repo_name = "instanode-web"
- production_branch = "main"
- pr_comments_enabled = true
- production_deployment_enabled = true
- preview_deployment_setting = "all"
- preview_branch_includes = ["*"]
- preview_branch_excludes = []
- }
- }
-
- deployment_configs = {
- production = {
- compatibility_date = "2026-05-30"
- compatibility_flags = []
- env_vars = {
- VITE_API_URL = {
- type = "plain_text"
- value = var.environment == "production" ? "https://api.instanode.dev" : "https://api.staging.instanode.dev"
- }
- VITE_ENV = {
- type = "plain_text"
- value = var.environment
- }
- }
- }
- preview = {
- compatibility_date = "2026-05-30"
- compatibility_flags = []
- }
- }
-}
-
-# Custom domain binding for the production apex only (bind after Phase 2 acceptance — D-9 equivalent for marketing: zero broken-link diff).
-# Staging's binding is cloudflare_pages_domain.staging_marketing in staging.tf; binding staging here too declared the same project/domain pair twice.
-resource "cloudflare_pages_domain" "instanode_web" {
-  count        = var.environment == "production" ? 1 : 0
-  account_id   = var.account_id
-  project_name = cloudflare_pages_project.instanode_web.name
-  name         = var.zone_name
-}
diff --git a/terraform/cloudflare/production.auto.tfvars b/terraform/cloudflare/production.auto.tfvars
deleted file mode 100644
index 0c188fe..0000000
--- a/terraform/cloudflare/production.auto.tfvars
+++ /dev/null
@@ -1,4 +0,0 @@
-environment = "production"
-
-deploy_token_expires_on = "2026-11-26T23:59:59Z"
-admin_tunnel_token_expires_on = "2026-08-28T23:59:59Z"
diff --git a/terraform/cloudflare/providers.tf b/terraform/cloudflare/providers.tf
deleted file mode 100644
index a89234e..0000000
--- a/terraform/cloudflare/providers.tf
+++ /dev/null
@@ -1,8 +0,0 @@
-provider "cloudflare" {
- # Reads CLOUDFLARE_API_TOKEN from env. Operator uses Token A
- # ("instanode-migration-deploy") for everything except Tunnel/Access
- # changes — for those, switch the env var to Token B in a separate
- # apply (see _modules/tunnel/README.md).
- #
- # Never commit a value here.
-}
diff --git a/terraform/cloudflare/r2.tf b/terraform/cloudflare/r2.tf
deleted file mode 100644
index 61206ba..0000000
--- a/terraform/cloudflare/r2.tf
+++ /dev/null
@@ -1,38 +0,0 @@
-# R2 buckets. Per CEO D-1 + DevOps D-4, staging gets a parallel bucket
-# (`instant-shared-staging`); production keeps the existing name and
-# moves traffic into it via the storageprovider env-flip (D-8 names).
-#
-# Lifecycle rule: anon/ prefix expires after 24h (matches the platform's
-# anon-resource TTL contract — pay-from-day-one, no trial creep).
-
-locals {
- bucket_name = var.environment == "production" ? "instant-shared" : "instant-shared-staging"
-}
-
-resource "cloudflare_r2_bucket" "shared" {
- account_id = var.account_id
- name = local.bucket_name
- location = "WNAM" # North America West. NOTE(review): described as closest to our DO NYC3 cluster, but NYC3 is on the US east coast — confirm WNAM vs ENAM
- storage_class = "Standard"
-}
-
-# 24h TTL on anon/ — matches platform contract that anonymous resources
-# expire after 24h (CLAUDE.md "anonymous (24h TTL) is the only free tier").
-resource "cloudflare_r2_bucket_lifecycle" "shared_anon_24h" {
- account_id = var.account_id
- bucket_name = cloudflare_r2_bucket.shared.name
-
- rules = [{
- id = "anon-24h"
- enabled = true
- conditions = {
- prefix = "anon/"
- }
- delete_objects_transition = {
- condition = {
- type = "Age"
- max_age = 86400 # 24h in seconds
- }
- }
- }]
-}
diff --git a/terraform/cloudflare/staging.auto.tfvars b/terraform/cloudflare/staging.auto.tfvars
deleted file mode 100644
index b7489f6..0000000
--- a/terraform/cloudflare/staging.auto.tfvars
+++ /dev/null
@@ -1,6 +0,0 @@
-environment = "staging"
-
-# Tokens rotate every 180d (deploy) / 90d (admin). Override per env
-# if staging is on a shorter cycle.
-deploy_token_expires_on = "2026-11-26T23:59:59Z"
-admin_tunnel_token_expires_on = "2026-08-28T23:59:59Z"
diff --git a/terraform/cloudflare/staging.tf b/terraform/cloudflare/staging.tf
deleted file mode 100644
index 08bc26a..0000000
--- a/terraform/cloudflare/staging.tf
+++ /dev/null
@@ -1,182 +0,0 @@
-# Staging-environment subdomains under staging.instanode.dev.
-#
-# All resources here are count-gated on `var.environment == "staging"` so
-# they only materialize in the staging workspace; the production workspace
-# plan shows no changes from this file.
-#
-# DIVISION OF RESPONSIBILITY between TF and wrangler:
-#
-# - **TF owns** wildcard records, env-level subdomains (dashboard, webhook),
-# and the deployment-app wildcard. These don't have a 1:1 Worker/Container
-# mapping or they're pre-deploy plumbing.
-# - **Wrangler owns** service-specific hostnames via `custom_domain = true`
-# in each wrangler.toml. wrangler auto-creates the DNS + cert + route on
-# first deploy. That covers: api.staging.instanode.dev (managed by
-# infra/wrangler/api/wrangler.toml).
-#
-# DO NOT add explicit TF records for hostnames wrangler is already
-# custom-domain-claiming — wrangler will fail to deploy with "DNS record
-# already exists" if both manage it.
-
-locals {
- is_staging = var.environment == "staging"
- # All staging subdomains live under this stem.
- staging_stem = "staging.${var.zone_name}"
-}
-
-# -----------------------------------------------------------------------------
-# Wildcards under *.staging.instanode.dev
-# -----------------------------------------------------------------------------
-#
-# Each per-tenant service in wrangler/ uses a hostname-shard pattern:
-# - pg-customer-<tenant>.staging.instanode.dev (pg-customers Container)
-# - mongo-<tenant>.staging.instanode.dev (mongodb Container)
-# - redis-<tenant>.staging.instanode.dev (redis-provision Container)
-# - nats-<tenant>.staging.instanode.dev (nats Container)
-#
-# A single proxied wildcard CNAME catches all of them; the Worker shells
-# in each wrangler service extract the tenant from the hostname and
-# dispatch to the right Durable Object via `idFromName(tenant)`.
-
-resource "cloudflare_dns_record" "staging_wildcard" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- name = "*.${local.staging_stem}"
- type = "CNAME"
- # CF requires SOME content for proxied CNAMEs; this is a placeholder. The
- # cloudflare_workers_route below routes traffic to the correct Worker
- # regardless of what's here. A 404 sink is intentional — any unrouted
- # subdomain hits CF's default 404 page.
- content = local.staging_stem
- ttl = 1
- proxied = true
- comment = "wildcard for per-tenant CF Container services in staging; routed via cloudflare_workers_route below"
-}
-
-# -----------------------------------------------------------------------------
-# Deployment-app wildcard: *.deployment.staging.instanode.dev
-# -----------------------------------------------------------------------------
-#
-# Mirror of prod's `*.deployment.instanode.dev`. Every /deploy/new staging
-# call provisions an app at `<slug>.deployment.staging.instanode.dev`.
-# Wrangler-managed Containers for the deploy compute target this wildcard;
-# the api Worker creates a DNS-less custom-domain claim per slug, but the
-# wildcard ensures any future deploy slug resolves to CF before its
-# custom-domain claim lands.
-
-resource "cloudflare_dns_record" "staging_deployment_wildcard" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- name = "*.deployment.${local.staging_stem}"
- type = "CNAME"
- content = "deployment.${local.staging_stem}"
- ttl = 1
- proxied = true
- comment = "wildcard for /deploy/new staging apps (mirrors prod *.deployment.instanode.dev)"
-}
-
-# Anchor for the deployment wildcard CNAME (the wildcard's content needs
-# a real record at the parent name).
-resource "cloudflare_dns_record" "staging_deployment_anchor" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- name = "deployment.${local.staging_stem}"
- type = "AAAA"
- content = "100::" # IPv6 discard prefix — never reachable; CF proxied front-end terminates
- ttl = 1
- proxied = true
- comment = "anchor for deployment wildcard CNAME (CF requires a real record at the parent)"
-}
-
-# -----------------------------------------------------------------------------
-# Webhook subdomain: webhook.staging.instanode.dev
-# -----------------------------------------------------------------------------
-#
-# /webhook/new staging endpoints return a URL at this host. Routed to the
-# api Container via a Worker route. Separate subdomain (vs api.staging.)
-# so customers can filter outbound webhook traffic by destination host.
-
-resource "cloudflare_dns_record" "staging_webhook" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- name = "webhook.${local.staging_stem}"
- type = "AAAA"
- content = "100::" # placeholder; CF orange-cloud handles routing
- ttl = 1
- proxied = true
- comment = "staging /webhook/new receiver subdomain"
-}
-
-# -----------------------------------------------------------------------------
-# Dashboard subdomain: dashboard.staging.instanode.dev
-# -----------------------------------------------------------------------------
-#
-# CEO killed dashboard-on-Pages for PROD (D-5) but staging dashboard is
-# useful for QA. Points at the same dashboard Pages project at the
-# `staging` branch preview hostname. NOT enabled for production — D-5
-# stands.
-
-resource "cloudflare_dns_record" "staging_dashboard" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- name = "dashboard.${local.staging_stem}"
- type = "CNAME"
- content = "instanode-dashboard-staging.pages.dev" # set after dashboard Pages project is created
- ttl = 1
- proxied = true
- comment = "staging dashboard — QA-only; D-5 keeps prod dashboard off Pages"
-}
-
-# -----------------------------------------------------------------------------
-# Workers Routes for per-tenant wildcards
-# -----------------------------------------------------------------------------
-#
-# `custom_domain = true` in wrangler.toml does NOT support wildcards.
-# Wildcards need cloudflare_workers_route + a wildcard DNS record (done
-# above). Each route binds a pattern to a specific Worker name; wrangler
-# deploys the Worker, TF wires the route.
-
-resource "cloudflare_workers_route" "staging_pg_customers" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- pattern = "pg-customer-*.${local.staging_stem}/*"
- script = "instanode-pg-customers-staging"
-}
-
-resource "cloudflare_workers_route" "staging_mongodb" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- pattern = "mongo-*.${local.staging_stem}/*"
- script = "instanode-mongodb-staging"
-}
-
-resource "cloudflare_workers_route" "staging_redis" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- pattern = "redis-*.${local.staging_stem}/*"
- script = "instanode-redis-provision-staging"
-}
-
-resource "cloudflare_workers_route" "staging_nats" {
- count = local.is_staging ? 1 : 0
- zone_id = var.zone_id
- pattern = "nats-*.${local.staging_stem}/*"
- script = "instanode-nats-staging"
-}
-
-# -----------------------------------------------------------------------------
-# Pages custom domain — staging marketing site
-# -----------------------------------------------------------------------------
-#
-# The Pages project itself is declared in pages.tf with the
-# `var.environment == "staging" ? "instanode-web-staging" : "instanode-web"`
-# name pattern. The custom-domain attachment is here so prod's pages.tf
-# stays simple.
-
-resource "cloudflare_pages_domain" "staging_marketing" {
-  count        = local.is_staging ? 1 : 0
-  account_id   = var.account_id
-  project_name = cloudflare_pages_project.instanode_web.name # "instanode-web-staging" in this workspace; referencing the resource orders the binding after project creation
-  name         = local.staging_stem
-  depends_on   = [cloudflare_dns_record.staging]
-}
diff --git a/terraform/cloudflare/tokens.tf b/terraform/cloudflare/tokens.tf
deleted file mode 100644
index 8a37ad0..0000000
--- a/terraform/cloudflare/tokens.tf
+++ /dev/null
@@ -1,75 +0,0 @@
-# Two scoped API tokens replace the Global API Key for CI / DevOps use.
-# Source: exported from CF dashboard 2026-05-30, renamed to avoid the
-# default `example_account_token` collision.
-#
-# WARNING — token values are SENSITIVE outputs. They appear once in TF
-# state after `apply`. Operator MUST run the `make install-secrets`
-# helper (see Makefile) to push them into k8s + GH org secrets, then
-# rotate state.
-
-# Token A — day-to-day deploy + DNS + R2 + Pages + Workers + Page Rules
-# + Load Balancing + Cache Purge + Zone Settings. Account-broad, zone-
-# narrow on instanode.dev. Used by CI.
-resource "cloudflare_account_token" "deploy" {
- account_id = var.account_id
- name = "instanode-migration-deploy-${var.environment}"
- expires_on = var.deploy_token_expires_on
-
- policies = [
- # Zone-scoped permissions on instanode.dev (zone_id pinned).
- {
- effect = "allow"
- permission_groups = [
- { id = "c4df38be41c247b3b4b7702e76eadae0" }, # Zone:Read
- { id = "3030687196b94b638145a3953da2b699" }, # DNS:Edit
- { id = "c8fed203ed3043cba015a93ad1616f1f" }, # Zone Settings:Edit
- { id = "c03055bc037c4ea9afb9a9f104b7b721" }, # Cache Purge:Purge
- { id = "e17beae8b8cb423a99b1730f21238bed" }, # Page Rules:Edit
- { id = "ed07f6c337da4195b4e72a1fb2c6bcae" }, # SSL and Certificates:Edit
- { id = "6d7f2f5f5b1d4a0e9081fdc98d432fd1" }, # Load Balancers:Edit
- { id = "4755a26eedb94da69e1066d98aa820be" }, # Apps:Edit (zone-side)
- ]
- resources = jsonencode({
- "com.cloudflare.api.account.zone.${var.zone_id}" = "*"
- })
- },
- # Account-scoped permissions for resources that aren't zone-bound.
- {
- effect = "allow"
- permission_groups = [
- { id = "dc44f27f48ab405392a5f69fe822bd01" }, # Workers Scripts:Edit
- { id = "8d28297797f24fb8a0c332fe0866ec89" }, # Workers KV Storage:Edit
- { id = "bf7481a1826f439697cb59a20b22293e" }, # Workers R2 Storage:Edit
- { id = "f7f0eda5697f475c90846e879bab8666" }, # Cloudflare Pages:Edit
- { id = "e086da7e2179491d91ee5f35b3ca210a" }, # Account Settings:Read
- { id = "d2a1802cc9a34e30852f8b33869b2f3c" }, # LB Monitors & Pools:Edit
- { id = "c1fde68c7bcc44588cbb6ddbc16d6480" }, # Account Analytics:Read
- ]
- resources = jsonencode({
- "com.cloudflare.api.account.${var.account_id}" = "*"
- })
- },
- ]
-}
-
-# Token B — break-glass / rare-use Tunnel + Access. Smaller scope, shorter
-# expiry. NOT used by CI; kept as separate apply for blast-radius isolation.
-resource "cloudflare_account_token" "admin_tunnel" {
- account_id = var.account_id
- name = "instanode-migration-admin-tunnel-${var.environment}"
- expires_on = var.admin_tunnel_token_expires_on
-
- policies = [{
- effect = "allow"
- permission_groups = [
- { id = "ad7a6f88896d498f98eb30592abfbbf4" }, # Cloudflare Tunnel:Edit
- { id = "77efc2c0724d4c4eb94bfd9656247130" }, # Access: Apps and Policies:Edit
- { id = "db37e5f1cb1a4e1aabaef8deaea43575" }, # Access: Service Tokens:Edit
- { id = "a1c0fec57cf94af79479a6d827fa518c" }, # Access: Organizations, Identity Providers:Edit
- { id = "1e13c5124ca64b72b1969a67e8829049" }, # Account Settings:Read — NOTE(review): the deploy token labels a different id as Account Settings:Read; verify both ids against the permission-groups API
- ]
- resources = jsonencode({
- "com.cloudflare.api.account.${var.account_id}" = "*"
- })
- }]
-}
diff --git a/terraform/cloudflare/variables.tf b/terraform/cloudflare/variables.tf
deleted file mode 100644
index 7e9f005..0000000
--- a/terraform/cloudflare/variables.tf
+++ /dev/null
@@ -1,37 +0,0 @@
-variable "account_id" {
- type = string
- description = "Cloudflare account ID (CF for Startups credit-tagged account)."
- default = "613a9e74136364c781a8e258326019f9"
-}
-
-variable "zone_id" {
- type = string
- description = "Cloudflare zone ID for instanode.dev."
- default = "08a1a569d2d6f9a713dc6d62103c5dc6"
-}
-
-variable "zone_name" {
- type = string
- default = "instanode.dev"
-}
-
-variable "environment" {
- type = string
- description = "staging or production. Selected via `terraform workspace`."
- validation {
- condition = contains(["staging", "production"], var.environment)
- error_message = "environment must be one of: staging, production."
- }
-}
-
-variable "deploy_token_expires_on" {
- type = string
- description = "RFC3339 expiry for the deploy token. Rotate every ≤180d."
- default = "2026-11-26T23:59:59Z"
-}
-
-variable "admin_tunnel_token_expires_on" {
- type = string
- description = "RFC3339 expiry for the admin/tunnel token. Rotate every ≤90d."
- default = "2026-08-28T23:59:59Z"
-}
diff --git a/terraform/cloudflare/versions.tf b/terraform/cloudflare/versions.tf
deleted file mode 100644
index 942c3ae..0000000
--- a/terraform/cloudflare/versions.tf
+++ /dev/null
@@ -1,27 +0,0 @@
-terraform {
-  required_version = ">= 1.6.3" # the s3 backend below uses use_path_style / skip_requesting_account_id / skip_s3_checksum, which 1.4 and 1.5 reject
-
-  required_providers {
-    cloudflare = {
-      source  = "cloudflare/cloudflare"
-      version = "~> 5.0"
-    }
-  }
-
-  # State lives in R2 (S3-compatible). The bucket "instanode-tf-state" must
-  # be created out-of-band before `terraform init` — see README §Bootstrap.
-  # Operator passes -backend-config="..." at init time; we DON'T hardcode
-  # the account-specific endpoint or HMAC creds here.
-  backend "s3" {
-    bucket                      = "instanode-tf-state"
-    key                         = "cloudflare/terraform.tfstate"
-    region                      = "auto"
-    use_path_style              = true
-    skip_credentials_validation = true
-    skip_metadata_api_check     = true
-    skip_region_validation      = true
-    skip_requesting_account_id  = true
-    skip_s3_checksum            = true
-    encrypt                     = true
-  }
-}
diff --git a/wrangler/README.md b/wrangler/README.md
deleted file mode 100644
index db2b867..0000000
--- a/wrangler/README.md
+++ /dev/null
@@ -1,97 +0,0 @@
-# Wrangler — CF Containers for staging
-
-This directory deploys instanode.dev services as **Cloudflare Containers**
-to the **staging** environment. Each service has its own subdir with a
-`wrangler.toml` + a tiny Worker shell (`src/worker.ts`) that exposes the
-Container via a Durable Object binding.
-
-Production does NOT use this — see the `production-` workflow when written.
-Per user direction 2026-05-30: staging is CF-only, ephemeral state acceptable.
-
-## Why wrangler, not Terraform
-
-The `cloudflare/cloudflare` Terraform provider (v5.19.1 as of bootstrap) does
-NOT yet expose a `cloudflare_container` resource. Verified by `terraform
-providers schema -json | jq '.. | keys?' | grep container` → empty.
-
-Until the provider catches up, we manage Containers via `wrangler` and
-**Terraform manages everything else**: DNS, R2, Pages, Hyperdrive, KV,
-Queues, secrets — see `../terraform/cloudflare/`.
-
-When `cloudflare_container` ships, we'll swap in. Until then, the
-boundary is clean:
-
-| Surface | Tool |
-|---|---|
-| DNS records, R2 buckets, Pages projects, Hyperdrive config, API tokens | **Terraform** (`../terraform/cloudflare/`) |
-| CF Containers (api/worker/provisioner + stateful staging services) | **Wrangler** (this dir) |
-| k8s manifests (production data plane until that migrates) | **kubectl** (`../k8s/`) |
-
-## Ephemeral-state acceptance criterion
-
-CF Containers wipe disk every time an instance goes to sleep (which fires
-on traffic-quiet, not just intentional restart). Source:
-https://developers.cloudflare.com/containers/platform-details/
-
-This means our staging Postgres / Mongo / Redis / NATS containers WILL
-lose their data, mid-test sometimes. E2E test design MUST tolerate this:
-
-1. **Every test seeds its own fixtures** at start; no test assumes state
- from a prior test.
-2. **No "deploy now, verify in 2h" tests** — the container may have
- slept and lost its state in between.
-3. **Tests that span multiple HTTP calls** must complete within one
- container-active window (typically minutes).
-4. **`/db/new` in staging** returns a connection string that may stop
- working when the backing Container sleeps. Documented in the staging
- API responses.
-5. **Synthetic monitors** keep the high-traffic Containers warm; cold
- ones are accepted as ephemeral.
-
-These tradeoffs are explicit and user-blessed per the CF-only staging
-decision. Production has a different host (TBD — not in this dir).
-
-## Per-service layout
-
-Each subdir contains:
-
-```
-infra/wrangler/<service>/
-├── wrangler.toml # CF Container + Worker config
-├── src/
-│ └── worker.ts # Tiny Worker shell that wraps the Container DO
-├── Dockerfile # Optional override; defaults to ../../<service>/Dockerfile
-└── README.md # Service-specific notes (image source, env vars, ports)
-```
-
-The actual service code (api, worker, provisioner) lives in its own repo
-under `instanodedev/` and produces a Docker image that wrangler ships.
-For services without a separate repo (pg-platform, pg-customers, mongodb,
-redis-provision, nats), we use upstream public images (`postgres:16`,
-`mongo:7`, `redis:7`, `nats:2`) and a small staging-only init script.
-
-## Deploy
-
-CI auto-deploys on merge to `master` via `../.github/workflows/wrangler-deploy-staging.yml`.
-Manual deploy from an operator workstation:
-
-```bash
-cd infra/wrangler/<service>
-wrangler login # one-time
-wrangler containers deploy --env staging
-```
-
-Requires `CLOUDFLARE_API_TOKEN` env (Token A from the TF outputs).
-
-## Service inventory
-
-| Subdir | What runs | Stateful? | Public hostname (staging) | Notes |
-|---|---|---|---|---|
-| `api/` | instanode.dev api binary | no | `api.staging.instanode.dev` | HTTP only |
-| `worker/` | River job worker | no | none (cron) | Triggered by CF Cron |
-| `provisioner/` | gRPC :50051 service | no | private (Container→Container only) | api calls it |
-| `pg-platform/` | postgres:16 | **yes, ephemeral** | private | `instance_type=standard`; data wiped on sleep |
-| `pg-customers/` | postgres:16 | **yes, ephemeral** | `pg-customer-<tenant>.staging.instanode.dev` (one per tenant) | Customer-facing in staging only |
-| `mongodb/` | mongo:7 | **yes, ephemeral** | private | accessed by /nosql/new staging |
-| `redis-provision/` | redis:7 | **yes, ephemeral** | `redis-<tenant>.staging.instanode.dev` | Customer-facing |
-| `nats/` | nats:2 (no JetStream — JS needs durable disk) | **yes, ephemeral** | `nats-<tenant>.staging.instanode.dev` | Core NATS only in staging |
diff --git a/wrangler/api/README.md b/wrangler/api/README.md
deleted file mode 100644
index 80a190f..0000000
--- a/wrangler/api/README.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# api — CF Containers staging deploy
-
-Wraps the Go api binary (port 8080) in a CF Container. Image pulled from
-`ghcr.io/instanode-dev/instant-api:staging` (the `image` set in `wrangler.toml`) —
-built by the api repo's CI on every push to master and tagged `:staging`.
-
-## Env vars and secrets
-
-Config (committed):
-- `ENVIRONMENT=staging`
-- `OBJECT_STORE_BACKEND=r2`
-- `R2_BUCKET_NAME=instant-shared-staging`
-
-Secrets (via `wrangler secret put`):
-- `DATABASE_URL` — points at `pg-platform` Container DO via service binding
-- `CUSTOMER_DATABASE_URL` — points at `pg-customers` Container DO
-- `REDIS_URL` — service binding to `redis-platform`
-- `NATS_URL` — service binding to `nats`
-- `AES_KEY`, `JWT_SECRET`, `RAZORPAY_WEBHOOK_SECRET`, `BREVO_API_KEY` — same names as k8s prod
-- `R2_HMAC_KEY_ID`, `R2_HMAC_SECRET` — from R2 dashboard, scoped to `instant-shared-staging` bucket
-
-## Deploy
-
-```bash
-cd infra/wrangler/api
-wrangler containers deploy --env staging
-```
-
-CI auto-deploys on merge to master via the workflow in `infra/.github/workflows/`.
-
-## Known constraints
-
-- **Disk wipes on sleep** — api itself is stateless so this is fine; downstream PG/Mongo are NOT (see ../README.md acceptance criterion).
-- **HTTP only** — gRPC api→provisioner is fine (CF Containers support HTTP/2).
-- **No persistent customer port-forwards** — the dashboard's port-forward proxy is disabled on staging.
diff --git a/wrangler/api/src/worker.ts b/wrangler/api/src/worker.ts
deleted file mode 100644
index 7e78d5c..0000000
--- a/wrangler/api/src/worker.ts
+++ /dev/null
@@ -1,32 +0,0 @@
-// Tiny Worker shell for the api Container.
-//
-// CF Containers require a Worker entrypoint that forwards requests to
-// the Container's Durable Object. The container itself runs the actual
-// Go binary (instanodedev/api), listening on :8080.
-//
-// Every incoming HTTP request is routed to a Container instance; CF
-// handles spin-up/spin-down. Disk is ephemeral — see ../README.md.
-
-import { Container, getContainer } from "@cloudflare/containers";
-
-// Container-backed Durable Object that runs the Go api image.
-export class ApiContainer extends Container {
-  // Port the Go binary listens on inside the container.
-  defaultPort = 8080;
-  // Sleep after 10 minutes of no traffic. CF will spin back up on the
-  // next request, with a fresh disk. The api is stateless (state lives
-  // in pg-platform Container), so cold-start is correctness-safe.
-  sleepAfter = "10m";
-}
-
-export default {
-  // Worker entrypoint: forwards every incoming request, unmodified, to
-  // the api container and returns the container's response.
-  async fetch(request: Request, env: Env): Promise<Response> {
-    // No name is passed to getContainer, so all requests share one
-    // Container instance (single-shard for staging; production would
-    // shard by tenant or geo).
-    const container = getContainer(env.API_CONTAINER);
-    return container.fetch(request);
-  },
-};
-
-// Bindings declared in wrangler.toml for this Worker.
-interface Env {
-  // Durable Object namespace backing ApiContainer.
-  API_CONTAINER: DurableObjectNamespace<ApiContainer>;
-}
diff --git a/wrangler/api/wrangler.toml b/wrangler/api/wrangler.toml
deleted file mode 100644
index a403a09..0000000
--- a/wrangler/api/wrangler.toml
+++ /dev/null
@@ -1,64 +0,0 @@
-# instanode-api on CF Containers (staging).
-#
-# The api is a Go binary listening on :8080. CF Containers wraps it in a
-# Durable Object; the Worker shell in src/worker.ts forwards every HTTP
-# request to the container.
-#
-# Image: pulled from GHCR (built by api repo's CI on every push to master).
-
-name = "instanode-api"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-# Per-environment config keeps the staging deploy isolated from any future
-# prod deploy (which won't live here — production goes to a non-CF k8s).
-[env.staging]
-name = "instanode-api-staging"
-# Custom Domain patterns must be a bare hostname: wrangler rejects
-# wildcards (*) and paths on routes that set custom_domain = true. A
-# Custom Domain already receives every path on the hostname, so no
-# trailing "/*" is needed.
-routes = [
-  { pattern = "api.staging.instanode.dev", custom_domain = true },
-]
-
-# Container backed by a Durable Object class.
-[[env.staging.containers]]
-class_name = "ApiContainer"
-image = "ghcr.io/instanode-dev/instant-api:staging"
-max_instances = 3
-instance_type = "standard" # 1 vCPU, 4 GiB RAM, 8 GiB ephemeral disk
-
-[[env.staging.durable_objects.bindings]]
-name = "API_CONTAINER"
-class_name = "ApiContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["ApiContainer"]
-
-# Env vars passed to the container. Secrets via `wrangler secret put`.
-[env.staging.vars]
-ENVIRONMENT = "staging"
-OBJECT_STORE_BACKEND = "r2"
-R2_BUCKET_NAME = "instant-shared-staging"
-# DATABASE_URL, REDIS_URL, NATS_URL, etc. resolve to other Container DOs
-# via service bindings — see [[env.staging.services]] block.
-
-# Service bindings — Worker can RPC into other Containers/Workers without
-# a public hostname.
-[[env.staging.services]]
-binding = "PG_PLATFORM"
-service = "instanode-pg-platform-staging"
-environment = "staging"
-
-[[env.staging.services]]
-binding = "PROVISIONER"
-service = "instanode-provisioner-staging"
-environment = "staging"
-
-[[env.staging.services]]
-binding = "REDIS_PLATFORM"
-service = "instanode-redis-platform-staging"
-environment = "staging"
-
-# Observability — enables Workers Logs for this Worker, sampling every
-# request (head_sampling_rate = 1.0). Logpush is configured separately.
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0
diff --git a/wrangler/mongodb/Dockerfile b/wrangler/mongodb/Dockerfile
deleted file mode 100644
index afbe234..0000000
--- a/wrangler/mongodb/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-# mongodb image for staging CF Container.
-#
-# Base: mongo:7. CF Containers' ephemeral disk means EVERY cold start
-# is a fresh init — there is no "first init vs subsequent restart"
-# distinction. The mongo image's docker-entrypoint runs initdb scripts
-# on every fresh /data/db, so the staging-bootstrap script below runs
-# every cold start.
-#
-# Why custom (vs pristine mongo:7):
-# - Bake the staging-bootstrap that creates the admin user + sets
-# the wire compression default so api can connect without
-# post-deploy operator action.
-# - Healthcheck via `mongosh ping` for the Worker shell's wait loop.
-# - Per-tenant database names are CREATED on demand by provisioner;
-# no per-tenant schema baked in here.
-
-FROM mongo:7
-
-# Staging-bootstrap: idempotent admin user. Mongo entrypoint reads
-# MONGO_INITDB_ROOT_USERNAME / MONGO_INITDB_ROOT_PASSWORD from env on
-# first init; this script is a defence-in-depth ensure path used by
-# the api's connection-test against `db.adminCommand({ ping: 1 })`.
-COPY infra/wrangler/mongodb/docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/
-
-# `mongosh` is in the base image; the healthcheck just exercises a
-# round-trip via the admin DB to confirm the daemon is up + responsive.
-HEALTHCHECK --interval=10s --timeout=3s --start-period=30s --retries=3 \
- CMD mongosh --quiet --eval "db.adminCommand({ping:1}).ok" --host=localhost | grep -q '^1$' || exit 1
-
-EXPOSE 27017
diff --git a/wrangler/mongodb/docker-entrypoint-initdb.d/00_staging_bootstrap.js b/wrangler/mongodb/docker-entrypoint-initdb.d/00_staging_bootstrap.js
deleted file mode 100644
index ef7f31e..0000000
--- a/wrangler/mongodb/docker-entrypoint-initdb.d/00_staging_bootstrap.js
+++ /dev/null
@@ -1,27 +0,0 @@
-// Staging-bootstrap for mongodb CF Container. Runs on EVERY cold start
-// because CF Containers wipe /data/db on sleep.
-//
-// Idempotent: createUser fails with code 51003 ("user already exists")
-// if the admin already created the user in the same boot — we swallow
-// that. Other codes propagate.
-
-(function () {
- var adminDb = db.getSiblingDB('admin');
-
- // Mongo entrypoint already creates the root user from
- // MONGO_INITDB_ROOT_USERNAME/MONGO_INITDB_ROOT_PASSWORD. Confirm it
- // resolved successfully so the api connection doesn't hit "no users
- // configured" on the first call.
- var users = adminDb.system.users.find({ user: 'admin' }).count();
- if (users === 0) {
- print('00_staging_bootstrap: no admin user found, creating one from env vars');
- adminDb.createUser({
- user: process.env.MONGO_INITDB_ROOT_USERNAME || 'admin',
- pwd: process.env.MONGO_INITDB_ROOT_PASSWORD || 'staging-bootstrap',
- roles: [{ role: 'root', db: 'admin' }],
- });
- } else {
- print('00_staging_bootstrap: admin user already provisioned by mongo entrypoint');
- }
- print('00_staging_bootstrap: complete');
-})();
diff --git a/wrangler/mongodb/src/worker.ts b/wrangler/mongodb/src/worker.ts
deleted file mode 100644
index 5cc2570..0000000
--- a/wrangler/mongodb/src/worker.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { Container, getContainer } from "@cloudflare/containers";
-
-// Container-backed Durable Object for one tenant's Mongo instance.
-export class MongoContainer extends Container {
-  // Port forwarded to inside the container.
-  defaultPort = 27017;
-  // Idle period after which the container is put to sleep.
-  sleepAfter = "20m";
-}
-
-export default {
-  // Worker entrypoint: picks the tenant's container from the request
-  // hostname and forwards the request to it unchanged.
-  async fetch(request: Request, env: Env): Promise<Response> {
-    const url = new URL(request.url);
-    // First DNS label minus the "mongo-" prefix, e.g.
-    // "mongo-acme.staging.instanode.dev" -> "acme". A label without the
-    // prefix is used as-is.
-    const tenant = url.hostname.split(".")[0].replace(/^mongo-/, "");
-    // Same tenant name always maps to the same Durable Object id.
-    const id = env.MONGO_CONTAINER.idFromName(tenant);
-    return env.MONGO_CONTAINER.get(id).fetch(request);
-  },
-};
-
-// Bindings declared in wrangler.toml for this Worker.
-interface Env {
-  // Durable Object namespace backing MongoContainer.
-  MONGO_CONTAINER: DurableObjectNamespace<MongoContainer>;
-}
diff --git a/wrangler/mongodb/wrangler.toml b/wrangler/mongodb/wrangler.toml
deleted file mode 100644
index 48d30dc..0000000
--- a/wrangler/mongodb/wrangler.toml
+++ /dev/null
@@ -1,30 +0,0 @@
-# mongodb — per-tenant Mongo in a CF Container (staging).
-
-name = "instanode-mongodb"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-[env.staging]
-name = "instanode-mongodb-staging"
-# Per-tenant hostnames: src/worker.ts strips the "mongo-" prefix from the
-# first DNS label to pick the tenant's container.
-# NOTE(review): wrangler appears to reject wildcards and paths on routes
-# with custom_domain = true, and zone routes do not seem to allow a
-# wildcard in the middle of the hostname — confirm this pattern deploys.
-routes = [
- { pattern = "mongo-*.staging.instanode.dev/*", custom_domain = true },
-]
-
-[[env.staging.containers]]
-class_name = "MongoContainer"
-# Custom image — wraps mongo:7 with staging-bootstrap + healthcheck.
-image = "ghcr.io/instanode-dev/instant-mongodb:staging"
-max_instances = 10
-instance_type = "standard"
-
-[[env.staging.durable_objects.bindings]]
-name = "MONGO_CONTAINER"
-class_name = "MongoContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["MongoContainer"]
-
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0
diff --git a/wrangler/nats/Dockerfile b/wrangler/nats/Dockerfile
deleted file mode 100644
index e3cd67a..0000000
--- a/wrangler/nats/Dockerfile
+++ /dev/null
@@ -1,23 +0,0 @@
-# nats image for staging CF Container.
-#
-# Base: nats:2-alpine. JetStream needs durable disk — NOT viable on
-# CF Containers' ephemeral storage — so this image runs CORE NATS ONLY
-# (no -js flag). Customer-facing /queue/new in staging returns a
-# legacy_open connection string and tests that exercise JetStream
-# features are skipped (see test guard in api/internal/handlers/queue.go).
-#
-# Auth mode: legacy_open. Per CLAUDE.md "Known Design Gaps", prod
-# serves legacy_open until the operator runs `nsc generate` for
-# operator/sys NKeys (NATS-AUTH-RUNBOOK.md). Staging matches prod's
-# current auth posture.
-
-FROM nats:2-alpine
-
-COPY infra/wrangler/nats/nats-server.conf /etc/nats/nats-server.conf
-
-HEALTHCHECK --interval=10s --timeout=3s --start-period=10s --retries=3 \
- CMD wget -qO- http://localhost:8222/healthz | grep -q '"status":"ok"' || exit 1
-
-EXPOSE 4222 8222
-
-CMD ["-c", "/etc/nats/nats-server.conf"]
diff --git a/wrangler/nats/nats-server.conf b/wrangler/nats/nats-server.conf
deleted file mode 100644
index db33a4f..0000000
--- a/wrangler/nats/nats-server.conf
+++ /dev/null
@@ -1,33 +0,0 @@
-# Staging nats-server.conf — core NATS only (no JetStream — ephemeral
-# disk on CF Containers can't satisfy JetStream's durable WAL).
-#
-# Auth mode: legacy_open. No per-tenant JWT in staging. Production
-# eventually upgrades to per-tenant JWT once an operator runs
-# `nsc generate` for operator + sys NKeys (NATS-AUTH-RUNBOOK.md).
-# This staging config DOES NOT block on that.
-
-listen: 0.0.0.0:4222
-
-# HTTP monitoring endpoint used by the Worker shell's healthcheck.
-http: 0.0.0.0:8222
-
-# Connection + payload limits matched to CF Container "basic" class.
-max_connections: 1000
-max_payload: 1MB
-max_pending: 32MB
-
-# Logging to stdout for `wrangler tail`.
-debug: false
-trace: false
-logtime: true
-
-# Auth — legacy_open: no creds required. Customers connecting via
-# /queue/new staging endpoint get an open URL.
-authorization {
- # Empty block = no auth. Documented intentional choice.
-}
-
-# NO JetStream block — explicitly disabled because CF Container disk
-# is ephemeral. Tests that require JetStream skip on staging via the
-# `auth_mode=legacy_open` resource field (see CLAUDE.md /queue/new).
-# jetstream { ... } # DO NOT enable in staging
diff --git a/wrangler/nats/src/worker.ts b/wrangler/nats/src/worker.ts
deleted file mode 100644
index 45f2350..0000000
--- a/wrangler/nats/src/worker.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { Container, getContainer } from "@cloudflare/containers";
-
-// Container-backed Durable Object for one tenant's NATS server.
-export class NatsContainer extends Container {
-  // Port forwarded to inside the container.
-  defaultPort = 4222;
-  // Idle period after which the container is put to sleep.
-  sleepAfter = "20m";
-}
-
-export default {
-  // Worker entrypoint: picks the tenant's container from the request
-  // hostname and forwards the request to it unchanged.
-  async fetch(request: Request, env: Env): Promise<Response> {
-    const url = new URL(request.url);
-    // First DNS label minus the "nats-" prefix, e.g.
-    // "nats-acme.staging.instanode.dev" -> "acme". A label without the
-    // prefix is used as-is.
-    const tenant = url.hostname.split(".")[0].replace(/^nats-/, "");
-    // Same tenant name always maps to the same Durable Object id.
-    const id = env.NATS_CONTAINER.idFromName(tenant);
-    return env.NATS_CONTAINER.get(id).fetch(request);
-  },
-};
-
-// Bindings declared in wrangler.toml for this Worker.
-interface Env {
-  // Durable Object namespace backing NatsContainer.
-  NATS_CONTAINER: DurableObjectNamespace<NatsContainer>;
-}
diff --git a/wrangler/nats/wrangler.toml b/wrangler/nats/wrangler.toml
deleted file mode 100644
index 7315949..0000000
--- a/wrangler/nats/wrangler.toml
+++ /dev/null
@@ -1,40 +0,0 @@
-# nats — per-tenant NATS in a CF Container (staging).
-# NATS JetStream needs durable disk — NOT viable on ephemeral. Staging
-# runs core NATS only (no streams). /queue/new in staging returns a
-# legacy_open connection string. JetStream features test-skipped.
-
-name = "instanode-nats"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-[env.staging]
-name = "instanode-nats-staging"
-# Per-tenant hostnames: src/worker.ts strips the "nats-" prefix from the
-# first DNS label to pick the tenant's container.
-# NOTE(review): wrangler appears to reject wildcards and paths on routes
-# with custom_domain = true, and zone routes do not seem to allow a
-# wildcard in the middle of the hostname — confirm this pattern deploys.
-routes = [
- { pattern = "nats-*.staging.instanode.dev/*", custom_domain = true },
-]
-
-[[env.staging.containers]]
-class_name = "NatsContainer"
-# Custom image — wraps nats:2-alpine with /etc/nats/nats-server.conf
-# baked in (core NATS only, no JetStream, legacy_open auth — matches
-# prod's current auth posture).
-image = "ghcr.io/instanode-dev/instant-nats:staging"
-max_instances = 10
-instance_type = "basic"
-
-[[env.staging.durable_objects.bindings]]
-name = "NATS_CONTAINER"
-class_name = "NatsContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["NatsContainer"]
-
-[env.staging.vars]
-# No -js flag → core NATS only. Document that JetStream is staging-disabled
-# in /tmp/cf-migration/shared/STAGING-LIMITATIONS.md.
-NATS_ARGS = "-m 8222"
-
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0
diff --git a/wrangler/pg-customers/src/worker.ts b/wrangler/pg-customers/src/worker.ts
deleted file mode 100644
index 73ce9b0..0000000
--- a/wrangler/pg-customers/src/worker.ts
+++ /dev/null
@@ -1,22 +0,0 @@
-import { Container, getContainer } from "@cloudflare/containers";
-
-// Container-backed Durable Object for one tenant's Postgres instance.
-export class PgCustomersContainer extends Container {
-  // Port forwarded to inside the container.
-  defaultPort = 5432;
-  // Idle period after which the container is put to sleep.
-  sleepAfter = "20m";
-}
-
-export default {
-  // Worker entrypoint: picks the tenant's container from the request
-  // hostname and forwards the request to it unchanged.
-  async fetch(request: Request, env: Env): Promise<Response> {
-    // Per-tenant routing: the tenant is the first DNS label minus the
-    // "pg-customer-" prefix, e.g.
-    // "pg-customer-acme.staging.instanode.dev" -> "acme". A label
-    // without the prefix is used as-is.
-    const url = new URL(request.url);
-    const tenant = url.hostname.split(".")[0].replace(/^pg-customer-/, "");
-    // ID by tenant → one DO instance per tenant (their isolated PG).
-    const id = env.PG_CUSTOMERS_CONTAINER.idFromName(tenant);
-    const container = env.PG_CUSTOMERS_CONTAINER.get(id);
-    return container.fetch(request);
-  },
-};
-
-// Bindings declared in wrangler.toml for this Worker.
-interface Env {
-  // Durable Object namespace backing PgCustomersContainer.
-  PG_CUSTOMERS_CONTAINER: DurableObjectNamespace<PgCustomersContainer>;
-}
diff --git a/wrangler/pg-customers/wrangler.toml b/wrangler/pg-customers/wrangler.toml
deleted file mode 100644
index 65a2b52..0000000
--- a/wrangler/pg-customers/wrangler.toml
+++ /dev/null
@@ -1,36 +0,0 @@
-# pg-customers — per-tenant Postgres in a CF Container (staging only).
-# Customer-facing: /db/new in staging returns a connection string here.
-# Data is EPHEMERAL — wipes on container sleep. Documented in ../README.md.
-
-name = "instanode-pg-customers"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-[env.staging]
-name = "instanode-pg-customers-staging"
-# Public TCP exposure happens via the Worker shell; staging clients dial
-# `pg-customer-<tenant>.staging.instanode.dev:5432`.
-# NOTE(review): wrangler appears to reject wildcards and paths on routes
-# with custom_domain = true, and zone routes do not seem to allow a
-# wildcard in the middle of the hostname — confirm this pattern deploys,
-# and that port 5432 traffic can reach a Worker route at all.
-routes = [
- { pattern = "pg-customer-*.staging.instanode.dev/*", custom_domain = true },
-]
-
-[[env.staging.containers]]
-class_name = "PgCustomersContainer"
-image = "postgres:16-alpine"
-max_instances = 10 # staging cap — bump if QA needs more
-instance_type = "standard"
-
-[[env.staging.durable_objects.bindings]]
-name = "PG_CUSTOMERS_CONTAINER"
-class_name = "PgCustomersContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["PgCustomersContainer"]
-
-[env.staging.vars]
-PGDATA = "/var/lib/postgresql/data/pgdata"
-
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0
diff --git a/wrangler/pg-platform/00_pre.sql b/wrangler/pg-platform/00_pre.sql
deleted file mode 100644
index f2c18fb..0000000
--- a/wrangler/pg-platform/00_pre.sql
+++ /dev/null
@@ -1,25 +0,0 @@
--- Runs FIRST in /docker-entrypoint-initdb.d/ (alphabetical sort puts
--- "00_pre.sql" ahead of "001_initial.sql"). Sets up extensions + log
--- markers that every later migration depends on.
---
--- This file is staging-only — production uses different operator-run
--- bootstrap. See infra/wrangler/pg-platform/Dockerfile for context.
-
--- pgvector — mig 040+ does CREATE EXTENSION vector and assumes the
--- shared library is loadable. pgvector/pgvector:pg16 ships the .so;
--- this just registers it in the freshly-init'd database.
-CREATE EXTENSION IF NOT EXISTS vector;
-
--- Standard extensions we use across migrations.
-CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-
--- Match prod timezone — every timestamp comparison in tests assumes UTC.
--- A bare SET only lasts for the session that runs this file, so it
--- would not reach later migration files or application connections.
--- Persist UTC as the database default, then also apply it to this
--- session for the statements below.
-DO $$
-BEGIN
-  EXECUTE format('ALTER DATABASE %I SET timezone TO %L', current_database(), 'UTC');
-END $$;
-SET TIME ZONE 'UTC';
-
--- Log marker. Shows in `wrangler tail` so operators know this is a
--- cold-start init (vs an unexpected mid-life restart).
-DO $$
-BEGIN
-  RAISE NOTICE 'pg-platform staging cold start — re-applying 63 migrations against fresh PGDATA';
-END $$;
diff --git a/wrangler/pg-platform/Dockerfile b/wrangler/pg-platform/Dockerfile
deleted file mode 100644
index 3b83f11..0000000
--- a/wrangler/pg-platform/Dockerfile
+++ /dev/null
@@ -1,53 +0,0 @@
-# pg-platform image for staging CF Container.
-#
-# Base: pgvector/pgvector:pg16 — Postgres 16 + the pgvector extension
-# that platform_db's resource embeddings table requires (extension CREATE
-# in mig 040+; without pgvector the image init fails on the first
-# `CREATE EXTENSION vector` statement).
-#
-# Migrations: the 63 *.sql files from api/internal/db/migrations/ are
-# copied into /docker-entrypoint-initdb.d/. Postgres's official
-# entrypoint runs every *.sql alphabetically on first cluster init —
-# and CF Containers' ephemeral disk means EVERY cold start IS a first
-# cluster init, so the migrations re-apply on every wake-from-sleep.
-#
-# This is the explicit, user-blessed ephemeral-state tradeoff for the
-# CF-only staging design. See ../README.md acceptance criterion.
-#
-# Build context: workspace root (../../).
-# Build command (CI runs this; not for ad-hoc local use):
-# docker buildx build \
-# -f infra/wrangler/pg-platform/Dockerfile \
-# -t ghcr.io/instanode-dev/instant-pg-platform:staging \
-# --push \
-# .
-
-FROM pgvector/pgvector:pg16
-
-# Copy every migration file in numeric (=alphabetical) order. The
-# leading 0NN_*.sql naming guarantees the entrypoint applies them in
-# the same order as `make test-db-up` does locally.
-COPY api/internal/db/migrations/*.sql /docker-entrypoint-initdb.d/
-
-# A pre-script that runs before any migration. Names start with "00_"
-# so it sorts ahead of "001_initial.sql".
-# NOTE(review): in plain byte order "001_" sorts BEFORE "00_" ('1' is
-# 0x31, '_' is 0x5F), so this ordering relies on the entrypoint shell's
-# locale collation — confirm, or rename the script to 000_pre.sql.
-#
-# We use it to:
-#   1. CREATE EXTENSION pgvector (idempotent — base image has the
-#      shared lib; this enables it in the freshly-init'd database).
-#   2. Set timezone to UTC to match production.
-#   3. Print a one-line marker so the CF Container's logs make clear
-#      this is a fresh init (operator confidence on cold start).
-COPY infra/wrangler/pg-platform/00_pre.sql /docker-entrypoint-initdb.d/00_pre.sql
-
-# postgres image expects POSTGRES_PASSWORD set; staging wrangler.toml
-# wires that through `wrangler secret put POSTGRES_PASSWORD`. The
-# image also reads POSTGRES_DB / POSTGRES_USER if provided (wrangler
-# env block sets POSTGRES_DB=instant_platform).
-
-# Healthcheck — pg_isready over TCP on 127.0.0.1, not the local socket.
-# While the init scripts run, the entrypoint's temporary server listens
-# on the unix socket only; a socket check would therefore report healthy
-# before the migrations have finished. Used by the Worker shell's
-# container.fetch wait-loop.
-HEALTHCHECK --interval=10s --timeout=3s --start-period=30s --retries=3 \
-  CMD pg_isready -h 127.0.0.1 -U "${POSTGRES_USER:-postgres}" -d "${POSTGRES_DB:-instant_platform}" || exit 1
-
-EXPOSE 5432
diff --git a/wrangler/pg-platform/README.md b/wrangler/pg-platform/README.md
deleted file mode 100644
index 67b992a..0000000
--- a/wrangler/pg-platform/README.md
+++ /dev/null
@@ -1,87 +0,0 @@
-# pg-platform — staging CF Container
-
-Postgres 16 + pgvector. Image baked with all 63 platform migrations in
-`/docker-entrypoint-initdb.d/` so cold starts come up with a fully
-migrated schema.
-
-## Ephemeral acceptance
-
-Per the CF-only staging decision (2026-05-30): disk wipes every time the
-Container sleeps (which fires on traffic-quiet, not just intentional
-restart). Each cold start:
-
-1. CF Containers wakes the Container with a fresh disk.
-2. Postgres entrypoint sees PGDATA empty → runs `initdb`.
-3. `00_pre.sql` runs first — pgvector + uuid-ossp + pgcrypto extensions, UTC tz.
-4. The 63 migration files run in numeric order (001 → 063).
-5. Container reports healthy via `pg_isready`.
-6. api / worker / provisioner Containers can now connect via service binding.
-
-Total cold-start time: estimated 15–45s depending on Container class +
-migration count. Anything that talks to pg-platform must tolerate this
-warmup (Worker shell's `container.fetch` blocks until healthy).
-
-## Image build
-
-The image is built by `infra/.github/workflows/wrangler-build-staging-images.yml`
-on push to master that changes any of:
-- `api/internal/db/migrations/**` (cross-repo trigger via repository_dispatch — see below)
-- `infra/wrangler/pg-platform/**`
-
-Plus daily at 09:00 UTC to keep up with migrations merged in api repo without
-explicit infra commits.
-
-Manual rebuild:
-```bash
-gh workflow run wrangler-build-staging-images.yml \
- -R instanode-dev/infra \
- -f service=pg-platform
-```
-
-## Cross-repo migration sync
-
-Migrations live in the `api` repo, not infra. Two patterns to keep the
-image current:
-
-1. **Daily cron rebuild** — the build workflow runs nightly with a fresh
- checkout of both repos; any new `.sql` file lands within 24h.
-2. **`api` repo notifies on migration change** — `api/.github/workflows/notify-infra-on-migration.yml`
- sends a `repository_dispatch` event to infra when `api/internal/db/migrations/**`
- changes, triggering an immediate build.
-
-If neither runs, staging pg-platform will be behind on migrations and
-api startup will fail with "migration not applied" — operator-visible
-via `wrangler tail instanode-pg-platform-staging`.
-
-## Secrets
-
-Set via `wrangler secret put`, scoped to `--env staging`:
-
-| Secret | Source | Purpose |
-|---|---|---|
-| `POSTGRES_USER` | operator-defined (e.g. `instanode_admin`) | role for connection |
-| `POSTGRES_PASSWORD` | random, ≥32 chars | passed to connection_url |
-| `POSTGRES_DB` | `instant_platform` | initial DB created at first start |
-
-The actual connection string handed to api/worker/provisioner is built
-via service binding — they see `PG_PLATFORM` env binding, not a raw
-URL with the password.
-
-## Verifying
-
-```bash
-wrangler tail instanode-pg-platform-staging --format pretty
-# wait for: "pg-platform staging cold start — re-applying 63 migrations against fresh PGDATA"
-# then: "database system is ready to accept connections"
-
-# from a debug Worker shell:
-wrangler dev --env staging
-# Then inside the Worker: env.PG_PLATFORM.fetch("http://internal/healthz")
-```
-
-## Known limitations
-
-- **Cold-start cost is ~15-45s.** Synthetic warmer can keep it hot; without one, every traffic gap > sleepAfter (currently 30m) pays the full re-migration cost.
-- **No replication.** max_instances=1; HA is meaningless when disk is ephemeral. Production gets a different model entirely.
-- **No `pg_dump` artifacts persist.** If you need a snapshot for debugging, dump and immediately stream to R2 via the customer-backup pipeline; the local file dies on next sleep.
-- **63 migrations is the live count as of 2026-05-30.** When api repo adds mig 064+, the daily cron rebuild picks them up.
diff --git a/wrangler/pg-platform/src/worker.ts b/wrangler/pg-platform/src/worker.ts
deleted file mode 100644
index 7646da8..0000000
--- a/wrangler/pg-platform/src/worker.ts
+++ /dev/null
@@ -1,25 +0,0 @@
-// pg-platform Worker shell. Postgres doesn't speak HTTP, but CF
-// Containers require a Worker entrypoint. The Worker accepts a
-// service-binding RPC from other Containers and forwards a connection
-// hint; the actual TCP traffic flows over the Container DO's internal
-// network using `container.fetch(request)` with `Upgrade: tcp` semantics
-// (CF Containers' raw-TCP mode, available since the GA release).
-
-import { Container, getContainer } from "@cloudflare/containers";
-
-// Container-backed Durable Object that runs the platform Postgres image.
-export class PgPlatformContainer extends Container {
-  // Port forwarded to inside the container.
-  defaultPort = 5432;
-  sleepAfter = "30m"; // Longer than api so platform_db survives test bursts.
-}
-
-export default {
-  // Worker entrypoint: forwards every request, unmodified, to the
-  // Postgres container and returns its response.
-  async fetch(request: Request, env: Env): Promise<Response> {
-    // No name is passed to getContainer, so all callers share one
-    // Container instance.
-    const container = getContainer(env.PG_CONTAINER);
-    // Container holds the TCP listener; CF routes the upgraded socket through.
-    return container.fetch(request);
-  },
-};
-
-// Bindings declared in wrangler.toml for this Worker.
-interface Env {
-  // Durable Object namespace backing PgPlatformContainer.
-  PG_CONTAINER: DurableObjectNamespace<PgPlatformContainer>;
-}
diff --git a/wrangler/pg-platform/wrangler.toml b/wrangler/pg-platform/wrangler.toml
deleted file mode 100644
index 274e033..0000000
--- a/wrangler/pg-platform/wrangler.toml
+++ /dev/null
@@ -1,48 +0,0 @@
-# pg-platform on CF Containers (staging).
-#
-# Runs `postgres:16` in a CF Container. Data dir is ephemeral —
-# every sleep wipes /var/lib/postgresql/data. This is the explicit
-# user-blessed tradeoff for CF-only staging.
-#
-# Production does NOT use this; prod platform_db lives elsewhere.
-
-name = "instanode-pg-platform"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-[env.staging]
-name = "instanode-pg-platform-staging"
-# No public route — accessed only via service binding from api/worker/provisioner.
-
-[[env.staging.containers]]
-class_name = "PgPlatformContainer"
-# Custom image built by infra/.github/workflows/wrangler-build-staging-images.yml.
-# Bakes the 63 migrations from api/internal/db/migrations/*.sql into
-# /docker-entrypoint-initdb.d/ + pgvector extension. See ./Dockerfile.
-image = "ghcr.io/instanode-dev/instant-pg-platform:staging"
-max_instances = 1 # Single-writer; HA is meaningless when disk is ephemeral.
-instance_type = "standard" # 1 vCPU, 4 GiB RAM, 8 GiB ephemeral
-
-[[env.staging.durable_objects.bindings]]
-name = "PG_CONTAINER"
-class_name = "PgPlatformContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["PgPlatformContainer"]
-
-# Bootstrap secrets via wrangler secret put:
-# POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB
-# The Postgres image reads these env vars on first boot to initialize the
-# cluster — which it'll redo every sleep cycle.
-[env.staging.vars]
-POSTGRES_DB = "instant_platform"
-# POSTGRES_INITDB_ARGS controls locale; staging just uses default.
-PGDATA = "/var/lib/postgresql/data/pgdata"
-# Run our 63 migrations on container boot. NOTE(review): ./Dockerfile
-# bakes them into /docker-entrypoint-initdb.d/ and does not copy a
-# src/bootstrap.sh — confirm whether anything still reads this flag.
-APPLY_MIGRATIONS_ON_BOOT = "true"
-
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0
diff --git a/wrangler/provisioner/src/worker.ts b/wrangler/provisioner/src/worker.ts
deleted file mode 100644
index 72fde55..0000000
--- a/wrangler/provisioner/src/worker.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-import { Container, getContainer } from "@cloudflare/containers";
-
-// Container-backed Durable Object that runs the provisioner image.
-export class ProvisionerContainer extends Container {
-  defaultPort = 50051; // gRPC
-  // Idle period after which the container is put to sleep.
-  sleepAfter = "20m";
-}
-
-export default {
-  // Worker entrypoint: forwards every request, unmodified, to the
-  // provisioner container. No name is passed to getContainer, so all
-  // callers share one Container instance.
-  async fetch(request: Request, env: Env): Promise<Response> {
-    return getContainer(env.PROVISIONER_CONTAINER).fetch(request);
-  },
-};
-
-// Bindings declared in wrangler.toml for this Worker.
-interface Env {
-  // Durable Object namespace backing ProvisionerContainer.
-  PROVISIONER_CONTAINER: DurableObjectNamespace<ProvisionerContainer>;
-}
diff --git a/wrangler/provisioner/wrangler.toml b/wrangler/provisioner/wrangler.toml
deleted file mode 100644
index d1c93dc..0000000
--- a/wrangler/provisioner/wrangler.toml
+++ /dev/null
@@ -1,47 +0,0 @@
-# provisioner — gRPC service in a CF Container (staging).
-# No public route; api reaches it via service binding.
-
-name = "instanode-provisioner"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-[env.staging]
-name = "instanode-provisioner-staging"
-
-[[env.staging.containers]]
-class_name = "ProvisionerContainer"
-image = "ghcr.io/instanode-dev/instant-provisioner:staging"
-max_instances = 2
-instance_type = "standard"
-
-[[env.staging.durable_objects.bindings]]
-name = "PROVISIONER_CONTAINER"
-class_name = "ProvisionerContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["ProvisionerContainer"]
-
-[env.staging.vars]
-ENVIRONMENT = "staging"
-
-# Provisioner reaches the customer-data Containers via service bindings.
-[[env.staging.services]]
-binding = "PG_CUSTOMERS"
-service = "instanode-pg-customers-staging"
-
-[[env.staging.services]]
-binding = "MONGODB"
-service = "instanode-mongodb-staging"
-
-[[env.staging.services]]
-binding = "REDIS_PROVISION"
-service = "instanode-redis-provision-staging"
-
-[[env.staging.services]]
-binding = "NATS"
-service = "instanode-nats-staging"
-
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0
diff --git a/wrangler/redis-provision/Dockerfile b/wrangler/redis-provision/Dockerfile
deleted file mode 100644
index 299d710..0000000
--- a/wrangler/redis-provision/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-# redis-provision image for staging CF Container.
-#
-# Base: redis:7-alpine. CF Containers' ephemeral disk means RDB
-# persistence is pointless — every sleep wipes /data. We disable
-# RDB + AOF entirely and run in-memory-only with `allkeys-lru`
-# eviction so the Container can't OOM under sustained writes.
-#
-# Why custom (vs pristine redis:7-alpine):
-# - Bake redis.conf with auth + memory + eviction policy so the
-# Worker shell doesn't have to pass them via wrangler.toml CMD.
-# - Healthcheck via `redis-cli -a $REDIS_PASSWORD ping`.
-# - Auth is via `requirepass` from REDIS_PASSWORD env (wrangler
-# secret).
-
-FROM redis:7-alpine
-
-COPY infra/wrangler/redis-provision/redis.conf /etc/redis/redis.conf
-
-# Entrypoint that templates REDIS_PASSWORD env into the conf at boot.
-# Without this, the conf can't contain the secret at build time.
-COPY infra/wrangler/redis-provision/entrypoint.sh /usr/local/bin/staging-entrypoint.sh
-RUN chmod +x /usr/local/bin/staging-entrypoint.sh
-
-# Authenticate through the REDISCLI_AUTH environment variable so the
-# password never appears in redis-cli's argument list (no `-a`, hence
-# no auth warning to suppress). Healthy only on an exact PONG reply.
-HEALTHCHECK --interval=10s --timeout=3s --start-period=10s --retries=3 \
-  CMD REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli ping | grep -q '^PONG$' || exit 1
-
-EXPOSE 6379
-
-ENTRYPOINT ["/usr/local/bin/staging-entrypoint.sh"]
-CMD ["redis-server", "/etc/redis/redis.conf"]
diff --git a/wrangler/redis-provision/entrypoint.sh b/wrangler/redis-provision/entrypoint.sh
deleted file mode 100644
index bc62464..0000000
--- a/wrangler/redis-provision/entrypoint.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-# Templating entrypoint for staging redis. Inlines REDIS_PASSWORD into
-# /etc/redis/redis.conf at boot (the file ships with __REDIS_PASSWORD__
-# as a literal marker; we never bake a real secret into the image).
-#
-# Usage: staging-entrypoint.sh <command> [args...]
-# Exits 1 if REDIS_PASSWORD is unset or empty; otherwise rewrites the
-# conf and replaces itself with the given command.
-
-set -eu
-
-if [ -z "${REDIS_PASSWORD:-}" ]; then
-  echo "redis-provision: REDIS_PASSWORD env var is required" >&2
-  exit 1
-fi
-
-# Escape the characters sed treats specially in a replacement string
-# (backslash, "&", and the "|" delimiter used below) so any password
-# value is inserted literally instead of breaking or mangling the conf.
-ESCAPED_PASSWORD="$(printf '%s' "$REDIS_PASSWORD" | sed 's/[\\&|]/\\&/g')"
-
-# In-place substitute. Using a temp file because sed -i on alpine
-# behaves differently than GNU sed; this is portable.
-TMP="$(mktemp)"
-sed "s|__REDIS_PASSWORD__|${ESCAPED_PASSWORD}|" /etc/redis/redis.conf > "$TMP"
-mv "$TMP" /etc/redis/redis.conf
-chmod 600 /etc/redis/redis.conf  # only root reads — defense in depth
-
-# Hand off to the configured CMD (`redis-server /etc/redis/redis.conf`).
-exec "$@"
diff --git a/wrangler/redis-provision/redis.conf b/wrangler/redis-provision/redis.conf
deleted file mode 100644
index 7b423d0..0000000
--- a/wrangler/redis-provision/redis.conf
+++ /dev/null
@@ -1,28 +0,0 @@
-# Staging redis.conf — ephemeral, auth'd, LRU-capped.
-# entrypoint.sh rewrites the __REDIS_PASSWORD__ marker at container boot.
-
-bind 0.0.0.0
-port 6379
-protected-mode yes
-
-# Auth — the placeholder below is replaced with the REDIS_PASSWORD value.
-requirepass __REDIS_PASSWORD__
-
-# Memory cap + eviction. The cap must sit below the container's RAM or
-# the instance is OOM-killed before LRU eviction ever runs. CF documents
-# the "basic" instance type at 1 GiB RAM (4 GB is its disk) — TODO confirm.
-maxmemory 768mb
-maxmemory-policy allkeys-lru
-
-# No persistence — CF Containers wipe /data on sleep; RDB/AOF only waste CPU.
-save ""
-appendonly no
-
-# Logging to stdout for `wrangler tail`.
-logfile ""
-loglevel notice
-
-# Connection limits matched to instance class.
-maxclients 1000
-timeout 300
-tcp-keepalive 60
diff --git a/wrangler/redis-provision/src/worker.ts b/wrangler/redis-provision/src/worker.ts
deleted file mode 100644
index 2b77911..0000000
--- a/wrangler/redis-provision/src/worker.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { Container, getContainer } from "@cloudflare/containers";
-
-export class RedisContainer extends Container {
- defaultPort = 6379; // same port redis.conf listens on (`port 6379`)
- sleepAfter = "20m"; // presumably the idle window before the container sleeps — confirm
-}
-
-export default {
-  async fetch(request: Request, env: Env): Promise<Response> {
-    const url = new URL(request.url);
-    const tenant = url.hostname.split(".")[0].replace(/^redis-/, ""); // first DNS label minus "redis-"
-    const id = env.REDIS_CONTAINER.idFromName(tenant); // same tenant name → same object id
-    return env.REDIS_CONTAINER.get(id).fetch(request);
-  },
-};
-
-interface Env {
- REDIS_CONTAINER: DurableObjectNamespace; // bound as "REDIS_CONTAINER" in wrangler.toml
-}
diff --git a/wrangler/redis-provision/wrangler.toml b/wrangler/redis-provision/wrangler.toml
deleted file mode 100644
index 2896e8d..0000000
--- a/wrangler/redis-provision/wrangler.toml
+++ /dev/null
@@ -1,32 +0,0 @@
-# redis-provision — per-tenant Redis in a CF Container (staging).
-# NOTE(review): verify wrangler accepts a wildcard + path with custom_domain.
-name = "instanode-redis-provision"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-[env.staging]
-name = "instanode-redis-provision-staging"
-routes = [
- { pattern = "redis-*.staging.instanode.dev/*", custom_domain = true },
-]
-
-[[env.staging.containers]]
-class_name = "RedisContainer"
-# Custom image — redis:7-alpine plus a redis.conf that sets auth,
-# maxmemory and LRU eviction; the image entrypoint templates
-# REDIS_PASSWORD into that conf at boot.
-image = "ghcr.io/instanode-dev/instant-redis-provision:staging"
-max_instances = 10
-instance_type = "basic" # Redis is lighter than PG/Mongo
-
-[[env.staging.durable_objects.bindings]]
-name = "REDIS_CONTAINER"
-class_name = "RedisContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["RedisContainer"]
-
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0
diff --git a/wrangler/worker/src/worker.ts b/wrangler/worker/src/worker.ts
deleted file mode 100644
index db330bb..0000000
--- a/wrangler/worker/src/worker.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-import { Container, getContainer } from "@cloudflare/containers";
-
-export class WorkerContainer extends Container {
- defaultPort = 8091; // worker exposes /metrics + /readyz on 8091
- sleepAfter = "20m"; // presumably the idle window before the container sleeps — confirm
-}
-
-export default {
-  // HTTP path: proxy the request to the container (rare; mostly metrics scrapes).
-  async fetch(request: Request, env: Env): Promise<Response> {
-    return getContainer(env.WORKER_CONTAINER).fetch(request);
-  },
-  // Cron path: wake the container so River picks up due jobs.
-  async scheduled(_event: ScheduledEvent, env: Env): Promise<void> {
-    const c = getContainer(env.WORKER_CONTAINER);
-    // A no-op POST that the worker binary handles as "tick the job loop".
-    await c.fetch("http://internal/tick", { method: "POST" });
-  },
-};
-
-interface Env {
- WORKER_CONTAINER: DurableObjectNamespace; // bound as "WORKER_CONTAINER" in wrangler.toml
-}
diff --git a/wrangler/worker/wrangler.toml b/wrangler/worker/wrangler.toml
deleted file mode 100644
index 05b555d..0000000
--- a/wrangler/worker/wrangler.toml
+++ /dev/null
@@ -1,40 +0,0 @@
-# worker — River jobs in a CF Container (staging), woken by CF Cron Triggers.
-# NOTE(review): no route declared; confirm workers_dev is off if it must be private.
-
-name = "instanode-worker"
-main = "src/worker.ts"
-compatibility_date = "2026-05-30"
-
-[env.staging]
-name = "instanode-worker-staging"
-
-[[env.staging.containers]]
-class_name = "WorkerContainer"
-image = "ghcr.io/instanode-dev/instant-worker:staging"
-max_instances = 2
-instance_type = "standard"
-
-[[env.staging.durable_objects.bindings]]
-name = "WORKER_CONTAINER"
-class_name = "WorkerContainer"
-
-[[env.staging.migrations]]
-tag = "v1"
-new_sqlite_classes = ["WorkerContainer"]
-
-# Cron — fires every 5 minutes; the Worker shell wakes the Container.
-[env.staging.triggers]
-crons = ["*/5 * * * *"]
-
-[env.staging.vars]
-ENVIRONMENT = "staging"
-OBJECT_STORE_BACKEND = "r2"
-R2_BUCKET_NAME = "instant-shared-staging"
-
-[[env.staging.services]]
-binding = "PG_PLATFORM"
-service = "instanode-pg-platform-staging"
-
-[env.staging.observability]
-enabled = true
-head_sampling_rate = 1.0