From 5de5f9f5de4eefdaf50a77adb6e3fffa13205409 Mon Sep 17 00:00:00 2001
From: Alexander <alexander@awideweb.com>
Date: Wed, 17 Jun 2026 07:51:57 -0400
Subject: [PATCH] fix(comfyui): autoswitch before prompt submit

---
 docs/guides/configure.mdx                     |   2 +-
 docs/guides/generate-images.mdx               |  25 +---
 .../comfyui/custom_nodes/hal0_gpu_gate.py     | 126 +++++++++++++-----
 installer/install.sh                          |   8 ++
 scripts/deploy.sh                             |  15 +++
 src/hal0/api/__init__.py                      |   2 +-
 src/hal0/api/routes/comfyui.py                |  42 +-----
 src/hal0/api/routes/health.py                 |   6 +-
 src/hal0/slots/arbiter.py                     |  10 +-
 tests/api/test_comfyui_proxy.py               |  50 +++----
 tests/api/test_features_health.py             |   4 +-
 tests/comfyui/test_hal0_gpu_gate.py           |  61 ++++++---
 tests/install/test_comfyui_scripts_shipped.py |   8 ++
 ui/src/api/endpoints.ts                       |   6 +-
 ui/src/api/hooks/useComfyui.ts                |  10 +-
 ui/tests/e2e/specs/mobile-nav-v3.spec.ts      |  10 +-
 16 files changed, 219 insertions(+), 166 deletions(-)
diff --git a/docs/guides/configure.mdx b/docs/guides/configure.mdx
index 229bae55..28fa7b80 100644
--- a/docs/guides/configure.mdx
+++ b/docs/guides/configure.mdx
@@ -23,7 +23,7 @@ with it.
 | File | What it holds |
 |------|---------------|
 | `hal0.toml` | Top-level config: `[meta] schema_version`, `[slots] port_range_start/end`, `[models] store`, telemetry channel. |
-| `api.env` | Environment for the hal0-api service (systemd `EnvironmentFile`) — feature flags and env knobs like `HAL0_MODEL_STORE`, `HAL0_COMFYUI_SWITCHOVER_ENABLED`, `HAL0_MCP_ALLOWED_HOSTS`. |
+| `api.env` | Environment for the hal0-api service (systemd `EnvironmentFile`) — feature flags and env knobs like `HAL0_MODEL_STORE`, `HAL0_MCP_ALLOWED_HOSTS`. |
 | `upstreams.toml` | External / upstream provider routing. |
 | `providers.toml` | Provider credential references (key **names**, never plaintext secrets). |
 | `profiles.toml` | Optional profile catalogue. Absent → built-in seed profiles are used. |
diff --git a/docs/guides/generate-images.mdx b/docs/guides/generate-images.mdx
index 931c8418..d3c3beef 100644
--- a/docs/guides/generate-images.mdx
+++ b/docs/guides/generate-images.mdx
@@ -21,9 +21,9 @@ reloaded.
 
 <Aside type="caution">
 Switching the iGPU to generation **stops the LLM stack** (chat bots and
-memory extraction go dark) for as long as generation holds the GPU. The
-switchover write path is gated off by default — see
-[Enable the switchover](#enable-the-switchover).
+memory extraction go dark) for as long as generation holds the GPU. hal0
+does this automatically when a render needs the iGPU, then idle-restore
+hands it back to inference.
 </Aside>
 
 ## Read the generation-engine status
@@ -86,8 +86,7 @@ Body fields:
 Responses: **202** (`switching`, runs in the background — poll the
 `switchover` block on `/status`), **200** (`noop`, already in the target
 mode), **409** (switch already in flight, or a busy queue without
-`force`), **501** (switchover disabled on this host), **503** (arbiter not
-wired).
+`force`), **503** (arbiter not wired).
 
 Pin or unpin image mode independently:
 
@@ -97,20 +96,6 @@ curl -X POST http://localhost:8080/api/comfyui/pin \
   -d '{"pinned": true}'
 ```
 
-### Enable the switchover
-
-Both write paths (`/switchover` and `/pin`) stay gated behind an
-environment flag because the switch takes the LLM stack offline — an
-operator decision per host. Set it on hal0-api:
-
-```sh
-HAL0_COMFYUI_SWITCHOVER_ENABLED=1
-```
-
-Without it, both endpoints answer **501** (`comfyui.switchover_disabled`).
-See [Configure](/docs/guides/configure) for where to set environment
-keys (`api.env`).
-
 ## Generate an image
 
 The OpenAI-compatible endpoint translates your request into a ComfyUI
@@ -170,5 +155,5 @@ caps a single render at 10 minutes before it gives up.
 ## See also
 
 - [Manage slots](/docs/guides/manage-slots) — the `img` slot lifecycle.
-- [Configure](/docs/guides/configure) — set `HAL0_COMFYUI_SWITCHOVER_ENABLED` and the `[image]` slot defaults.
+- [Configure](/docs/guides/configure) — set the `[image]` slot defaults.
 - [Observe the system](/docs/guides/logs-and-activity) — tail `hal0-slot@img`.
diff --git a/installer/comfyui/custom_nodes/hal0_gpu_gate.py b/installer/comfyui/custom_nodes/hal0_gpu_gate.py
index 10b0766d..785c4d9b 100644
--- a/installer/comfyui/custom_nodes/hal0_gpu_gate.py
+++ b/installer/comfyui/custom_nodes/hal0_gpu_gate.py
@@ -1,12 +1,12 @@
-"""hal0 GPU gate — ComfyUI custom node that 403-blocks job submission while
-the Strix Halo iGPU is serving the LLM stack.
+"""hal0 GPU prepare hook — asks hal0 to enter image mode before job submit.
 
 The ComfyUI container is RESIDENT on hal0 (its web UI stays up in both GPU
-modes so users can build workflows/prompts any time), but GPU memory is
-exclusive: running a generation while the LLM slots hold GTT would OOM or
-evict them. hal0-api's GpuArbiter guards its own dispatch path, and this
-middleware closes the remaining door — the web UI's own "Queue Prompt"
-(``POST /prompt`` / ``POST /api/prompt``), which goes straight to ComfyUI.
+modes so users can build workflows/prompts any time), and queueing a prompt is
+now allowed from either mode. When the web UI's own "Queue Prompt"
+(``POST /prompt`` / ``POST /api/prompt``) arrives while hal0 still reports
+inference mode, this middleware asks hal0-api's GpuArbiter to switch to image
+mode and waits briefly for the switch to complete before allowing ComfyUI to
+continue.
 
 Deployment: this single file is dropped into the host-mounted
 ``custom_nodes`` dir (``/mnt/ai-models/comfyui/custom_nodes/``) — no image
@@ -14,15 +14,16 @@
 middleware to the PromptServer app (the same mechanism ComfyUI-Login uses).
 The container runs with host networking, so hal0-api is on loopback.
 
-Fail-open by design: if hal0-api is unreachable the gate allows the prompt —
-a broken hal0 must never brick standalone ComfyUI use. The pure decision
-logic (``should_block``) is unit-tested in the hal0 repo
+Fail-open by design: if hal0-api is unreachable or the switch request fails,
+the hook allows the prompt — a broken hal0 must never brick standalone ComfyUI
+use. The pure decision logic (``should_prepare_image_mode``) is unit-tested in the hal0 repo
 (tests/comfyui/test_hal0_gpu_gate.py); the aiohttp wiring is exercised by
 the CT105 live verification.
 """
 
 import json
 import os
+import time
 import urllib.error
 import urllib.request
 
@@ -31,41 +32,52 @@
 HAL0_STATUS_URL = os.environ.get(
     "HAL0_COMFYUI_STATUS_URL", "http://127.0.0.1:8080/api/comfyui/status"
 )
+HAL0_SWITCHOVER_URL = os.environ.get(
+    "HAL0_COMFYUI_SWITCHOVER_URL", "http://127.0.0.1:8080/api/comfyui/switchover"
+)
 _STATUS_TIMEOUT_S = 2.0
 
+
+def _float_env(name: str, default: float) -> float:
+    try:
+        return float(os.environ.get(name, str(default)))
+    except ValueError:
+        return default
+
+
+_SWITCH_TIMEOUT_S = _float_env("HAL0_COMFYUI_SWITCH_TIMEOUT_S", 120.0)
+_SWITCH_POLL_S = _float_env("HAL0_COMFYUI_SWITCH_POLL_S", 0.5)
+
 #: Job-submission routes — and ONLY those. Everything the editor needs
 #: (/object_info, /queue GET, workflow save/load, uploads) always passes.
 _BLOCK_PATHS = frozenset({"/prompt", "/api/prompt"})
 
-#: Mirrors ComfyUI's own /prompt error envelope so the frontend renders the
-#: message instead of a generic failure toast.
-GATE_BODY = {
-    "error": {
-        "type": "hal0_gpu_gate",
-        "message": (
-            "The GPU is in inference mode (LLM slots loaded). Flip the "
-            "Image Gen switch in the hal0 dashboard, then queue again."
-        ),
-        "details": "hal0 GpuArbiter mode is 'inference'; generation is gated.",
-        "extra_info": {},
-    },
-    "node_errors": {},
-}
+def _is_prompt_submit(method: str, path: str) -> bool:
+    return method == "POST" and path in _BLOCK_PATHS
 
 
-def should_block(method: str, path: str, status: "dict | None") -> bool:
-    """True iff this request is a job submission while the GPU serves LLMs.
+def should_prepare_image_mode(method: str, path: str, status: "dict | None") -> bool:
+    """True iff this prompt submit should ask hal0 to enter image mode.
 
     ``status`` is hal0-api's /api/comfyui/status JSON (or None when
     unreachable / unparseable → fail-open).
     """
-    if method != "POST" or path not in _BLOCK_PATHS:
+    if not _is_prompt_submit(method, path):
         return False
     if not isinstance(status, dict):
         return False
     return status.get("mode") == "inference"
 
 
+def should_block(method: str, path: str, status: "dict | None") -> bool:
+    """Backward-compatible name for older tests/imports.
+
+    Prompt submission is no longer blocked by this custom node; it prepares
+    image mode best-effort, then lets ComfyUI handle the prompt.
+    """
+    return False
+
+
 def _fetch_status() -> "dict | None":
     """Blocking status fetch via urllib (run in a thread by the middleware).
 
@@ -80,8 +92,55 @@ def _fetch_status() -> "dict | None":
         return None
 
 
+def _post_switchover() -> bool:
+    """Ask hal0-api to enter generation mode; True means accepted/noop.
+
+    stdlib-only on purpose: custom nodes can't assume extra deps in the
+    ComfyUI venv.
+    """
+    body = json.dumps({"mode": "generation"}).encode("utf-8")
+    req = urllib.request.Request(
+        HAL0_SWITCHOVER_URL,
+        data=body,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=_STATUS_TIMEOUT_S) as resp:
+            return resp.status in (200, 202)
+    except urllib.error.HTTPError as exc:
+        # 409 can mean a switch is already in flight. Poll status below.
+        return exc.code == 409
+    except (urllib.error.URLError, OSError, ValueError):
+        return False
+
+
+def prepare_image_mode() -> None:
+    """Best-effort inference → generation handoff before forwarding /prompt.
+
+    Fail-open: exceptions and timeouts return without blocking the prompt.
+    """
+    status = _fetch_status()
+    if not should_prepare_image_mode("POST", "/prompt", status):
+        return
+    if not _post_switchover():
+        return
+
+    deadline = time.monotonic() + max(0.0, _SWITCH_TIMEOUT_S)
+    while time.monotonic() < deadline:
+        status = _fetch_status()
+        if not isinstance(status, dict):
+            return
+        if status.get("mode") == "generation":
+            return
+        sw = status.get("switchover")
+        if isinstance(sw, dict) and sw.get("error"):
+            return
+        time.sleep(max(0.05, _SWITCH_POLL_S))
+
+
 def _install() -> None:
-    """Attach the gate middleware to ComfyUI's PromptServer (fail-soft)."""
+    """Attach the prepare middleware to ComfyUI's PromptServer (fail-soft)."""
     try:
         import asyncio
 
@@ -90,14 +149,15 @@ def _install() -> None:
 
         @web.middleware
         async def hal0_gpu_gate_middleware(request, handler):
-            if request.method == "POST" and request.path in _BLOCK_PATHS:
-                status = await asyncio.to_thread(_fetch_status)
-                if should_block(request.method, request.path, status):
-                    return web.json_response(GATE_BODY, status=403)
+            if _is_prompt_submit(request.method, request.path):
+                await asyncio.to_thread(prepare_image_mode)
             return await handler(request)
 
         PromptServer.instance.app.middlewares.append(hal0_gpu_gate_middleware)
-        print(f"[hal0_gpu_gate] /prompt gated on hal0 GPU mode ({HAL0_STATUS_URL})")
+        print(
+            "[hal0_gpu_gate] /prompt prepares hal0 image mode "
+            f"({HAL0_STATUS_URL} → {HAL0_SWITCHOVER_URL})"
+        )
     except Exception as exc:  # outside ComfyUI (unit tests) or API drift
         print(f"[hal0_gpu_gate] not installed: {exc}")
 
diff --git a/installer/install.sh b/installer/install.sh
index 10feed9f..57e28e9a 100755
--- a/installer/install.sh
+++ b/installer/install.sh
@@ -953,6 +953,7 @@ done
 ui_step "ComfyUI control scripts"
 
 COMFYUI_SCRIPTS_SRC="${REPO_ROOT}/installer/comfyui/scripts"
+COMFYUI_CUSTOM_NODES_SRC="${REPO_ROOT}/installer/comfyui/custom_nodes"
 COMFYUI_DIR="/opt/comfyui"
 COMFYUI_MODELS_ROOT="/mnt/ai-models/comfyui"
 
@@ -973,6 +974,13 @@ else
     done
     info "ensured ${COMFYUI_MODELS_ROOT}/{models,output,input,user,custom_nodes}"
 
+    if [[ -d "${COMFYUI_CUSTOM_NODES_SRC}" ]]; then
+        install -m0644 "${COMFYUI_CUSTOM_NODES_SRC}"/*.py "${COMFYUI_MODELS_ROOT}/custom_nodes/"
+        info "wrote ComfyUI custom nodes → ${COMFYUI_MODELS_ROOT}/custom_nodes/"
+    else
+        warn "${COMFYUI_CUSTOM_NODES_SRC} not found — ComfyUI custom nodes not installed"
+    fi
+
     # Place extra_model_paths.yaml if not already present (operator may have a
     # customised copy — never overwrite).
     _EXTRA_PATHS_SRC="${REPO_ROOT}/installer/comfyui/extra_model_paths.yaml"
diff --git a/scripts/deploy.sh b/scripts/deploy.sh
index b6affd8b..9e0eb241 100755
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -145,6 +145,21 @@ elif [[ "$DO_BUILD" -eq 0 ]]; then
     warn "skipping UI build (--no-build)"
 fi
 
+# ── 2b. Sync runtime-mounted ComfyUI custom nodes ─────────────────────────────
+# ComfyUI imports custom nodes from the persistent model share, not the source
+# checkout. Keep shipped hal0 nodes in sync during runtime deploys; the ComfyUI
+# slot still needs a restart to import changed node code.
+comfy_nodes_src="${REPO_ROOT}/installer/comfyui/custom_nodes"
+comfy_nodes_dst="${HAL0_COMFYUI_CUSTOM_NODES_DIR:-/mnt/ai-models/comfyui/custom_nodes}"
+if [[ -d "$comfy_nodes_src" ]]; then
+    if install -d "$comfy_nodes_dst" 2>/dev/null \
+        && install -m0644 "$comfy_nodes_src"/*.py "$comfy_nodes_dst"/ 2>/dev/null; then
+        info "ComfyUI custom nodes synced → ${comfy_nodes_dst}"
+    else
+        warn "could not sync ComfyUI custom nodes to ${comfy_nodes_dst}"
+    fi
+fi
+
 # ── 3. Re-assert group-shared ownership ───────────────────────────────────────
 # The reset + build above just (re)created files as the deploying user. Hand the
 # tree back to the shared group so the hal0 service user (Hermes, agents) can
diff --git a/src/hal0/api/__init__.py b/src/hal0/api/__init__.py
index 5b56f055..f006f209 100644
--- a/src/hal0/api/__init__.py
+++ b/src/hal0/api/__init__.py
@@ -1105,7 +1105,7 @@ def create_app() -> FastAPI:
     )
     app.include_router(slots.router, prefix="/api/slots", tags=["slots"])
     # Read-only ComfyUI "generation engine" status for the slots-page Image-Gen
-    # tab (docker + systemd + ComfyUI HTTP), plus the feature-gated switchover.
+    # tab (docker + systemd + ComfyUI HTTP), plus arbiter switchover controls.
     app.include_router(comfyui.router, prefix="/api/comfyui", tags=["comfyui"])
     app.include_router(models.router, prefix="/api/models", tags=["models"])
     # Issue #311: HuggingFace Hub discovery (search proxy). Sits next
diff --git a/src/hal0/api/routes/comfyui.py b/src/hal0/api/routes/comfyui.py
index 8942304b..149d6907 100644
--- a/src/hal0/api/routes/comfyui.py
+++ b/src/hal0/api/routes/comfyui.py
@@ -1,4 +1,4 @@
-"""Read-only ComfyUI "generation engine" status aggregator (+ gated switchover).
+"""ComfyUI "generation engine" status aggregator + control routes.
 
 The dashboard models ComfyUI as ONE containerized generation engine, not a list
 of per-model slots (a single run loads many cooperating models at once, and it
@@ -23,8 +23,7 @@
 in the background behind a 202; the ``switchover`` block on /status tracks the
 transition. The API no longer shells out — the ``/opt/comfyui`` control scripts
 stay on disk for manual ops only. ``POST /api/comfyui/pin`` toggles the
-arbiter's manual pin (blocks idle-restore). Both write paths stay feature-gated
-behind ``HAL0_COMFYUI_SWITCHOVER_ENABLED`` (501 when off).
+arbiter's manual pin (blocks idle-restore).
 """
 
 from __future__ import annotations
@@ -445,27 +444,6 @@ async def _run_switch(arbiter: Any, mode: str, *, pin: bool = False, force: bool
         _switch["target"] = None
 
 
-def _gate_closed() -> JSONResponse | None:
-    """501 when the operator hasn't enabled the GPU-switch write path."""
-    if os.environ.get("HAL0_COMFYUI_SWITCHOVER_ENABLED", "0") == "1":
-        return None
-    return JSONResponse(
-        status_code=501,
-        content={
-            "error": {
-                "code": "comfyui.switchover_disabled",
-                "message": (
-                    "ComfyUI switchover is disabled on this host. It stops "
-                    "the LLM stack (bots + memory extraction go dark) while "
-                    "generation holds the iGPU; set "
-                    "HAL0_COMFYUI_SWITCHOVER_ENABLED=1 on hal0-api to "
-                    "enable it."
-                ),
-            }
-        },
-    )
-
-
 def _arbiter_unavailable() -> JSONResponse:
     return JSONResponse(
         status_code=503,
@@ -490,13 +468,10 @@ async def comfyui_switchover(request: Request, background_tasks: BackgroundTasks
     background — track completion via the ``switchover`` block on
     ``GET /status``.
 
-    Stays gated behind ``HAL0_COMFYUI_SWITCHOVER_ENABLED`` because the switch
-    takes the LLM stack (bots + memory extraction) offline (an operator
-    decision per host), not because the path is unwired.
+    The endpoint is always available when the SlotManager/GpuArbiter is wired:
+    ComfyUI prompt submission can call it as the implicit handoff before
+    enqueueing a render.
     """
-    gate = _gate_closed()
-    if gate is not None:
-        return gate
     try:
         body = await request.json()
     except ValueError:
@@ -577,12 +552,9 @@ async def comfyui_switchover(request: Request, background_tasks: BackgroundTasks
 async def comfyui_pin(request: Request) -> JSONResponse:
     """Toggle the arbiter's manual pin (holds image mode against idle-restore).
 
-    Body: ``{"pinned": bool}``. Gated by the same env flag as the switchover —
-    pinning only matters when the GPU-switch write path is live.
+    Body: ``{"pinned": bool}``. Pinning disables idle auto-restore while image
+    mode is active.
     """
-    gate = _gate_closed()
-    if gate is not None:
-        return gate
     try:
         body = await request.json()
     except ValueError:
diff --git a/src/hal0/api/routes/health.py b/src/hal0/api/routes/health.py
index 0e5b2e4c..50d72ccd 100644
--- a/src/hal0/api/routes/health.py
+++ b/src/hal0/api/routes/health.py
@@ -16,7 +16,6 @@
 
 from __future__ import annotations
 
-import os
 import shutil
 from pathlib import Path
 from typing import Any
@@ -246,8 +245,7 @@ async def list_features(request: Request) -> dict[str, Any]:
 
     Flat ``feature → bool | str`` map:
 
-      - ``comfyui_switchover``: image-gen engine switchover is unlocked
-        (``HAL0_COMFYUI_SWITCHOVER_ENABLED=1``).
+      - ``comfyui_switchover``: image-gen engine switchover route is present.
       - ``memory``: a memory provider is wired (HAL0_MEMORY_ENABLED + a
         successful init).
       - ``memory_engine``: the configured engine name (``hindsight`` |
@@ -257,7 +255,7 @@ async def list_features(request: Request) -> dict[str, Any]:
         restart) — not implemented yet (pending ADR-0015), always false.
     """
     features: dict[str, Any] = {
-        "comfyui_switchover": os.environ.get("HAL0_COMFYUI_SWITCHOVER_ENABLED", "") == "1",
+        "comfyui_switchover": True,
         "memory": getattr(request.app.state, "memory_provider", None) is not None,
         "mcp_supervisor": False,
     }
diff --git a/src/hal0/slots/arbiter.py b/src/hal0/slots/arbiter.py
index 542c584f..75f86f34 100644
--- a/src/hal0/slots/arbiter.py
+++ b/src/hal0/slots/arbiter.py
@@ -21,9 +21,9 @@
     (cold-)started when it isn't already dispatchable.
   - ``restore_llm`` — ``POST /free`` to ComfyUI (drop its models from GTT),
     then reload the saved llm set. The container keeps serving the UI.
-  - actually *running* a generation in llm mode is refused at three gates:
-    the dispatch guard (``guard_dispatch``), and ComfyUI's own ``/prompt``
-    via the host-mounted ``hal0_gpu_gate`` custom node (403).
+  - direct ComfyUI ``/prompt`` submission goes through the host-mounted
+    ``hal0_gpu_gate`` custom node, which now asks hal0-api to enter image
+    mode before forwarding the prompt.
 
 Crash-recovery ordering (locked): the arbiter persists its target state to
 ``/var/lib/hal0/gpu_arbiter.json`` BEFORE the first unload, so an api
@@ -207,8 +207,8 @@ class GpuInferenceMode(Hal0Error):
     """Image dispatch refused — the GPU is serving the LLM set.
 
     The resident ComfyUI container stays READY in llm mode (its web UI is
-    always up), so this guard is what stops a generation from grabbing GTT
-    under the loaded LLM weights. Flip the switch first.
+    always up). The OpenAI-compatible image route switches before dispatch,
+    so this guard mainly protects lower-level/img-slot dispatch callers.
     """
 
     code = "gpu.inference_mode"
diff --git a/tests/api/test_comfyui_proxy.py b/tests/api/test_comfyui_proxy.py
index bf1e6b99..b94751f9 100644
--- a/tests/api/test_comfyui_proxy.py
+++ b/tests/api/test_comfyui_proxy.py
@@ -1,4 +1,4 @@
-"""Tests for the read-only ComfyUI status aggregator + gated switchover.
+"""Tests for the ComfyUI status aggregator + arbiter switchover.
 
 The dashboard's Image-Gen tab renders a single "generation engine" pane backed
 by ``GET /api/comfyui/status``. That endpoint aggregates three sources, all of
@@ -10,14 +10,13 @@
     currently owns the single iGPU
   - ComfyUI's own HTTP API (``/system_stats`` + ``/queue``) for live telemetry
 
-The switchover *write* path (POST /api/comfyui/switchover) is feature-gated
-behind ``HAL0_COMFYUI_SWITCHOVER_ENABLED`` (501 when off). When on it validates
-the target mode, no-ops if already there, refuses to drop a busy render queue
-without ``force``, and drives the GpuArbiter in the background behind a 202 —
-the ``switchover`` block on /status is what tracks the transition to terminal.
-No subprocess is ever spawned for the switch (the shell-script path is retired):
-tests install a stub arbiter on ``app.state.slot_manager`` and assert the
-arbiter calls, mirroring the patched status seams.
+The switchover *write* path (POST /api/comfyui/switchover) validates the target
+mode, no-ops if already there, refuses to drop a busy render queue without
+``force``, and drives the GpuArbiter in the background behind a 202 — the
+``switchover`` block on /status is what tracks the transition to terminal. No
+subprocess is ever spawned for the switch (the shell-script path is retired):
+tests install a stub arbiter on ``app.state.slot_manager`` and assert the arbiter
+calls, mirroring the patched status seams.
 """
 
 from __future__ import annotations
@@ -448,15 +447,9 @@ def test_switchover_arbiter_error_recorded(
 
 
 def test_pin_endpoint_toggles(client: TestClient, monkeypatch: pytest.MonkeyPatch) -> None:
-    # Same 501 feature gate as the switchover, then a plain set_pin passthrough
-    # that reflects the new value back.
+    # Plain set_pin passthrough that reflects the new value back.
     arb = _install_arbiter(client)
     r = client.post("/api/comfyui/pin", json={"pinned": True})
-    assert r.status_code == 501
-    arb.set_pin.assert_not_called()
-
-    monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "1")
-    r = client.post("/api/comfyui/pin", json={"pinned": True})
     assert r.status_code == 200
     assert r.json() == {"pinned": True}
     arb.set_pin.assert_called_once_with(True)
@@ -521,21 +514,22 @@ def test_switchover_rejects_unknown_mode(
     assert r.json()["error"]["code"] == "comfyui.invalid_mode"
 
 
-def test_switchover_gated_off_returns_501(client: TestClient) -> None:
-    # Default: the privileged root path is NOT wired. Must refuse, not pretend.
-    r = client.post("/api/comfyui/switchover", json={"mode": "generation"})
-    assert r.status_code == 501
-    assert "switchover" in r.json()["error"]["code"]
+def test_switchover_is_not_env_gated(client: TestClient) -> None:
+    # Prompt submission can implicitly ask for image mode, so the switchover
+    # route must not depend on HAL0_COMFYUI_SWITCHOVER_ENABLED.
+    arb = _install_arbiter(client)
+    c, s, f = _patch(container="absent", hermes=True, fetch=_fetch_down)
+    with c, s, f:
+        r = client.post("/api/comfyui/switchover", json={"mode": "generation"})
+    assert r.status_code == 202
+    arb.ensure_img.assert_awaited_once_with(pin=False)
 
 
-def test_switchover_enabled_flag_is_read_per_request(
-    client: TestClient, monkeypatch: pytest.MonkeyPatch
-) -> None:
-    # When the flag is on, the gate opens (even though the stub then reports the
-    # root path is still unimplemented — proves the flag is consulted live).
-    monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "1")
+def test_switchover_unwired_returns_503_not_501(client: TestClient) -> None:
+    client.app.state.slot_manager = None
     r = client.post("/api/comfyui/switchover", json={"mode": "generation"})
-    assert r.status_code != 501
+    assert r.status_code == 503
+    assert r.json()["error"]["code"] == "comfyui.arbiter_unavailable"
 
 
 # ── _container_state: podman-first probe (#710) ──────────────────────────────
diff --git a/tests/api/test_features_health.py b/tests/api/test_features_health.py
index f32a4188..91773884 100644
--- a/tests/api/test_features_health.py
+++ b/tests/api/test_features_health.py
@@ -26,14 +26,14 @@ def test_features_shape(client: TestClient) -> None:
     assert body["mcp_supervisor"] is False
 
 
-def test_features_comfyui_gate_reads_env(
+def test_features_comfyui_switchover_always_available(
     client: TestClient,
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
     monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "1")
     assert client.get("/api/features").json()["comfyui_switchover"] is True
     monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "0")
-    assert client.get("/api/features").json()["comfyui_switchover"] is False
+    assert client.get("/api/features").json()["comfyui_switchover"] is True
 
 
 def test_features_memory_reflects_state(client: TestClient) -> None:
diff --git a/tests/comfyui/test_hal0_gpu_gate.py b/tests/comfyui/test_hal0_gpu_gate.py
index 725a8f9e..e80c521b 100644
--- a/tests/comfyui/test_hal0_gpu_gate.py
+++ b/tests/comfyui/test_hal0_gpu_gate.py
@@ -1,10 +1,10 @@
-"""hal0_gpu_gate — the ComfyUI custom node that 403-blocks job submission
-while the iGPU is in inference (llm) mode.
+"""hal0_gpu_gate — the ComfyUI custom node that prepares image mode before
+job submission while the iGPU is in inference (llm) mode.
 
 The node ships in ``installer/comfyui/custom_nodes/hal0_gpu_gate.py`` and is
 host-mounted into the resident ComfyUI container's ``custom_nodes`` dir, so
 the web UI stays fully usable (editor, /object_info, workflow save/load)
-while only ``POST /prompt`` is gated on the arbiter mode.
+while only ``POST /prompt`` triggers a best-effort arbiter handoff.
 
 Only the pure decision logic is unit-tested here — the aiohttp middleware /
 PromptServer wiring needs a live ComfyUI process and is exercised in the
@@ -42,42 +42,59 @@ def test_node_file_exists_and_imports_outside_comfyui() -> None:
     assert mod.NODE_CLASS_MAPPINGS == {}
 
 
-def test_blocks_prompt_post_in_inference_mode() -> None:
+def test_prompt_post_in_inference_mode_prepares_but_does_not_block() -> None:
     mod = _load_node()
     status = {"mode": "inference"}
-    assert mod.should_block("POST", "/prompt", status) is True
-    # the frontend posts to /api/prompt on recent ComfyUI versions
-    assert mod.should_block("POST", "/api/prompt", status) is True
+    assert mod.should_prepare_image_mode("POST", "/prompt", status) is True
+    # The frontend posts to /api/prompt on recent ComfyUI versions.
+    assert mod.should_prepare_image_mode("POST", "/api/prompt", status) is True
+    # The old 403 gate is gone; the middleware prepares image mode and forwards.
+    assert mod.should_block("POST", "/prompt", status) is False
 
 
 def test_allows_prompt_post_in_generation_mode() -> None:
     mod = _load_node()
-    assert mod.should_block("POST", "/prompt", {"mode": "generation"}) is False
+    assert mod.should_prepare_image_mode("POST", "/prompt", {"mode": "generation"}) is False
 
 
 def test_fails_open_when_hal0_api_unreachable() -> None:
     """hal0-api down → status unknown → never brick standalone ComfyUI use."""
     mod = _load_node()
-    assert mod.should_block("POST", "/prompt", None) is False
-    assert mod.should_block("POST", "/prompt", {"unexpected": "shape"}) is False
+    assert mod.should_prepare_image_mode("POST", "/prompt", None) is False
+    assert mod.should_prepare_image_mode("POST", "/prompt", {"unexpected": "shape"}) is False
 
 
 def test_never_blocks_other_routes_or_methods() -> None:
     """Everything that makes the editor usable must always pass."""
     mod = _load_node()
     status = {"mode": "inference"}
-    assert mod.should_block("GET", "/prompt", status) is False
-    assert mod.should_block("POST", "/object_info", status) is False
-    assert mod.should_block("GET", "/queue", status) is False
-    assert mod.should_block("POST", "/upload/image", status) is False
+    assert mod.should_prepare_image_mode("GET", "/prompt", status) is False
+    assert mod.should_prepare_image_mode("POST", "/object_info", status) is False
+    assert mod.should_prepare_image_mode("GET", "/queue", status) is False
+    assert mod.should_prepare_image_mode("POST", "/upload/image", status) is False
 
 
-def test_gate_body_is_comfyui_frontend_renderable() -> None:
-    """403 body mirrors ComfyUI's /prompt error envelope so the frontend
-    surfaces the message instead of a generic failure toast."""
+def test_prepare_image_mode_requests_switchover_and_waits(monkeypatch) -> None:
     mod = _load_node()
-    body = mod.GATE_BODY
-    assert body["node_errors"] == {}
-    err = body["error"]
-    assert err["type"] == "hal0_gpu_gate"
-    assert "Image Gen" in err["message"]
+    statuses = iter(
+        [
+            {"mode": "inference"},
+            {"mode": "inference", "switchover": {"active": True, "target": "generation"}},
+            {"mode": "generation"},
+        ]
+    )
+    calls: list[str] = []
+
+    monkeypatch.setattr(mod, "_fetch_status", lambda: next(statuses))
+    monkeypatch.setattr(mod, "_post_switchover", lambda: calls.append("switch") or True)
+    monkeypatch.setattr(mod.time, "sleep", lambda _seconds: None)
+
+    mod.prepare_image_mode()
+    assert calls == ["switch"]
+
+
+def test_prepare_image_mode_fails_open_when_switchover_fails(monkeypatch) -> None:
+    mod = _load_node()
+    monkeypatch.setattr(mod, "_fetch_status", lambda: {"mode": "inference"})
+    monkeypatch.setattr(mod, "_post_switchover", lambda: False)
+    mod.prepare_image_mode()
diff --git a/tests/install/test_comfyui_scripts_shipped.py b/tests/install/test_comfyui_scripts_shipped.py
index 7af33ed4..2a028003 100644
--- a/tests/install/test_comfyui_scripts_shipped.py
+++ b/tests/install/test_comfyui_scripts_shipped.py
@@ -16,6 +16,7 @@
 # Repo root = three levels up from tests/install/
 REPO = Path(__file__).parent.parent.parent
 SCRIPTS_DIR = REPO / "installer" / "comfyui" / "scripts"
+CUSTOM_NODES_DIR = REPO / "installer" / "comfyui" / "custom_nodes"
 COMFYUI_PY = REPO / "src" / "hal0" / "api" / "routes" / "comfyui.py"
 INSTALL_SH = REPO / "installer" / "install.sh"
 
@@ -101,3 +102,10 @@ def test_install_sh_uses_install_command_for_scripts():
         or "COMFYUI_DIR" in content
     )
     assert has_install_block, "install.sh does not contain 'install -d /opt/comfyui' or equivalent"
+
+
+def test_install_sh_places_comfyui_custom_nodes():
+    content = INSTALL_SH.read_text()
+    assert (CUSTOM_NODES_DIR / "hal0_gpu_gate.py").exists()
+    assert "installer/comfyui/custom_nodes" in content or "comfyui/custom_nodes" in content
+    assert "COMFYUI_MODELS_ROOT}/custom_nodes" in content
diff --git a/ui/src/api/endpoints.ts b/ui/src/api/endpoints.ts
index f5dbbf87..69eeec4d 100644
--- a/ui/src/api/endpoints.ts
+++ b/ui/src/api/endpoints.ts
@@ -11,12 +11,10 @@ export const ENDPOINTS = {
   slots: '/api/slots',
 
   // ── ComfyUI generation engine (slots-page Image-Gen tab) ─────────
-  // Read-only aggregate of docker + systemd + ComfyUI HTTP; the
-  // switchover write-path is feature-gated server-side.
+  // Read-only aggregate of docker + systemd + ComfyUI HTTP.
   comfyuiStatus: '/api/comfyui/status',
   comfyuiSwitchover: '/api/comfyui/switchover',
-  // Pin image mode (disables the arbiter's idle auto-restore). 501 when the
-  // switchover gate is off.
+  // Pin image mode (disables the arbiter's idle auto-restore).
   comfyuiPin: '/api/comfyui/pin',
   // Render control
   comfyuiRenderCancel: '/api/comfyui/render/cancel',
diff --git a/ui/src/api/hooks/useComfyui.ts b/ui/src/api/hooks/useComfyui.ts
index 2db824b6..ea2bc2ee 100644
--- a/ui/src/api/hooks/useComfyui.ts
+++ b/ui/src/api/hooks/useComfyui.ts
@@ -13,9 +13,7 @@
 // arbiter runs the transition in-process; the `switchover` block on /status
 // (active/target/error) is what tracks the transition to terminal — the pane's
 // poll renders it, per the async-job-must-poll-to-terminal rule. Tearing down a
-// non-empty queue needs `force: true` (the confirm dialog is that consent). The
-// whole path stays feature-gated server-side (HAL0_COMFYUI_SWITCHOVER_ENABLED,
-// 501 when off) — surfaced as a toast, never an optimistic flip.
+// non-empty queue needs `force: true` (the confirm dialog is that consent).
 //
 // `arbiter` is the arbiter-truth block ({mode img|llm, pinned, saved slots,
 // idle_restore_at}); it is null when the arbiter is unavailable (gate off /
@@ -120,8 +118,8 @@ export interface SwitchoverBody {
   pin?: boolean
 }
 
-// raw:true so the dev mockFetch GET-shim can't mask the 501/503 gate — we want
-// the real status code to drive the toast copy.
+// raw:true so the dev mockFetch GET-shim can't mask 503 arbiter failures — we
+// want the real status code to drive the toast copy.
 export function useComfyuiSwitchover() {
   return useMutation({
     mutationFn: (body: SwitchoverBody) =>
@@ -131,7 +129,7 @@ export function useComfyuiSwitchover() {
 
 // Pin / unpin image mode (disables / re-arms the arbiter's idle auto-restore).
 // Synchronous 200 {"pinned":bool} — the caller refetches /status to reflect
-// the new arbiter state. raw:true for the same 501-gate reason as switchover.
+// the new arbiter state. raw:true for the same failure handling as switchover.
 export function useComfyuiPin() {
   return useMutation({
     mutationFn: (body: { pinned: boolean }) =>
diff --git a/ui/tests/e2e/specs/mobile-nav-v3.spec.ts b/ui/tests/e2e/specs/mobile-nav-v3.spec.ts
index f9d8cb12..95746718 100644
--- a/ui/tests/e2e/specs/mobile-nav-v3.spec.ts
+++ b/ui/tests/e2e/specs/mobile-nav-v3.spec.ts
@@ -1,9 +1,9 @@
 /**
  * mobile-nav-v3 — at ≤720px the desktop sidebar is hidden and navigation
  * moves into a top-right hamburger (.tb-menu) that opens a slide-in
- * NavDrawer: command-palette launcher (folded in from the topbar), the full
- * nav (useNavItems — incl. Logs + MCP the old bottom-tabs never reached),
- * and the runtime status widget. Replaces the never-shown bottom-tab bar.
+ * NavDrawer: quick-actions launcher (folded in from the topbar) and the full
+ * nav (useNavItems — incl. Logs + MCP the old bottom-tabs never reached).
+ * Replaces the never-shown bottom-tab bar.
  */
 import { test, expect } from '../fixtures/apiMock'
 
@@ -48,8 +48,8 @@ test.describe('Mobile nav drawer (≤720px)', () => {
     await expect(drawer.locator('.sb-row .lbl', { hasText: 'Connections' })).toHaveCount(0)
     // command palette folded into the drawer on mobile
     await expect(drawer.locator('.nav-drawer-cmdk')).toBeVisible()
-    // runtime widget re-shown inside the drawer (despite the 1080px global hide)
-    await expect(drawer.locator('.sb-status').first()).toBeVisible()
+    // runtime health lives in the footer, not the drawer/sidebar.
+    await expect(drawer.locator('.sb-status')).toHaveCount(0)
   })
 
   test('selecting a destination navigates and closes the drawer', async ({ page }) => {