From 5de5f9f5de4eefdaf50a77adb6e3fffa13205409 Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 17 Jun 2026 07:51:57 -0400 Subject: [PATCH] fix(comfyui): autoswitch before prompt submit --- docs/guides/configure.mdx | 2 +- docs/guides/generate-images.mdx | 25 +--- .../comfyui/custom_nodes/hal0_gpu_gate.py | 126 +++++++++++++----- installer/install.sh | 8 ++ scripts/deploy.sh | 15 +++ src/hal0/api/__init__.py | 2 +- src/hal0/api/routes/comfyui.py | 42 +----- src/hal0/api/routes/health.py | 6 +- src/hal0/slots/arbiter.py | 10 +- tests/api/test_comfyui_proxy.py | 50 +++---- tests/api/test_features_health.py | 4 +- tests/comfyui/test_hal0_gpu_gate.py | 61 ++++++--- tests/install/test_comfyui_scripts_shipped.py | 8 ++ ui/src/api/endpoints.ts | 6 +- ui/src/api/hooks/useComfyui.ts | 10 +- ui/tests/e2e/specs/mobile-nav-v3.spec.ts | 10 +- 16 files changed, 219 insertions(+), 166 deletions(-) diff --git a/docs/guides/configure.mdx b/docs/guides/configure.mdx index 229bae55..28fa7b80 100644 --- a/docs/guides/configure.mdx +++ b/docs/guides/configure.mdx @@ -23,7 +23,7 @@ with it. | File | What it holds | |------|---------------| | `hal0.toml` | Top-level config: `[meta] schema_version`, `[slots] port_range_start/end`, `[models] store`, telemetry channel. | -| `api.env` | Environment for the hal0-api service (systemd `EnvironmentFile`) — feature flags and env knobs like `HAL0_MODEL_STORE`, `HAL0_COMFYUI_SWITCHOVER_ENABLED`, `HAL0_MCP_ALLOWED_HOSTS`. | +| `api.env` | Environment for the hal0-api service (systemd `EnvironmentFile`) — feature flags and env knobs like `HAL0_MODEL_STORE`, `HAL0_MCP_ALLOWED_HOSTS`. | | `upstreams.toml` | External / upstream provider routing. | | `providers.toml` | Provider credential references (key **names**, never plaintext secrets). | | `profiles.toml` | Optional profile catalogue. Absent → built-in seed profiles are used. | diff --git a/docs/guides/generate-images.mdx b/docs/guides/generate-images.mdx index 931c8418..d3c3beef 100644 --- a/docs/guides/generate-images.mdx +++ b/docs/guides/generate-images.mdx @@ -21,9 +21,9 @@ reloaded. ## Read the generation-engine status @@ -86,8 +86,7 @@ Body fields: Responses: **202** (`switching`, runs in the background — poll the `switchover` block on `/status`), **200** (`noop`, already in the target mode), **409** (switch already in flight, or a busy queue without -`force`), **501** (switchover disabled on this host), **503** (arbiter not -wired). +`force`), **503** (arbiter not wired). Pin or unpin image mode independently: @@ -97,20 +96,6 @@ curl -X POST http://localhost:8080/api/comfyui/pin \ -d '{"pinned": true}' ``` -### Enable the switchover - -Both write paths (`/switchover` and `/pin`) stay gated behind an -environment flag because the switch takes the LLM stack offline — an -operator decision per host. Set it on hal0-api: - -```sh -HAL0_COMFYUI_SWITCHOVER_ENABLED=1 -``` - -Without it, both endpoints answer **501** (`comfyui.switchover_disabled`). -See [Configure](/docs/guides/configure) for where to set environment -keys (`api.env`). - ## Generate an image The OpenAI-compatible endpoint translates your request into a ComfyUI @@ -170,5 +155,5 @@ caps a single render at 10 minutes before it gives up. ## See also - [Manage slots](/docs/guides/manage-slots) — the `img` slot lifecycle. -- [Configure](/docs/guides/configure) — set `HAL0_COMFYUI_SWITCHOVER_ENABLED` and the `[image]` slot defaults. +- [Configure](/docs/guides/configure) — set the `[image]` slot defaults. - [Observe the system](/docs/guides/logs-and-activity) — tail `hal0-slot@img`. diff --git a/installer/comfyui/custom_nodes/hal0_gpu_gate.py b/installer/comfyui/custom_nodes/hal0_gpu_gate.py index 10b0766d..785c4d9b 100644 --- a/installer/comfyui/custom_nodes/hal0_gpu_gate.py +++ b/installer/comfyui/custom_nodes/hal0_gpu_gate.py @@ -1,12 +1,12 @@ -"""hal0 GPU gate — ComfyUI custom node that 403-blocks job submission while -the Strix Halo iGPU is serving the LLM stack. +"""hal0 GPU prepare hook — asks hal0 to enter image mode before job submit. The ComfyUI container is RESIDENT on hal0 (its web UI stays up in both GPU -modes so users can build workflows/prompts any time), but GPU memory is -exclusive: running a generation while the LLM slots hold GTT would OOM or -evict them. hal0-api's GpuArbiter guards its own dispatch path, and this -middleware closes the remaining door — the web UI's own "Queue Prompt" -(``POST /prompt`` / ``POST /api/prompt``), which goes straight to ComfyUI. +modes so users can build workflows/prompts any time), and queueing a prompt is +now allowed from either mode. When the web UI's own "Queue Prompt" +(``POST /prompt`` / ``POST /api/prompt``) arrives while hal0 still reports +inference mode, this middleware asks hal0-api's GpuArbiter to switch to image +mode and waits briefly for the switch to complete before allowing ComfyUI to +continue. Deployment: this single file is dropped into the host-mounted ``custom_nodes`` dir (``/mnt/ai-models/comfyui/custom_nodes/``) — no image @@ -14,15 +14,16 @@ middleware to the PromptServer app (the same mechanism ComfyUI-Login uses). The container runs with host networking, so hal0-api is on loopback. -Fail-open by design: if hal0-api is unreachable the gate allows the prompt — -a broken hal0 must never brick standalone ComfyUI use. The pure decision -logic (``should_block``) is unit-tested in the hal0 repo +Fail-open by design: if hal0-api is unreachable or the switch request fails, +the hook allows the prompt — a broken hal0 must never brick standalone ComfyUI +use. The pure decision logic (``should_prepare_image_mode``) is unit-tested in the hal0 repo (tests/comfyui/test_hal0_gpu_gate.py); the aiohttp wiring is exercised by the CT105 live verification. """ import json import os +import time import urllib.error import urllib.request @@ -31,41 +32,52 @@ HAL0_STATUS_URL = os.environ.get( "HAL0_COMFYUI_STATUS_URL", "http://127.0.0.1:8080/api/comfyui/status" ) +HAL0_SWITCHOVER_URL = os.environ.get( + "HAL0_COMFYUI_SWITCHOVER_URL", "http://127.0.0.1:8080/api/comfyui/switchover" +) _STATUS_TIMEOUT_S = 2.0 + +def _float_env(name: str, default: float) -> float: + try: + return float(os.environ.get(name, str(default))) + except ValueError: + return default + + +_SWITCH_TIMEOUT_S = _float_env("HAL0_COMFYUI_SWITCH_TIMEOUT_S", 120.0) +_SWITCH_POLL_S = _float_env("HAL0_COMFYUI_SWITCH_POLL_S", 0.5) + #: Job-submission routes — and ONLY those. Everything the editor needs #: (/object_info, /queue GET, workflow save/load, uploads) always passes. _BLOCK_PATHS = frozenset({"/prompt", "/api/prompt"}) -#: Mirrors ComfyUI's own /prompt error envelope so the frontend renders the -#: message instead of a generic failure toast. -GATE_BODY = { - "error": { - "type": "hal0_gpu_gate", - "message": ( - "The GPU is in inference mode (LLM slots loaded). Flip the " - "Image Gen switch in the hal0 dashboard, then queue again." - ), - "details": "hal0 GpuArbiter mode is 'inference'; generation is gated.", - "extra_info": {}, - }, - "node_errors": {}, -} +def _is_prompt_submit(method: str, path: str) -> bool: + return method == "POST" and path in _BLOCK_PATHS -def should_block(method: str, path: str, status: "dict | None") -> bool: - """True iff this request is a job submission while the GPU serves LLMs. +def should_prepare_image_mode(method: str, path: str, status: "dict | None") -> bool: + """True iff this prompt submit should ask hal0 to enter image mode. ``status`` is hal0-api's /api/comfyui/status JSON (or None when unreachable / unparseable → fail-open). """ - if method != "POST" or path not in _BLOCK_PATHS: + if not _is_prompt_submit(method, path): return False if not isinstance(status, dict): return False return status.get("mode") == "inference" +def should_block(method: str, path: str, status: "dict | None") -> bool: + """Backward-compatible name for older tests/imports. + + Prompt submission is no longer blocked by this custom node; it prepares + image mode best-effort, then lets ComfyUI handle the prompt. + """ + return False + + def _fetch_status() -> "dict | None": """Blocking status fetch via urllib (run in a thread by the middleware). @@ -80,8 +92,55 @@ def _fetch_status() -> "dict | None": return None +def _post_switchover() -> bool: + """Ask hal0-api to enter generation mode; True means accepted/noop. + + stdlib-only on purpose: custom nodes can't assume extra deps in the + ComfyUI venv. + """ + body = json.dumps({"mode": "generation"}).encode("utf-8") + req = urllib.request.Request( + HAL0_SWITCHOVER_URL, + data=body, + headers={"Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=_STATUS_TIMEOUT_S) as resp: + return resp.status in (200, 202) + except urllib.error.HTTPError as exc: + # 409 can mean a switch is already in flight. Poll status below. + return exc.code == 409 + except (urllib.error.URLError, OSError, ValueError): + return False + + +def prepare_image_mode() -> None: + """Best-effort inference → generation handoff before forwarding /prompt. + + Fail-open: exceptions and timeouts return without blocking the prompt. + """ + status = _fetch_status() + if not should_prepare_image_mode("POST", "/prompt", status): + return + if not _post_switchover(): + return + + deadline = time.monotonic() + max(0.0, _SWITCH_TIMEOUT_S) + while time.monotonic() < deadline: + status = _fetch_status() + if not isinstance(status, dict): + return + if status.get("mode") == "generation": + return + sw = status.get("switchover") + if isinstance(sw, dict) and sw.get("error"): + return + time.sleep(max(0.05, _SWITCH_POLL_S)) + + def _install() -> None: - """Attach the gate middleware to ComfyUI's PromptServer (fail-soft).""" + """Attach the prepare middleware to ComfyUI's PromptServer (fail-soft).""" try: import asyncio @@ -90,14 +149,15 @@ def _install() -> None: @web.middleware async def hal0_gpu_gate_middleware(request, handler): - if request.method == "POST" and request.path in _BLOCK_PATHS: - status = await asyncio.to_thread(_fetch_status) - if should_block(request.method, request.path, status): - return web.json_response(GATE_BODY, status=403) + if _is_prompt_submit(request.method, request.path): + await asyncio.to_thread(prepare_image_mode) return await handler(request) PromptServer.instance.app.middlewares.append(hal0_gpu_gate_middleware) - print(f"[hal0_gpu_gate] /prompt gated on hal0 GPU mode ({HAL0_STATUS_URL})") + print( + "[hal0_gpu_gate] /prompt prepares hal0 image mode " + f"({HAL0_STATUS_URL} → {HAL0_SWITCHOVER_URL})" + ) except Exception as exc: # outside ComfyUI (unit tests) or API drift print(f"[hal0_gpu_gate] not installed: {exc}") diff --git a/installer/install.sh b/installer/install.sh index 10feed9f..57e28e9a 100755 --- a/installer/install.sh +++ b/installer/install.sh @@ -953,6 +953,7 @@ done ui_step "ComfyUI control scripts" COMFYUI_SCRIPTS_SRC="${REPO_ROOT}/installer/comfyui/scripts" +COMFYUI_CUSTOM_NODES_SRC="${REPO_ROOT}/installer/comfyui/custom_nodes" COMFYUI_DIR="/opt/comfyui" COMFYUI_MODELS_ROOT="/mnt/ai-models/comfyui" @@ -973,6 +974,13 @@ else done info "ensured ${COMFYUI_MODELS_ROOT}/{models,output,input,user,custom_nodes}" + if [[ -d "${COMFYUI_CUSTOM_NODES_SRC}" ]]; then + install -m0644 "${COMFYUI_CUSTOM_NODES_SRC}"/*.py "${COMFYUI_MODELS_ROOT}/custom_nodes/" + info "wrote ComfyUI custom nodes → ${COMFYUI_MODELS_ROOT}/custom_nodes/" + else + warn "${COMFYUI_CUSTOM_NODES_SRC} not found — ComfyUI custom nodes not installed" + fi + # Place extra_model_paths.yaml if not already present (operator may have a # customised copy — never overwrite). _EXTRA_PATHS_SRC="${REPO_ROOT}/installer/comfyui/extra_model_paths.yaml" diff --git a/scripts/deploy.sh b/scripts/deploy.sh index b6affd8b..9e0eb241 100755 --- a/scripts/deploy.sh +++ b/scripts/deploy.sh @@ -145,6 +145,21 @@ elif [[ "$DO_BUILD" -eq 0 ]]; then warn "skipping UI build (--no-build)" fi +# ── 2b. Sync runtime-mounted ComfyUI custom nodes ───────────────────────────── +# ComfyUI imports custom nodes from the persistent model share, not the source +# checkout. Keep shipped hal0 nodes in sync during runtime deploys; the ComfyUI +# slot still needs a restart to import changed node code. +comfy_nodes_src="${REPO_ROOT}/installer/comfyui/custom_nodes" +comfy_nodes_dst="${HAL0_COMFYUI_CUSTOM_NODES_DIR:-/mnt/ai-models/comfyui/custom_nodes}" +if [[ -d "$comfy_nodes_src" ]]; then + if install -d "$comfy_nodes_dst" 2>/dev/null \ + && install -m0644 "$comfy_nodes_src"/*.py "$comfy_nodes_dst"/ 2>/dev/null; then + info "ComfyUI custom nodes synced → ${comfy_nodes_dst}" + else + warn "could not sync ComfyUI custom nodes to ${comfy_nodes_dst}" + fi +fi + # ── 3. Re-assert group-shared ownership ─────────────────────────────────────── # The reset + build above just (re)created files as the deploying user. Hand the # tree back to the shared group so the hal0 service user (Hermes, agents) can diff --git a/src/hal0/api/__init__.py b/src/hal0/api/__init__.py index 5b56f055..f006f209 100644 --- a/src/hal0/api/__init__.py +++ b/src/hal0/api/__init__.py @@ -1105,7 +1105,7 @@ def create_app() -> FastAPI: ) app.include_router(slots.router, prefix="/api/slots", tags=["slots"]) # Read-only ComfyUI "generation engine" status for the slots-page Image-Gen - # tab (docker + systemd + ComfyUI HTTP), plus the feature-gated switchover. + # tab (docker + systemd + ComfyUI HTTP), plus arbiter switchover controls. app.include_router(comfyui.router, prefix="/api/comfyui", tags=["comfyui"]) app.include_router(models.router, prefix="/api/models", tags=["models"]) # Issue #311: HuggingFace Hub discovery (search proxy). Sits next diff --git a/src/hal0/api/routes/comfyui.py b/src/hal0/api/routes/comfyui.py index 8942304b..149d6907 100644 --- a/src/hal0/api/routes/comfyui.py +++ b/src/hal0/api/routes/comfyui.py @@ -1,4 +1,4 @@ -"""Read-only ComfyUI "generation engine" status aggregator (+ gated switchover). +"""ComfyUI "generation engine" status aggregator + control routes. The dashboard models ComfyUI as ONE containerized generation engine, not a list of per-model slots (a single run loads many cooperating models at once, and it @@ -23,8 +23,7 @@ in the background behind a 202; the ``switchover`` block on /status tracks the transition. The API no longer shells out — the ``/opt/comfyui`` control scripts stay on disk for manual ops only. ``POST /api/comfyui/pin`` toggles the -arbiter's manual pin (blocks idle-restore). Both write paths stay feature-gated -behind ``HAL0_COMFYUI_SWITCHOVER_ENABLED`` (501 when off). +arbiter's manual pin (blocks idle-restore). """ from __future__ import annotations @@ -445,27 +444,6 @@ async def _run_switch(arbiter: Any, mode: str, *, pin: bool = False, force: bool _switch["target"] = None -def _gate_closed() -> JSONResponse | None: - """501 when the operator hasn't enabled the GPU-switch write path.""" - if os.environ.get("HAL0_COMFYUI_SWITCHOVER_ENABLED", "0") == "1": - return None - return JSONResponse( - status_code=501, - content={ - "error": { - "code": "comfyui.switchover_disabled", - "message": ( - "ComfyUI switchover is disabled on this host. It stops " - "the LLM stack (bots + memory extraction go dark) while " - "generation holds the iGPU; set " - "HAL0_COMFYUI_SWITCHOVER_ENABLED=1 on hal0-api to " - "enable it." - ), - } - }, - ) - - def _arbiter_unavailable() -> JSONResponse: return JSONResponse( status_code=503, @@ -490,13 +468,10 @@ async def comfyui_switchover(request: Request, background_tasks: BackgroundTasks background — track completion via the ``switchover`` block on ``GET /status``. - Stays gated behind ``HAL0_COMFYUI_SWITCHOVER_ENABLED`` because the switch - takes the LLM stack (bots + memory extraction) offline (an operator - decision per host), not because the path is unwired. + The endpoint is always available when the SlotManager/GpuArbiter is wired: + ComfyUI prompt submission can call it as the implicit handoff before + enqueueing a render. """ - gate = _gate_closed() - if gate is not None: - return gate try: body = await request.json() except ValueError: @@ -577,12 +552,9 @@ async def comfyui_switchover(request: Request, background_tasks: BackgroundTasks async def comfyui_pin(request: Request) -> JSONResponse: """Toggle the arbiter's manual pin (holds image mode against idle-restore). - Body: ``{"pinned": bool}``. Gated by the same env flag as the switchover — - pinning only matters when the GPU-switch write path is live. + Body: ``{"pinned": bool}``. Pinning disables idle auto-restore while image + mode is active. """ - gate = _gate_closed() - if gate is not None: - return gate try: body = await request.json() except ValueError: diff --git a/src/hal0/api/routes/health.py b/src/hal0/api/routes/health.py index 0e5b2e4c..50d72ccd 100644 --- a/src/hal0/api/routes/health.py +++ b/src/hal0/api/routes/health.py @@ -16,7 +16,6 @@ from __future__ import annotations -import os import shutil from pathlib import Path from typing import Any @@ -246,8 +245,7 @@ async def list_features(request: Request) -> dict[str, Any]: Flat ``feature → bool | str`` map: - - ``comfyui_switchover``: image-gen engine switchover is unlocked - (``HAL0_COMFYUI_SWITCHOVER_ENABLED=1``). + - ``comfyui_switchover``: image-gen engine switchover route is present. - ``memory``: a memory provider is wired (HAL0_MEMORY_ENABLED + a successful init). - ``memory_engine``: the configured engine name (``hindsight`` | @@ -257,7 +255,7 @@ async def list_features(request: Request) -> dict[str, Any]: restart) — not implemented yet (pending ADR-0015), always false. """ features: dict[str, Any] = { - "comfyui_switchover": os.environ.get("HAL0_COMFYUI_SWITCHOVER_ENABLED", "") == "1", + "comfyui_switchover": True, "memory": getattr(request.app.state, "memory_provider", None) is not None, "mcp_supervisor": False, } diff --git a/src/hal0/slots/arbiter.py b/src/hal0/slots/arbiter.py index 542c584f..75f86f34 100644 --- a/src/hal0/slots/arbiter.py +++ b/src/hal0/slots/arbiter.py @@ -21,9 +21,9 @@ (cold-)started when it isn't already dispatchable. - ``restore_llm`` — ``POST /free`` to ComfyUI (drop its models from GTT), then reload the saved llm set. The container keeps serving the UI. - - actually *running* a generation in llm mode is refused at three gates: - the dispatch guard (``guard_dispatch``), and ComfyUI's own ``/prompt`` - via the host-mounted ``hal0_gpu_gate`` custom node (403). + - direct ComfyUI ``/prompt`` submission goes through the host-mounted + ``hal0_gpu_gate`` custom node, which now asks hal0-api to enter image + mode before forwarding the prompt. Crash-recovery ordering (locked): the arbiter persists its target state to ``/var/lib/hal0/gpu_arbiter.json`` BEFORE the first unload, so an api @@ -207,8 +207,8 @@ class GpuInferenceMode(Hal0Error): """Image dispatch refused — the GPU is serving the LLM set. The resident ComfyUI container stays READY in llm mode (its web UI is - always up), so this guard is what stops a generation from grabbing GTT - under the loaded LLM weights. Flip the switch first. + always up). The OpenAI-compatible image route switches before dispatch, + so this guard mainly protects lower-level/img-slot dispatch callers. """ code = "gpu.inference_mode" diff --git a/tests/api/test_comfyui_proxy.py b/tests/api/test_comfyui_proxy.py index bf1e6b99..b94751f9 100644 --- a/tests/api/test_comfyui_proxy.py +++ b/tests/api/test_comfyui_proxy.py @@ -1,4 +1,4 @@ -"""Tests for the read-only ComfyUI status aggregator + gated switchover. +"""Tests for the ComfyUI status aggregator + arbiter switchover. The dashboard's Image-Gen tab renders a single "generation engine" pane backed by ``GET /api/comfyui/status``. That endpoint aggregates three sources, all of @@ -10,14 +10,13 @@ currently owns the single iGPU - ComfyUI's own HTTP API (``/system_stats`` + ``/queue``) for live telemetry -The switchover *write* path (POST /api/comfyui/switchover) is feature-gated -behind ``HAL0_COMFYUI_SWITCHOVER_ENABLED`` (501 when off). When on it validates -the target mode, no-ops if already there, refuses to drop a busy render queue -without ``force``, and drives the GpuArbiter in the background behind a 202 — -the ``switchover`` block on /status is what tracks the transition to terminal. -No subprocess is ever spawned for the switch (the shell-script path is retired): -tests install a stub arbiter on ``app.state.slot_manager`` and assert the -arbiter calls, mirroring the patched status seams. +The switchover *write* path (POST /api/comfyui/switchover) validates the target +mode, no-ops if already there, refuses to drop a busy render queue without +``force``, and drives the GpuArbiter in the background behind a 202 — the +``switchover`` block on /status is what tracks the transition to terminal. No +subprocess is ever spawned for the switch (the shell-script path is retired): +tests install a stub arbiter on ``app.state.slot_manager`` and assert the arbiter +calls, mirroring the patched status seams. """ from __future__ import annotations @@ -448,15 +447,9 @@ def test_switchover_arbiter_error_recorded( def test_pin_endpoint_toggles(client: TestClient, monkeypatch: pytest.MonkeyPatch) -> None: - # Same 501 feature gate as the switchover, then a plain set_pin passthrough - # that reflects the new value back. + # Plain set_pin passthrough that reflects the new value back. arb = _install_arbiter(client) r = client.post("/api/comfyui/pin", json={"pinned": True}) - assert r.status_code == 501 - arb.set_pin.assert_not_called() - - monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "1") - r = client.post("/api/comfyui/pin", json={"pinned": True}) assert r.status_code == 200 assert r.json() == {"pinned": True} arb.set_pin.assert_called_once_with(True) @@ -521,21 +514,22 @@ def test_switchover_rejects_unknown_mode( assert r.json()["error"]["code"] == "comfyui.invalid_mode" -def test_switchover_gated_off_returns_501(client: TestClient) -> None: - # Default: the privileged root path is NOT wired. Must refuse, not pretend. - r = client.post("/api/comfyui/switchover", json={"mode": "generation"}) - assert r.status_code == 501 - assert "switchover" in r.json()["error"]["code"] +def test_switchover_is_not_env_gated(client: TestClient) -> None: + # Prompt submission can implicitly ask for image mode, so the switchover + # route must not depend on HAL0_COMFYUI_SWITCHOVER_ENABLED. + arb = _install_arbiter(client) + c, s, f = _patch(container="absent", hermes=True, fetch=_fetch_down) + with c, s, f: + r = client.post("/api/comfyui/switchover", json={"mode": "generation"}) + assert r.status_code == 202 + arb.ensure_img.assert_awaited_once_with(pin=False) -def test_switchover_enabled_flag_is_read_per_request( - client: TestClient, monkeypatch: pytest.MonkeyPatch -) -> None: - # When the flag is on, the gate opens (even though the stub then reports the - # root path is still unimplemented — proves the flag is consulted live). - monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "1") +def test_switchover_unwired_returns_503_not_501(client: TestClient) -> None: + client.app.state.slot_manager = None r = client.post("/api/comfyui/switchover", json={"mode": "generation"}) - assert r.status_code != 501 + assert r.status_code == 503 + assert r.json()["error"]["code"] == "comfyui.arbiter_unavailable" # ── _container_state: podman-first probe (#710) ────────────────────────────── diff --git a/tests/api/test_features_health.py b/tests/api/test_features_health.py index f32a4188..91773884 100644 --- a/tests/api/test_features_health.py +++ b/tests/api/test_features_health.py @@ -26,14 +26,14 @@ def test_features_shape(client: TestClient) -> None: assert body["mcp_supervisor"] is False -def test_features_comfyui_gate_reads_env( +def test_features_comfyui_switchover_always_available( client: TestClient, monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "1") assert client.get("/api/features").json()["comfyui_switchover"] is True monkeypatch.setenv("HAL0_COMFYUI_SWITCHOVER_ENABLED", "0") - assert client.get("/api/features").json()["comfyui_switchover"] is False + assert client.get("/api/features").json()["comfyui_switchover"] is True def test_features_memory_reflects_state(client: TestClient) -> None: diff --git a/tests/comfyui/test_hal0_gpu_gate.py b/tests/comfyui/test_hal0_gpu_gate.py index 725a8f9e..e80c521b 100644 --- a/tests/comfyui/test_hal0_gpu_gate.py +++ b/tests/comfyui/test_hal0_gpu_gate.py @@ -1,10 +1,10 @@ -"""hal0_gpu_gate — the ComfyUI custom node that 403-blocks job submission -while the iGPU is in inference (llm) mode. +"""hal0_gpu_gate — the ComfyUI custom node that prepares image mode before +job submission while the iGPU is in inference (llm) mode. The node ships in ``installer/comfyui/custom_nodes/hal0_gpu_gate.py`` and is host-mounted into the resident ComfyUI container's ``custom_nodes`` dir, so the web UI stays fully usable (editor, /object_info, workflow save/load) -while only ``POST /prompt`` is gated on the arbiter mode. +while only ``POST /prompt`` triggers a best-effort arbiter handoff. Only the pure decision logic is unit-tested here — the aiohttp middleware / PromptServer wiring needs a live ComfyUI process and is exercised in the @@ -42,42 +42,59 @@ def test_node_file_exists_and_imports_outside_comfyui() -> None: assert mod.NODE_CLASS_MAPPINGS == {} -def test_blocks_prompt_post_in_inference_mode() -> None: +def test_prompt_post_in_inference_mode_prepares_but_does_not_block() -> None: mod = _load_node() status = {"mode": "inference"} - assert mod.should_block("POST", "/prompt", status) is True - # the frontend posts to /api/prompt on recent ComfyUI versions - assert mod.should_block("POST", "/api/prompt", status) is True + assert mod.should_prepare_image_mode("POST", "/prompt", status) is True + # The frontend posts to /api/prompt on recent ComfyUI versions. + assert mod.should_prepare_image_mode("POST", "/api/prompt", status) is True + # The old 403 gate is gone; the middleware prepares image mode and forwards. + assert mod.should_block("POST", "/prompt", status) is False def test_allows_prompt_post_in_generation_mode() -> None: mod = _load_node() - assert mod.should_block("POST", "/prompt", {"mode": "generation"}) is False + assert mod.should_prepare_image_mode("POST", "/prompt", {"mode": "generation"}) is False def test_fails_open_when_hal0_api_unreachable() -> None: """hal0-api down → status unknown → never brick standalone ComfyUI use.""" mod = _load_node() - assert mod.should_block("POST", "/prompt", None) is False - assert mod.should_block("POST", "/prompt", {"unexpected": "shape"}) is False + assert mod.should_prepare_image_mode("POST", "/prompt", None) is False + assert mod.should_prepare_image_mode("POST", "/prompt", {"unexpected": "shape"}) is False def test_never_blocks_other_routes_or_methods() -> None: """Everything that makes the editor usable must always pass.""" mod = _load_node() status = {"mode": "inference"} - assert mod.should_block("GET", "/prompt", status) is False - assert mod.should_block("POST", "/object_info", status) is False - assert mod.should_block("GET", "/queue", status) is False - assert mod.should_block("POST", "/upload/image", status) is False + assert mod.should_prepare_image_mode("GET", "/prompt", status) is False + assert mod.should_prepare_image_mode("POST", "/object_info", status) is False + assert mod.should_prepare_image_mode("GET", "/queue", status) is False + assert mod.should_prepare_image_mode("POST", "/upload/image", status) is False -def test_gate_body_is_comfyui_frontend_renderable() -> None: - """403 body mirrors ComfyUI's /prompt error envelope so the frontend - surfaces the message instead of a generic failure toast.""" +def test_prepare_image_mode_requests_switchover_and_waits(monkeypatch) -> None: mod = _load_node() - body = mod.GATE_BODY - assert body["node_errors"] == {} - err = body["error"] - assert err["type"] == "hal0_gpu_gate" - assert "Image Gen" in err["message"] + statuses = iter( + [ + {"mode": "inference"}, + {"mode": "inference", "switchover": {"active": True, "target": "generation"}}, + {"mode": "generation"}, + ] + ) + calls: list[str] = [] + + monkeypatch.setattr(mod, "_fetch_status", lambda: next(statuses)) + monkeypatch.setattr(mod, "_post_switchover", lambda: calls.append("switch") or True) + monkeypatch.setattr(mod.time, "sleep", lambda _seconds: None) + + mod.prepare_image_mode() + assert calls == ["switch"] + + +def test_prepare_image_mode_fails_open_when_switchover_fails(monkeypatch) -> None: + mod = _load_node() + monkeypatch.setattr(mod, "_fetch_status", lambda: {"mode": "inference"}) + monkeypatch.setattr(mod, "_post_switchover", lambda: False) + mod.prepare_image_mode() diff --git a/tests/install/test_comfyui_scripts_shipped.py b/tests/install/test_comfyui_scripts_shipped.py index 7af33ed4..2a028003 100644 --- a/tests/install/test_comfyui_scripts_shipped.py +++ b/tests/install/test_comfyui_scripts_shipped.py @@ -16,6 +16,7 @@ # Repo root = three levels up from tests/install/ REPO = Path(__file__).parent.parent.parent SCRIPTS_DIR = REPO / "installer" / "comfyui" / "scripts" +CUSTOM_NODES_DIR = REPO / "installer" / "comfyui" / "custom_nodes" COMFYUI_PY = REPO / "src" / "hal0" / "api" / "routes" / "comfyui.py" INSTALL_SH = REPO / "installer" / "install.sh" @@ -101,3 +102,10 @@ def test_install_sh_uses_install_command_for_scripts(): or "COMFYUI_DIR" in content ) assert has_install_block, "install.sh does not contain 'install -d /opt/comfyui' or equivalent" + + +def test_install_sh_places_comfyui_custom_nodes(): + content = INSTALL_SH.read_text() + assert (CUSTOM_NODES_DIR / "hal0_gpu_gate.py").exists() + assert "installer/comfyui/custom_nodes" in content or "comfyui/custom_nodes" in content + assert "COMFYUI_MODELS_ROOT}/custom_nodes" in content diff --git a/ui/src/api/endpoints.ts b/ui/src/api/endpoints.ts index f5dbbf87..69eeec4d 100644 --- a/ui/src/api/endpoints.ts +++ b/ui/src/api/endpoints.ts @@ -11,12 +11,10 @@ export const ENDPOINTS = { slots: '/api/slots', // ── ComfyUI generation engine (slots-page Image-Gen tab) ───────── - // Read-only aggregate of docker + systemd + ComfyUI HTTP; the - // switchover write-path is feature-gated server-side. + // Read-only aggregate of docker + systemd + ComfyUI HTTP. comfyuiStatus: '/api/comfyui/status', comfyuiSwitchover: '/api/comfyui/switchover', - // Pin image mode (disables the arbiter's idle auto-restore). 501 when the - // switchover gate is off. + // Pin image mode (disables the arbiter's idle auto-restore). comfyuiPin: '/api/comfyui/pin', // Render control comfyuiRenderCancel: '/api/comfyui/render/cancel', diff --git a/ui/src/api/hooks/useComfyui.ts b/ui/src/api/hooks/useComfyui.ts index 2db824b6..ea2bc2ee 100644 --- a/ui/src/api/hooks/useComfyui.ts +++ b/ui/src/api/hooks/useComfyui.ts @@ -13,9 +13,7 @@ // arbiter runs the transition in-process; the `switchover` block on /status // (active/target/error) is what tracks the transition to terminal — the pane's // poll renders it, per the async-job-must-poll-to-terminal rule. Tearing down a -// non-empty queue needs `force: true` (the confirm dialog is that consent). The -// whole path stays feature-gated server-side (HAL0_COMFYUI_SWITCHOVER_ENABLED, -// 501 when off) — surfaced as a toast, never an optimistic flip. +// non-empty queue needs `force: true` (the confirm dialog is that consent). // // `arbiter` is the arbiter-truth block ({mode img|llm, pinned, saved slots, // idle_restore_at}); it is null when the arbiter is unavailable (gate off / @@ -120,8 +118,8 @@ export interface SwitchoverBody { pin?: boolean } -// raw:true so the dev mockFetch GET-shim can't mask the 501/503 gate — we want -// the real status code to drive the toast copy. +// raw:true so the dev mockFetch GET-shim can't mask 503 arbiter failures — we +// want the real status code to drive the toast copy. export function useComfyuiSwitchover() { return useMutation({ mutationFn: (body: SwitchoverBody) => @@ -131,7 +129,7 @@ export function useComfyuiSwitchover() { // Pin / unpin image mode (disables / re-arms the arbiter's idle auto-restore). // Synchronous 200 {"pinned":bool} — the caller refetches /status to reflect -// the new arbiter state. raw:true for the same 501-gate reason as switchover. +// the new arbiter state. raw:true for the same failure handling as switchover. export function useComfyuiPin() { return useMutation({ mutationFn: (body: { pinned: boolean }) => diff --git a/ui/tests/e2e/specs/mobile-nav-v3.spec.ts b/ui/tests/e2e/specs/mobile-nav-v3.spec.ts index f9d8cb12..95746718 100644 --- a/ui/tests/e2e/specs/mobile-nav-v3.spec.ts +++ b/ui/tests/e2e/specs/mobile-nav-v3.spec.ts @@ -1,9 +1,9 @@ /** * mobile-nav-v3 — at ≤720px the desktop sidebar is hidden and navigation * moves into a top-right hamburger (.tb-menu) that opens a slide-in - * NavDrawer: command-palette launcher (folded in from the topbar), the full - * nav (useNavItems — incl. Logs + MCP the old bottom-tabs never reached), - * and the runtime status widget. Replaces the never-shown bottom-tab bar. + * NavDrawer: quick-actions launcher (folded in from the topbar) and the full + * nav (useNavItems — incl. Logs + MCP the old bottom-tabs never reached). + * Replaces the never-shown bottom-tab bar. */ import { test, expect } from '../fixtures/apiMock' @@ -48,8 +48,8 @@ test.describe('Mobile nav drawer (≤720px)', () => { await expect(drawer.locator('.sb-row .lbl', { hasText: 'Connections' })).toHaveCount(0) // command palette folded into the drawer on mobile await expect(drawer.locator('.nav-drawer-cmdk')).toBeVisible() - // runtime widget re-shown inside the drawer (despite the 1080px global hide) - await expect(drawer.locator('.sb-status').first()).toBeVisible() + // runtime health lives in the footer, not the drawer/sidebar. + await expect(drawer.locator('.sb-status')).toHaveCount(0) }) test('selecting a destination navigates and closes the drawer', async ({ page }) => {