diff --git a/ui/src/dash/engine-panes.css b/ui/src/dash/engine-panes.css index c52c5b27..805e3410 100644 --- a/ui/src/dash/engine-panes.css +++ b/ui/src/dash/engine-panes.css @@ -110,6 +110,13 @@ 0 0 0 1px var(--comfy-line), 0 0 40px -20px var(--comfy-glow); } +/* Inference: keep the engine container neutral — no accent border/ring around + the whole pane. The "live" accent lives on the per-slot serving cards + (accent left-border + breathe), not the container. */ +.infer-pane .engine.active { + border-color: var(--line); + box-shadow: none; +} .infer-pane .engine-h, .npu-pane .engine-h { display: flex; @@ -749,6 +756,20 @@ :is(.infer-pane, .npu-pane) .scard.serving { border-left: 2px solid var(--comfy); } +/* living-grid breathe — a subtle glow on the headline serving cards (infer-pane + only; the NPU pane uses its own .cslot markup). */ +.infer-pane .scard.serving { + animation: breathe 3.4s ease-in-out infinite; +} +@keyframes breathe { + 0%, + 100% { + box-shadow: 0 0 0 0 transparent; + } + 50% { + box-shadow: 0 0 22px -10px var(--comfy-glow); + } +} :is(.infer-pane, .npu-pane) .scard.warming { border-left: 2px solid var(--warn); } @@ -775,69 +796,48 @@ text-overflow: ellipsis; white-space: nowrap; } -:is(.infer-pane, .npu-pane) .sdot { +/* ── ONE status vocabulary (round dot), yellow-family · scoped to .infer-pane ── + ready = solid yellow (loaded, healthy, idle-but-up) — no glow + serving = yellow + glow + pulse (active / working) + warming = orange + glow + pulse + error = red + glow + offline = grey, no glow + Green (--ok) no longer appears for any slot status in this pane. */ +.infer-pane .sdot { width: 8px; height: 8px; border-radius: 50%; background: var(--fg-5); flex-shrink: 0; } -:is(.infer-pane, .npu-pane) .sdot.serving { +.infer-pane .sdot.ready { background: var(--comfy); - box-shadow: 0 0 7px var(--comfy); - animation: pulse 1.4s ease-in-out infinite; } -:is(.infer-pane, .npu-pane) .sdot.ready { - background: var(--ok); +.infer-pane .sdot.serving { + background: var(--comfy); + box-shadow: 0 0 8px var(--comfy); + animation: pulse 1.4s ease-in-out infinite; } -:is(.infer-pane, .npu-pane) .sdot.warming { +.infer-pane .sdot.warming { background: var(--warn); - box-shadow: 0 0 7px var(--warn); + box-shadow: 0 0 8px var(--warn); animation: pulse 1.4s ease-in-out infinite; } -:is(.infer-pane, .npu-pane) .sdot.error { +.infer-pane .sdot.error { background: var(--err); + box-shadow: 0 0 8px var(--err); } -:is(.infer-pane, .npu-pane) .sdot.offline { +.infer-pane .sdot.offline { background: var(--fg-5); } -/* status pill — serving shows live tok/s, others the state word */ -:is(.infer-pane, .npu-pane) .spill { +/* port — replaces the old status pill; pushed right, plain mono */ +.infer-pane .sport { margin-left: auto; - display: inline-flex; - align-items: center; - gap: 5px; - padding: 2px 8px; - border-radius: 999px; font-family: var(--jbm); - font-size: 10px; - border: 1px solid var(--line); - color: var(--fg-3); - white-space: nowrap; - flex-shrink: 0; -} -:is(.infer-pane, .npu-pane) .spill.serving { - color: var(--comfy); - border-color: var(--comfy-line); - background: var(--comfy-soft); -} -:is(.infer-pane, .npu-pane) .spill.ready { - color: var(--ok); - border-color: var(--ok-line); - background: var(--ok-soft); -} -:is(.infer-pane, .npu-pane) .spill.warming { - color: var(--warn); - border-color: var(--warn-line); - background: var(--warn-soft); -} -:is(.infer-pane, .npu-pane) .spill.error { - color: var(--err); - border-color: var(--err-line); - background: var(--err-soft); -} -:is(.infer-pane, .npu-pane) .spill.offline { + font-size: 11px; color: var(--fg-4); + letter-spacing: 0.02em; + flex-shrink: 0; } :is(.infer-pane, .npu-pane) .scard-b { padding: 10px 12px; @@ -948,40 +948,60 @@ color: var(--fg-4); } -/* device chip + profile pill */ +/* device chip + profile pill — the device tag uses a SQUARE swatch (category, + not status) + a neutral name, so the device hue never reads as a status dot. + Round = status (the .sdot), square = device/category (matches the memory + legend swatches). */ :is(.infer-pane, .npu-pane) .dchip { display: inline-flex; align-items: center; - gap: 5px; + gap: 6px; padding: 2px 7px; border-radius: 3px; font-family: var(--jbm); font-size: 10px; border: 1px solid var(--line); background: var(--bg-1); - color: var(--fg-3); + color: var(--fg-2); white-space: nowrap; } +.infer-pane .dchip .sw { + width: 8px; + height: 8px; + border-radius: var(--rad-xs, 2px); + background: var(--dev-cpu); + flex-shrink: 0; +} +.infer-pane .dchip.vulkan .sw { + background: var(--dev-vulkan); +} +.infer-pane .dchip.rocm .sw { + background: var(--dev-rocm); +} +.infer-pane .dchip.npu .sw { + background: var(--dev-npu); +} +.infer-pane .dchip.cpu .sw { + background: var(--dev-cpu); +} +/* round .d dot — retained ONLY for the memory-track "iGPU" header label + (MemGtt), which is a hue-keyed category badge, not a slot device tag. The + `:has(.d)` guard hue-keys that chip while leaving slot device tags (which use + the neutral square .sw) untouched. */ :is(.infer-pane, .npu-pane) .dchip .d { width: 5px; height: 5px; border-radius: 50%; background: currentColor; } -:is(.infer-pane, .npu-pane) .dchip.vulkan { +:is(.infer-pane, .npu-pane) .dchip.vulkan:has(.d) { color: var(--dev-vulkan); - border-color: rgba(249, 216, 132, 0.3); } -:is(.infer-pane, .npu-pane) .dchip.rocm { +:is(.infer-pane, .npu-pane) .dchip.rocm:has(.d) { color: var(--dev-rocm); - border-color: rgba(127, 184, 255, 0.3); } -:is(.infer-pane, .npu-pane) .dchip.npu { +:is(.infer-pane, .npu-pane) .dchip.npu:has(.d) { color: var(--dev-npu); - border-color: rgba(200, 150, 255, 0.3); -} -:is(.infer-pane, .npu-pane) .dchip.cpu { - color: var(--fg-3); } :is(.infer-pane, .npu-pane) .flm-chip { display: inline-flex; @@ -1083,6 +1103,82 @@ background: var(--comfy-soft); } +/* ─── Utility tier — compact mini cards (embed / rerank / voice) ───────── + A tighter grid below the headline chat/agent cards: header = dot + name + + port, body = model text + a minimal Start/Stop/Restart cluster. No meta row, + no model picker. Scoped to .infer-pane (the NPU pane has its own markup). */ +.infer-pane .util-mini { + display: grid; + gap: 10px; + grid-template-columns: repeat(auto-fill, minmax(218px, 1fr)); +} +.infer-pane .mcard { + border: 1px solid var(--line); + border-radius: var(--rad); + background: var(--bg); + overflow: hidden; + transition: border-color 0.14s var(--ease); +} +.infer-pane .mcard:hover { + border-color: var(--line-strong); +} +.infer-pane .mcard.ready { + border-left: 2px solid var(--comfy); +} +.infer-pane .mcard.serving { + border-left: 2px solid var(--comfy); + animation: breathe 3.4s ease-in-out infinite; +} +.infer-pane .mcard.warming { + border-left: 2px solid var(--warn); +} +.infer-pane .mcard.error { + border-left: 2px solid var(--err); +} +.infer-pane .mcard.offline { + opacity: 0.6; +} +.infer-pane .mcard-h { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 10px; + border-bottom: 1px solid var(--line-soft); + background: var(--bg-1); +} +.infer-pane .mcard-h .snm { + font-family: var(--jbm); + font-size: 12px; + font-weight: 500; + color: var(--fg); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.infer-pane .mcard-b { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 10px; +} +.infer-pane .mcard-b .smodel { + flex: 1; + min-width: 0; + font-family: var(--jbm); + font-size: 10.5px; + color: var(--fg-2); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.infer-pane .mcard-b .slot-ctrls { + flex-shrink: 0; +} +.infer-pane .mcard .sctrl { + width: 22px; + height: 22px; +} + /* ════════════════════════════════════════════════════════════════════ NPU pane — engine shell wraps the existing .npu-stack trio (dashboard.css keeps the inner trio styling; the shell + tel-strip + master switch wrap diff --git a/ui/src/dash/inference-pane.jsx b/ui/src/dash/inference-pane.jsx index f486ec9c..77738643 100644 --- a/ui/src/dash/inference-pane.jsx +++ b/ui/src/dash/inference-pane.jsx @@ -107,6 +107,14 @@ function devKind(device) { return 'cpu' } +// Utility (support) slot groups — the non-conversational tier that renders as +// the compact mini-card row below the headline chat/agent cards. Anything else +// (chat/agent LLMs) is a headline slot. +const UTIL_GROUPS = new Set(['embed', 'rerank', 'tts', 'stt', 'voice']) +function isUtilGroup(group) { + return UTIL_GROUPS.has(String(group || '').toLowerCase()) +} + // Phase → dot class (reuses the design's .sdot vocabulary). Derived from the // shared slot-status classifier so the dot matches the rest of the page. function dotCls(ind) { @@ -290,7 +298,7 @@ function DevCell({ s, onProfile }) { FLM · npu ) : ( - + {kind} ) @@ -417,7 +425,6 @@ export function SlotScard({ s, ind, full, modelNode, controls, phase, onEdit }) const memGb = typeof s.mem_mb === 'number' && s.mem_mb > 0 ? round1(s.mem_mb / 1024) : null const tps = typeof s.metrics?.toks === 'number' && s.metrics.toks > 0 ? s.metrics.toks : null const ttft = typeof s.metrics?.ttft === 'number' && s.metrics.ttft > 0 ? s.metrics.ttft : null - const spill = dot === 'serving' ? (tps ? `${tps} tok/s` : 'serving') : dot return (
{s.name} - {spill} + {s.port ? ':' + s.port : ''}
{modelNode} @@ -506,6 +513,53 @@ function SlotCards({ rows, full, models, busyName, handlers }) { ) } +// Utility tier — compact mini cards for the support slots (embed / rerank / +// voice). No meta row, no model picker: just the dot + name + port header and +// a minimal model + Start/Stop/Restart control cluster (SlotControls compact). +function MiniCard({ s, ind, busy, handlers }) { + const dot = dotCls(ind) + const ph = slotCtrlPhase(s) + return ( +
+
+ + {s.name} + {s.port ? ':' + s.port : ''} +
+
+ + {s.model || '—'} + + handlers.onStart(s)} + onStop={() => handlers.onStop(s)} + onRestart={() => handlers.onRestart(s)} + /> +
+
+ ) +} + +function MiniCards({ rows, busyName, handlers }) { + if (!rows.length) return null + return ( +
+ {rows.map(({ s, ind }) => ( + + ))} +
+ ) +} + // Page-level hero band — the iGPU GTT memory map + combined-throughput tile, // lifted out of the Inference engine shell so it sits at the very top of the // Slots page (above the Inference ⇄ Image Gen tabs) and stays visible across @@ -576,6 +630,14 @@ export function InferencePane() { const servingN = rows.filter((r) => r.ind.cls === 'serving').length const loadedN = rows.filter((r) => isSlotLive(r.s)).length + // Tier split — headline = the conversational LLM slots (chat / agent groups); + // utility = the support slots (embed / rerank / voice). Keyed off slot.group + // so the split tracks the backend's own grouping; voice covers tts/stt too. + // This pane is always expanded (no accordion), so the utility tier shows ALL + // its slots; the live count drives the SubLabel note. + const headlineRows = rows.filter((r) => !isUtilGroup(r.s.group)) + const utilRows = rows.filter((r) => isUtilGroup(r.s.group)) + const gpuN = slots.filter((s) => { const k = devKind(s.device) return k === 'rocm' || k === 'vulkan' @@ -677,17 +739,35 @@ export function InferencePane() { in the page-level InferenceHeroBand above the tabs. */}
- - slots + + chat · agent
+ {utilRows.length > 0 && ( +
+ + utility · embed · rerank · voice + + +
+ )}
{servingN} serving {gttCapGb > 0 ? ` · ${gttFreeGb} GB free` : ''} diff --git a/ui/tests/e2e/specs/inference-pane-v3.spec.ts b/ui/tests/e2e/specs/inference-pane-v3.spec.ts index 7b196d7e..d9f0190d 100644 --- a/ui/tests/e2e/specs/inference-pane-v3.spec.ts +++ b/ui/tests/e2e/specs/inference-pane-v3.spec.ts @@ -41,13 +41,16 @@ test.describe('Inference engine pane (/slots · Inference tab)', () => { // engine pane: state pill summarises serving/loaded counts (primary serving). await expect(pane(page)).toBeVisible() await expect(page.getByTestId('infer-epill')).toContainText('serving') - // every slot renders as a FULL card now (no accordion); the serving primary - // card's status pill carries the live tok/s (45 in the seed). + // headline (chat · agent) slots render as FULL cards. The status pill is + // gone — the header now shows the slot PORT (mono, pushed right); readiness + // is the dot, and tok/s lives in the meta row. await expect(body(page).locator('.scards.full')).toHaveCount(1) const card = body(page).getByTestId('infer-slot-primary') await expect(card).toBeVisible() await expect(card).toHaveClass(/\bscard\b/) - await expect(card.locator('.spill')).toContainText('45 tok/s') + // serving primary → yellow serving dot + the port (:8092 in the seed). + await expect(card.locator('.scard-h .sdot')).toHaveClass(/\bserving\b/) + await expect(card.locator('.scard-h .sport')).toContainText(':8092') }) test('profile pill surfaces the runtime profile name (slot.profile)', async ({ page }) => {