Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion src/hal0/slots/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import asyncio
import contextlib
import logging
import os
import re
import shutil
import time
Expand Down Expand Up @@ -111,6 +112,8 @@
# /health) is demoted to ERROR — but only after this many CONSECUTIVE failures,
# so a single transient blip doesn't trigger a disruptive model reload.
_HEALTH_FAIL_STRIKES: int = 2
# Must match the exact flag spellings emitted by ContainerProvider's llama
# launch renderer; drift checks compare argv text, not llama-server aliases.
_CONFIG_DRIFT_KEYS: tuple[str, ...] = ("--ctx-size", "--model", "--alias", "-b", "-ub")

# Idle-monitor defaults. A READY slot whose last activity is older than
Expand Down Expand Up @@ -1252,7 +1255,7 @@ async def compute_config_drift(
diffs = [
{"key": key, "running": running_flags.get(key), "rendered": rendered_flags.get(key)}
for key in _CONFIG_DRIFT_KEYS
if running_flags.get(key) != rendered_flags.get(key)
if not _config_drift_values_equal(key, running_flags.get(key), rendered_flags.get(key))
]
return {"drifted": bool(diffs), "diffs": diffs}

Expand Down Expand Up @@ -2567,6 +2570,12 @@ def _argv_values(argv: list[str], keys: tuple[str, ...]) -> dict[str, str | None
return out


def _config_drift_values_equal(key: str, running: str | None, rendered: str | None) -> bool:
if key == "--model" and running is not None and rendered is not None:
return os.path.realpath(running) == os.path.realpath(rendered)
return running == rendered


def _normalize_ctx_key(cfg_dict: dict[str, Any]) -> None:
"""Fold the legacy ``[model].ctx_size`` alias into the canonical
``context_size`` (SlotConfig's field), in place (#585).
Expand Down
22 changes: 22 additions & 0 deletions tests/providers/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,28 @@ def test_expected_argv_uses_launch_plan_context_derive(self) -> None:
assert argv is not None
assert argv[argv.index("--ctx-size") + 1] == "32768"

def test_expected_argv_emits_config_drift_watched_flag_spellings(self) -> None:
    """#863: the rendered argv must use the exact flag spellings drift watches.

    Drift detection compares argv text, so a renderer that switched to an
    alias spelling (``-c``, ``--batch-size``, ``--ubatch-size``) must fail here.
    """
    provider = self._provider()
    profile_patch = patch(
        "hal0.providers.container._resolve_profile",
        return_value=_moe_profile(),
    )
    with profile_patch:
        argv = provider.expected_argv(
            _slot_cfg(model={"default": "chadrock-35b-ace-saber", "context_size": 131072}),
            _model_info(),
        )

    assert argv is not None
    # Spellings the drift check looks up verbatim.
    for watched_flag in ("--model", "--alias", "--ctx-size", "-b", "-ub"):
        assert watched_flag in argv
    # Alias spellings that must not be emitted.
    for alias_flag in ("-c", "--batch-size", "--ubatch-size"):
        assert alias_flag not in argv


# ── load_sync / unload_sync systemd interaction ───────────────────────────────

Expand Down
35 changes: 35 additions & 0 deletions tests/slots/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,41 @@ async def test_status_omits_config_drift_when_argv_matches(
assert snap.metadata.get("config_drift") == {"drifted": False, "diffs": []}


async def test_status_omits_config_drift_when_model_paths_resolve_to_same_file(
    slot_root: Path,
    tmp_path: Path,
    container_stub: FakeContainerProvider,
) -> None:
    """#863: two ``--model`` spellings of one file must not count as drift.

    The expected argv names the model through its real directory and the
    running argv names it through a symlink to that directory.
    """
    models_dir = tmp_path / "models"
    models_dir.mkdir()
    gguf_file = models_dir / "qwen.gguf"
    gguf_file.write_text("", encoding="utf-8")
    symlinked_dir = tmp_path / "model-link"
    symlinked_dir.symlink_to(models_dir, target_is_directory=True)

    def argv_with_model(model: Path) -> list[str]:
        # Identical argv apart from the --model value.
        return [
            "--host",
            "0.0.0.0",
            "--port",
            "8081",
            "--model",
            str(model),
            "--ctx-size",
            "131072",
        ]

    container_stub.expected_argv_by_slot["chat"] = argv_with_model(gguf_file)
    container_stub.running_argv_by_slot["chat"] = argv_with_model(symlinked_dir / "qwen.gguf")

    manager = SlotManager()
    await manager.load("chat")
    snapshot = await manager.status("chat", include_config_drift=True)

    assert snapshot.metadata.get("config_drift") == {"drifted": False, "diffs": []}


async def test_list_does_not_compute_config_drift_on_poll_path(
slot_root: Path,
container_stub: FakeContainerProvider,
Expand Down