From 6efbf434fd42abdfaf98e050c1e3d356c6c111e2 Mon Sep 17 00:00:00 2001 From: Joe Doss Date: Wed, 8 Apr 2026 00:15:04 -0500 Subject: [PATCH 1/2] Fix psi systemd install crashing inside a container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Installer's _daemon_reload went straight to subprocess systemctl, which blows up inside a container with 'System has not been booted with systemd as init system (PID 1)'. The quadlet files had already been written to disk before this point, leaving the operator with a half-applied install and a bug-report traceback. setup.py already had the right pattern — try D-Bus first, fall back to systemctl, warn instead of raise on failure — but installer.py never got updated to use it. Move the helper into psi.systemd as a public daemon_reload() and have both setup.py and installer.py call it. Downgrade CalledProcessError from systemctl to a warning so the install keeps going; the operator can run daemon-reload on the host afterward. The regression test patches subprocess.run to raise CalledProcessError and confirms daemon_reload() returns normally, matching the behavior needed when running inside the psi container. --- psi/installer.py | 10 ++---- psi/setup.py | 43 ++---------------------- psi/systemd.py | 53 +++++++++++++++++++++++++++++- tests/test_setup.py | 55 ------------------------------- tests/test_systemd.py | 76 ++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 132 insertions(+), 105 deletions(-) diff --git a/psi/installer.py b/psi/installer.py index 430d635..a11026c 100644 --- a/psi/installer.py +++ b/psi/installer.py @@ -11,6 +11,7 @@ from psi.files import write_text_secure from psi.models import DeployMode, SystemdScope +from psi.systemd import daemon_reload from psi.unitgen import ( generate_container_provider_setup_quadlet, generate_container_serve_quadlet, @@ -210,13 +211,8 @@ def _ensure_dir(path: Path) -> None: def _daemon_reload(scope: SystemdScope) -> None: - """Run systemctl daemon-reload.""" - cmd = ["systemctl"] - if scope == SystemdScope.USER: - cmd.append("--user") - cmd.append("daemon-reload") - subprocess.run(cmd, check=True) - logger.info("Reloaded systemd.") + """Reload systemd via the shared D-Bus-first helper.""" + daemon_reload(scope) def _enable_units( diff --git a/psi/setup.py b/psi/setup.py index a7e212f..6a1192d 100644 --- a/psi/setup.py +++ b/psi/setup.py @@ -3,7 +3,6 @@ from __future__ import annotations import os -import subprocess import time from typing import TYPE_CHECKING @@ -11,7 +10,7 @@ from loguru import logger from psi.errors import ProviderError -from psi.models import SystemdScope +from psi.systemd import daemon_reload if TYPE_CHECKING: from psi.cache import Cache @@ -69,7 +68,7 @@ def run_setup( cache.close() logger.info("Reloading systemd...") - _systemd_daemon_reload(settings.scope) + daemon_reload(settings.scope) logger.info("Setup complete.") @@ -212,44 +211,6 @@ def _fetch_and_register_infisical( provider.close() -def _systemd_daemon_reload(scope: SystemdScope) -> None: - """Reload systemd via D-Bus, falling back to systemctl. - - Logs a warning and skips if neither D-Bus nor systemctl is available - (e.g. minimal test containers without systemd). - """ - try: - _dbus_daemon_reload(scope) - return - except Exception as e: - logger.debug("D-Bus daemon-reload failed ({}), falling back to systemctl", e) - - cmd = ["systemctl"] - if scope == SystemdScope.USER: - cmd.append("--user") - cmd.append("daemon-reload") - try: - subprocess.run(cmd, check=True) - except FileNotFoundError: - logger.warning("systemctl not available, skipping daemon-reload") - - -def _dbus_daemon_reload(scope: SystemdScope) -> None: - """Reload systemd via D-Bus. Raises on any failure.""" - import dbus - - bus = dbus.SessionBus() if scope == SystemdScope.USER else dbus.SystemBus() - systemd = bus.get_object( - "org.freedesktop.systemd1", - "/org/freedesktop/systemd1", - ) - manager = dbus.Interface( - systemd, - "org.freedesktop.systemd1.Manager", - ) - manager.Reload() - - def _register_secrets( settings: PsiSettings, workload_name: str, diff --git a/psi/systemd.py b/psi/systemd.py index ef4c2cc..fa8da95 100644 --- a/psi/systemd.py +++ b/psi/systemd.py @@ -9,7 +9,58 @@ import subprocess from datetime import UTC, datetime -from psi.models import TimerInfo +from loguru import logger + +from psi.models import SystemdScope, TimerInfo + + +def daemon_reload(scope: SystemdScope) -> None: + """Reload systemd, preferring D-Bus and falling back to systemctl. + + Works correctly when called from inside a container that has the system + D-Bus socket mounted, and gracefully no-ops on minimal environments where + neither D-Bus nor systemctl is available (e.g. build containers). + + Args: + scope: System or user systemd instance. + """ + try: + _dbus_daemon_reload(scope) + return + except Exception as e: + logger.debug("D-Bus daemon-reload failed ({}), falling back to systemctl", e) + + cmd = ["systemctl"] + if scope == SystemdScope.USER: + cmd.append("--user") + cmd.append("daemon-reload") + try: + subprocess.run(cmd, check=True) + logger.info("Reloaded systemd.") + except FileNotFoundError: + logger.warning("systemctl not available, skipping daemon-reload") + except subprocess.CalledProcessError as e: + logger.warning( + "systemctl daemon-reload failed ({}); skipping — " + "run 'systemctl daemon-reload' on the host manually.", + e, + ) + + +def _dbus_daemon_reload(scope: SystemdScope) -> None: + """Reload systemd via D-Bus. Raises on any failure.""" + import dbus + + bus = dbus.SessionBus() if scope == SystemdScope.USER else dbus.SystemBus() + systemd = bus.get_object( + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + ) + manager = dbus.Interface( + systemd, + "org.freedesktop.systemd1.Manager", + ) + manager.Reload() def get_timer_info( diff --git a/tests/test_setup.py b/tests/test_setup.py index 0373994..15ee1a9 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -15,7 +15,6 @@ _generate_drop_in, _is_retryable, _setup_infisical_workload, - _systemd_daemon_reload, ) if TYPE_CHECKING: @@ -203,60 +202,6 @@ def test_other_exception_is_not_retryable(self) -> None: assert not _is_retryable(ValueError("nope")) -class TestSystemdDaemonReload: - def test_dbus_failure_falls_back_to_subprocess(self) -> None: - """D-Bus failures (import error, missing bus socket) fall back to systemctl.""" - with ( - patch( - "psi.setup._dbus_daemon_reload", - side_effect=RuntimeError("DBusException: bus not found"), - ), - patch("psi.setup.subprocess.run") as mock_run, - ): - _systemd_daemon_reload(SystemdScope.SYSTEM) - - mock_run.assert_called_once_with(["systemctl", "daemon-reload"], check=True) - - def test_dbus_import_error_falls_back(self) -> None: - with ( - patch("psi.setup._dbus_daemon_reload", side_effect=ImportError("no dbus")), - patch("psi.setup.subprocess.run") as mock_run, - ): - _systemd_daemon_reload(SystemdScope.SYSTEM) - - mock_run.assert_called_once_with(["systemctl", "daemon-reload"], check=True) - - def test_user_scope_uses_user_flag_in_fallback(self) -> None: - with ( - patch("psi.setup._dbus_daemon_reload", side_effect=ImportError("no dbus")), - patch("psi.setup.subprocess.run") as mock_run, - ): - _systemd_daemon_reload(SystemdScope.USER) - - mock_run.assert_called_once_with(["systemctl", "--user", "daemon-reload"], check=True) - - def test_dbus_success_skips_subprocess(self) -> None: - with ( - patch("psi.setup._dbus_daemon_reload") as mock_dbus, - patch("psi.setup.subprocess.run") as mock_run, - ): - _systemd_daemon_reload(SystemdScope.SYSTEM) - - mock_dbus.assert_called_once() - mock_run.assert_not_called() - - def test_missing_systemctl_is_skipped_with_warning(self) -> None: - """When neither D-Bus nor systemctl is available, log and skip.""" - with ( - patch("psi.setup._dbus_daemon_reload", side_effect=ImportError("no dbus")), - patch( - "psi.setup.subprocess.run", - side_effect=FileNotFoundError(2, "No such file", "systemctl"), - ), - ): - _systemd_daemon_reload(SystemdScope.SYSTEM) - - class TestSetupRetry: def test_retries_on_connect_error_then_succeeds(self, tmp_path: Path) -> None: call_count = 0 diff --git a/tests/test_systemd.py b/tests/test_systemd.py index d9b43f8..6d526eb 100644 --- a/tests/test_systemd.py +++ b/tests/test_systemd.py @@ -5,7 +5,14 @@ import unittest.mock from unittest.mock import patch -from psi.systemd import _systemctl_show, _usec_to_iso, get_timer_info, get_unit_state +from psi.models import SystemdScope +from psi.systemd import ( + _systemctl_show, + _usec_to_iso, + daemon_reload, + get_timer_info, + get_unit_state, +) class TestUsecToIso: @@ -113,3 +120,70 @@ def test_systemctl_show_no_user_flag_by_default(self) -> None: _systemctl_show("test.service", ["ActiveState"]) cmd = mock.call_args[0][0] assert "--user" not in cmd + + +class TestDaemonReload: + def test_dbus_success_skips_subprocess(self) -> None: + with ( + patch("psi.systemd._dbus_daemon_reload") as mock_dbus, + patch("psi.systemd.subprocess.run") as mock_run, + ): + daemon_reload(SystemdScope.SYSTEM) + mock_dbus.assert_called_once() + mock_run.assert_not_called() + + def test_dbus_failure_falls_back_to_subprocess(self) -> None: + with ( + patch( + "psi.systemd._dbus_daemon_reload", + side_effect=RuntimeError("bus not found"), + ), + patch("psi.systemd.subprocess.run") as mock_run, + ): + daemon_reload(SystemdScope.SYSTEM) + mock_run.assert_called_once_with(["systemctl", "daemon-reload"], check=True) + + def test_dbus_import_error_falls_back(self) -> None: + with ( + patch("psi.systemd._dbus_daemon_reload", side_effect=ImportError("no dbus")), + patch("psi.systemd.subprocess.run") as mock_run, + ): + daemon_reload(SystemdScope.SYSTEM) + mock_run.assert_called_once_with(["systemctl", "daemon-reload"], check=True) + + def test_user_scope_uses_user_flag_in_fallback(self) -> None: + with ( + patch("psi.systemd._dbus_daemon_reload", side_effect=ImportError("no dbus")), + patch("psi.systemd.subprocess.run") as mock_run, + ): + daemon_reload(SystemdScope.USER) + mock_run.assert_called_once_with(["systemctl", "--user", "daemon-reload"], check=True) + + def test_missing_systemctl_is_skipped_with_warning(self) -> None: + """When neither D-Bus nor systemctl is available, log and skip rather than raise.""" + with ( + patch("psi.systemd._dbus_daemon_reload", side_effect=ImportError("no dbus")), + patch( + "psi.systemd.subprocess.run", + side_effect=FileNotFoundError(2, "No such file", "systemctl"), + ), + ): + daemon_reload(SystemdScope.SYSTEM) + + def test_systemctl_error_is_skipped_with_warning(self) -> None: + """CalledProcessError from systemctl is downgraded to a warning (not raised). + + This is the regression test for the container-mode installer crash: inside + a psi container, `systemctl` exits with 'System has not been booted with + systemd' and CalledProcessError — installer must not abort. + """ + import subprocess + + with ( + patch("psi.systemd._dbus_daemon_reload", side_effect=ImportError("no dbus")), + patch( + "psi.systemd.subprocess.run", + side_effect=subprocess.CalledProcessError(1, "systemctl"), + ), + ): + daemon_reload(SystemdScope.SYSTEM) From 13f78d70aafbd7a5dfa1b0031a763bc487e3edae Mon Sep 17 00:00:00 2001 From: Joe Doss Date: Wed, 8 Apr 2026 00:23:30 -0500 Subject: [PATCH 2/2] Remove invalid Type=simple from serve quadlet; add ContainerName MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit psi systemd install wrote a quadlet that podman quadlet-generator rejected with: converting "psi-secrets.container": invalid service Type 'simple' The .service unit was never created, so systemctl restart psi-secrets.service failed with 'Unit not found' — a half-applied install with no clear error. Quadlet .container units only allow Type=notify (default), forking, oneshot, or exec. Long-running psi serve works fine under the Type=notify default, which podman wires up via its built-in sdnotify support. Also add ContainerName= to the serve, setup, and tls-renew generators. Without it, quadlet names the container systemd-, and routine operator commands like podman exec psi-secrets break. --- psi/unitgen.py | 6 +++++- tests/test_unitgen.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/psi/unitgen.py b/psi/unitgen.py index 6bff0c8..61bce02 100644 --- a/psi/unitgen.py +++ b/psi/unitgen.py @@ -180,6 +180,7 @@ def generate_container_provider_setup_quadlet( "Requires=psi-secrets.service", "", "[Container]", + f"ContainerName=psi-{provider}-setup", f"Image={image}", f"Exec=setup --provider {provider}", "Network=host", @@ -231,6 +232,7 @@ def generate_container_tls_renew_quadlet(image: str, settings: PsiSettings) -> s "Wants=network-online.target", "", "[Container]", + "ContainerName=psi-tls-renew", f"Image={image}", "Exec=tls renew", "Network=host", @@ -348,6 +350,7 @@ def generate_container_serve_quadlet(image: str, settings: PsiSettings) -> str: "Wants=network-online.target", "", "[Container]", + "ContainerName=psi-secrets", f"Image={image}", "Exec=serve", "Network=host", @@ -363,11 +366,12 @@ def generate_container_serve_quadlet(image: str, settings: PsiSettings) -> str: lines.extend(cache_container) + # Quadlet rejects Type=simple for .container units — it sets Type=notify + # automatically. Only Restart and the cache credential go in [Service]. lines.extend( [ "", "[Service]", - "Type=simple", "Restart=on-failure", f"RuntimeDirectory={runtime_dir.name}", ] diff --git a/tests/test_unitgen.py b/tests/test_unitgen.py index d768023..6f3587f 100644 --- a/tests/test_unitgen.py +++ b/tests/test_unitgen.py @@ -381,3 +381,41 @@ def test_native_serve_no_settings_is_safe(self) -> None: content = generate_native_serve_service("/usr/bin/psi", SystemdScope.SYSTEM) assert "psi-cache-key" not in content assert "StateDirectory=psi" in content + + +class TestQuadletTranslatability: + """Ensure quadlet .container files translate cleanly under podman quadlet.""" + + def test_serve_quadlet_does_not_set_invalid_type_simple(self, tmp_path: Path) -> None: + """Quadlet rejects Type=simple for .container units. + + Setting it causes podman's quadlet generator to fail with + 'invalid service Type "simple"' and the resulting .service unit is + never created. Long-running containers must use the quadlet default + (Type=notify) or Type=exec. + """ + settings = _mock_settings(tmp_path, cache_backend="hsm") + content = generate_container_serve_quadlet("psi:latest", settings) + assert "Type=simple" not in content + + def test_setup_quadlet_uses_oneshot(self, tmp_path: Path) -> None: + """Type=oneshot is valid for quadlet .container units.""" + settings = _mock_settings(tmp_path, cache_backend="hsm") + content = generate_container_provider_setup_quadlet("psi:latest", settings, "infisical") + assert "Type=oneshot" in content + + def test_serve_quadlet_has_container_name(self, tmp_path: Path) -> None: + """ContainerName=psi-secrets lets operators `podman exec psi-secrets`.""" + settings = _mock_settings(tmp_path) + content = generate_container_serve_quadlet("psi:latest", settings) + assert "ContainerName=psi-secrets" in content + + def test_setup_quadlet_has_container_name(self, tmp_path: Path) -> None: + settings = _mock_settings(tmp_path) + content = generate_container_provider_setup_quadlet("psi:latest", settings, "infisical") + assert "ContainerName=psi-infisical-setup" in content + + def test_tls_renew_quadlet_has_container_name(self, tmp_path: Path) -> None: + settings = _mock_settings(tmp_path) + content = generate_container_tls_renew_quadlet("psi:latest", settings) + assert "ContainerName=psi-tls-renew" in content