diff --git a/AGENTS.md b/AGENTS.md index 92e0ad4..8244613 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -48,7 +48,7 @@ make dispatch-eval QUESTION="What is the secret number?" EXPECTED="42" ``` src/trivia_agent/ -├── worker.py # MainLoop entry point +├── agent_loop.py # AgentLoop entry point ├── eval_loop.py # EvalLoop factory ├── sections.py # Question, GameRules, Hints, LuckyDice sections ├── tools.py # hint_lookup, pick_up_dice, throw_dice tools diff --git a/README.md b/README.md index 3d77cc4..feec2a1 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ starter/ │ └── SKILL.md ├── src/ │ └── trivia_agent/ -│ ├── worker.py # MainLoop entry point +│ ├── agent_loop.py # AgentLoop entry point │ ├── eval_loop.py # EvalLoop factory │ ├── dispatch.py # Submit questions to the agent │ ├── models.py # Request/Response dataclasses @@ -323,7 +323,7 @@ make format # Format code ┌────────────┴────────────┐ ▼ ▼ ┌────────────────────────────┐ ┌────────────────────────────┐ -│ MainLoop │ │ EvalLoop │ +│ AgentLoop │ │ EvalLoop │ │ (production requests) │ │ (evaluation samples) │ └─────────────┬──────────────┘ └─────────────┬──────────────┘ │ │ diff --git a/pyproject.toml b/pyproject.toml index 8240edd..d71300c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ dependencies = [ ] [project.scripts] -trivia-agent = "trivia_agent.worker:main" +trivia-agent = "trivia_agent.agent_loop:main" trivia-dispatch = "trivia_agent.dispatch:main" [build-system] @@ -41,7 +41,7 @@ include = ["src"] typeCheckingMode = "strict" [tool.uv.sources] -weakincentives = { git = "https://github.com/weakincentives/weakincentives.git", tag = "v0.23.0" } +weakincentives = { git = "https://github.com/weakincentives/weakincentives.git", tag = "v0.24.0" } [dependency-groups] dev = [ @@ -51,6 +51,6 @@ dev = [ "pytest-timeout", "pyright", "ruff", - "weakincentives[wink] @ git+https://github.com/weakincentives/weakincentives.git@v0.23.0", + "weakincentives[wink] @ git+https://github.com/weakincentives/weakincentives.git@v0.24.0", "pytest-rerunfailures>=16.1", ] diff --git a/src/trivia_agent/adapters.py b/src/trivia_agent/adapters.py index c8d92b5..169141a 100644 --- a/src/trivia_agent/adapters.py +++ b/src/trivia_agent/adapters.py @@ -13,7 +13,7 @@ Usage: >>> from trivia_agent.adapters import create_adapter >>> adapter = create_adapter() - >>> # Pass adapter to MainLoop.create() or EvalLoop.create() + >>> # Pass adapter to AgentLoop.create() or EvalLoop.create() """ from __future__ import annotations @@ -89,7 +89,7 @@ def create_adapter( Factory function that assembles all components needed to run the trivia agent: model selection, task completion checking, isolation configuration, and working directory setup. The returned adapter is ready to be passed - to a WINK MainLoop or EvalLoop. + to a WINK AgentLoop or EvalLoop. The adapter is configured with: - Model: Claude Sonnet (via the "sonnet" alias) @@ -109,15 +109,15 @@ def create_adapter( Returns: ClaudeAgentSDKAdapter[TriviaResponse]: A fully configured adapter instance typed to produce TriviaResponse structured output. - Pass this adapter to ``MainLoop.create()`` or ``EvalLoop.create()`` + Pass this adapter to ``AgentLoop.create()`` or ``EvalLoop.create()`` to run the trivia agent. Example: >>> from trivia_agent.isolation import create_isolation_config >>> isolation = create_isolation_config() >>> adapter = create_adapter(isolation=isolation, cwd="/path/to/workspace") - >>> # Use adapter with MainLoop - >>> loop = MainLoop.create(adapter=adapter, sections=[...]) + >>> # Use adapter with AgentLoop + >>> loop = AgentLoop.create(adapter=adapter, sections=[...]) """ checker = SimpleTaskCompletionChecker() client_config = ClaudeAgentSDKClientConfig( diff --git a/src/trivia_agent/worker.py b/src/trivia_agent/agent_loop.py similarity index 94% rename from src/trivia_agent/worker.py rename to src/trivia_agent/agent_loop.py index 452124a..c15cdee 100644 --- a/src/trivia_agent/worker.py +++ b/src/trivia_agent/agent_loop.py @@ -1,7 +1,7 @@ -"""MainLoop + EvalLoop entry point for the trivia agent. +"""AgentLoop + EvalLoop entry point for the trivia agent. This module demonstrates the full WINK architecture: -- MainLoop for production request processing +- AgentLoop for production request processing - EvalLoop for evaluation with session-aware scoring - PromptTemplate with multiple sections - Feedback providers for soft course correction @@ -28,11 +28,11 @@ from weakincentives.prompt import PromptTemplate from weakincentives.prompt.overrides import LocalPromptOverridesStore, PromptOverridesStore from weakincentives.runtime import ( + AgentLoop, + AgentLoopConfig, + AgentLoopRequest, + AgentLoopResult, LoopGroup, - MainLoop, - MainLoopConfig, - MainLoopRequest, - MainLoopResult, Session, ) from weakincentives.runtime.logging import configure_logging @@ -190,10 +190,10 @@ def build_prompt_template() -> PromptTemplate[TriviaResponse]: ) -class TriviaAgentLoop(MainLoop[TriviaRequest, TriviaResponse]): +class TriviaAgentLoop(AgentLoop[TriviaRequest, TriviaResponse]): """Main processing loop for the trivia agent. - Extends MainLoop to handle TriviaRequest inputs and produce TriviaResponse + Extends AgentLoop to handle TriviaRequest inputs and produce TriviaResponse outputs. This loop demonstrates key WINK patterns for production agents: - **Per-request preparation**: The prepare() method creates a fresh Session @@ -219,7 +219,7 @@ class TriviaAgentLoop(MainLoop[TriviaRequest, TriviaResponse]): >>> loop = TriviaAgentLoop( ... adapter=adapter, ... requests=mailboxes.requests, - ... config=MainLoopConfig(deadline=my_deadline), + ... config=AgentLoopConfig(deadline=my_deadline), ... workspace_dir=Path("./workspace"), ... ) >>> loop.run() # Process requests until shutdown @@ -232,8 +232,8 @@ def __init__( self, *, adapter: ProviderAdapter[TriviaResponse], - requests: Mailbox[MainLoopRequest[TriviaRequest], MainLoopResult[TriviaResponse]], - config: MainLoopConfig | None = None, + requests: Mailbox[AgentLoopRequest[TriviaRequest], AgentLoopResult[TriviaResponse]], + config: AgentLoopConfig | None = None, workspace_dir: Path | None = None, overrides_store: PromptOverridesStore | None = None, ) -> None: @@ -247,11 +247,11 @@ def __init__( adapter: ProviderAdapter[TriviaResponse] that executes agent sessions. Typically created via create_adapter() with appropriate isolation configuration (skills, sandbox settings, API keys). - requests: Mailbox for receiving MainLoopRequest[TriviaRequest] and - sending MainLoopResult[TriviaResponse]. Connect this to your + requests: Mailbox for receiving AgentLoopRequest[TriviaRequest] and + sending AgentLoopResult[TriviaResponse]. Connect this to your message queue (e.g., Redis via TriviaMailboxes). - config: Optional MainLoopConfig with deadline and debug bundle settings. - If None, uses default MainLoop configuration. Set config.deadline + config: Optional AgentLoopConfig with deadline and debug bundle settings. + If None, uses default AgentLoop configuration. Set config.deadline to control maximum execution time per request. workspace_dir: Path to directory containing files to seed into agent workspace. Defaults to DEFAULT_WORKSPACE_DIR (project's workspace/ @@ -273,7 +273,7 @@ def prepare( ) -> tuple[Prompt[TriviaResponse], Session]: """Prepare the prompt and session for processing a trivia request. - Called by the MainLoop for each incoming request. Creates a fresh Session + Called by the AgentLoop for each incoming request. Creates a fresh Session for isolation, builds the complete PromptTemplate with workspace section, binds request parameters, and optionally applies experiment overrides. @@ -406,7 +406,7 @@ def main( Entry point for running the trivia agent as a long-lived worker process. Initializes all dependencies (adapter, mailboxes, loops), then runs both - the MainLoop (for production requests) and EvalLoop (for evaluation + the AgentLoop (for production requests) and EvalLoop (for evaluation requests) concurrently via a LoopGroup. The worker performs these steps: @@ -483,9 +483,9 @@ def main( err.write(f"Failed to connect to Redis: {e}\n") return 1 - # Configure MainLoop with deadline and optional debug bundles + # Configure AgentLoop with deadline and optional debug bundles default_deadline = Deadline(expires_at=datetime.now(UTC) + DEFAULT_DEADLINE_DURATION) - config = MainLoopConfig( + config = AgentLoopConfig( deadline=default_deadline, debug_bundle=( BundleConfig(target=settings.debug_bundles_dir) if settings.debug_bundles_dir else None diff --git a/src/trivia_agent/dispatch.py b/src/trivia_agent/dispatch.py index 05063f8..4b15915 100644 --- a/src/trivia_agent/dispatch.py +++ b/src/trivia_agent/dispatch.py @@ -41,7 +41,7 @@ from redis import Redis from weakincentives import FrozenDataclass from weakincentives.evals import EvalRequest, EvalResult, Experiment, Sample -from weakincentives.runtime import MainLoopRequest, MainLoopResult +from weakincentives.runtime import AgentLoopRequest, AgentLoopResult from weakincentives.runtime.mailbox import Mailbox, ReceiptHandleExpiredError from trivia_agent.config import load_redis_settings @@ -100,7 +100,7 @@ class DispatchRuntime: """ mailboxes: TriviaMailboxes | None = None - responses: Mailbox[MainLoopResult[TriviaResponse], None] | None = None + responses: Mailbox[AgentLoopResult[TriviaResponse], None] | None = None eval_results: Mailbox[EvalResult, None] | None = None out: TextIO = field(default_factory=lambda: sys.stdout) err: TextIO = field(default_factory=lambda: sys.stderr) @@ -155,12 +155,12 @@ def _wait_for_eval_result( def _wait_for_response( - responses: Mailbox[MainLoopResult[TriviaResponse], None], + responses: Mailbox[AgentLoopResult[TriviaResponse], None], request_id: str, timeout_seconds: float, wait_time_seconds: int, now: Callable[[], float], -) -> MainLoopResult[TriviaResponse] | None: +) -> AgentLoopResult[TriviaResponse] | None: """Wait for a response matching the request ID. Args: @@ -400,7 +400,7 @@ def main( client.close() # Submit as regular request - main_request = MainLoopRequest(request=request) + main_request = AgentLoopRequest(request=request) if args.no_wait: # Just submit and exit diff --git a/src/trivia_agent/eval_loop.py b/src/trivia_agent/eval_loop.py index d98ce0f..2fae836 100644 --- a/src/trivia_agent/eval_loop.py +++ b/src/trivia_agent/eval_loop.py @@ -3,12 +3,12 @@ This module provides evaluation capabilities for the trivia agent, allowing you to test agent responses against expected secret answers. -The EvalLoop wraps your production MainLoop, ensuring evaluations run +The EvalLoop wraps your production AgentLoop, ensuring evaluations run against your exact agent configuration with no drift or separate test harness. Key features: - Session-aware evaluators for behavioral assertions - - Integration with MainLoop for consistent execution + - Integration with AgentLoop for consistent execution - Collocated evals (same prompts, tools, and config as production) - Debug bundles with eval metadata for tracing @@ -20,7 +20,7 @@ Or programmatically:: from trivia_agent.eval_loop import create_eval_loop - from trivia_agent.worker import TriviaAgentLoop + from trivia_agent.agent_loop import TriviaAgentLoop from trivia_agent.mailboxes import EvalRequestsMailbox loop = TriviaAgentLoop(...) @@ -30,7 +30,7 @@ See Also: - :mod:`trivia_agent.evaluators` for the trivia_evaluator implementation - - :mod:`trivia_agent.worker` for the TriviaAgentLoop being wrapped + - :mod:`trivia_agent.agent_loop` for the TriviaAgentLoop being wrapped """ from __future__ import annotations @@ -40,10 +40,10 @@ from weakincentives.evals import EvalLoop, EvalLoopConfig, SessionEvaluator +from trivia_agent.agent_loop import TriviaAgentLoop from trivia_agent.evaluators import trivia_evaluator from trivia_agent.mailboxes import EvalRequestsMailbox from trivia_agent.models import TriviaRequest, TriviaResponse -from trivia_agent.worker import TriviaAgentLoop def create_eval_loop( @@ -87,7 +87,7 @@ def create_eval_loop( from pathlib import Path from trivia_agent.eval_loop import create_eval_loop - from trivia_agent.worker import TriviaAgentLoop + from trivia_agent.agent_loop import TriviaAgentLoop from trivia_agent.mailboxes import EvalRequestsMailbox # Set up components diff --git a/src/trivia_agent/feedback.py b/src/trivia_agent/feedback.py index 5810248..7ad693d 100644 --- a/src/trivia_agent/feedback.py +++ b/src/trivia_agent/feedback.py @@ -17,12 +17,12 @@ Usage: Import and call `build_feedback_providers()` during agent setup to get - a tuple of configured providers ready for the WINK MainLoop:: + a tuple of configured providers ready for the WINK AgentLoop:: from trivia_agent.feedback import build_feedback_providers providers = build_feedback_providers() - # Pass to MainLoop or session configuration + # Pass to AgentLoop or session configuration See Also: - weakincentives.prompt.DeadlineFeedback: Built-in time awareness provider @@ -160,7 +160,7 @@ def build_feedback_providers() -> tuple[FeedbackProviderConfig, ...]: Returns: tuple[FeedbackProviderConfig, ...]: A tuple of two configured feedback - providers ready to be passed to the WINK MainLoop or session builder. + providers ready to be passed to the WINK AgentLoop or session builder. Example: >>> from trivia_agent.feedback import build_feedback_providers diff --git a/src/trivia_agent/isolation.py b/src/trivia_agent/isolation.py index fadcedb..10bdbeb 100644 --- a/src/trivia_agent/isolation.py +++ b/src/trivia_agent/isolation.py @@ -14,7 +14,7 @@ >>> from trivia_agent.isolation import resolve_isolation_config >>> import os >>> config = resolve_isolation_config(os.environ) - >>> # Use config with your agent's MainLoop or EvalLoop + >>> # Use config with your agent's AgentLoop or EvalLoop """ from __future__ import annotations @@ -135,16 +135,16 @@ def resolve_isolation_config( - TRIVIA_DISABLE_SANDBOX: Set to disable sandbox (e.g., "1" or "true") Returns: - An IsolationConfig ready to pass to MainLoop or EvalLoop. Contains: + An IsolationConfig ready to pass to AgentLoop or EvalLoop. Contains: - sandbox: SandboxConfig with enabled/disabled state - skills: SkillConfig with discovered skills, or None - api_key: The Anthropic API key for authenticated requests Example: >>> import os - >>> from trivia_agent.worker import MainLoop + >>> from trivia_agent.agent_loop import AgentLoop >>> config = resolve_isolation_config(os.environ) - >>> loop = MainLoop(isolation=config, ...) + >>> loop = AgentLoop(isolation=config, ...) """ skills_config = resolve_skills_config(env) api_key = env.get(API_KEY_ENV) diff --git a/src/trivia_agent/mailboxes.py b/src/trivia_agent/mailboxes.py index 60ab07c..ea88cb8 100644 --- a/src/trivia_agent/mailboxes.py +++ b/src/trivia_agent/mailboxes.py @@ -32,7 +32,7 @@ from weakincentives import FrozenDataclass from weakincentives.contrib.mailbox import RedisMailbox from weakincentives.evals import EvalRequest, EvalResult, Experiment, Sample -from weakincentives.runtime import MainLoopRequest, MainLoopResult +from weakincentives.runtime import AgentLoopRequest, AgentLoopResult from weakincentives.serde import parse from trivia_agent.config import RedisSettings @@ -66,14 +66,14 @@ def _parse_eval_request(data: Mapping[str, Any]) -> EvalRequest[TriviaRequest, s # Type aliases for the mailboxes RequestsMailbox = RedisMailbox[ - MainLoopRequest[TriviaRequest], - MainLoopResult[TriviaResponse], + AgentLoopRequest[TriviaRequest], + AgentLoopResult[TriviaResponse], ] EvalRequestsMailbox = RedisMailbox[ EvalRequest[TriviaRequest, str], EvalResult, ] -ResponsesMailbox = RedisMailbox[MainLoopResult[TriviaResponse], None] +ResponsesMailbox = RedisMailbox[AgentLoopResult[TriviaResponse], None] EvalResultsMailbox = RedisMailbox[EvalResult, None] @@ -88,7 +88,7 @@ def build_reply_queue_name(prefix: str, request_id: "UUID") -> str: prefix: The base name for reply queues (e.g., "trivia-replies"). Must be non-empty. request_id: The UUID of the request being processed. Typically - obtained from MainLoopRequest.id or EvalRequest.sample.id. + obtained from AgentLoopRequest.id or EvalRequest.sample.id. Returns: A queue name in the format "{prefix}-{request_id}". @@ -117,8 +117,8 @@ class TriviaMailboxes: Attributes: requests: Mailbox for regular trivia questions. Receives - MainLoopRequest[TriviaRequest] messages and allows sending - MainLoopResult[TriviaResponse] replies. + AgentLoopRequest[TriviaRequest] messages and allows sending + AgentLoopResult[TriviaResponse] replies. eval_requests: Mailbox for evaluation runs. Receives EvalRequest[TriviaRequest, str] messages (where str is the expected answer) and allows sending EvalResult replies. @@ -173,8 +173,8 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes: settings = RedisSettings() # Loads from environment mailboxes = create_mailboxes(settings) - # Pass to MainLoop - main_loop = MainLoop( + # Pass to AgentLoop + main_loop = AgentLoop( mailboxes=[mailboxes.requests, mailboxes.eval_requests], ... ) @@ -184,7 +184,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes: requests: RequestsMailbox = RedisMailbox( name=settings.requests_queue, client=client, - body_type=MainLoopRequest[TriviaRequest], + body_type=AgentLoopRequest[TriviaRequest], ) eval_requests: EvalRequestsMailbox = RedisMailbox( @@ -202,7 +202,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes: def create_responses_mailbox( client: Redis, # type: ignore[type-arg] queue_name: str, -) -> "Mailbox[MainLoopResult[TriviaResponse], None]": +) -> "Mailbox[AgentLoopResult[TriviaResponse], None]": """Create a responses mailbox for receiving replies from the worker. Used by dispatch scripts to create a dedicated mailbox for receiving @@ -219,7 +219,7 @@ def create_responses_mailbox( `build_reply_queue_name(prefix, request_id)`. Returns: - A mailbox that yields MainLoopResult[TriviaResponse] messages. + A mailbox that yields AgentLoopResult[TriviaResponse] messages. Iterate over it or call methods like `.get()` to receive responses. Example: @@ -240,11 +240,11 @@ def create_responses_mailbox( print(result.body.answer) """ return cast( - "Mailbox[MainLoopResult[TriviaResponse], None]", + "Mailbox[AgentLoopResult[TriviaResponse], None]", RedisMailbox( name=queue_name, client=client, - body_type=MainLoopResult[TriviaResponse], + body_type=AgentLoopResult[TriviaResponse], ), ) @@ -258,7 +258,7 @@ def create_eval_results_mailbox( Used by eval dispatch scripts to create a dedicated mailbox for receiving the evaluation result for a specific sample. Similar to `create_responses_mailbox()` but typed for EvalResult instead of - MainLoopResult. + AgentLoopResult. The EvalResult contains the evaluation score and any evaluator-specific metadata from the trivia_evaluator. diff --git a/tests/trivia_agent/test_worker.py b/tests/trivia_agent/test_agent_loop.py similarity index 94% rename from tests/trivia_agent/test_worker.py rename to tests/trivia_agent/test_agent_loop.py index 6e1c63c..fb0fb18 100644 --- a/tests/trivia_agent/test_worker.py +++ b/tests/trivia_agent/test_agent_loop.py @@ -1,4 +1,4 @@ -"""Tests for trivia agent worker.""" +"""Tests for trivia agent loop.""" from __future__ import annotations @@ -10,17 +10,17 @@ import fakeredis import pytest -from trivia_agent.config import RedisSettings -from trivia_agent.mailboxes import TriviaMailboxes, create_mailboxes -from trivia_agent.models import TriviaRequest, TriviaResponse -from trivia_agent.sections import QuestionParams, build_question_section -from trivia_agent.worker import ( +from trivia_agent.agent_loop import ( TriviaAgentLoop, TriviaRuntime, create_workspace_section, enumerate_workspace_mounts, main, ) +from trivia_agent.config import RedisSettings +from trivia_agent.mailboxes import TriviaMailboxes, create_mailboxes +from trivia_agent.models import TriviaRequest, TriviaResponse +from trivia_agent.sections import QuestionParams, build_question_section if TYPE_CHECKING: from weakincentives.adapters import ProviderAdapter @@ -310,7 +310,7 @@ def test_api_key_not_required_when_adapter_injected( monkeypatch.setenv("REDIS_URL", "redis://localhost:6379") monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) - with patch("trivia_agent.worker.LoopGroup") as mock_loop_group: + with patch("trivia_agent.agent_loop.LoopGroup") as mock_loop_group: mock_instance = MagicMock() mock_loop_group.return_value = mock_instance result = main(runtime=runtime) @@ -336,7 +336,7 @@ def test_successful_startup( ) # Patch LoopGroup.run to avoid actually running - with patch("trivia_agent.worker.LoopGroup") as mock_loop_group: + with patch("trivia_agent.agent_loop.LoopGroup") as mock_loop_group: mock_instance = MagicMock() mock_loop_group.return_value = mock_instance @@ -363,7 +363,7 @@ def test_creates_real_dependencies_when_not_injected( runtime = TriviaRuntime(out=out, err=err) - with patch("trivia_agent.worker.LoopGroup") as mock_loop_group: + with patch("trivia_agent.agent_loop.LoopGroup") as mock_loop_group: mock_instance = MagicMock() mock_loop_group.return_value = mock_instance @@ -393,8 +393,8 @@ def test_debug_bundle_config_when_dir_set( bundles_dir = tmp_path / "debug_bundles" - with patch("trivia_agent.worker.LoopGroup") as mock_loop_group: - with patch("trivia_agent.worker.TriviaAgentLoop") as mock_qa_loop: + with patch("trivia_agent.agent_loop.LoopGroup") as mock_loop_group: + with patch("trivia_agent.agent_loop.TriviaAgentLoop") as mock_qa_loop: mock_instance = MagicMock() mock_loop_group.return_value = mock_instance mock_qa_loop.return_value = MagicMock() @@ -428,8 +428,8 @@ def test_no_debug_bundle_config_when_dir_not_set( err=err, ) - with patch("trivia_agent.worker.LoopGroup") as mock_loop_group: - with patch("trivia_agent.worker.TriviaAgentLoop") as mock_qa_loop: + with patch("trivia_agent.agent_loop.LoopGroup") as mock_loop_group: + with patch("trivia_agent.agent_loop.TriviaAgentLoop") as mock_qa_loop: mock_instance = MagicMock() mock_loop_group.return_value = mock_instance mock_qa_loop.return_value = MagicMock() @@ -465,8 +465,8 @@ def test_prompt_overrides_store_when_dir_set( overrides_dir = tmp_path / "prompt_overrides" - with patch("trivia_agent.worker.LoopGroup") as mock_loop_group: - with patch("trivia_agent.worker.TriviaAgentLoop") as mock_qa_loop: + with patch("trivia_agent.agent_loop.LoopGroup") as mock_loop_group: + with patch("trivia_agent.agent_loop.TriviaAgentLoop") as mock_qa_loop: mock_instance = MagicMock() mock_loop_group.return_value = mock_instance mock_qa_loop.return_value = MagicMock() @@ -493,7 +493,7 @@ def test_adapter_creation_failure( monkeypatch.setenv("REDIS_URL", "redis://localhost:6379") monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key") - with patch("trivia_agent.worker.create_adapter") as mock_create_adapter: + with patch("trivia_agent.agent_loop.create_adapter") as mock_create_adapter: mock_create_adapter.side_effect = RuntimeError("SDK initialization failed") result = main(runtime=runtime) @@ -514,7 +514,7 @@ def test_mailbox_creation_failure( monkeypatch.setenv("REDIS_URL", "redis://localhost:6379") - with patch("trivia_agent.worker.create_mailboxes") as mock_create_mailboxes: + with patch("trivia_agent.agent_loop.create_mailboxes") as mock_create_mailboxes: mock_create_mailboxes.side_effect = ConnectionError("Connection refused") result = main(runtime=runtime) diff --git a/tests/trivia_agent/test_dispatch.py b/tests/trivia_agent/test_dispatch.py index d8da172..4944366 100644 --- a/tests/trivia_agent/test_dispatch.py +++ b/tests/trivia_agent/test_dispatch.py @@ -9,7 +9,7 @@ import pytest from weakincentives.evals import EvalResult, Score -from weakincentives.runtime import MainLoopResult +from weakincentives.runtime import AgentLoopResult from trivia_agent.dispatch import ( DispatchRuntime, @@ -198,7 +198,7 @@ class TestWaitForResponse: def test_returns_matching_response(self) -> None: """Test that matching response is returned.""" request_id = "12345678-1234-5678-1234-567812345678" - response = MainLoopResult( + response = AgentLoopResult( request_id=UUID(request_id), output=TriviaResponse(answer="42"), error=None, @@ -256,12 +256,12 @@ def test_nacks_non_matching_response(self) -> None: request_id = "12345678-1234-5678-1234-567812345678" wrong_id = "87654321-4321-8765-4321-876543218765" - wrong_response = MainLoopResult( + wrong_response = AgentLoopResult( request_id=UUID(wrong_id), output=TriviaResponse(answer="wrong"), error=None, ) - correct_response = MainLoopResult( + correct_response = AgentLoopResult( request_id=UUID(request_id), output=TriviaResponse(answer="correct"), error=None, @@ -316,7 +316,7 @@ def test_wait_for_response_success( ) mock_response_msg = MagicMock() - mock_response_msg.body = MainLoopResult( + mock_response_msg.body = AgentLoopResult( request_id=UUID("12345678-1234-5678-1234-567812345678"), output=TriviaResponse(answer="4"), error=None, @@ -340,9 +340,9 @@ def now() -> float: now=now, ) - # Patch Redis and MainLoopRequest to control request_id + # Patch Redis and AgentLoopRequest to control request_id with patch("trivia_agent.dispatch.Redis"): - with patch("trivia_agent.dispatch.MainLoopRequest") as mock_request_cls: + with patch("trivia_agent.dispatch.AgentLoopRequest") as mock_request_cls: mock_request = MagicMock() mock_request.request_id = UUID("12345678-1234-5678-1234-567812345678") mock_request.request = MagicMock() @@ -416,7 +416,7 @@ def test_response_with_error( ) mock_response_msg = MagicMock() - mock_response_msg.body = MainLoopResult( + mock_response_msg.body = AgentLoopResult( request_id=UUID("12345678-1234-5678-1234-567812345678"), output=None, error="Agent failed", @@ -441,7 +441,7 @@ def now() -> float: ) with patch("trivia_agent.dispatch.Redis"): - with patch("trivia_agent.dispatch.MainLoopRequest") as mock_request_cls: + with patch("trivia_agent.dispatch.AgentLoopRequest") as mock_request_cls: mock_request = MagicMock() mock_request.request_id = UUID("12345678-1234-5678-1234-567812345678") mock_request.request = MagicMock() @@ -472,7 +472,7 @@ def test_response_with_no_output( ) mock_response_msg = MagicMock() - mock_response_msg.body = MainLoopResult( + mock_response_msg.body = AgentLoopResult( request_id=UUID("12345678-1234-5678-1234-567812345678"), output=None, error=None, @@ -497,7 +497,7 @@ def now() -> float: ) with patch("trivia_agent.dispatch.Redis"): - with patch("trivia_agent.dispatch.MainLoopRequest") as mock_request_cls: + with patch("trivia_agent.dispatch.AgentLoopRequest") as mock_request_cls: mock_request = MagicMock() mock_request.request_id = UUID("12345678-1234-5678-1234-567812345678") mock_request.request = MagicMock() diff --git a/tests/trivia_agent/test_eval_loop.py b/tests/trivia_agent/test_eval_loop.py index 3b5bbcf..3f6b39e 100644 --- a/tests/trivia_agent/test_eval_loop.py +++ b/tests/trivia_agent/test_eval_loop.py @@ -9,11 +9,11 @@ import pytest from weakincentives.evals import EvalLoop +from trivia_agent.agent_loop import TriviaAgentLoop from trivia_agent.config import RedisSettings from trivia_agent.eval_loop import create_eval_loop from trivia_agent.mailboxes import TriviaMailboxes, create_mailboxes from trivia_agent.models import TriviaResponse -from trivia_agent.worker import TriviaAgentLoop if TYPE_CHECKING: from weakincentives.adapters import ProviderAdapter