diff --git a/AGENTS.md b/AGENTS.md index 92e0ad4..d3e4098 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -48,7 +48,7 @@ make dispatch-eval QUESTION="What is the secret number?" EXPECTED="42" ``` src/trivia_agent/ -├── worker.py # MainLoop entry point +├── worker.py # AgentLoop entry point ├── eval_loop.py # EvalLoop factory ├── sections.py # Question, GameRules, Hints, LuckyDice sections ├── tools.py # hint_lookup, pick_up_dice, throw_dice tools diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cc38c3a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,15 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). + +## [Unreleased] + +### Changed + +- Renamed `MainLoop` to `AgentLoop` throughout the codebase for better alignment with mainstream agent terminology. This pairs well with the `EvalLoop` naming convention. + - `MainLoop` → `AgentLoop` + - `MainLoopConfig` → `AgentLoopConfig` + - `MainLoopRequest` → `AgentLoopRequest` + - `MainLoopResult` → `AgentLoopResult` diff --git a/README.md b/README.md index 48529d2..6f779f2 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ starter/ │ └── SKILL.md ├── src/ │ └── trivia_agent/ -│ ├── worker.py # MainLoop entry point +│ ├── worker.py # AgentLoop entry point │ ├── eval_loop.py # EvalLoop factory │ ├── dispatch.py # Submit questions to the agent │ ├── models.py # Request/Response dataclasses @@ -326,7 +326,7 @@ make format # Format code ┌────────────┴────────────┐ ▼ ▼ ┌────────────────────────────┐ ┌────────────────────────────┐ -│ MainLoop │ │ EvalLoop │ +│ AgentLoop │ │ EvalLoop │ │ (production requests) │ │ (evaluation samples) │ └─────────────┬──────────────┘ └─────────────┬──────────────┘ │ │ diff --git a/src/trivia_agent/adapters.py b/src/trivia_agent/adapters.py index c8d92b5..169141a 100644 --- a/src/trivia_agent/adapters.py +++ b/src/trivia_agent/adapters.py @@ -13,7 +13,7 @@ 
Usage: >>> from trivia_agent.adapters import create_adapter >>> adapter = create_adapter() - >>> # Pass adapter to MainLoop.create() or EvalLoop.create() + >>> # Pass adapter to AgentLoop.create() or EvalLoop.create() """ from __future__ import annotations @@ -89,7 +89,7 @@ def create_adapter( Factory function that assembles all components needed to run the trivia agent: model selection, task completion checking, isolation configuration, and working directory setup. The returned adapter is ready to be passed - to a WINK MainLoop or EvalLoop. + to a WINK AgentLoop or EvalLoop. The adapter is configured with: - Model: Claude Sonnet (via the "sonnet" alias) @@ -109,15 +109,15 @@ def create_adapter( Returns: ClaudeAgentSDKAdapter[TriviaResponse]: A fully configured adapter instance typed to produce TriviaResponse structured output. - Pass this adapter to ``MainLoop.create()`` or ``EvalLoop.create()`` + Pass this adapter to ``AgentLoop.create()`` or ``EvalLoop.create()`` to run the trivia agent. 
Example: >>> from trivia_agent.isolation import create_isolation_config >>> isolation = create_isolation_config() >>> adapter = create_adapter(isolation=isolation, cwd="/path/to/workspace") - >>> # Use adapter with MainLoop - >>> loop = MainLoop.create(adapter=adapter, sections=[...]) + >>> # Use adapter with AgentLoop + >>> loop = AgentLoop.create(adapter=adapter, sections=[...]) """ checker = SimpleTaskCompletionChecker() client_config = ClaudeAgentSDKClientConfig( diff --git a/src/trivia_agent/dispatch.py b/src/trivia_agent/dispatch.py index 05063f8..919f571 100644 --- a/src/trivia_agent/dispatch.py +++ b/src/trivia_agent/dispatch.py @@ -41,7 +41,10 @@ from redis import Redis from weakincentives import FrozenDataclass from weakincentives.evals import EvalRequest, EvalResult, Experiment, Sample -from weakincentives.runtime import MainLoopRequest, MainLoopResult +from weakincentives.runtime import ( + MainLoopRequest as AgentLoopRequest, + MainLoopResult as AgentLoopResult, +) from weakincentives.runtime.mailbox import Mailbox, ReceiptHandleExpiredError from trivia_agent.config import load_redis_settings @@ -100,7 +103,7 @@ class DispatchRuntime: """ mailboxes: TriviaMailboxes | None = None - responses: Mailbox[MainLoopResult[TriviaResponse], None] | None = None + responses: Mailbox[AgentLoopResult[TriviaResponse], None] | None = None eval_results: Mailbox[EvalResult, None] | None = None out: TextIO = field(default_factory=lambda: sys.stdout) err: TextIO = field(default_factory=lambda: sys.stderr) @@ -155,12 +158,12 @@ def _wait_for_eval_result( def _wait_for_response( - responses: Mailbox[MainLoopResult[TriviaResponse], None], + responses: Mailbox[AgentLoopResult[TriviaResponse], None], request_id: str, timeout_seconds: float, wait_time_seconds: int, now: Callable[[], float], -) -> MainLoopResult[TriviaResponse] | None: +) -> AgentLoopResult[TriviaResponse] | None: """Wait for a response matching the request ID. 
Args: @@ -400,7 +403,7 @@ def main( client.close() # Submit as regular request - main_request = MainLoopRequest(request=request) + main_request = AgentLoopRequest(request=request) if args.no_wait: # Just submit and exit diff --git a/src/trivia_agent/eval_loop.py b/src/trivia_agent/eval_loop.py index d98ce0f..02e0ce1 100644 --- a/src/trivia_agent/eval_loop.py +++ b/src/trivia_agent/eval_loop.py @@ -3,12 +3,12 @@ This module provides evaluation capabilities for the trivia agent, allowing you to test agent responses against expected secret answers. -The EvalLoop wraps your production MainLoop, ensuring evaluations run +The EvalLoop wraps your production AgentLoop, ensuring evaluations run against your exact agent configuration with no drift or separate test harness. Key features: - Session-aware evaluators for behavioral assertions - - Integration with MainLoop for consistent execution + - Integration with AgentLoop for consistent execution - Collocated evals (same prompts, tools, and config as production) - Debug bundles with eval metadata for tracing diff --git a/src/trivia_agent/feedback.py b/src/trivia_agent/feedback.py index 5810248..7ad693d 100644 --- a/src/trivia_agent/feedback.py +++ b/src/trivia_agent/feedback.py @@ -17,12 +17,12 @@ Usage: Import and call `build_feedback_providers()` during agent setup to get - a tuple of configured providers ready for the WINK MainLoop:: + a tuple of configured providers ready for the WINK AgentLoop:: from trivia_agent.feedback import build_feedback_providers providers = build_feedback_providers() - # Pass to MainLoop or session configuration + # Pass to AgentLoop or session configuration See Also: - weakincentives.prompt.DeadlineFeedback: Built-in time awareness provider @@ -160,7 +160,7 @@ def build_feedback_providers() -> tuple[FeedbackProviderConfig, ...]: Returns: tuple[FeedbackProviderConfig, ...]: A tuple of two configured feedback - providers ready to be passed to the WINK MainLoop or session builder. 
+ providers ready to be passed to the WINK AgentLoop or session builder. Example: >>> from trivia_agent.feedback import build_feedback_providers diff --git a/src/trivia_agent/isolation.py b/src/trivia_agent/isolation.py index fadcedb..1e87b48 100644 --- a/src/trivia_agent/isolation.py +++ b/src/trivia_agent/isolation.py @@ -14,7 +14,7 @@ >>> from trivia_agent.isolation import resolve_isolation_config >>> import os >>> config = resolve_isolation_config(os.environ) - >>> # Use config with your agent's MainLoop or EvalLoop + >>> # Use config with your agent's AgentLoop or EvalLoop """ from __future__ import annotations @@ -135,16 +135,16 @@ def resolve_isolation_config( - TRIVIA_DISABLE_SANDBOX: Set to disable sandbox (e.g., "1" or "true") Returns: - An IsolationConfig ready to pass to MainLoop or EvalLoop. Contains: + An IsolationConfig ready to pass to AgentLoop or EvalLoop. Contains: - sandbox: SandboxConfig with enabled/disabled state - skills: SkillConfig with discovered skills, or None - api_key: The Anthropic API key for authenticated requests Example: >>> import os - >>> from trivia_agent.worker import MainLoop + >>> from trivia_agent.worker import AgentLoop >>> config = resolve_isolation_config(os.environ) - >>> loop = MainLoop(isolation=config, ...) + >>> loop = AgentLoop(isolation=config, ...) 
""" skills_config = resolve_skills_config(env) api_key = env.get(API_KEY_ENV) diff --git a/src/trivia_agent/mailboxes.py b/src/trivia_agent/mailboxes.py index 60ab07c..d014b39 100644 --- a/src/trivia_agent/mailboxes.py +++ b/src/trivia_agent/mailboxes.py @@ -32,7 +32,10 @@ from weakincentives import FrozenDataclass from weakincentives.contrib.mailbox import RedisMailbox from weakincentives.evals import EvalRequest, EvalResult, Experiment, Sample -from weakincentives.runtime import MainLoopRequest, MainLoopResult +from weakincentives.runtime import ( + MainLoopRequest as AgentLoopRequest, + MainLoopResult as AgentLoopResult, +) from weakincentives.serde import parse from trivia_agent.config import RedisSettings @@ -66,14 +69,14 @@ def _parse_eval_request(data: Mapping[str, Any]) -> EvalRequest[TriviaRequest, s # Type aliases for the mailboxes RequestsMailbox = RedisMailbox[ - MainLoopRequest[TriviaRequest], - MainLoopResult[TriviaResponse], + AgentLoopRequest[TriviaRequest], + AgentLoopResult[TriviaResponse], ] EvalRequestsMailbox = RedisMailbox[ EvalRequest[TriviaRequest, str], EvalResult, ] -ResponsesMailbox = RedisMailbox[MainLoopResult[TriviaResponse], None] +ResponsesMailbox = RedisMailbox[AgentLoopResult[TriviaResponse], None] EvalResultsMailbox = RedisMailbox[EvalResult, None] @@ -88,7 +91,7 @@ def build_reply_queue_name(prefix: str, request_id: "UUID") -> str: prefix: The base name for reply queues (e.g., "trivia-replies"). Must be non-empty. request_id: The UUID of the request being processed. Typically - obtained from MainLoopRequest.id or EvalRequest.sample.id. + obtained from AgentLoopRequest.id or EvalRequest.sample.id. Returns: A queue name in the format "{prefix}-{request_id}". @@ -113,12 +116,12 @@ class TriviaMailboxes: An immutable dataclass that groups together the mailboxes needed by the trivia agent worker. This container is returned by `create_mailboxes()` - and passed to the worker's main loop. + and passed to the worker's agent loop. 
Attributes: requests: Mailbox for regular trivia questions. Receives - MainLoopRequest[TriviaRequest] messages and allows sending - MainLoopResult[TriviaResponse] replies. + AgentLoopRequest[TriviaRequest] messages and allows sending + AgentLoopResult[TriviaResponse] replies. eval_requests: Mailbox for evaluation runs. Receives EvalRequest[TriviaRequest, str] messages (where str is the expected answer) and allows sending EvalResult replies. @@ -149,7 +152,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes: Factory function that initializes the Redis connection and creates both the requests and eval_requests mailboxes. Call this once at - worker startup and pass the result to your main loop. + worker startup and pass the result to your agent loop. The function creates a single Redis client that is shared between both mailboxes for efficient connection pooling. @@ -173,8 +176,8 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes: settings = RedisSettings() # Loads from environment mailboxes = create_mailboxes(settings) - # Pass to MainLoop - main_loop = MainLoop( + # Pass to AgentLoop + agent_loop = AgentLoop( mailboxes=[mailboxes.requests, mailboxes.eval_requests], ... ) @@ -184,7 +187,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes: requests: RequestsMailbox = RedisMailbox( name=settings.requests_queue, client=client, - body_type=MainLoopRequest[TriviaRequest], + body_type=AgentLoopRequest[TriviaRequest], ) eval_requests: EvalRequestsMailbox = RedisMailbox( @@ -202,7 +205,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes: def create_responses_mailbox( client: Redis, # type: ignore[type-arg] queue_name: str, -) -> "Mailbox[MainLoopResult[TriviaResponse], None]": +) -> "Mailbox[AgentLoopResult[TriviaResponse], None]": """Create a responses mailbox for receiving replies from the worker. 
Used by dispatch scripts to create a dedicated mailbox for receiving @@ -219,7 +222,7 @@ def create_responses_mailbox( `build_reply_queue_name(prefix, request_id)`. Returns: - A mailbox that yields MainLoopResult[TriviaResponse] messages. + A mailbox that yields AgentLoopResult[TriviaResponse] messages. Iterate over it or call methods like `.get()` to receive responses. Example: @@ -240,11 +243,11 @@ def create_responses_mailbox( print(result.body.answer) """ return cast( - "Mailbox[MainLoopResult[TriviaResponse], None]", + "Mailbox[AgentLoopResult[TriviaResponse], None]", RedisMailbox( name=queue_name, client=client, - body_type=MainLoopResult[TriviaResponse], + body_type=AgentLoopResult[TriviaResponse], ), ) @@ -258,7 +261,7 @@ def create_eval_results_mailbox( Used by eval dispatch scripts to create a dedicated mailbox for receiving the evaluation result for a specific sample. Similar to `create_responses_mailbox()` but typed for EvalResult instead of - MainLoopResult. + AgentLoopResult. The EvalResult contains the evaluation score and any evaluator-specific metadata from the trivia_evaluator. diff --git a/src/trivia_agent/worker.py b/src/trivia_agent/worker.py index 9cb43b2..f20a541 100644 --- a/src/trivia_agent/worker.py +++ b/src/trivia_agent/worker.py @@ -1,7 +1,7 @@ -"""MainLoop + EvalLoop entry point for the trivia agent. +"""AgentLoop + EvalLoop entry point for the trivia agent. 
This module demonstrates the full WINK architecture: -- MainLoop for production request processing +- AgentLoop for production request processing - EvalLoop for evaluation with session-aware scoring - PromptTemplate with multiple sections - Feedback providers for soft course correction @@ -29,10 +29,10 @@ from weakincentives.prompt.overrides import LocalPromptOverridesStore, PromptOverridesStore from weakincentives.runtime import ( LoopGroup, - MainLoop, - MainLoopConfig, - MainLoopRequest, - MainLoopResult, + MainLoop as AgentLoop, + MainLoopConfig as AgentLoopConfig, + MainLoopRequest as AgentLoopRequest, + MainLoopResult as AgentLoopResult, Session, ) from weakincentives.runtime.logging import configure_logging @@ -190,10 +190,10 @@ def build_prompt_template() -> PromptTemplate[TriviaResponse]: ) -class TriviaAgentLoop(MainLoop[TriviaRequest, TriviaResponse]): +class TriviaAgentLoop(AgentLoop[TriviaRequest, TriviaResponse]): """Main processing loop for the trivia agent. - Extends MainLoop to handle TriviaRequest inputs and produce TriviaResponse + Extends AgentLoop to handle TriviaRequest inputs and produce TriviaResponse outputs. This loop demonstrates key WINK patterns for production agents: - **Per-request preparation**: The prepare() method creates a fresh Session @@ -219,7 +219,7 @@ class TriviaAgentLoop(MainLoop[TriviaRequest, TriviaResponse]): >>> loop = TriviaAgentLoop( ... adapter=adapter, ... requests=mailboxes.requests, - ... config=MainLoopConfig(deadline=my_deadline), + ... config=AgentLoopConfig(deadline=my_deadline), ... workspace_dir=Path("./workspace"), ... 
) >>> loop.run() # Process requests until shutdown @@ -232,8 +232,8 @@ def __init__( self, *, adapter: ProviderAdapter[TriviaResponse], - requests: Mailbox[MainLoopRequest[TriviaRequest], MainLoopResult[TriviaResponse]], - config: MainLoopConfig | None = None, + requests: Mailbox[AgentLoopRequest[TriviaRequest], AgentLoopResult[TriviaResponse]], + config: AgentLoopConfig | None = None, workspace_dir: Path | None = None, overrides_store: PromptOverridesStore | None = None, ) -> None: @@ -247,11 +247,11 @@ def __init__( adapter: ProviderAdapter[TriviaResponse] that executes agent sessions. Typically created via create_adapter() with appropriate isolation configuration (skills, sandbox settings, API keys). - requests: Mailbox for receiving MainLoopRequest[TriviaRequest] and - sending MainLoopResult[TriviaResponse]. Connect this to your + requests: Mailbox for receiving AgentLoopRequest[TriviaRequest] and + sending AgentLoopResult[TriviaResponse]. Connect this to your message queue (e.g., Redis via TriviaMailboxes). - config: Optional MainLoopConfig with deadline and debug bundle settings. - If None, uses default MainLoop configuration. Set config.deadline + config: Optional AgentLoopConfig with deadline and debug bundle settings. + If None, uses default AgentLoop configuration. Set config.deadline to control maximum execution time per request. workspace_dir: Path to directory containing files to seed into agent workspace. Defaults to DEFAULT_WORKSPACE_DIR (project's workspace/ @@ -273,7 +273,7 @@ def prepare( ) -> tuple[Prompt[TriviaResponse], Session]: """Prepare the prompt and session for processing a trivia request. - Called by the MainLoop for each incoming request. Creates a fresh Session + Called by the AgentLoop for each incoming request. Creates a fresh Session for isolation, builds the complete PromptTemplate with workspace section, binds request parameters, and optionally applies experiment overrides. 
@@ -406,7 +406,7 @@ def main( Entry point for running the trivia agent as a long-lived worker process. Initializes all dependencies (adapter, mailboxes, loops), then runs both - the MainLoop (for production requests) and EvalLoop (for evaluation + the AgentLoop (for production requests) and EvalLoop (for evaluation requests) concurrently via a LoopGroup. The worker performs these steps: @@ -483,9 +483,9 @@ def main( err.write(f"Failed to connect to Redis: {e}\n") return 1 - # Configure MainLoop with deadline and optional debug bundles + # Configure AgentLoop with deadline and optional debug bundles default_deadline = Deadline(expires_at=datetime.now(UTC) + DEFAULT_DEADLINE_DURATION) - config = MainLoopConfig( + config = AgentLoopConfig( deadline=default_deadline, debug_bundle=( BundleConfig(target=settings.debug_bundles_dir) if settings.debug_bundles_dir else None @@ -497,7 +497,7 @@ def main( if settings.prompt_overrides_dir: overrides_store = LocalPromptOverridesStore(root_path=settings.prompt_overrides_dir) - # Create the main loop + # Create the agent loop loop = TriviaAgentLoop( adapter=adapter, requests=mailboxes.requests, diff --git a/tests/trivia_agent/test_dispatch.py b/tests/trivia_agent/test_dispatch.py index d8da172..d4b21ba 100644 --- a/tests/trivia_agent/test_dispatch.py +++ b/tests/trivia_agent/test_dispatch.py @@ -9,7 +9,7 @@ import pytest from weakincentives.evals import EvalResult, Score -from weakincentives.runtime import MainLoopResult +from weakincentives.runtime import MainLoopResult as AgentLoopResult from trivia_agent.dispatch import ( DispatchRuntime, @@ -198,7 +198,7 @@ class TestWaitForResponse: def test_returns_matching_response(self) -> None: """Test that matching response is returned.""" request_id = "12345678-1234-5678-1234-567812345678" - response = MainLoopResult( + response = AgentLoopResult( request_id=UUID(request_id), output=TriviaResponse(answer="42"), error=None, @@ -256,12 +256,12 @@ def test_nacks_non_matching_response(self) 
-> None: request_id = "12345678-1234-5678-1234-567812345678" wrong_id = "87654321-4321-8765-4321-876543218765" - wrong_response = MainLoopResult( + wrong_response = AgentLoopResult( request_id=UUID(wrong_id), output=TriviaResponse(answer="wrong"), error=None, ) - correct_response = MainLoopResult( + correct_response = AgentLoopResult( request_id=UUID(request_id), output=TriviaResponse(answer="correct"), error=None, @@ -316,7 +316,7 @@ def test_wait_for_response_success( ) mock_response_msg = MagicMock() - mock_response_msg.body = MainLoopResult( + mock_response_msg.body = AgentLoopResult( request_id=UUID("12345678-1234-5678-1234-567812345678"), output=TriviaResponse(answer="4"), error=None, @@ -340,9 +340,9 @@ def now() -> float: now=now, ) - # Patch Redis and MainLoopRequest to control request_id + # Patch Redis and AgentLoopRequest to control request_id with patch("trivia_agent.dispatch.Redis"): - with patch("trivia_agent.dispatch.MainLoopRequest") as mock_request_cls: + with patch("trivia_agent.dispatch.AgentLoopRequest") as mock_request_cls: mock_request = MagicMock() mock_request.request_id = UUID("12345678-1234-5678-1234-567812345678") mock_request.request = MagicMock() @@ -416,7 +416,7 @@ def test_response_with_error( ) mock_response_msg = MagicMock() - mock_response_msg.body = MainLoopResult( + mock_response_msg.body = AgentLoopResult( request_id=UUID("12345678-1234-5678-1234-567812345678"), output=None, error="Agent failed", @@ -441,7 +441,7 @@ def now() -> float: ) with patch("trivia_agent.dispatch.Redis"): - with patch("trivia_agent.dispatch.MainLoopRequest") as mock_request_cls: + with patch("trivia_agent.dispatch.AgentLoopRequest") as mock_request_cls: mock_request = MagicMock() mock_request.request_id = UUID("12345678-1234-5678-1234-567812345678") mock_request.request = MagicMock() @@ -472,7 +472,7 @@ def test_response_with_no_output( ) mock_response_msg = MagicMock() - mock_response_msg.body = MainLoopResult( + mock_response_msg.body = 
AgentLoopResult( request_id=UUID("12345678-1234-5678-1234-567812345678"), output=None, error=None, @@ -497,7 +497,7 @@ def now() -> float: ) with patch("trivia_agent.dispatch.Redis"): - with patch("trivia_agent.dispatch.MainLoopRequest") as mock_request_cls: + with patch("trivia_agent.dispatch.AgentLoopRequest") as mock_request_cls: mock_request = MagicMock() mock_request.request_id = UUID("12345678-1234-5678-1234-567812345678") mock_request.request = MagicMock()