Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ make dispatch-eval QUESTION="What is the secret number?" EXPECTED="42"

```
src/trivia_agent/
├── worker.py # MainLoop entry point
├── worker.py # AgentLoop entry point
├── eval_loop.py # EvalLoop factory
├── sections.py # Question, GameRules, Hints, LuckyDice sections
├── tools.py # hint_lookup, pick_up_dice, throw_dice tools
Expand Down
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [Unreleased]

### Changed

- Renamed `MainLoop` to `AgentLoop` throughout the codebase for better alignment with mainstream agent terminology. This pairs well with the `EvalLoop` naming convention.
  - `MainLoop` → `AgentLoop`
  - `MainLoopConfig` → `AgentLoopConfig`
  - `MainLoopRequest` → `AgentLoopRequest`
  - `MainLoopResult` → `AgentLoopResult`
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ starter/
│ └── SKILL.md
├── src/
│ └── trivia_agent/
│ ├── worker.py # MainLoop entry point
│ ├── worker.py # AgentLoop entry point
│ ├── eval_loop.py # EvalLoop factory
│ ├── dispatch.py # Submit questions to the agent
│ ├── models.py # Request/Response dataclasses
Expand Down Expand Up @@ -326,7 +326,7 @@ make format # Format code
┌────────────┴────────────┐
▼ ▼
┌────────────────────────────┐ ┌────────────────────────────┐
│ MainLoop │ │ EvalLoop │
│ AgentLoop │ │ EvalLoop │
│ (production requests) │ │ (evaluation samples) │
└─────────────┬──────────────┘ └─────────────┬──────────────┘
│ │
Expand Down
10 changes: 5 additions & 5 deletions src/trivia_agent/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
Usage:
>>> from trivia_agent.adapters import create_adapter
>>> adapter = create_adapter()
>>> # Pass adapter to MainLoop.create() or EvalLoop.create()
>>> # Pass adapter to AgentLoop.create() or EvalLoop.create()
"""

from __future__ import annotations
Expand Down Expand Up @@ -89,7 +89,7 @@ def create_adapter(
Factory function that assembles all components needed to run the trivia
agent: model selection, task completion checking, isolation configuration,
and working directory setup. The returned adapter is ready to be passed
to a WINK MainLoop or EvalLoop.
to a WINK AgentLoop or EvalLoop.

The adapter is configured with:
- Model: Claude Sonnet (via the "sonnet" alias)
Expand All @@ -109,15 +109,15 @@ def create_adapter(
Returns:
ClaudeAgentSDKAdapter[TriviaResponse]: A fully configured adapter
instance typed to produce TriviaResponse structured output.
Pass this adapter to ``MainLoop.create()`` or ``EvalLoop.create()``
Pass this adapter to ``AgentLoop.create()`` or ``EvalLoop.create()``
to run the trivia agent.

Example:
>>> from trivia_agent.isolation import create_isolation_config
>>> isolation = create_isolation_config()
>>> adapter = create_adapter(isolation=isolation, cwd="/path/to/workspace")
>>> # Use adapter with MainLoop
>>> loop = MainLoop.create(adapter=adapter, sections=[...])
>>> # Use adapter with AgentLoop
>>> loop = AgentLoop.create(adapter=adapter, sections=[...])
"""
checker = SimpleTaskCompletionChecker()
client_config = ClaudeAgentSDKClientConfig(
Expand Down
13 changes: 8 additions & 5 deletions src/trivia_agent/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@
from redis import Redis
from weakincentives import FrozenDataclass
from weakincentives.evals import EvalRequest, EvalResult, Experiment, Sample
from weakincentives.runtime import MainLoopRequest, MainLoopResult
from weakincentives.runtime import (
MainLoopRequest as AgentLoopRequest,
MainLoopResult as AgentLoopResult,
)
from weakincentives.runtime.mailbox import Mailbox, ReceiptHandleExpiredError

from trivia_agent.config import load_redis_settings
Expand Down Expand Up @@ -100,7 +103,7 @@ class DispatchRuntime:
"""

mailboxes: TriviaMailboxes | None = None
responses: Mailbox[MainLoopResult[TriviaResponse], None] | None = None
responses: Mailbox[AgentLoopResult[TriviaResponse], None] | None = None
eval_results: Mailbox[EvalResult, None] | None = None
out: TextIO = field(default_factory=lambda: sys.stdout)
err: TextIO = field(default_factory=lambda: sys.stderr)
Expand Down Expand Up @@ -155,12 +158,12 @@ def _wait_for_eval_result(


def _wait_for_response(
responses: Mailbox[MainLoopResult[TriviaResponse], None],
responses: Mailbox[AgentLoopResult[TriviaResponse], None],
request_id: str,
timeout_seconds: float,
wait_time_seconds: int,
now: Callable[[], float],
) -> MainLoopResult[TriviaResponse] | None:
) -> AgentLoopResult[TriviaResponse] | None:
"""Wait for a response matching the request ID.

Args:
Expand Down Expand Up @@ -400,7 +403,7 @@ def main(
client.close()

# Submit as regular request
main_request = MainLoopRequest(request=request)
main_request = AgentLoopRequest(request=request)

if args.no_wait:
# Just submit and exit
Expand Down
4 changes: 2 additions & 2 deletions src/trivia_agent/eval_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
This module provides evaluation capabilities for the trivia agent,
allowing you to test agent responses against expected secret answers.

The EvalLoop wraps your production MainLoop, ensuring evaluations run
The EvalLoop wraps your production AgentLoop, ensuring evaluations run
against your exact agent configuration with no drift or separate test harness.

Key features:
- Session-aware evaluators for behavioral assertions
- Integration with MainLoop for consistent execution
- Integration with AgentLoop for consistent execution
- Collocated evals (same prompts, tools, and config as production)
- Debug bundles with eval metadata for tracing

Expand Down
6 changes: 3 additions & 3 deletions src/trivia_agent/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

Usage:
Import and call `build_feedback_providers()` during agent setup to get
a tuple of configured providers ready for the WINK MainLoop::
a tuple of configured providers ready for the WINK AgentLoop::

from trivia_agent.feedback import build_feedback_providers

providers = build_feedback_providers()
# Pass to MainLoop or session configuration
# Pass to AgentLoop or session configuration

See Also:
- weakincentives.prompt.DeadlineFeedback: Built-in time awareness provider
Expand Down Expand Up @@ -160,7 +160,7 @@ def build_feedback_providers() -> tuple[FeedbackProviderConfig, ...]:

Returns:
tuple[FeedbackProviderConfig, ...]: A tuple of two configured feedback
providers ready to be passed to the WINK MainLoop or session builder.
providers ready to be passed to the WINK AgentLoop or session builder.

Example:
>>> from trivia_agent.feedback import build_feedback_providers
Expand Down
8 changes: 4 additions & 4 deletions src/trivia_agent/isolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
>>> from trivia_agent.isolation import resolve_isolation_config
>>> import os
>>> config = resolve_isolation_config(os.environ)
>>> # Use config with your agent's MainLoop or EvalLoop
>>> # Use config with your agent's AgentLoop or EvalLoop
"""

from __future__ import annotations
Expand Down Expand Up @@ -135,16 +135,16 @@ def resolve_isolation_config(
- TRIVIA_DISABLE_SANDBOX: Set to disable sandbox (e.g., "1" or "true")

Returns:
An IsolationConfig ready to pass to MainLoop or EvalLoop. Contains:
An IsolationConfig ready to pass to AgentLoop or EvalLoop. Contains:
- sandbox: SandboxConfig with enabled/disabled state
- skills: SkillConfig with discovered skills, or None
- api_key: The Anthropic API key for authenticated requests

Example:
>>> import os
>>> from trivia_agent.worker import MainLoop
>>> from trivia_agent.worker import AgentLoop
>>> config = resolve_isolation_config(os.environ)
>>> loop = MainLoop(isolation=config, ...)
>>> loop = AgentLoop(isolation=config, ...)
"""
skills_config = resolve_skills_config(env)
api_key = env.get(API_KEY_ENV)
Expand Down
37 changes: 20 additions & 17 deletions src/trivia_agent/mailboxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
from weakincentives import FrozenDataclass
from weakincentives.contrib.mailbox import RedisMailbox
from weakincentives.evals import EvalRequest, EvalResult, Experiment, Sample
from weakincentives.runtime import MainLoopRequest, MainLoopResult
from weakincentives.runtime import (
MainLoopRequest as AgentLoopRequest,
MainLoopResult as AgentLoopResult,
)
from weakincentives.serde import parse

from trivia_agent.config import RedisSettings
Expand Down Expand Up @@ -66,14 +69,14 @@ def _parse_eval_request(data: Mapping[str, Any]) -> EvalRequest[TriviaRequest, s

# Type aliases for the mailboxes
RequestsMailbox = RedisMailbox[
MainLoopRequest[TriviaRequest],
MainLoopResult[TriviaResponse],
AgentLoopRequest[TriviaRequest],
AgentLoopResult[TriviaResponse],
]
EvalRequestsMailbox = RedisMailbox[
EvalRequest[TriviaRequest, str],
EvalResult,
]
ResponsesMailbox = RedisMailbox[MainLoopResult[TriviaResponse], None]
ResponsesMailbox = RedisMailbox[AgentLoopResult[TriviaResponse], None]
EvalResultsMailbox = RedisMailbox[EvalResult, None]


Expand All @@ -88,7 +91,7 @@ def build_reply_queue_name(prefix: str, request_id: "UUID") -> str:
prefix: The base name for reply queues (e.g., "trivia-replies").
Must be non-empty.
request_id: The UUID of the request being processed. Typically
obtained from MainLoopRequest.id or EvalRequest.sample.id.
obtained from AgentLoopRequest.id or EvalRequest.sample.id.

Returns:
A queue name in the format "{prefix}-{request_id}".
Expand All @@ -113,12 +116,12 @@ class TriviaMailboxes:

An immutable dataclass that groups together the mailboxes needed by the
trivia agent worker. This container is returned by `create_mailboxes()`
and passed to the worker's main loop.
and passed to the worker's agent loop.

Attributes:
requests: Mailbox for regular trivia questions. Receives
MainLoopRequest[TriviaRequest] messages and allows sending
MainLoopResult[TriviaResponse] replies.
AgentLoopRequest[TriviaRequest] messages and allows sending
AgentLoopResult[TriviaResponse] replies.
eval_requests: Mailbox for evaluation runs. Receives
EvalRequest[TriviaRequest, str] messages (where str is the
expected answer) and allows sending EvalResult replies.
Expand Down Expand Up @@ -149,7 +152,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes:

Factory function that initializes the Redis connection and creates
both the requests and eval_requests mailboxes. Call this once at
worker startup and pass the result to your main loop.
worker startup and pass the result to your agent loop.

The function creates a single Redis client that is shared between
both mailboxes for efficient connection pooling.
Expand All @@ -173,8 +176,8 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes:
settings = RedisSettings() # Loads from environment
mailboxes = create_mailboxes(settings)

# Pass to MainLoop
main_loop = MainLoop(
# Pass to AgentLoop
agent_loop = AgentLoop(
mailboxes=[mailboxes.requests, mailboxes.eval_requests],
...
)
Expand All @@ -184,7 +187,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes:
requests: RequestsMailbox = RedisMailbox(
name=settings.requests_queue,
client=client,
body_type=MainLoopRequest[TriviaRequest],
body_type=AgentLoopRequest[TriviaRequest],
)

eval_requests: EvalRequestsMailbox = RedisMailbox(
Expand All @@ -202,7 +205,7 @@ def create_mailboxes(settings: RedisSettings) -> TriviaMailboxes:
def create_responses_mailbox(
client: Redis, # type: ignore[type-arg]
queue_name: str,
) -> "Mailbox[MainLoopResult[TriviaResponse], None]":
) -> "Mailbox[AgentLoopResult[TriviaResponse], None]":
"""Create a responses mailbox for receiving replies from the worker.

Used by dispatch scripts to create a dedicated mailbox for receiving
Expand All @@ -219,7 +222,7 @@ def create_responses_mailbox(
`build_reply_queue_name(prefix, request_id)`.

Returns:
A mailbox that yields MainLoopResult[TriviaResponse] messages.
A mailbox that yields AgentLoopResult[TriviaResponse] messages.
Iterate over it or call methods like `.get()` to receive responses.

Example:
Expand All @@ -240,11 +243,11 @@ def create_responses_mailbox(
print(result.body.answer)
"""
return cast(
"Mailbox[MainLoopResult[TriviaResponse], None]",
"Mailbox[AgentLoopResult[TriviaResponse], None]",
RedisMailbox(
name=queue_name,
client=client,
body_type=MainLoopResult[TriviaResponse],
body_type=AgentLoopResult[TriviaResponse],
),
)

Expand All @@ -258,7 +261,7 @@ def create_eval_results_mailbox(
Used by eval dispatch scripts to create a dedicated mailbox for receiving
the evaluation result for a specific sample. Similar to
`create_responses_mailbox()` but typed for EvalResult instead of
MainLoopResult.
AgentLoopResult.

The EvalResult contains the evaluation score and any evaluator-specific
metadata from the trivia_evaluator.
Expand Down
Loading