Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ make dispatch-eval QUESTION="What is the secret number?" EXPECTED="42"

```
src/trivia_agent/
├── worker.py # MainLoop entry point
├── agent_loop.py # AgentLoop entry point
├── eval_loop.py # EvalLoop factory
├── sections.py # Question, GameRules, Hints, LuckyDice sections
├── tools.py # hint_lookup, pick_up_dice, throw_dice tools
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ starter/
│ └── SKILL.md
├── src/
│ └── trivia_agent/
│ ├── worker.py # MainLoop entry point
│ ├── agent_loop.py # AgentLoop entry point
│ ├── eval_loop.py # EvalLoop factory
│ ├── dispatch.py # Submit questions to the agent
│ ├── models.py # Request/Response dataclasses
Expand Down Expand Up @@ -323,7 +323,7 @@ make format # Format code
┌────────────┴────────────┐
▼ ▼
┌────────────────────────────┐ ┌────────────────────────────┐
│ MainLoop │ │ EvalLoop │
│ AgentLoop │ │ EvalLoop │
│ (production requests) │ │ (evaluation samples) │
└─────────────┬──────────────┘ └─────────────┬──────────────┘
│ │
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies = [
]

[project.scripts]
trivia-agent = "trivia_agent.worker:main"
trivia-agent = "trivia_agent.agent_loop:main"
trivia-dispatch = "trivia_agent.dispatch:main"

[build-system]
Expand Down Expand Up @@ -41,7 +41,7 @@ include = ["src"]
typeCheckingMode = "strict"

[tool.uv.sources]
weakincentives = { git = "https://github.com/weakincentives/weakincentives.git", tag = "v0.23.0" }
weakincentives = { git = "https://github.com/weakincentives/weakincentives.git", tag = "v0.24.0" }

[dependency-groups]
dev = [
Expand All @@ -51,6 +51,6 @@ dev = [
"pytest-timeout",
"pyright",
"ruff",
"weakincentives[wink] @ git+https://github.com/weakincentives/weakincentives.git@v0.23.0",
"weakincentives[wink] @ git+https://github.com/weakincentives/weakincentives.git@v0.24.0",
"pytest-rerunfailures>=16.1",
]
10 changes: 5 additions & 5 deletions src/trivia_agent/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
Usage:
>>> from trivia_agent.adapters import create_adapter
>>> adapter = create_adapter()
>>> # Pass adapter to MainLoop.create() or EvalLoop.create()
>>> # Pass adapter to AgentLoop.create() or EvalLoop.create()
"""

from __future__ import annotations
Expand Down Expand Up @@ -89,7 +89,7 @@ def create_adapter(
Factory function that assembles all components needed to run the trivia
agent: model selection, task completion checking, isolation configuration,
and working directory setup. The returned adapter is ready to be passed
to a WINK MainLoop or EvalLoop.
to a WINK AgentLoop or EvalLoop.

The adapter is configured with:
- Model: Claude Sonnet (via the "sonnet" alias)
Expand All @@ -109,15 +109,15 @@ def create_adapter(
Returns:
ClaudeAgentSDKAdapter[TriviaResponse]: A fully configured adapter
instance typed to produce TriviaResponse structured output.
Pass this adapter to ``MainLoop.create()`` or ``EvalLoop.create()``
Pass this adapter to ``AgentLoop.create()`` or ``EvalLoop.create()``
to run the trivia agent.

Example:
>>> from trivia_agent.isolation import create_isolation_config
>>> isolation = create_isolation_config()
>>> adapter = create_adapter(isolation=isolation, cwd="/path/to/workspace")
>>> # Use adapter with MainLoop
>>> loop = MainLoop.create(adapter=adapter, sections=[...])
>>> # Use adapter with AgentLoop
>>> loop = AgentLoop.create(adapter=adapter, sections=[...])
"""
checker = SimpleTaskCompletionChecker()
client_config = ClaudeAgentSDKClientConfig(
Expand Down
38 changes: 19 additions & 19 deletions src/trivia_agent/worker.py → src/trivia_agent/agent_loop.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""MainLoop + EvalLoop entry point for the trivia agent.
"""AgentLoop + EvalLoop entry point for the trivia agent.

This module demonstrates the full WINK architecture:
- MainLoop for production request processing
- AgentLoop for production request processing
- EvalLoop for evaluation with session-aware scoring
- PromptTemplate with multiple sections
- Feedback providers for soft course correction
Expand All @@ -28,11 +28,11 @@
from weakincentives.prompt import PromptTemplate
from weakincentives.prompt.overrides import LocalPromptOverridesStore, PromptOverridesStore
from weakincentives.runtime import (
AgentLoop,
AgentLoopConfig,
AgentLoopRequest,
AgentLoopResult,
LoopGroup,
MainLoop,
MainLoopConfig,
MainLoopRequest,
MainLoopResult,
Session,
)
from weakincentives.runtime.logging import configure_logging
Expand Down Expand Up @@ -190,10 +190,10 @@ def build_prompt_template() -> PromptTemplate[TriviaResponse]:
)


class TriviaAgentLoop(MainLoop[TriviaRequest, TriviaResponse]):
class TriviaAgentLoop(AgentLoop[TriviaRequest, TriviaResponse]):
"""Main processing loop for the trivia agent.

Extends MainLoop to handle TriviaRequest inputs and produce TriviaResponse
Extends AgentLoop to handle TriviaRequest inputs and produce TriviaResponse
outputs. This loop demonstrates key WINK patterns for production agents:

- **Per-request preparation**: The prepare() method creates a fresh Session
Expand All @@ -219,7 +219,7 @@ class TriviaAgentLoop(MainLoop[TriviaRequest, TriviaResponse]):
>>> loop = TriviaAgentLoop(
... adapter=adapter,
... requests=mailboxes.requests,
... config=MainLoopConfig(deadline=my_deadline),
... config=AgentLoopConfig(deadline=my_deadline),
... workspace_dir=Path("./workspace"),
... )
>>> loop.run() # Process requests until shutdown
Expand All @@ -232,8 +232,8 @@ def __init__(
self,
*,
adapter: ProviderAdapter[TriviaResponse],
requests: Mailbox[MainLoopRequest[TriviaRequest], MainLoopResult[TriviaResponse]],
config: MainLoopConfig | None = None,
requests: Mailbox[AgentLoopRequest[TriviaRequest], AgentLoopResult[TriviaResponse]],
config: AgentLoopConfig | None = None,
workspace_dir: Path | None = None,
overrides_store: PromptOverridesStore | None = None,
) -> None:
Expand All @@ -247,11 +247,11 @@ def __init__(
adapter: ProviderAdapter[TriviaResponse] that executes agent sessions.
Typically created via create_adapter() with appropriate isolation
configuration (skills, sandbox settings, API keys).
requests: Mailbox for receiving MainLoopRequest[TriviaRequest] and
sending MainLoopResult[TriviaResponse]. Connect this to your
requests: Mailbox for receiving AgentLoopRequest[TriviaRequest] and
sending AgentLoopResult[TriviaResponse]. Connect this to your
message queue (e.g., Redis via TriviaMailboxes).
config: Optional MainLoopConfig with deadline and debug bundle settings.
If None, uses default MainLoop configuration. Set config.deadline
config: Optional AgentLoopConfig with deadline and debug bundle settings.
If None, uses default AgentLoop configuration. Set config.deadline
to control maximum execution time per request.
workspace_dir: Path to directory containing files to seed into agent
workspace. Defaults to DEFAULT_WORKSPACE_DIR (project's workspace/
Expand All @@ -273,7 +273,7 @@ def prepare(
) -> tuple[Prompt[TriviaResponse], Session]:
"""Prepare the prompt and session for processing a trivia request.

Called by the MainLoop for each incoming request. Creates a fresh Session
Called by the AgentLoop for each incoming request. Creates a fresh Session
for isolation, builds the complete PromptTemplate with workspace section,
binds request parameters, and optionally applies experiment overrides.

Expand Down Expand Up @@ -406,7 +406,7 @@ def main(

Entry point for running the trivia agent as a long-lived worker process.
Initializes all dependencies (adapter, mailboxes, loops), then runs both
the MainLoop (for production requests) and EvalLoop (for evaluation
the AgentLoop (for production requests) and EvalLoop (for evaluation
requests) concurrently via a LoopGroup.

The worker performs these steps:
Expand Down Expand Up @@ -483,9 +483,9 @@ def main(
err.write(f"Failed to connect to Redis: {e}\n")
return 1

# Configure MainLoop with deadline and optional debug bundles
# Configure AgentLoop with deadline and optional debug bundles
default_deadline = Deadline(expires_at=datetime.now(UTC) + DEFAULT_DEADLINE_DURATION)
config = MainLoopConfig(
config = AgentLoopConfig(
deadline=default_deadline,
debug_bundle=(
BundleConfig(target=settings.debug_bundles_dir) if settings.debug_bundles_dir else None
Expand Down
10 changes: 5 additions & 5 deletions src/trivia_agent/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from redis import Redis
from weakincentives import FrozenDataclass
from weakincentives.evals import EvalRequest, EvalResult, Experiment, Sample
from weakincentives.runtime import MainLoopRequest, MainLoopResult
from weakincentives.runtime import AgentLoopRequest, AgentLoopResult
from weakincentives.runtime.mailbox import Mailbox, ReceiptHandleExpiredError

from trivia_agent.config import load_redis_settings
Expand Down Expand Up @@ -100,7 +100,7 @@ class DispatchRuntime:
"""

mailboxes: TriviaMailboxes | None = None
responses: Mailbox[MainLoopResult[TriviaResponse], None] | None = None
responses: Mailbox[AgentLoopResult[TriviaResponse], None] | None = None
eval_results: Mailbox[EvalResult, None] | None = None
out: TextIO = field(default_factory=lambda: sys.stdout)
err: TextIO = field(default_factory=lambda: sys.stderr)
Expand Down Expand Up @@ -155,12 +155,12 @@ def _wait_for_eval_result(


def _wait_for_response(
responses: Mailbox[MainLoopResult[TriviaResponse], None],
responses: Mailbox[AgentLoopResult[TriviaResponse], None],
request_id: str,
timeout_seconds: float,
wait_time_seconds: int,
now: Callable[[], float],
) -> MainLoopResult[TriviaResponse] | None:
) -> AgentLoopResult[TriviaResponse] | None:
"""Wait for a response matching the request ID.

Args:
Expand Down Expand Up @@ -400,7 +400,7 @@ def main(
client.close()

# Submit as regular request
main_request = MainLoopRequest(request=request)
main_request = AgentLoopRequest(request=request)

if args.no_wait:
# Just submit and exit
Expand Down
12 changes: 6 additions & 6 deletions src/trivia_agent/eval_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
This module provides evaluation capabilities for the trivia agent,
allowing you to test agent responses against expected secret answers.

The EvalLoop wraps your production MainLoop, ensuring evaluations run
The EvalLoop wraps your production AgentLoop, ensuring evaluations run
against your exact agent configuration with no drift or separate test harness.

Key features:
- Session-aware evaluators for behavioral assertions
- Integration with MainLoop for consistent execution
- Integration with AgentLoop for consistent execution
- Collocated evals (same prompts, tools, and config as production)
- Debug bundles with eval metadata for tracing

Expand All @@ -20,7 +20,7 @@
Or programmatically::

from trivia_agent.eval_loop import create_eval_loop
from trivia_agent.worker import TriviaAgentLoop
from trivia_agent.agent_loop import TriviaAgentLoop
from trivia_agent.mailboxes import EvalRequestsMailbox

loop = TriviaAgentLoop(...)
Expand All @@ -30,7 +30,7 @@

See Also:
- :mod:`trivia_agent.evaluators` for the trivia_evaluator implementation
- :mod:`trivia_agent.worker` for the TriviaAgentLoop being wrapped
- :mod:`trivia_agent.agent_loop` for the TriviaAgentLoop being wrapped
"""

from __future__ import annotations
Expand All @@ -40,10 +40,10 @@

from weakincentives.evals import EvalLoop, EvalLoopConfig, SessionEvaluator

from trivia_agent.agent_loop import TriviaAgentLoop
from trivia_agent.evaluators import trivia_evaluator
from trivia_agent.mailboxes import EvalRequestsMailbox
from trivia_agent.models import TriviaRequest, TriviaResponse
from trivia_agent.worker import TriviaAgentLoop


def create_eval_loop(
Expand Down Expand Up @@ -87,7 +87,7 @@ def create_eval_loop(

from pathlib import Path
from trivia_agent.eval_loop import create_eval_loop
from trivia_agent.worker import TriviaAgentLoop
from trivia_agent.agent_loop import TriviaAgentLoop
from trivia_agent.mailboxes import EvalRequestsMailbox

# Set up components
Expand Down
6 changes: 3 additions & 3 deletions src/trivia_agent/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

Usage:
Import and call `build_feedback_providers()` during agent setup to get
a tuple of configured providers ready for the WINK MainLoop::
a tuple of configured providers ready for the WINK AgentLoop::

from trivia_agent.feedback import build_feedback_providers

providers = build_feedback_providers()
# Pass to MainLoop or session configuration
# Pass to AgentLoop or session configuration

See Also:
- weakincentives.prompt.DeadlineFeedback: Built-in time awareness provider
Expand Down Expand Up @@ -160,7 +160,7 @@ def build_feedback_providers() -> tuple[FeedbackProviderConfig, ...]:

Returns:
tuple[FeedbackProviderConfig, ...]: A tuple of two configured feedback
providers ready to be passed to the WINK MainLoop or session builder.
providers ready to be passed to the WINK AgentLoop or session builder.

Example:
>>> from trivia_agent.feedback import build_feedback_providers
Expand Down
8 changes: 4 additions & 4 deletions src/trivia_agent/isolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
>>> from trivia_agent.isolation import resolve_isolation_config
>>> import os
>>> config = resolve_isolation_config(os.environ)
>>> # Use config with your agent's MainLoop or EvalLoop
>>> # Use config with your agent's AgentLoop or EvalLoop
"""

from __future__ import annotations
Expand Down Expand Up @@ -135,16 +135,16 @@ def resolve_isolation_config(
- TRIVIA_DISABLE_SANDBOX: Set to disable sandbox (e.g., "1" or "true")

Returns:
An IsolationConfig ready to pass to MainLoop or EvalLoop. Contains:
An IsolationConfig ready to pass to AgentLoop or EvalLoop. Contains:
- sandbox: SandboxConfig with enabled/disabled state
- skills: SkillConfig with discovered skills, or None
- api_key: The Anthropic API key for authenticated requests

Example:
>>> import os
>>> from trivia_agent.worker import MainLoop
>>> from trivia_agent.adapters import create_adapter
>>> config = resolve_isolation_config(os.environ)
>>> loop = MainLoop(isolation=config, ...)
>>> adapter = create_adapter(isolation=config)
"""
skills_config = resolve_skills_config(env)
api_key = env.get(API_KEY_ENV)
Expand Down
Loading