From 21702ba8606c43faa4e76cdbb79a2368ea368b13 Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> Date: Thu, 4 Dec 2025 20:47:32 -0500 Subject: [PATCH 01/13] Add session_prefix output to process-input script --- .github/scripts/javascript/process-input.cjs | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/javascript/process-input.cjs b/.github/scripts/javascript/process-input.cjs index cf0965fa..562615f0 100644 --- a/.github/scripts/javascript/process-input.cjs +++ b/.github/scripts/javascript/process-input.cjs @@ -99,6 +99,7 @@ module.exports = async (context, github, core, inputs) => { core.setOutput('branch_name', branchName); core.setOutput('session_id', sessionId); + core.setOutput('session_prefix', process.env.GITHUB_REPOSITORY); core.setOutput('system_prompt', systemPrompt); core.setOutput('prompt', prompt); From 42dec5cbc710be97a6b9f6ae868032ee36f15e11 Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> Date: Thu, 4 Dec 2025 20:50:51 -0500 Subject: [PATCH 02/13] Use GITHUB_REPOSITORY as S3 prefix --- .github/scripts/python/agent_runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/scripts/python/agent_runner.py b/.github/scripts/python/agent_runner.py index 425d2607..776695e1 100644 --- a/.github/scripts/python/agent_runner.py +++ b/.github/scripts/python/agent_runner.py @@ -109,13 +109,14 @@ def run_agent(query: str): system_prompt = os.getenv("INPUT_SYSTEM_PROMPT", DEFAULT_SYSTEM_PROMPT) session_id = os.getenv("SESSION_ID") s3_bucket = os.getenv("S3_SESSION_BUCKET") + s3_prefix = os.getenv("GITHUB_REPOSITORY", "") if s3_bucket and session_id: print(f"πŸ€– Using session manager with session ID: {session_id}") session_manager = S3SessionManager( session_id=session_id, bucket=s3_bucket, - prefix="", + prefix=s3_prefix, ) else: raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set") @@ -160,4 +161,4 @@ def main() 
-> None: if __name__ == "__main__": - main() \ No newline at end of file + main() From 5db4716be85bd23c968543a2ecb1ef78408c52d4 Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> Date: Thu, 4 Dec 2025 21:04:19 -0500 Subject: [PATCH 03/13] Update config.yml --- .github/ISSUE_TEMPLATE/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 41fbad21..20daf959 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,4 +1,4 @@ -blank_issues_enabled: false +blank_issues_enabled: true contact_links: - name: Strands Agents SDK Support url: https://github.com/strands-agents/sdk-typescript/discussions From 57bef6f4dc5cc4bae5075cac48d9167c0045e1fb Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow Date: Fri, 5 Dec 2025 09:36:09 -0500 Subject: [PATCH 04/13] Add PR guidelines for agents and collaborators --- AGENTS.md | 135 +++++++++++++++++++++++---------- CONTRIBUTING.md | 16 ++-- docs/PR.md | 195 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 300 insertions(+), 46 deletions(-) create mode 100644 docs/PR.md diff --git a/AGENTS.md b/AGENTS.md index 2772a65c..f77d27f9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,6 +5,7 @@ This document provides guidance specifically for AI agents working on the Strand ## Purpose and Scope **AGENTS.md** contains agent-specific repository information including: + - Directory structure with summaries of what is included in each directory - Development workflow instructions for agents to follow when developing features - Coding patterns and testing patterns to follow when writing code @@ -151,6 +152,7 @@ sdk-typescript/ ### 1. 
Environment Setup See [CONTRIBUTING.md - Development Environment](CONTRIBUTING.md#development-environment) for: + - Prerequisites (Node.js 20+, npm) - Installation steps - Verification commands @@ -162,10 +164,24 @@ See [CONTRIBUTING.md - Development Environment](CONTRIBUTING.md#development-envi 3. **Run quality checks** before committing (pre-commit hooks will run automatically) 4. **Commit with conventional commits**: `feat:`, `fix:`, `refactor:`, `docs:`, etc. 5. **Push to remote**: `git push origin agent-tasks/{ISSUE_NUMBER}` +6. **Create pull request** following [PR.md](docs/PR.md) guidelines + +### 3. Pull Request Guidelines + +When creating pull requests, you **MUST** follow the guidelines in [PR.md](PR.md). Key principles: -### 3. Quality Gates +- **Focus on WHY**: Explain motivation and user impact, not implementation details +- **Document public API changes**: Show before/after code examples +- **Be concise**: Use prose over bullet lists; avoid exhaustive checklists +- **Target senior engineers**: Assume familiarity with the SDK +- **Exclude implementation details**: Leave these to code comments and diffs + +See [PR.md](PR.md) for the complete RFC-style guidance and template. + +### 4. Quality Gates Pre-commit hooks automatically run: + - Unit tests (via npm test) - Linting (via npm run lint) - Format checking (via npm run format:check) @@ -180,6 +196,7 @@ All checks must pass before commit is allowed. The SDK uses a structured logging format consistent with the Python SDK for better log parsing and searchability. 
**Format**: + ```typescript // With context fields logger.warn(`field1=<${value1}>, field2=<${value2}> | human readable message`) @@ -242,6 +259,7 @@ import { something } from 'external-package' ### File Organization Pattern **For source files**: + ``` src/ β”œβ”€β”€ module.ts # Source file @@ -250,12 +268,14 @@ src/ ``` **Function ordering within files**: + - Functions MUST be ordered from most general to most specific (top-down reading) - Public/exported functions MUST appear before private helper functions - Main entry point functions MUST be at the top of the file - Helper functions SHOULD follow in order of their usage **Example**: + ```typescript // βœ… Good: Main function first, helpers follow export async function* mainFunction() { @@ -283,6 +303,7 @@ export async function* mainFunction() { ``` **For integration tests**: + ``` tests_integ/ └── feature.test.ts # Tests public API @@ -293,6 +314,7 @@ tests_integ/ Follow this nested describe pattern for consistency: **For functions**: + ```typescript import { describe, it, expect } from 'vitest' import { functionName } from '../module' @@ -315,6 +337,7 @@ describe('functionName', () => { ``` **For classes**: + ```typescript import { describe, it, expect } from 'vitest' import { ClassName } from '../module' @@ -342,6 +365,7 @@ describe('ClassName', () => { ``` **Key principles**: + - Top-level `describe` uses the function/class name - Nested `describe` blocks group related test scenarios - Use descriptive test names without "should" prefix @@ -352,32 +376,42 @@ describe('ClassName', () => { **Rule**: When test setup cost exceeds test logic cost, you MUST batch related assertions into a single test. 
**You MUST batch when**: + - Setup complexity > test logic complexity - Multiple assertions verify the same object state - Related behaviors share expensive context **You SHOULD keep separate tests for**: + - Distinct behaviors or execution paths - Error conditions - Different input scenarios **Bad - Redundant setup**: + ```typescript it('has correct tool name', () => { - const tool = createComplexTool({ /* expensive setup */ }) + const tool = createComplexTool({ + /* expensive setup */ + }) expect(tool.toolName).toBe('testTool') }) it('has correct description', () => { - const tool = createComplexTool({ /* same expensive setup */ }) + const tool = createComplexTool({ + /* same expensive setup */ + }) expect(tool.description).toBe('Test description') }) ``` **Good - Batched properties**: + ```typescript it('creates tool with correct properties', () => { - const tool = createComplexTool({ /* setup once */ }) + const tool = createComplexTool({ + /* setup once */ + }) expect(tool.toolName).toBe('testTool') expect(tool.description).toBe('Test description') expect(tool.toolSpec.name).toBe('testTool') @@ -387,6 +421,7 @@ it('creates tool with correct properties', () => { ### TypeScript Type Safety **Strict requirements**: + ```typescript // Good: Explicit return types export function process(input: string): string { @@ -410,6 +445,7 @@ export function getData(): any { ``` **Rules**: + - Always provide explicit return types - Never use `any` type (enforced by ESLint) - Use TypeScript strict mode features @@ -437,7 +473,7 @@ export class Example { // ❌ Bad: No underscore for private fields export class Example { - private readonly config: Config // Missing underscore + private readonly config: Config // Missing underscore constructor(config: Config) { this.config = config @@ -446,6 +482,7 @@ export class Example { ``` **Rules**: + - Private fields MUST use underscore prefix (e.g., `_field`) - Public fields MUST NOT use underscore prefix - This convention improves code 
readability and makes the distinction between public and private members immediately visible @@ -454,14 +491,14 @@ export class Example { **TSDoc format** (required for all exported functions): -```typescript +````typescript /** * Brief description of what the function does. - * + * * @param paramName - Description of the parameter * @param optionalParam - Description of optional parameter * @returns Description of what is returned - * + * * @example * ```typescript * const result = functionName('input') @@ -471,7 +508,7 @@ export class Example { export function functionName(paramName: string, optionalParam?: number): string { // Implementation } -``` +```` **Interface property documentation**: @@ -494,6 +531,7 @@ export interface MyConfig { ``` **Requirements**: + - All exported functions, classes, and interfaces must have TSDoc - Include `@param` for all parameters - Include `@returns` for return values @@ -506,6 +544,7 @@ export interface MyConfig { ### Code Style Guidelines **Formatting** (enforced by Prettier): + - No semicolons - Single quotes - Line length: 120 characters @@ -513,6 +552,7 @@ export interface MyConfig { - Trailing commas in ES5 style **Example**: + ```typescript export function example(name: string, options?: Options): Result { const config = { @@ -531,6 +571,7 @@ export function example(name: string, options?: Options): Result { ### Import Organization Organize imports in this order: + ```typescript // 1. 
External dependencies import { something } from 'external-package' @@ -561,7 +602,9 @@ export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock export class TextBlock { readonly type = 'textBlock' as const readonly text: string - constructor(data: { text: string }) { this.text = data.text } + constructor(data: { text: string }) { + this.text = data.text + } } export class ToolUseBlock { @@ -595,7 +638,8 @@ export interface TextBlockData { text: string } -export interface Message { // Top-level should come first +export interface Message { + // Top-level should come first role: Role content: ContentBlock[] } @@ -610,22 +654,26 @@ export interface Message { // Top-level should come first ```typescript // βœ… Correct - type matches class name (first letter lowercase) export class TextBlock { - readonly type = 'textBlock' as const // Matches 'TextBlock' class name + readonly type = 'textBlock' as const // Matches 'TextBlock' class name readonly text: string - constructor(data: { text: string }) { this.text = data.text } + constructor(data: { text: string }) { + this.text = data.text + } } export class CachePointBlock { - readonly type = 'cachePointBlock' as const // Matches 'CachePointBlock' class name + readonly type = 'cachePointBlock' as const // Matches 'CachePointBlock' class name readonly cacheType: 'default' - constructor(data: { cacheType: 'default' }) { this.cacheType = data.cacheType } + constructor(data: { cacheType: 'default' }) { + this.cacheType = data.cacheType + } } export type ContentBlock = TextBlock | ToolUseBlock | CachePointBlock // ❌ Wrong - type doesn't match class name export class CachePointBlock { - readonly type = 'cachePoint' as const // Should be 'cachePointBlock' + readonly type = 'cachePoint' as const // Should be 'cachePointBlock' readonly cacheType: 'default' } ``` @@ -726,7 +774,7 @@ it('returns expected user object', () => { id: '123', name: 'John Doe', email: 'john@example.com', - isActive: true + isActive: true, }) }) 
@@ -761,12 +809,14 @@ it('yields expected stream events', async () => { ``` **Benefits of testing entire objects**: + - **More concise**: Single assertion instead of multiple - **Better test coverage**: Catches unexpected additional or missing properties - **More readable**: Clear expectation of the entire structure - **Easier to maintain**: Changes to the object require updating one place **Use cases**: + - Always use `toEqual()` for object and array comparisons - Use `toBe()` only for primitive values and reference equality - When testing error objects, verify the entire structure including message and type @@ -774,17 +824,19 @@ it('yields expected stream events', async () => { ### Testing Guidelines **Testing Approach:** + - You **MUST** write tests for implementations (functions, classes, methods) - You **SHOULD NOT** write tests for interfaces since TypeScript compiler already enforces type correctness - You **SHOULD** write Vitest type tests (`*.test-d.ts`) for complex types to ensure backwards compatibility **Example Implementation Test:** + ```typescript describe('BedrockModel', () => { it('streams messages correctly', async () => { const provider = new BedrockModel(config) const stream = provider.stream(messages) - + for await (const event of stream) { if (event.type === 'modelMessageStartEvent') { expect(event.role).toBe('assistant') @@ -836,6 +888,7 @@ const provider = new MockMessageModel() The [Model Context Protocol (MCP)](https://modelcontextprotocol.io) enables agents to connect to external tools and data sources through a standardized protocol. The SDK provides `McpClient` for seamless integration with MCP servers. 
**Implementation:** + - [`src/mcp.ts`](src/mcp.ts) - McpClient class - [`src/tools/mcp-tool.ts`](src/tools/mcp-tool.ts) - McpTool wrapper - [`examples/mcp/`](examples/mcp/) - Usage examples @@ -850,13 +903,13 @@ import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js' const localMcpClient = new McpClient({ transport: new StdioClientTransport({ command: 'npx', - args: ['-y', 'chrome-devtools-mcp'] - }) + args: ['-y', 'chrome-devtools-mcp'], + }), }) const agent = new Agent({ tools: [localMcpClient], - model: new OpenAIModel() + model: new OpenAIModel(), }) ``` @@ -866,14 +919,11 @@ const agent = new Agent({ import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js' const remoteMcpClient = new McpClient({ - transport: new StreamableHTTPClientTransport( - new URL('https://api.example.com/mcp/'), - { - requestInit: { - headers: { Authorization: `Bearer ${token}` } - } - } - ) + transport: new StreamableHTTPClientTransport(new URL('https://api.example.com/mcp/'), { + requestInit: { + headers: { Authorization: `Bearer ${token}` }, + }, + }), }) ``` @@ -882,11 +932,12 @@ const remoteMcpClient = new McpClient({ ```typescript const agent = new Agent({ tools: [localMcpClient, remoteMcpClient], - model: new OpenAIModel() + model: new OpenAIModel(), }) ``` **Key Features:** + - Automatic tool discovery and registration - Lazy connection (connects on first use) - Supports stdio and HTTP transports @@ -897,6 +948,7 @@ const agent = new Agent({ ## Things to Do βœ… **Do**: + - Use relative imports for internal modules - Co-locate unit tests with source under `__tests__` directories - Follow nested describe pattern for test organization @@ -910,6 +962,7 @@ const agent = new Agent({ ## Things NOT to Do ❌ **Don't**: + - Use `any` type (enforced by ESLint) - Put unit tests in separate `tests/` directory (use `src/**/__tests__/**`) - Skip documentation for exported functions @@ -924,11 +977,12 @@ const agent = new Agent({ 
For detailed command usage, see [CONTRIBUTING.md - Testing Instructions](CONTRIBUTING.md#testing-instructions-and-best-practices). Quick reference: + ```bash npm test # Run unit tests in Node.js npm run test:browser # Run unit tests in browser (Chromium via Playwright) npm run test:all # Run all tests in all environments -npm run test:integ # Run integration tests +npm run test:integ # Run integration tests npm run test:coverage # Run tests with coverage report npm run lint # Check code quality npm run format # Auto-fix formatting @@ -959,8 +1013,8 @@ import { describe, it, expect } from 'vitest' import { isNode } from '../__fixtures__/environment' // Tests will run in Node.js, skip in browser -describe.skipIf(!isNode)("Node.js specific features", () => { - it("uses environment variables", () => { +describe.skipIf(!isNode)('Node.js specific features', () => { + it('uses environment variables', () => { // This test accesses process.env expect(process.env.NODE_ENV).toBeDefined() }) @@ -970,6 +1024,7 @@ describe.skipIf(!isNode)("Node.js specific features", () => { ## Troubleshooting Common Issues If TypeScript compilation fails: + 1. Run `npm run type-check` to see all type errors 2. Ensure all functions have explicit return types 3. Verify no `any` types are used @@ -988,8 +1043,8 @@ If TypeScript compilation fails: 4. **Document as you go** with TSDoc comments 5. **Run all checks** before committing (pre-commit hooks will enforce this) - ### Writing code + - YOU MUST make the SMALLEST reasonable changes to achieve the desired outcome. - We STRONGLY prefer simple, clean, maintainable solutions over clever or complex ones. Readability and maintainability are PRIMARY CONCERNS, even at the cost of conciseness or performance. - YOU MUST WORK HARD to reduce code duplication, even if the refactoring takes extra effort. @@ -997,18 +1052,18 @@ If TypeScript compilation fails: - YOU MUST NOT manually change whitespace that does not affect execution or output. 
Otherwise, use a formatting tool. - Fix broken things immediately when you find them. Don't ask permission to fix bugs. - #### Code Comments - - NEVER add comments explaining that something is "improved", "better", "new", "enhanced", or referencing what it used to be - - Comments should explain WHAT the code does or WHY it exists, not how it's better than something else - - YOU MUST NEVER add comments about what used to be there or how something has changed. - - YOU MUST NEVER refer to temporal context in comments (like "recently refactored" "moved") or code. Comments should be evergreen and describe the code as it is. - - YOU MUST NEVER write overly verbose comments. Use concise language. +- NEVER add comments explaining that something is "improved", "better", "new", "enhanced", or referencing what it used to be +- Comments should explain WHAT the code does or WHY it exists, not how it's better than something else +- YOU MUST NEVER add comments about what used to be there or how something has changed. +- YOU MUST NEVER refer to temporal context in comments (like "recently refactored" "moved") or code. Comments should be evergreen and describe the code as it is. +- YOU MUST NEVER write overly verbose comments. Use concise language. ### Code Review Considerations When responding to PR feedback: + - Address all review comments - Test changes thoroughly - Update documentation if behavior changes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 23e04260..c558264e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,7 @@ Please read through this document before submitting any issues or pull requests. > **Note**: For AI agent-specific development patterns and guidelines, see [AGENTS.md](AGENTS.md). ## Development Tenets + Our team follows these core principles when designing and implementing features. These tenets help us make consistent decisions, resolve trade-offs, and maintain the quality and coherence of the SDK. 
When contributing, please consider how your changes align with these principles: 1. **Simple at any scale:** We believe that simple things should be simple. The same clean abstractions that power a weekend prototype should scale effortlessly to production workloads. We reject the notion that enterprise-grade means enterprise-complicated - Strands remains approachable whether it's your first agent or your millionth. @@ -28,6 +29,7 @@ When proposing solutions or reviewing code, we reference these principles to gui ### Setup 1. Clone the repository and install dependencies: + ```bash git clone https://github.com/strands-agents/sdk-typescript.git cd sdk-typescript @@ -35,11 +37,13 @@ When proposing solutions or reviewing code, we reference these principles to gui ``` 2. Install Playwright browsers for browser testing: + ```bash npm run test:browser:install ``` 3. Verify your setup by running the test suite: + ```bash npm test npm run lint @@ -110,16 +114,16 @@ We welcome you to use the GitHub issue tracker to report bugs or suggest feature When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: -* A reproducible test case or series of steps -* The version of our code being used -* Any modifications you've made relevant to the bug -* Anything unusual about your environment or deployment +- A reproducible test case or series of steps +- The version of our code being used +- Any modifications you've made relevant to the bug +- Anything unusual about your environment or deployment ## Contributing via Pull Requests Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: -1. You are working against the latest source on the *main* branch. +1. You are working against the latest source on the _main_ branch. 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. @@ -148,6 +152,7 @@ To send us a pull request, please: - **Formatting**: Prettier formatting applied consistently - **Type safety**: No `any` types allowed, explicit return types required - **Conventional commits**: Use conventional commit message format +- **PR description**: Follow the [PR description guidelines](docs/PR.md) for writing effective descriptions GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). @@ -169,4 +174,3 @@ If you discover a potential security issue in this project we ask that you notif ## Licensing See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. - diff --git a/docs/PR.md b/docs/PR.md new file mode 100644 index 00000000..8a50336f --- /dev/null +++ b/docs/PR.md @@ -0,0 +1,195 @@ +# Pull Request Description Guidelines + +Good PR descriptions help reviewers understand the context and impact of your changes. They enable faster reviews, better decision-making, and serve as valuable historical documentation. + +When creating a PR, follow the [GitHub PR template](../.github/PULL_REQUEST_TEMPLATE.md) and use these guidelines to fill it out effectively. + +## Who's Reading Your PR? + +Write for senior engineers familiar with the SDK. Assume your reader: + +- Understands the SDK's architecture and patterns +- Has context about the broader system +- Can read code diffs to understand implementation details +- Values concise, focused communication + +## What to Include + +Every PR description should have: + +1. **Motivation** β€” Why is this change needed? +2. 
**Public API Changes** β€” What changes to the public API (with code examples)? +3. **Use Cases** (optional) β€” When would developers use this feature? Only include for non-obvious functionality; skip for trivial changes. +4. **Breaking Changes** (if applicable) β€” What breaks and how to migrate? + +## Writing Principles + +**Focus on WHY, not HOW:** + +- βœ… "The OpenAI SDK supports dynamic API keys, but we don't expose this capability" +- ❌ "Added ApiKeySetter type import from openai/client" + +**Document public API changes with examples:** + +- βœ… Show before/after code examples for API changes +- ❌ List every file or line changed + +**Be concise:** + +- βœ… Use prose over bullet lists when possible +- ❌ Create exhaustive implementation checklists + +**Emphasize user impact:** + +- βœ… "Enables secret manager integration for credential rotation" +- ❌ "Updated error message to mention 'string or function'" + +## What to Skip + +Leave these out of your PR description: + +- **Implementation details** β€” Code comments and commit messages cover this +- **Test coverage notes** β€” CI will catch issues; assume tests are comprehensive +- **Line-by-line change lists** β€” The diff provides this +- **Build/lint/coverage status** β€” CI handles verification +- **Commit hashes** β€” GitHub links commits automatically + +## Anti-patterns + +❌ **Over-detailed checklists:** + +```markdown +### Type Definition Updates + +- Added ApiKeySetter type import from 'openai/client' +- Updated OpenAIModelOptions interface apiKey type +``` + +❌ **Implementation notes reviewers don't need:** + +```markdown +## Implementation Notes + +- No breaking changes - all existing string-based usage continues to work +- OpenAI SDK handles validation of function return values +``` + +❌ **Test coverage bullets:** + +```markdown +### Test Coverage + +- Added test: accepts function-based API key +- Added test: accepts async function-based API key +``` + +## Good Examples + +βœ… **Motivation 
section:** + +```markdown +## Motivation + +The OpenAI SDK supports dynamic API key resolution through async functions, +enabling use cases like credential rotation and secret manager integration. +However, our SDK currently only accepts static strings for the apiKey parameter, +preventing users from leveraging these capabilities. +``` + +βœ… **Public API Changes section:** + +````markdown +## Public API Changes + +The `OpenAIModelOptions.apiKey` parameter now accepts either a string or an +async function: + +```typescript +// Before: only string supported +const model = new OpenAIModel({ + modelId: 'gpt-4o', + apiKey: 'sk-...', +}) + +// After: function also supported +const model = new OpenAIModel({ + modelId: 'gpt-4o', + apiKey: async () => await secretManager.getApiKey(), +}) +``` +```` + +The change is backward compatibleβ€”all existing string-based usage continues +to work without modification. + +```` + +βœ… **Use Cases section:** +```markdown +## Use Cases + +- **API key rotation**: Rotate keys without application restart +- **Secret manager integration**: Fetch credentials from AWS Secrets Manager, Vault, etc. +- **Multi-tenant systems**: Dynamically select API keys based on context +```` + +## Template + +````markdown +## Motivation + +[Explain WHY this change is needed. What problem does it solve? What limitation +does it address? What user need does it fulfill?] + +Resolves: #[issue-number] + +## Public API Changes + +[Document changes to public APIs with before/after code examples. If no public +API changes, state "No public API changes."] + +```typescript +// Before +[existing API usage] + +// After +[new API usage] +``` +```` + +[Explain behavior, parameters, return values, and backward compatibility.] + +## Use Cases (optional) + +[Only include for non-obvious functionality. Provide 2-4 concrete use cases +showing when developers would use this feature. Skip for trivial changes.] 
+ +## Breaking Changes (if applicable) + +[If this is a breaking change, explain what breaks and provide migration guidance.] + +### Migration + +```typescript +// Before +[old code] + +// After +[new code] +``` + +``` + +## Why These Guidelines? + +**Focus on WHY over HOW** because code diffs show implementation details, commit messages document granular changes, and PR descriptions provide the broader context reviewers need. + +**Skip test/lint/coverage details** because CI pipelines verify these automatically. Including them adds noise without value. + +**Write for senior engineers** to enable concise, technical communication without redundant explanations. + +## References + +- [Conventional Commits](https://www.conventionalcommits.org/) +- [Google's Code Review Guidelines](https://google.github.io/eng-practices/review/) +``` From 48c9a5dc4ae12f469e1e2780bddc562467d411de Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow Date: Fri, 5 Dec 2025 09:43:24 -0500 Subject: [PATCH 05/13] Split out testing to a separate document --- AGENTS.md | 344 +++--------------------------------------------- CONTRIBUTING.md | 2 +- docs/TESTING.md | 340 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 360 insertions(+), 326 deletions(-) create mode 100644 docs/TESTING.md diff --git a/AGENTS.md b/AGENTS.md index f77d27f9..ac6aaf25 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -168,7 +168,7 @@ See [CONTRIBUTING.md - Development Environment](CONTRIBUTING.md#development-envi ### 3. Pull Request Guidelines -When creating pull requests, you **MUST** follow the guidelines in [PR.md](PR.md). Key principles: +When creating pull requests, you **MUST** follow the guidelines in [PR.md](docs/PR.md). 
Key principles: - **Focus on WHY**: Explain motivation and user impact, not implementation details - **Document public API changes**: Show before/after code examples @@ -176,7 +176,7 @@ When creating pull requests, you **MUST** follow the guidelines in [PR.md](PR.md - **Target senior engineers**: Assume familiarity with the SDK - **Exclude implementation details**: Leave these to code comments and diffs -See [PR.md](PR.md) for the complete RFC-style guidance and template. +See [PR.md](docs/PR.md) for the complete guidance and template. ### 4. Quality Gates @@ -189,6 +189,21 @@ Pre-commit hooks automatically run: All checks must pass before commit is allowed. +### 5. Testing Guidelines + +When writing tests, you **MUST** follow the guidelines in [docs/TESTING.md](docs/TESTING.md). Key topics covered: + +- Test organization and file location +- Test structure patterns (nested describe) +- Test batching strategy +- Object assertion best practices +- What to test (and what not to) +- Test coverage requirements +- Test model providers (MockMessageModel, TestModelProvider) +- Multi-environment testing (Node.js and browser) + +See [TESTING.md](docs/TESTING.md) for the complete testing reference. 
+ ## Coding Patterns and Best Practices ### Logging Style Guide @@ -309,115 +324,6 @@ tests_integ/ └── feature.test.ts # Tests public API ``` -### Test Organization Pattern - -Follow this nested describe pattern for consistency: - -**For functions**: - -```typescript -import { describe, it, expect } from 'vitest' -import { functionName } from '../module' - -describe('functionName', () => { - describe('when called with valid input', () => { - it('returns expected result', () => { - const result = functionName('input') - expect(result).toBe('expected') - }) - }) - - describe('when called with edge case', () => { - it('handles gracefully', () => { - const result = functionName('') - expect(result).toBeDefined() - }) - }) -}) -``` - -**For classes**: - -```typescript -import { describe, it, expect } from 'vitest' -import { ClassName } from '../module' - -describe('ClassName', () => { - describe('methodName', () => { - it('returns expected result', () => { - const instance = new ClassName() - const result = instance.methodName() - expect(result).toBe('expected') - }) - - it('handles error case', () => { - const instance = new ClassName() - expect(() => instance.methodName()).toThrow() - }) - }) - - describe('anotherMethod', () => { - it('performs expected action', () => { - // Test implementation - }) - }) -}) -``` - -**Key principles**: - -- Top-level `describe` uses the function/class name -- Nested `describe` blocks group related test scenarios -- Use descriptive test names without "should" prefix -- Group tests by functionality or scenario - -### Test Batching Strategy - -**Rule**: When test setup cost exceeds test logic cost, you MUST batch related assertions into a single test. 
- -**You MUST batch when**: - -- Setup complexity > test logic complexity -- Multiple assertions verify the same object state -- Related behaviors share expensive context - -**You SHOULD keep separate tests for**: - -- Distinct behaviors or execution paths -- Error conditions -- Different input scenarios - -**Bad - Redundant setup**: - -```typescript -it('has correct tool name', () => { - const tool = createComplexTool({ - /* expensive setup */ - }) - expect(tool.toolName).toBe('testTool') -}) - -it('has correct description', () => { - const tool = createComplexTool({ - /* same expensive setup */ - }) - expect(tool.description).toBe('Test description') -}) -``` - -**Good - Batched properties**: - -```typescript -it('creates tool with correct properties', () => { - const tool = createComplexTool({ - /* setup once */ - }) - expect(tool.toolName).toBe('testTool') - expect(tool.description).toBe('Test description') - expect(tool.toolSpec.name).toBe('testTool') -}) -``` - ### TypeScript Type Safety **Strict requirements**: @@ -700,189 +606,6 @@ export class ValidationError extends Error { } ``` -## Testing Patterns - -### Unit Test Location - -**Rule**: Unit tests files are co-located with source files, grouped in a directory named `__tests__` - -``` -src/subdir/ -β”œβ”€β”€ agent.ts # Source file -β”œβ”€β”€ model.ts # Source file -└── __tests__/ - β”œβ”€β”€ agent.test.ts # Tests for agent.ts - └── model.test.ts # Tests for model.ts -``` - -### Integration Test Location - -**Rule**: Integration tests are separate in `tests_integ/` - -``` -tests_integ/ -β”œβ”€β”€ api.test.ts # Tests public API -└── environment.test.ts # Tests environment compatibility -``` - -### Test File Naming - -- Unit tests: `{sourceFileName}.test.ts` in `src/**/__tests__/**` -- Integration tests: `{feature}.test.ts` in `tests_integ/` - -### Test Coverage - -- **Minimum**: 80% coverage required (enforced by Vitest) -- **Target**: Aim for high coverage on critical paths -- **Exclusions**: Test files, 
config files, generated code - -### Writing Effective Tests - -```typescript -// Good: Clear, specific test -describe('calculateTotal', () => { - describe('when given valid numbers', () => { - it('returns the sum', () => { - expect(calculateTotal([1, 2, 3])).toBe(6) - }) - }) - - describe('when given empty array', () => { - it('returns zero', () => { - expect(calculateTotal([])).toBe(0) - }) - }) -}) - -// Bad: Vague, unclear test -describe('calculateTotal', () => { - it('works', () => { - expect(calculateTotal([1, 2, 3])).toBeTruthy() - }) -}) -``` - -### Object Assertion Best Practices - -**Prefer testing entire objects at once** instead of individual properties for better readability and test coverage. - -```typescript -// βœ… Good: Verify entire object at once -it('returns expected user object', () => { - const user = getUser('123') - expect(user).toEqual({ - id: '123', - name: 'John Doe', - email: 'john@example.com', - isActive: true, - }) -}) - -// βœ… Good: Verify entire array of objects -it('yields expected stream events', async () => { - const events = await collectIterator(stream) - expect(events).toEqual([ - { type: 'streamEvent', data: 'Starting...' }, - { type: 'streamEvent', data: 'Processing...' }, - { type: 'streamEvent', data: 'Complete!' 
}, - ]) -}) - -// ❌ Bad: Testing individual properties -it('returns expected user object', () => { - const user = getUser('123') - expect(user).toBeDefined() - expect(user.id).toBe('123') - expect(user.name).toBe('John Doe') - expect(user.email).toBe('john@example.com') - expect(user.isActive).toBe(true) -}) - -// ❌ Bad: Testing array elements individually in a loop -it('yields expected stream events', async () => { - const events = await collectIterator(stream) - for (const event of events) { - expect(event.type).toBe('streamEvent') - expect(event).toHaveProperty('data') - } -}) -``` - -**Benefits of testing entire objects**: - -- **More concise**: Single assertion instead of multiple -- **Better test coverage**: Catches unexpected additional or missing properties -- **More readable**: Clear expectation of the entire structure -- **Easier to maintain**: Changes to the object require updating one place - -**Use cases**: - -- Always use `toEqual()` for object and array comparisons -- Use `toBe()` only for primitive values and reference equality -- When testing error objects, verify the entire structure including message and type - -### Testing Guidelines - -**Testing Approach:** - -- You **MUST** write tests for implementations (functions, classes, methods) -- You **SHOULD NOT** write tests for interfaces since TypeScript compiler already enforces type correctness -- You **SHOULD** write Vitest type tests (`*.test-d.ts`) for complex types to ensure backwards compatibility - -**Example Implementation Test:** - -```typescript -describe('BedrockModel', () => { - it('streams messages correctly', async () => { - const provider = new BedrockModel(config) - const stream = provider.stream(messages) - - for await (const event of stream) { - if (event.type === 'modelMessageStartEvent') { - expect(event.role).toBe('assistant') - } - } - }) -}) -``` - -### Test Model Providers - -**When to use each test provider:** - -- **`MockMessageModel`**: For agent loop tests and 
high-level flows - focused on content blocks -- **`TestModelProvider`**: For low-level event streaming tests where you need precise control over individual events - -#### MockMessageModel - Content-Focused Testing - -For tests focused on messages, you SHOULD use `MockMessageModel` with a content-focused API that eliminates boilerplate: - -```typescript -import { MockMessageModel } from '../__fixtures__/mock-message-model' - -// βœ… RECOMMENDED - Single content block (most common) -const provider = new MockMessageModel().addTurn({ type: 'textBlock', text: 'Hello' }) - -// βœ… RECOMMENDED - Array of content blocks -const provider = new MockMessageModel().addTurn([ - { type: 'textBlock', text: 'Let me help' }, - { type: 'toolUseBlock', name: 'calc', toolUseId: 'id-1', input: {} }, -]) - -// βœ… RECOMMENDED - Multi-turn with builder pattern -const provider = new MockMessageModel() - .addTurn({ type: 'toolUseBlock', name: 'calc', toolUseId: 'id-1', input: {} }) // Auto-derives 'toolUse' - .addTurn({ type: 'textBlock', text: 'The answer is 42' }) // Auto-derives 'endTurn' - -// βœ… OPTIONAL - Explicit stopReason when needed -const provider = new MockMessageModel().addTurn({ type: 'textBlock', text: 'Partial response' }, 'maxTokens') - -// βœ… OPTIONAL - Error handling -const provider = new MockMessageModel() - .addTurn({ type: 'textBlock', text: 'Success' }) - .addTurn(new Error('Model failed')) -``` - ## MCP (Model Context Protocol) Integration The [Model Context Protocol (MCP)](https://modelcontextprotocol.io) enables agents to connect to external tools and data sources through a standardized protocol. The SDK provides `McpClient` for seamless integration with MCP servers. @@ -990,37 +713,6 @@ npm run type-check # Verify TypeScript types npm run build # Compile TypeScript ``` -## Multi-Environment Testing - -The SDK is designed to work seamlessly in both Node.js and browser environments. 
Our test suite validates this by running tests in both environments using Vitest's browser mode with Playwright. - -### Test Projects - -The test suite is organized into three projects: - -1. **unit-node** (green): Unit tests running in Node.js environment -2. **unit-browser** (cyan): Same unit tests running in Chromium browser -3. **integ** (magenta): Integration tests running in Node.js - -### Environment-Specific Test Patterns - -- You MUST write tests that are environment-agnostic unless they depend on Node.js features like filesystem or env-vars - -Some tests require Node.js-specific features (like process.env, AWS SDK) and should be skipped in browser environments: - -```typescript -import { describe, it, expect } from 'vitest' -import { isNode } from '../__fixtures__/environment' - -// Tests will run in Node.js, skip in browser -describe.skipIf(!isNode)('Node.js specific features', () => { - it('uses environment variables', () => { - // This test accesses process.env - expect(process.env.NODE_ENV).toBeDefined() - }) -}) -``` - ## Troubleshooting Common Issues If TypeScript compilation fails: @@ -1073,6 +765,8 @@ When responding to PR feedback: ### Integration with Other Files - **CONTRIBUTING.md**: Contains testing/setup commands and human contribution guidelines +- **docs/TESTING.md**: Comprehensive testing guidelines (MUST follow when writing tests) +- **docs/PR.md**: Pull request guidelines and template - **README.md**: Public-facing documentation, links to strandsagents.com - **package.json**: Defines all npm scripts referenced in documentation diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c558264e..91b8c62e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -98,7 +98,7 @@ npm run test:all:coverage - **Integration Tests**: Test complete workflows in `tests_integ/` directory - **TSDoc Coverage**: All exported functions must have complete documentation -For detailed testing patterns and examples, see [AGENTS.md - Testing 
Patterns](AGENTS.md#testing-patterns). +For detailed testing patterns and guidelines, see [Testing Guidelines](docs/TESTING.md). ### Documentation Updates diff --git a/docs/TESTING.md b/docs/TESTING.md new file mode 100644 index 00000000..8b340cf1 --- /dev/null +++ b/docs/TESTING.md @@ -0,0 +1,340 @@ +# Testing Guidelines - Strands TypeScript SDK + +> **IMPORTANT**: When writing tests, you **MUST** follow the guidelines in this document. These patterns ensure consistency, maintainability, and proper test coverage across the SDK. + +This document contains comprehensive testing guidelines for the Strands TypeScript SDK. For general development guidance, see [AGENTS.md](../AGENTS.md). + +## Test Organization + +### Unit Test Location + +**Rule**: Unit test files are co-located with source files, grouped in a directory named `__tests__` + +``` +src/subdir/ +β”œβ”€β”€ agent.ts # Source file +β”œβ”€β”€ model.ts # Source file +└── __tests__/ + β”œβ”€β”€ agent.test.ts # Tests for agent.ts + └── model.test.ts # Tests for model.ts +``` + +### Integration Test Location + +**Rule**: Integration tests are separate in `tests_integ/` + +``` +tests_integ/ +β”œβ”€β”€ api.test.ts # Tests public API +└── environment.test.ts # Tests environment compatibility +``` + +### Test File Naming + +- Unit tests: `{sourceFileName}.test.ts` in `src/**/__tests__/**` +- Integration tests: `{feature}.test.ts` in `tests_integ/` + +## Test Structure Pattern + +Follow this nested describe pattern for consistency: + +### For Functions + +```typescript +import { describe, it, expect } from 'vitest' +import { functionName } from '../module' + +describe('functionName', () => { + describe('when called with valid input', () => { + it('returns expected result', () => { + const result = functionName('input') + expect(result).toBe('expected') + }) + }) + + describe('when called with edge case', () => { + it('handles gracefully', () => { + const result = functionName('') + expect(result).toBeDefined() + }) + }) 
+}) +``` + +### For Classes + +```typescript +import { describe, it, expect } from 'vitest' +import { ClassName } from '../module' + +describe('ClassName', () => { + describe('methodName', () => { + it('returns expected result', () => { + const instance = new ClassName() + const result = instance.methodName() + expect(result).toBe('expected') + }) + + it('handles error case', () => { + const instance = new ClassName() + expect(() => instance.methodName()).toThrow() + }) + }) + + describe('anotherMethod', () => { + it('performs expected action', () => { + // Test implementation + }) + }) +}) +``` + +### Key Principles + +- Top-level `describe` uses the function/class name +- Nested `describe` blocks group related test scenarios +- Use descriptive test names without "should" prefix +- Group tests by functionality or scenario + +## Writing Effective Tests + +```typescript +// Good: Clear, specific test +describe('calculateTotal', () => { + describe('when given valid numbers', () => { + it('returns the sum', () => { + expect(calculateTotal([1, 2, 3])).toBe(6) + }) + }) + + describe('when given empty array', () => { + it('returns zero', () => { + expect(calculateTotal([])).toBe(0) + }) + }) +}) + +// Bad: Vague, unclear test +describe('calculateTotal', () => { + it('works', () => { + expect(calculateTotal([1, 2, 3])).toBeTruthy() + }) +}) +``` + +## Test Batching Strategy + +**Rule**: When test setup cost exceeds test logic cost, you MUST batch related assertions into a single test. 
+ +**You MUST batch when**: + +- Setup complexity > test logic complexity +- Multiple assertions verify the same object state +- Related behaviors share expensive context + +**You SHOULD keep separate tests for**: + +- Distinct behaviors or execution paths +- Error conditions +- Different input scenarios + +**Bad - Redundant setup**: + +```typescript +it('has correct tool name', () => { + const tool = createComplexTool({ + /* expensive setup */ + }) + expect(tool.toolName).toBe('testTool') +}) + +it('has correct description', () => { + const tool = createComplexTool({ + /* same expensive setup */ + }) + expect(tool.description).toBe('Test description') +}) +``` + +**Good - Batched properties**: + +```typescript +it('creates tool with correct properties', () => { + const tool = createComplexTool({ + /* setup once */ + }) + expect(tool.toolName).toBe('testTool') + expect(tool.description).toBe('Test description') + expect(tool.toolSpec.name).toBe('testTool') +}) +``` + +## Object Assertion Best Practices + +**Prefer testing entire objects at once** instead of individual properties for better readability and test coverage. + +```typescript +// βœ… Good: Verify entire object at once +it('returns expected user object', () => { + const user = getUser('123') + expect(user).toEqual({ + id: '123', + name: 'John Doe', + email: 'john@example.com', + isActive: true, + }) +}) + +// βœ… Good: Verify entire array of objects +it('yields expected stream events', async () => { + const events = await collectIterator(stream) + expect(events).toEqual([ + { type: 'streamEvent', data: 'Starting...' }, + { type: 'streamEvent', data: 'Processing...' }, + { type: 'streamEvent', data: 'Complete!' 
}, + ]) +}) + +// ❌ Bad: Testing individual properties +it('returns expected user object', () => { + const user = getUser('123') + expect(user).toBeDefined() + expect(user.id).toBe('123') + expect(user.name).toBe('John Doe') + expect(user.email).toBe('john@example.com') + expect(user.isActive).toBe(true) +}) + +// ❌ Bad: Testing array elements individually in a loop +it('yields expected stream events', async () => { + const events = await collectIterator(stream) + for (const event of events) { + expect(event.type).toBe('streamEvent') + expect(event).toHaveProperty('data') + } +}) +``` + +**Benefits of testing entire objects**: + +- **More concise**: Single assertion instead of multiple +- **Better test coverage**: Catches unexpected additional or missing properties +- **More readable**: Clear expectation of the entire structure +- **Easier to maintain**: Changes to the object require updating one place + +**Use cases**: + +- Always use `toEqual()` for object and array comparisons +- Use `toBe()` only for primitive values and reference equality +- When testing error objects, verify the entire structure including message and type + +## What to Test + +**Testing Approach:** + +- You **MUST** write tests for implementations (functions, classes, methods) +- You **SHOULD NOT** write tests for interfaces since TypeScript compiler already enforces type correctness +- You **SHOULD** write Vitest type tests (`*.test-d.ts`) for complex types to ensure backwards compatibility + +**Example Implementation Test:** + +```typescript +describe('BedrockModel', () => { + it('streams messages correctly', async () => { + const provider = new BedrockModel(config) + const stream = provider.stream(messages) + + for await (const event of stream) { + if (event.type === 'modelMessageStartEvent') { + expect(event.role).toBe('assistant') + } + } + }) +}) +``` + +## Test Coverage + +- **Minimum**: 80% coverage required (enforced by Vitest) +- **Target**: Aim for high coverage on critical paths 
+- **Exclusions**: Test files, config files, generated code + +## Test Model Providers + +**When to use each test provider:** + +- **`MockMessageModel`**: For agent loop tests and high-level flows - focused on content blocks +- **`TestModelProvider`**: For low-level event streaming tests where you need precise control over individual events + +### MockMessageModel - Content-Focused Testing + +For tests focused on messages, you SHOULD use `MockMessageModel` with a content-focused API that eliminates boilerplate: + +```typescript +import { MockMessageModel } from '../__fixtures__/mock-message-model' + +// βœ… RECOMMENDED - Single content block (most common) +const provider = new MockMessageModel().addTurn({ type: 'textBlock', text: 'Hello' }) + +// βœ… RECOMMENDED - Array of content blocks +const provider = new MockMessageModel().addTurn([ + { type: 'textBlock', text: 'Let me help' }, + { type: 'toolUseBlock', name: 'calc', toolUseId: 'id-1', input: {} }, +]) + +// βœ… RECOMMENDED - Multi-turn with builder pattern +const provider = new MockMessageModel() + .addTurn({ type: 'toolUseBlock', name: 'calc', toolUseId: 'id-1', input: {} }) // Auto-derives 'toolUse' + .addTurn({ type: 'textBlock', text: 'The answer is 42' }) // Auto-derives 'endTurn' + +// βœ… OPTIONAL - Explicit stopReason when needed +const provider = new MockMessageModel().addTurn({ type: 'textBlock', text: 'Partial response' }, 'maxTokens') + +// βœ… OPTIONAL - Error handling +const provider = new MockMessageModel() + .addTurn({ type: 'textBlock', text: 'Success' }) + .addTurn(new Error('Model failed')) +``` + +## Multi-Environment Testing + +The SDK is designed to work seamlessly in both Node.js and browser environments. Our test suite validates this by running tests in both environments using Vitest's browser mode with Playwright. + +### Test Projects + +The test suite is organized into three projects: + +1. **unit-node** (green): Unit tests running in Node.js environment +2. 
**unit-browser** (cyan): Same unit tests running in Chromium browser +3. **integ** (magenta): Integration tests running in Node.js + +### Environment-Specific Test Patterns + +- You MUST write tests that are environment-agnostic unless they depend on Node.js features like filesystem or env-vars + +Some tests require Node.js-specific features (like process.env, AWS SDK) and should be skipped in browser environments: + +```typescript +import { describe, it, expect } from 'vitest' +import { isNode } from '../__fixtures__/environment' + +// Tests will run in Node.js, skip in browser +describe.skipIf(!isNode)('Node.js specific features', () => { + it('uses environment variables', () => { + // This test accesses process.env + expect(process.env.NODE_ENV).toBeDefined() + }) +}) +``` + +## Development Commands + +```bash +npm test # Run unit tests in Node.js +npm run test:browser # Run unit tests in browser (Chromium via Playwright) +npm run test:all # Run all tests in all environments +npm run test:integ # Run integration tests +npm run test:coverage # Run tests with coverage report +``` + +For detailed command usage, see [CONTRIBUTING.md - Testing Instructions](../CONTRIBUTING.md#testing-instructions-and-best-practices). 
From e6da2309dc132475880f95cc34004833da500f8a Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow Date: Fri, 5 Dec 2025 12:21:02 -0500 Subject: [PATCH 06/13] Add test reporters --- .github/workflows/test-lint.yml | 13 +++++++++++-- vitest.config.ts | 5 +++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-lint.yml b/.github/workflows/test-lint.yml index a7076070..5a298f81 100644 --- a/.github/workflows/test-lint.yml +++ b/.github/workflows/test-lint.yml @@ -18,7 +18,7 @@ jobs: matrix: node-version: [20, 22] os: [ubuntu-latest, windows-latest, macos-latest] - + steps: - name: Checkout code uses: actions/checkout@v6 @@ -40,6 +40,15 @@ jobs: - name: Run unit tests run: npm run test:all:coverage + - name: Upload Artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-artifacts + path: ./test/.artifacts/ + retention-days: 4 + if-no-files-found: ignore + - name: Run linting run: npm run lint @@ -53,4 +62,4 @@ jobs: run: npm run build - name: Test packaging - run: npm run test:package \ No newline at end of file + run: npm run test:package diff --git a/vitest.config.ts b/vitest.config.ts index 08a531cd..2dba9298 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -23,6 +23,11 @@ const getOpenAIAPIKey: BrowserCommand<[], string | undefined> = async ({ testPat export default defineConfig({ test: { unstubEnvs: true, + reporters: [ + 'default', + ['junit', { outputFile: 'test/.artifacts/test-report/junit/report.xml' }], + ['json', { outputFile: 'test/.artifacts/test-report/json/report.json' }], + ], projects: [ { test: { From 4505fb737525c9f6c53dcb9f3730cffb3dc0ece2 Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow Date: Fri, 5 Dec 2025 16:18:03 -0500 Subject: [PATCH 07/13] Update agent guidelines --- .github/agent-sops/task-implementer.sop.md | 84 ++++++++++++++++------ .github/agent-sops/task-refiner.sop.md | 46 +++++++++--- 2 files changed, 100 insertions(+), 30 deletions(-) diff --git 
a/.github/agent-sops/task-implementer.sop.md b/.github/agent-sops/task-implementer.sop.md index db621c06..fda4b8f5 100644 --- a/.github/agent-sops/task-implementer.sop.md +++ b/.github/agent-sops/task-implementer.sop.md @@ -2,7 +2,7 @@ ## Role -You are a Task Implementer, and your goal is to implement a task defined in a github issue. You will write code using test-driven development principles, following a structured Explore, Plan, Code, Commit workflow. During your implementation, you will write code that follows existing patterns, create comprehensive documentation, generate test cases, create a pull requests for review, and iterate on the provided feedback until the pull request is accepted. +You are a Task Implementer, and your goal is to implement a task defined in a github issue. You will write code using test-driven development principles, following a structured Explore, Plan, Code, Commit workflow. During your implementation, you will write code that follows existing patterns, create comprehensive documentation, generate test cases, create a pull request for review, and iterate on the provided feedback until the pull request is accepted. ## Steps @@ -11,6 +11,7 @@ You are a Task Implementer, and your goal is to implement a task defined in a gi Initialize the task environment and discover repository instruction files. **Constraints:** + - You MUST create a progress notebook to track script execution using markdown checklists, setup notes, and implementation progress - You MUST check for environment setup instructions in the following locations: - `AGENTS.md` @@ -37,7 +38,6 @@ Initialize the task environment and discover repository instruction files. - If the push operation is deferred, continue with the workflow and note the deferred status - You MAY continue on the current branch if not on main branch - - ### 2. Explore Phase ### 2.1 Extract Task Context
Analyze the task description and existing documentation to identify core functionality, edge cases, and constraints. **Constraints:** + - You MUST read the issue description - You MUST investigate any links provided in the feature request - You MUST note how the information from this link can influence the implementation @@ -61,6 +62,7 @@ Analyze the task description and existing documentation to identify core functio Search for similar implementations and identify interfaces, libraries, and components the implementation will interact with. **Constraints:** + - You MUST analyze the task and identify core functionality, edge cases, and constraints - You MUST search the repository for relevant code, patterns, and information related to the coding task and note your findings - You MUST create a dependency map showing how new code will integrate @@ -72,6 +74,7 @@ Search for similar implementations and identify interfaces, libraries, and compo Compile all findings into a comprehensive code context notebook. **Constraints:** + - You MUST update your notebook with requirements, implementation details, patterns, and dependencies - You MUST ensure your notes are well-structured with clear headings - You MUST focus on high-level concepts and patterns rather than detailed implementation code @@ -87,7 +90,6 @@ Compile all findings into a comprehensive code context notebook. - You MUST clearly label any included code snippets as examples or references, not as the actual implementation - You MUST keep any included code snippets brief and focused on the specific concept being illustrated - ### 3. Plan Phase #### 3.1 Design Test Strategy @@ -95,10 +97,12 @@ Compile all findings into a comprehensive code context notebook. Create a comprehensive list of test scenarios covering normal operation, edge cases, and error conditions. 
**Constraints:** + +- You MUST read and follow the testing guidelines in [docs/TESTING.md](../../docs/TESTING.md) before writing any tests - You MUST check for existing testing strategies documented in the repository documentation or your notes - You MUST cover all acceptance criteria with at least one test scenario - You MUST define explicit input/output pairs for each test case -- You MUST make note of these test scenarios +- You MUST make note of these test scenarios - You MUST design tests that will initially fail when run against non-existent implementations - You MUST NOT create mock implementations during the test design phase because tests should be written based solely on expected behavior, not influenced by implementation details - You MUST focus on test scenarios and expected behaviors rather than detailed test code in documentation @@ -110,12 +114,12 @@ Create a comprehensive list of test scenarios covering normal operation, edge ca - You MUST clearly label any included test code snippets as examples or references - You SHOULD explain the reasoning behind the proposed test structure - #### 3.2 Implementation Planning & Tracking Outline the high-level structure of the implementation and create an implementation plan. **Constraints:** + - You MUST create an implementation plan notebook - You MUST include all key implementation tasks in the plan - You SHOULD consider performance, security, and maintainability implications @@ -140,6 +144,8 @@ Outline the high-level structure of the implementation and create an implementat Write test cases based on the outlines, following strict TDD principles. 
**Constraints:** + +- You MUST follow the test patterns and conventions defined in [docs/TESTING.md](../../docs/TESTING.md) - You MUST validate that the task environment is set up properly - If you already created a commit, ensure the latest commit matches the expected hash - If not, ensure the correct branch is checked out @@ -171,6 +177,7 @@ Write test cases based on the outlines, following strict TDD principles. Write implementation code to pass the tests, focusing on simplicity and correctness first. **Constraints:** + - You MUST update your progress in your implementation plan notes - You MUST follow the strict TDD cycle: RED β†’ GREEN β†’ REFACTOR - You MUST document each TDD cycle in your progress notes @@ -202,6 +209,7 @@ Write implementation code to pass the tests, focusing on simplicity and correctn If the implementation is complete, proceed with review of the implementation to identify opportunities for simplification or improvement. **Constraints:** + - You MAY reply to user review threads with a concise response - You MUST keep your response to less than 3 sentences - You MUST check that all tasks are complete before proceeding @@ -209,7 +217,7 @@ If the implementation is complete, proceed with review of the implementation to - if builds fail, you MUST identify the issue implement a fix - You MUST prioritize readability and maintainability over clever optimizations - You MUST maintain test passing status throughout refactoring -- You SHOULD make note of simplification in your progress notes +- You SHOULD make note of simplification in your progress notes - You SHOULD record significant refactorings in your progress notes #### 4.4 Validate Implementation @@ -217,15 +225,17 @@ If the implementation is complete, proceed with review of the implementation to If the implementation meets all requirements and follows established patterns, proceed with this step. Otherwise, return to step 4.2 to fix any issues. 
**Constraints:** + - You MUST address any discrepancies between requirements and implementation - You MUST execute the relevant test command and verify all implemented tests pass successfully -- You MUST execute the relevant build command and verify builds succeed -- You MUST ensure code coverage meets the requirements for the repository +- You MUST execute the relevant build command and verify builds succeed +- You MUST ensure code coverage meets the requirements for the repository - You MUST verify all items in the implementation plan have been completed - You MUST provide the complete test execution output - You MUST NOT claim implementation is complete if any tests are failing because failing tests indicate the implementation doesn't meet requirements **Build Validation:** + - You MUST run appropriate build commands based on the guidance in the repository - You MUST verify that all dependencies are satisfied - You MUST follow the Build Output Management practices defined in the Best Practices section @@ -235,15 +245,17 @@ If the implementation meets all requirements and follows established patterns, p If all tests are passing, draft a conventional commit message, perform the git commit, and create/update the pull request. 
**Constraints:** + +- You MUST read and follow the PR description guidelines in [docs/PR.md](../../docs/PR.md) when creating pull requests - You MUST check that all tasks are complete before proceeding - You MUST reference your notes for the issue you are creating a pull request for -- You MUST NOT commit changes until builds AND tests have been verified because committing broken code can disrupt the development workflow and introduce bugs into the codebase +- You MUST NOT commit changes until builds AND tests have been verified because committing broken code can disrupt the development workflow and introduce bugs into the codebase - You MUST follow the Conventional Commits specification - You MUST use `git status` to check which files have been modified - You MUST use `git add` to stage all relevant files - You MUST execute the `git commit -m ` command with the prepared commit message - You MAY use `git push origin ` to push the local branch to the remote if the `GITHUB_WRITE` environment variable is set to `true` - - If the push operation is deferred, continue with PR creation and note the deferred status + - If the push operation is deferred, continue with PR creation and note the deferred status - You MUST attempt to create the pull request using the `create_pull_request` tool if it does not exist yet - If the PR creation is deferred, continue with the workflow and note the deferred status - You MUST use the task id recorded in your notes, not the issue id @@ -270,6 +282,7 @@ If all tests are passing, draft a conventional commit message, perform the git c Request the user for feedback on the implementation using the handoff_to_user tool. **Constraints:** + - You MUST use the handoff_to_user tool to inform the user you want their feedback as comments on the pull request #### 6.2. 
Read User Responses @@ -277,6 +290,7 @@ Request the user for feedback on the implementation using the handoff_to_user to Retrieve and analyze the user's responses from the pull request reviews and comments. **Constraints:** + - You MUST make note of the pull request number - You MUST fetch the review and the review comments from the PR using available tools - You MUST use the list_pr_reviews to list all pr reviews @@ -298,6 +312,7 @@ Retrieve and analyze the user's responses from the pull request reviews and comm Based on the users feedback, you will review and update your implementation plan **Constraints:** + - You MUST make note of the requested changes from the user - You MUST update your implementation plan based on the feedback from the user - You MUST return to step 3 if you need to re-plan your implementation @@ -308,45 +323,53 @@ Based on the users feedback, you will review and update your implementation plan ## Desired Outcome -* A complete, well-tested code implementation that meets the specified requirements -* A comprehensive test suite that validates the implementation -* Clean, documented code that: - * Follows existing package patterns and conventions - * Prioritizes readability and extensibility - * Avoids over-engineering and over-abstraction - * Is idiomatic and modern in the implementation language -* A well-organized set of implementation artifacts in the pull request description or comments -* Documentation or comments of key design decisions and implementation notes -* Properly committed changes with conventional commit messages +- A complete, well-tested code implementation that meets the specified requirements +- A comprehensive test suite that validates the implementation +- Clean, documented code that: + - Follows existing package patterns and conventions + - Prioritizes readability and extensibility + - Avoids over-engineering and over-abstraction + - Is idiomatic and modern in the implementation language +- A well-organized set of 
implementation artifacts in the pull request description or comments +- Documentation or comments of key design decisions and implementation notes +- Properly committed changes with conventional commit messages ## Examples ## Troubleshooting ### Branch Creation Issues + If feature branch creation fails: + - Move any changes in the `.github` directory to the `.github_temp` directory - Check for existing branch with same name - Generate alternative branch name with timestamp -- Ensure git repository is properly +- Ensure git repository is properly - As a last resort, leave a comment on the Task Issue mentioning the issue you are facing ### Pull Request Creation Issues + If PR creation fails (excluding deferred responses): + - Verify GitHub authentication and permissions - Check if remote repository exists and is accessible - You MUST commit your current work to the branch - As a last resort, leave a comment on the Task Issue mentioning the issue you are facing ### Deferred Operations + When GitHub tools or git operations are deferred: + - Continue with the workflow as if the operation succeeded - Note the deferred status in your progress tracking - The operations will be executed after agent completion - Do not retry or attempt alternative approaches for deferred operations ### Build Issues + If builds fail during implementation: + - You SHOULD follow build instructions from DEVELOPMENT.md if available - You SHOULD verify you're in the correct directory for the build system - You SHOULD try clean builds before rebuilding when encountering issues @@ -356,23 +379,27 @@ If builds fail during implementation: ## Best Practices ### Repository-Specific Instructions + - Always check for DEVELOPMENT.md, AGENTS.md, and README.md in the current repository and follow any instructions provided - If these don't exist, suggest creating it - Always follow build commands, testing frameworks, and coding standards as specified ### Project Structure Detection + - Detect project type by 
examining files (pyproject.toml, build.gradle, package.json, etc.) - Check for DEVELOPMENT.md for explicit project instructions - Apply appropriate build commands and directory structures based on detected type - Use project-specific practices when specified in DEVELOPMENT.md ### Build Command Patterns + - Use project-appropriate build commands as specified in DEVELOPMENT.md or detected from project type - Always run builds from the correct directory as specified in the repository documentation - Use clean builds when encountering issues - Verify builds pass before committing changes ### Build Output Management + - Pipe all build output to log files to avoid context pollution: `[build-command] > build_output.log 2>&1` - Use targeted search patterns to verify build results instead of displaying full output - Search for specific success/failure indicators based on build system @@ -380,11 +407,14 @@ If builds fail during implementation: - You MUST not include build logs in your commit and pull request ### Dependency Management + - Handle dependencies appropriately based on project type and DEVELOPMENT.md instructions - Follow project-specific dependency resolution procedures when specified - Use appropriate package managers and dependency files for the project type ### Testing Best Practices + +- You MUST follow the comprehensive testing guidelines in [docs/TESTING.md](../../docs/TESTING.md) - Follow TDD principles: RED β†’ GREEN β†’ REFACTOR - Write tests that fail initially, then implement to make them pass - Use appropriate testing frameworks for the project type or as specified in DEVELOPMENT.md @@ -392,14 +422,24 @@ If builds fail during implementation: - Run tests after each implementation step ### Documentation Organization + - Use consolidated documentation files: context.md, plan.md, progress.md - Keep documentation separate from implementation code - Focus on high-level concepts rather than detailed code in documentation - Use progress tracking with markdown 
checklists - Document decisions, assumptions, and challenges +### Pull Request Best Practices + +- You MUST follow the PR description guidelines in [docs/PR.md](../../docs/PR.md) +- Focus on WHY the change is needed, not HOW it's implemented +- Document public API changes with before/after code examples +- Write for senior engineers familiar with the project +- Skip implementation details, test coverage notes, and line-by-line change lists + ### Git Best Practices + - Commit early and often with descriptive messages - Follow Conventional Commits specification - You must create a new commit for each feedback iteration -- You must only push to your feature branch, never main \ No newline at end of file +- You must only push to your feature branch, never main diff --git a/.github/agent-sops/task-refiner.sop.md b/.github/agent-sops/task-refiner.sop.md index fd1f1a86..2823d8a6 100644 --- a/.github/agent-sops/task-refiner.sop.md +++ b/.github/agent-sops/task-refiner.sop.md @@ -11,16 +11,19 @@ You are a Task Refiner, and your goal is to review the feature request for a tas Retrieve the complete issue information including description and all comments. **Constraints:** + - You MUST read the issue description - You MUST read all existing comments to understand full context - You MUST capture issue metadata (title, labels, status, etc.) ### 2. Explore Phase + #### 2.1 Analyze Feature Request Analyze the issue content to identify implementation requirements and potential ambiguities. **Constraints:** + - You MUST check for existing documentation in: - `AGENTS.md` - `CONTRIBUTING.md` @@ -40,6 +43,7 @@ Analyze the issue content to identify implementation requirements and potential Search for similar implementations and identify interfaces, libraries, and components the implementation will interact with. 
**Constraints:** + - You MUST identify the main programming languages and frameworks used - You MUST search the current repository for relevant code, patterns, and information related to the task - You MUST locate relevant existing code that relates to the feature request @@ -54,6 +58,7 @@ Search for similar implementations and identify interfaces, libraries, and compo After performing the investigation of the feature request and understanding the repository, you will think about the work needed to implement this feature. This feature will be implemented by a single developer, and should be scoped to be completed in a few days. You should note any concerns that this task is too large in scope **Constraints:** + - You MUST identify the work required to implement this feature - You MUST review the current state of the repository, and identify any potential issues that might occur during implementation - You MUST determine if this task is small enough to be implemented in a single Pull Request @@ -69,6 +74,7 @@ After performing the investigation of the feature request and understanding the Deterime if you should ask clarifying questions, or if the task is already in an implementable state given your research. **Constraints:** + - You MAY skip to step 4 if you do not have any clarifying questions - You SHOULD continue to the next step if you have identified questions to ask @@ -77,6 +83,7 @@ Deterime if you should ask clarifying questions, or if the task is already in an Create a numbered list of questions to resolve ambiguities and gather missing information. Once you have generated a list of questions, you will post all of the questions as a single comment on the issue. 
**Constraints:** + - You MUST review relevant notes you made in your notebook - You MUST clarify if github workflow creations or changes are needed - You MUST suggest creating them under a `.github_temp` directory since you do not have permission to push to `.github` directory @@ -100,6 +107,7 @@ Create a numbered list of questions to resolve ambiguities and gather missing in Use the handoff_to_user tool to inform the user they can reply to the clarifying questions on the issue. **Constraints:** + - You MUST use the handoff_to_user tool after posting your questions - You MUST ask your clarifying questions when handing off to user - You MUST tell the user to reply to your questions on the issue @@ -109,6 +117,7 @@ Use the handoff_to_user tool to inform the user they can reply to the clarifying Retrieve and analyze the user's responses from the issue comments. **Constraints:** + - You MUST read all new comments since the last check - You MUST identify which comments contain responses to your questions - You MUST extract answers and map them to the original questions @@ -120,6 +129,7 @@ Retrieve and analyze the user's responses from the issue comments. Determine from the users responses if the task should be broken down into sub-task. You can skip this step if the user does not think this should be broken down. 
**Constraints:** + - You MUST note any clarifying questions that are needed when breaking down this issue into a smaller task - You MUST create a notebook for each new sub-issue you plan to create - You MUST identify any dependencies that are required for the new sub-task @@ -132,6 +142,7 @@ Determine from the users responses if the task should be broken down into sub-ta Determine if the responses provide sufficient information for implementation **Constraints:** + - You MUST assess if all critical questions have been answered - You MUST identify any remaining ambiguities - You MUST determine if additional clarification is needed @@ -144,13 +155,14 @@ Determine if the responses provide sufficient information for implementation - You MAY return to step 3.2 if significant questions remain unanswered - You MUST limit iterations to prevent endless loops (maximum 5 rounds of questions) - ### 4. Update Task + #### 4.1 Update Task Description Update the original issue with a comprehensive task description. **Constraints:** + - You MUST edit the original issue description directly - If the edit operation is deferred, continue with the workflow and note the deferred status - You MUST preserve the original request context @@ -170,6 +182,7 @@ Update the original issue with a comprehensive task description. Create new sub-tasks if you and the user have determined that this task is too complex **Constraints:** + - You MUST create new issue for each sub-task - If issue creation is deferred, continue with the workflow and note the deferred status - You MUST create a description with a comprehensive overview of the work required, following the same description format as the parent task @@ -181,22 +194,24 @@ Create new sub-tasks if you and the user have determined that this task is too c Record that the task review is complete and ready as a comment on the issue. 
**Constraints:** + - You MUST only add a comment on the parent issue if any sub-issues were created - If comment posting is deferred, continue with the workflow and note the deferred status - You MUST summarize what was accomplished in your comment - You MUST confirm in your comment that the issue is ready for implementation, or explain why it is not -- You MUST record the estimated scope of work based on repository analysis - You SHOULD mention any final recommendations or considerations ## Examples ### Example Repository Analysis Comment + ```markdown ## Repository Analysis & Clarifying Questions I've analyzed the repository structure and have some questions to ensure proper implementation: ### Repository Context + - **Framework**: React with TypeScript frontend, Node.js/Express backend - **Authentication**: Currently using JWT tokens (found in `/src/auth/`) - **Database**: PostgreSQL with Prisma ORM @@ -205,14 +220,17 @@ I've analyzed the repository structure and have some questions to ensure proper ### Clarifying Questions #### Integration with Existing Auth System + 1. Should this feature extend the existing JWT authentication or replace it? 2. How should this integrate with the current user registration flow? #### Database Schema + 3. Should we modify the existing `users` table or create new tables? 4. What user data fields are required for this feature? #### Frontend Components + 5. Should we update existing auth components or create new ones? 6. What should the user interface look like for this feature? @@ -220,29 +238,36 @@ Please respond when you have a chance. Based on my analysis, this will require m ``` ### Example Final Issue Description Update + ```markdown # Overview + Add user authentication system to allow users to log in and access protected features. 
## Implementation Requirements + Based on clarification discussion and repository analysis: ### Technical Approach + - **Framework Integration**: Extend existing React/TypeScript frontend and Node.js backend - **Database Changes**: Modify existing `users` table in PostgreSQL - **Authentication Flow**: Enhance current JWT-based system ### Authentication Method + - Email/password authentication - Optional two-factor authentication (2FA) - Support for password reset functionality ### Session Management + - 24-hour session duration - Automatic session renewal on activity - Secure session storage using existing JWT infrastructure ### Files to Modify + - `/src/auth/authController.js` - Add 2FA logic - `/src/components/auth/LoginForm.tsx` - Update UI - `/src/models/User.js` - Add 2FA fields @@ -250,6 +275,7 @@ Based on clarification discussion and repository analysis: - `/src/middleware/auth.js` - Session management ### Acceptance Criteria + - [ ] Users can register with email/password - [ ] Users can log in and log out - [ ] Sessions expire after 24 hours of inactivity @@ -257,41 +283,45 @@ Based on clarification discussion and repository analysis: - [ ] 2FA can be enabled/disabled by user - [ ] Integration tests pass - [ ] Existing auth functionality remains intact - -### Estimated Scope -- **Complexity**: Medium -- **Files Modified**: ~8-10 files -- **New Components**: 2-3 React components -- **Database Migrations**: 1-2 migrations required ``` ## Troubleshooting ### Missing Issue: + If the issue does not exist: + 1. You MUST gracefully exit without performing any actions ### Repository Access Issues + If unable to access repository files: + 1. Verify repository permissions and authentication 2. Check if the repository is private or has restricted access 3. Leave a comment explaining the access limitation ### Large Repository Analysis + For very large repositories: + 1. Focus on key directories related to the feature 2. 
Use search functionality to find relevant code patterns 3. Prioritize understanding the main architecture over exhaustive exploration ### Deferred Operations + When GitHub tools are deferred: + - Continue with the workflow as if the operation succeeded - Note the deferred status in your progress tracking - The operations will be executed after agent completion - Do not retry or attempt alternative approaches for deferred operations ### Incomplete Repository Understanding + If the codebase is unclear or poorly documented: + 1. Ask specific questions about architecture in your clarifying questions 2. Request documentation or guidance from the repository maintainers 3. Make reasonable assumptions and document them clearly From 55e8ba60b2430980c4e03489ceb502af368aec76 Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> Date: Fri, 5 Dec 2025 20:12:20 -0500 Subject: [PATCH 08/13] Add checklist for PR description requirements Added a checklist for PR descriptions to ensure clarity and relevance. --- docs/PR.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/PR.md b/docs/PR.md index 8a50336f..d13c602e 100644 --- a/docs/PR.md +++ b/docs/PR.md @@ -193,3 +193,9 @@ showing when developers would use this feature. Skip for trivial changes.] - [Conventional Commits](https://www.conventionalcommits.org/) - [Google's Code Review Guidelines](https://google.github.io/eng-practices/review/) ``` + +## Checklist Items + + - [ ] Does the PR description target a Senior Engineer familiar with the project? + - [ ] Does the PR contain the motivation or use-cases behind the change? + - [ ] Does the PR omit irrelevent details not needed for historical reference? 
From a192002c2d1307b1590f9d9f2dbb708dda4a85e1 Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> Date: Fri, 5 Dec 2025 20:16:34 -0500 Subject: [PATCH 09/13] Add checklist items for testing guidelines Added a checklist for testing best practices. --- docs/TESTING.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/TESTING.md b/docs/TESTING.md index 8b340cf1..7e2bf987 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -338,3 +338,11 @@ npm run test:coverage # Run tests with coverage report ``` For detailed command usage, see [CONTRIBUTING.md - Testing Instructions](../CONTRIBUTING.md#testing-instructions-and-best-practices). + +## Checklist Items + + - [ ] Do the test use relevent helpers from __fixtures__ (`MockMessageModel`, `createMockTool`, `createMockAgent` etc.) + - [ ] Are reoccuring code or patterns extracted to functions for better usability/readability + - [ ] Are tests focused on verifying one or two things only? + - [ ] Are tests written concisely enough that the bulk of each test is important to the test instead of boilerplate code? + - [ ] Are tests asserting on the entire object instead of specific fields? From 3b72a135ce95d28cda98203cf23514b8b37a8b8d Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com> Date: Fri, 5 Dec 2025 20:22:28 -0500 Subject: [PATCH 10/13] Add checklist item for PR description overview Added a checklist item to ensure the PR description includes an overview of the feature and key implementation decisions. --- docs/PR.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/PR.md b/docs/PR.md index d13c602e..30d89529 100644 --- a/docs/PR.md +++ b/docs/PR.md @@ -197,5 +197,7 @@ showing when developers would use this feature. Skip for trivial changes.] ## Checklist Items - [ ] Does the PR description target a Senior Engineer familiar with the project? 
+ - [ ] Does the PR description give an overview of the feature being implemented, including any notes on key implementation decisions
+ - [ ] Does the PR include a "Resolves #" in the body that is not bolded?
  - [ ] Does the PR contain the motivation or use-cases behind the change?
  - [ ] Does the PR omit irrelevent details not needed for historical reference?

From e0e93bf2c33a8d500e2800e83461756ae16b0078 Mon Sep 17 00:00:00 2001
From: Mackenzie Zastrow <3211021+zastrowm@users.noreply.github.com>
Date: Fri, 5 Dec 2025 20:25:16 -0500
Subject: [PATCH 11/13] Update task implementation SOP with PR guidelines

Clarified guidelines for pull request creation and checklist inclusion.
---
 .github/agent-sops/task-implementer.sop.md | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/.github/agent-sops/task-implementer.sop.md b/.github/agent-sops/task-implementer.sop.md
index fda4b8f5..38237bc0 100644
--- a/.github/agent-sops/task-implementer.sop.md
+++ b/.github/agent-sops/task-implementer.sop.md
@@ -210,6 +210,7 @@ If the implementation is complete, proceed with review of the implementation to
 
 **Constraints:**
 
+- You SHOULD review your own code according to the checklist items in [docs/TESTING.md](../../docs/TESTING.md).
- You MAY reply to user review threads with a concise response - You MUST keep your response to less than 3 sentences - You MUST check that all tasks are complete before proceeding @@ -246,7 +247,8 @@ If all tests are passing, draft a conventional commit message, perform the git c **Constraints:** -- You MUST read and follow the PR description guidelines in [docs/PR.md](../../docs/PR.md) when creating pull requests +- You MUST read and follow the PR description guidelines in [docs/PR.md](../../docs/PR.md) when creating pull requests & commits +- You SHOULD include checklist items from [docs/PR.md](../../docs/PR.md) to validate the pull request description - You MUST check that all tasks are complete before proceeding - You MUST reference your notes for the issue you are creating a pull request for - You MUST NOT commit changes until builds AND tests have been verified because committing broken code can disrupt the development workflow and introduce bugs into the codebase @@ -259,16 +261,11 @@ If all tests are passing, draft a conventional commit message, perform the git c - You MUST attempt to create the pull request using the `create_pull_request` tool if it does not exist yet - If the PR creation is deferred, continue with the workflow and note the deferred status - You MUST use the task id recorded in your notes, not the issue id - - You MUST include "Resolves: #" in the body of the pull request - - You MUST NOT bold this line - - You MUST give an overview of the feature being implemented - - You MUST include any notes on key implementation decisions, ambiguity, or other information as part of the pull request description - If the `create_pull_request` tool fails (excluding deferred responses): - You MUST create a PR creation link using GitHub's query parameters - You MUST post the link as a comment on the issue - You MUST use the format: `https://github.com/{owner}/{repo}/compare/{base}...{head}?quick_pull=1&title={url_encoded_title}&body={url_encoded_body}` - 
URL-encode the title and body parameters - - Include "Resolves: #{issue_number}" in the body - If PR creation succeeds or is deferred: - You MUST review your notes for any updates to provide on the pull request - You MAY use the `update_pull_request` tool to update the pull request body or title @@ -320,6 +317,7 @@ Based on the users feedback, you will review and update your implementation plan - You MUST NOT close the parent issue - only the user should close it after the pull request is merged - You MUST not attempt to merge the pull request - You MUST use the handoff_to_user tool to inform the user you are ready for clarifying information on the pull request +- You SHOULD include additional checklist items from [docs/PR.md](../../docs/PR.md) to validate the pull request description is correct after making additional changes ## Desired Outcome @@ -387,7 +385,7 @@ If builds fail during implementation: ### Project Structure Detection - Detect project type by examining files (pyproject.toml, build.gradle, package.json, etc.) 
-- Check for DEVELOPMENT.md for explicit project instructions +- Check for DEVELOPMENT.md for explicit project instructions in the `docs/` directory - Apply appropriate build commands and directory structures based on detected type - Use project-specific practices when specified in DEVELOPMENT.md From 5712d58c2619ab9cb4cfa1cd8ca1112533b61111 Mon Sep 17 00:00:00 2001 From: Mackenzie Zastrow Date: Fri, 5 Dec 2025 21:09:07 -0500 Subject: [PATCH 12/13] Flesh out agent instructions a bit more --- .github/agent-sops/task-implementer.sop.md | 56 +++++++++++++++++----- .gitignore | 4 +- docs/TESTING.md | 40 ++++++++++++++-- 3 files changed, 83 insertions(+), 17 deletions(-) diff --git a/.github/agent-sops/task-implementer.sop.md b/.github/agent-sops/task-implementer.sop.md index 38237bc0..d8911b4b 100644 --- a/.github/agent-sops/task-implementer.sop.md +++ b/.github/agent-sops/task-implementer.sop.md @@ -204,26 +204,42 @@ Write implementation code to pass the tests, focusing on simplicity and correctn - You MUST otherwise continue automatically after verifying test results - You MUST follow the Build Output Management practices defined in the Best Practices section -#### 4.3 Review, Refactor, and Optimize +#### 4.3 Review and Refactor Implementation -If the implementation is complete, proceed with review of the implementation to identify opportunities for simplification or improvement. +If the implementation is complete, proceed with a self-review of the implementation code to identify opportunities for simplification or improvement. **Constraints:** -- You SHOULD review your own code according to the checklist items in [docs/TESTING.md](../../docs/TESTING.md). 
-- You MAY reply to user review threads with a concise response - - You MUST keep your response to less than 3 sentences - You MUST check that all tasks are complete before proceeding - - if tests fail, you MUST identify the issue and implement a fix - - if builds fail, you MUST identify the issue implement a fix -- You MUST prioritize readability and maintainability over clever optimizations + - If tests fail, you MUST identify the issue and implement a fix + - If builds fail, you MUST identify the issue and implement a fix +- You MUST prioritize readability and maintainability - You MUST maintain test passing status throughout refactoring -- You SHOULD make note of simplification in your progress notes +- You MUST look for opportunities to: + - Reduce code duplication + - Simplify complex logic + - Improve naming and clarity + - Remove unnecessary abstractions +- You SHOULD make note of simplifications in your progress notes - You SHOULD record significant refactorings in your progress notes +- You MUST return to step 4.2 if refactoring reveals additional implementation needs + +#### 4.4 Review and Refactor Tests + +After reviewing the implementation, review the test code to ensure it follows established patterns and provides adequate coverage. + +**Constraints:** + +- You MUST review your test code according to the guidelines in [docs/TESTING.md](../../docs/TESTING.md). + - You SHOULD add a task to validate it conforms to that documents +- You MUST verify tests conform to the testing documentation standards +- You MUST verify tests are readable and maintainable +- You SHOULD refactor tests that are overly complex or duplicative +- You MUST return to step 4.1 if tests need significant restructuring -#### 4.4 Validate Implementation +#### 4.5 Validate Implementation -If the implementation meets all requirements and follows established patterns, proceed with this step. Otherwise, return to step 4.2 to fix any issues. 
+If the implementation meets all requirements and follows established patterns, proceed with this step. Otherwise, return to step 4.2, 4.3, or 4.4 to fix any issues. **Constraints:** @@ -241,6 +257,24 @@ If the implementation meets all requirements and follows established patterns, p - You MUST verify that all dependencies are satisfied - You MUST follow the Build Output Management practices defined in the Best Practices section +#### 4.6 Respond to Review Feedback + +If you have received feedback from user reviews or PR comments, address them before proceeding to the commit phase. + +**Constraints:** + +- You MAY skip this step if no user feedback has been received yet +- You MUST reply to user review threads with a concise response + - You MUST keep your response to less than 3 sentences +- You MUST categorize each piece of feedback as: + - Actionable code changes that can be implemented immediately + - Clarifying questions that require user input + - Suggestions to consider for future iterations +- You MUST implement actionable code changes before proceeding +- You MUST re-run tests after addressing feedback to ensure nothing is broken +- You MUST return to step 4.3 after implementing changes to review the updated code +- You MUST use the handoff_to_user tool if clarification is needed before you can proceed + ### 5. Commit and Pull Request Phase If all tests are passing, draft a conventional commit message, perform the git commit, and create/update the pull request. 
diff --git a/.gitignore b/.gitignore index 57b1943b..37b5c487 100644 --- a/.gitignore +++ b/.gitignore @@ -37,4 +37,6 @@ Thumbs.db .env.production.local # Github workflow artifacts -.artifact \ No newline at end of file +.artifact + +test/.artifacts \ No newline at end of file diff --git a/docs/TESTING.md b/docs/TESTING.md index 7e2bf987..19e7696a 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -296,6 +296,36 @@ const provider = new MockMessageModel() .addTurn(new Error('Model failed')) ``` +## Testing Hooks + +When testing hook behavior with an Agent, you **SHOULD** register callbacks directly on the agent's `hooks` property rather than creating an inline `HookProvider` object. + +```typescript +// βœ… CORRECT - Register callbacks directly on agent.hooks +const agent = new Agent({ model, tools: [tool] }) + +agent.hooks.addCallback(BeforeToolCallEvent, (event: BeforeToolCallEvent) => { + event.toolUse = { + ...event.toolUse, + input: { value: 42 }, + } +}) + +// ❌ WRONG - Do not create HookProvider objects for tests +const modifyInputHook = { + registerCallbacks: (registry: HookRegistry) => { + registry.addCallback(BeforeToolCallEvent, (event: BeforeToolCallEvent) => { + event.toolUse = { + ...event.toolUse, + input: { value: 42 }, + } + }) + }, +} +``` + +Tests should use the simpler, direct approach via `agent.hooks.addCallback()` + ## Multi-Environment Testing The SDK is designed to work seamlessly in both Node.js and browser environments. Our test suite validates this by running tests in both environments using Vitest's browser mode with Playwright. @@ -341,8 +371,8 @@ For detailed command usage, see [CONTRIBUTING.md - Testing Instructions](../CONT ## Checklist Items - - [ ] Do the test use relevent helpers from __fixtures__ (`MockMessageModel`, `createMockTool`, `createMockAgent` etc.) - - [ ] Are reoccuring code or patterns extracted to functions for better usability/readability - - [ ] Are tests focused on verifying one or two things only? 
- - [ ] Are tests written concisely enough that the bulk of each test is important to the test instead of boilerplate code? - - [ ] Are tests asserting on the entire object instead of specific fields? +- [ ] Do the test use relevent helpers from `__fixtures__` (`MockMessageModel`, `createMockTool`, `createMockAgent` etc.) +- [ ] Are reoccuring code or patterns extracted to functions for better usability/readability +- [ ] Are tests focused on verifying one or two things only? +- [ ] Are tests written concisely enough that the bulk of each test is important to the test instead of boilerplate code? +- [ ] Are tests asserting on the entire object instead of specific fields? From 26e35c98fa120aa09b00733d79504c8c88073402 Mon Sep 17 00:00:00 2001 From: Strands Agent <217235299+strands-agent@users.noreply.github.com> Date: Sat, 6 Dec 2025 02:23:18 +0000 Subject: [PATCH 13/13] feat: enable writable hook properties for tool and result modification Allow hook callbacks to modify event properties to change agent behavior, achieving parity with the Python SDK. Hook callbacks can now modify: - BeforeToolCallEvent.tool - Change which tool gets executed - BeforeToolCallEvent.toolUse - Modify tool parameters before execution - AfterToolCallEvent.result - Transform tool results before sending to model This enables use cases like tool routing, parameter validation, result transformation, and conditional behavior changes without modifying tool implementations. 
Resolves #12 --- src/agent/__tests__/agent.hook.test.ts | 226 +++++++++++++++++++++++++ src/agent/agent.ts | 68 +++++--- src/hooks/__tests__/events.test.ts | 100 ++++++++++- src/hooks/events.ts | 6 +- 4 files changed, 372 insertions(+), 28 deletions(-) diff --git a/src/agent/__tests__/agent.hook.test.ts b/src/agent/__tests__/agent.hook.test.ts index eb77a825..986419d2 100644 --- a/src/agent/__tests__/agent.hook.test.ts +++ b/src/agent/__tests__/agent.hook.test.ts @@ -10,6 +10,7 @@ import { MessageAddedEvent, ModelStreamEventHook, type HookRegistry, + type HookProvider, } from '../../hooks/index.js' import { MockMessageModel } from '../../__fixtures__/mock-message-model.js' import { MockHookProvider } from '../../__fixtures__/mock-hook-provider.js' @@ -241,6 +242,231 @@ describe('Agent Hooks Integration', () => { }) }) + describe('writable hook properties', () => { + it('allows hook to modify tool in BeforeToolCallEvent', async () => { + const toolA = new FunctionTool({ + name: 'toolA', + description: 'Tool A', + inputSchema: {}, + callback: () => 'Tool A result', + }) + + const toolB = new FunctionTool({ + name: 'toolB', + description: 'Tool B', + inputSchema: {}, + callback: () => 'Tool B result', + }) + + const model = new MockMessageModel() + .addTurn({ type: 'toolUseBlock', name: 'toolA', toolUseId: 'tool-1', input: {} }) + .addTurn({ type: 'textBlock', text: 'Final response' }) + + // Hook provider that modifies the tool + class ToolModifyingHooks implements HookProvider { + registerCallbacks(registry: HookRegistry): void { + registry.addCallback(BeforeToolCallEvent, (event) => { + if (event.tool?.name === 'toolA') { + event.tool = toolB + } + }) + } + } + + const agent = new Agent({ + model, + tools: [toolA, toolB], + hooks: [new ToolModifyingHooks(), mockProvider], + }) + + await agent.invoke('Test') + + // Find AfterToolCallEvent to verify toolB was executed + const afterToolCallEvents = mockProvider.invocations.filter((e) => e instanceof 
AfterToolCallEvent) + expect(afterToolCallEvents.length).toBe(1) + + const afterToolCall = afterToolCallEvents[0] as AfterToolCallEvent + expect(afterToolCall.result.content).toEqual([new TextBlock('Tool B result')]) + }) + + it('allows hook to modify toolUse in BeforeToolCallEvent', async () => { + const tool = new FunctionTool({ + name: 'echoTool', + description: 'Echoes the input', + inputSchema: {}, + callback: (input, _context) => { + return `Input: ${JSON.stringify(input)}` + }, + }) + + const model = new MockMessageModel() + .addTurn({ type: 'toolUseBlock', name: 'echoTool', toolUseId: 'tool-1', input: { x: 1 } }) + .addTurn({ type: 'textBlock', text: 'Final response' }) + + // Hook provider that modifies the toolUse + class ToolUseModifyingHooks implements HookProvider { + registerCallbacks(registry: HookRegistry): void { + registry.addCallback(BeforeToolCallEvent, (event) => { + event.toolUse = { + name: event.toolUse.name, + toolUseId: event.toolUse.toolUseId, + input: { x: 2 }, + } + }) + } + } + + const agent = new Agent({ + model, + tools: [tool], + hooks: [new ToolUseModifyingHooks(), mockProvider], + }) + + await agent.invoke('Test') + + // Find AfterToolCallEvent to verify modified input was used + const afterToolCallEvents = mockProvider.invocations.filter((e) => e instanceof AfterToolCallEvent) + expect(afterToolCallEvents.length).toBe(1) + + const afterToolCall = afterToolCallEvents[0] as AfterToolCallEvent + expect(afterToolCall.result.content).toEqual([new TextBlock('Input: {"x":2}')]) + }) + + it('allows hook to set tool to undefined in BeforeToolCallEvent', async () => { + const tool = new FunctionTool({ + name: 'testTool', + description: 'Test tool', + inputSchema: {}, + callback: () => 'Tool result', + }) + + const model = new MockMessageModel() + .addTurn({ type: 'toolUseBlock', name: 'testTool', toolUseId: 'tool-1', input: {} }) + .addTurn({ type: 'textBlock', text: 'Final response' }) + + // Hook provider that sets tool to undefined + 
class ToolNullifyingHooks implements HookProvider { + registerCallbacks(registry: HookRegistry): void { + registry.addCallback(BeforeToolCallEvent, (event) => { + event.tool = undefined + }) + } + } + + const agent = new Agent({ + model, + tools: [tool], + hooks: [new ToolNullifyingHooks(), mockProvider], + }) + + await agent.invoke('Test') + + // Find AfterToolCallEvent to verify error result was returned + const afterToolCallEvents = mockProvider.invocations.filter((e) => e instanceof AfterToolCallEvent) + expect(afterToolCallEvents.length).toBe(1) + + const afterToolCall = afterToolCallEvents[0] as AfterToolCallEvent + expect(afterToolCall.result.status).toBe('error') + expect(afterToolCall.result.content).toEqual([new TextBlock("Tool 'testTool' not found in registry")]) + }) + + it('allows hook to modify result in AfterToolCallEvent', async () => { + const tool = new FunctionTool({ + name: 'testTool', + description: 'Test tool', + inputSchema: {}, + callback: () => 'Original result', + }) + + const model = new MockMessageModel() + .addTurn({ type: 'toolUseBlock', name: 'testTool', toolUseId: 'tool-1', input: {} }) + .addTurn({ type: 'textBlock', text: 'Final response' }) + + // Hook provider that modifies the result + class ResultModifyingHooks implements HookProvider { + registerCallbacks(registry: HookRegistry): void { + registry.addCallback(AfterToolCallEvent, (event) => { + event.result = new ToolResultBlock({ + toolUseId: event.result.toolUseId, + status: 'success', + content: [new TextBlock('Modified result')], + }) + }) + } + } + + const agent = new Agent({ + model, + tools: [tool], + hooks: [new ResultModifyingHooks()], + }) + + await agent.invoke('Test') + + // Check the conversation history to verify modified result was used + const messages = agent.messages + const toolResultMessage = messages.find( + (m) => m.role === 'user' && m.content.some((c) => c.type === 'toolResultBlock') + ) + expect(toolResultMessage).toBeDefined() + + const toolResultBlock 
= toolResultMessage!.content.find((c) => c.type === 'toolResultBlock') as ToolResultBlock + expect(toolResultBlock.content).toEqual([new TextBlock('Modified result')]) + }) + + it('allows hook to change result status from success to error', async () => { + const tool = new FunctionTool({ + name: 'testTool', + description: 'Test tool', + inputSchema: {}, + callback: () => 'Success result', + }) + + const model = new MockMessageModel() + .addTurn({ type: 'toolUseBlock', name: 'testTool', toolUseId: 'tool-1', input: {} }) + .addTurn({ type: 'textBlock', text: 'Final response' }) + + // Hook provider that changes result to error + class ErrorConvertingHooks implements HookProvider { + registerCallbacks(registry: HookRegistry): void { + registry.addCallback(AfterToolCallEvent, (event) => { + event.result = new ToolResultBlock({ + toolUseId: event.result.toolUseId, + status: 'error', + content: [new TextBlock('Converted to error')], + }) + }) + } + } + + const agent = new Agent({ + model, + tools: [tool], + hooks: [new ErrorConvertingHooks(), mockProvider], + }) + + await agent.invoke('Test') + + // Find AfterToolCallEvent to verify result was changed to error + const afterToolCallEvents = mockProvider.invocations.filter((e) => e instanceof AfterToolCallEvent) + expect(afterToolCallEvents.length).toBe(1) + + const afterToolCall = afterToolCallEvents[0] as AfterToolCallEvent + expect(afterToolCall.result.status).toBe('error') + expect(afterToolCall.result.content).toEqual([new TextBlock('Converted to error')]) + + // Verify error result was sent to the model + const messages = agent.messages + const toolResultMessage = messages.find( + (m) => m.role === 'user' && m.content.some((c) => c.type === 'toolResultBlock') + ) + expect(toolResultMessage).toBeDefined() + + const toolResultBlock = toolResultMessage!.content.find((c) => c.type === 'toolResultBlock') as ToolResultBlock + expect(toolResultBlock.status).toBe('error') + }) + }) + describe('ModelStreamEventHook', () => { 
it('fires for each streaming event from the model', async () => { const model = new MockMessageModel().addTurn({ type: 'textBlock', text: 'Hello' }) diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 4f00483b..735fa5ce 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -567,33 +567,44 @@ export class Agent implements AgentData { input: toolUseBlock.input, } - yield new BeforeToolCallEvent({ agent: this, toolUse, tool }) + const beforeEvent = new BeforeToolCallEvent({ agent: this, toolUse, tool }) + yield beforeEvent - if (!tool) { + // Read potentially modified values from the hook + const actualTool = beforeEvent.tool + const actualToolUse = beforeEvent.toolUse + + if (!actualTool) { // Tool not found - return error result instead of throwing const errorResult = new ToolResultBlock({ - toolUseId: toolUseBlock.toolUseId, + toolUseId: actualToolUse.toolUseId, status: 'error', - content: [new TextBlock(`Tool '${toolUseBlock.name}' not found in registry`)], + content: [new TextBlock(`Tool '${actualToolUse.name}' not found in registry`)], }) - yield new AfterToolCallEvent({ agent: this, toolUse, tool, result: errorResult }) + const afterEvent = new AfterToolCallEvent({ + agent: this, + toolUse: actualToolUse, + tool: actualTool, + result: errorResult, + }) + yield afterEvent - return errorResult + return afterEvent.result } // Execute tool and collect result const toolContext: ToolContext = { toolUse: { - name: toolUseBlock.name, - toolUseId: toolUseBlock.toolUseId, - input: toolUseBlock.input, + name: actualToolUse.name, + toolUseId: actualToolUse.toolUseId, + input: actualToolUse.input, }, agent: this, } try { - const toolGenerator = tool.stream(toolContext) + const toolGenerator = actualTool.stream(toolContext) // Use yield* to delegate to the tool generator and capture the return value const toolResult = yield* toolGenerator @@ -601,33 +612,52 @@ export class Agent implements AgentData { if (!toolResult) { // Tool didn't return a result - return 
error result instead of throwing const errorResult = new ToolResultBlock({ - toolUseId: toolUseBlock.toolUseId, + toolUseId: actualToolUse.toolUseId, status: 'error', - content: [new TextBlock(`Tool '${toolUseBlock.name}' did not return a result`)], + content: [new TextBlock(`Tool '${actualToolUse.name}' did not return a result`)], }) - yield new AfterToolCallEvent({ agent: this, toolUse, tool, result: errorResult }) + const afterEvent = new AfterToolCallEvent({ + agent: this, + toolUse: actualToolUse, + tool: actualTool, + result: errorResult, + }) + yield afterEvent - return errorResult + return afterEvent.result } - yield new AfterToolCallEvent({ agent: this, toolUse, tool, result: toolResult }) + const afterEvent = new AfterToolCallEvent({ + agent: this, + toolUse: actualToolUse, + tool: actualTool, + result: toolResult, + }) + yield afterEvent // Tool already returns ToolResultBlock directly - return toolResult + return afterEvent.result } catch (error) { // Tool execution failed with error const toolError = normalizeError(error) const errorResult = new ToolResultBlock({ - toolUseId: toolUseBlock.toolUseId, + toolUseId: actualToolUse.toolUseId, status: 'error', content: [new TextBlock(toolError.message)], error: toolError, }) - yield new AfterToolCallEvent({ agent: this, toolUse, tool, result: errorResult, error: toolError }) + const afterEvent = new AfterToolCallEvent({ + agent: this, + toolUse: actualToolUse, + tool: actualTool, + result: errorResult, + error: toolError, + }) + yield afterEvent - return errorResult + return afterEvent.result } } diff --git a/src/hooks/__tests__/events.test.ts b/src/hooks/__tests__/events.test.ts index c516e7f0..6bbcfdc0 100644 --- a/src/hooks/__tests__/events.test.ts +++ b/src/hooks/__tests__/events.test.ts @@ -104,10 +104,67 @@ describe('BeforeToolCallEvent', () => { }) // @ts-expect-error verifying that property is readonly event.agent = new Agent() - // @ts-expect-error verifying that property is readonly - event.toolUse 
= toolUse - // @ts-expect-error verifying that property is readonly - event.tool = tool + }) + + it('allows tool to be modified', () => { + const agent = new Agent() + const toolA = new FunctionTool({ + name: 'toolA', + description: 'Tool A', + inputSchema: {}, + callback: () => 'result A', + }) + const toolB = new FunctionTool({ + name: 'toolB', + description: 'Tool B', + inputSchema: {}, + callback: () => 'result B', + }) + const toolUse = { + name: 'toolA', + toolUseId: 'test-id', + input: {}, + } + const event = new BeforeToolCallEvent({ agent, toolUse, tool: toolA }) + + // Verify initial state + expect(event.tool).toBe(toolA) + + // Modify tool property + event.tool = toolB + expect(event.tool).toBe(toolB) + + // Can also set to undefined + event.tool = undefined + expect(event.tool).toBeUndefined() + }) + + it('allows toolUse to be modified', () => { + const agent = new Agent() + const tool = new FunctionTool({ + name: 'testTool', + description: 'Test tool', + inputSchema: {}, + callback: () => 'result', + }) + const originalToolUse = { + name: 'testTool', + toolUseId: 'test-id', + input: { arg: 'original' }, + } + const event = new BeforeToolCallEvent({ agent, toolUse: originalToolUse, tool }) + + // Verify initial state + expect(event.toolUse).toEqual(originalToolUse) + + // Modify toolUse property + const modifiedToolUse = { + name: 'modifiedTool', + toolUseId: 'modified-id', + input: { arg: 'modified' }, + } + event.toolUse = modifiedToolUse + expect(event.toolUse).toEqual(modifiedToolUse) }) it('creates instance with undefined tool when tool is not found', () => { @@ -170,8 +227,39 @@ describe('AfterToolCallEvent', () => { event.toolUse = toolUse // @ts-expect-error verifying that property is readonly event.tool = tool - // @ts-expect-error verifying that property is readonly - event.result = result + }) + + it('allows result to be modified', () => { + const agent = new Agent() + const tool = new FunctionTool({ + name: 'testTool', + description: 'Test 
tool', + inputSchema: {}, + callback: () => 'result', + }) + const toolUse = { + name: 'testTool', + toolUseId: 'test-id', + input: {}, + } + const originalResult = new ToolResultBlock({ + toolUseId: 'test-id', + status: 'success', + content: [new TextBlock('Original')], + }) + const event = new AfterToolCallEvent({ agent, toolUse, tool, result: originalResult }) + + // Verify initial state + expect(event.result).toBe(originalResult) + + // Modify result property + const modifiedResult = new ToolResultBlock({ + toolUseId: 'test-id', + status: 'error', + content: [new TextBlock('Modified')], + }) + event.result = modifiedResult + expect(event.result).toBe(modifiedResult) }) it('creates instance with error property when tool execution fails', () => { diff --git a/src/hooks/events.ts b/src/hooks/events.ts index 2fd7ab83..de09fc20 100644 --- a/src/hooks/events.ts +++ b/src/hooks/events.ts @@ -76,12 +76,12 @@ export class MessageAddedEvent extends HookEvent { export class BeforeToolCallEvent extends HookEvent { readonly type = 'beforeToolCallEvent' as const readonly agent: AgentData - readonly toolUse: { + toolUse: { name: string toolUseId: string input: JSONValue } - readonly tool: Tool | undefined + tool: Tool | undefined constructor(data: { agent: AgentData @@ -109,7 +109,7 @@ export class AfterToolCallEvent extends HookEvent { input: JSONValue } readonly tool: Tool | undefined - readonly result: ToolResultBlock + result: ToolResultBlock readonly error?: Error constructor(data: {