From 0f376b7a57b35745801835697b5ff8f11db66a31 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 15 Nov 2025 13:08:57 +0000 Subject: [PATCH 1/3] feat: Add Code Execution with MCP specification and implementation plan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds comprehensive documentation for implementing Anthropic's Code Execution with MCP approach in Terraphim AI, achieving 98% token reduction and significant performance improvements. Documents added: - CODE_EXECUTION_MCP_SUMMARY.md: Executive summary and quick start - CODE_EXECUTION_WITH_MCP_SPEC.md: Full technical specification - CODE_EXECUTION_MCP_GAP_ANALYSIS.md: Capability assessment - CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md: 12-week roadmap Key findings: - Terraphim AI is 60% ready for implementation - Core infrastructure exists (Firecracker VMs, MCP server, agents) - Three critical components needed: 1. MCP Code API Layer (convert tools to importable modules) 2. In-VM MCP Runtime (enable tool usage within code execution) 3. 
Progressive Tool Discovery (scale to 100+ tools) Implementation timeline: 12 weeks in 3 phases Expected outcome: 98% token reduction (150K → 2K tokens) Based on: https://medium.com/ai-software-engineer/anthropic-just-solved-ai-agent-bloat-150k-tokens-down-to-2k-code-execution-with-mcp-8266b8e80301 --- CODE_EXECUTION_MCP_GAP_ANALYSIS.md | 573 +++++++++++ CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md | 1127 +++++++++++++++++++++ CODE_EXECUTION_MCP_SUMMARY.md | 353 +++++++ CODE_EXECUTION_WITH_MCP_SPEC.md | 554 ++++++++++ 4 files changed, 2607 insertions(+) create mode 100644 CODE_EXECUTION_MCP_GAP_ANALYSIS.md create mode 100644 CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md create mode 100644 CODE_EXECUTION_MCP_SUMMARY.md create mode 100644 CODE_EXECUTION_WITH_MCP_SPEC.md diff --git a/CODE_EXECUTION_MCP_GAP_ANALYSIS.md b/CODE_EXECUTION_MCP_GAP_ANALYSIS.md new file mode 100644 index 00000000..aa2f573d --- /dev/null +++ b/CODE_EXECUTION_MCP_GAP_ANALYSIS.md @@ -0,0 +1,573 @@ +# Code Execution with MCP - Gap Analysis for Terraphim AI + +**Version:** 1.0 +**Date:** 2025-11-15 +**Status:** Planning + +## Executive Summary + +This document analyzes Terraphim AI's current capabilities against the requirements for implementing Anthropic's Code Execution with MCP approach. Overall assessment: **60% capability exists**, requiring targeted development in specific areas. + +## Current Capabilities ✅ + +### 1. 
Secure Code Execution Environment +**Status:** ✅ **COMPLETE** + +Terraphim AI has a fully functional VM execution system: + +- **Firecracker VMs:** Sub-2 second boot times +- **VM Pooling:** Efficient resource management +- **Multiple Languages:** Python, JavaScript, Bash, Rust +- **Security:** Sandboxed execution with resource limits +- **Monitoring:** Execution metrics and error tracking + +**Location:** `crates/terraphim_multi_agent/src/vm_execution/` + +**Evidence:** +```rust +// crates/terraphim_multi_agent/src/vm_execution/models.rs +pub struct VmExecutionConfig { + pub enabled: bool, + pub api_base_url: String, + pub vm_pool_size: u32, + pub default_vm_type: String, + pub execution_timeout_ms: u64, + pub allowed_languages: Vec<String>, + // ... more configuration +} +``` + +**Capabilities:** +- Execute code in isolated VMs +- Timeout enforcement +- Resource limits (CPU, memory, disk) +- Command history tracking +- Snapshot and rollback support +- Code validation before execution + +### 2. MCP Server Implementation +**Status:** ✅ **COMPLETE** + +Comprehensive MCP server with 17 tools: + +- **Location:** `crates/terraphim_mcp_server/` +- **Tools Available:** + - search + - autocomplete_terms + - autocomplete_with_snippets + - fuzzy_autocomplete_search + - find_matches + - replace_matches + - extract_paragraphs_from_automata + - load_thesaurus + - build_autocomplete_index + - serialize/deserialize_autocomplete_index + - is_all_terms_connected_by_path + - json_decode + - update_config_tool + +**Evidence:** +```rust +// crates/terraphim_mcp_server/src/lib.rs +impl ServerHandler for McpService { + async fn list_tools(...) -> Result<ListToolsResult, McpError> { + // 17 tools exposed via MCP protocol + } +} +``` + +### 3. 
Agent System +**Status:** ✅ **COMPLETE** + +Multi-agent system with lifecycle management: + +- **Supervisor:** `crates/terraphim_agent_supervisor/` +- **Multi-agent:** `crates/terraphim_multi_agent/` +- **Registry:** `crates/terraphim_agent_registry/` +- **Messaging:** `crates/terraphim_agent_messaging/` + +**Capabilities:** +- Agent lifecycle (init, start, stop, terminate) +- Health checks and monitoring +- Agent supervision and restart policies +- Inter-agent communication +- Task decomposition +- Goal alignment + +### 4. State Persistence +**Status:** ✅ **COMPLETE** + +Comprehensive state management: + +- **Location:** `crates/terraphim_persistence/` +- **Backends:** Memory, DashMap, SQLite, Redb +- **Features:** + - Document storage + - Configuration persistence + - State snapshots + - Versioned memory (agent evolution) + +### 5. Code Extraction +**Status:** ✅ **COMPLETE** + +Extract code blocks from LLM responses: + +- **Location:** `crates/terraphim_multi_agent/src/vm_execution/code_extractor.rs` +- **Features:** + - Markdown code block parsing + - Language detection + - Execution intent detection + - Confidence scoring + +## Missing Capabilities ❌ + +### 1. MCP Servers as Code APIs +**Status:** ❌ **NOT IMPLEMENTED** + +**Current State:** +- MCP tools exposed only via MCP protocol +- Direct tool calling through request/response +- No programmatic import interface + +**Required:** +```typescript +// Need to support this: +import { terraphim } from 'mcp-servers'; + +const results = await terraphim.search({ + query: "rust async patterns", + limit: 10 +}); + +const filtered = results.filter(doc => doc.rank > 0.8); +``` + +**Gap:** +- No TypeScript/Python module wrappers for MCP tools +- No import mechanism in code execution environment +- No module discovery API + +**Location to Implement:** +- New crate: `crates/terraphim_mcp_codegen/` +- Modify: `crates/terraphim_multi_agent/src/vm_execution/` + +### 2. 
Progressive Tool Discovery +**Status:** ❌ **NOT IMPLEMENTED** + +**Current State:** +- All tools listed via `list_tools()` +- No search or filtering +- No dynamic documentation + +**Required:** +```typescript +// Need to support: +import { searchTools, getToolDocs } from 'mcp-runtime'; + +const tools = await searchTools({ + category: 'knowledge-graph', + capabilities: ['search', 'autocomplete'] +}); + +const docs = await getToolDocs('terraphim.search'); +``` + +**Gap:** +- No tool categorization system +- No tool search functionality +- No dynamic documentation generation +- No capability-based filtering + +**Location to Implement:** +- New module: `crates/terraphim_mcp_server/src/discovery.rs` +- Update: `crates/terraphim_mcp_server/src/lib.rs` + +### 3. In-Environment Data Processing +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- VM execution runs code +- But MCP tools not accessible within VM +- Results still pass through context + +**Required:** +- MCP tools callable from within VM environment +- Data processing happens in VM before returning +- Only final results exit to agent + +**Gap:** +- No MCP runtime in VM environment +- No bridge between VM execution and MCP tools +- No in-VM data transformation utilities + +**Location to Implement:** +- New module: `crates/terraphim_multi_agent/src/vm_execution/mcp_runtime.rs` +- Update: `crates/terraphim_multi_agent/src/vm_execution/client.rs` + +### 4. 
Skill Library System +**Status:** ❌ **NOT IMPLEMENTED** + +**Current State:** +- No skill storage mechanism +- No SKILL.MD pattern +- No reusable function library + +**Required:** +```markdown +# SKILL.MD: Knowledge Graph Analysis + +## Function +async function analyzeConnectivity(text: string): Promise { + // Reusable skill implementation +} + +## Usage History +- Success rate: 95% +- Average execution time: 1.2s +``` + +**Gap:** +- No skill storage directory structure +- No skill discovery/search +- No usage tracking +- No skill versioning + +**Location to Implement:** +- New crate: `crates/terraphim_skills/` +- Directory: `skills/` in workspace root + +### 5. Agent Code Generation Optimization +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- Agents use LLM for responses +- Code extraction exists +- But not optimized for code-first approach + +**Required:** +- Agents preferentially generate code over tool calls +- Code generation prompts optimized +- Import-based tool usage +- Error handling in code + +**Gap:** +- No code-first prompt templates +- No examples of MCP tool imports in prompts +- No code quality feedback loop + +**Location to Implement:** +- Update: `crates/terraphim_multi_agent/src/agent.rs` +- New: `crates/terraphim_multi_agent/src/prompts/code_execution.rs` + +### 6. Token Usage Optimization +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- Token tracking exists +- Cost tracking exists +- But not optimized for code execution pattern + +**Required:** +- Track token savings from code execution +- Compare traditional vs code approach +- Metrics dashboard + +**Gap:** +- No comparison metrics +- No optimization recommendations +- No A/B testing framework + +**Location to Implement:** +- Update: `crates/terraphim_multi_agent/src/agent.rs` +- New: `crates/terraphim_multi_agent/src/metrics/code_execution.rs` + +### 7. 
Workspace Management +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- VM execution has temporary storage +- But no structured workspace + +**Required:** +``` +workspace/ + ├── data/ # Temporary data files + ├── results/ # Execution results + ├── checkpoints/ # Saved state snapshots + └── skills/ # Reusable skill library +``` + +**Gap:** +- No workspace directory structure +- No file management utilities +- No cleanup policies + +**Location to Implement:** +- New module: `crates/terraphim_multi_agent/src/workspace.rs` + +## Capability Matrix + +| Requirement | Status | Priority | Effort | Notes | +|------------|--------|----------|--------|-------| +| Secure Code Execution | ✅ Complete | - | - | Firecracker VMs ready | +| MCP Server | ✅ Complete | - | - | 17 tools available | +| Agent System | ✅ Complete | - | - | Full lifecycle management | +| State Persistence | ✅ Complete | - | - | Multiple backends | +| Code Extraction | ✅ Complete | - | - | Parse markdown blocks | +| **MCP Code APIs** | ❌ Missing | **Critical** | **High** | Core requirement | +| **Progressive Discovery** | ❌ Missing | **High** | **Medium** | Scalability essential | +| **In-Environment Processing** | ⚠️ Partial | **Critical** | **High** | Token reduction key | +| **Skill Library** | ❌ Missing | **Medium** | **Medium** | Reusability benefit | +| **Code-First Prompts** | ⚠️ Partial | **High** | **Low** | Quick win | +| **Token Optimization** | ⚠️ Partial | **Medium** | **Low** | Metrics important | +| **Workspace Management** | ⚠️ Partial | **Low** | **Low** | Nice to have | + +## Summary Statistics + +- **Complete:** 5/12 (42%) +- **Partial:** 4/12 (33%) +- **Missing:** 3/12 (25%) +- **Overall Readiness:** ~60% + +## Critical Path Items + +To achieve minimum viable implementation: + +1. **MCP Code APIs** (Critical, High Effort) + - Convert MCP tools to importable modules + - Create runtime environment in VMs + - Enable code-based tool usage + +2. 
**In-Environment Processing** (Critical, High Effort) + - Bridge MCP tools to VM execution + - Process data within VM + - Return only final results + +3. **Code-First Prompts** (High, Low Effort) + - Update agent prompts + - Add code examples + - Optimize for imports + +## Recommended Implementation Order + +### Phase 1: Foundation (4 weeks) +**Goal:** Basic code execution with MCP tools + +1. Create MCP code API layer + - TypeScript/Python wrappers + - Import mechanism + - Runtime in VM + +2. Update code-first prompts + - Add import examples + - Optimize for code generation + - Test with existing agents + +3. Implement in-environment processing + - MCP bridge to VM + - Data transformation utilities + - Result minimization + +**Success Criteria:** +- Agents can import and use MCP tools in code +- Basic workflow achieves >80% token reduction +- Code execution completes in <3 seconds + +### Phase 2: Discovery & Scale (4 weeks) +**Goal:** Support many tools efficiently + +1. Progressive tool discovery + - Tool search API + - Categorization system + - Dynamic documentation + +2. Workspace management + - Structured directories + - File utilities + - Cleanup policies + +3. Token optimization metrics + - Comparison tracking + - Dashboard creation + - Optimization recommendations + +**Success Criteria:** +- Tool discovery <100ms +- Support 100+ tools +- Token reduction metrics visible + +### Phase 3: Skills & Optimization (4 weeks) +**Goal:** Production-ready features + +1. Skill library system + - Storage structure + - SKILL.MD format + - Discovery and search + - Usage tracking + +2. Performance optimization + - Caching and memoization + - Resource pooling + - Load testing + +3. Production hardening + - Monitoring dashboards + - Error handling + - Documentation + +**Success Criteria:** +- Skills reusable across agents +- 98%+ token reduction achieved +- Production deployment ready + +## Next Steps + +1. **Review and approve** this gap analysis +2. 
**Prioritize** critical path items +3. **Create detailed tasks** for Phase 1 +4. **Assign resources** and timeline +5. **Begin implementation** of MCP Code APIs + +## Appendices + +### A. Code API Example + +Current MCP tool call: +```json +{ + "tool": "search", + "arguments": { + "query": "rust async patterns", + "limit": 10 + } +} +``` + +Desired code-based usage: +```typescript +import { terraphim } from 'mcp-servers'; + +async function analyzePatterns() { + const docs = await terraphim.search({ + query: "rust async patterns", + limit: 100 + }); + + const highQuality = docs.filter(d => d.rank > 0.8); + const byTopic = groupBy(highQuality, 'topic'); + + return { + total: highQuality.length, + topics: Object.keys(byTopic), + top_doc: highQuality[0] + }; +} +``` + +### B. Architecture Diagram + +``` +┌─────────────────────────────────────────────────┐ +│ Agent Layer │ +│ - Generates code instead of tool calls │ +│ - Optimized prompts for imports │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Code Execution Layer │ +│ ┌────────────────────────────────────────┐ │ +│ │ Firecracker VM (existing) │ │ +│ │ ┌──────────────────────────────────┐ │ │ +│ │ │ MCP Runtime (NEW) │ │ │ +│ │ │ - Import MCP tools as modules │ │ │ +│ │ │ - Process data in-environment │ │ │ +│ │ │ - Return minimal results │ │ │ +│ │ └──────────────────────────────────┘ │ │ +│ └────────────────────────────────────────┘ │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ MCP Code API Layer (NEW) │ +│ - TypeScript/Python module wrappers │ +│ - Tool discovery API │ +│ - Documentation generation │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ MCP Server (existing) │ +│ - 17 knowledge graph tools │ +│ - Autocomplete, search, analysis │ +└─────────────────────────────────────────────────┘ +``` + +### 
C. Token Reduction Calculation + +**Baseline Workflow (Traditional):** +``` +1. Load all tool definitions: 17 tools × 800 tokens = 13,600 tokens +2. Call search: query (200) + results (8,000) = 8,200 tokens +3. Call autocomplete: query (200) + results (5,000) = 5,200 tokens +4. Call find_matches: query (300) + results (10,000) = 10,300 tokens +5. Agent processing and response: 2,000 tokens +Total: 39,300 tokens +``` + +**Code Execution Workflow:** +``` +1. Agent generates code: 1,200 tokens +2. Code executes in VM: + - Calls search (internal, no tokens) + - Calls autocomplete (internal, no tokens) + - Calls find_matches (internal, no tokens) + - Processes data (internal, no tokens) +3. Final result returned: 800 tokens +Total: 2,000 tokens +``` + +**Reduction: 95% (39,300 → 2,000)** + +### D. Implementation Checklist + +**Phase 1: Foundation** +- [ ] Create `crates/terraphim_mcp_codegen/` crate +- [ ] Generate TypeScript wrappers for all 17 MCP tools +- [ ] Generate Python wrappers for all 17 MCP tools +- [ ] Implement MCP runtime in VM environment +- [ ] Add import mechanism to code execution +- [ ] Create code-first prompt templates +- [ ] Update agent code generation logic +- [ ] Implement MCP bridge in VM execution client +- [ ] Add data transformation utilities +- [ ] Test end-to-end workflow +- [ ] Measure token reduction +- [ ] Document new patterns + +**Phase 2: Discovery & Scale** +- [ ] Implement tool search API +- [ ] Create tool categorization system +- [ ] Add capability-based filtering +- [ ] Generate dynamic documentation +- [ ] Create workspace directory structure +- [ ] Implement file management utilities +- [ ] Add cleanup policies +- [ ] Create token comparison metrics +- [ ] Build metrics dashboard +- [ ] Add optimization recommendations +- [ ] Load test with 100+ tools + +**Phase 3: Skills & Optimization** +- [ ] Design skill storage structure +- [ ] Implement SKILL.MD format parser +- [ ] Create skill discovery/search +- [ ] Add usage tracking 
+- [ ] Implement skill versioning +- [ ] Add caching layer +- [ ] Implement memoization +- [ ] Optimize resource pooling +- [ ] Load test with 1000+ concurrent agents +- [ ] Create monitoring dashboards +- [ ] Write production documentation +- [ ] Security audit and hardening diff --git a/CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md b/CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000..f87b57bb --- /dev/null +++ b/CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md @@ -0,0 +1,1127 @@ +# Code Execution with MCP - Implementation Plan + +**Version:** 1.0 +**Date:** 2025-11-15 +**Timeline:** 12 weeks (3 phases × 4 weeks) +**Status:** Ready for Implementation + +## Overview + +This document provides a detailed, actionable implementation plan to add Code Execution with MCP capabilities to Terraphim AI, achieving 98% token reduction and significant performance improvements. + +## Validation Summary + +### Can Terraphim AI Run Agent Execution? + +**Current State: YES ✅ (with limitations)** + +Terraphim AI **can** run agent execution today: +- ✅ Agents can execute code in Firecracker VMs +- ✅ Agents have access to MCP tools via protocol +- ✅ Code extraction and execution pipeline exists +- ✅ Security sandbox operational + +**But NOT optimized for Anthropic's approach:** +- ❌ MCP tools not usable as code imports +- ❌ Data still flows through context window +- ❌ No progressive tool discovery +- ❌ No skill library system + +### What's Needed for Full Implementation? + +**Critical (Must Have):** +1. MCP Code API Layer - Convert MCP tools to importable modules +2. In-VM MCP Runtime - Enable tool usage within code execution +3. Code-First Prompts - Optimize agent prompts for code generation + +**Important (Should Have):** +4. Progressive Tool Discovery - Scale to 100+ tools +5. Token Optimization Metrics - Measure and track improvements + +**Nice to Have:** +6. Skill Library System - Reusable function patterns +7. 
Workspace Management - Structured file handling + +## Phase 1: Foundation (Weeks 1-4) + +### Goal +Enable basic code execution with MCP tools as importable modules. + +### Milestones + +#### Week 1: MCP Code API Layer Setup + +**Tasks:** +1. Create new crate structure + ```bash + cargo new --lib crates/terraphim_mcp_codegen + ``` + +2. Add dependencies + ```toml + # crates/terraphim_mcp_codegen/Cargo.toml + [dependencies] + serde = { version = "1.0", features = ["derive"] } + serde_json = "1.0" + tokio = { version = "1", features = ["full"] } + terraphim_mcp_server = { path = "../terraphim_mcp_server" } + tera = "1.19" # Template engine + ``` + +3. Design module structure + ``` + crates/terraphim_mcp_codegen/ + ├── src/ + │ ├── lib.rs + │ ├── typescript_gen.rs # TypeScript wrapper generation + │ ├── python_gen.rs # Python wrapper generation + │ ├── runtime.rs # MCP runtime for VMs + │ └── templates/ + │ ├── typescript.tera # TypeScript module template + │ └── python.tera # Python module template + └── Cargo.toml + ``` + +4. Implement tool introspection + ```rust + // crates/terraphim_mcp_codegen/src/lib.rs + pub struct ToolMetadata { + pub name: String, + pub description: String, + pub parameters: Vec, + pub return_type: String, + } + + pub fn introspect_mcp_tools() -> Vec { + // Extract tool metadata from MCP server + } + ``` + +**Deliverables:** +- [ ] `terraphim_mcp_codegen` crate created +- [ ] Tool introspection working +- [ ] Template system configured + +#### Week 2: Generate TypeScript Wrappers + +**Tasks:** +1. 
Create TypeScript template + ```typescript + // Template: templates/typescript.tera + export interface {{ tool_name | pascal_case }}Params { + {% for param in parameters %} + {{ param.name }}: {{ param.type }}; + {% endfor %} + } + + export async function {{ tool_name | camel_case }}( + params: {{ tool_name | pascal_case }}Params + ): Promise<{{ return_type }}> { + const response = await mcpCall('{{ tool_name }}', params); + return response; + } + ``` + +2. Implement code generator + ```rust + // crates/terraphim_mcp_codegen/src/typescript_gen.rs + pub struct TypeScriptGenerator { + template: tera::Tera, + } + + impl TypeScriptGenerator { + pub fn generate_module(&self, tools: &[ToolMetadata]) -> String { + // Generate TypeScript module from tools + } + } + ``` + +3. Generate wrapper for all 17 tools + ```bash + cargo run --bin mcp-codegen -- \ + --output workspace/mcp-servers/terraphim.ts \ + --format typescript + ``` + +4. Test wrapper in Node.js + ```typescript + import { search, autocompleteTerms } from './terraphim'; + + const results = await search({ + query: "rust async patterns", + limit: 10 + }); + console.log(results); + ``` + +**Deliverables:** +- [ ] TypeScript generator implemented +- [ ] All 17 tools wrapped +- [ ] TypeScript module tested + +#### Week 3: Generate Python Wrappers & MCP Runtime + +**Tasks:** +1. Create Python template + ```python + # Template: templates/python.tera + from typing import Dict, List, Optional + import asyncio + + async def {{ tool_name }}( + {% for param in parameters %} + {{ param.name }}: {{ param.python_type }}, + {% endfor %} + ) -> {{ return_type }}: + """{{ description }}""" + response = await mcp_call('{{ tool_name }}', { + {% for param in parameters %} + '{{ param.name }}': {{ param.name }}, + {% endfor %} + }) + return response + ``` + +2. 
Implement MCP runtime for VMs + ```rust + // crates/terraphim_mcp_codegen/src/runtime.rs + pub struct McpRuntime { + mcp_client: Arc<McpClient>, + } + + impl McpRuntime { + pub async fn call_tool(&self, name: &str, params: Value) -> Result<Value> { + // Forward call to MCP server + } + + pub fn inject_into_vm(&self, vm_id: &str) -> Result<()> { + // Make runtime available in VM + } + } + ``` + +3. Create bridge between VM and MCP + ```rust + // crates/terraphim_multi_agent/src/vm_execution/mcp_bridge.rs + pub struct McpBridge { + runtime: Arc<McpRuntime>, + } + + impl McpBridge { + pub async fn setup_vm_environment(&self, vm_id: &str) -> Result<()> { + // 1. Generate wrapper modules + // 2. Copy to VM filesystem + // 3. Inject MCP runtime + // 4. Configure imports + } + } + ``` + +**Deliverables:** +- [ ] Python generator implemented +- [ ] MCP runtime created +- [ ] VM-MCP bridge functional + +#### Week 4: Integration & Testing + +**Tasks:** +1. Update agent code generation prompts + ````rust + // crates/terraphim_multi_agent/src/prompts/code_execution.rs + pub const CODE_EXECUTION_SYSTEM_PROMPT: &str = r#" + You are an AI assistant that solves problems by writing code. + + Available MCP tools (import as modules): + ```typescript + import { terraphim } from 'mcp-servers'; + ``` + + Available functions: + - terraphim.search(query, options) + - terraphim.autocompleteTerms(query, limit) + - terraphim.findMatches(text, role) + // ... etc + + When solving problems: + 1. Import only the tools you need + 2. Process data within your code + 3. Return only the final result + 4. Use async/await for all tool calls + + Example: + ```typescript + import { terraphim } from 'mcp-servers'; + + async function analyzeDocuments(topic: string) { + const docs = await terraphim.search({ query: topic, limit: 100 }); + const relevant = docs.filter(d => d.rank > 0.8); + return { + count: relevant.length, + top_doc: relevant[0] + }; + } + ``` + "#; + ```` + +2. 
Modify agent to prefer code generation + ```rust + // crates/terraphim_multi_agent/src/agent.rs + impl TerraphimAgent { + async fn handle_command(&mut self, command: Command) -> Result { + // 1. Generate code instead of tool calls + let code = self.generate_code(&command).await?; + + // 2. Execute in VM with MCP runtime + let result = self.execute_code_in_vm(code).await?; + + // 3. Return only final result + Ok(result) + } + } + ``` + +3. End-to-end testing + ```rust + #[tokio::test] + async fn test_code_execution_workflow() { + let agent = create_test_agent().await; + + let command = Command::new("Find rust async patterns and summarize"); + + let result = agent.handle_command(command).await.unwrap(); + + assert!(result.token_count < 5000); // Should be much less than traditional + assert!(result.execution_time_ms < 3000); + assert!(result.contains_summary()); + } + ``` + +4. Token usage comparison + ```rust + #[tokio::test] + async fn test_token_reduction() { + let traditional_tokens = measure_traditional_approach().await; + let code_exec_tokens = measure_code_execution_approach().await; + + let reduction = (traditional_tokens - code_exec_tokens) as f64 + / traditional_tokens as f64; + + assert!(reduction > 0.80); // At least 80% reduction + } + ``` + +**Deliverables:** +- [ ] Code-first prompts implemented +- [ ] Agent integration complete +- [ ] End-to-end tests passing +- [ ] Token reduction measured (target: >80%) + +### Phase 1 Success Criteria + +- ✅ Agents can import and use MCP tools in generated code +- ✅ Code executes successfully in Firecracker VMs +- ✅ Token reduction >80% for typical workflows +- ✅ Execution time <3 seconds +- ✅ All 17 MCP tools available as imports + +## Phase 2: Discovery & Scale (Weeks 5-8) + +### Goal +Enable progressive tool discovery and support 100+ tools efficiently. + +### Milestones + +#### Week 5: Tool Discovery API + +**Tasks:** +1. 
Design tool metadata schema + ```rust + // crates/terraphim_mcp_server/src/discovery.rs + #[derive(Serialize, Deserialize)] + pub struct ToolMetadata { + pub name: String, + pub category: String, + pub capabilities: Vec<String>, + pub description: String, + pub examples: Vec<String>, + pub parameters: Vec<ToolParameter>, + } + + #[derive(Serialize, Deserialize)] + pub struct ToolSearchQuery { + pub category: Option<String>, + pub capabilities: Option<Vec<String>>, + pub keywords: Option<Vec<String>>, + } + ``` + +2. Implement tool search + ```rust + pub struct ToolDiscovery { + tools: Vec<ToolMetadata>, + index: SearchIndex, + } + + impl ToolDiscovery { + pub async fn search(&self, query: ToolSearchQuery) -> Vec<ToolMetadata> { + // Search and filter tools + } + + pub async fn get_documentation(&self, tool_name: &str) -> Option<String> { + // Generate markdown documentation + } + } + ``` + +3. Add MCP endpoints + ```rust + // New MCP tools: + // - search_tools(query) + // - get_tool_documentation(name) + // - list_categories() + // - list_capabilities() + ``` + +4. Test tool discovery + ```typescript + import { searchTools, getToolDocs } from 'mcp-servers'; + + const tools = await searchTools({ + category: 'knowledge-graph', + capabilities: ['search', 'autocomplete'] + }); + + const docs = await getToolDocs('terraphim.search'); + ``` + +**Deliverables:** +- [ ] Tool discovery API implemented +- [ ] Search functionality working +- [ ] Documentation generation functional + +#### Week 6: Categorization & Documentation + +**Tasks:** +1. Categorize existing tools + ```rust + // crates/terraphim_mcp_server/src/tool_categories.rs + pub enum ToolCategory { + KnowledgeGraph, + Autocomplete, + TextProcessing, + Configuration, + Analysis, + } + + pub fn categorize_tools() -> HashMap<&'static str, ToolCategory> { + HashMap::from([ + ("search", ToolCategory::KnowledgeGraph), + ("autocomplete_terms", ToolCategory::Autocomplete), + ("find_matches", ToolCategory::TextProcessing), + // ... etc + ]) + } + ``` + +2. 
Generate rich documentation + ````markdown + # terraphim.search + + **Category:** Knowledge Graph + **Capabilities:** search, semantic-matching + + ## Description + Search for documents in the Terraphim knowledge graph using semantic matching. + + ## Parameters + - `query` (string, required): The search query + - `role` (string, optional): Filter by role + - `limit` (number, optional): Maximum results (default: 10) + + ## Returns + Array of Document objects with id, url, body, description, rank. + + ## Example + ```typescript + import { terraphim } from 'mcp-servers'; + + const results = await terraphim.search({ + query: "rust async patterns", + limit: 10 + }); + ``` + + ## See Also + - autocomplete_terms - Get autocomplete suggestions + - find_matches - Find term matches in text + ```` + +3. Implement lazy loading + ```typescript + // Only load tool when first used + class McpProxy { + async search(params) { + if (!this._search) { + this._search = await import('./tools/search'); + } + return this._search.default(params); + } + } + ``` + +**Deliverables:** +- [ ] All tools categorized +- [ ] Rich documentation generated +- [ ] Lazy loading implemented + +#### Week 7: Workspace Management + +**Tasks:** +1. 
Design workspace structure + ```rust + // crates/terraphim_multi_agent/src/workspace.rs + pub struct Workspace { + root: PathBuf, + agent_id: AgentId, + } + + impl Workspace { + pub fn new(agent_id: AgentId) -> Self { + let root = PathBuf::from(format!("workspace/{}", agent_id)); + fs::create_dir_all(&root).unwrap(); + fs::create_dir_all(root.join("data")).unwrap(); + fs::create_dir_all(root.join("results")).unwrap(); + fs::create_dir_all(root.join("checkpoints")).unwrap(); + fs::create_dir_all(root.join("skills")).unwrap(); + Self { root, agent_id } + } + + pub fn data_dir(&self) -> PathBuf { + self.root.join("data") + } + + pub fn results_dir(&self) -> PathBuf { + self.root.join("results") + } + + pub fn checkpoint(&self, name: &str) -> Result<()> { + // Create checkpoint snapshot + } + + pub fn restore(&self, checkpoint: &str) -> Result<()> { + // Restore from checkpoint + } + } + ``` + +2. Integrate with VM execution + ```rust + impl VmExecutionClient { + pub async fn execute_with_workspace( + &self, + code: &str, + workspace: &Workspace, + ) -> Result { + // 1. Mount workspace in VM + // 2. Execute code + // 3. Persist results to workspace + } + } + ``` + +3. Add file utilities + ```typescript + // Available in VM environment + import { workspace } from 'mcp-runtime'; + + // Save data + await workspace.saveData('analysis.json', data); + + // Load data + const data = await workspace.loadData('analysis.json'); + + // Create checkpoint + await workspace.checkpoint('before-filter'); + + // Restore if needed + await workspace.restore('before-filter'); + ``` + +**Deliverables:** +- [ ] Workspace structure implemented +- [ ] VM integration complete +- [ ] File utilities available + +#### Week 8: Token Optimization Metrics + +**Tasks:** +1. 
Create metrics tracking + ```rust + // crates/terraphim_multi_agent/src/metrics/code_execution.rs + #[derive(Serialize, Deserialize)] + pub struct ExecutionMetrics { + pub traditional_tokens: u64, + pub code_execution_tokens: u64, + pub reduction_percentage: f64, + pub execution_time_ms: u64, + pub tool_count: usize, + pub code_lines: usize, + } + + pub struct MetricsCollector { + pub fn record_execution(&mut self, metrics: ExecutionMetrics); + pub fn get_statistics(&self) -> ExecutionStatistics; + pub fn compare_approaches(&self) -> ComparisonReport; + } + ``` + +2. Build dashboard + ```rust + // Expose metrics via API + #[get("/api/metrics/code-execution")] + async fn get_code_execution_metrics() -> Json { + // Return aggregated metrics + } + ``` + +3. Add optimization recommendations + ```rust + pub fn analyze_token_usage(metrics: &ExecutionMetrics) -> Vec { + let mut recommendations = Vec::new(); + + if metrics.reduction_percentage < 80.0 { + recommendations.push(Recommendation { + priority: Priority::High, + message: "Consider processing more data in-environment".to_string(), + }); + } + + recommendations + } + ``` + +**Deliverables:** +- [ ] Metrics collection working +- [ ] Dashboard accessible +- [ ] Recommendations generated + +### Phase 2 Success Criteria + +- ✅ Tool discovery <100ms response time +- ✅ Support for 100+ tools without degradation +- ✅ Workspace management functional +- ✅ Token reduction metrics visible +- ✅ Documentation auto-generated for all tools + +## Phase 3: Skills & Production (Weeks 9-12) + +### Goal +Production-ready system with reusable skills and comprehensive monitoring. + +### Milestones + +#### Week 9: Skill Library System + +**Tasks:** +1. 
Design SKILL.MD format + ````markdown + # SKILL: Knowledge Graph Analysis + + ## Metadata + - **Created:** 2025-11-15 + - **Version:** 1.0 + - **Author:** agent-001 + - **Tags:** knowledge-graph, analysis, connectivity + + ## Description + Analyzes knowledge graph connectivity and generates comprehensive reports. + + ## Function Signature + ```typescript + async function analyzeKnowledgeGraph( + text: string, + options?: AnalysisOptions + ): Promise + ``` + + ## Implementation + ```typescript + import { terraphim } from 'mcp-servers'; + + async function analyzeKnowledgeGraph(text, options = {}) { + const matches = await terraphim.findMatches({ text }); + const connected = await terraphim.isAllTermsConnectedByPath({ text }); + + return { + matched_terms: matches.length, + connectivity: connected, + graph_summary: generateSummary(matches, connected) + }; + } + ``` + + ## Usage History + - **Total Uses:** 42 + - **Success Rate:** 95.2% + - **Avg Execution Time:** 1.8s + - **Last Used:** 2025-11-14 + + ## Examples + ```typescript + const report = await analyzeKnowledgeGraph( + "Rust async patterns with tokio and futures", + { detailed: true } + ); + ``` + ```` + +2. Implement skill storage + ```rust + // crates/terraphim_skills/src/lib.rs + pub struct Skill { + pub metadata: SkillMetadata, + pub code: String, + pub usage_stats: UsageStatistics, + } + + pub struct SkillLibrary { + skills: HashMap, + index: SearchIndex, + } + + impl SkillLibrary { + pub async fn save_skill(&mut self, skill: Skill) -> Result<()>; + pub async fn load_skill(&self, name: &str) -> Option<&Skill>; + pub async fn search_skills(&self, query: &str) -> Vec<&Skill>; + pub async fn record_usage(&mut self, name: &str, success: bool); + } + ``` + +3. 
Auto-save successful patterns + ```rust + impl TerraphimAgent { + async fn execute_code(&mut self, code: &str) -> Result { + let result = self.vm_client.execute(code).await?; + + if result.success && self.should_save_as_skill(&code) { + let skill = self.extract_skill(code, &result)?; + self.skills.save_skill(skill).await?; + } + + Ok(result) + } + } + ``` + +**Deliverables:** +- [ ] SKILL.MD format defined +- [ ] Skill library implemented +- [ ] Auto-save working +- [ ] Skill search functional + +#### Week 10: Performance Optimization + +**Tasks:** +1. Add caching layer + ```rust + // crates/terraphim_multi_agent/src/cache.rs + pub struct ExecutionCache { + cache: Arc>>, + } + + impl ExecutionCache { + pub async fn get(&self, code_hash: &str) -> Option; + pub async fn set(&self, code_hash: &str, result: CachedResult); + } + ``` + +2. Implement memoization + ```rust + impl VmExecutionClient { + async fn execute_memoized(&self, code: &str) -> Result { + let hash = calculate_code_hash(code); + + if let Some(cached) = self.cache.get(&hash).await { + return Ok(cached.result); + } + + let result = self.execute_uncached(code).await?; + self.cache.set(&hash, CachedResult::new(result.clone())).await; + + Ok(result) + } + } + ``` + +3. Optimize resource pooling + ```rust + pub struct VmPool { + available: Vec, + in_use: HashMap, + config: PoolConfig, + } + + impl VmPool { + pub async fn acquire(&mut self) -> Result { + // Smart allocation with warm VMs + } + + pub async fn release(&mut self, vm: VmInstance) { + // Keep VM warm for reuse + } + } + ``` + +4. 
Load testing + ```rust + #[tokio::test] + async fn load_test_1000_concurrent_agents() { + let agents = create_test_agents(1000).await; + + let start = Instant::now(); + + let results = futures::future::join_all( + agents.iter().map(|a| a.execute_code(SAMPLE_CODE)) + ).await; + + let duration = start.elapsed(); + + assert!(duration.as_secs() < 30); // Complete in 30s + assert!(results.iter().all(|r| r.is_ok())); + } + ``` + +**Deliverables:** +- [ ] Caching implemented +- [ ] Memoization working +- [ ] Resource pooling optimized +- [ ] Load tests passing + +#### Week 11: Production Hardening + +**Tasks:** +1. Comprehensive error handling + ```rust + #[derive(Error, Debug)] + pub enum CodeExecutionError { + #[error("Code generation failed: {0}")] + GenerationFailed(String), + + #[error("Code validation failed: {0}")] + ValidationFailed(String), + + #[error("VM execution error: {0}")] + ExecutionError(String), + + #[error("MCP tool error: {0}")] + ToolError(String), + + #[error("Timeout after {0}ms")] + Timeout(u64), + } + ``` + +2. Monitoring dashboards + ```rust + // Prometheus metrics + lazy_static! { + static ref CODE_EXECUTIONS: IntCounter = register_int_counter!( + "terraphim_code_executions_total", + "Total code executions" + ).unwrap(); + + static ref EXECUTION_DURATION: Histogram = register_histogram!( + "terraphim_execution_duration_seconds", + "Code execution duration" + ).unwrap(); + + static ref TOKEN_REDUCTION: Histogram = register_histogram!( + "terraphim_token_reduction_percentage", + "Token reduction percentage" + ).unwrap(); + } + ``` + +3. Health checks + ```rust + #[get("/health/code-execution")] + async fn health_check() -> Json { + Json(HealthStatus { + vm_pool_available: check_vm_pool().await, + mcp_server_reachable: check_mcp_server().await, + skill_library_accessible: check_skills().await, + cache_operational: check_cache().await, + }) + } + ``` + +4. 
Documentation + - [ ] API documentation (rustdoc) + - [ ] User guide + - [ ] Architecture diagrams + - [ ] Troubleshooting guide + +**Deliverables:** +- [ ] Error handling comprehensive +- [ ] Monitoring operational +- [ ] Health checks working +- [ ] Documentation complete + +#### Week 12: Final Testing & Launch + +**Tasks:** +1. End-to-end integration tests + ```rust + #[tokio::test] + async fn test_complete_workflow() { + // 1. Agent receives query + // 2. Generates code + // 3. Discovers tools + // 4. Executes in VM + // 5. Processes data + // 6. Returns result + // 7. Saves skill + // 8. Metrics recorded + } + ``` + +2. Performance benchmarks + ```rust + #[bench] + fn bench_traditional_approach(b: &mut Bencher) { + b.iter(|| execute_traditional_workflow()); + } + + #[bench] + fn bench_code_execution_approach(b: &mut Bencher) { + b.iter(|| execute_code_execution_workflow()); + } + ``` + +3. Security audit + - [ ] Sandbox escape testing + - [ ] Input validation review + - [ ] Access control verification + - [ ] Secrets management audit + +4. 
Production deployment + - [ ] Canary deployment + - [ ] Gradual rollout + - [ ] Monitor metrics + - [ ] Collect feedback + +**Deliverables:** +- [ ] All tests passing +- [ ] Benchmarks documented +- [ ] Security approved +- [ ] Production deployed + +### Phase 3 Success Criteria + +- ✅ Skills reusable across agents +- ✅ 98%+ token reduction achieved +- ✅ Sub-2 second execution times +- ✅ 1000+ concurrent agents supported +- ✅ 99.9% uptime +- ✅ Production documentation complete +- ✅ Security audit passed + +## Resource Requirements + +### Development Team +- **Senior Rust Engineer** (1 FTE, 12 weeks) + - MCP code API layer + - VM integration + - Performance optimization + +- **Full-Stack Engineer** (0.5 FTE, 12 weeks) + - TypeScript/Python wrappers + - Tool discovery API + - Metrics dashboard + +- **DevOps Engineer** (0.25 FTE, weeks 9-12) + - Deployment infrastructure + - Monitoring setup + - Load testing + +- **Technical Writer** (0.25 FTE, weeks 10-12) + - Documentation + - User guides + - API docs + +### Infrastructure +- **Development Environment** + - 4 vCPUs, 16GB RAM + - Firecracker VMs + - Docker containers + +- **Staging Environment** + - 8 vCPUs, 32GB RAM + - Load testing capacity + - Monitoring stack + +- **Production Rollout** + - Gradual scale-up + - Canary deployment + - Rollback capability + +## Risk Mitigation + +### Technical Risks + +| Risk | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| Code generation quality | High | Medium | Comprehensive validation, testing, fallback to traditional | +| Sandbox escape | Critical | Low | Multiple isolation layers, security audit, penetration testing | +| Performance degradation | Medium | Low | Caching, pooling, load testing, monitoring | +| Integration complexity | Medium | Medium | Incremental approach, feature flags, rollback plan | + +### Project Risks + +| Risk | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| Timeline slip | 
Medium | Medium | Buffer in estimates, weekly progress reviews, adjust scope | +| Resource constraints | High | Low | Early identification, backup resources, vendor support | +| Requirement changes | Medium | Low | Clear spec upfront, change control process | + +## Success Metrics + +### Phase 1 Targets +- Token reduction: >80% +- Execution time: <3s +- Code success rate: >90% +- Test coverage: >85% + +### Phase 2 Targets +- Token reduction: >90% +- Tool discovery: <100ms +- Support 100+ tools +- Documentation coverage: 100% + +### Phase 3 Targets +- Token reduction: >98% +- Execution time: <2s +- Concurrent agents: 1000+ +- Uptime: 99.9% +- Security audit: Passed + +## Next Steps + +1. **Review & Approval** (Week 0) + - [ ] Review specification with stakeholders + - [ ] Approve implementation plan + - [ ] Allocate resources + - [ ] Set up project tracking + +2. **Kickoff** (Week 1, Day 1) + - [ ] Team onboarding + - [ ] Environment setup + - [ ] Create project board + - [ ] First sprint planning + +3. **Ongoing** (Weekly) + - [ ] Sprint planning + - [ ] Daily standups + - [ ] Code reviews + - [ ] Progress tracking + - [ ] Risk assessment + +4. **Launch** (Week 12) + - [ ] Production deployment + - [ ] Monitoring active + - [ ] Documentation published + - [ ] Success metrics tracked + +## Appendices + +### A. 
File Structure + +``` +terraphim-ai/ +├── crates/ +│ ├── terraphim_mcp_codegen/ # NEW: Code generation +│ │ ├── src/ +│ │ │ ├── lib.rs +│ │ │ ├── typescript_gen.rs +│ │ │ ├── python_gen.rs +│ │ │ ├── runtime.rs +│ │ │ └── templates/ +│ │ └── Cargo.toml +│ ├── terraphim_skills/ # NEW: Skill library +│ │ ├── src/ +│ │ │ ├── lib.rs +│ │ │ ├── storage.rs +│ │ │ └── search.rs +│ │ └── Cargo.toml +│ ├── terraphim_mcp_server/ # UPDATED: Add discovery +│ │ └── src/ +│ │ └── discovery.rs # NEW +│ └── terraphim_multi_agent/ # UPDATED: Code-first +│ └── src/ +│ ├── prompts/ +│ │ └── code_execution.rs # NEW +│ ├── metrics/ +│ │ └── code_execution.rs # NEW +│ ├── workspace.rs # NEW +│ └── vm_execution/ +│ └── mcp_bridge.rs # NEW +├── workspace/ # NEW: Agent workspaces +│ ├── mcp-servers/ +│ │ ├── terraphim.ts # Generated +│ │ └── terraphim.py # Generated +│ └── {agent-id}/ +│ ├── data/ +│ ├── results/ +│ ├── checkpoints/ +│ └── skills/ +├── skills/ # NEW: Global skills +│ └── *.skill.md +└── docs/ + ├── CODE_EXECUTION_WITH_MCP_SPEC.md + ├── CODE_EXECUTION_MCP_GAP_ANALYSIS.md + └── CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md +``` + +### B. Dependencies + +```toml +# New dependencies across crates + +[dependencies] +# Code generation +tera = "1.19" +convert_case = "0.6" + +# Metrics +prometheus = "0.13" + +# Caching +lru = "0.12" + +# Existing (versions may need updates) +tokio = { version = "1", features = ["full"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +``` + +### C. Test Coverage Requirements + +- Unit tests: >85% coverage +- Integration tests: All critical paths +- End-to-end tests: Main workflows +- Load tests: 1000+ concurrent agents +- Security tests: Sandbox, access control + +### D. 
Deployment Checklist + +- [ ] All tests passing +- [ ] Documentation complete +- [ ] Security audit passed +- [ ] Performance benchmarks met +- [ ] Monitoring configured +- [ ] Rollback plan tested +- [ ] Team trained +- [ ] User guide published +- [ ] Canary deployment successful +- [ ] Production deployment approved diff --git a/CODE_EXECUTION_MCP_SUMMARY.md b/CODE_EXECUTION_MCP_SUMMARY.md new file mode 100644 index 00000000..f727de13 --- /dev/null +++ b/CODE_EXECUTION_MCP_SUMMARY.md @@ -0,0 +1,353 @@ +# Code Execution with MCP - Project Summary + +**Date:** 2025-11-15 +**Status:** Ready for Review +**Implementation Timeline:** 12 weeks (3 phases) + +## Quick Links + +- [Technical Specification](./CODE_EXECUTION_WITH_MCP_SPEC.md) - Full architectural design +- [Gap Analysis](./CODE_EXECUTION_MCP_GAP_ANALYSIS.md) - Current capabilities vs. requirements +- [Implementation Plan](./CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md) - Detailed 12-week roadmap + +## Executive Summary + +This project implements Anthropic's "Code Execution with MCP" approach in Terraphim AI, achieving: +- **98% token reduction** (150K → 2K tokens for complex workflows) +- **Faster execution** (sub-2 second response times) +- **Unlimited tool scaling** (support 100+ tools without degradation) +- **Enhanced privacy** (data processing in sandbox, not context) + +## The Problem + +Traditional AI agent workflows consume massive amounts of tokens: +1. **Load all tool definitions** upfront → 10K-20K tokens +2. **Every tool call** passes results through context → 5K-50K tokens each +3. 
**Chain multiple calls** → 150K+ tokens total + +This creates: +- ❌ High API costs +- ❌ Increased latency +- ❌ Context window limits +- ❌ Impossible to scale to many tools + +## The Solution + +**Treat MCP servers as code APIs** instead of direct tool calls: + +```typescript +// Instead of: Multiple separate tool calls through context +// Result: Tool def (1K) + Call 1 (8K) + Call 2 (5K) + Call 3 (10K) = 24K tokens + +// Do this: Write code that uses tools programmatically +import { terraphim } from 'mcp-servers'; + +async function analyzeDocuments() { + const docs = await terraphim.search({ query: "rust async", limit: 100 }); + const relevant = docs.filter(d => d.rank > 0.8); + return { count: relevant.length, top: relevant[0] }; +} +// Result: Code (500) + Final result (500) = 1K tokens +``` + +**Benefits:** +- ✅ 98% token reduction +- ✅ Faster execution (parallel processing in code) +- ✅ Better privacy (data stays in sandbox) +- ✅ Unlimited tools (load only what's needed) +- ✅ Reusable skills (save successful patterns) + +## Can Terraphim AI Do This Today? + +### Current Capabilities ✅ + +**YES - Terraphim AI has most infrastructure:** + +1. **✅ Secure Code Execution** + - Firecracker VMs operational + - Sub-2 second boot times + - Python, JavaScript, Bash, Rust support + - Location: `crates/terraphim_multi_agent/src/vm_execution/` + +2. **✅ MCP Server** + - 17 tools available + - Search, autocomplete, analysis + - Location: `crates/terraphim_mcp_server/` + +3. **✅ Agent System** + - Multi-agent coordination + - Lifecycle management + - Location: `crates/terraphim_agent_supervisor/`, `crates/terraphim_multi_agent/` + +4. **✅ State Persistence** + - Multiple storage backends + - Location: `crates/terraphim_persistence/` + +### Missing Capabilities ❌ + +**NO - Three critical components needed:** + +1. **❌ MCP Code APIs** (Critical) + - MCP tools not importable as modules + - Need TypeScript/Python wrappers + - **Effort:** 2 weeks + +2. 
**❌ In-VM MCP Runtime** (Critical) + - Tools not callable from within VM + - Need bridge between VM and MCP + - **Effort:** 2 weeks + +3. **❌ Progressive Tool Discovery** (Important) + - No tool search/categorization + - No dynamic documentation + - **Effort:** 1 week + +### Overall Assessment + +**Current Readiness: 60%** +- ✅ Infrastructure exists (VMs, MCP, agents) +- ❌ Integration layer missing (code APIs, runtime bridge) +- **Implementation Time: 12 weeks** to production-ready + +## Implementation Overview + +### Phase 1: Foundation (Weeks 1-4) +**Goal:** Basic code execution with MCP tools + +**Key Tasks:** +1. Create MCP code API layer +2. Generate TypeScript/Python wrappers +3. Build MCP runtime for VMs +4. Update agent prompts for code-first approach + +**Deliverables:** +- Agents can import MCP tools in code +- >80% token reduction achieved +- End-to-end workflow functional + +### Phase 2: Discovery & Scale (Weeks 5-8) +**Goal:** Support 100+ tools efficiently + +**Key Tasks:** +1. Implement progressive tool discovery +2. Add workspace management +3. Create token optimization metrics +4. Build documentation system + +**Deliverables:** +- Tool discovery <100ms +- Support 100+ tools +- Metrics dashboard live + +### Phase 3: Skills & Production (Weeks 9-12) +**Goal:** Production-ready with reusable skills + +**Key Tasks:** +1. Build skill library system +2. Performance optimization (caching, pooling) +3. Production hardening (monitoring, docs) +4. 
Security audit and deployment + +**Deliverables:** +- Skill library functional +- 98%+ token reduction +- 1000+ concurrent agents +- Production deployed + +## Success Metrics + +### Token Efficiency +- **Baseline:** 150K tokens (traditional approach) +- **Target:** 2K tokens (code execution) +- **Reduction:** 98%+ + +### Performance +- **Code Execution:** <2 seconds +- **Tool Discovery:** <100ms +- **End-to-End:** <5 seconds + +### Scalability +- **Tools:** 500+ without degradation +- **Agents:** 1000+ concurrent +- **Uptime:** 99.9% + +### Quality +- **Code Success Rate:** >95% +- **Security:** 0 sandbox escapes +- **Test Coverage:** >85% + +## Resource Requirements + +### Team +- **Senior Rust Engineer:** 1 FTE (12 weeks) +- **Full-Stack Engineer:** 0.5 FTE (12 weeks) +- **DevOps Engineer:** 0.25 FTE (weeks 9-12) +- **Technical Writer:** 0.25 FTE (weeks 10-12) + +### Infrastructure +- Development environment (4 vCPUs, 16GB RAM) +- Staging environment (8 vCPUs, 32GB RAM) +- Firecracker VMs, Docker containers +- Monitoring stack (Prometheus, Grafana) + +## Key Technical Components + +### 1. MCP Code API Layer +**New Crate:** `crates/terraphim_mcp_codegen/` + +Generates TypeScript/Python wrappers for MCP tools: +```typescript +// Auto-generated from MCP server introspection +export async function search(params: SearchParams): Promise { + return await mcpCall('search', params); +} +``` + +### 2. MCP Runtime for VMs +**New Module:** `crates/terraphim_multi_agent/src/vm_execution/mcp_runtime.rs` + +Makes MCP tools available in VM environment: +```rust +pub struct McpRuntime { + mcp_client: Arc, +} + +impl McpRuntime { + pub async fn call_tool(&self, name: &str, params: Value) -> Result; + pub fn inject_into_vm(&self, vm_id: &str) -> Result<()>; +} +``` + +### 3. Code-First Agent Prompts +**New Module:** `crates/terraphim_multi_agent/src/prompts/code_execution.rs` + +Optimized prompts for code generation: +``` +You solve problems by writing code that imports MCP tools. 
+ +Available tools: +import { terraphim } from 'mcp-servers'; + +Example: +async function analyze() { + const docs = await terraphim.search({ query: "...", limit: 100 }); + return docs.filter(d => d.rank > 0.8); +} +``` + +### 4. Skill Library +**New Crate:** `crates/terraphim_skills/` + +Stores reusable code patterns: +```markdown +# SKILL: Knowledge Graph Analysis + +## Function +async function analyzeKnowledgeGraph(text: string): Promise + +## Usage History +- Success Rate: 95% +- Avg Time: 1.8s +``` + +## Risk Assessment + +### Technical Risks +| Risk | Impact | Mitigation | +|------|--------|------------| +| Code generation quality | Medium | Validation, testing, fallback | +| Sandbox escape | High | Multiple isolation layers, audit | +| Performance degradation | Low | Caching, pooling, monitoring | + +### Project Risks +| Risk | Impact | Mitigation | +|------|--------|------------| +| Timeline slip | Medium | Buffer, weekly reviews, scope adjustment | +| Resource constraints | Low | Early identification, backup resources | + +## Comparison: Traditional vs Code Execution + +### Traditional Approach + +``` +User: "Find high-value Salesforce accounts and summarize" + +1. Load all Salesforce tool definitions → 10K tokens +2. Agent calls search_salesforce + - Query: 200 tokens + - Results: 50K rows → 8K tokens +3. Agent calls filter_records + - Query: 200 tokens + - Results: 500 rows → 5K tokens +4. Agent calls create_summary + - Query: 200 tokens + - Summary: 10K tokens + +Total: ~40K tokens +Time: ~8 seconds (multiple round-trips) +``` + +### Code Execution Approach + +```` +User: "Find high-value Salesforce accounts and summarize" + +1. Agent generates code → 1K tokens + ```typescript + import { salesforce } from 'mcp-servers'; + + async function analyze() { + const all = await salesforce.search({ query: "active accounts" }); + const filtered = all.filter(a => a.revenue > 1000000); + return { + count: filtered.length, + top: filtered[0] + }; + } + ``` + +2. 
Code executes in VM (all processing internal) +3. Final result returned → 500 tokens + +Total: ~2K tokens +Time: ~2 seconds (single execution) +```` + +**Improvement: 95% token reduction, 75% faster** + +## Next Steps + +### 1. Review Phase (Week 0) +- [ ] Review all documentation +- [ ] Approve implementation plan +- [ ] Allocate team resources +- [ ] Set up project tracking + +### 2. Kickoff (Week 1) +- [ ] Team onboarding +- [ ] Environment setup +- [ ] Create `terraphim_mcp_codegen` crate +- [ ] Start TypeScript wrapper generation + +### 3. Ongoing +- [ ] Weekly progress reviews +- [ ] Daily standups +- [ ] Continuous integration +- [ ] Metrics tracking + +### 4. Launch (Week 12) +- [ ] Production deployment +- [ ] Monitoring active +- [ ] Documentation published +- [ ] Success celebration! 🎉 + +## Questions? + +Contact the project team or refer to: +- [Technical Specification](./CODE_EXECUTION_WITH_MCP_SPEC.md) for architecture details +- [Gap Analysis](./CODE_EXECUTION_MCP_GAP_ANALYSIS.md) for capability assessment +- [Implementation Plan](./CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md) for detailed tasks + +--- + +**Ready to implement?** Start with Phase 1, Week 1: Creating the MCP code API layer. diff --git a/CODE_EXECUTION_WITH_MCP_SPEC.md b/CODE_EXECUTION_WITH_MCP_SPEC.md new file mode 100644 index 00000000..5c687e49 --- /dev/null +++ b/CODE_EXECUTION_WITH_MCP_SPEC.md @@ -0,0 +1,554 @@ +# Code Execution with MCP - Technical Specification + +**Version:** 1.0 +**Date:** 2025-11-15 +**Based on:** Anthropic's "Code Execution with MCP" Guide + +## Executive Summary + +This specification defines an architecture where AI agents write code to interact with MCP (Model Context Protocol) servers, reducing token consumption by ~98% (150K → 2K tokens) while improving performance, privacy, and scalability. 
+ +### Core Concept + +**Traditional Approach:** +- Agent uses tool calling API +- Model loads ALL tool definitions upfront +- Model calls tools directly via function calls +- Results pass through context window +- **Problem:** Massive token overhead, latency, limited tool count + +**Code Execution Approach:** +- Agent writes code to interact with tools +- Code imports only needed MCP modules +- Code executes and processes data in sandboxed environment +- Only final results return to model +- **Benefit:** 98%+ token reduction, faster execution, unlimited tools + +## Problem Statement + +### Current Challenges with Traditional Tool Calling + +#### 1. Token Overhead Nightmare +- Every tool definition loaded into context upfront +- Each tool includes: description, parameters, format, return type +- **Example:** 100 tools × ~1,500 tokens = 150,000 tokens before any work + +#### 2. Intermediate Results Problem +- Every tool call result passes through context window +- Chain of 10-30 tool calls creates massive data flow +- Simple data processing consumes thousands of tokens + +#### 3. Impact on Production Systems +- **Cost spirals:** More tokens = higher API bills +- **Latency increases:** More processing time per request +- **Context limits:** Can't add more tools without hitting ceiling +- **Scaling impossible:** Each new tool makes problem worse + +### Quantified Impact + +``` +Traditional workflow: 150,000 tokens +Code execution workflow: 2,000 tokens +Reduction: 98.7% +``` + +## Solution Architecture + +### Overview + +Present MCP servers as **code APIs** rather than direct tool calls. Agents import and use MCP tools programmatically within a secure code execution environment. + +### Key Components + +#### 1. 
MCP Code API Layer +``` +┌─────────────────────────────────────┐ +│ MCP Servers as Code Modules │ +│ - TypeScript/Python/Rust modules │ +│ - Importable via standard imports │ +│ - Full programmatic access │ +└─────────────────────────────────────┘ +``` + +#### 2. Agent Code Generation +``` +┌─────────────────────────────────────┐ +│ Agent writes code: │ +│ import { salesforce } from 'mcp' │ +│ const data = await salesforce... │ +│ return processedResult │ +└─────────────────────────────────────┘ +``` + +#### 3. Secure Code Execution Environment +``` +┌─────────────────────────────────────┐ +│ Sandbox Environment │ +│ - Resource limits │ +│ - Network isolation │ +│ - Filesystem restrictions │ +│ - Timeout enforcement │ +└─────────────────────────────────────┘ +``` + +#### 4. Result Flow +``` +User Query → Agent generates code → Execute in sandbox → +Process data → Return final result → Agent responds +``` + +### Data Flow Comparison + +**Traditional (150K tokens):** +``` +1. Load all Salesforce tool definitions (10K tokens) +2. Agent calls search_salesforce → Full results through context (50K tokens) +3. Agent processes, calls filter_records → Filtered results through context (30K tokens) +4. Agent calls create_summary → Summary through context (60K tokens) +Total: ~150K tokens +``` + +**Code Execution (2K tokens):** +``` +1. Agent writes single code block (500 tokens) +2. Code executes: search → filter → summarize (all in-environment) +3. Final summary returns to agent (500 tokens) +Total: ~2K tokens +``` + +## Core Benefits + +### 1. Massive Token Efficiency (98%+ reduction) +- Load only needed tools on-demand +- No intermediate results through context +- Single code block replaces multiple tool calls + +### 2. Progressive Tool Discovery +- Browse available tools dynamically +- Search for specific functionality +- Read documentation only when needed +- No need to memorize entire catalog + +### 3. 
In-Environment Data Processing +- Filter, transform, aggregate within sandbox +- Process 10,000 rows → return 5 relevant ones +- Privacy: sensitive data never enters model context + +### 4. Better Control Flow +- Use loops, conditionals, error handling +- Native programming constructs +- Reduce 50 sequential calls to 1 code execution + +### 5. Privacy Advantages +- Sensitive data stays in execution environment +- Only explicitly returned values visible to model +- Process confidential information safely + +### 6. State Persistence +- Save intermediate results to files +- Resume work across sessions +- Checkpoint progress for long-running tasks + +### 7. Reusable Skills +- Build library of higher-level capabilities +- Document with SKILL.MD files +- Agent references previous work +- Complex operations become single functions + +## Technical Requirements + +### 1. Code Execution Environment + +#### Requirements +- **Sandboxing:** Isolated execution context +- **Resource Limits:** CPU, memory, disk quotas +- **Timeout Enforcement:** Maximum execution time +- **Network Control:** Allow/block specific endpoints +- **Filesystem:** Restricted access, temporary storage +- **Monitoring:** Execution metrics, error tracking + +#### Languages Supported +- Python (primary for data processing) +- JavaScript/TypeScript (MCP native) +- Rust (performance-critical operations) +- Bash (system commands) + +### 2. 
MCP Code API Interface + +#### Module Structure +```typescript +// Example: MCP server exposed as code module +import { salesforce } from 'mcp-servers'; + +interface SalesforceAPI { + search(query: SearchQuery): Promise; + filter(data: any[], condition: FilterCondition): Promise; + create(record: Record): Promise; + update(id: string, data: Partial): Promise; +} +``` + +#### Discovery API +```typescript +// Progressive tool discovery +import { searchTools, getToolDocs } from 'mcp-runtime'; + +const tools = await searchTools({ + category: 'database', + capabilities: ['read', 'write'] +}); + +const docs = await getToolDocs('salesforce.search'); +``` + +### 3. Agent Code Generation + +#### Code Block Format +````markdown +```typescript +import { salesforce } from 'mcp-servers'; + +async function getSummary() { + const results = await salesforce.search({ + query: "active accounts", + fields: ["name", "revenue", "status"] + }); + + const filtered = results.filter(r => r.revenue > 1000000); + + return { + total: filtered.length, + total_revenue: filtered.reduce((sum, r) => sum + r.revenue, 0), + top_account: filtered.sort((a, b) => b.revenue - a.revenue)[0] + }; +} +``` +```` + +#### Validation +- **Syntax checking** before execution +- **Static analysis** for security issues +- **Import validation** (only allowed modules) +- **API rate limit** enforcement + +### 4. Security Model + +#### Execution Sandbox +- **VM isolation** (Firecracker microVMs) +- **SELinux/AppArmor** policies +- **Seccomp filters** for syscalls +- **Network namespaces** for isolation + +#### Code Restrictions +- No arbitrary network access +- No filesystem access outside workspace +- No subprocess spawning +- No infinite loops (timeout) +- Memory limits enforced + +#### Audit Trail +- Log all code execution +- Track resource usage +- Monitor API calls +- Record data access patterns + +### 5. 
State Management + +#### Workspace Persistence +``` +workspace/ + ├── data/ # Temporary data files + ├── results/ # Execution results + ├── checkpoints/ # Saved state snapshots + └── skills/ # Reusable skill library +``` + +#### Session Continuity +- Save workspace state between executions +- Resume long-running tasks +- Checkpoint important milestones +- Roll back on errors + +### 6. Skill Library Pattern + +#### SKILL.MD Format +````markdown +# Skill: Salesforce Account Analysis + +## Description +Analyzes Salesforce accounts and generates revenue reports. + +## Function Signature +```typescript +async function analyzeAccounts(options: AnalysisOptions): Promise +``` + +## Example Usage +```typescript +const report = await analyzeAccounts({ + minRevenue: 1000000, + includeInactive: false +}); +``` + +## Dependencies +- mcp-servers/salesforce +- mcp-servers/analytics +```` + +## Implementation Phases + +### Phase 1: Foundation (Weeks 1-4) +**Goal:** Basic code execution with MCP tools + +- [ ] Set up secure code execution environment +- [ ] Create MCP → Code API translation layer +- [ ] Implement basic Python/TypeScript execution +- [ ] Add syntax validation and security checks +- [ ] Create simple tool discovery mechanism + +**Deliverables:** +- Working code execution sandbox +- 3-5 MCP tools exposed as code APIs +- Basic documentation + +### Phase 2: Agent Integration (Weeks 5-8) +**Goal:** Integrate with existing agent system + +- [ ] Modify agent code generation prompts +- [ ] Add code block extraction from responses +- [ ] Implement execution flow in agent lifecycle +- [ ] Create result integration back to agent +- [ ] Add error handling and recovery + +**Deliverables:** +- Agents can generate and execute code +- End-to-end workflow functional +- Error handling complete + +### Phase 3: Tool Expansion (Weeks 9-12) +**Goal:** Scale to many tools + +- [ ] Expose all MCP tools as code APIs +- [ ] Implement progressive tool discovery +- [ ] Add comprehensive 
documentation generation +- [ ] Create tool search and filtering +- [ ] Optimize for 100+ tools + +**Deliverables:** +- All existing MCP tools available as code +- Tool discovery API +- Searchable documentation + +### Phase 4: Advanced Features (Weeks 13-16) +**Goal:** Production-ready features + +- [ ] Implement skill library system +- [ ] Add state persistence and checkpointing +- [ ] Create workspace management +- [ ] Implement SKILL.MD pattern +- [ ] Add monitoring and metrics + +**Deliverables:** +- Skill library functional +- State persistence working +- Production monitoring + +### Phase 5: Optimization (Weeks 17-20) +**Goal:** Performance and scale + +- [ ] Token usage optimization +- [ ] Execution performance tuning +- [ ] Caching and memoization +- [ ] Resource pooling +- [ ] Load testing and benchmarks + +**Deliverables:** +- 98%+ token reduction achieved +- Sub-2-second execution times +- 1000+ concurrent agents supported + +## Success Metrics + +### Token Efficiency +- **Target:** 98% reduction in token usage +- **Measurement:** Compare traditional vs code execution workflows +- **Baseline:** 150K tokens → 2K tokens for complex workflows + +### Performance +- **Code Execution Latency:** < 2 seconds for typical workflows +- **Tool Discovery:** < 100ms to find relevant tools +- **End-to-End:** < 5 seconds from query to response + +### Scalability +- **Tool Count:** Support 500+ tools without degradation +- **Concurrent Agents:** 1000+ agents executing code simultaneously +- **Workspace Size:** 100MB per agent workspace + +### Quality +- **Code Success Rate:** > 95% of generated code executes successfully +- **Security:** 0 sandbox escapes, 0 unauthorized access +- **Uptime:** 99.9% availability + +## Risk Analysis + +### Technical Risks + +1. **Code Generation Quality** + - *Risk:* Agent generates invalid or insecure code + - *Mitigation:* Comprehensive validation, static analysis, testing + - *Severity:* Medium + +2. 
**Sandbox Escape** + - *Risk:* Malicious code breaks out of sandbox + - *Mitigation:* Multiple isolation layers (VM + OS + runtime) + - *Severity:* High + +3. **Performance Degradation** + - *Risk:* Code execution slower than direct tool calls + - *Mitigation:* Async execution, caching, pooling + - *Severity:* Low + +### Operational Risks + +1. **Resource Exhaustion** + - *Risk:* Runaway code consumes all resources + - *Mitigation:* Strict limits, monitoring, auto-termination + - *Severity:* Medium + +2. **Complexity** + - *Risk:* System becomes too complex to maintain + - *Mitigation:* Clear architecture, good documentation + - *Severity:* Medium + +## Alternative Approaches + +### Option 1: Hybrid Model +- Use direct tool calls for simple operations +- Use code execution for complex workflows +- **Trade-off:** More complexity, but gradual migration + +### Option 2: Agent-Specific +- Enable code execution per agent +- Some agents use traditional, some use code +- **Trade-off:** Flexibility, but inconsistent experience + +### Option 3: Tool Streaming +- Stream tool definitions on-demand +- Partial context loading +- **Trade-off:** Still uses more tokens than code execution + +## References + +- [Anthropic: Code Execution with MCP](https://medium.com/ai-software-engineer/anthropic-just-solved-ai-agent-bloat-150k-tokens-down-to-2k-code-execution-with-mcp-8266b8e80301) +- [Model Context Protocol Specification](https://modelcontextprotocol.io/) +- [Terraphim AI Architecture](./CLAUDE.md) + +## Appendices + +### A. 
Example Code Execution Workflow + +```typescript +// Agent receives query: "Find high-value Salesforce accounts and summarize" + +// Agent generates this code: +import { salesforce } from 'mcp-servers'; + +async function analyzeHighValueAccounts() { + // Search for active accounts + const accounts = await salesforce.search({ + query: "active accounts", + fields: ["name", "revenue", "status", "industry"] + }); + + // Filter high-value accounts (in-environment processing) + const highValue = accounts.filter(acc => acc.revenue > 1000000); + + // Group by industry + const byIndustry = highValue.reduce((groups, acc) => { + const industry = acc.industry || 'Unknown'; + if (!groups[industry]) groups[industry] = []; + groups[industry].push(acc); + return groups; + }, {}); + + // Generate summary + return { + total_accounts: highValue.length, + total_revenue: highValue.reduce((sum, acc) => sum + acc.revenue, 0), + by_industry: Object.entries(byIndustry).map(([industry, accs]) => ({ + industry, + count: accs.length, + revenue: accs.reduce((sum, a) => sum + a.revenue, 0) + })), + top_account: highValue.sort((a, b) => b.revenue - a.revenue)[0] + }; +} + +// Execute +const result = await analyzeHighValueAccounts(); +console.log(JSON.stringify(result, null, 2)); +``` + +**Token Comparison:** +- Traditional: ~150K tokens (all tool defs + intermediate results) +- Code execution: ~2K tokens (code + final result) +- **Reduction: 98.7%** + +### B. Skill Library Example + +````markdown +# SKILL.MD: Database Query Optimization + +## Description +Analyzes and optimizes database queries for performance.
+ +## Expertise +- Database performance tuning +- Query plan analysis +- Index recommendations + +## Function +```typescript +async function optimizeQuery(query: string, database: string): Promise { + const { db } = await import('mcp-servers/database'); + + // Analyze query plan + const plan = await db.explain(query, database); + + // Identify bottlenecks + const bottlenecks = analyzeBottlenecks(plan); + + // Generate recommendations + const recommendations = generateRecommendations(bottlenecks); + + return { + original_query: query, + estimated_cost: plan.cost, + bottlenecks, + recommendations, + optimized_query: applyOptimizations(query, recommendations) + }; +} +``` + +## Usage History +- Last used: 2025-11-14 +- Success rate: 95% +- Average improvement: 3.2x faster queries +```` + +### C. Security Checklist + +- [ ] VM isolation configured (Firecracker) +- [ ] Resource limits enforced (CPU, memory, disk) +- [ ] Network namespaces isolated +- [ ] Filesystem access restricted +- [ ] Timeout mechanisms active +- [ ] Static analysis for code validation +- [ ] Import whitelist configured +- [ ] Audit logging enabled +- [ ] Monitoring dashboards created +- [ ] Incident response procedures documented +- [ ] Penetration testing completed +- [ ] Security review approved From 9dcddeee81826f751895c043e4280ffadc8a7e39 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 16 Nov 2025 12:16:39 +0000 Subject: [PATCH 2/3] feat: Implement MCP code generation crate for TypeScript/Python wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements Phase 1 of the Code Execution with MCP plan, creating the terraphim_mcp_codegen crate that generates typed wrappers for MCP tools to enable code-based tool usage instead of direct tool calls.
New crate features: - terraphim_mcp_codegen: Code generation infrastructure - Tool introspection and metadata extraction - TypeScript code generator with full type definitions - Python code generator with type hints - MCP runtime bridge for JavaScript and Python - CLI tool (mcp-codegen) for generating code packages Generated code includes: - Typed interfaces/classes for all 17 MCP tools - Async/await patterns for tool calls - JSDoc/docstring documentation - Usage examples for each tool - Tool categorization and capability metadata This enables AI agents to write code that imports MCP tools as modules: ```typescript import { terraphim } from 'mcp-servers'; const results = await terraphim.search({ query: "rust patterns" }); const filtered = results.filter(r => r.rank > 0.8); return { count: filtered.length, top: filtered[0] }; ``` Expected outcome: 98% token reduction (150K → 2K tokens for workflows) --- Cargo.lock | 149 +++++- crates/terraphim_mcp_codegen/Cargo.toml | 34 ++ .../terraphim_mcp_codegen/src/bin/codegen.rs | 458 ++++++++++++++++++ .../src/introspection.rs | 360 ++++++++++++++ crates/terraphim_mcp_codegen/src/lib.rs | 389 +++++++++++++++ .../terraphim_mcp_codegen/src/python_gen.rs | 319 ++++++++++++ crates/terraphim_mcp_codegen/src/runtime.rs | 306 ++++++++++++ .../src/typescript_gen.rs | 258 ++++++++++ 8 files changed, 2269 insertions(+), 4 deletions(-) create mode 100644 crates/terraphim_mcp_codegen/Cargo.toml create mode 100644 crates/terraphim_mcp_codegen/src/bin/codegen.rs create mode 100644 crates/terraphim_mcp_codegen/src/introspection.rs create mode 100644 crates/terraphim_mcp_codegen/src/lib.rs create mode 100644 crates/terraphim_mcp_codegen/src/python_gen.rs create mode 100644 crates/terraphim_mcp_codegen/src/runtime.rs create mode 100644 crates/terraphim_mcp_codegen/src/typescript_gen.rs diff --git a/Cargo.lock b/Cargo.lock index c43900d0..c5979f83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -782,6 +782,28 @@ dependencies = [ "windows-link 
0.2.1", ] +[[package]] +name = "chrono-tz" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf 0.11.3", +] + +[[package]] +name = "chrono-tz-build" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +dependencies = [ + "parse-zoneinfo", + "phf 0.11.3", + "phf_codegen 0.11.3", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -1655,6 +1677,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "diff" version = "0.1.13" @@ -2688,6 +2716,17 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "globwalk" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags 2.10.0", + "ignore", + "walkdir", +] + [[package]] name = "gloo-timers" version = "0.3.0" @@ -3133,6 +3172,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + [[package]] name = "humantime" version = "2.3.0" @@ -4823,6 +4871,15 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.15" @@ -6017,6 +6074,27 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmcp" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37f2048a81a7ff7e8ef6bc5abced70c3d9114c8f03d85d7aaaafd9fd04f12e9e" +dependencies = [ + "base64 0.22.1", + "chrono", + "futures", + "paste", + "pin-project-lite", + "rmcp-macros 0.2.1", + "schemars 0.8.22", + "serde", + "serde_json", + "thiserror 2.0.17", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "rmcp" version = "0.6.4" @@ -6035,7 +6113,7 @@ dependencies = [ "pin-project-lite", "process-wrap", "rand 0.9.2", - "rmcp-macros", + "rmcp-macros 0.6.4", "schemars 1.0.4", "serde", "serde_json", @@ -6049,6 +6127,19 @@ dependencies = [ "uuid", ] +[[package]] +name = "rmcp-macros" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72398e694b9f6dbb5de960cf158c8699e6a1854cb5bbaac7de0646b2005763c4" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "serde_json", + "syn 2.0.108", +] + [[package]] name = "rmcp-macros" version = "0.6.4" @@ -6401,6 +6492,7 @@ version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ + "chrono", "dyn-clone", "schemars_derive 0.8.22", "serde", @@ -7028,6 +7120,16 @@ dependencies = [ "parking_lot 0.11.2", ] +[[package]] +name = "slug" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724" +dependencies = [ + "deunicode", + "wasm-bindgen", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -7832,6 +7934,28 @@ dependencies = [ "utf-8", ] +[[package]] +name = "tera" +version = "1.20.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8004bca281f2d32df3bacd59bc67b312cb4c70cea46cbd79dbe8ac5ed206722" +dependencies = [ + "chrono", + "chrono-tz", + "globwalk", + "humansize", + "lazy_static", + "percent-encoding", + "pest", + "pest_derive", + "rand 0.8.5", + "regex", + "serde", + "serde_json", + "slug", + "unicode-segmentation", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -7859,7 +7983,7 @@ dependencies = [ "lru 0.16.2", "mockall", "portpicker", - "rmcp", + "rmcp 0.6.4", "schemars 0.8.22", "serde", "serde_json", @@ -8220,6 +8344,23 @@ dependencies = [ "uuid", ] +[[package]] +name = "terraphim_mcp_codegen" +version = "0.1.0" +dependencies = [ + "anyhow", + "convert_case 0.6.0", + "rmcp 0.2.1", + "serde", + "serde_json", + "tempfile", + "tera", + "terraphim_config", + "terraphim_mcp_server", + "thiserror 1.0.69", + "tokio", +] + [[package]] name = "terraphim_mcp_server" version = "1.0.0" @@ -8231,7 +8372,7 @@ dependencies = [ "clap", "env_logger 0.11.8", "regex", - "rmcp", + "rmcp 0.6.4", "serde_json", "serial_test", "tempfile", @@ -8268,7 +8409,7 @@ dependencies = [ "mcp-spec", "reqwest 0.12.24", "reqwest-eventsource 0.5.0", - "rmcp", + "rmcp 0.6.4", "scraper", "serde", "serde_json", diff --git a/crates/terraphim_mcp_codegen/Cargo.toml b/crates/terraphim_mcp_codegen/Cargo.toml new file mode 100644 index 00000000..284a164f --- /dev/null +++ b/crates/terraphim_mcp_codegen/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "terraphim_mcp_codegen" +version = "0.1.0" +edition = "2021" +description = "Code generator for MCP server tools - creates TypeScript and Python wrappers" +license = "MIT" + +[dependencies] +# Workspace dependencies +tokio = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +anyhow = { workspace = true } + +# Template engine for code generation +tera = "1.19" + +# Case conversion for naming conventions +convert_case = "0.6" + +# MCP server 
integration +terraphim_mcp_server = { path = "../terraphim_mcp_server" } +terraphim_config = { path = "../terraphim_config" } + +# Runtime support +rmcp = "0.2" + +[[bin]] +name = "mcp-codegen" +path = "src/bin/codegen.rs" + +[dev-dependencies] +tempfile = "3.10" diff --git a/crates/terraphim_mcp_codegen/src/bin/codegen.rs b/crates/terraphim_mcp_codegen/src/bin/codegen.rs new file mode 100644 index 00000000..f478d1e2 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/bin/codegen.rs @@ -0,0 +1,458 @@ +//! MCP Code Generator CLI +//! +//! Generates TypeScript and Python wrappers for MCP tools. + +use std::path::PathBuf; + +use terraphim_mcp_codegen::{ + generate_code, + runtime::{McpRuntime, RuntimeConfig}, + CodegenConfig, OutputFormat, +}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Parse command line arguments + let args: Vec = std::env::args().collect(); + + if args.len() < 2 { + print_usage(); + std::process::exit(1); + } + + let command = &args[1]; + + match command.as_str() { + "generate" => { + let format = args.get(2).map(|s| s.as_str()).unwrap_or("typescript"); + let output_path = args.get(3).map(PathBuf::from).unwrap_or_else(|| { + if format == "python" || format == "py" { + PathBuf::from("terraphim.py") + } else { + PathBuf::from("terraphim.ts") + } + }); + + generate_wrappers(format, output_path).await?; + } + "package" => { + let output_dir = args + .get(2) + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("mcp-runtime")); + + create_package(output_dir).await?; + } + "introspect" => { + introspect_tools().await?; + } + "help" | "--help" | "-h" => { + print_usage(); + } + _ => { + eprintln!("Unknown command: {}", command); + print_usage(); + std::process::exit(1); + } + } + + Ok(()) +} + +fn print_usage() { + eprintln!( + r#" +MCP Code Generator - Generate TypeScript/Python wrappers for MCP tools + +USAGE: + mcp-codegen [options] + +COMMANDS: + generate [format] [output] Generate wrapper code + format: typescript (default), 
python + output: output file path (default: terraphim.ts or terraphim.py) + + package [output_dir] Create complete code execution package + output_dir: directory for the package (default: mcp-runtime) + + introspect List all available MCP tools + + help Show this help message + +EXAMPLES: + mcp-codegen generate typescript ./workspace/terraphim.ts + mcp-codegen generate python ./workspace/terraphim.py + mcp-codegen package ./workspace/mcp-runtime + mcp-codegen introspect +"# + ); +} + +async fn generate_wrappers(format: &str, output_path: PathBuf) -> Result<(), Box> { + println!("Generating {} wrappers...", format); + + // Create MCP service to introspect tools + let metadata = get_mcp_metadata().await?; + + let output_format: OutputFormat = format.parse().map_err(|e| { + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("Invalid format: {}", e), + )) + })?; + + let config = CodegenConfig { + format: output_format, + output_path: output_path.clone(), + module_name: "terraphim".to_string(), + async_functions: true, + include_docs: true, + include_examples: true, + }; + + let code = generate_code(&metadata, &config)?; + + // Ensure parent directory exists + if let Some(parent) = output_path.parent() { + std::fs::create_dir_all(parent)?; + } + + std::fs::write(&output_path, code)?; + + println!("Generated {} tools to {}", metadata.tools.len(), output_path.display()); + println!("\nTools generated:"); + for tool in &metadata.tools { + println!(" - {} ({})", tool.name, tool.category); + } + + Ok(()) +} + +async fn create_package(output_dir: PathBuf) -> Result<(), Box> { + println!("Creating code execution package..."); + + let metadata = get_mcp_metadata().await?; + + // Create both TypeScript and Python wrappers + let ts_config = CodegenConfig { + format: OutputFormat::TypeScript, + module_name: "terraphim".to_string(), + ..Default::default() + }; + + let py_config = CodegenConfig { + format: OutputFormat::Python, + module_name: 
"terraphim".to_string(), + ..Default::default() + }; + + let ts_code = generate_code(&metadata, &ts_config)?; + let py_code = generate_code(&metadata, &py_config)?; + + // Create runtime configuration + let runtime_config = RuntimeConfig::default(); + let runtime = McpRuntime::new(runtime_config.clone()); + + // Setup directory structure + std::fs::create_dir_all(&output_dir)?; + std::fs::create_dir_all(output_dir.join("typescript"))?; + std::fs::create_dir_all(output_dir.join("python"))?; + + // Write TypeScript package + std::fs::write(output_dir.join("typescript/terraphim.ts"), ts_code)?; + runtime.write_javascript_runtime(&output_dir.join("typescript/runtime.js"))?; + + // Write Python package + std::fs::write(output_dir.join("python/terraphim.py"), py_code)?; + runtime.write_python_runtime(&output_dir.join("python/runtime.py"))?; + + // Write package.json for TypeScript + let package_json = serde_json::json!({ + "name": "terraphim-mcp", + "version": "1.0.0", + "type": "module", + "main": "terraphim.ts", + "dependencies": {} + }); + std::fs::write( + output_dir.join("typescript/package.json"), + serde_json::to_string_pretty(&package_json)?, + )?; + + // Write requirements.txt for Python + std::fs::write( + output_dir.join("python/requirements.txt"), + "aiohttp>=3.8.0\n", + )?; + + // Write README + let readme = format!( + r#"# Terraphim MCP Code Execution Package + +This package contains TypeScript and Python wrappers for {} MCP tools. 
+ +## TypeScript Usage + +```typescript +import {{ terraphim }} from './typescript/terraphim'; +import './typescript/runtime'; + +const results = await terraphim.search({{ query: "rust patterns", limit: 10 }}); +``` + +## Python Usage + +```python +from python.runtime import mcp_call +from python.terraphim import terraphim + +results = await terraphim.search(query="rust patterns", limit=10) +``` + +## Available Tools + +{} +"#, + metadata.tools.len(), + metadata + .tools + .iter() + .map(|t| format!("- **{}**: {}", t.name, t.description)) + .collect::>() + .join("\n") + ); + std::fs::write(output_dir.join("README.md"), readme)?; + + println!( + "Package created at {} with {} tools", + output_dir.display(), + metadata.tools.len() + ); + + Ok(()) +} + +async fn introspect_tools() -> Result<(), Box> { + println!("Introspecting MCP tools...\n"); + + let metadata = get_mcp_metadata().await?; + + println!("Server: {} v{}", metadata.name, metadata.version); + if let Some(desc) = &metadata.description { + println!("Description: {}", desc); + } + println!("\nAvailable Tools ({}):\n", metadata.tools.len()); + + for tool in &metadata.tools { + println!(" {} - {}", tool.name, tool.category); + println!(" {}", tool.description); + if !tool.parameters.is_empty() { + println!(" Parameters:"); + for param in &tool.parameters { + let required = if param.required { "required" } else { "optional" }; + println!( + " - {} ({}): {} [{}]", + param.name, param.json_type, param.description, required + ); + } + } + println!(" Capabilities: {}", tool.capabilities.join(", ")); + println!(); + } + + Ok(()) +} + +async fn get_mcp_metadata( +) -> Result> { + // Build metadata directly from known MCP server tools + // This avoids needing to create a runtime context + use terraphim_mcp_codegen::{ + categorize_tool, extract_capabilities, McpServerMetadata, ParameterMetadata, ToolMetadata, + }; + + let tools = vec![ + create_tool_metadata( + "search", + "Search for documents in the Terraphim 
knowledge graph", + vec![ + ("query", "string", "The search query", true), + ("role", "string", "Optional role to filter by", false), + ("limit", "integer", "Maximum number of results to return", false), + ("skip", "integer", "Number of results to skip", false), + ], + ), + create_tool_metadata( + "update_config_tool", + "Update the Terraphim configuration", + vec![("config_str", "string", "JSON configuration string", true)], + ), + create_tool_metadata( + "build_autocomplete_index", + "Build FST-based autocomplete index from role's knowledge graph", + vec![("role", "string", "Optional role name to build autocomplete index for", false)], + ), + create_tool_metadata( + "autocomplete_terms", + "Autocomplete terms using FST prefix + fuzzy fallback", + vec![ + ("query", "string", "Prefix or term for suggestions", true), + ("limit", "integer", "Max suggestions (default 10)", false), + ("role", "string", "Optional role name to use for autocomplete", false), + ], + ), + create_tool_metadata( + "autocomplete_with_snippets", + "Autocomplete and return short snippets from matching documents", + vec![ + ("query", "string", "Prefix or term for suggestions with snippets", true), + ("limit", "integer", "Max suggestions (default 10)", false), + ("role", "string", "Optional role name to use for autocomplete", false), + ], + ), + create_tool_metadata( + "fuzzy_autocomplete_search", + "Perform fuzzy autocomplete search using Jaro-Winkler similarity", + vec![ + ("query", "string", "The text to get autocomplete suggestions for", true), + ("similarity", "number", "Minimum similarity threshold (0.0-1.0, default: 0.6)", false), + ("limit", "integer", "Maximum number of suggestions to return (default: 10)", false), + ], + ), + create_tool_metadata( + "fuzzy_autocomplete_search_levenshtein", + "Perform fuzzy autocomplete search using Levenshtein distance", + vec![ + ("query", "string", "The text to get autocomplete suggestions for", true), + ("max_edit_distance", "integer", "Maximum 
Levenshtein edit distance allowed (default: 2)", false), + ("limit", "integer", "Maximum number of suggestions to return (default: 10)", false), + ], + ), + create_tool_metadata( + "fuzzy_autocomplete_search_jaro_winkler", + "Perform fuzzy autocomplete search using Jaro-Winkler similarity (explicit)", + vec![ + ("query", "string", "The text to get autocomplete suggestions for", true), + ("similarity", "number", "Minimum similarity threshold (0.0-1.0, default: 0.6)", false), + ("limit", "integer", "Maximum number of suggestions to return (default: 10)", false), + ], + ), + create_tool_metadata( + "serialize_autocomplete_index", + "Serialize the current autocomplete index to a base64-encoded string", + vec![], + ), + create_tool_metadata( + "deserialize_autocomplete_index", + "Deserialize an autocomplete index from a base64-encoded string", + vec![("base64_data", "string", "The base64-encoded string of the serialized index", true)], + ), + create_tool_metadata( + "find_matches", + "Find all term matches in text using Aho-Corasick algorithm", + vec![ + ("text", "string", "The text to search in", true), + ("role", "string", "Optional role to filter by", false), + ("return_positions", "boolean", "Whether to return positions (default: false)", false), + ], + ), + create_tool_metadata( + "replace_matches", + "Replace matched terms in text with links using specified format", + vec![ + ("text", "string", "The text to replace terms in", true), + ("role", "string", "Optional role to filter by", false), + ("link_type", "string", "The type of link to use (wiki, html, markdown)", true), + ], + ), + create_tool_metadata( + "extract_paragraphs_from_automata", + "Extract paragraphs containing matched terms from text", + vec![ + ("text", "string", "The text to extract paragraphs from", true), + ("role", "string", "Optional role to filter by", false), + ("include_term", "boolean", "Whether to include the matched term (default: true)", false), + ], + ), + create_tool_metadata( + 
"json_decode", + "Parse Logseq JSON output using terraphim_automata", + vec![("jsonlines", "string", "The JSON lines string to decode", true)], + ), + create_tool_metadata( + "load_thesaurus", + "Load thesaurus from a local file or remote URL", + vec![("automata_path", "string", "The path to the automata file (local or remote URL)", true)], + ), + create_tool_metadata( + "load_thesaurus_from_json", + "Load thesaurus from a JSON string", + vec![("json_str", "string", "The JSON string to load thesaurus from", true)], + ), + create_tool_metadata( + "is_all_terms_connected_by_path", + "Check if all matched terms in text can be connected by a single path in the knowledge graph", + vec![ + ("text", "string", "The text to check for term connectivity", true), + ("role", "string", "Optional role to use for thesaurus and graph", false), + ], + ), + ]; + + Ok(McpServerMetadata { + name: "terraphim-mcp".to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), + tools, + description: Some("Terraphim MCP Server - Knowledge graph search and autocomplete tools".to_string()), + }) +} + +fn create_tool_metadata( + name: &str, + description: &str, + params: Vec<(&str, &str, &str, bool)>, +) -> terraphim_mcp_codegen::ToolMetadata { + use terraphim_mcp_codegen::{ + categorize_tool, extract_capabilities, ParameterMetadata, ToolMetadata, + }; + + let parameters: Vec = params + .into_iter() + .map(|(pname, ptype, pdesc, required)| ParameterMetadata { + name: pname.to_string(), + description: pdesc.to_string(), + json_type: ptype.to_string(), + required, + default_value: None, + array_item_type: None, + object_properties: None, + }) + .collect(); + + let category = categorize_tool(name); + + let mut metadata = ToolMetadata { + name: name.to_string(), + title: None, + description: description.to_string(), + category, + capabilities: vec![], + parameters, + return_type: "Promise".to_string(), + examples: terraphim_mcp_codegen::introspection::generate_examples(name), + }; + + 
metadata.capabilities = extract_capabilities(&metadata); + metadata +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_generate_wrappers() { + // This would test the wrapper generation + // For now, just a placeholder + } +} diff --git a/crates/terraphim_mcp_codegen/src/introspection.rs b/crates/terraphim_mcp_codegen/src/introspection.rs new file mode 100644 index 00000000..e65ae37d --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/introspection.rs @@ -0,0 +1,360 @@ +//! MCP Server Introspection - Extract tool metadata from MCP servers + +use std::collections::HashMap; +use std::sync::Arc; + +use crate::{ + categorize_tool, extract_capabilities, CodegenError, McpServerMetadata, ParameterMetadata, + Result, ToolMetadata, +}; + +/// Extract metadata from MCP server tool definitions +pub fn extract_server_metadata( + tools: Vec, + server_info: &rmcp::model::ServerInfo, +) -> Result { + let mut tool_metadata = Vec::new(); + + for tool in tools { + let metadata = extract_tool_metadata(tool)?; + tool_metadata.push(metadata); + } + + Ok(McpServerMetadata { + name: server_info.server_info.name.clone(), + version: server_info.server_info.version.clone(), + tools: tool_metadata, + description: server_info.instructions.clone(), + }) +} + +/// Extract metadata from a single MCP tool definition +fn extract_tool_metadata(tool: rmcp::model::Tool) -> Result { + let name = tool.name.to_string(); + + // Extract parameters from input schema + let parameters = extract_parameters_from_schema(&tool.input_schema)?; + + // Determine category and capabilities + let category = categorize_tool(&name); + + // Create base tool metadata + // Note: rmcp::model::Tool has fields: name, description, input_schema, annotations + let mut metadata = ToolMetadata { + name: name.clone(), + title: None, // Not available in rmcp Tool, will derive from name if needed + description: tool.description.map(|s| s.to_string()).unwrap_or_default(), + category, + capabilities: 
vec![], // Will be filled after creation + parameters, + return_type: "Promise".to_string(), + examples: generate_examples(&name), + }; + + // Extract capabilities based on the metadata + metadata.capabilities = extract_capabilities(&metadata); + + Ok(metadata) +} + +/// Extract parameter metadata from JSON Schema +fn extract_parameters_from_schema( + schema: &Arc>, +) -> Result> { + let mut parameters = Vec::new(); + + // Get required fields + let required_fields: Vec = schema + .get("required") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default(); + + // Get properties + if let Some(properties) = schema.get("properties") { + if let Some(props_obj) = properties.as_object() { + for (param_name, param_schema) in props_obj { + let param = extract_single_parameter(param_name, param_schema, &required_fields)?; + parameters.push(param); + } + } + } + + // Sort parameters: required first, then optional + parameters.sort_by(|a, b| { + match (a.required, b.required) { + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, + _ => a.name.cmp(&b.name), // Alphabetical within same required status + } + }); + + Ok(parameters) +} + +/// Extract a single parameter from its JSON Schema definition +fn extract_single_parameter( + name: &str, + schema: &serde_json::Value, + required_fields: &[String], +) -> Result { + let schema_obj = schema.as_object().ok_or_else(|| { + CodegenError::InvalidSpec(format!("Parameter {} schema is not an object", name)) + })?; + + let json_type = schema_obj + .get("type") + .and_then(|v| v.as_str()) + .unwrap_or("any") + .to_string(); + + let description = schema_obj + .get("description") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let default_value = schema_obj.get("default").cloned(); + + let array_item_type = if json_type == "array" { + schema_obj + .get("items") + .and_then(|v| v.as_object()) + 
.and_then(|obj| obj.get("type")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + } else { + None + }; + + let object_properties = if json_type == "object" { + schema_obj + .get("properties") + .and_then(|v| v.as_object()) + .map(|props| { + let mut nested_params = HashMap::new(); + for (prop_name, prop_schema) in props { + if let Ok(nested_param) = + extract_single_parameter(prop_name, prop_schema, &[]) + { + nested_params.insert(prop_name.clone(), nested_param); + } + } + nested_params + }) + } else { + None + }; + + Ok(ParameterMetadata { + name: name.to_string(), + description, + json_type, + required: required_fields.contains(&name.to_string()), + default_value, + array_item_type, + object_properties, + }) +} + +/// Generate usage examples for a tool +pub fn generate_examples(tool_name: &str) -> Vec { + match tool_name { + "search" => vec![ + r#" +const results = await terraphim.search({ + query: "rust async patterns", + limit: 10 +}); +console.log(`Found ${results.length} documents`); +"# + .trim() + .to_string(), + ], + "autocomplete_terms" => vec![ + r#" +const suggestions = await terraphim.autocompleteTerms({ + query: "tera", + limit: 5 +}); +suggestions.forEach(s => console.log(s)); +"# + .trim() + .to_string(), + ], + "find_matches" => vec![ + r#" +const matches = await terraphim.findMatches({ + text: "This document discusses async rust patterns with tokio", + returnPositions: true +}); +console.log(`Found ${matches.length} term matches`); +"# + .trim() + .to_string(), + ], + "fuzzy_autocomplete_search" => vec![ + r#" +const suggestions = await terraphim.fuzzyAutocompleteSearch({ + query: "asynch", // typo intentional + similarity: 0.7, + limit: 5 +}); +"# + .trim() + .to_string(), + ], + "replace_matches" => vec![ + r#" +const linkedText = await terraphim.replaceMatches({ + text: "Learn about async rust and tokio patterns", + linkType: "markdown" +}); +// Returns: "Learn about [async rust](url) and [tokio patterns](url)" +"# + .trim() + 
.to_string(), + ], + "extract_paragraphs_from_automata" => vec![ + r#" +const paragraphs = await terraphim.extractParagraphsFromAutomata({ + text: longDocument, + includeTerm: true +}); +paragraphs.forEach(p => console.log(p.term, p.paragraph)); +"# + .trim() + .to_string(), + ], + "build_autocomplete_index" => vec![ + r#" +await terraphim.buildAutocompleteIndex({ + role: "engineer" +}); +console.log("Index built successfully"); +"# + .trim() + .to_string(), + ], + "is_all_terms_connected_by_path" => vec![ + r#" +const connected = await terraphim.isAllTermsConnectedByPath({ + text: "async programming with tokio runtime" +}); +console.log(`Terms are connected: ${connected}`); +"# + .trim() + .to_string(), + ], + _ => vec![format!( + r#" +const result = await terraphim.{}(params); +console.log(result); +"#, + to_camel_case(tool_name) + ) + .trim() + .to_string()], + } +} + +/// Convert snake_case to camelCase +pub fn to_camel_case(s: &str) -> String { + let mut result = String::new(); + let mut capitalize_next = false; + + for c in s.chars() { + if c == '_' { + capitalize_next = true; + } else if capitalize_next { + result.push(c.to_uppercase().next().unwrap()); + capitalize_next = false; + } else { + result.push(c); + } + } + + result +} + +/// Convert snake_case to PascalCase +pub fn to_pascal_case(s: &str) -> String { + let camel = to_camel_case(s); + if let Some(first) = camel.chars().next() { + format!("{}{}", first.to_uppercase(), &camel[1..]) + } else { + camel + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_to_camel_case() { + assert_eq!(to_camel_case("search"), "search"); + assert_eq!(to_camel_case("autocomplete_terms"), "autocompleteTerms"); + assert_eq!( + to_camel_case("fuzzy_autocomplete_search"), + "fuzzyAutocompleteSearch" + ); + assert_eq!( + to_camel_case("is_all_terms_connected_by_path"), + "isAllTermsConnectedByPath" + ); + } + + #[test] + fn test_to_pascal_case() { + assert_eq!(to_pascal_case("search"), "Search"); + 
assert_eq!(to_pascal_case("autocomplete_terms"), "AutocompleteTerms"); + assert_eq!( + to_pascal_case("fuzzy_autocomplete_search"), + "FuzzyAutocompleteSearch" + ); + } + + #[test] + fn test_extract_single_parameter_string() { + let schema = serde_json::json!({ + "type": "string", + "description": "The search query" + }); + + let param = extract_single_parameter("query", &schema, &["query".to_string()]).unwrap(); + + assert_eq!(param.name, "query"); + assert_eq!(param.json_type, "string"); + assert_eq!(param.description, "The search query"); + assert!(param.required); + } + + #[test] + fn test_extract_single_parameter_optional() { + let schema = serde_json::json!({ + "type": "integer", + "description": "Maximum results" + }); + + let param = extract_single_parameter("limit", &schema, &[]).unwrap(); + + assert_eq!(param.name, "limit"); + assert_eq!(param.json_type, "integer"); + assert!(!param.required); + } + + #[test] + fn test_generate_examples() { + let examples = generate_examples("search"); + assert!(!examples.is_empty()); + assert!(examples[0].contains("terraphim.search")); + + let fuzzy_examples = generate_examples("fuzzy_autocomplete_search"); + assert!(fuzzy_examples[0].contains("fuzzyAutocompleteSearch")); + } +} diff --git a/crates/terraphim_mcp_codegen/src/lib.rs b/crates/terraphim_mcp_codegen/src/lib.rs new file mode 100644 index 00000000..f9f91a43 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/lib.rs @@ -0,0 +1,389 @@ +//! MCP Code Generator - Generates TypeScript and Python wrappers for MCP tools +//! +//! This crate enables AI agents to use MCP tools as importable code modules, +//! achieving massive token reduction by allowing code-based tool usage instead +//! of traditional tool calling patterns. 
+ +pub mod introspection; +pub mod python_gen; +pub mod runtime; +pub mod typescript_gen; + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use thiserror::Error; + +/// Error types for MCP code generation +#[derive(Error, Debug)] +pub enum CodegenError { + #[error("Template error: {0}")] + Template(#[from] tera::Error), + + #[error("Serialization error: {0}")] + Serialization(#[from] serde_json::Error), + + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("MCP introspection error: {0}")] + Introspection(String), + + #[error("Invalid tool specification: {0}")] + InvalidSpec(String), + + #[error("Unsupported type: {0}")] + UnsupportedType(String), +} + +pub type Result = std::result::Result; + +/// Metadata for a single MCP tool parameter +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ParameterMetadata { + /// Parameter name + pub name: String, + /// Parameter description + pub description: String, + /// JSON Schema type (string, number, integer, boolean, array, object) + pub json_type: String, + /// Whether the parameter is required + pub required: bool, + /// Default value if any + pub default_value: Option, + /// For arrays, the item type + pub array_item_type: Option, + /// For objects, the properties + pub object_properties: Option>, +} + +impl ParameterMetadata { + /// Convert JSON type to TypeScript type + pub fn to_typescript_type(&self) -> String { + match self.json_type.as_str() { + "string" => "string".to_string(), + "number" => "number".to_string(), + "integer" => "number".to_string(), + "boolean" => "boolean".to_string(), + "array" => { + let item_type = self + .array_item_type + .as_deref() + .unwrap_or("any") + .to_string(); + format!("{}[]", item_type) + } + "object" => "Record".to_string(), + "null" => "null".to_string(), + _ => "any".to_string(), + } + } + + /// Convert JSON type to Python type hint + pub fn to_python_type(&self) -> String { + match self.json_type.as_str() { + "string" => 
"str".to_string(), + "number" => "float".to_string(), + "integer" => "int".to_string(), + "boolean" => "bool".to_string(), + "array" => { + let item_type = self.array_item_type.as_deref().unwrap_or("Any"); + let python_item = match item_type { + "string" => "str", + "number" => "float", + "integer" => "int", + "boolean" => "bool", + _ => "Any", + }; + format!("List[{}]", python_item) + } + "object" => "Dict[str, Any]".to_string(), + "null" => "None".to_string(), + _ => "Any".to_string(), + } + } +} + +/// Metadata for a single MCP tool +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolMetadata { + /// Tool name (e.g., "search", "autocomplete_terms") + pub name: String, + /// Human-readable title + pub title: Option, + /// Tool description + pub description: String, + /// Tool category for discovery + pub category: ToolCategory, + /// Tool capabilities for discovery + pub capabilities: Vec, + /// Input parameters + pub parameters: Vec, + /// Return type description + pub return_type: String, + /// Example usage code + pub examples: Vec, +} + +/// Tool category for progressive discovery +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum ToolCategory { + KnowledgeGraph, + Autocomplete, + TextProcessing, + Configuration, + Analysis, + Serialization, + Other(String), +} + +impl std::fmt::Display for ToolCategory { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ToolCategory::KnowledgeGraph => write!(f, "knowledge-graph"), + ToolCategory::Autocomplete => write!(f, "autocomplete"), + ToolCategory::TextProcessing => write!(f, "text-processing"), + ToolCategory::Configuration => write!(f, "configuration"), + ToolCategory::Analysis => write!(f, "analysis"), + ToolCategory::Serialization => write!(f, "serialization"), + ToolCategory::Other(s) => write!(f, "{}", s), + } + } +} + +/// Complete MCP server metadata for code generation +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct 
McpServerMetadata { + /// Server name + pub name: String, + /// Server version + pub version: String, + /// All available tools + pub tools: Vec, + /// Server description + pub description: Option, +} + +/// Output format for code generation +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OutputFormat { + TypeScript, + Python, +} + +impl std::str::FromStr for OutputFormat { + type Err = CodegenError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "typescript" | "ts" => Ok(OutputFormat::TypeScript), + "python" | "py" => Ok(OutputFormat::Python), + _ => Err(CodegenError::InvalidSpec(format!( + "Unknown output format: {}", + s + ))), + } + } +} + +/// Configuration for code generation +#[derive(Debug, Clone)] +pub struct CodegenConfig { + /// Output format (TypeScript or Python) + pub format: OutputFormat, + /// Output file path + pub output_path: std::path::PathBuf, + /// Module name to use + pub module_name: String, + /// Whether to generate async functions + pub async_functions: bool, + /// Include documentation comments + pub include_docs: bool, + /// Include usage examples + pub include_examples: bool, +} + +impl Default for CodegenConfig { + fn default() -> Self { + Self { + format: OutputFormat::TypeScript, + output_path: std::path::PathBuf::from("terraphim.ts"), + module_name: "terraphim".to_string(), + async_functions: true, + include_docs: true, + include_examples: true, + } + } +} + +/// Main code generator trait +pub trait CodeGenerator { + /// Generate code for all tools in the metadata + fn generate(&self, metadata: &McpServerMetadata, config: &CodegenConfig) -> Result; + + /// Generate code for a single tool + fn generate_tool(&self, tool: &ToolMetadata, config: &CodegenConfig) -> Result; +} + +/// Generate code based on configuration +pub fn generate_code(metadata: &McpServerMetadata, config: &CodegenConfig) -> Result { + match config.format { + OutputFormat::TypeScript => { + let generator = 
typescript_gen::TypeScriptGenerator::new()?; + generator.generate(metadata, config) + } + OutputFormat::Python => { + let generator = python_gen::PythonGenerator::new()?; + generator.generate(metadata, config) + } + } +} + +/// Categorize tools based on their names and descriptions +pub fn categorize_tool(tool_name: &str) -> ToolCategory { + match tool_name { + "search" | "find_matches" | "is_all_terms_connected_by_path" => { + ToolCategory::KnowledgeGraph + } + name if name.contains("autocomplete") => ToolCategory::Autocomplete, + "replace_matches" | "extract_paragraphs_from_automata" | "json_decode" => { + ToolCategory::TextProcessing + } + "update_config_tool" => ToolCategory::Configuration, + "load_thesaurus" | "load_thesaurus_from_json" | "build_autocomplete_index" => { + ToolCategory::Analysis + } + name if name.contains("serialize") || name.contains("deserialize") => { + ToolCategory::Serialization + } + _ => ToolCategory::Other("uncategorized".to_string()), + } +} + +/// Extract capabilities from tool metadata +pub fn extract_capabilities(tool: &ToolMetadata) -> Vec { + let mut capabilities = Vec::new(); + + // Based on tool name patterns + if tool.name.contains("search") { + capabilities.push("search".to_string()); + } + if tool.name.contains("autocomplete") { + capabilities.push("autocomplete".to_string()); + capabilities.push("suggestions".to_string()); + } + if tool.name.contains("fuzzy") { + capabilities.push("fuzzy-matching".to_string()); + } + if tool.name.contains("find") || tool.name.contains("match") { + capabilities.push("pattern-matching".to_string()); + } + if tool.name.contains("replace") { + capabilities.push("text-transformation".to_string()); + } + if tool.name.contains("extract") { + capabilities.push("text-extraction".to_string()); + } + if tool.name.contains("load") { + capabilities.push("data-loading".to_string()); + } + if tool.name.contains("serialize") || tool.name.contains("deserialize") { + 
capabilities.push("serialization".to_string()); + } + if tool.name.contains("config") { + capabilities.push("configuration".to_string()); + } + + // Add read/write based on likely side effects + if tool.name.starts_with("update") || tool.name.starts_with("build") { + capabilities.push("write".to_string()); + } else { + capabilities.push("read".to_string()); + } + + capabilities +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_categorize_tool() { + assert_eq!(categorize_tool("search"), ToolCategory::KnowledgeGraph); + assert_eq!( + categorize_tool("autocomplete_terms"), + ToolCategory::Autocomplete + ); + assert_eq!( + categorize_tool("fuzzy_autocomplete_search"), + ToolCategory::Autocomplete + ); + assert_eq!( + categorize_tool("replace_matches"), + ToolCategory::TextProcessing + ); + assert_eq!( + categorize_tool("update_config_tool"), + ToolCategory::Configuration + ); + assert_eq!( + categorize_tool("serialize_autocomplete_index"), + ToolCategory::Serialization + ); + } + + #[test] + fn test_extract_capabilities() { + let tool = ToolMetadata { + name: "fuzzy_autocomplete_search".to_string(), + title: None, + description: "Fuzzy search".to_string(), + category: ToolCategory::Autocomplete, + capabilities: vec![], + parameters: vec![], + return_type: "string[]".to_string(), + examples: vec![], + }; + + let caps = extract_capabilities(&tool); + assert!(caps.contains(&"autocomplete".to_string())); + assert!(caps.contains(&"fuzzy-matching".to_string())); + assert!(caps.contains(&"read".to_string())); + } + + #[test] + fn test_parameter_type_conversion() { + let param = ParameterMetadata { + name: "items".to_string(), + description: "Array of strings".to_string(), + json_type: "array".to_string(), + required: true, + default_value: None, + array_item_type: Some("string".to_string()), + object_properties: None, + }; + + assert_eq!(param.to_typescript_type(), "string[]"); + assert_eq!(param.to_python_type(), "List[str]"); + } + + #[test] + fn 
test_output_format_parsing() { + assert_eq!( + "typescript".parse::().unwrap(), + OutputFormat::TypeScript + ); + assert_eq!( + "ts".parse::().unwrap(), + OutputFormat::TypeScript + ); + assert_eq!( + "python".parse::().unwrap(), + OutputFormat::Python + ); + assert_eq!( + "py".parse::().unwrap(), + OutputFormat::Python + ); + } +} diff --git a/crates/terraphim_mcp_codegen/src/python_gen.rs b/crates/terraphim_mcp_codegen/src/python_gen.rs new file mode 100644 index 00000000..0cc033f7 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/python_gen.rs @@ -0,0 +1,319 @@ +//! Python Code Generator for MCP Tools + +use crate::{ + introspection::to_camel_case, CodeGenerator, CodegenConfig, McpServerMetadata, Result, + ToolMetadata, +}; +use tera::{Context, Tera}; + +const PYTHON_MODULE_TEMPLATE: &str = r#" +""" +{{ server_name }} MCP Tools +{{ server_description }} + +Generated automatically from MCP server introspection. +Version: {{ server_version }} + +Usage: + from {{ module_name }} import {{ module_name }} + + results = await {{ module_name }}.search(query="rust patterns") +""" + +from typing import Any, Dict, List, Optional +import asyncio + +# Type alias for MCP call results +McpCallResult = Dict[str, Any] + +# MCP Runtime - connects to actual MCP server +async def mcp_call(tool_name: str, params: Dict[str, Any]) -> McpCallResult: + """Call an MCP tool. 
This should be injected by the runtime.""" + raise NotImplementedError("mcp_call must be injected by the MCP runtime") + +{% for tool in tools %} +{% if include_docs %} +async def {{ tool.snake_name }}( +{% for param in tool.parameters %} + {{ param.name }}: {% if param.required %}{{ param.python_type }}{% else %}Optional[{{ param.python_type }}] = None{% endif %}, +{% endfor %} +) -> McpCallResult: + """ + {{ tool.description }} + + Category: {{ tool.category }} + Capabilities: {{ tool.capabilities | join(sep=", ") }} + + Args: +{% for param in tool.parameters %} + {{ param.name }}: {{ param.description }}{% if not param.required %} (optional){% endif %} +{% endfor %} + + Returns: + McpCallResult: The result from the MCP server +{% if include_examples %} + + Example: +{% for example in tool.examples %} + {{ example | replace(from="\n", to="\n ") }} +{% endfor %} +{% endif %} + """ +{% else %} +async def {{ tool.snake_name }}( +{% for param in tool.parameters %} + {{ param.name }}: {% if param.required %}{{ param.python_type }}{% else %}Optional[{{ param.python_type }}] = None{% endif %}, +{% endfor %} +) -> McpCallResult: +{% endif %} + params = { +{% for param in tool.parameters %} +{% if param.required %} + "{{ param.name }}": {{ param.name }}, +{% else %} + "{{ param.name }}": {{ param.name }}, +{% endif %} +{% endfor %} + } + # Remove None values for optional parameters + params = {k: v for k, v in params.items() if v is not None} + return await mcp_call("{{ tool.name }}", params) + +{% endfor %} + +# Main module class +class {{ module_name_pascal }}: + """{{ server_name }} MCP Tools API""" + +{% for tool in tools %} + {{ tool.snake_name }} = staticmethod({{ tool.snake_name }}) +{% endfor %} + +# Convenience alias +{{ module_name }} = {{ module_name_pascal }} + +__all__ = [ + "{{ module_name }}", + "{{ module_name_pascal }}", +{% for tool in tools %} + "{{ tool.snake_name }}", +{% endfor %} +] +"#; + +/// Python code generator +pub struct PythonGenerator { + 
tera: Tera, +} + +impl PythonGenerator { + /// Create a new Python generator + pub fn new() -> Result { + let mut tera = Tera::default(); + tera.add_raw_template("python_module", PYTHON_MODULE_TEMPLATE)?; + + Ok(Self { tera }) + } + + /// Convert camelCase to snake_case + fn to_snake_case(s: &str) -> String { + let mut result = String::new(); + for (i, c) in s.chars().enumerate() { + if c.is_uppercase() { + if i > 0 { + result.push('_'); + } + result.push(c.to_lowercase().next().unwrap()); + } else if c == '_' { + result.push('_'); + } else { + result.push(c); + } + } + result + } + + /// Convert to PascalCase for class names + fn to_pascal_case_python(s: &str) -> String { + s.split('_') + .map(|word| { + let mut chars = word.chars(); + match chars.next() { + None => String::new(), + Some(first) => { + first.to_uppercase().collect::() + chars.as_str().to_lowercase().as_str() + } + } + }) + .collect::() + } +} + +impl CodeGenerator for PythonGenerator { + fn generate(&self, metadata: &McpServerMetadata, config: &CodegenConfig) -> Result { + let mut context = Context::new(); + + // Server info + context.insert("server_name", &metadata.name); + context.insert("server_version", &metadata.version); + context.insert( + "server_description", + metadata.description.as_deref().unwrap_or(""), + ); + context.insert("module_name", &config.module_name); + context.insert( + "module_name_pascal", + &Self::to_pascal_case_python(&config.module_name), + ); + context.insert("include_docs", &config.include_docs); + context.insert("include_examples", &config.include_examples); + + // Transform tools for template + let tools: Vec = metadata + .tools + .iter() + .map(|tool| { + let params: Vec = tool + .parameters + .iter() + .map(|p| { + serde_json::json!({ + "name": Self::to_snake_case(&p.name), + "description": p.description, + "python_type": p.to_python_type(), + "required": p.required, + }) + }) + .collect(); + + // Convert examples to Python style + let python_examples: Vec = tool 
+ .examples + .iter() + .map(|ex| { + ex.replace("await terraphim.", "await ") + .replace("const ", "") + .replace("let ", "") + .replace(";", "") + .replace(" =>", ":") + .replace("console.log", "print") + .replace("${", "{") + .replace("}`", "}") + }) + .collect(); + + serde_json::json!({ + "name": tool.name, + "snake_name": Self::to_snake_case(&tool.name), + "description": tool.description, + "category": tool.category.to_string(), + "capabilities": tool.capabilities, + "parameters": params, + "examples": python_examples, + }) + }) + .collect(); + + context.insert("tools", &tools); + + let rendered = self.tera.render("python_module", &context)?; + + // Clean up extra whitespace + let cleaned = rendered + .lines() + .map(|line| line.trim_end()) + .collect::>() + .join("\n"); + + Ok(cleaned.trim().to_string()) + } + + fn generate_tool(&self, tool: &ToolMetadata, config: &CodegenConfig) -> Result { + // Create single-tool metadata + let metadata = McpServerMetadata { + name: "terraphim".to_string(), + version: "1.0.0".to_string(), + tools: vec![tool.clone()], + description: None, + }; + + self.generate(&metadata, config) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ParameterMetadata, ToolCategory}; + + #[test] + fn test_to_snake_case() { + assert_eq!( + PythonGenerator::to_snake_case("autocomplete_terms"), + "autocomplete_terms" + ); + assert_eq!( + PythonGenerator::to_snake_case("fuzzy_autocomplete_search"), + "fuzzy_autocomplete_search" + ); + assert_eq!(PythonGenerator::to_snake_case("search"), "search"); + } + + #[test] + fn test_to_pascal_case_python() { + assert_eq!( + PythonGenerator::to_pascal_case_python("terraphim"), + "Terraphim" + ); + assert_eq!( + PythonGenerator::to_pascal_case_python("mcp_server"), + "McpServer" + ); + } + + #[test] + fn test_generate_simple_tool() { + let generator = PythonGenerator::new().unwrap(); + + let tool = ToolMetadata { + name: "search".to_string(), + title: Some("Search".to_string()), + description: 
"Search for documents".to_string(), + category: ToolCategory::KnowledgeGraph, + capabilities: vec!["search".to_string()], + parameters: vec![ + ParameterMetadata { + name: "query".to_string(), + description: "The search query".to_string(), + json_type: "string".to_string(), + required: true, + default_value: None, + array_item_type: None, + object_properties: None, + }, + ParameterMetadata { + name: "limit".to_string(), + description: "Max results".to_string(), + json_type: "integer".to_string(), + required: false, + default_value: None, + array_item_type: None, + object_properties: None, + }, + ], + return_type: "McpCallResult".to_string(), + examples: vec!["results = await search(query=\"test\")".to_string()], + }; + + let config = CodegenConfig { + module_name: "terraphim".to_string(), + ..Default::default() + }; + let code = generator.generate_tool(&tool, &config).unwrap(); + + assert!(code.contains("async def search(")); + assert!(code.contains("query: str,")); + assert!(code.contains("limit: Optional[int] = None,")); + assert!(code.contains("mcp_call(\"search\"")); + } +} diff --git a/crates/terraphim_mcp_codegen/src/runtime.rs b/crates/terraphim_mcp_codegen/src/runtime.rs new file mode 100644 index 00000000..90fa0fe8 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/runtime.rs @@ -0,0 +1,306 @@ +//! MCP Runtime Bridge - Enables code execution environment to call MCP tools +//! +//! This module provides the runtime infrastructure that allows code generated +//! by the TypeScript/Python generators to actually call MCP tools. 
+ +use std::sync::Arc; + +use crate::{CodegenError, Result}; + +/// Configuration for the MCP runtime +#[derive(Debug, Clone)] +pub struct RuntimeConfig { + /// MCP server URL for HTTP transport + pub mcp_server_url: Option, + /// Whether to use stdio transport + pub use_stdio: bool, + /// Timeout for MCP calls in milliseconds + pub timeout_ms: u64, + /// Maximum concurrent calls + pub max_concurrent: usize, +} + +impl Default for RuntimeConfig { + fn default() -> Self { + Self { + mcp_server_url: None, + use_stdio: true, + timeout_ms: 30000, + max_concurrent: 10, + } + } +} + +/// MCP Runtime that bridges code execution to MCP servers +pub struct McpRuntime { + config: RuntimeConfig, +} + +impl McpRuntime { + /// Create a new MCP runtime + pub fn new(config: RuntimeConfig) -> Self { + Self { config } + } + + /// Generate JavaScript runtime code that injects the mcpCall function + pub fn generate_javascript_runtime(&self) -> String { + let server_url = self + .config + .mcp_server_url + .as_deref() + .unwrap_or("http://localhost:3001"); + + format!( + r#" +// MCP Runtime Bridge for JavaScript/TypeScript +// This provides the mcpCall function that generated code uses + +const MCP_SERVER_URL = "{}"; +const MCP_TIMEOUT_MS = {}; + +async function mcpCall(toolName, params) {{ + const response = await fetch(`${{MCP_SERVER_URL}}/mcp/tools/call`, {{ + method: 'POST', + headers: {{ + 'Content-Type': 'application/json', + }}, + body: JSON.stringify({{ + jsonrpc: '2.0', + id: Date.now(), + method: 'tools/call', + params: {{ + name: toolName, + arguments: params + }} + }}), + signal: AbortSignal.timeout(MCP_TIMEOUT_MS) + }}); + + if (!response.ok) {{ + throw new Error(`MCP call failed: ${{response.statusText}}`); + }} + + const result = await response.json(); + + if (result.error) {{ + throw new Error(`MCP tool error: ${{result.error.message}}`); + }} + + return result.result; +}} + +// Make mcpCall available globally +globalThis.mcpCall = mcpCall; +"#, + server_url, 
self.config.timeout_ms + ) + } + + /// Generate Python runtime code that injects the mcp_call function + pub fn generate_python_runtime(&self) -> String { + let server_url = self + .config + .mcp_server_url + .as_deref() + .unwrap_or("http://localhost:3001"); + + format!( + r#" +# MCP Runtime Bridge for Python +# This provides the mcp_call function that generated code uses + +import aiohttp +import json +from typing import Any, Dict + +MCP_SERVER_URL = "{}" +MCP_TIMEOUT_MS = {} + +async def mcp_call(tool_name: str, params: Dict[str, Any]) -> Dict[str, Any]: + """ + Call an MCP tool through the MCP server. + + Args: + tool_name: Name of the tool to call + params: Parameters to pass to the tool + + Returns: + The result from the MCP server + + Raises: + Exception: If the MCP call fails + """ + async with aiohttp.ClientSession() as session: + payload = {{ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/call", + "params": {{ + "name": tool_name, + "arguments": params + }} + }} + + timeout = aiohttp.ClientTimeout(total=MCP_TIMEOUT_MS / 1000) + + async with session.post( + f"{{MCP_SERVER_URL}}/mcp/tools/call", + json=payload, + timeout=timeout + ) as response: + if not response.ok: + raise Exception(f"MCP call failed: {{response.status}}") + + result = await response.json() + + if "error" in result: + raise Exception(f"MCP tool error: {{result['error']['message']}}") + + return result.get("result", {{}}) + +# Inject into module namespace +import sys +current_module = sys.modules[__name__] +current_module.mcp_call = mcp_call +"#, + server_url, self.config.timeout_ms + ) + } + + /// Write JavaScript runtime to a file + pub fn write_javascript_runtime(&self, path: &std::path::Path) -> Result<()> { + let runtime_code = self.generate_javascript_runtime(); + std::fs::write(path, runtime_code)?; + Ok(()) + } + + /// Write Python runtime to a file + pub fn write_python_runtime(&self, path: &std::path::Path) -> Result<()> { + let runtime_code = self.generate_python_runtime(); + 
std::fs::write(path, runtime_code)?; + Ok(()) + } + + /// Setup runtime in a VM environment + pub async fn setup_vm_environment(&self, workspace_path: &std::path::Path) -> Result<()> { + // Create workspace directories + std::fs::create_dir_all(workspace_path.join("mcp-runtime"))?; + + // Write JavaScript runtime + self.write_javascript_runtime(&workspace_path.join("mcp-runtime/runtime.js"))?; + + // Write Python runtime + self.write_python_runtime(&workspace_path.join("mcp-runtime/runtime.py"))?; + + // Write package.json for Node.js + let package_json = serde_json::json!({ + "name": "mcp-runtime", + "version": "1.0.0", + "type": "module", + "main": "runtime.js" + }); + std::fs::write( + workspace_path.join("mcp-runtime/package.json"), + serde_json::to_string_pretty(&package_json)?, + )?; + + Ok(()) + } +} + +/// Builder for creating complete code execution packages +pub struct CodeExecutionPackage { + /// Generated wrapper code (TypeScript or Python) + pub wrapper_code: String, + /// Runtime bridge code + pub runtime_code: String, + /// Configuration + pub config: RuntimeConfig, +} + +impl CodeExecutionPackage { + /// Create a new code execution package for TypeScript + pub fn typescript(wrapper_code: String, config: RuntimeConfig) -> Self { + let runtime = McpRuntime::new(config.clone()); + Self { + wrapper_code, + runtime_code: runtime.generate_javascript_runtime(), + config, + } + } + + /// Create a new code execution package for Python + pub fn python(wrapper_code: String, config: RuntimeConfig) -> Self { + let runtime = McpRuntime::new(config.clone()); + Self { + wrapper_code, + runtime_code: runtime.generate_python_runtime(), + config, + } + } + + /// Write the complete package to a directory + pub fn write_to_directory(&self, dir: &std::path::Path) -> Result<()> { + std::fs::create_dir_all(dir)?; + + // Determine file extensions based on content + let (wrapper_name, runtime_name) = if self.wrapper_code.contains("export async function") { + ("terraphim.ts", 
"runtime.js") + } else { + ("terraphim.py", "runtime.py") + }; + + std::fs::write(dir.join(wrapper_name), &self.wrapper_code)?; + std::fs::write(dir.join(runtime_name), &self.runtime_code)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_generate_javascript_runtime() { + let config = RuntimeConfig { + mcp_server_url: Some("http://localhost:3001".to_string()), + timeout_ms: 30000, + ..Default::default() + }; + + let runtime = McpRuntime::new(config); + let code = runtime.generate_javascript_runtime(); + + assert!(code.contains("http://localhost:3001")); + assert!(code.contains("async function mcpCall")); + assert!(code.contains("globalThis.mcpCall")); + } + + #[test] + fn test_generate_python_runtime() { + let config = RuntimeConfig { + mcp_server_url: Some("http://localhost:3001".to_string()), + timeout_ms: 30000, + ..Default::default() + }; + + let runtime = McpRuntime::new(config); + let code = runtime.generate_python_runtime(); + + assert!(code.contains("http://localhost:3001")); + assert!(code.contains("async def mcp_call")); + assert!(code.contains("aiohttp")); + } + + #[test] + fn test_code_execution_package() { + let wrapper = "export async function search() {}".to_string(); + let config = RuntimeConfig::default(); + + let package = CodeExecutionPackage::typescript(wrapper, config); + + assert!(package.wrapper_code.contains("export async")); + assert!(package.runtime_code.contains("mcpCall")); + } +} diff --git a/crates/terraphim_mcp_codegen/src/typescript_gen.rs b/crates/terraphim_mcp_codegen/src/typescript_gen.rs new file mode 100644 index 00000000..13a840c9 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/typescript_gen.rs @@ -0,0 +1,258 @@ +//! 
TypeScript Code Generator for MCP Tools + +use crate::{ + introspection::{to_camel_case, to_pascal_case}, + CodeGenerator, CodegenConfig, McpServerMetadata, Result, ToolMetadata, +}; +use tera::{Context, Tera}; + +const TYPESCRIPT_MODULE_TEMPLATE: &str = r#" +/** + * {{ server_name }} MCP Tools + * {{ server_description }} + * + * Generated automatically from MCP server introspection. + * Version: {{ server_version }} + * + * Usage: + * ```typescript + * import { {{ module_name }} } from './{{ module_name }}'; + * + * const results = await {{ module_name }}.search({ query: "rust patterns" }); + * ``` + */ + +// Runtime type for MCP call results +interface McpCallResult { + content: Array<{ type: string; text?: string; resource?: any }>; + isError?: boolean; +} + +// MCP Runtime - connects to actual MCP server +declare const mcpCall: (toolName: string, params: Record) => Promise; + +{% for tool in tools %} +{% if include_docs %} +/** + * {{ tool.description }} + * + * Category: {{ tool.category }} + * Capabilities: {{ tool.capabilities | join(sep=", ") }} +{% for param in tool.parameters %} + * @param {{ param.name }} - {{ param.description }}{% if not param.required %} (optional){% endif %} +{% endfor %} + * @returns Promise +{% if include_examples %} + * + * @example + * ```typescript +{% for example in tool.examples %} + * {{ example | replace(from="\n", to="\n * ") }} +{% endfor %} + * ``` +{% endif %} + */ +{% endif %} +export interface {{ tool.pascal_name }}Params { +{% for param in tool.parameters %} + {{ param.name }}{% if not param.required %}?{% endif %}: {{ param.typescript_type }}; +{% endfor %} +} + +export async function {{ tool.camel_name }}( + params: {{ tool.pascal_name }}Params +): Promise { + return await mcpCall('{{ tool.name }}', params); +} + +{% endfor %} + +// Main module export +export const {{ module_name }} = { +{% for tool in tools %} + {{ tool.camel_name }}, +{% endfor %} +}; + +// Default export +export default {{ module_name }}; +"#; + 
+/// TypeScript code generator +pub struct TypeScriptGenerator { + tera: Tera, +} + +impl TypeScriptGenerator { + /// Create a new TypeScript generator + pub fn new() -> Result<Self> { + let mut tera = Tera::default(); + tera.add_raw_template("typescript_module", TYPESCRIPT_MODULE_TEMPLATE)?; + + Ok(Self { tera }) + } +} + +impl CodeGenerator for TypeScriptGenerator { + fn generate(&self, metadata: &McpServerMetadata, config: &CodegenConfig) -> Result<String> { + let mut context = Context::new(); + + // Server info + context.insert("server_name", &metadata.name); + context.insert("server_version", &metadata.version); + context.insert( + "server_description", + metadata.description.as_deref().unwrap_or(""), + ); + context.insert("module_name", &config.module_name); + context.insert("include_docs", &config.include_docs); + context.insert("include_examples", &config.include_examples); + + // Transform tools for template + let tools: Vec<serde_json::Value> = metadata + .tools + .iter() + .map(|tool| { + let params: Vec<serde_json::Value> = tool + .parameters + .iter() + .map(|p| { + serde_json::json!({ + "name": p.name, + "description": p.description, + "typescript_type": p.to_typescript_type(), + "required": p.required, + }) + }) + .collect(); + + serde_json::json!({ + "name": tool.name, + "camel_name": to_camel_case(&tool.name), + "pascal_name": to_pascal_case(&tool.name), + "description": tool.description, + "category": tool.category.to_string(), + "capabilities": tool.capabilities, + "parameters": params, + "examples": tool.examples, + }) + }) + .collect(); + + context.insert("tools", &tools); + + let rendered = self.tera.render("typescript_module", &context)?; + + // Clean up extra whitespace + let cleaned = rendered + .lines() + .filter(|line| !line.trim().is_empty() || line.is_empty()) + .collect::<Vec<&str>>() + .join("\n"); + + Ok(cleaned.trim().to_string()) + } + + fn generate_tool(&self, tool: &ToolMetadata, config: &CodegenConfig) -> Result<String> { + // Create single-tool metadata + let metadata = McpServerMetadata { + 
name: "terraphim".to_string(), + version: "1.0.0".to_string(), + tools: vec![tool.clone()], + description: None, + }; + + self.generate(&metadata, config) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ParameterMetadata, ToolCategory}; + + #[test] + fn test_generate_simple_tool() { + let generator = TypeScriptGenerator::new().unwrap(); + + let tool = ToolMetadata { + name: "search".to_string(), + title: Some("Search".to_string()), + description: "Search for documents".to_string(), + category: ToolCategory::KnowledgeGraph, + capabilities: vec!["search".to_string()], + parameters: vec![ + ParameterMetadata { + name: "query".to_string(), + description: "The search query".to_string(), + json_type: "string".to_string(), + required: true, + default_value: None, + array_item_type: None, + object_properties: None, + }, + ParameterMetadata { + name: "limit".to_string(), + description: "Max results".to_string(), + json_type: "integer".to_string(), + required: false, + default_value: None, + array_item_type: None, + object_properties: None, + }, + ], + return_type: "Promise".to_string(), + examples: vec!["const r = await terraphim.search({...})".to_string()], + }; + + let config = CodegenConfig::default(); + let code = generator.generate_tool(&tool, &config).unwrap(); + + assert!(code.contains("export interface SearchParams")); + assert!(code.contains("query: string;")); + assert!(code.contains("limit?: number;")); + assert!(code.contains("export async function search")); + assert!(code.contains("mcpCall('search'")); + } + + #[test] + fn test_generate_multiple_tools() { + let generator = TypeScriptGenerator::new().unwrap(); + + let metadata = McpServerMetadata { + name: "terraphim-mcp".to_string(), + version: "0.1.0".to_string(), + tools: vec![ + ToolMetadata { + name: "search".to_string(), + title: None, + description: "Search documents".to_string(), + category: ToolCategory::KnowledgeGraph, + capabilities: vec!["search".to_string()], + parameters: vec![], 
+ return_type: "Promise".to_string(), + examples: vec![], + }, + ToolMetadata { + name: "autocomplete_terms".to_string(), + title: None, + description: "Get suggestions".to_string(), + category: ToolCategory::Autocomplete, + capabilities: vec!["autocomplete".to_string()], + parameters: vec![], + return_type: "Promise".to_string(), + examples: vec![], + }, + ], + description: Some("Terraphim MCP Server".to_string()), + }; + + let config = CodegenConfig::default(); + let code = generator.generate(&metadata, &config).unwrap(); + + assert!(code.contains("export async function search")); + assert!(code.contains("export async function autocompleteTerms")); + assert!(code.contains("search,")); + assert!(code.contains("autocompleteTerms,")); + } +} From bcf33a298ca615dfb79e2f9ad234711c386ddb81 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 16 Nov 2025 12:19:47 +0000 Subject: [PATCH 3/3] feat: Add code-first prompt templates for agent code execution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds optimized prompt templates that guide agents to write code instead of making direct tool calls, enabling the Code Execution with MCP approach for massive token reduction. New features: - Code execution prompts module in terraphim_multi_agent - TypeScript system prompt with complete MCP tool documentation - Python system prompt with type-annotated tool usage - Task analysis for automatic execution mode selection - Anti-patterns guidance to avoid token waste - Examples showing in-environment data processing Key capabilities: - Agents can now be configured for code-first behavior - Prompts emphasize processing data in-environment - Return only minimal results (not raw data) - Parallel execution patterns for efficiency - Error handling and robustness guidelines This completes the foundation for Code Execution with MCP: 1. ✅ MCP code generation (TypeScript/Python wrappers) 2. ✅ Runtime bridge for VM environments 3. 
✅ Code-first prompts for agents 4. 🔲 VM integration (next phase) 5. 🔲 End-to-end testing (next phase) Expected token reduction: 98% for complex workflows --- crates/terraphim_multi_agent/src/lib.rs | 1 + .../src/prompts/code_execution.rs | 474 ++++++++++++++++++ .../terraphim_multi_agent/src/prompts/mod.rs | 6 + 3 files changed, 481 insertions(+) create mode 100644 crates/terraphim_multi_agent/src/prompts/code_execution.rs create mode 100644 crates/terraphim_multi_agent/src/prompts/mod.rs diff --git a/crates/terraphim_multi_agent/src/lib.rs b/crates/terraphim_multi_agent/src/lib.rs index 8ddacccb..827c4ff5 100644 --- a/crates/terraphim_multi_agent/src/lib.rs +++ b/crates/terraphim_multi_agent/src/lib.rs @@ -35,6 +35,7 @@ pub mod genai_llm_client; pub mod history; pub mod llm_types; pub mod prompt_sanitizer; +pub mod prompts; pub mod vm_execution; // pub mod llm_client; // Disabled - uses rig-core // pub mod simple_llm_client; // Disabled - uses rig-core diff --git a/crates/terraphim_multi_agent/src/prompts/code_execution.rs b/crates/terraphim_multi_agent/src/prompts/code_execution.rs new file mode 100644 index 00000000..8de44ed1 --- /dev/null +++ b/crates/terraphim_multi_agent/src/prompts/code_execution.rs @@ -0,0 +1,474 @@ +//! Code-first prompt templates for MCP tool usage +//! +//! These prompts optimize agents to generate code that imports and uses MCP tools +//! programmatically, achieving massive token reduction compared to traditional tool calling. + +use std::collections::HashMap; + +/// System prompt for code-first agents using TypeScript +pub const TYPESCRIPT_SYSTEM_PROMPT: &str = r#" +You are an AI assistant that solves problems by writing executable code. + +## Core Approach: Code Over Tool Calls + +Instead of making individual tool calls, you write code that imports and uses MCP tools as modules. 
+This approach: +- Reduces token usage by 98% (processing 150K tokens down to 2K) +- Processes data efficiently within the code execution environment +- Returns only the final results, not intermediate data + +## Available MCP Tools + +Import tools from the 'mcp-servers' module: + +```typescript +import { terraphim } from 'mcp-servers'; +``` + +### Knowledge Graph Tools +- `terraphim.search({ query, role?, limit?, skip? })` - Search documents in knowledge graph +- `terraphim.findMatches({ text, role?, returnPositions? })` - Find term matches using Aho-Corasick +- `terraphim.isAllTermsConnectedByPath({ text, role? })` - Check if matched terms connect via single path + +### Autocomplete Tools +- `terraphim.autocompleteTerms({ query, limit?, role? })` - Get term suggestions +- `terraphim.autocompleteWithSnippets({ query, limit?, role? })` - Get suggestions with snippets +- `terraphim.fuzzyAutocompleteSearch({ query, similarity?, limit? })` - Fuzzy search with Jaro-Winkler +- `terraphim.fuzzyAutocompleteSearchLevenshtein({ query, maxEditDistance?, limit? })` - Fuzzy with Levenshtein +- `terraphim.buildAutocompleteIndex({ role? })` - Build FST index for role + +### Text Processing Tools +- `terraphim.replaceMatches({ text, role?, linkType })` - Replace matches with links +- `terraphim.extractParagraphsFromAutomata({ text, role?, includeTerm? })` - Extract paragraphs with matches +- `terraphim.jsonDecode({ jsonlines })` - Parse Logseq JSON + +### Configuration & Data Tools +- `terraphim.updateConfigTool({ configStr })` - Update configuration +- `terraphim.loadThesaurus({ automataPath })` - Load thesaurus from file/URL +- `terraphim.loadThesaurusFromJson({ jsonStr })` - Load thesaurus from JSON +- `terraphim.serializeAutocompleteIndex()` - Serialize index to base64 +- `terraphim.deserializeAutocompleteIndex({ base64Data })` - Deserialize index + +## Code Writing Guidelines + +1. **Import only what you need** - Don't load unnecessary tools +2. 
**Process data in-environment** - Filter, transform, aggregate before returning +3. **Return minimal results** - Only the final answer, not intermediate data +4. **Use async/await** - All tool calls are asynchronous +5. **Handle errors gracefully** - Use try/catch for robustness +6. **Add comments** - Explain your logic for clarity + +## Example: Document Analysis + +User: "Find documents about async Rust patterns and summarize the top results" + +```typescript +import { terraphim } from 'mcp-servers'; + +async function analyzeAsyncRustPatterns() { + // Search for relevant documents + const results = await terraphim.search({ + query: "async rust patterns", + limit: 100 + }); + + // Filter high-quality results (processing in-environment, not through context) + const highQuality = results.filter(doc => doc.rank > 0.7); + + // Group by topic + const byTopic = highQuality.reduce((groups, doc) => { + const topic = extractTopic(doc); // Helper function + if (!groups[topic]) groups[topic] = []; + groups[topic].push(doc); + return groups; + }, {}); + + // Return only the summary, not all the documents + return { + total_found: highQuality.length, + topics: Object.keys(byTopic), + top_documents: highQuality.slice(0, 5).map(d => ({ + title: d.title, + url: d.url, + rank: d.rank + })), + by_topic: Object.entries(byTopic).map(([topic, docs]) => ({ + topic, + count: docs.length, + best_doc: docs[0] + })) + }; +} + +function extractTopic(doc) { + // Simple topic extraction from document + const keywords = doc.tags || []; + return keywords[0] || 'general'; +} + +// Execute and return results +const analysis = await analyzeAsyncRustPatterns(); +console.log(JSON.stringify(analysis, null, 2)); +``` + +## Example: Term Connectivity Analysis + +```typescript +import { terraphim } from 'mcp-servers'; + +async function analyzeConnectivity(text: string) { + // Check if terms are connected in knowledge graph + const connected = await terraphim.isAllTermsConnectedByPath({ text }); + + // 
Get all matches + const matches = await terraphim.findMatches({ + text, + returnPositions: true + }); + + // Extract relevant paragraphs + const paragraphs = await terraphim.extractParagraphsFromAutomata({ + text, + includeTerm: true + }); + + return { + text_length: text.length, + terms_connected: connected, + match_count: matches.length, + key_paragraphs: paragraphs.length, + connectivity_score: connected ? 1.0 : matches.length > 0 ? 0.5 : 0.0 + }; +} +``` + +## Anti-Patterns to Avoid + +❌ **DON'T** pass large datasets through the result: +```typescript +// BAD - Returns all 1000 documents through context +const docs = await terraphim.search({ limit: 1000 }); +return docs; // Expensive! +``` + +✅ **DO** process and summarize: +```typescript +// GOOD - Returns only summary +const docs = await terraphim.search({ limit: 1000 }); +return { + count: docs.length, + top_5: docs.slice(0, 5) +}; +``` + +❌ **DON'T** make sequential calls when you can batch: +```typescript +// BAD - Multiple calls +const a = await terraphim.search({ query: "topic A" }); +const b = await terraphim.search({ query: "topic B" }); +const c = await terraphim.search({ query: "topic C" }); +``` + +✅ **DO** use concurrent calls: +```typescript +// GOOD - Parallel execution +const [a, b, c] = await Promise.all([ + terraphim.search({ query: "topic A" }), + terraphim.search({ query: "topic B" }), + terraphim.search({ query: "topic C" }) +]); +``` + +When you receive a task, write executable code that solves it efficiently using the MCP tools. +Focus on returning minimal, actionable results. +"#; + +/// System prompt for code-first agents using Python +pub const PYTHON_SYSTEM_PROMPT: &str = r#" +You are an AI assistant that solves problems by writing executable Python code. + +## Core Approach: Code Over Tool Calls + +Instead of making individual tool calls, you write code that imports and uses MCP tools as modules. 
+This approach: +- Reduces token usage by 98% (processing 150K tokens down to 2K) +- Processes data efficiently within the code execution environment +- Returns only the final results, not intermediate data + +## Available MCP Tools + +Import tools from the terraphim module: + +```python +from terraphim import terraphim +``` + +### Knowledge Graph Tools +- `await terraphim.search(query, role=None, limit=None, skip=None)` - Search documents +- `await terraphim.find_matches(text, role=None, return_positions=None)` - Find term matches +- `await terraphim.is_all_terms_connected_by_path(text, role=None)` - Check connectivity + +### Autocomplete Tools +- `await terraphim.autocomplete_terms(query, limit=None, role=None)` - Get suggestions +- `await terraphim.autocomplete_with_snippets(query, limit=None, role=None)` - With snippets +- `await terraphim.fuzzy_autocomplete_search(query, similarity=None, limit=None)` - Fuzzy search + +### Text Processing Tools +- `await terraphim.replace_matches(text, role=None, link_type=...)` - Replace with links +- `await terraphim.extract_paragraphs_from_automata(text, role=None)` - Extract paragraphs +- `await terraphim.json_decode(jsonlines)` - Parse Logseq JSON + +## Code Writing Guidelines + +1. **Import only what you need** - Don't load unnecessary modules +2. **Process data in-environment** - Filter, transform, aggregate before returning +3. **Return minimal results** - Only the final answer, not intermediate data +4. **Use async/await** - All tool calls are asynchronous +5. **Handle errors gracefully** - Use try/except for robustness +6. 
**Add comments** - Explain your logic for clarity + +## Example: Document Analysis + +```python +from terraphim import terraphim +import asyncio +from collections import defaultdict + +async def analyze_async_rust_patterns(): + # Search for relevant documents + results = await terraphim.search( + query="async rust patterns", + limit=100 + ) + + # Filter high-quality results (processing in-environment) + high_quality = [doc for doc in results if doc.get('rank', 0) > 0.7] + + # Group by topic + by_topic = defaultdict(list) + for doc in high_quality: + topic = doc.get('tags', ['general'])[0] if doc.get('tags') else 'general' + by_topic[topic].append(doc) + + # Return only the summary + return { + 'total_found': len(high_quality), + 'topics': list(by_topic.keys()), + 'top_documents': [ + {'title': d.get('title'), 'url': d.get('url'), 'rank': d.get('rank')} + for d in high_quality[:5] + ], + 'by_topic': [ + {'topic': topic, 'count': len(docs), 'best_doc': docs[0]} + for topic, docs in by_topic.items() + ] + } + +# Execute +result = asyncio.run(analyze_async_rust_patterns()) +print(result) +``` + +When you receive a task, write executable Python code that solves it efficiently using the MCP tools. +Focus on returning minimal, actionable results. +"#; + +/// Generate a task-specific prompt that includes code execution context +pub fn generate_task_prompt(task: &str, language: &str) -> String { + let system_prompt = match language { + "python" | "py" => PYTHON_SYSTEM_PROMPT, + _ => TYPESCRIPT_SYSTEM_PROMPT, + }; + + format!( + "{}\n\n## Current Task\n\n{}\n\nWrite code to solve this task. 
\ + Return only the final results needed to answer the question.", + system_prompt, task + ) +} + +/// Wrapper for code execution context +pub struct CodeExecutionPrompt { + pub system_prompt: String, + pub language: String, + pub available_tools: Vec<String>, +} + +impl CodeExecutionPrompt { + /// Create a new TypeScript code execution prompt + pub fn typescript() -> Self { + Self { + system_prompt: TYPESCRIPT_SYSTEM_PROMPT.to_string(), + language: "typescript".to_string(), + available_tools: vec![ + "search".to_string(), + "autocomplete_terms".to_string(), + "autocomplete_with_snippets".to_string(), + "fuzzy_autocomplete_search".to_string(), + "find_matches".to_string(), + "replace_matches".to_string(), + "extract_paragraphs_from_automata".to_string(), + "is_all_terms_connected_by_path".to_string(), + "load_thesaurus".to_string(), + "build_autocomplete_index".to_string(), + ], + } + } + + /// Create a new Python code execution prompt + pub fn python() -> Self { + Self { + system_prompt: PYTHON_SYSTEM_PROMPT.to_string(), + language: "python".to_string(), + available_tools: vec![ + "search".to_string(), + "autocomplete_terms".to_string(), + "autocomplete_with_snippets".to_string(), + "fuzzy_autocomplete_search".to_string(), + "find_matches".to_string(), + "replace_matches".to_string(), + "extract_paragraphs_from_automata".to_string(), + "is_all_terms_connected_by_path".to_string(), + "load_thesaurus".to_string(), + "build_autocomplete_index".to_string(), + ], + } + } + + /// Generate a complete prompt for a specific task + pub fn for_task(&self, task: &str) -> String { + generate_task_prompt(task, &self.language) + } +} + +/// Code execution mode for agents +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum CodeExecutionMode { + /// Traditional tool calling (no code execution) + Traditional, + /// Code-first approach with TypeScript + TypeScript, + /// Code-first approach with Python + Python, + /// Automatic selection based on task + Auto, +} + +impl Default for 
CodeExecutionMode { + fn default() -> Self { + CodeExecutionMode::Auto + } +} + +/// Analyze a task to determine the best code execution mode +pub fn recommend_execution_mode(task: &str) -> CodeExecutionMode { + let task_lower = task.to_lowercase(); + + // Tasks that benefit from code execution + let code_patterns = [ + "analyze", + "summarize", + "filter", + "group", + "aggregate", + "process", + "transform", + "compare", + "calculate", + "statistics", + "multiple documents", + "batch", + "all documents", + ]; + + // Tasks better suited for traditional approach + let traditional_patterns = [ + "single", + "one document", + "quick lookup", + "simple search", + "what is", + "define", + ]; + + let code_score: i32 = code_patterns + .iter() + .filter(|p| task_lower.contains(*p)) + .count() as i32; + + let traditional_score: i32 = traditional_patterns + .iter() + .filter(|p| task_lower.contains(*p)) + .count() as i32; + + if code_score > traditional_score { + // Prefer TypeScript for most tasks as it's more widely supported + CodeExecutionMode::TypeScript + } else if traditional_score > code_score { + CodeExecutionMode::Traditional + } else { + // Default to code execution for efficiency + CodeExecutionMode::TypeScript + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_typescript_prompt_contains_tools() { + assert!(TYPESCRIPT_SYSTEM_PROMPT.contains("terraphim.search")); + assert!(TYPESCRIPT_SYSTEM_PROMPT.contains("terraphim.findMatches")); + assert!(TYPESCRIPT_SYSTEM_PROMPT.contains("import { terraphim }")); + } + + #[test] + fn test_python_prompt_contains_tools() { + assert!(PYTHON_SYSTEM_PROMPT.contains("terraphim.search")); + assert!(PYTHON_SYSTEM_PROMPT.contains("terraphim.find_matches")); + assert!(PYTHON_SYSTEM_PROMPT.contains("from terraphim import")); + } + + #[test] + fn test_generate_task_prompt() { + let task = "Find documents about Rust async patterns"; + let prompt = generate_task_prompt(task, "typescript"); + + 
assert!(prompt.contains("Current Task")); + assert!(prompt.contains("Rust async patterns")); + assert!(prompt.contains("terraphim.search")); + } + + #[test] + fn test_recommend_execution_mode() { + let analysis_task = "Analyze all documents about Rust and summarize the key patterns"; + assert_eq!( + recommend_execution_mode(analysis_task), + CodeExecutionMode::TypeScript + ); + + let simple_task = "What is the definition of async?"; + assert_eq!( + recommend_execution_mode(simple_task), + CodeExecutionMode::Traditional + ); + + let batch_task = "Process multiple documents and aggregate results"; + assert_eq!( + recommend_execution_mode(batch_task), + CodeExecutionMode::TypeScript + ); + } + + #[test] + fn test_code_execution_prompt_builder() { + let ts_prompt = CodeExecutionPrompt::typescript(); + assert_eq!(ts_prompt.language, "typescript"); + assert!(ts_prompt.available_tools.contains(&"search".to_string())); + + let py_prompt = CodeExecutionPrompt::python(); + assert_eq!(py_prompt.language, "python"); + assert!(py_prompt.available_tools.contains(&"search".to_string())); + } +} diff --git a/crates/terraphim_multi_agent/src/prompts/mod.rs b/crates/terraphim_multi_agent/src/prompts/mod.rs new file mode 100644 index 00000000..da20b5f4 --- /dev/null +++ b/crates/terraphim_multi_agent/src/prompts/mod.rs @@ -0,0 +1,6 @@ +//! Prompt templates for AI agents +//! +//! This module provides optimized prompts for different agent behaviors, +//! including code-first approaches for MCP tool usage. + +pub mod code_execution;