diff --git a/CODE_EXECUTION_MCP_GAP_ANALYSIS.md b/CODE_EXECUTION_MCP_GAP_ANALYSIS.md new file mode 100644 index 00000000..aa2f573d --- /dev/null +++ b/CODE_EXECUTION_MCP_GAP_ANALYSIS.md @@ -0,0 +1,573 @@ +# Code Execution with MCP - Gap Analysis for Terraphim AI + +**Version:** 1.0 +**Date:** 2025-11-15 +**Status:** Planning + +## Executive Summary + +This document analyzes Terraphim AI's current capabilities against the requirements for implementing Anthropic's Code Execution with MCP approach. Overall assessment: **60% capability exists**, requiring targeted development in specific areas. + +## Current Capabilities ✅ + +### 1. Secure Code Execution Environment +**Status:** ✅ **COMPLETE** + +Terraphim AI has a fully functional VM execution system: + +- **Firecracker VMs:** Sub-2 second boot times +- **VM Pooling:** Efficient resource management +- **Multiple Languages:** Python, JavaScript, Bash, Rust +- **Security:** Sandboxed execution with resource limits +- **Monitoring:** Execution metrics and error tracking + +**Location:** `crates/terraphim_multi_agent/src/vm_execution/` + +**Evidence:** +```rust +// crates/terraphim_multi_agent/src/vm_execution/models.rs +pub struct VmExecutionConfig { + pub enabled: bool, + pub api_base_url: String, + pub vm_pool_size: u32, + pub default_vm_type: String, + pub execution_timeout_ms: u64, + pub allowed_languages: Vec, + // ... more configuration +} +``` + +**Capabilities:** +- Execute code in isolated VMs +- Timeout enforcement +- Resource limits (CPU, memory, disk) +- Command history tracking +- Snapshot and rollback support +- Code validation before execution + +### 2. 
MCP Server Implementation +**Status:** ✅ **COMPLETE** + +Comprehensive MCP server with 17 tools: + +- **Location:** `crates/terraphim_mcp_server/` +- **Tools Available:** + - search + - autocomplete_terms + - autocomplete_with_snippets + - fuzzy_autocomplete_search + - find_matches + - replace_matches + - extract_paragraphs_from_automata + - load_thesaurus + - build_autocomplete_index + - serialize/deserialize_autocomplete_index + - is_all_terms_connected_by_path + - json_decode + - update_config_tool + +**Evidence:** +```rust +// crates/terraphim_mcp_server/src/lib.rs +impl ServerHandler for McpService { + async fn list_tools(...) -> Result { + // 17 tools exposed via MCP protocol + } +} +``` + +### 3. Agent System +**Status:** ✅ **COMPLETE** + +Multi-agent system with lifecycle management: + +- **Supervisor:** `crates/terraphim_agent_supervisor/` +- **Multi-agent:** `crates/terraphim_multi_agent/` +- **Registry:** `crates/terraphim_agent_registry/` +- **Messaging:** `crates/terraphim_agent_messaging/` + +**Capabilities:** +- Agent lifecycle (init, start, stop, terminate) +- Health checks and monitoring +- Agent supervision and restart policies +- Inter-agent communication +- Task decomposition +- Goal alignment + +### 4. State Persistence +**Status:** ✅ **COMPLETE** + +Comprehensive state management: + +- **Location:** `crates/terraphim_persistence/` +- **Backends:** Memory, DashMap, SQLite, Redb +- **Features:** + - Document storage + - Configuration persistence + - State snapshots + - Versioned memory (agent evolution) + +### 5. Code Extraction +**Status:** ✅ **COMPLETE** + +Extract code blocks from LLM responses: + +- **Location:** `crates/terraphim_multi_agent/src/vm_execution/code_extractor.rs` +- **Features:** + - Markdown code block parsing + - Language detection + - Execution intent detection + - Confidence scoring + +## Missing Capabilities ❌ + +### 1. 
MCP Servers as Code APIs +**Status:** ❌ **NOT IMPLEMENTED** + +**Current State:** +- MCP tools exposed only via MCP protocol +- Direct tool calling through request/response +- No programmatic import interface + +**Required:** +```typescript +// Need to support this: +import { terraphim } from 'mcp-servers'; + +const results = await terraphim.search({ + query: "rust async patterns", + limit: 10 +}); + +const filtered = results.filter(doc => doc.rank > 0.8); +``` + +**Gap:** +- No TypeScript/Python module wrappers for MCP tools +- No import mechanism in code execution environment +- No module discovery API + +**Location to Implement:** +- New crate: `crates/terraphim_mcp_codegen/` +- Modify: `crates/terraphim_multi_agent/src/vm_execution/` + +### 2. Progressive Tool Discovery +**Status:** ❌ **NOT IMPLEMENTED** + +**Current State:** +- All tools listed via `list_tools()` +- No search or filtering +- No dynamic documentation + +**Required:** +```typescript +// Need to support: +import { searchTools, getToolDocs } from 'mcp-runtime'; + +const tools = await searchTools({ + category: 'knowledge-graph', + capabilities: ['search', 'autocomplete'] +}); + +const docs = await getToolDocs('terraphim.search'); +``` + +**Gap:** +- No tool categorization system +- No tool search functionality +- No dynamic documentation generation +- No capability-based filtering + +**Location to Implement:** +- New module: `crates/terraphim_mcp_server/src/discovery.rs` +- Update: `crates/terraphim_mcp_server/src/lib.rs` + +### 3. 
In-Environment Data Processing +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- VM execution runs code +- But MCP tools not accessible within VM +- Results still pass through context + +**Required:** +- MCP tools callable from within VM environment +- Data processing happens in VM before returning +- Only final results exit to agent + +**Gap:** +- No MCP runtime in VM environment +- No bridge between VM execution and MCP tools +- No in-VM data transformation utilities + +**Location to Implement:** +- New module: `crates/terraphim_multi_agent/src/vm_execution/mcp_runtime.rs` +- Update: `crates/terraphim_multi_agent/src/vm_execution/client.rs` + +### 4. Skill Library System +**Status:** ❌ **NOT IMPLEMENTED** + +**Current State:** +- No skill storage mechanism +- No SKILL.MD pattern +- No reusable function library + +**Required:** +```markdown +# SKILL.MD: Knowledge Graph Analysis + +## Function +async function analyzeConnectivity(text: string): Promise { + // Reusable skill implementation +} + +## Usage History +- Success rate: 95% +- Average execution time: 1.2s +``` + +**Gap:** +- No skill storage directory structure +- No skill discovery/search +- No usage tracking +- No skill versioning + +**Location to Implement:** +- New crate: `crates/terraphim_skills/` +- Directory: `skills/` in workspace root + +### 5. Agent Code Generation Optimization +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- Agents use LLM for responses +- Code extraction exists +- But not optimized for code-first approach + +**Required:** +- Agents preferentially generate code over tool calls +- Code generation prompts optimized +- Import-based tool usage +- Error handling in code + +**Gap:** +- No code-first prompt templates +- No examples of MCP tool imports in prompts +- No code quality feedback loop + +**Location to Implement:** +- Update: `crates/terraphim_multi_agent/src/agent.rs` +- New: `crates/terraphim_multi_agent/src/prompts/code_execution.rs` + +### 6. 
Token Usage Optimization +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- Token tracking exists +- Cost tracking exists +- But not optimized for code execution pattern + +**Required:** +- Track token savings from code execution +- Compare traditional vs code approach +- Metrics dashboard + +**Gap:** +- No comparison metrics +- No optimization recommendations +- No A/B testing framework + +**Location to Implement:** +- Update: `crates/terraphim_multi_agent/src/agent.rs` +- New: `crates/terraphim_multi_agent/src/metrics/code_execution.rs` + +### 7. Workspace Management +**Status:** ⚠️ **PARTIAL** + +**Current State:** +- VM execution has temporary storage +- But no structured workspace + +**Required:** +``` +workspace/ + ├── data/ # Temporary data files + ├── results/ # Execution results + ├── checkpoints/ # Saved state snapshots + └── skills/ # Reusable skill library +``` + +**Gap:** +- No workspace directory structure +- No file management utilities +- No cleanup policies + +**Location to Implement:** +- New module: `crates/terraphim_multi_agent/src/workspace.rs` + +## Capability Matrix + +| Requirement | Status | Priority | Effort | Notes | +|------------|--------|----------|--------|-------| +| Secure Code Execution | ✅ Complete | - | - | Firecracker VMs ready | +| MCP Server | ✅ Complete | - | - | 17 tools available | +| Agent System | ✅ Complete | - | - | Full lifecycle management | +| State Persistence | ✅ Complete | - | - | Multiple backends | +| Code Extraction | ✅ Complete | - | - | Parse markdown blocks | +| **MCP Code APIs** | ❌ Missing | **Critical** | **High** | Core requirement | +| **Progressive Discovery** | ❌ Missing | **High** | **Medium** | Scalability essential | +| **In-Environment Processing** | ⚠️ Partial | **Critical** | **High** | Token reduction key | +| **Skill Library** | ❌ Missing | **Medium** | **Medium** | Reusability benefit | +| **Code-First Prompts** | ⚠️ Partial | **High** | **Low** | Quick win | +| **Token Optimization** | ⚠️ 
Partial | **Medium** | **Low** | Metrics important | +| **Workspace Management** | ⚠️ Partial | **Low** | **Low** | Nice to have | + +## Summary Statistics + +- **Complete:** 5/12 (42%) +- **Partial:** 4/12 (33%) +- **Missing:** 3/12 (25%) +- **Overall Readiness:** ~60% + +## Critical Path Items + +To achieve minimum viable implementation: + +1. **MCP Code APIs** (Critical, High Effort) + - Convert MCP tools to importable modules + - Create runtime environment in VMs + - Enable code-based tool usage + +2. **In-Environment Processing** (Critical, High Effort) + - Bridge MCP tools to VM execution + - Process data within VM + - Return only final results + +3. **Code-First Prompts** (High, Low Effort) + - Update agent prompts + - Add code examples + - Optimize for imports + +## Recommended Implementation Order + +### Phase 1: Foundation (4 weeks) +**Goal:** Basic code execution with MCP tools + +1. Create MCP code API layer + - TypeScript/Python wrappers + - Import mechanism + - Runtime in VM + +2. Update code-first prompts + - Add import examples + - Optimize for code generation + - Test with existing agents + +3. Implement in-environment processing + - MCP bridge to VM + - Data transformation utilities + - Result minimization + +**Success Criteria:** +- Agents can import and use MCP tools in code +- Basic workflow achieves >80% token reduction +- Code execution completes in <3 seconds + +### Phase 2: Discovery & Scale (4 weeks) +**Goal:** Support many tools efficiently + +1. Progressive tool discovery + - Tool search API + - Categorization system + - Dynamic documentation + +2. Workspace management + - Structured directories + - File utilities + - Cleanup policies + +3. 
Token optimization metrics + - Comparison tracking + - Dashboard creation + - Optimization recommendations + +**Success Criteria:** +- Tool discovery <100ms +- Support 100+ tools +- Token reduction metrics visible + +### Phase 3: Skills & Optimization (4 weeks) +**Goal:** Production-ready features + +1. Skill library system + - Storage structure + - SKILL.MD format + - Discovery and search + - Usage tracking + +2. Performance optimization + - Caching and memoization + - Resource pooling + - Load testing + +3. Production hardening + - Monitoring dashboards + - Error handling + - Documentation + +**Success Criteria:** +- Skills reusable across agents +- 98%+ token reduction achieved +- Production deployment ready + +## Next Steps + +1. **Review and approve** this gap analysis +2. **Prioritize** critical path items +3. **Create detailed tasks** for Phase 1 +4. **Assign resources** and timeline +5. **Begin implementation** of MCP Code APIs + +## Appendices + +### A. Code API Example + +Current MCP tool call: +```json +{ + "tool": "search", + "arguments": { + "query": "rust async patterns", + "limit": 10 + } +} +``` + +Desired code-based usage: +```typescript +import { terraphim } from 'mcp-servers'; + +async function analyzePatterns() { + const docs = await terraphim.search({ + query: "rust async patterns", + limit: 100 + }); + + const highQuality = docs.filter(d => d.rank > 0.8); + const byTopic = groupBy(highQuality, 'topic'); + + return { + total: highQuality.length, + topics: Object.keys(byTopic), + top_doc: highQuality[0] + }; +} +``` + +### B. 
Architecture Diagram + +``` +┌─────────────────────────────────────────────────┐ +│ Agent Layer │ +│ - Generates code instead of tool calls │ +│ - Optimized prompts for imports │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ Code Execution Layer │ +│ ┌────────────────────────────────────────┐ │ +│ │ Firecracker VM (existing) │ │ +│ │ ┌──────────────────────────────────┐ │ │ +│ │ │ MCP Runtime (NEW) │ │ │ +│ │ │ - Import MCP tools as modules │ │ │ +│ │ │ - Process data in-environment │ │ │ +│ │ │ - Return minimal results │ │ │ +│ │ └──────────────────────────────────┘ │ │ +│ └────────────────────────────────────────┘ │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ MCP Code API Layer (NEW) │ +│ - TypeScript/Python module wrappers │ +│ - Tool discovery API │ +│ - Documentation generation │ +└─────────────────┬───────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────┐ +│ MCP Server (existing) │ +│ - 17 knowledge graph tools │ +│ - Autocomplete, search, analysis │ +└─────────────────────────────────────────────────┘ +``` + +### C. Token Reduction Calculation + +**Baseline Workflow (Traditional):** +``` +1. Load all tool definitions: 17 tools × 800 tokens = 13,600 tokens +2. Call search: query (200) + results (8,000) = 8,200 tokens +3. Call autocomplete: query (200) + results (5,000) = 5,200 tokens +4. Call find_matches: query (300) + results (10,000) = 10,300 tokens +5. Agent processing and response: 2,000 tokens +Total: 39,300 tokens +``` + +**Code Execution Workflow:** +``` +1. Agent generates code: 1,200 tokens +2. Code executes in VM: + - Calls search (internal, no tokens) + - Calls autocomplete (internal, no tokens) + - Calls find_matches (internal, no tokens) + - Processes data (internal, no tokens) +3. 
Final result returned: 800 tokens +Total: 2,000 tokens +``` + +**Reduction: 95% (39,300 → 2,000)** + +### D. Implementation Checklist + +**Phase 1: Foundation** +- [ ] Create `crates/terraphim_mcp_codegen/` crate +- [ ] Generate TypeScript wrappers for all 17 MCP tools +- [ ] Generate Python wrappers for all 17 MCP tools +- [ ] Implement MCP runtime in VM environment +- [ ] Add import mechanism to code execution +- [ ] Create code-first prompt templates +- [ ] Update agent code generation logic +- [ ] Implement MCP bridge in VM execution client +- [ ] Add data transformation utilities +- [ ] Test end-to-end workflow +- [ ] Measure token reduction +- [ ] Document new patterns + +**Phase 2: Discovery & Scale** +- [ ] Implement tool search API +- [ ] Create tool categorization system +- [ ] Add capability-based filtering +- [ ] Generate dynamic documentation +- [ ] Create workspace directory structure +- [ ] Implement file management utilities +- [ ] Add cleanup policies +- [ ] Create token comparison metrics +- [ ] Build metrics dashboard +- [ ] Add optimization recommendations +- [ ] Load test with 100+ tools + +**Phase 3: Skills & Optimization** +- [ ] Design skill storage structure +- [ ] Implement SKILL.MD format parser +- [ ] Create skill discovery/search +- [ ] Add usage tracking +- [ ] Implement skill versioning +- [ ] Add caching layer +- [ ] Implement memoization +- [ ] Optimize resource pooling +- [ ] Load test with 1000+ concurrent agents +- [ ] Create monitoring dashboards +- [ ] Write production documentation +- [ ] Security audit and hardening diff --git a/CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md b/CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000..f87b57bb --- /dev/null +++ b/CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md @@ -0,0 +1,1127 @@ +# Code Execution with MCP - Implementation Plan + +**Version:** 1.0 +**Date:** 2025-11-15 +**Timeline:** 12 weeks (3 phases × 4 weeks) +**Status:** Ready for Implementation + +## Overview + 
+This document provides a detailed, actionable implementation plan to add Code Execution with MCP capabilities to Terraphim AI, targeting a 98% token reduction and significant performance improvements. + +## Validation Summary + +### Can Terraphim AI Run Agent Execution? + +**Current State: YES ✅ (with limitations)** + +Terraphim AI **can** run agent execution today: +- ✅ Agents can execute code in Firecracker VMs +- ✅ Agents have access to MCP tools via protocol +- ✅ Code extraction and execution pipeline exists +- ✅ Security sandbox operational + +**But NOT optimized for Anthropic's approach:** +- ❌ MCP tools not usable as code imports +- ❌ Data still flows through context window +- ❌ No progressive tool discovery +- ❌ No skill library system + +### What's Needed for Full Implementation? + +**Critical (Must Have):** +1. MCP Code API Layer - Convert MCP tools to importable modules +2. In-VM MCP Runtime - Enable tool usage within code execution +3. Code-First Prompts - Optimize agent prompts for code generation + +**Important (Should Have):** +4. Progressive Tool Discovery - Scale to 100+ tools +5. Token Optimization Metrics - Measure and track improvements + +**Nice to Have:** +6. Skill Library System - Reusable function patterns +7. Workspace Management - Structured file handling + +## Phase 1: Foundation (Weeks 1-4) + +### Goal +Enable basic code execution with MCP tools as importable modules. + +### Milestones + +#### Week 1: MCP Code API Layer Setup + +**Tasks:** +1. Create new crate structure + ```bash + cargo new --lib crates/terraphim_mcp_codegen + ``` + +2. Add dependencies + ```toml + # crates/terraphim_mcp_codegen/Cargo.toml + [dependencies] + serde = { version = "1.0", features = ["derive"] } + serde_json = "1.0" + tokio = { version = "1", features = ["full"] } + terraphim_mcp_server = { path = "../terraphim_mcp_server" } + tera = "1.19" # Template engine + ``` + +3. 
Design module structure + ``` + crates/terraphim_mcp_codegen/ + ├── src/ + │ ├── lib.rs + │ ├── typescript_gen.rs # TypeScript wrapper generation + │ ├── python_gen.rs # Python wrapper generation + │ ├── runtime.rs # MCP runtime for VMs + │ └── templates/ + │ ├── typescript.tera # TypeScript module template + │ └── python.tera # Python module template + └── Cargo.toml + ``` + +4. Implement tool introspection + ```rust + // crates/terraphim_mcp_codegen/src/lib.rs + pub struct ToolMetadata { + pub name: String, + pub description: String, + pub parameters: Vec, + pub return_type: String, + } + + pub fn introspect_mcp_tools() -> Vec { + // Extract tool metadata from MCP server + } + ``` + +**Deliverables:** +- [ ] `terraphim_mcp_codegen` crate created +- [ ] Tool introspection working +- [ ] Template system configured + +#### Week 2: Generate TypeScript Wrappers + +**Tasks:** +1. Create TypeScript template + ```typescript + // Template: templates/typescript.tera + export interface {{ tool_name | pascal_case }}Params { + {% for param in parameters %} + {{ param.name }}: {{ param.type }}; + {% endfor %} + } + + export async function {{ tool_name | camel_case }}( + params: {{ tool_name | pascal_case }}Params + ): Promise<{{ return_type }}> { + const response = await mcpCall('{{ tool_name }}', params); + return response; + } + ``` + +2. Implement code generator + ```rust + // crates/terraphim_mcp_codegen/src/typescript_gen.rs + pub struct TypeScriptGenerator { + template: tera::Tera, + } + + impl TypeScriptGenerator { + pub fn generate_module(&self, tools: &[ToolMetadata]) -> String { + // Generate TypeScript module from tools + } + } + ``` + +3. Generate wrapper for all 17 tools + ```bash + cargo run --bin mcp-codegen -- \ + --output workspace/mcp-servers/terraphim.ts \ + --format typescript + ``` + +4. 
Test wrapper in Node.js + ```typescript + import { search, autocompleteTerms } from './terraphim'; + + const results = await search({ + query: "rust async patterns", + limit: 10 + }); + console.log(results); + ``` + +**Deliverables:** +- [ ] TypeScript generator implemented +- [ ] All 17 tools wrapped +- [ ] TypeScript module tested + +#### Week 3: Generate Python Wrappers & MCP Runtime + +**Tasks:** +1. Create Python template + ```python + # Template: templates/python.tera + from typing import Dict, List, Optional + import asyncio + + async def {{ tool_name }}( + {% for param in parameters %} + {{ param.name }}: {{ param.python_type }}, + {% endfor %} + ) -> {{ return_type }}: + """{{ description }}""" + response = await mcp_call('{{ tool_name }}', { + {% for param in parameters %} + '{{ param.name }}': {{ param.name }}, + {% endfor %} + }) + return response + ``` + +2. Implement MCP runtime for VMs + ```rust + // crates/terraphim_mcp_codegen/src/runtime.rs + pub struct McpRuntime { + mcp_client: Arc, + } + + impl McpRuntime { + pub async fn call_tool(&self, name: &str, params: Value) -> Result { + // Forward call to MCP server + } + + pub fn inject_into_vm(&self, vm_id: &str) -> Result<()> { + // Make runtime available in VM + } + } + ``` + +3. Create bridge between VM and MCP + ```rust + // crates/terraphim_multi_agent/src/vm_execution/mcp_bridge.rs + pub struct McpBridge { + runtime: Arc, + } + + impl McpBridge { + pub async fn setup_vm_environment(&self, vm_id: &str) -> Result<()> { + // 1. Generate wrapper modules + // 2. Copy to VM filesystem + // 3. Inject MCP runtime + // 4. Configure imports + } + } + ``` + +**Deliverables:** +- [ ] Python generator implemented +- [ ] MCP runtime created +- [ ] VM-MCP bridge functional + +#### Week 4: Integration & Testing + +**Tasks:** +1. 
Update agent code generation prompts + ```rust + // crates/terraphim_multi_agent/src/prompts/code_execution.rs + pub const CODE_EXECUTION_SYSTEM_PROMPT: &str = r#" + You are an AI assistant that solves problems by writing code. + + Available MCP tools (import as modules): + ```typescript + import { terraphim } from 'mcp-servers'; + ``` + + Available functions: + - terraphim.search(query, options) + - terraphim.autocompleteTerms(query, limit) + - terraphim.findMatches(text, role) + // ... etc + + When solving problems: + 1. Import only the tools you need + 2. Process data within your code + 3. Return only the final result + 4. Use async/await for all tool calls + + Example: + ```typescript + import { terraphim } from 'mcp-servers'; + + async function analyzeDocuments(topic: string) { + const docs = await terraphim.search({ query: topic, limit: 100 }); + const relevant = docs.filter(d => d.rank > 0.8); + return { + count: relevant.length, + top_doc: relevant[0] + }; + } + ``` + "#; + ``` + +2. Modify agent to prefer code generation + ```rust + // crates/terraphim_multi_agent/src/agent.rs + impl TerraphimAgent { + async fn handle_command(&mut self, command: Command) -> Result { + // 1. Generate code instead of tool calls + let code = self.generate_code(&command).await?; + + // 2. Execute in VM with MCP runtime + let result = self.execute_code_in_vm(code).await?; + + // 3. Return only final result + Ok(result) + } + } + ``` + +3. End-to-end testing + ```rust + #[tokio::test] + async fn test_code_execution_workflow() { + let agent = create_test_agent().await; + + let command = Command::new("Find rust async patterns and summarize"); + + let result = agent.handle_command(command).await.unwrap(); + + assert!(result.token_count < 5000); // Should be much less than traditional + assert!(result.execution_time_ms < 3000); + assert!(result.contains_summary()); + } + ``` + +4. 
Token usage comparison + ```rust + #[tokio::test] + async fn test_token_reduction() { + let traditional_tokens = measure_traditional_approach().await; + let code_exec_tokens = measure_code_execution_approach().await; + + let reduction = (traditional_tokens - code_exec_tokens) as f64 + / traditional_tokens as f64; + + assert!(reduction > 0.80); // At least 80% reduction + } + ``` + +**Deliverables:** +- [ ] Code-first prompts implemented +- [ ] Agent integration complete +- [ ] End-to-end tests passing +- [ ] Token reduction measured (target: >80%) + +### Phase 1 Success Criteria + +- ✅ Agents can import and use MCP tools in generated code +- ✅ Code executes successfully in Firecracker VMs +- ✅ Token reduction >80% for typical workflows +- ✅ Execution time <3 seconds +- ✅ All 17 MCP tools available as imports + +## Phase 2: Discovery & Scale (Weeks 5-8) + +### Goal +Enable progressive tool discovery and support 100+ tools efficiently. + +### Milestones + +#### Week 5: Tool Discovery API + +**Tasks:** +1. Design tool metadata schema + ```rust + // crates/terraphim_mcp_server/src/discovery.rs + #[derive(Serialize, Deserialize)] + pub struct ToolMetadata { + pub name: String, + pub category: String, + pub capabilities: Vec, + pub description: String, + pub examples: Vec, + pub parameters: Vec, + } + + #[derive(Serialize, Deserialize)] + pub struct ToolSearchQuery { + pub category: Option, + pub capabilities: Option>, + pub keywords: Option>, + } + ``` + +2. Implement tool search + ```rust + pub struct ToolDiscovery { + tools: Vec, + index: SearchIndex, + } + + impl ToolDiscovery { + pub async fn search(&self, query: ToolSearchQuery) -> Vec { + // Search and filter tools + } + + pub async fn get_documentation(&self, tool_name: &str) -> Option { + // Generate markdown documentation + } + } + ``` + +3. 
Add MCP endpoints + ```rust + // New MCP tools: + // - search_tools(query) + // - get_tool_documentation(name) + // - list_categories() + // - list_capabilities() + ``` + +4. Test tool discovery + ```typescript + import { searchTools, getToolDocs } from 'mcp-servers'; + + const tools = await searchTools({ + category: 'knowledge-graph', + capabilities: ['search', 'autocomplete'] + }); + + const docs = await getToolDocs('terraphim.search'); + ``` + +**Deliverables:** +- [ ] Tool discovery API implemented +- [ ] Search functionality working +- [ ] Documentation generation functional + +#### Week 6: Categorization & Documentation + +**Tasks:** +1. Categorize existing tools + ```rust + // crates/terraphim_mcp_server/src/tool_categories.rs + pub enum ToolCategory { + KnowledgeGraph, + Autocomplete, + TextProcessing, + Configuration, + Analysis, + } + + pub fn categorize_tools() -> HashMap { + HashMap::from([ + ("search", ToolCategory::KnowledgeGraph), + ("autocomplete_terms", ToolCategory::Autocomplete), + ("find_matches", ToolCategory::TextProcessing), + // ... etc + ]) + } + ``` + +2. Generate rich documentation + ```markdown + # terraphim.search + + **Category:** Knowledge Graph + **Capabilities:** search, semantic-matching + + ## Description + Search for documents in the Terraphim knowledge graph using semantic matching. + + ## Parameters + - `query` (string, required): The search query + - `role` (string, optional): Filter by role + - `limit` (number, optional): Maximum results (default: 10) + + ## Returns + Array of Document objects with id, url, body, description, rank. + + ## Example + ```typescript + import { terraphim } from 'mcp-servers'; + + const results = await terraphim.search({ + query: "rust async patterns", + limit: 10 + }); + ``` + + ## See Also + - autocomplete_terms - Get autocomplete suggestions + - find_matches - Find term matches in text + ``` + +3. 
Implement lazy loading + ```typescript + // Only load tool when first used + class McpProxy { + async search(params) { + if (!this._search) { + this._search = await import('./tools/search'); + } + return this._search.default(params); + } + } + ``` + +**Deliverables:** +- [ ] All tools categorized +- [ ] Rich documentation generated +- [ ] Lazy loading implemented + +#### Week 7: Workspace Management + +**Tasks:** +1. Design workspace structure + ```rust + // crates/terraphim_multi_agent/src/workspace.rs + pub struct Workspace { + root: PathBuf, + agent_id: AgentId, + } + + impl Workspace { + pub fn new(agent_id: AgentId) -> Self { + let root = PathBuf::from(format!("workspace/{}", agent_id)); + fs::create_dir_all(&root).unwrap(); + fs::create_dir_all(root.join("data")).unwrap(); + fs::create_dir_all(root.join("results")).unwrap(); + fs::create_dir_all(root.join("checkpoints")).unwrap(); + fs::create_dir_all(root.join("skills")).unwrap(); + Self { root, agent_id } + } + + pub fn data_dir(&self) -> PathBuf { + self.root.join("data") + } + + pub fn results_dir(&self) -> PathBuf { + self.root.join("results") + } + + pub fn checkpoint(&self, name: &str) -> Result<()> { + // Create checkpoint snapshot + } + + pub fn restore(&self, checkpoint: &str) -> Result<()> { + // Restore from checkpoint + } + } + ``` + +2. Integrate with VM execution + ```rust + impl VmExecutionClient { + pub async fn execute_with_workspace( + &self, + code: &str, + workspace: &Workspace, + ) -> Result { + // 1. Mount workspace in VM + // 2. Execute code + // 3. Persist results to workspace + } + } + ``` + +3. 
Add file utilities + ```typescript + // Available in VM environment + import { workspace } from 'mcp-runtime'; + + // Save data + await workspace.saveData('analysis.json', data); + + // Load data + const data = await workspace.loadData('analysis.json'); + + // Create checkpoint + await workspace.checkpoint('before-filter'); + + // Restore if needed + await workspace.restore('before-filter'); + ``` + +**Deliverables:** +- [ ] Workspace structure implemented +- [ ] VM integration complete +- [ ] File utilities available + +#### Week 8: Token Optimization Metrics + +**Tasks:** +1. Create metrics tracking + ```rust + // crates/terraphim_multi_agent/src/metrics/code_execution.rs + #[derive(Serialize, Deserialize)] + pub struct ExecutionMetrics { + pub traditional_tokens: u64, + pub code_execution_tokens: u64, + pub reduction_percentage: f64, + pub execution_time_ms: u64, + pub tool_count: usize, + pub code_lines: usize, + } + + pub struct MetricsCollector { + pub fn record_execution(&mut self, metrics: ExecutionMetrics); + pub fn get_statistics(&self) -> ExecutionStatistics; + pub fn compare_approaches(&self) -> ComparisonReport; + } + ``` + +2. Build dashboard + ```rust + // Expose metrics via API + #[get("/api/metrics/code-execution")] + async fn get_code_execution_metrics() -> Json { + // Return aggregated metrics + } + ``` + +3. 
Add optimization recommendations + ```rust + pub fn analyze_token_usage(metrics: &ExecutionMetrics) -> Vec<Recommendation> { + let mut recommendations = Vec::new(); + + if metrics.reduction_percentage < 80.0 { + recommendations.push(Recommendation { + priority: Priority::High, + message: "Consider processing more data in-environment".to_string(), + }); + } + + recommendations + } + ``` + +**Deliverables:** +- [ ] Metrics collection working +- [ ] Dashboard accessible +- [ ] Recommendations generated + +### Phase 2 Success Criteria + +- ✅ Tool discovery <100ms response time +- ✅ Support for 100+ tools without degradation +- ✅ Workspace management functional +- ✅ Token reduction metrics visible +- ✅ Documentation auto-generated for all tools + +## Phase 3: Skills & Production (Weeks 9-12) + +### Goal +Production-ready system with reusable skills and comprehensive monitoring. + +### Milestones + +#### Week 9: Skill Library System + +**Tasks:** +1. Design SKILL.MD format + ```markdown + # SKILL: Knowledge Graph Analysis + + ## Metadata + - **Created:** 2025-11-15 + - **Version:** 1.0 + - **Author:** agent-001 + - **Tags:** knowledge-graph, analysis, connectivity + + ## Description + Analyzes knowledge graph connectivity and generates comprehensive reports. 
+ + ## Function Signature + ```typescript + async function analyzeKnowledgeGraph( + text: string, + options?: AnalysisOptions + ): Promise + ``` + + ## Implementation + ```typescript + import { terraphim } from 'mcp-servers'; + + async function analyzeKnowledgeGraph(text, options = {}) { + const matches = await terraphim.findMatches({ text }); + const connected = await terraphim.isAllTermsConnectedByPath({ text }); + + return { + matched_terms: matches.length, + connectivity: connected, + graph_summary: generateSummary(matches, connected) + }; + } + ``` + + ## Usage History + - **Total Uses:** 42 + - **Success Rate:** 95.2% + - **Avg Execution Time:** 1.8s + - **Last Used:** 2025-11-14 + + ## Examples + ```typescript + const report = await analyzeKnowledgeGraph( + "Rust async patterns with tokio and futures", + { detailed: true } + ); + ``` + ``` + +2. Implement skill storage + ```rust + // crates/terraphim_skills/src/lib.rs + pub struct Skill { + pub metadata: SkillMetadata, + pub code: String, + pub usage_stats: UsageStatistics, + } + + pub struct SkillLibrary { + skills: HashMap, + index: SearchIndex, + } + + impl SkillLibrary { + pub async fn save_skill(&mut self, skill: Skill) -> Result<()>; + pub async fn load_skill(&self, name: &str) -> Option<&Skill>; + pub async fn search_skills(&self, query: &str) -> Vec<&Skill>; + pub async fn record_usage(&mut self, name: &str, success: bool); + } + ``` + +3. Auto-save successful patterns + ```rust + impl TerraphimAgent { + async fn execute_code(&mut self, code: &str) -> Result { + let result = self.vm_client.execute(code).await?; + + if result.success && self.should_save_as_skill(&code) { + let skill = self.extract_skill(code, &result)?; + self.skills.save_skill(skill).await?; + } + + Ok(result) + } + } + ``` + +**Deliverables:** +- [ ] SKILL.MD format defined +- [ ] Skill library implemented +- [ ] Auto-save working +- [ ] Skill search functional + +#### Week 10: Performance Optimization + +**Tasks:** +1. 
Add caching layer + ```rust + // crates/terraphim_multi_agent/src/cache.rs + pub struct ExecutionCache { + cache: Arc>>, + } + + impl ExecutionCache { + pub async fn get(&self, code_hash: &str) -> Option; + pub async fn set(&self, code_hash: &str, result: CachedResult); + } + ``` + +2. Implement memoization + ```rust + impl VmExecutionClient { + async fn execute_memoized(&self, code: &str) -> Result { + let hash = calculate_code_hash(code); + + if let Some(cached) = self.cache.get(&hash).await { + return Ok(cached.result); + } + + let result = self.execute_uncached(code).await?; + self.cache.set(&hash, CachedResult::new(result.clone())).await; + + Ok(result) + } + } + ``` + +3. Optimize resource pooling + ```rust + pub struct VmPool { + available: Vec, + in_use: HashMap, + config: PoolConfig, + } + + impl VmPool { + pub async fn acquire(&mut self) -> Result { + // Smart allocation with warm VMs + } + + pub async fn release(&mut self, vm: VmInstance) { + // Keep VM warm for reuse + } + } + ``` + +4. Load testing + ```rust + #[tokio::test] + async fn load_test_1000_concurrent_agents() { + let agents = create_test_agents(1000).await; + + let start = Instant::now(); + + let results = futures::future::join_all( + agents.iter().map(|a| a.execute_code(SAMPLE_CODE)) + ).await; + + let duration = start.elapsed(); + + assert!(duration.as_secs() < 30); // Complete in 30s + assert!(results.iter().all(|r| r.is_ok())); + } + ``` + +**Deliverables:** +- [ ] Caching implemented +- [ ] Memoization working +- [ ] Resource pooling optimized +- [ ] Load tests passing + +#### Week 11: Production Hardening + +**Tasks:** +1. 
Comprehensive error handling + ```rust + #[derive(Error, Debug)] + pub enum CodeExecutionError { + #[error("Code generation failed: {0}")] + GenerationFailed(String), + + #[error("Code validation failed: {0}")] + ValidationFailed(String), + + #[error("VM execution error: {0}")] + ExecutionError(String), + + #[error("MCP tool error: {0}")] + ToolError(String), + + #[error("Timeout after {0}ms")] + Timeout(u64), + } + ``` + +2. Monitoring dashboards + ```rust + // Prometheus metrics + lazy_static! { + static ref CODE_EXECUTIONS: IntCounter = register_int_counter!( + "terraphim_code_executions_total", + "Total code executions" + ).unwrap(); + + static ref EXECUTION_DURATION: Histogram = register_histogram!( + "terraphim_execution_duration_seconds", + "Code execution duration" + ).unwrap(); + + static ref TOKEN_REDUCTION: Histogram = register_histogram!( + "terraphim_token_reduction_percentage", + "Token reduction percentage" + ).unwrap(); + } + ``` + +3. Health checks + ```rust + #[get("/health/code-execution")] + async fn health_check() -> Json<HealthStatus> { + Json(HealthStatus { + vm_pool_available: check_vm_pool().await, + mcp_server_reachable: check_mcp_server().await, + skill_library_accessible: check_skills().await, + cache_operational: check_cache().await, + }) + } + ``` + +4. Documentation + - [ ] API documentation (rustdoc) + - [ ] User guide + - [ ] Architecture diagrams + - [ ] Troubleshooting guide + +**Deliverables:** +- [ ] Error handling comprehensive +- [ ] Monitoring operational +- [ ] Health checks working +- [ ] Documentation complete + +#### Week 12: Final Testing & Launch + +**Tasks:** +1. End-to-end integration tests + ```rust + #[tokio::test] + async fn test_complete_workflow() { + // 1. Agent receives query + // 2. Generates code + // 3. Discovers tools + // 4. Executes in VM + // 5. Processes data + // 6. Returns result + // 7. Saves skill + // 8. Metrics recorded + } + ``` + +2. 
Performance benchmarks + ```rust + #[bench] + fn bench_traditional_approach(b: &mut Bencher) { + b.iter(|| execute_traditional_workflow()); + } + + #[bench] + fn bench_code_execution_approach(b: &mut Bencher) { + b.iter(|| execute_code_execution_workflow()); + } + ``` + +3. Security audit + - [ ] Sandbox escape testing + - [ ] Input validation review + - [ ] Access control verification + - [ ] Secrets management audit + +4. Production deployment + - [ ] Canary deployment + - [ ] Gradual rollout + - [ ] Monitor metrics + - [ ] Collect feedback + +**Deliverables:** +- [ ] All tests passing +- [ ] Benchmarks documented +- [ ] Security approved +- [ ] Production deployed + +### Phase 3 Success Criteria + +- ✅ Skills reusable across agents +- ✅ 98%+ token reduction achieved +- ✅ Sub-2 second execution times +- ✅ 1000+ concurrent agents supported +- ✅ 99.9% uptime +- ✅ Production documentation complete +- ✅ Security audit passed + +## Resource Requirements + +### Development Team +- **Senior Rust Engineer** (1 FTE, 12 weeks) + - MCP code API layer + - VM integration + - Performance optimization + +- **Full-Stack Engineer** (0.5 FTE, 12 weeks) + - TypeScript/Python wrappers + - Tool discovery API + - Metrics dashboard + +- **DevOps Engineer** (0.25 FTE, weeks 9-12) + - Deployment infrastructure + - Monitoring setup + - Load testing + +- **Technical Writer** (0.25 FTE, weeks 10-12) + - Documentation + - User guides + - API docs + +### Infrastructure +- **Development Environment** + - 4 vCPUs, 16GB RAM + - Firecracker VMs + - Docker containers + +- **Staging Environment** + - 8 vCPUs, 32GB RAM + - Load testing capacity + - Monitoring stack + +- **Production Rollout** + - Gradual scale-up + - Canary deployment + - Rollback capability + +## Risk Mitigation + +### Technical Risks + +| Risk | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| Code generation quality | High | Medium | Comprehensive validation, testing, fallback to traditional | 
+| Sandbox escape | Critical | Low | Multiple isolation layers, security audit, penetration testing | +| Performance degradation | Medium | Low | Caching, pooling, load testing, monitoring | +| Integration complexity | Medium | Medium | Incremental approach, feature flags, rollback plan | + +### Project Risks + +| Risk | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| Timeline slip | Medium | Medium | Buffer in estimates, weekly progress reviews, adjust scope | +| Resource constraints | High | Low | Early identification, backup resources, vendor support | +| Requirement changes | Medium | Low | Clear spec upfront, change control process | + +## Success Metrics + +### Phase 1 Targets +- Token reduction: >80% +- Execution time: <3s +- Code success rate: >90% +- Test coverage: >85% + +### Phase 2 Targets +- Token reduction: >90% +- Tool discovery: <100ms +- Support 100+ tools +- Documentation coverage: 100% + +### Phase 3 Targets +- Token reduction: >98% +- Execution time: <2s +- Concurrent agents: 1000+ +- Uptime: 99.9% +- Security audit: Passed + +## Next Steps + +1. **Review & Approval** (Week 0) + - [ ] Review specification with stakeholders + - [ ] Approve implementation plan + - [ ] Allocate resources + - [ ] Set up project tracking + +2. **Kickoff** (Week 1, Day 1) + - [ ] Team onboarding + - [ ] Environment setup + - [ ] Create project board + - [ ] First sprint planning + +3. **Ongoing** (Weekly) + - [ ] Sprint planning + - [ ] Daily standups + - [ ] Code reviews + - [ ] Progress tracking + - [ ] Risk assessment + +4. **Launch** (Week 12) + - [ ] Production deployment + - [ ] Monitoring active + - [ ] Documentation published + - [ ] Success metrics tracked + +## Appendices + +### A. 
File Structure + +``` +terraphim-ai/ +├── crates/ +│ ├── terraphim_mcp_codegen/ # NEW: Code generation +│ │ ├── src/ +│ │ │ ├── lib.rs +│ │ │ ├── typescript_gen.rs +│ │ │ ├── python_gen.rs +│ │ │ ├── runtime.rs +│ │ │ └── templates/ +│ │ └── Cargo.toml +│ ├── terraphim_skills/ # NEW: Skill library +│ │ ├── src/ +│ │ │ ├── lib.rs +│ │ │ ├── storage.rs +│ │ │ └── search.rs +│ │ └── Cargo.toml +│ ├── terraphim_mcp_server/ # UPDATED: Add discovery +│ │ └── src/ +│ │ └── discovery.rs # NEW +│ └── terraphim_multi_agent/ # UPDATED: Code-first +│ └── src/ +│ ├── prompts/ +│ │ └── code_execution.rs # NEW +│ ├── metrics/ +│ │ └── code_execution.rs # NEW +│ ├── workspace.rs # NEW +│ └── vm_execution/ +│ └── mcp_bridge.rs # NEW +├── workspace/ # NEW: Agent workspaces +│ ├── mcp-servers/ +│ │ ├── terraphim.ts # Generated +│ │ └── terraphim.py # Generated +│ └── {agent-id}/ +│ ├── data/ +│ ├── results/ +│ ├── checkpoints/ +│ └── skills/ +├── skills/ # NEW: Global skills +│ └── *.skill.md +└── docs/ + ├── CODE_EXECUTION_WITH_MCP_SPEC.md + ├── CODE_EXECUTION_MCP_GAP_ANALYSIS.md + └── CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md +``` + +### B. Dependencies + +```toml +# New dependencies across crates + +[dependencies] +# Code generation +tera = "1.19" +convert_case = "0.6" + +# Metrics +prometheus = "0.13" + +# Caching +lru = "0.12" + +# Existing (versions may need updates) +tokio = { version = "1", features = ["full"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +``` + +### C. Test Coverage Requirements + +- Unit tests: >85% coverage +- Integration tests: All critical paths +- End-to-end tests: Main workflows +- Load tests: 1000+ concurrent agents +- Security tests: Sandbox, access control + +### D. 
Deployment Checklist + +- [ ] All tests passing +- [ ] Documentation complete +- [ ] Security audit passed +- [ ] Performance benchmarks met +- [ ] Monitoring configured +- [ ] Rollback plan tested +- [ ] Team trained +- [ ] User guide published +- [ ] Canary deployment successful +- [ ] Production deployment approved diff --git a/CODE_EXECUTION_MCP_SUMMARY.md b/CODE_EXECUTION_MCP_SUMMARY.md new file mode 100644 index 00000000..f727de13 --- /dev/null +++ b/CODE_EXECUTION_MCP_SUMMARY.md @@ -0,0 +1,353 @@ +# Code Execution with MCP - Project Summary + +**Date:** 2025-11-15 +**Status:** Ready for Review +**Implementation Timeline:** 12 weeks (3 phases) + +## Quick Links + +- [Technical Specification](./CODE_EXECUTION_WITH_MCP_SPEC.md) - Full architectural design +- [Gap Analysis](./CODE_EXECUTION_MCP_GAP_ANALYSIS.md) - Current capabilities vs. requirements +- [Implementation Plan](./CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md) - Detailed 12-week roadmap + +## Executive Summary + +This project implements Anthropic's "Code Execution with MCP" approach in Terraphim AI, achieving: +- **98% token reduction** (150K → 2K tokens for complex workflows) +- **Faster execution** (sub-2 second response times) +- **Unlimited tool scaling** (support 100+ tools without degradation) +- **Enhanced privacy** (data processing in sandbox, not context) + +## The Problem + +Traditional AI agent workflows consume massive amounts of tokens: +1. **Load all tool definitions** upfront → 10K-20K tokens +2. **Every tool call** passes results through context → 5K-50K tokens each +3. 
**Chain multiple calls** → 150K+ tokens total + +This creates: +- ❌ High API costs +- ❌ Increased latency +- ❌ Context window limits +- ❌ Impossible to scale to many tools + +## The Solution + +**Treat MCP servers as code APIs** instead of direct tool calls: + +```typescript +// Instead of: Multiple separate tool calls through context +// Result: Tool def (1K) + Call 1 (8K) + Call 2 (5K) + Call 3 (10K) = 24K tokens + +// Do this: Write code that uses tools programmatically +import { terraphim } from 'mcp-servers'; + +async function analyzeDocuments() { + const docs = await terraphim.search({ query: "rust async", limit: 100 }); + const relevant = docs.filter(d => d.rank > 0.8); + return { count: relevant.length, top: relevant[0] }; +} +// Result: Code (500) + Final result (500) = 1K tokens +``` + +**Benefits:** +- ✅ 98% token reduction +- ✅ Faster execution (parallel processing in code) +- ✅ Better privacy (data stays in sandbox) +- ✅ Unlimited tools (load only what's needed) +- ✅ Reusable skills (save successful patterns) + +## Can Terraphim AI Do This Today? + +### Current Capabilities ✅ + +**YES - Terraphim AI has most infrastructure:** + +1. **✅ Secure Code Execution** + - Firecracker VMs operational + - Sub-2 second boot times + - Python, JavaScript, Bash, Rust support + - Location: `crates/terraphim_multi_agent/src/vm_execution/` + +2. **✅ MCP Server** + - 17 tools available + - Search, autocomplete, analysis + - Location: `crates/terraphim_mcp_server/` + +3. **✅ Agent System** + - Multi-agent coordination + - Lifecycle management + - Location: `crates/terraphim_agent_supervisor/`, `crates/terraphim_multi_agent/` + +4. **✅ State Persistence** + - Multiple storage backends + - Location: `crates/terraphim_persistence/` + +### Missing Capabilities ❌ + +**NO - Three critical components needed:** + +1. **❌ MCP Code APIs** (Critical) + - MCP tools not importable as modules + - Need TypeScript/Python wrappers + - **Effort:** 2 weeks + +2. 
**❌ In-VM MCP Runtime** (Critical) + - Tools not callable from within VM + - Need bridge between VM and MCP + - **Effort:** 2 weeks + +3. **❌ Progressive Tool Discovery** (Important) + - No tool search/categorization + - No dynamic documentation + - **Effort:** 1 week + +### Overall Assessment + +**Current Readiness: 60%** +- ✅ Infrastructure exists (VMs, MCP, agents) +- ❌ Integration layer missing (code APIs, runtime bridge) +- **Implementation Time: 12 weeks** to production-ready + +## Implementation Overview + +### Phase 1: Foundation (Weeks 1-4) +**Goal:** Basic code execution with MCP tools + +**Key Tasks:** +1. Create MCP code API layer +2. Generate TypeScript/Python wrappers +3. Build MCP runtime for VMs +4. Update agent prompts for code-first approach + +**Deliverables:** +- Agents can import MCP tools in code +- >80% token reduction achieved +- End-to-end workflow functional + +### Phase 2: Discovery & Scale (Weeks 5-8) +**Goal:** Support 100+ tools efficiently + +**Key Tasks:** +1. Implement progressive tool discovery +2. Add workspace management +3. Create token optimization metrics +4. Build documentation system + +**Deliverables:** +- Tool discovery <100ms +- Support 100+ tools +- Metrics dashboard live + +### Phase 3: Skills & Production (Weeks 9-12) +**Goal:** Production-ready with reusable skills + +**Key Tasks:** +1. Build skill library system +2. Performance optimization (caching, pooling) +3. Production hardening (monitoring, docs) +4. 
Security audit and deployment + +**Deliverables:** +- Skill library functional +- 98%+ token reduction +- 1000+ concurrent agents +- Production deployed + +## Success Metrics + +### Token Efficiency +- **Baseline:** 150K tokens (traditional approach) +- **Target:** 2K tokens (code execution) +- **Reduction:** 98%+ + +### Performance +- **Code Execution:** <2 seconds +- **Tool Discovery:** <100ms +- **End-to-End:** <5 seconds + +### Scalability +- **Tools:** 500+ without degradation +- **Agents:** 1000+ concurrent +- **Uptime:** 99.9% + +### Quality +- **Code Success Rate:** >95% +- **Security:** 0 sandbox escapes +- **Test Coverage:** >85% + +## Resource Requirements + +### Team +- **Senior Rust Engineer:** 1 FTE (12 weeks) +- **Full-Stack Engineer:** 0.5 FTE (12 weeks) +- **DevOps Engineer:** 0.25 FTE (weeks 9-12) +- **Technical Writer:** 0.25 FTE (weeks 10-12) + +### Infrastructure +- Development environment (4 vCPUs, 16GB RAM) +- Staging environment (8 vCPUs, 32GB RAM) +- Firecracker VMs, Docker containers +- Monitoring stack (Prometheus, Grafana) + +## Key Technical Components + +### 1. MCP Code API Layer +**New Crate:** `crates/terraphim_mcp_codegen/` + +Generates TypeScript/Python wrappers for MCP tools: +```typescript +// Auto-generated from MCP server introspection +export async function search(params: SearchParams): Promise { + return await mcpCall('search', params); +} +``` + +### 2. MCP Runtime for VMs +**New Module:** `crates/terraphim_multi_agent/src/vm_execution/mcp_runtime.rs` + +Makes MCP tools available in VM environment: +```rust +pub struct McpRuntime { + mcp_client: Arc, +} + +impl McpRuntime { + pub async fn call_tool(&self, name: &str, params: Value) -> Result; + pub fn inject_into_vm(&self, vm_id: &str) -> Result<()>; +} +``` + +### 3. Code-First Agent Prompts +**New Module:** `crates/terraphim_multi_agent/src/prompts/code_execution.rs` + +Optimized prompts for code generation: +``` +You solve problems by writing code that imports MCP tools. 
+ +Available tools: +import { terraphim } from 'mcp-servers'; + +Example: +async function analyze() { + const docs = await terraphim.search({ query: "...", limit: 100 }); + return docs.filter(d => d.rank > 0.8); +} +``` + +### 4. Skill Library +**New Crate:** `crates/terraphim_skills/` + +Stores reusable code patterns: +```markdown +# SKILL: Knowledge Graph Analysis + +## Function +async function analyzeKnowledgeGraph(text: string): Promise + +## Usage History +- Success Rate: 95% +- Avg Time: 1.8s +``` + +## Risk Assessment + +### Technical Risks +| Risk | Impact | Mitigation | +|------|--------|------------| +| Code generation quality | Medium | Validation, testing, fallback | +| Sandbox escape | High | Multiple isolation layers, audit | +| Performance degradation | Low | Caching, pooling, monitoring | + +### Project Risks +| Risk | Impact | Mitigation | +|------|--------|------------| +| Timeline slip | Medium | Buffer, weekly reviews, scope adjustment | +| Resource constraints | Low | Early identification, backup resources | + +## Comparison: Traditional vs Code Execution + +### Traditional Approach + +``` +User: "Find high-value Salesforce accounts and summarize" + +1. Load all Salesforce tool definitions → 10K tokens +2. Agent calls search_salesforce + - Query: 200 tokens + - Results: 50K rows → 8K tokens +3. Agent calls filter_records + - Query: 200 tokens + - Results: 500 rows → 5K tokens +4. Agent calls create_summary + - Query: 200 tokens + - Summary: 10K tokens + +Total: ~40K tokens +Time: ~8 seconds (multiple round-trips) +``` + +### Code Execution Approach + +``` +User: "Find high-value Salesforce accounts and summarize" + +1. Agent generates code → 1K tokens + ```typescript + import { salesforce } from 'mcp-servers'; + + async function analyze() { + const all = await salesforce.search({ query: "active accounts" }); + const filtered = all.filter(a => a.revenue > 1000000); + return { + count: filtered.length, + top: filtered[0] + }; + } + ``` + +2. 
Code executes in VM (all processing internal) +3. Final result returned → 500 tokens + +Total: ~2K tokens +Time: ~2 seconds (single execution) +``` + +**Improvement: 95% token reduction, 75% faster** + +## Next Steps + +### 1. Review Phase (Week 0) +- [ ] Review all documentation +- [ ] Approve implementation plan +- [ ] Allocate team resources +- [ ] Set up project tracking + +### 2. Kickoff (Week 1) +- [ ] Team onboarding +- [ ] Environment setup +- [ ] Create `terraphim_mcp_codegen` crate +- [ ] Start TypeScript wrapper generation + +### 3. Ongoing +- [ ] Weekly progress reviews +- [ ] Daily standups +- [ ] Continuous integration +- [ ] Metrics tracking + +### 4. Launch (Week 12) +- [ ] Production deployment +- [ ] Monitoring active +- [ ] Documentation published +- [ ] Success celebration! 🎉 + +## Questions? + +Contact the project team or refer to: +- [Technical Specification](./CODE_EXECUTION_WITH_MCP_SPEC.md) for architecture details +- [Gap Analysis](./CODE_EXECUTION_MCP_GAP_ANALYSIS.md) for capability assessment +- [Implementation Plan](./CODE_EXECUTION_MCP_IMPLEMENTATION_PLAN.md) for detailed tasks + +--- + +**Ready to implement?** Start with Phase 1, Week 1: Creating the MCP code API layer. diff --git a/CODE_EXECUTION_WITH_MCP_SPEC.md b/CODE_EXECUTION_WITH_MCP_SPEC.md new file mode 100644 index 00000000..5c687e49 --- /dev/null +++ b/CODE_EXECUTION_WITH_MCP_SPEC.md @@ -0,0 +1,554 @@ +# Code Execution with MCP - Technical Specification + +**Version:** 1.0 +**Date:** 2025-11-15 +**Based on:** Anthropic's "Code Execution with MCP" Guide + +## Executive Summary + +This specification defines an architecture where AI agents write code to interact with MCP (Model Context Protocol) servers, reducing token consumption by ~98% (150K → 2K tokens) while improving performance, privacy, and scalability. 
+ +### Core Concept + +**Traditional Approach:** +- Agent uses tool calling API +- Model loads ALL tool definitions upfront +- Model calls tools directly via function calls +- Results pass through context window +- **Problem:** Massive token overhead, latency, limited tool count + +**Code Execution Approach:** +- Agent writes code to interact with tools +- Code imports only needed MCP modules +- Code executes and processes data in sandboxed environment +- Only final results return to model +- **Benefit:** 98%+ token reduction, faster execution, unlimited tools + +## Problem Statement + +### Current Challenges with Traditional Tool Calling + +#### 1. Token Overhead Nightmare +- Every tool definition loaded into context upfront +- Each tool includes: description, parameters, format, return type +- **Example:** 100 tools × ~1,500 tokens = 150,000 tokens before any work + +#### 2. Intermediate Results Problem +- Every tool call result passes through context window +- Chain of 10-30 tool calls creates massive data flow +- Simple data processing consumes thousands of tokens + +#### 3. Impact on Production Systems +- **Cost spirals:** More tokens = higher API bills +- **Latency increases:** More processing time per request +- **Context limits:** Can't add more tools without hitting ceiling +- **Scaling impossible:** Each new tool makes problem worse + +### Quantified Impact + +``` +Traditional workflow: 150,000 tokens +Code execution workflow: 2,000 tokens +Reduction: 98.7% +``` + +## Solution Architecture + +### Overview + +Present MCP servers as **code APIs** rather than direct tool calls. Agents import and use MCP tools programmatically within a secure code execution environment. + +### Key Components + +#### 1. 
MCP Code API Layer +``` +┌─────────────────────────────────────┐ +│ MCP Servers as Code Modules │ +│ - TypeScript/Python/Rust modules │ +│ - Importable via standard imports │ +│ - Full programmatic access │ +└─────────────────────────────────────┘ +``` + +#### 2. Agent Code Generation +``` +┌─────────────────────────────────────┐ +│ Agent writes code: │ +│ import { salesforce } from 'mcp' │ +│ const data = await salesforce... │ +│ return processedResult │ +└─────────────────────────────────────┘ +``` + +#### 3. Secure Code Execution Environment +``` +┌─────────────────────────────────────┐ +│ Sandbox Environment │ +│ - Resource limits │ +│ - Network isolation │ +│ - Filesystem restrictions │ +│ - Timeout enforcement │ +└─────────────────────────────────────┘ +``` + +#### 4. Result Flow +``` +User Query → Agent generates code → Execute in sandbox → +Process data → Return final result → Agent responds +``` + +### Data Flow Comparison + +**Traditional (150K tokens):** +``` +1. Load all Salesforce tool definitions (10K tokens) +2. Agent calls search_salesforce → Full results through context (50K tokens) +3. Agent processes, calls filter_records → Filtered results through context (30K tokens) +4. Agent calls create_summary → Summary through context (60K tokens) +Total: ~150K tokens +``` + +**Code Execution (2K tokens):** +``` +1. Agent writes single code block (500 tokens) +2. Code executes: search → filter → summarize (all in-environment) +3. Final summary returns to agent (500 tokens) +Total: ~2K tokens +``` + +## Core Benefits + +### 1. Massive Token Efficiency (98%+ reduction) +- Load only needed tools on-demand +- No intermediate results through context +- Single code block replaces multiple tool calls + +### 2. Progressive Tool Discovery +- Browse available tools dynamically +- Search for specific functionality +- Read documentation only when needed +- No need to memorize entire catalog + +### 3. 
In-Environment Data Processing +- Filter, transform, aggregate within sandbox +- Process 10,000 rows → return 5 relevant ones +- Privacy: sensitive data never enters model context + +### 4. Better Control Flow +- Use loops, conditionals, error handling +- Native programming constructs +- Reduce 50 sequential calls to 1 code execution + +### 5. Privacy Advantages +- Sensitive data stays in execution environment +- Only explicitly returned values visible to model +- Process confidential information safely + +### 6. State Persistence +- Save intermediate results to files +- Resume work across sessions +- Checkpoint progress for long-running tasks + +### 7. Reusable Skills +- Build library of higher-level capabilities +- Document with SKILL.MD files +- Agent references previous work +- Complex operations become single functions + +## Technical Requirements + +### 1. Code Execution Environment + +#### Requirements +- **Sandboxing:** Isolated execution context +- **Resource Limits:** CPU, memory, disk quotas +- **Timeout Enforcement:** Maximum execution time +- **Network Control:** Allow/block specific endpoints +- **Filesystem:** Restricted access, temporary storage +- **Monitoring:** Execution metrics, error tracking + +#### Languages Supported +- Python (primary for data processing) +- JavaScript/TypeScript (MCP native) +- Rust (performance-critical operations) +- Bash (system commands) + +### 2. 
MCP Code API Interface + +#### Module Structure +```typescript +// Example: MCP server exposed as code module +import { salesforce } from 'mcp-servers'; + +interface SalesforceAPI { + search(query: SearchQuery): Promise; + filter(data: any[], condition: FilterCondition): Promise; + create(record: Record): Promise; + update(id: string, data: Partial): Promise; +} +``` + +#### Discovery API +```typescript +// Progressive tool discovery +import { searchTools, getToolDocs } from 'mcp-runtime'; + +const tools = await searchTools({ + category: 'database', + capabilities: ['read', 'write'] +}); + +const docs = await getToolDocs('salesforce.search'); +``` + +### 3. Agent Code Generation + +#### Code Block Format +```markdown +```typescript +import { salesforce } from 'mcp-servers'; + +async function getSummary() { + const results = await salesforce.search({ + query: "active accounts", + fields: ["name", "revenue", "status"] + }); + + const filtered = results.filter(r => r.revenue > 1000000); + + return { + total: filtered.length, + total_revenue: filtered.reduce((sum, r) => sum + r.revenue, 0), + top_account: filtered.sort((a, b) => b.revenue - a.revenue)[0] + }; +} +``` +``` + +#### Validation +- **Syntax checking** before execution +- **Static analysis** for security issues +- **Import validation** (only allowed modules) +- **API rate limit** enforcement + +### 4. Security Model + +#### Execution Sandbox +- **VM isolation** (Firecracker microVMs) +- **SELinux/AppArmor** policies +- **Seccomp filters** for syscalls +- **Network namespaces** for isolation + +#### Code Restrictions +- No arbitrary network access +- No filesystem access outside workspace +- No subprocess spawning +- No infinite loops (timeout) +- Memory limits enforced + +#### Audit Trail +- Log all code execution +- Track resource usage +- Monitor API calls +- Record data access patterns + +### 5. 
State Management + +#### Workspace Persistence +``` +workspace/ + ├── data/ # Temporary data files + ├── results/ # Execution results + ├── checkpoints/ # Saved state snapshots + └── skills/ # Reusable skill library +``` + +#### Session Continuity +- Save workspace state between executions +- Resume long-running tasks +- Checkpoint important milestones +- Rollback on errors + +### 6. Skill Library Pattern + +#### SKILL.MD Format +```markdown +# Skill: Salesforce Account Analysis + +## Description +Analyzes Salesforce accounts and generates revenue reports. + +## Function Signature +```typescript +async function analyzeAccounts(options: AnalysisOptions): Promise +``` + +## Example Usage +```typescript +const report = await analyzeAccounts({ + minRevenue: 1000000, + includeInactive: false +}); +``` + +## Dependencies +- mcp-servers/salesforce +- mcp-servers/analytics +``` + +## Implementation Phases + +### Phase 1: Foundation (Weeks 1-4) +**Goal:** Basic code execution with MCP tools + +- [ ] Set up secure code execution environment +- [ ] Create MCP → Code API translation layer +- [ ] Implement basic Python/TypeScript execution +- [ ] Add syntax validation and security checks +- [ ] Create simple tool discovery mechanism + +**Deliverables:** +- Working code execution sandbox +- 3-5 MCP tools exposed as code APIs +- Basic documentation + +### Phase 2: Agent Integration (Weeks 5-8) +**Goal:** Integrate with existing agent system + +- [ ] Modify agent code generation prompts +- [ ] Add code block extraction from responses +- [ ] Implement execution flow in agent lifecycle +- [ ] Create result integration back to agent +- [ ] Add error handling and recovery + +**Deliverables:** +- Agents can generate and execute code +- End-to-end workflow functional +- Error handling complete + +### Phase 3: Tool Expansion (Weeks 9-12) +**Goal:** Scale to many tools + +- [ ] Expose all MCP tools as code APIs +- [ ] Implement progressive tool discovery +- [ ] Add comprehensive 
documentation generation +- [ ] Create tool search and filtering +- [ ] Optimize for 100+ tools + +**Deliverables:** +- All existing MCP tools available as code +- Tool discovery API +- Searchable documentation + +### Phase 4: Advanced Features (Weeks 13-16) +**Goal:** Production-ready features + +- [ ] Implement skill library system +- [ ] Add state persistence and checkpointing +- [ ] Create workspace management +- [ ] Implement SKILL.MD pattern +- [ ] Add monitoring and metrics + +**Deliverables:** +- Skill library functional +- State persistence working +- Production monitoring + +### Phase 5: Optimization (Weeks 17-20) +**Goal:** Performance and scale + +- [ ] Token usage optimization +- [ ] Execution performance tuning +- [ ] Caching and memoization +- [ ] Resource pooling +- [ ] Load testing and benchmarks + +**Deliverables:** +- 98%+ token reduction achieved +- Sub-2 second execution times +- 1000+ concurrent agents supported + +## Success Metrics + +### Token Efficiency +- **Target:** 98% reduction in token usage +- **Measurement:** Compare traditional vs code execution workflows +- **Baseline:** 150K tokens → 2K tokens for complex workflows + +### Performance +- **Code Execution Latency:** < 2 seconds for typical workflows +- **Tool Discovery:** < 100ms to find relevant tools +- **End-to-End:** < 5 seconds from query to response + +### Scalability +- **Tool Count:** Support 500+ tools without degradation +- **Concurrent Agents:** 1000+ agents executing code simultaneously +- **Workspace Size:** 100MB per agent workspace + +### Quality +- **Code Success Rate:** > 95% of generated code executes successfully +- **Security:** 0 sandbox escapes, 0 unauthorized access +- **Uptime:** 99.9% availability + +## Risk Analysis + +### Technical Risks + +1. **Code Generation Quality** + - *Risk:* Agent generates invalid or insecure code + - *Mitigation:* Comprehensive validation, static analysis, testing + - *Severity:* Medium + +2. 
**Sandbox Escape** + - *Risk:* Malicious code breaks out of sandbox + - *Mitigation:* Multiple isolation layers (VM + OS + runtime) + - *Severity:* High + +3. **Performance Degradation** + - *Risk:* Code execution slower than direct tool calls + - *Mitigation:* Async execution, caching, pooling + - *Severity:* Low + +### Operational Risks + +1. **Resource Exhaustion** + - *Risk:* Runaway code consumes all resources + - *Mitigation:* Strict limits, monitoring, auto-termination + - *Severity:* Medium + +2. **Complexity** + - *Risk:* System becomes too complex to maintain + - *Mitigation:* Clear architecture, good documentation + - *Severity:* Medium + +## Alternative Approaches + +### Option 1: Hybrid Model +- Use direct tool calls for simple operations +- Use code execution for complex workflows +- **Trade-off:** More complexity, but gradual migration + +### Option 2: Agent-Specific +- Enable code execution per agent +- Some agents use traditional, some use code +- **Trade-off:** Flexibility, but inconsistent experience + +### Option 3: Tool Streaming +- Stream tool definitions on-demand +- Partial context loading +- **Trade-off:** Still uses more tokens than code execution + +## References + +- [Anthropic: Code Execution with MCP](https://medium.com/ai-software-engineer/anthropic-just-solved-ai-agent-bloat-150k-tokens-down-to-2k-code-execution-with-mcp-8266b8e80301) +- [Model Context Protocol Specification](https://modelcontextprotocol.io/) +- [Terraphim AI Architecture](./CLAUDE.md) + +## Appendices + +### A. 
Example Code Execution Workflow + +```typescript +// Agent receives query: "Find high-value Salesforce accounts and summarize" + +// Agent generates this code: +import { salesforce } from 'mcp-servers'; + +async function analyzeHighValueAccounts() { + // Search for active accounts + const accounts = await salesforce.search({ + query: "active accounts", + fields: ["name", "revenue", "status", "industry"] + }); + + // Filter high-value accounts (in-environment processing) + const highValue = accounts.filter(acc => acc.revenue > 1000000); + + // Group by industry + const byIndustry = highValue.reduce((groups, acc) => { + const industry = acc.industry || 'Unknown'; + if (!groups[industry]) groups[industry] = []; + groups[industry].push(acc); + return groups; + }, {}); + + // Generate summary + return { + total_accounts: highValue.length, + total_revenue: highValue.reduce((sum, acc) => sum + acc.revenue, 0), + by_industry: Object.entries(byIndustry).map(([industry, accs]) => ({ + industry, + count: accs.length, + revenue: accs.reduce((sum, a) => sum + a.revenue, 0) + })), + top_account: highValue.sort((a, b) => b.revenue - a.revenue)[0] + }; +} + +// Execute +const result = await analyzeHighValueAccounts(); +console.log(JSON.stringify(result, null, 2)); +``` + +**Token Comparison:** +- Traditional: ~150K tokens (all tool defs + intermediate results) +- Code execution: ~2K tokens (code + final result) +- **Reduction: 98.7%** + +### B. Skill Library Example + +```markdown +# SKILL.MD: Database Query Optimization + +## Description +Analyzes and optimizes database queries for performance. 
+ +## Expertise +- Database performance tuning +- Query plan analysis +- Index recommendations + +## Function +```typescript +async function optimizeQuery(query: string, database: string): Promise { + const { db } = await import('mcp-servers/database'); + + // Analyze query plan + const plan = await db.explain(query, database); + + // Identify bottlenecks + const bottlenecks = analyzeBottlenecks(plan); + + // Generate recommendations + const recommendations = generateRecommendations(bottlenecks); + + return { + original_query: query, + estimated_cost: plan.cost, + bottlenecks, + recommendations, + optimized_query: applyOptimizations(query, recommendations) + }; +} +``` + +## Usage History +- Last used: 2025-11-14 +- Success rate: 95% +- Average improvement: 3.2x faster queries +``` + +### C. Security Checklist + +- [ ] VM isolation configured (Firecracker) +- [ ] Resource limits enforced (CPU, memory, disk) +- [ ] Network namespaces isolated +- [ ] Filesystem access restricted +- [ ] Timeout mechanisms active +- [ ] Static analysis for code validation +- [ ] Import whitelist configured +- [ ] Audit logging enabled +- [ ] Monitoring dashboards created +- [ ] Incident response procedures documented +- [ ] Penetration testing completed +- [ ] Security review approved diff --git a/Cargo.lock b/Cargo.lock index c43900d0..c5979f83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -782,6 +782,28 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "chrono-tz" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf 0.11.3", +] + +[[package]] +name = "chrono-tz-build" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +dependencies = [ + "parse-zoneinfo", + "phf 0.11.3", + "phf_codegen 0.11.3", 
+] + [[package]] name = "ciborium" version = "0.2.2" @@ -1655,6 +1677,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "diff" version = "0.1.13" @@ -2688,6 +2716,17 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "globwalk" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" +dependencies = [ + "bitflags 2.10.0", + "ignore", + "walkdir", +] + [[package]] name = "gloo-timers" version = "0.3.0" @@ -3133,6 +3172,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + [[package]] name = "humantime" version = "2.3.0" @@ -4823,6 +4871,15 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + [[package]] name = "paste" version = "1.0.15" @@ -6017,6 +6074,27 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmcp" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37f2048a81a7ff7e8ef6bc5abced70c3d9114c8f03d85d7aaaafd9fd04f12e9e" +dependencies = [ + "base64 0.22.1", + "chrono", + "futures", + "paste", + "pin-project-lite", + "rmcp-macros 0.2.1", + "schemars 0.8.22", + "serde", + "serde_json", + "thiserror 2.0.17", + "tokio", + 
"tokio-util", + "tracing", +] + [[package]] name = "rmcp" version = "0.6.4" @@ -6035,7 +6113,7 @@ dependencies = [ "pin-project-lite", "process-wrap", "rand 0.9.2", - "rmcp-macros", + "rmcp-macros 0.6.4", "schemars 1.0.4", "serde", "serde_json", @@ -6049,6 +6127,19 @@ dependencies = [ "uuid", ] +[[package]] +name = "rmcp-macros" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72398e694b9f6dbb5de960cf158c8699e6a1854cb5bbaac7de0646b2005763c4" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "serde_json", + "syn 2.0.108", +] + [[package]] name = "rmcp-macros" version = "0.6.4" @@ -6401,6 +6492,7 @@ version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ + "chrono", "dyn-clone", "schemars_derive 0.8.22", "serde", @@ -7028,6 +7120,16 @@ dependencies = [ "parking_lot 0.11.2", ] +[[package]] +name = "slug" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724" +dependencies = [ + "deunicode", + "wasm-bindgen", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -7832,6 +7934,28 @@ dependencies = [ "utf-8", ] +[[package]] +name = "tera" +version = "1.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8004bca281f2d32df3bacd59bc67b312cb4c70cea46cbd79dbe8ac5ed206722" +dependencies = [ + "chrono", + "chrono-tz", + "globwalk", + "humansize", + "lazy_static", + "percent-encoding", + "pest", + "pest_derive", + "rand 0.8.5", + "regex", + "serde", + "serde_json", + "slug", + "unicode-segmentation", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -7859,7 +7983,7 @@ dependencies = [ "lru 0.16.2", "mockall", "portpicker", - "rmcp", + "rmcp 0.6.4", "schemars 0.8.22", "serde", "serde_json", @@ -8220,6 +8344,23 @@ dependencies = [ "uuid", ] 
+[[package]] +name = "terraphim_mcp_codegen" +version = "0.1.0" +dependencies = [ + "anyhow", + "convert_case 0.6.0", + "rmcp 0.2.1", + "serde", + "serde_json", + "tempfile", + "tera", + "terraphim_config", + "terraphim_mcp_server", + "thiserror 1.0.69", + "tokio", +] + [[package]] name = "terraphim_mcp_server" version = "1.0.0" @@ -8231,7 +8372,7 @@ dependencies = [ "clap", "env_logger 0.11.8", "regex", - "rmcp", + "rmcp 0.6.4", "serde_json", "serial_test", "tempfile", @@ -8268,7 +8409,7 @@ dependencies = [ "mcp-spec", "reqwest 0.12.24", "reqwest-eventsource 0.5.0", - "rmcp", + "rmcp 0.6.4", "scraper", "serde", "serde_json", diff --git a/crates/terraphim_mcp_codegen/Cargo.toml b/crates/terraphim_mcp_codegen/Cargo.toml new file mode 100644 index 00000000..284a164f --- /dev/null +++ b/crates/terraphim_mcp_codegen/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "terraphim_mcp_codegen" +version = "0.1.0" +edition = "2021" +description = "Code generator for MCP server tools - creates TypeScript and Python wrappers" +license = "MIT" + +[dependencies] +# Workspace dependencies +tokio = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +anyhow = { workspace = true } + +# Template engine for code generation +tera = "1.19" + +# Case conversion for naming conventions +convert_case = "0.6" + +# MCP server integration +terraphim_mcp_server = { path = "../terraphim_mcp_server" } +terraphim_config = { path = "../terraphim_config" } + +# Runtime support +rmcp = "0.2" + +[[bin]] +name = "mcp-codegen" +path = "src/bin/codegen.rs" + +[dev-dependencies] +tempfile = "3.10" diff --git a/crates/terraphim_mcp_codegen/src/bin/codegen.rs b/crates/terraphim_mcp_codegen/src/bin/codegen.rs new file mode 100644 index 00000000..f478d1e2 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/bin/codegen.rs @@ -0,0 +1,458 @@ +//! MCP Code Generator CLI +//! +//! Generates TypeScript and Python wrappers for MCP tools. 
+ +use std::path::PathBuf; + +use terraphim_mcp_codegen::{ + generate_code, + runtime::{McpRuntime, RuntimeConfig}, + CodegenConfig, OutputFormat, +}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Parse command line arguments + let args: Vec = std::env::args().collect(); + + if args.len() < 2 { + print_usage(); + std::process::exit(1); + } + + let command = &args[1]; + + match command.as_str() { + "generate" => { + let format = args.get(2).map(|s| s.as_str()).unwrap_or("typescript"); + let output_path = args.get(3).map(PathBuf::from).unwrap_or_else(|| { + if format == "python" || format == "py" { + PathBuf::from("terraphim.py") + } else { + PathBuf::from("terraphim.ts") + } + }); + + generate_wrappers(format, output_path).await?; + } + "package" => { + let output_dir = args + .get(2) + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("mcp-runtime")); + + create_package(output_dir).await?; + } + "introspect" => { + introspect_tools().await?; + } + "help" | "--help" | "-h" => { + print_usage(); + } + _ => { + eprintln!("Unknown command: {}", command); + print_usage(); + std::process::exit(1); + } + } + + Ok(()) +} + +fn print_usage() { + eprintln!( + r#" +MCP Code Generator - Generate TypeScript/Python wrappers for MCP tools + +USAGE: + mcp-codegen [options] + +COMMANDS: + generate [format] [output] Generate wrapper code + format: typescript (default), python + output: output file path (default: terraphim.ts or terraphim.py) + + package [output_dir] Create complete code execution package + output_dir: directory for the package (default: mcp-runtime) + + introspect List all available MCP tools + + help Show this help message + +EXAMPLES: + mcp-codegen generate typescript ./workspace/terraphim.ts + mcp-codegen generate python ./workspace/terraphim.py + mcp-codegen package ./workspace/mcp-runtime + mcp-codegen introspect +"# + ); +} + +async fn generate_wrappers(format: &str, output_path: PathBuf) -> Result<(), Box> { + println!("Generating {} 
wrappers...", format); + + // Create MCP service to introspect tools + let metadata = get_mcp_metadata().await?; + + let output_format: OutputFormat = format.parse().map_err(|e| { + Box::new(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("Invalid format: {}", e), + )) + })?; + + let config = CodegenConfig { + format: output_format, + output_path: output_path.clone(), + module_name: "terraphim".to_string(), + async_functions: true, + include_docs: true, + include_examples: true, + }; + + let code = generate_code(&metadata, &config)?; + + // Ensure parent directory exists + if let Some(parent) = output_path.parent() { + std::fs::create_dir_all(parent)?; + } + + std::fs::write(&output_path, code)?; + + println!("Generated {} tools to {}", metadata.tools.len(), output_path.display()); + println!("\nTools generated:"); + for tool in &metadata.tools { + println!(" - {} ({})", tool.name, tool.category); + } + + Ok(()) +} + +async fn create_package(output_dir: PathBuf) -> Result<(), Box> { + println!("Creating code execution package..."); + + let metadata = get_mcp_metadata().await?; + + // Create both TypeScript and Python wrappers + let ts_config = CodegenConfig { + format: OutputFormat::TypeScript, + module_name: "terraphim".to_string(), + ..Default::default() + }; + + let py_config = CodegenConfig { + format: OutputFormat::Python, + module_name: "terraphim".to_string(), + ..Default::default() + }; + + let ts_code = generate_code(&metadata, &ts_config)?; + let py_code = generate_code(&metadata, &py_config)?; + + // Create runtime configuration + let runtime_config = RuntimeConfig::default(); + let runtime = McpRuntime::new(runtime_config.clone()); + + // Setup directory structure + std::fs::create_dir_all(&output_dir)?; + std::fs::create_dir_all(output_dir.join("typescript"))?; + std::fs::create_dir_all(output_dir.join("python"))?; + + // Write TypeScript package + std::fs::write(output_dir.join("typescript/terraphim.ts"), ts_code)?; + 
runtime.write_javascript_runtime(&output_dir.join("typescript/runtime.js"))?; + + // Write Python package + std::fs::write(output_dir.join("python/terraphim.py"), py_code)?; + runtime.write_python_runtime(&output_dir.join("python/runtime.py"))?; + + // Write package.json for TypeScript + let package_json = serde_json::json!({ + "name": "terraphim-mcp", + "version": "1.0.0", + "type": "module", + "main": "terraphim.ts", + "dependencies": {} + }); + std::fs::write( + output_dir.join("typescript/package.json"), + serde_json::to_string_pretty(&package_json)?, + )?; + + // Write requirements.txt for Python + std::fs::write( + output_dir.join("python/requirements.txt"), + "aiohttp>=3.8.0\n", + )?; + + // Write README + let readme = format!( + r#"# Terraphim MCP Code Execution Package + +This package contains TypeScript and Python wrappers for {} MCP tools. + +## TypeScript Usage + +```typescript +import {{ terraphim }} from './typescript/terraphim'; +import './typescript/runtime'; + +const results = await terraphim.search({{ query: "rust patterns", limit: 10 }}); +``` + +## Python Usage + +```python +from python.runtime import mcp_call +from python.terraphim import terraphim + +results = await terraphim.search(query="rust patterns", limit=10) +``` + +## Available Tools + +{} +"#, + metadata.tools.len(), + metadata + .tools + .iter() + .map(|t| format!("- **{}**: {}", t.name, t.description)) + .collect::>() + .join("\n") + ); + std::fs::write(output_dir.join("README.md"), readme)?; + + println!( + "Package created at {} with {} tools", + output_dir.display(), + metadata.tools.len() + ); + + Ok(()) +} + +async fn introspect_tools() -> Result<(), Box> { + println!("Introspecting MCP tools...\n"); + + let metadata = get_mcp_metadata().await?; + + println!("Server: {} v{}", metadata.name, metadata.version); + if let Some(desc) = &metadata.description { + println!("Description: {}", desc); + } + println!("\nAvailable Tools ({}):\n", metadata.tools.len()); + + for tool in 
&metadata.tools { + println!(" {} - {}", tool.name, tool.category); + println!(" {}", tool.description); + if !tool.parameters.is_empty() { + println!(" Parameters:"); + for param in &tool.parameters { + let required = if param.required { "required" } else { "optional" }; + println!( + " - {} ({}): {} [{}]", + param.name, param.json_type, param.description, required + ); + } + } + println!(" Capabilities: {}", tool.capabilities.join(", ")); + println!(); + } + + Ok(()) +} + +async fn get_mcp_metadata( +) -> Result> { + // Build metadata directly from known MCP server tools + // This avoids needing to create a runtime context + use terraphim_mcp_codegen::{ + categorize_tool, extract_capabilities, McpServerMetadata, ParameterMetadata, ToolMetadata, + }; + + let tools = vec![ + create_tool_metadata( + "search", + "Search for documents in the Terraphim knowledge graph", + vec![ + ("query", "string", "The search query", true), + ("role", "string", "Optional role to filter by", false), + ("limit", "integer", "Maximum number of results to return", false), + ("skip", "integer", "Number of results to skip", false), + ], + ), + create_tool_metadata( + "update_config_tool", + "Update the Terraphim configuration", + vec![("config_str", "string", "JSON configuration string", true)], + ), + create_tool_metadata( + "build_autocomplete_index", + "Build FST-based autocomplete index from role's knowledge graph", + vec![("role", "string", "Optional role name to build autocomplete index for", false)], + ), + create_tool_metadata( + "autocomplete_terms", + "Autocomplete terms using FST prefix + fuzzy fallback", + vec![ + ("query", "string", "Prefix or term for suggestions", true), + ("limit", "integer", "Max suggestions (default 10)", false), + ("role", "string", "Optional role name to use for autocomplete", false), + ], + ), + create_tool_metadata( + "autocomplete_with_snippets", + "Autocomplete and return short snippets from matching documents", + vec![ + ("query", "string", "Prefix 
or term for suggestions with snippets", true), + ("limit", "integer", "Max suggestions (default 10)", false), + ("role", "string", "Optional role name to use for autocomplete", false), + ], + ), + create_tool_metadata( + "fuzzy_autocomplete_search", + "Perform fuzzy autocomplete search using Jaro-Winkler similarity", + vec![ + ("query", "string", "The text to get autocomplete suggestions for", true), + ("similarity", "number", "Minimum similarity threshold (0.0-1.0, default: 0.6)", false), + ("limit", "integer", "Maximum number of suggestions to return (default: 10)", false), + ], + ), + create_tool_metadata( + "fuzzy_autocomplete_search_levenshtein", + "Perform fuzzy autocomplete search using Levenshtein distance", + vec![ + ("query", "string", "The text to get autocomplete suggestions for", true), + ("max_edit_distance", "integer", "Maximum Levenshtein edit distance allowed (default: 2)", false), + ("limit", "integer", "Maximum number of suggestions to return (default: 10)", false), + ], + ), + create_tool_metadata( + "fuzzy_autocomplete_search_jaro_winkler", + "Perform fuzzy autocomplete search using Jaro-Winkler similarity (explicit)", + vec![ + ("query", "string", "The text to get autocomplete suggestions for", true), + ("similarity", "number", "Minimum similarity threshold (0.0-1.0, default: 0.6)", false), + ("limit", "integer", "Maximum number of suggestions to return (default: 10)", false), + ], + ), + create_tool_metadata( + "serialize_autocomplete_index", + "Serialize the current autocomplete index to a base64-encoded string", + vec![], + ), + create_tool_metadata( + "deserialize_autocomplete_index", + "Deserialize an autocomplete index from a base64-encoded string", + vec![("base64_data", "string", "The base64-encoded string of the serialized index", true)], + ), + create_tool_metadata( + "find_matches", + "Find all term matches in text using Aho-Corasick algorithm", + vec![ + ("text", "string", "The text to search in", true), + ("role", "string", 
"Optional role to filter by", false), + ("return_positions", "boolean", "Whether to return positions (default: false)", false), + ], + ), + create_tool_metadata( + "replace_matches", + "Replace matched terms in text with links using specified format", + vec![ + ("text", "string", "The text to replace terms in", true), + ("role", "string", "Optional role to filter by", false), + ("link_type", "string", "The type of link to use (wiki, html, markdown)", true), + ], + ), + create_tool_metadata( + "extract_paragraphs_from_automata", + "Extract paragraphs containing matched terms from text", + vec![ + ("text", "string", "The text to extract paragraphs from", true), + ("role", "string", "Optional role to filter by", false), + ("include_term", "boolean", "Whether to include the matched term (default: true)", false), + ], + ), + create_tool_metadata( + "json_decode", + "Parse Logseq JSON output using terraphim_automata", + vec![("jsonlines", "string", "The JSON lines string to decode", true)], + ), + create_tool_metadata( + "load_thesaurus", + "Load thesaurus from a local file or remote URL", + vec![("automata_path", "string", "The path to the automata file (local or remote URL)", true)], + ), + create_tool_metadata( + "load_thesaurus_from_json", + "Load thesaurus from a JSON string", + vec![("json_str", "string", "The JSON string to load thesaurus from", true)], + ), + create_tool_metadata( + "is_all_terms_connected_by_path", + "Check if all matched terms in text can be connected by a single path in the knowledge graph", + vec![ + ("text", "string", "The text to check for term connectivity", true), + ("role", "string", "Optional role to use for thesaurus and graph", false), + ], + ), + ]; + + Ok(McpServerMetadata { + name: "terraphim-mcp".to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), + tools, + description: Some("Terraphim MCP Server - Knowledge graph search and autocomplete tools".to_string()), + }) +} + +fn create_tool_metadata( + name: &str, + 
description: &str, + params: Vec<(&str, &str, &str, bool)>, +) -> terraphim_mcp_codegen::ToolMetadata { + use terraphim_mcp_codegen::{ + categorize_tool, extract_capabilities, ParameterMetadata, ToolMetadata, + }; + + let parameters: Vec = params + .into_iter() + .map(|(pname, ptype, pdesc, required)| ParameterMetadata { + name: pname.to_string(), + description: pdesc.to_string(), + json_type: ptype.to_string(), + required, + default_value: None, + array_item_type: None, + object_properties: None, + }) + .collect(); + + let category = categorize_tool(name); + + let mut metadata = ToolMetadata { + name: name.to_string(), + title: None, + description: description.to_string(), + category, + capabilities: vec![], + parameters, + return_type: "Promise".to_string(), + examples: terraphim_mcp_codegen::introspection::generate_examples(name), + }; + + metadata.capabilities = extract_capabilities(&metadata); + metadata +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_generate_wrappers() { + // This would test the wrapper generation + // For now, just a placeholder + } +} diff --git a/crates/terraphim_mcp_codegen/src/introspection.rs b/crates/terraphim_mcp_codegen/src/introspection.rs new file mode 100644 index 00000000..e65ae37d --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/introspection.rs @@ -0,0 +1,360 @@ +//! 
MCP Server Introspection - Extract tool metadata from MCP servers + +use std::collections::HashMap; +use std::sync::Arc; + +use crate::{ + categorize_tool, extract_capabilities, CodegenError, McpServerMetadata, ParameterMetadata, + Result, ToolMetadata, +}; + +/// Extract metadata from MCP server tool definitions +pub fn extract_server_metadata( + tools: Vec, + server_info: &rmcp::model::ServerInfo, +) -> Result { + let mut tool_metadata = Vec::new(); + + for tool in tools { + let metadata = extract_tool_metadata(tool)?; + tool_metadata.push(metadata); + } + + Ok(McpServerMetadata { + name: server_info.server_info.name.clone(), + version: server_info.server_info.version.clone(), + tools: tool_metadata, + description: server_info.instructions.clone(), + }) +} + +/// Extract metadata from a single MCP tool definition +fn extract_tool_metadata(tool: rmcp::model::Tool) -> Result { + let name = tool.name.to_string(); + + // Extract parameters from input schema + let parameters = extract_parameters_from_schema(&tool.input_schema)?; + + // Determine category and capabilities + let category = categorize_tool(&name); + + // Create base tool metadata + // Note: rmcp::model::Tool has fields: name, description, input_schema, annotations + let mut metadata = ToolMetadata { + name: name.clone(), + title: None, // Not available in rmcp Tool, will derive from name if needed + description: tool.description.map(|s| s.to_string()).unwrap_or_default(), + category, + capabilities: vec![], // Will be filled after creation + parameters, + return_type: "Promise".to_string(), + examples: generate_examples(&name), + }; + + // Extract capabilities based on the metadata + metadata.capabilities = extract_capabilities(&metadata); + + Ok(metadata) +} + +/// Extract parameter metadata from JSON Schema +fn extract_parameters_from_schema( + schema: &Arc>, +) -> Result> { + let mut parameters = Vec::new(); + + // Get required fields + let required_fields: Vec = schema + .get("required") + 
.and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(|s| s.to_string())) + .collect() + }) + .unwrap_or_default(); + + // Get properties + if let Some(properties) = schema.get("properties") { + if let Some(props_obj) = properties.as_object() { + for (param_name, param_schema) in props_obj { + let param = extract_single_parameter(param_name, param_schema, &required_fields)?; + parameters.push(param); + } + } + } + + // Sort parameters: required first, then optional + parameters.sort_by(|a, b| { + match (a.required, b.required) { + (true, false) => std::cmp::Ordering::Less, + (false, true) => std::cmp::Ordering::Greater, + _ => a.name.cmp(&b.name), // Alphabetical within same required status + } + }); + + Ok(parameters) +} + +/// Extract a single parameter from its JSON Schema definition +fn extract_single_parameter( + name: &str, + schema: &serde_json::Value, + required_fields: &[String], +) -> Result { + let schema_obj = schema.as_object().ok_or_else(|| { + CodegenError::InvalidSpec(format!("Parameter {} schema is not an object", name)) + })?; + + let json_type = schema_obj + .get("type") + .and_then(|v| v.as_str()) + .unwrap_or("any") + .to_string(); + + let description = schema_obj + .get("description") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let default_value = schema_obj.get("default").cloned(); + + let array_item_type = if json_type == "array" { + schema_obj + .get("items") + .and_then(|v| v.as_object()) + .and_then(|obj| obj.get("type")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + } else { + None + }; + + let object_properties = if json_type == "object" { + schema_obj + .get("properties") + .and_then(|v| v.as_object()) + .map(|props| { + let mut nested_params = HashMap::new(); + for (prop_name, prop_schema) in props { + if let Ok(nested_param) = + extract_single_parameter(prop_name, prop_schema, &[]) + { + nested_params.insert(prop_name.clone(), nested_param); + } + } + nested_params + 
}) + } else { + None + }; + + Ok(ParameterMetadata { + name: name.to_string(), + description, + json_type, + required: required_fields.contains(&name.to_string()), + default_value, + array_item_type, + object_properties, + }) +} + +/// Generate usage examples for a tool +pub fn generate_examples(tool_name: &str) -> Vec { + match tool_name { + "search" => vec![ + r#" +const results = await terraphim.search({ + query: "rust async patterns", + limit: 10 +}); +console.log(`Found ${results.length} documents`); +"# + .trim() + .to_string(), + ], + "autocomplete_terms" => vec![ + r#" +const suggestions = await terraphim.autocompleteTerms({ + query: "tera", + limit: 5 +}); +suggestions.forEach(s => console.log(s)); +"# + .trim() + .to_string(), + ], + "find_matches" => vec![ + r#" +const matches = await terraphim.findMatches({ + text: "This document discusses async rust patterns with tokio", + returnPositions: true +}); +console.log(`Found ${matches.length} term matches`); +"# + .trim() + .to_string(), + ], + "fuzzy_autocomplete_search" => vec![ + r#" +const suggestions = await terraphim.fuzzyAutocompleteSearch({ + query: "asynch", // typo intentional + similarity: 0.7, + limit: 5 +}); +"# + .trim() + .to_string(), + ], + "replace_matches" => vec![ + r#" +const linkedText = await terraphim.replaceMatches({ + text: "Learn about async rust and tokio patterns", + linkType: "markdown" +}); +// Returns: "Learn about [async rust](url) and [tokio patterns](url)" +"# + .trim() + .to_string(), + ], + "extract_paragraphs_from_automata" => vec![ + r#" +const paragraphs = await terraphim.extractParagraphsFromAutomata({ + text: longDocument, + includeTerm: true +}); +paragraphs.forEach(p => console.log(p.term, p.paragraph)); +"# + .trim() + .to_string(), + ], + "build_autocomplete_index" => vec![ + r#" +await terraphim.buildAutocompleteIndex({ + role: "engineer" +}); +console.log("Index built successfully"); +"# + .trim() + .to_string(), + ], + "is_all_terms_connected_by_path" => vec![ 
/// Convert snake_case to camelCase.
///
/// Every `_` is dropped and the character following it is upper-cased using
/// the full Unicode mapping, so a character whose upper-case form is more
/// than one character (for example `ß` -> `SS`) is kept whole. Consecutive
/// or trailing underscores produce no output of their own.
pub fn to_camel_case(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut capitalize_next = false;

    for c in s.chars() {
        if c == '_' {
            capitalize_next = true;
        } else if capitalize_next {
            result.extend(c.to_uppercase());
            capitalize_next = false;
        } else {
            result.push(c);
        }
    }

    result
}

/// Convert snake_case to PascalCase.
///
/// Equivalent to [`to_camel_case`] with the first character upper-cased.
/// The remainder is taken by character, not by byte offset, so input that
/// starts with a multi-byte character is handled instead of panicking on a
/// non-boundary slice. An empty input yields an empty string.
pub fn to_pascal_case(s: &str) -> String {
    let camel = to_camel_case(s);
    let mut chars = camel.chars();

    match chars.next() {
        Some(first) => first.to_uppercase().chain(chars).collect(),
        None => String::new(),
    }
}
assert_eq!(param.description, "The search query"); + assert!(param.required); + } + + #[test] + fn test_extract_single_parameter_optional() { + let schema = serde_json::json!({ + "type": "integer", + "description": "Maximum results" + }); + + let param = extract_single_parameter("limit", &schema, &[]).unwrap(); + + assert_eq!(param.name, "limit"); + assert_eq!(param.json_type, "integer"); + assert!(!param.required); + } + + #[test] + fn test_generate_examples() { + let examples = generate_examples("search"); + assert!(!examples.is_empty()); + assert!(examples[0].contains("terraphim.search")); + + let fuzzy_examples = generate_examples("fuzzy_autocomplete_search"); + assert!(fuzzy_examples[0].contains("fuzzyAutocompleteSearch")); + } +} diff --git a/crates/terraphim_mcp_codegen/src/lib.rs b/crates/terraphim_mcp_codegen/src/lib.rs new file mode 100644 index 00000000..f9f91a43 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/lib.rs @@ -0,0 +1,389 @@ +//! MCP Code Generator - Generates TypeScript and Python wrappers for MCP tools +//! +//! This crate enables AI agents to use MCP tools as importable code modules, +//! achieving massive token reduction by allowing code-based tool usage instead +//! of traditional tool calling patterns. 
+ +pub mod introspection; +pub mod python_gen; +pub mod runtime; +pub mod typescript_gen; + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use thiserror::Error; + +/// Error types for MCP code generation +#[derive(Error, Debug)] +pub enum CodegenError { + #[error("Template error: {0}")] + Template(#[from] tera::Error), + + #[error("Serialization error: {0}")] + Serialization(#[from] serde_json::Error), + + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + + #[error("MCP introspection error: {0}")] + Introspection(String), + + #[error("Invalid tool specification: {0}")] + InvalidSpec(String), + + #[error("Unsupported type: {0}")] + UnsupportedType(String), +} + +pub type Result = std::result::Result; + +/// Metadata for a single MCP tool parameter +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ParameterMetadata { + /// Parameter name + pub name: String, + /// Parameter description + pub description: String, + /// JSON Schema type (string, number, integer, boolean, array, object) + pub json_type: String, + /// Whether the parameter is required + pub required: bool, + /// Default value if any + pub default_value: Option, + /// For arrays, the item type + pub array_item_type: Option, + /// For objects, the properties + pub object_properties: Option>, +} + +impl ParameterMetadata { + /// Convert JSON type to TypeScript type + pub fn to_typescript_type(&self) -> String { + match self.json_type.as_str() { + "string" => "string".to_string(), + "number" => "number".to_string(), + "integer" => "number".to_string(), + "boolean" => "boolean".to_string(), + "array" => { + let item_type = self + .array_item_type + .as_deref() + .unwrap_or("any") + .to_string(); + format!("{}[]", item_type) + } + "object" => "Record".to_string(), + "null" => "null".to_string(), + _ => "any".to_string(), + } + } + + /// Convert JSON type to Python type hint + pub fn to_python_type(&self) -> String { + match self.json_type.as_str() { + "string" => 
"str".to_string(), + "number" => "float".to_string(), + "integer" => "int".to_string(), + "boolean" => "bool".to_string(), + "array" => { + let item_type = self.array_item_type.as_deref().unwrap_or("Any"); + let python_item = match item_type { + "string" => "str", + "number" => "float", + "integer" => "int", + "boolean" => "bool", + _ => "Any", + }; + format!("List[{}]", python_item) + } + "object" => "Dict[str, Any]".to_string(), + "null" => "None".to_string(), + _ => "Any".to_string(), + } + } +} + +/// Metadata for a single MCP tool +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolMetadata { + /// Tool name (e.g., "search", "autocomplete_terms") + pub name: String, + /// Human-readable title + pub title: Option, + /// Tool description + pub description: String, + /// Tool category for discovery + pub category: ToolCategory, + /// Tool capabilities for discovery + pub capabilities: Vec, + /// Input parameters + pub parameters: Vec, + /// Return type description + pub return_type: String, + /// Example usage code + pub examples: Vec, +} + +/// Tool category for progressive discovery +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub enum ToolCategory { + KnowledgeGraph, + Autocomplete, + TextProcessing, + Configuration, + Analysis, + Serialization, + Other(String), +} + +impl std::fmt::Display for ToolCategory { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ToolCategory::KnowledgeGraph => write!(f, "knowledge-graph"), + ToolCategory::Autocomplete => write!(f, "autocomplete"), + ToolCategory::TextProcessing => write!(f, "text-processing"), + ToolCategory::Configuration => write!(f, "configuration"), + ToolCategory::Analysis => write!(f, "analysis"), + ToolCategory::Serialization => write!(f, "serialization"), + ToolCategory::Other(s) => write!(f, "{}", s), + } + } +} + +/// Complete MCP server metadata for code generation +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct 
McpServerMetadata { + /// Server name + pub name: String, + /// Server version + pub version: String, + /// All available tools + pub tools: Vec, + /// Server description + pub description: Option, +} + +/// Output format for code generation +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OutputFormat { + TypeScript, + Python, +} + +impl std::str::FromStr for OutputFormat { + type Err = CodegenError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "typescript" | "ts" => Ok(OutputFormat::TypeScript), + "python" | "py" => Ok(OutputFormat::Python), + _ => Err(CodegenError::InvalidSpec(format!( + "Unknown output format: {}", + s + ))), + } + } +} + +/// Configuration for code generation +#[derive(Debug, Clone)] +pub struct CodegenConfig { + /// Output format (TypeScript or Python) + pub format: OutputFormat, + /// Output file path + pub output_path: std::path::PathBuf, + /// Module name to use + pub module_name: String, + /// Whether to generate async functions + pub async_functions: bool, + /// Include documentation comments + pub include_docs: bool, + /// Include usage examples + pub include_examples: bool, +} + +impl Default for CodegenConfig { + fn default() -> Self { + Self { + format: OutputFormat::TypeScript, + output_path: std::path::PathBuf::from("terraphim.ts"), + module_name: "terraphim".to_string(), + async_functions: true, + include_docs: true, + include_examples: true, + } + } +} + +/// Main code generator trait +pub trait CodeGenerator { + /// Generate code for all tools in the metadata + fn generate(&self, metadata: &McpServerMetadata, config: &CodegenConfig) -> Result; + + /// Generate code for a single tool + fn generate_tool(&self, tool: &ToolMetadata, config: &CodegenConfig) -> Result; +} + +/// Generate code based on configuration +pub fn generate_code(metadata: &McpServerMetadata, config: &CodegenConfig) -> Result { + match config.format { + OutputFormat::TypeScript => { + let generator = 
typescript_gen::TypeScriptGenerator::new()?; + generator.generate(metadata, config) + } + OutputFormat::Python => { + let generator = python_gen::PythonGenerator::new()?; + generator.generate(metadata, config) + } + } +} + +/// Categorize tools based on their names and descriptions +pub fn categorize_tool(tool_name: &str) -> ToolCategory { + match tool_name { + "search" | "find_matches" | "is_all_terms_connected_by_path" => { + ToolCategory::KnowledgeGraph + } + name if name.contains("autocomplete") => ToolCategory::Autocomplete, + "replace_matches" | "extract_paragraphs_from_automata" | "json_decode" => { + ToolCategory::TextProcessing + } + "update_config_tool" => ToolCategory::Configuration, + "load_thesaurus" | "load_thesaurus_from_json" | "build_autocomplete_index" => { + ToolCategory::Analysis + } + name if name.contains("serialize") || name.contains("deserialize") => { + ToolCategory::Serialization + } + _ => ToolCategory::Other("uncategorized".to_string()), + } +} + +/// Extract capabilities from tool metadata +pub fn extract_capabilities(tool: &ToolMetadata) -> Vec { + let mut capabilities = Vec::new(); + + // Based on tool name patterns + if tool.name.contains("search") { + capabilities.push("search".to_string()); + } + if tool.name.contains("autocomplete") { + capabilities.push("autocomplete".to_string()); + capabilities.push("suggestions".to_string()); + } + if tool.name.contains("fuzzy") { + capabilities.push("fuzzy-matching".to_string()); + } + if tool.name.contains("find") || tool.name.contains("match") { + capabilities.push("pattern-matching".to_string()); + } + if tool.name.contains("replace") { + capabilities.push("text-transformation".to_string()); + } + if tool.name.contains("extract") { + capabilities.push("text-extraction".to_string()); + } + if tool.name.contains("load") { + capabilities.push("data-loading".to_string()); + } + if tool.name.contains("serialize") || tool.name.contains("deserialize") { + 
capabilities.push("serialization".to_string()); + } + if tool.name.contains("config") { + capabilities.push("configuration".to_string()); + } + + // Add read/write based on likely side effects + if tool.name.starts_with("update") || tool.name.starts_with("build") { + capabilities.push("write".to_string()); + } else { + capabilities.push("read".to_string()); + } + + capabilities +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_categorize_tool() { + assert_eq!(categorize_tool("search"), ToolCategory::KnowledgeGraph); + assert_eq!( + categorize_tool("autocomplete_terms"), + ToolCategory::Autocomplete + ); + assert_eq!( + categorize_tool("fuzzy_autocomplete_search"), + ToolCategory::Autocomplete + ); + assert_eq!( + categorize_tool("replace_matches"), + ToolCategory::TextProcessing + ); + assert_eq!( + categorize_tool("update_config_tool"), + ToolCategory::Configuration + ); + assert_eq!( + categorize_tool("serialize_autocomplete_index"), + ToolCategory::Serialization + ); + } + + #[test] + fn test_extract_capabilities() { + let tool = ToolMetadata { + name: "fuzzy_autocomplete_search".to_string(), + title: None, + description: "Fuzzy search".to_string(), + category: ToolCategory::Autocomplete, + capabilities: vec![], + parameters: vec![], + return_type: "string[]".to_string(), + examples: vec![], + }; + + let caps = extract_capabilities(&tool); + assert!(caps.contains(&"autocomplete".to_string())); + assert!(caps.contains(&"fuzzy-matching".to_string())); + assert!(caps.contains(&"read".to_string())); + } + + #[test] + fn test_parameter_type_conversion() { + let param = ParameterMetadata { + name: "items".to_string(), + description: "Array of strings".to_string(), + json_type: "array".to_string(), + required: true, + default_value: None, + array_item_type: Some("string".to_string()), + object_properties: None, + }; + + assert_eq!(param.to_typescript_type(), "string[]"); + assert_eq!(param.to_python_type(), "List[str]"); + } + + #[test] + fn 
test_output_format_parsing() { + assert_eq!( + "typescript".parse::().unwrap(), + OutputFormat::TypeScript + ); + assert_eq!( + "ts".parse::().unwrap(), + OutputFormat::TypeScript + ); + assert_eq!( + "python".parse::().unwrap(), + OutputFormat::Python + ); + assert_eq!( + "py".parse::().unwrap(), + OutputFormat::Python + ); + } +} diff --git a/crates/terraphim_mcp_codegen/src/python_gen.rs b/crates/terraphim_mcp_codegen/src/python_gen.rs new file mode 100644 index 00000000..0cc033f7 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/python_gen.rs @@ -0,0 +1,319 @@ +//! Python Code Generator for MCP Tools + +use crate::{ + introspection::to_camel_case, CodeGenerator, CodegenConfig, McpServerMetadata, Result, + ToolMetadata, +}; +use tera::{Context, Tera}; + +const PYTHON_MODULE_TEMPLATE: &str = r#" +""" +{{ server_name }} MCP Tools +{{ server_description }} + +Generated automatically from MCP server introspection. +Version: {{ server_version }} + +Usage: + from {{ module_name }} import {{ module_name }} + + results = await {{ module_name }}.search(query="rust patterns") +""" + +from typing import Any, Dict, List, Optional +import asyncio + +# Type alias for MCP call results +McpCallResult = Dict[str, Any] + +# MCP Runtime - connects to actual MCP server +async def mcp_call(tool_name: str, params: Dict[str, Any]) -> McpCallResult: + """Call an MCP tool. 
This should be injected by the runtime.""" + raise NotImplementedError("mcp_call must be injected by the MCP runtime") + +{% for tool in tools %} +{% if include_docs %} +async def {{ tool.snake_name }}( +{% for param in tool.parameters %} + {{ param.name }}: {% if param.required %}{{ param.python_type }}{% else %}Optional[{{ param.python_type }}] = None{% endif %}, +{% endfor %} +) -> McpCallResult: + """ + {{ tool.description }} + + Category: {{ tool.category }} + Capabilities: {{ tool.capabilities | join(sep=", ") }} + + Args: +{% for param in tool.parameters %} + {{ param.name }}: {{ param.description }}{% if not param.required %} (optional){% endif %} +{% endfor %} + + Returns: + McpCallResult: The result from the MCP server +{% if include_examples %} + + Example: +{% for example in tool.examples %} + {{ example | replace(from="\n", to="\n ") }} +{% endfor %} +{% endif %} + """ +{% else %} +async def {{ tool.snake_name }}( +{% for param in tool.parameters %} + {{ param.name }}: {% if param.required %}{{ param.python_type }}{% else %}Optional[{{ param.python_type }}] = None{% endif %}, +{% endfor %} +) -> McpCallResult: +{% endif %} + params = { +{% for param in tool.parameters %} +{% if param.required %} + "{{ param.name }}": {{ param.name }}, +{% else %} + "{{ param.name }}": {{ param.name }}, +{% endif %} +{% endfor %} + } + # Remove None values for optional parameters + params = {k: v for k, v in params.items() if v is not None} + return await mcp_call("{{ tool.name }}", params) + +{% endfor %} + +# Main module class +class {{ module_name_pascal }}: + """{{ server_name }} MCP Tools API""" + +{% for tool in tools %} + {{ tool.snake_name }} = staticmethod({{ tool.snake_name }}) +{% endfor %} + +# Convenience alias +{{ module_name }} = {{ module_name_pascal }} + +__all__ = [ + "{{ module_name }}", + "{{ module_name_pascal }}", +{% for tool in tools %} + "{{ tool.snake_name }}", +{% endfor %} +] +"#; + +/// Python code generator +pub struct PythonGenerator { + 
tera: Tera, +} + +impl PythonGenerator { + /// Create a new Python generator + pub fn new() -> Result { + let mut tera = Tera::default(); + tera.add_raw_template("python_module", PYTHON_MODULE_TEMPLATE)?; + + Ok(Self { tera }) + } + + /// Convert camelCase to snake_case + fn to_snake_case(s: &str) -> String { + let mut result = String::new(); + for (i, c) in s.chars().enumerate() { + if c.is_uppercase() { + if i > 0 { + result.push('_'); + } + result.push(c.to_lowercase().next().unwrap()); + } else if c == '_' { + result.push('_'); + } else { + result.push(c); + } + } + result + } + + /// Convert to PascalCase for class names + fn to_pascal_case_python(s: &str) -> String { + s.split('_') + .map(|word| { + let mut chars = word.chars(); + match chars.next() { + None => String::new(), + Some(first) => { + first.to_uppercase().collect::() + chars.as_str().to_lowercase().as_str() + } + } + }) + .collect::() + } +} + +impl CodeGenerator for PythonGenerator { + fn generate(&self, metadata: &McpServerMetadata, config: &CodegenConfig) -> Result { + let mut context = Context::new(); + + // Server info + context.insert("server_name", &metadata.name); + context.insert("server_version", &metadata.version); + context.insert( + "server_description", + metadata.description.as_deref().unwrap_or(""), + ); + context.insert("module_name", &config.module_name); + context.insert( + "module_name_pascal", + &Self::to_pascal_case_python(&config.module_name), + ); + context.insert("include_docs", &config.include_docs); + context.insert("include_examples", &config.include_examples); + + // Transform tools for template + let tools: Vec = metadata + .tools + .iter() + .map(|tool| { + let params: Vec = tool + .parameters + .iter() + .map(|p| { + serde_json::json!({ + "name": Self::to_snake_case(&p.name), + "description": p.description, + "python_type": p.to_python_type(), + "required": p.required, + }) + }) + .collect(); + + // Convert examples to Python style + let python_examples: Vec = tool 
+ .examples + .iter() + .map(|ex| { + ex.replace("await terraphim.", "await ") + .replace("const ", "") + .replace("let ", "") + .replace(";", "") + .replace(" =>", ":") + .replace("console.log", "print") + .replace("${", "{") + .replace("}`", "}") + }) + .collect(); + + serde_json::json!({ + "name": tool.name, + "snake_name": Self::to_snake_case(&tool.name), + "description": tool.description, + "category": tool.category.to_string(), + "capabilities": tool.capabilities, + "parameters": params, + "examples": python_examples, + }) + }) + .collect(); + + context.insert("tools", &tools); + + let rendered = self.tera.render("python_module", &context)?; + + // Clean up extra whitespace + let cleaned = rendered + .lines() + .map(|line| line.trim_end()) + .collect::>() + .join("\n"); + + Ok(cleaned.trim().to_string()) + } + + fn generate_tool(&self, tool: &ToolMetadata, config: &CodegenConfig) -> Result { + // Create single-tool metadata + let metadata = McpServerMetadata { + name: "terraphim".to_string(), + version: "1.0.0".to_string(), + tools: vec![tool.clone()], + description: None, + }; + + self.generate(&metadata, config) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ParameterMetadata, ToolCategory}; + + #[test] + fn test_to_snake_case() { + assert_eq!( + PythonGenerator::to_snake_case("autocomplete_terms"), + "autocomplete_terms" + ); + assert_eq!( + PythonGenerator::to_snake_case("fuzzy_autocomplete_search"), + "fuzzy_autocomplete_search" + ); + assert_eq!(PythonGenerator::to_snake_case("search"), "search"); + } + + #[test] + fn test_to_pascal_case_python() { + assert_eq!( + PythonGenerator::to_pascal_case_python("terraphim"), + "Terraphim" + ); + assert_eq!( + PythonGenerator::to_pascal_case_python("mcp_server"), + "McpServer" + ); + } + + #[test] + fn test_generate_simple_tool() { + let generator = PythonGenerator::new().unwrap(); + + let tool = ToolMetadata { + name: "search".to_string(), + title: Some("Search".to_string()), + description: 
"Search for documents".to_string(), + category: ToolCategory::KnowledgeGraph, + capabilities: vec!["search".to_string()], + parameters: vec![ + ParameterMetadata { + name: "query".to_string(), + description: "The search query".to_string(), + json_type: "string".to_string(), + required: true, + default_value: None, + array_item_type: None, + object_properties: None, + }, + ParameterMetadata { + name: "limit".to_string(), + description: "Max results".to_string(), + json_type: "integer".to_string(), + required: false, + default_value: None, + array_item_type: None, + object_properties: None, + }, + ], + return_type: "McpCallResult".to_string(), + examples: vec!["results = await search(query=\"test\")".to_string()], + }; + + let config = CodegenConfig { + module_name: "terraphim".to_string(), + ..Default::default() + }; + let code = generator.generate_tool(&tool, &config).unwrap(); + + assert!(code.contains("async def search(")); + assert!(code.contains("query: str,")); + assert!(code.contains("limit: Optional[int] = None,")); + assert!(code.contains("mcp_call(\"search\"")); + } +} diff --git a/crates/terraphim_mcp_codegen/src/runtime.rs b/crates/terraphim_mcp_codegen/src/runtime.rs new file mode 100644 index 00000000..90fa0fe8 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/runtime.rs @@ -0,0 +1,306 @@ +//! MCP Runtime Bridge - Enables code execution environment to call MCP tools +//! +//! This module provides the runtime infrastructure that allows code generated +//! by the TypeScript/Python generators to actually call MCP tools. 
+ +use std::sync::Arc; + +use crate::{CodegenError, Result}; + +/// Configuration for the MCP runtime +#[derive(Debug, Clone)] +pub struct RuntimeConfig { + /// MCP server URL for HTTP transport + pub mcp_server_url: Option, + /// Whether to use stdio transport + pub use_stdio: bool, + /// Timeout for MCP calls in milliseconds + pub timeout_ms: u64, + /// Maximum concurrent calls + pub max_concurrent: usize, +} + +impl Default for RuntimeConfig { + fn default() -> Self { + Self { + mcp_server_url: None, + use_stdio: true, + timeout_ms: 30000, + max_concurrent: 10, + } + } +} + +/// MCP Runtime that bridges code execution to MCP servers +pub struct McpRuntime { + config: RuntimeConfig, +} + +impl McpRuntime { + /// Create a new MCP runtime + pub fn new(config: RuntimeConfig) -> Self { + Self { config } + } + + /// Generate JavaScript runtime code that injects the mcpCall function + pub fn generate_javascript_runtime(&self) -> String { + let server_url = self + .config + .mcp_server_url + .as_deref() + .unwrap_or("http://localhost:3001"); + + format!( + r#" +// MCP Runtime Bridge for JavaScript/TypeScript +// This provides the mcpCall function that generated code uses + +const MCP_SERVER_URL = "{}"; +const MCP_TIMEOUT_MS = {}; + +async function mcpCall(toolName, params) {{ + const response = await fetch(`${{MCP_SERVER_URL}}/mcp/tools/call`, {{ + method: 'POST', + headers: {{ + 'Content-Type': 'application/json', + }}, + body: JSON.stringify({{ + jsonrpc: '2.0', + id: Date.now(), + method: 'tools/call', + params: {{ + name: toolName, + arguments: params + }} + }}), + signal: AbortSignal.timeout(MCP_TIMEOUT_MS) + }}); + + if (!response.ok) {{ + throw new Error(`MCP call failed: ${{response.statusText}}`); + }} + + const result = await response.json(); + + if (result.error) {{ + throw new Error(`MCP tool error: ${{result.error.message}}`); + }} + + return result.result; +}} + +// Make mcpCall available globally +globalThis.mcpCall = mcpCall; +"#, + server_url, 
self.config.timeout_ms + ) + } + + /// Generate Python runtime code that injects the mcp_call function + pub fn generate_python_runtime(&self) -> String { + let server_url = self + .config + .mcp_server_url + .as_deref() + .unwrap_or("http://localhost:3001"); + + format!( + r#" +# MCP Runtime Bridge for Python +# This provides the mcp_call function that generated code uses + +import aiohttp +import json +from typing import Any, Dict + +MCP_SERVER_URL = "{}" +MCP_TIMEOUT_MS = {} + +async def mcp_call(tool_name: str, params: Dict[str, Any]) -> Dict[str, Any]: + """ + Call an MCP tool through the MCP server. + + Args: + tool_name: Name of the tool to call + params: Parameters to pass to the tool + + Returns: + The result from the MCP server + + Raises: + Exception: If the MCP call fails + """ + async with aiohttp.ClientSession() as session: + payload = {{ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/call", + "params": {{ + "name": tool_name, + "arguments": params + }} + }} + + timeout = aiohttp.ClientTimeout(total=MCP_TIMEOUT_MS / 1000) + + async with session.post( + f"{{MCP_SERVER_URL}}/mcp/tools/call", + json=payload, + timeout=timeout + ) as response: + if not response.ok: + raise Exception(f"MCP call failed: {{response.status}}") + + result = await response.json() + + if "error" in result: + raise Exception(f"MCP tool error: {{result['error']['message']}}") + + return result.get("result", {{}}) + +# Inject into module namespace +import sys +current_module = sys.modules[__name__] +current_module.mcp_call = mcp_call +"#, + server_url, self.config.timeout_ms + ) + } + + /// Write JavaScript runtime to a file + pub fn write_javascript_runtime(&self, path: &std::path::Path) -> Result<()> { + let runtime_code = self.generate_javascript_runtime(); + std::fs::write(path, runtime_code)?; + Ok(()) + } + + /// Write Python runtime to a file + pub fn write_python_runtime(&self, path: &std::path::Path) -> Result<()> { + let runtime_code = self.generate_python_runtime(); + 
std::fs::write(path, runtime_code)?; + Ok(()) + } + + /// Setup runtime in a VM environment + pub async fn setup_vm_environment(&self, workspace_path: &std::path::Path) -> Result<()> { + // Create workspace directories + std::fs::create_dir_all(workspace_path.join("mcp-runtime"))?; + + // Write JavaScript runtime + self.write_javascript_runtime(&workspace_path.join("mcp-runtime/runtime.js"))?; + + // Write Python runtime + self.write_python_runtime(&workspace_path.join("mcp-runtime/runtime.py"))?; + + // Write package.json for Node.js + let package_json = serde_json::json!({ + "name": "mcp-runtime", + "version": "1.0.0", + "type": "module", + "main": "runtime.js" + }); + std::fs::write( + workspace_path.join("mcp-runtime/package.json"), + serde_json::to_string_pretty(&package_json)?, + )?; + + Ok(()) + } +} + +/// Builder for creating complete code execution packages +pub struct CodeExecutionPackage { + /// Generated wrapper code (TypeScript or Python) + pub wrapper_code: String, + /// Runtime bridge code + pub runtime_code: String, + /// Configuration + pub config: RuntimeConfig, +} + +impl CodeExecutionPackage { + /// Create a new code execution package for TypeScript + pub fn typescript(wrapper_code: String, config: RuntimeConfig) -> Self { + let runtime = McpRuntime::new(config.clone()); + Self { + wrapper_code, + runtime_code: runtime.generate_javascript_runtime(), + config, + } + } + + /// Create a new code execution package for Python + pub fn python(wrapper_code: String, config: RuntimeConfig) -> Self { + let runtime = McpRuntime::new(config.clone()); + Self { + wrapper_code, + runtime_code: runtime.generate_python_runtime(), + config, + } + } + + /// Write the complete package to a directory + pub fn write_to_directory(&self, dir: &std::path::Path) -> Result<()> { + std::fs::create_dir_all(dir)?; + + // Determine file extensions based on content + let (wrapper_name, runtime_name) = if self.wrapper_code.contains("export async function") { + ("terraphim.ts", 
"runtime.js") + } else { + ("terraphim.py", "runtime.py") + }; + + std::fs::write(dir.join(wrapper_name), &self.wrapper_code)?; + std::fs::write(dir.join(runtime_name), &self.runtime_code)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_generate_javascript_runtime() { + let config = RuntimeConfig { + mcp_server_url: Some("http://localhost:3001".to_string()), + timeout_ms: 30000, + ..Default::default() + }; + + let runtime = McpRuntime::new(config); + let code = runtime.generate_javascript_runtime(); + + assert!(code.contains("http://localhost:3001")); + assert!(code.contains("async function mcpCall")); + assert!(code.contains("globalThis.mcpCall")); + } + + #[test] + fn test_generate_python_runtime() { + let config = RuntimeConfig { + mcp_server_url: Some("http://localhost:3001".to_string()), + timeout_ms: 30000, + ..Default::default() + }; + + let runtime = McpRuntime::new(config); + let code = runtime.generate_python_runtime(); + + assert!(code.contains("http://localhost:3001")); + assert!(code.contains("async def mcp_call")); + assert!(code.contains("aiohttp")); + } + + #[test] + fn test_code_execution_package() { + let wrapper = "export async function search() {}".to_string(); + let config = RuntimeConfig::default(); + + let package = CodeExecutionPackage::typescript(wrapper, config); + + assert!(package.wrapper_code.contains("export async")); + assert!(package.runtime_code.contains("mcpCall")); + } +} diff --git a/crates/terraphim_mcp_codegen/src/typescript_gen.rs b/crates/terraphim_mcp_codegen/src/typescript_gen.rs new file mode 100644 index 00000000..13a840c9 --- /dev/null +++ b/crates/terraphim_mcp_codegen/src/typescript_gen.rs @@ -0,0 +1,258 @@ +//! 
TypeScript Code Generator for MCP Tools + +use crate::{ + introspection::{to_camel_case, to_pascal_case}, + CodeGenerator, CodegenConfig, McpServerMetadata, Result, ToolMetadata, +}; +use tera::{Context, Tera}; + +const TYPESCRIPT_MODULE_TEMPLATE: &str = r#" +/** + * {{ server_name }} MCP Tools + * {{ server_description }} + * + * Generated automatically from MCP server introspection. + * Version: {{ server_version }} + * + * Usage: + * ```typescript + * import { {{ module_name }} } from './{{ module_name }}'; + * + * const results = await {{ module_name }}.search({ query: "rust patterns" }); + * ``` + */ + +// Runtime type for MCP call results +interface McpCallResult { + content: Array<{ type: string; text?: string; resource?: any }>; + isError?: boolean; +} + +// MCP Runtime - connects to actual MCP server +declare const mcpCall: (toolName: string, params: Record) => Promise; + +{% for tool in tools %} +{% if include_docs %} +/** + * {{ tool.description }} + * + * Category: {{ tool.category }} + * Capabilities: {{ tool.capabilities | join(sep=", ") }} +{% for param in tool.parameters %} + * @param {{ param.name }} - {{ param.description }}{% if not param.required %} (optional){% endif %} +{% endfor %} + * @returns Promise +{% if include_examples %} + * + * @example + * ```typescript +{% for example in tool.examples %} + * {{ example | replace(from="\n", to="\n * ") }} +{% endfor %} + * ``` +{% endif %} + */ +{% endif %} +export interface {{ tool.pascal_name }}Params { +{% for param in tool.parameters %} + {{ param.name }}{% if not param.required %}?{% endif %}: {{ param.typescript_type }}; +{% endfor %} +} + +export async function {{ tool.camel_name }}( + params: {{ tool.pascal_name }}Params +): Promise { + return await mcpCall('{{ tool.name }}', params); +} + +{% endfor %} + +// Main module export +export const {{ module_name }} = { +{% for tool in tools %} + {{ tool.camel_name }}, +{% endfor %} +}; + +// Default export +export default {{ module_name }}; +"#; + 
+/// TypeScript code generator +pub struct TypeScriptGenerator { + tera: Tera, +} + +impl TypeScriptGenerator { + /// Create a new TypeScript generator + pub fn new() -> Result { + let mut tera = Tera::default(); + tera.add_raw_template("typescript_module", TYPESCRIPT_MODULE_TEMPLATE)?; + + Ok(Self { tera }) + } +} + +impl CodeGenerator for TypeScriptGenerator { + fn generate(&self, metadata: &McpServerMetadata, config: &CodegenConfig) -> Result { + let mut context = Context::new(); + + // Server info + context.insert("server_name", &metadata.name); + context.insert("server_version", &metadata.version); + context.insert( + "server_description", + metadata.description.as_deref().unwrap_or(""), + ); + context.insert("module_name", &config.module_name); + context.insert("include_docs", &config.include_docs); + context.insert("include_examples", &config.include_examples); + + // Transform tools for template + let tools: Vec = metadata + .tools + .iter() + .map(|tool| { + let params: Vec = tool + .parameters + .iter() + .map(|p| { + serde_json::json!({ + "name": p.name, + "description": p.description, + "typescript_type": p.to_typescript_type(), + "required": p.required, + }) + }) + .collect(); + + serde_json::json!({ + "name": tool.name, + "camel_name": to_camel_case(&tool.name), + "pascal_name": to_pascal_case(&tool.name), + "description": tool.description, + "category": tool.category.to_string(), + "capabilities": tool.capabilities, + "parameters": params, + "examples": tool.examples, + }) + }) + .collect(); + + context.insert("tools", &tools); + + let rendered = self.tera.render("typescript_module", &context)?; + + // Clean up extra whitespace + let cleaned = rendered + .lines() + .filter(|line| !line.trim().is_empty() || line.is_empty()) + .collect::>() + .join("\n"); + + Ok(cleaned.trim().to_string()) + } + + fn generate_tool(&self, tool: &ToolMetadata, config: &CodegenConfig) -> Result { + // Create single-tool metadata + let metadata = McpServerMetadata { + 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{ParameterMetadata, ToolCategory};

    /// Builds a scalar parameter fixture: no default value, no array item
    /// type and no nested object properties.
    fn scalar_param(
        name: &str,
        description: &str,
        json_type: &str,
        required: bool,
    ) -> ParameterMetadata {
        ParameterMetadata {
            name: name.to_string(),
            description: description.to_string(),
            json_type: json_type.to_string(),
            required,
            default_value: None,
            array_item_type: None,
            object_properties: None,
        }
    }

    /// Builds an untitled, parameterless tool fixture with one capability.
    fn bare_tool(
        name: &str,
        description: &str,
        category: ToolCategory,
        capability: &str,
    ) -> ToolMetadata {
        ToolMetadata {
            name: name.to_string(),
            title: None,
            description: description.to_string(),
            category,
            capabilities: vec![capability.to_string()],
            parameters: vec![],
            return_type: "Promise".to_string(),
            examples: vec![],
        }
    }

    /// A single tool with one required and one optional parameter must yield
    /// a params interface plus an async wrapper that forwards to `mcpCall`.
    #[test]
    fn test_generate_simple_tool() {
        let search_tool = ToolMetadata {
            name: "search".to_string(),
            title: Some("Search".to_string()),
            description: "Search for documents".to_string(),
            category: ToolCategory::KnowledgeGraph,
            capabilities: vec!["search".to_string()],
            parameters: vec![
                scalar_param("query", "The search query", "string", true),
                scalar_param("limit", "Max results", "integer", false),
            ],
            return_type: "Promise".to_string(),
            examples: vec!["const r = await terraphim.search({...})".to_string()],
        };

        let generator = TypeScriptGenerator::new().unwrap();
        let config = CodegenConfig::default();
        let ts_source = generator.generate_tool(&search_tool, &config).unwrap();

        assert!(ts_source.contains("export interface SearchParams"));
        assert!(ts_source.contains("query: string;"));
        assert!(ts_source.contains("limit?: number;"));
        assert!(ts_source.contains("export async function search"));
        assert!(ts_source.contains("mcpCall('search'"));
    }

    /// Several tools in one server description must each get a camelCase
    /// async function and appear in the generated listing.
    #[test]
    fn test_generate_multiple_tools() {
        let server = McpServerMetadata {
            name: "terraphim-mcp".to_string(),
            version: "0.1.0".to_string(),
            tools: vec![
                bare_tool(
                    "search",
                    "Search documents",
                    ToolCategory::KnowledgeGraph,
                    "search",
                ),
                bare_tool(
                    "autocomplete_terms",
                    "Get suggestions",
                    ToolCategory::Autocomplete,
                    "autocomplete",
                ),
            ],
            description: Some("Terraphim MCP Server".to_string()),
        };

        let generator = TypeScriptGenerator::new().unwrap();
        let config = CodegenConfig::default();
        let ts_source = generator.generate(&server, &config).unwrap();

        assert!(ts_source.contains("export async function search"));
        assert!(ts_source.contains("export async function autocompleteTerms"));
        assert!(ts_source.contains("search,"));
        assert!(ts_source.contains("autocompleteTerms,"));
    }
}
+ +## Core Approach: Code Over Tool Calls + +Instead of making individual tool calls, you write code that imports and uses MCP tools as modules. +This approach: +- Reduces token usage by 98% (processing 150K tokens down to 2K) +- Processes data efficiently within the code execution environment +- Returns only the final results, not intermediate data + +## Available MCP Tools + +Import tools from the 'mcp-servers' module: + +```typescript +import { terraphim } from 'mcp-servers'; +``` + +### Knowledge Graph Tools +- `terraphim.search({ query, role?, limit?, skip? })` - Search documents in knowledge graph +- `terraphim.findMatches({ text, role?, returnPositions? })` - Find term matches using Aho-Corasick +- `terraphim.isAllTermsConnectedByPath({ text, role? })` - Check if matched terms connect via single path + +### Autocomplete Tools +- `terraphim.autocompleteTerms({ query, limit?, role? })` - Get term suggestions +- `terraphim.autocompleteWithSnippets({ query, limit?, role? })` - Get suggestions with snippets +- `terraphim.fuzzyAutocompleteSearch({ query, similarity?, limit? })` - Fuzzy search with Jaro-Winkler +- `terraphim.fuzzyAutocompleteSearchLevenshtein({ query, maxEditDistance?, limit? })` - Fuzzy with Levenshtein +- `terraphim.buildAutocompleteIndex({ role? })` - Build FST index for role + +### Text Processing Tools +- `terraphim.replaceMatches({ text, role?, linkType })` - Replace matches with links +- `terraphim.extractParagraphsFromAutomata({ text, role?, includeTerm? 
})` - Extract paragraphs with matches +- `terraphim.jsonDecode({ jsonlines })` - Parse Logseq JSON + +### Configuration & Data Tools +- `terraphim.updateConfigTool({ configStr })` - Update configuration +- `terraphim.loadThesaurus({ automataPath })` - Load thesaurus from file/URL +- `terraphim.loadThesaurusFromJson({ jsonStr })` - Load thesaurus from JSON +- `terraphim.serializeAutocompleteIndex()` - Serialize index to base64 +- `terraphim.deserializeAutocompleteIndex({ base64Data })` - Deserialize index + +## Code Writing Guidelines + +1. **Import only what you need** - Don't load unnecessary tools +2. **Process data in-environment** - Filter, transform, aggregate before returning +3. **Return minimal results** - Only the final answer, not intermediate data +4. **Use async/await** - All tool calls are asynchronous +5. **Handle errors gracefully** - Use try/catch for robustness +6. **Add comments** - Explain your logic for clarity + +## Example: Document Analysis + +User: "Find documents about async Rust patterns and summarize the top results" + +```typescript +import { terraphim } from 'mcp-servers'; + +async function analyzeAsyncRustPatterns() { + // Search for relevant documents + const results = await terraphim.search({ + query: "async rust patterns", + limit: 100 + }); + + // Filter high-quality results (processing in-environment, not through context) + const highQuality = results.filter(doc => doc.rank > 0.7); + + // Group by topic + const byTopic = highQuality.reduce((groups, doc) => { + const topic = extractTopic(doc); // Helper function + if (!groups[topic]) groups[topic] = []; + groups[topic].push(doc); + return groups; + }, {}); + + // Return only the summary, not all the documents + return { + total_found: highQuality.length, + topics: Object.keys(byTopic), + top_documents: highQuality.slice(0, 5).map(d => ({ + title: d.title, + url: d.url, + rank: d.rank + })), + by_topic: Object.entries(byTopic).map(([topic, docs]) => ({ + topic, + count: docs.length, 
+ best_doc: docs[0] + })) + }; +} + +function extractTopic(doc) { + // Simple topic extraction from document + const keywords = doc.tags || []; + return keywords[0] || 'general'; +} + +// Execute and return results +const analysis = await analyzeAsyncRustPatterns(); +console.log(JSON.stringify(analysis, null, 2)); +``` + +## Example: Term Connectivity Analysis + +```typescript +import { terraphim } from 'mcp-servers'; + +async function analyzeConnectivity(text: string) { + // Check if terms are connected in knowledge graph + const connected = await terraphim.isAllTermsConnectedByPath({ text }); + + // Get all matches + const matches = await terraphim.findMatches({ + text, + returnPositions: true + }); + + // Extract relevant paragraphs + const paragraphs = await terraphim.extractParagraphsFromAutomata({ + text, + includeTerm: true + }); + + return { + text_length: text.length, + terms_connected: connected, + match_count: matches.length, + key_paragraphs: paragraphs.length, + connectivity_score: connected ? 1.0 : matches.length > 0 ? 0.5 : 0.0 + }; +} +``` + +## Anti-Patterns to Avoid + +❌ **DON'T** pass large datasets through the result: +```typescript +// BAD - Returns all 1000 documents through context +const docs = await terraphim.search({ limit: 1000 }); +return docs; // Expensive! 
+``` + +✅ **DO** process and summarize: +```typescript +// GOOD - Returns only summary +const docs = await terraphim.search({ limit: 1000 }); +return { + count: docs.length, + top_5: docs.slice(0, 5) +}; +``` + +❌ **DON'T** make sequential calls when you can batch: +```typescript +// BAD - Multiple calls +const a = await terraphim.search({ query: "topic A" }); +const b = await terraphim.search({ query: "topic B" }); +const c = await terraphim.search({ query: "topic C" }); +``` + +✅ **DO** use concurrent calls: +```typescript +// GOOD - Parallel execution +const [a, b, c] = await Promise.all([ + terraphim.search({ query: "topic A" }), + terraphim.search({ query: "topic B" }), + terraphim.search({ query: "topic C" }) +]); +``` + +When you receive a task, write executable code that solves it efficiently using the MCP tools. +Focus on returning minimal, actionable results. +"#; + +/// System prompt for code-first agents using Python +pub const PYTHON_SYSTEM_PROMPT: &str = r#" +You are an AI assistant that solves problems by writing executable Python code. + +## Core Approach: Code Over Tool Calls + +Instead of making individual tool calls, you write code that imports and uses MCP tools as modules. 
+This approach: +- Reduces token usage by 98% (processing 150K tokens down to 2K) +- Processes data efficiently within the code execution environment +- Returns only the final results, not intermediate data + +## Available MCP Tools + +Import tools from the terraphim module: + +```python +from terraphim import terraphim +``` + +### Knowledge Graph Tools +- `await terraphim.search(query, role=None, limit=None, skip=None)` - Search documents +- `await terraphim.find_matches(text, role=None, return_positions=None)` - Find term matches +- `await terraphim.is_all_terms_connected_by_path(text, role=None)` - Check connectivity + +### Autocomplete Tools +- `await terraphim.autocomplete_terms(query, limit=None, role=None)` - Get suggestions +- `await terraphim.autocomplete_with_snippets(query, limit=None, role=None)` - With snippets +- `await terraphim.fuzzy_autocomplete_search(query, similarity=None, limit=None)` - Fuzzy search + +### Text Processing Tools +- `await terraphim.replace_matches(text, role=None, link_type=...)` - Replace with links +- `await terraphim.extract_paragraphs_from_automata(text, role=None)` - Extract paragraphs +- `await terraphim.json_decode(jsonlines)` - Parse Logseq JSON + +## Code Writing Guidelines + +1. **Import only what you need** - Don't load unnecessary modules +2. **Process data in-environment** - Filter, transform, aggregate before returning +3. **Return minimal results** - Only the final answer, not intermediate data +4. **Use async/await** - All tool calls are asynchronous +5. **Handle errors gracefully** - Use try/except for robustness +6. 
**Add comments** - Explain your logic for clarity + +## Example: Document Analysis + +```python +from terraphim import terraphim +import asyncio +from collections import defaultdict + +async def analyze_async_rust_patterns(): + # Search for relevant documents + results = await terraphim.search( + query="async rust patterns", + limit=100 + ) + + # Filter high-quality results (processing in-environment) + high_quality = [doc for doc in results if doc.get('rank', 0) > 0.7] + + # Group by topic + by_topic = defaultdict(list) + for doc in high_quality: + topic = doc.get('tags', ['general'])[0] if doc.get('tags') else 'general' + by_topic[topic].append(doc) + + # Return only the summary + return { + 'total_found': len(high_quality), + 'topics': list(by_topic.keys()), + 'top_documents': [ + {'title': d.get('title'), 'url': d.get('url'), 'rank': d.get('rank')} + for d in high_quality[:5] + ], + 'by_topic': [ + {'topic': topic, 'count': len(docs), 'best_doc': docs[0]} + for topic, docs in by_topic.items() + ] + } + +# Execute +result = asyncio.run(analyze_async_rust_patterns()) +print(result) +``` + +When you receive a task, write executable Python code that solves it efficiently using the MCP tools. +Focus on returning minimal, actionable results. +"#; + +/// Generate a task-specific prompt that includes code execution context +pub fn generate_task_prompt(task: &str, language: &str) -> String { + let system_prompt = match language { + "python" | "py" => PYTHON_SYSTEM_PROMPT, + _ => TYPESCRIPT_SYSTEM_PROMPT, + }; + + format!( + "{}\n\n## Current Task\n\n{}\n\nWrite code to solve this task. 
\ + Return only the final results needed to answer the question.", + system_prompt, task + ) +} + +/// Wrapper for code execution context +pub struct CodeExecutionPrompt { + pub system_prompt: String, + pub language: String, + pub available_tools: Vec, +} + +impl CodeExecutionPrompt { + /// Create a new TypeScript code execution prompt + pub fn typescript() -> Self { + Self { + system_prompt: TYPESCRIPT_SYSTEM_PROMPT.to_string(), + language: "typescript".to_string(), + available_tools: vec![ + "search".to_string(), + "autocomplete_terms".to_string(), + "autocomplete_with_snippets".to_string(), + "fuzzy_autocomplete_search".to_string(), + "find_matches".to_string(), + "replace_matches".to_string(), + "extract_paragraphs_from_automata".to_string(), + "is_all_terms_connected_by_path".to_string(), + "load_thesaurus".to_string(), + "build_autocomplete_index".to_string(), + ], + } + } + + /// Create a new Python code execution prompt + pub fn python() -> Self { + Self { + system_prompt: PYTHON_SYSTEM_PROMPT.to_string(), + language: "python".to_string(), + available_tools: vec![ + "search".to_string(), + "autocomplete_terms".to_string(), + "autocomplete_with_snippets".to_string(), + "fuzzy_autocomplete_search".to_string(), + "find_matches".to_string(), + "replace_matches".to_string(), + "extract_paragraphs_from_automata".to_string(), + "is_all_terms_connected_by_path".to_string(), + "load_thesaurus".to_string(), + "build_autocomplete_index".to_string(), + ], + } + } + + /// Generate a complete prompt for a specific task + pub fn for_task(&self, task: &str) -> String { + generate_task_prompt(task, &self.language) + } +} + +/// Code execution mode for agents +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum CodeExecutionMode { + /// Traditional tool calling (no code execution) + Traditional, + /// Code-first approach with TypeScript + TypeScript, + /// Code-first approach with Python + Python, + /// Automatic selection based on task + Auto, +} + +impl Default for 
CodeExecutionMode { + fn default() -> Self { + CodeExecutionMode::Auto + } +} + +/// Analyze a task to determine the best code execution mode +pub fn recommend_execution_mode(task: &str) -> CodeExecutionMode { + let task_lower = task.to_lowercase(); + + // Tasks that benefit from code execution + let code_patterns = [ + "analyze", + "summarize", + "filter", + "group", + "aggregate", + "process", + "transform", + "compare", + "calculate", + "statistics", + "multiple documents", + "batch", + "all documents", + ]; + + // Tasks better suited for traditional approach + let traditional_patterns = [ + "single", + "one document", + "quick lookup", + "simple search", + "what is", + "define", + ]; + + let code_score: i32 = code_patterns + .iter() + .filter(|p| task_lower.contains(*p)) + .count() as i32; + + let traditional_score: i32 = traditional_patterns + .iter() + .filter(|p| task_lower.contains(*p)) + .count() as i32; + + if code_score > traditional_score { + // Prefer TypeScript for most tasks as it's more widely supported + CodeExecutionMode::TypeScript + } else if traditional_score > code_score { + CodeExecutionMode::Traditional + } else { + // Default to code execution for efficiency + CodeExecutionMode::TypeScript + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_typescript_prompt_contains_tools() { + assert!(TYPESCRIPT_SYSTEM_PROMPT.contains("terraphim.search")); + assert!(TYPESCRIPT_SYSTEM_PROMPT.contains("terraphim.findMatches")); + assert!(TYPESCRIPT_SYSTEM_PROMPT.contains("import { terraphim }")); + } + + #[test] + fn test_python_prompt_contains_tools() { + assert!(PYTHON_SYSTEM_PROMPT.contains("terraphim.search")); + assert!(PYTHON_SYSTEM_PROMPT.contains("terraphim.find_matches")); + assert!(PYTHON_SYSTEM_PROMPT.contains("from terraphim import")); + } + + #[test] + fn test_generate_task_prompt() { + let task = "Find documents about Rust async patterns"; + let prompt = generate_task_prompt(task, "typescript"); + + 
assert!(prompt.contains("Current Task")); + assert!(prompt.contains("Rust async patterns")); + assert!(prompt.contains("terraphim.search")); + } + + #[test] + fn test_recommend_execution_mode() { + let analysis_task = "Analyze all documents about Rust and summarize the key patterns"; + assert_eq!( + recommend_execution_mode(analysis_task), + CodeExecutionMode::TypeScript + ); + + let simple_task = "What is the definition of async?"; + assert_eq!( + recommend_execution_mode(simple_task), + CodeExecutionMode::Traditional + ); + + let batch_task = "Process multiple documents and aggregate results"; + assert_eq!( + recommend_execution_mode(batch_task), + CodeExecutionMode::TypeScript + ); + } + + #[test] + fn test_code_execution_prompt_builder() { + let ts_prompt = CodeExecutionPrompt::typescript(); + assert_eq!(ts_prompt.language, "typescript"); + assert!(ts_prompt.available_tools.contains(&"search".to_string())); + + let py_prompt = CodeExecutionPrompt::python(); + assert_eq!(py_prompt.language, "python"); + assert!(py_prompt.available_tools.contains(&"search".to_string())); + } +} diff --git a/crates/terraphim_multi_agent/src/prompts/mod.rs b/crates/terraphim_multi_agent/src/prompts/mod.rs new file mode 100644 index 00000000..da20b5f4 --- /dev/null +++ b/crates/terraphim_multi_agent/src/prompts/mod.rs @@ -0,0 +1,6 @@ +//! Prompt templates for AI agents +//! +//! This module provides optimized prompts for different agent behaviors, +//! including code-first approaches for MCP tool usage. + +pub mod code_execution;