From ec6ee34a7ebe040e164b05a71ec0b7a0efe10973 Mon Sep 17 00:00:00 2001 From: John Lussier Date: Thu, 28 May 2026 14:49:18 -0700 Subject: [PATCH] feat(cursor): add cursor-sdk backend with typed turn events Extract agent/cursor/ package so CLI and SDK transports share CursorTurnAccumulator, lazy-install cursor-sdk via provider.cursor_sdk, and map CursorAgentError in the retry classifier. Keeps cursor_agent_client.py as a thin compatibility facade. Co-authored-by: Cursor --- agent/cursor/__init__.py | 44 + agent/cursor/accumulator.py | 235 +++ agent/cursor/backend.py | 82 + agent/cursor/cli_backend.py | 208 +++ agent/cursor/client.py | 454 +++++ agent/cursor/constants.py | 37 + agent/cursor/env.py | 89 + agent/cursor/events.py | 341 ++++ agent/cursor/prompt.py | 151 ++ agent/cursor/sdk_backend.py | 154 ++ agent/cursor/streaming.py | 159 ++ agent/cursor/tool_events.py | 163 ++ agent/cursor_agent_client.py | 1772 +------------------- agent/error_classifier.py | 76 + docs/cursor_architecture.md | 62 +- hermes_cli/main.py | 34 + hermes_cli/status.py | 31 + plugins/model-providers/cursor/__init__.py | 15 +- pyproject.toml | 1 + tests/agent/test_cursor_agent_client.py | 17 +- tests/agent/test_cursor_events.py | 84 + tests/agent/test_cursor_sdk_backend.py | 160 ++ tools/lazy_deps.py | 3 + 23 files changed, 2599 insertions(+), 1773 deletions(-) create mode 100644 agent/cursor/__init__.py create mode 100644 agent/cursor/accumulator.py create mode 100644 agent/cursor/backend.py create mode 100644 agent/cursor/cli_backend.py create mode 100644 agent/cursor/client.py create mode 100644 agent/cursor/constants.py create mode 100644 agent/cursor/env.py create mode 100644 agent/cursor/events.py create mode 100644 agent/cursor/prompt.py create mode 100644 agent/cursor/sdk_backend.py create mode 100644 agent/cursor/streaming.py create mode 100644 agent/cursor/tool_events.py create mode 100644 tests/agent/test_cursor_events.py create mode 100644 tests/agent/test_cursor_sdk_backend.py 
diff --git a/agent/cursor/__init__.py b/agent/cursor/__init__.py new file mode 100644 index 000000000000..cdcf327f2c77 --- /dev/null +++ b/agent/cursor/__init__.py @@ -0,0 +1,44 @@ +"""Cursor provider runtime package (CLI + SDK backends).""" + +from agent.cursor.accumulator import CursorTurnAccumulator, _StreamJsonAccumulator +from agent.cursor.backend import ( + cursor_sdk_installed, + ensure_cursor_sdk, + resolve_cursor_backend, +) +from agent.cursor.client import CursorAgentClient +from agent.cursor.constants import ( + CURSOR_MARKER_BASE_URL, + DEFAULT_CURSOR_COMMAND, + DEFAULT_CURSOR_MODEL, + DEFAULT_CURSOR_MODE, +) +from agent.cursor.events import ( + run_stream_event_to_events, + sdk_message_to_events, + stream_json_dict_to_events, +) +from agent.cursor.prompt import format_messages_as_prompt +from agent.cursor.sdk_backend import SdkSession, run_prompt_via_sdk +from agent.cursor.tool_events import CursorToolEvent, _CursorToolEvent + +__all__ = [ + "CURSOR_MARKER_BASE_URL", + "CursorAgentClient", + "CursorToolEvent", + "CursorTurnAccumulator", + "DEFAULT_CURSOR_COMMAND", + "DEFAULT_CURSOR_MODEL", + "DEFAULT_CURSOR_MODE", + "SdkSession", + "_CursorToolEvent", + "_StreamJsonAccumulator", + "cursor_sdk_installed", + "ensure_cursor_sdk", + "format_messages_as_prompt", + "resolve_cursor_backend", + "run_prompt_via_sdk", + "run_stream_event_to_events", + "sdk_message_to_events", + "stream_json_dict_to_events", +] diff --git a/agent/cursor/accumulator.py b/agent/cursor/accumulator.py new file mode 100644 index 000000000000..9f2c25a9665f --- /dev/null +++ b/agent/cursor/accumulator.py @@ -0,0 +1,235 @@ +"""Accumulates Cursor turn state from typed events (CLI or SDK).""" + +from __future__ import annotations + +import time +from types import SimpleNamespace +from typing import Any + +from agent.cursor.events import ( + AssistantTextEvent, + CursorTurnEvent, + SystemEvent, + ThinkingEvent, + ToolCompletedEvent, + ToolStartedEvent, + TurnResultEvent, + 
stream_json_dict_to_events, +) +from agent.cursor.tool_events import ( + CursorToolEvent, + summarise_cursor_tool_result, +) + + +class CursorTurnAccumulator: + """Accumulates state from a cursor turn event feed. + + Caller feeds typed events with :meth:`feed`. When a terminal + :class:`TurnResultEvent` arrives the accumulator stores success/failure + state and surface text. The instance is reusable per-call but not + thread-safe. + """ + + def __init__(self, on_tool_event: Any = None, on_text_event: Any = None) -> None: + self.text_parts: list[str] = [] + self.reasoning_parts: list[str] = [] + self.session_id: str = "" + self.request_id: str = "" + self.model_label: str = "" + self.duration_ms: int = 0 + self.usage: dict[str, int] = {} + self.terminal: bool = False + self.is_error: bool = False + self.error_message: str = "" + self.final_result_text: str = "" + self.event_log: list[tuple[str, Any]] = [] + self._on_tool_event = on_tool_event + self._on_text_event = on_text_event + self._tool_events: dict[str, CursorToolEvent] = {} + self.tool_events: list[CursorToolEvent] = [] + self.messages_estimate: int = 0 + self._pending_text: list[str] = [] + + def feed(self, event: CursorTurnEvent | dict[str, Any]) -> None: + """Accept a typed event or legacy stream-json dict.""" + if isinstance(event, dict): + for typed in stream_json_dict_to_events(event): + self.feed(typed) + return + + if isinstance(event, SystemEvent): + if event.model: + self.model_label = event.model + if event.session_id: + self.session_id = event.session_id + return + + if isinstance(event, ThinkingEvent): + if event.text: + self.reasoning_parts.append(event.text) + return + + if isinstance(event, AssistantTextEvent): + if event.text: + self.text_parts.append(event.text) + self.event_log.append(("text", event.text)) + self._pending_text.append(event.text) + return + + if isinstance(event, ToolStartedEvent): + if self._pending_text: + for buffered in self._pending_text: + 
self._dispatch_text_event(buffered) + self._pending_text.clear() + self._consume_tool_started(event) + self.event_log.append(("tool", None)) + return + + if isinstance(event, ToolCompletedEvent): + self._consume_tool_completed(event) + return + + if isinstance(event, TurnResultEvent): + self.terminal = True + self.is_error = event.is_error + self.duration_ms = event.duration_ms + if event.request_id: + self.request_id = event.request_id + if event.usage: + self.usage = dict(event.usage) + if event.result_text: + self.final_result_text = event.result_text + if not self.text_parts and not self.is_error: + self.text_parts.append(event.result_text) + if self.is_error and not self.error_message: + self.error_message = event.error_message or event.result_text or "cursor-agent returned an error" + return + + def _consume_tool_started(self, event: ToolStartedEvent) -> None: + evt = CursorToolEvent( + call_id=event.call_id, + envelope_key=event.envelope_key, + args=event.args, + ) + self._tool_events[event.call_id] = evt + self.tool_events.append(evt) + self._fire_tool_event("started", evt) + + def _consume_tool_completed(self, event: ToolCompletedEvent) -> None: + evt = self._tool_events.get(event.call_id) + if evt is None: + evt = CursorToolEvent( + call_id=event.call_id, + envelope_key=event.envelope_key, + args=event.args, + ) + self._tool_events[event.call_id] = evt + self.tool_events.append(evt) + self._fire_tool_event("started", evt) + evt.completed_at = time.monotonic() + evt.duration_ms = int((evt.completed_at - evt.started_at) * 1000) + result = event.result_payload.get("result") + if isinstance(result, dict): + if "error" in result and result.get("error"): + evt.is_error = True + success = result.get("success") if isinstance(result, dict) else None + if isinstance(success, dict): + la = success.get("linesAdded") + lr = success.get("linesRemoved") + ds = success.get("diffString") + if isinstance(la, int): + evt.lines_added = la + if isinstance(lr, int): + 
evt.lines_removed = lr + if isinstance(ds, str): + evt.diff_string = ds + evt.result_text = summarise_cursor_tool_result(event.envelope_key, event.result_payload) + self._fire_tool_event("completed", evt) + + def _fire_tool_event(self, stage: str, evt: CursorToolEvent) -> None: + if self._on_tool_event is None: + return + try: + self._on_tool_event(stage, evt) + except Exception: + pass + + def _dispatch_text_event(self, text: str) -> None: + if self._on_text_event is None: + return + try: + self._on_text_event(text) + except Exception: + pass + + def assembled_text(self) -> str: + return "".join(self.text_parts).strip() + + def synthesis_text(self) -> str: + tool_seen = False + synth: list[str] = [] + for kind, payload in self.event_log: + if kind == "tool": + tool_seen = True + synth.clear() + elif kind == "text": + synth.append(payload) + if synth: + return "".join(synth).strip() + if not tool_seen: + return self.assembled_text() + if self.final_result_text and self.final_result_text.strip(): + return self.final_result_text.strip() + return self.assembled_text() + + def narration_text(self) -> str: + narration: list[str] = [] + bucket: list[str] = [] + for kind, payload in self.event_log: + if kind == "tool": + if bucket: + narration.append("".join(bucket).strip()) + bucket = [] + elif kind == "text": + bucket.append(payload) + return "\n".join(n for n in narration if n) + + def assembled_reasoning(self) -> str: + return "".join(self.reasoning_parts).strip() + + def openai_usage(self) -> SimpleNamespace: + input_tokens_raw = int(self.usage.get("inputTokens", 0)) + output_tokens = int(self.usage.get("outputTokens", 0)) + cache_read_raw = int(self.usage.get("cacheReadTokens", 0)) + + rounds = max(len(self.tool_events) + 1, 1) + per_round_input = input_tokens_raw // rounds if rounds > 0 else input_tokens_raw + per_round_cache = cache_read_raw // rounds if rounds > 0 else cache_read_raw + approx_context_tokens = per_round_cache + per_round_input + + if 
self.messages_estimate > 0: + prompt_tokens = self.messages_estimate + else: + prompt_tokens = approx_context_tokens + + return SimpleNamespace( + prompt_tokens=prompt_tokens, + completion_tokens=output_tokens, + total_tokens=prompt_tokens + output_tokens, + prompt_tokens_details=SimpleNamespace(cached_tokens=per_round_cache), + cursor_raw_input_tokens=input_tokens_raw, + cursor_raw_cache_read_tokens=cache_read_raw, + cursor_internal_rounds=rounds, + cursor_per_round_context=approx_context_tokens, + ) + + +# Backward-compat alias for tests and legacy imports. +_StreamJsonAccumulator = CursorTurnAccumulator + + +__all__ = [ + "CursorTurnAccumulator", + "_StreamJsonAccumulator", +] diff --git a/agent/cursor/backend.py b/agent/cursor/backend.py new file mode 100644 index 000000000000..2f490a9e98e6 --- /dev/null +++ b/agent/cursor/backend.py @@ -0,0 +1,82 @@ +"""Backend selection and cursor-sdk lazy-install helpers.""" + +from __future__ import annotations + +import os + +from agent.cursor.constants import ( + DEFAULT_CURSOR_BACKEND, + _API_KEY_SENTINELS, + _SDK_MODES, + _VALID_BACKENDS, +) + + +def cursor_sdk_installed() -> bool: + try: + import cursor_sdk # noqa: F401 + except ImportError: + return False + return True + + +def ensure_cursor_sdk(*, prompt: bool = False) -> None: + """Lazy-install cursor-sdk when the SDK backend is selected.""" + if cursor_sdk_installed(): + return + try: + from tools import lazy_deps + from tools.lazy_deps import FeatureUnavailable + + lazy_deps.ensure("provider.cursor_sdk", prompt=prompt) + except FeatureUnavailable as exc: + raise RuntimeError( + "cursor-sdk is not installed. 
" + "Run: uv pip install cursor-sdk (or pip install 'hermes-agent[cursor]')" + ) from exc + + +def real_api_key(api_key: str | None) -> str | None: + key = (api_key or os.getenv("CURSOR_API_KEY", "") or "").strip() + if not key or key in _API_KEY_SENTINELS: + return None + return key + + +def map_hermes_mode_to_sdk(mode: str) -> str | None: + normalized = (mode or "agent").strip().lower() + if normalized == "ask": + return "plan" + if normalized in _SDK_MODES: + return normalized + return "agent" + + +def resolve_cursor_backend(*, api_key: str | None = None) -> str: + """Return the effective backend: ``cli`` or ``sdk``.""" + raw = os.getenv("HERMES_CURSOR_BACKEND", "").strip().lower() or DEFAULT_CURSOR_BACKEND + if raw not in _VALID_BACKENDS: + raw = DEFAULT_CURSOR_BACKEND + if raw == "cli": + return "cli" + if raw == "sdk": + ensure_cursor_sdk(prompt=False) + if not real_api_key(api_key): + raise RuntimeError( + "HERMES_CURSOR_BACKEND=sdk requires CURSOR_API_KEY " + "(Dashboard → Integrations → User API Keys)." 
+ ) + return "sdk" + # auto + if cursor_sdk_installed() and real_api_key(api_key): + return "sdk" + return "cli" + + +__all__ = [ + "cursor_sdk_installed", + "ensure_cursor_sdk", + "map_hermes_mode_to_sdk", + "real_api_key", + "resolve_cursor_backend", +] diff --git a/agent/cursor/cli_backend.py b/agent/cursor/cli_backend.py new file mode 100644 index 000000000000..3b5dc54be860 --- /dev/null +++ b/agent/cursor/cli_backend.py @@ -0,0 +1,208 @@ +"""CLI subprocess backend for the Cursor provider.""" + +from __future__ import annotations + +import json +import queue +import subprocess +import threading +import time +from collections import deque +from typing import Any, Callable + +from agent.cursor.accumulator import CursorTurnAccumulator +from agent.cursor.constants import _CURSOR_CLI_MODES +from agent.cursor.env import build_subprocess_env +from agent.cursor.events import stream_json_dict_to_events +from agent.redact import redact_sensitive_text + + +def build_argv( + *, + command: str, + mode: str, + model: str, + workspace: str, + api_key: str | None, + extra_args: list[str], +) -> list[str]: + argv = [ + command, + "-p", + "--output-format", + "stream-json", + ] + if mode in _CURSOR_CLI_MODES: + argv.extend(["--mode", mode]) + argv.extend( + [ + "--model", + model, + "--workspace", + workspace, + "--force", + "--trust", + ] + ) + if api_key: + argv.extend(["--api-key", api_key]) + argv.extend(extra_args) + return argv + + +def run_prompt_cli( + *, + command: str, + mode: str, + model: str, + workspace: str, + api_key: str | None, + extra_args: list[str], + prompt_text: str, + timeout_seconds: float, + on_tool_event: Any, + on_text_event: Any, + set_active_process: Callable[[subprocess.Popen[str] | None], None], + terminate_active_proc: Callable[[subprocess.Popen[str]], None], + mark_open: Callable[[], None], +) -> CursorTurnAccumulator: + """Execute one Hermes turn via cursor-agent subprocess.""" + argv = build_argv( + command=command, + mode=mode, + model=model, 
+ workspace=workspace, + api_key=api_key, + extra_args=extra_args, + ) + + try: + proc = subprocess.Popen( + argv, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + cwd=workspace, + env=build_subprocess_env(api_key), + ) + except FileNotFoundError as exc: + raise RuntimeError( + f"Could not start Cursor Agent CLI '{command}'. " + "Install Cursor CLI (https://cursor.com/dashboard/integrations) " + "or set HERMES_CURSOR_COMMAND / CURSOR_AGENT_PATH." + ) from exc + + if proc.stdin is None or proc.stdout is None: + proc.kill() + raise RuntimeError("cursor-agent process did not expose stdin/stdout pipes.") + + mark_open() + set_active_process(proc) + + try: + stderr_tail: deque[str] = deque(maxlen=80) + inbox: queue.Queue[dict[str, Any]] = queue.Queue() + + def _stderr_reader_early() -> None: + if proc.stderr is None: + return + for line in proc.stderr: + stderr_tail.append(line.rstrip("\n")) + + err_thread = threading.Thread(target=_stderr_reader_early, daemon=True) + err_thread.start() + + stdin_error: BaseException | None = None + try: + proc.stdin.write(prompt_text) + proc.stdin.flush() + except BrokenPipeError as exc: + stdin_error = exc + except Exception as exc: # pragma: no cover - defensive + stdin_error = exc + finally: + try: + proc.stdin.close() + except Exception: + pass + + if stdin_error is not None: + try: + proc.wait(timeout=3) + except Exception: + pass + err_thread.join(timeout=1) + exit_code = getattr(proc, "returncode", None) + if exit_code is None: + try: + exit_code = proc.poll() + except Exception: + exit_code = None + stderr_text = "\n".join(stderr_tail).strip() + redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else "" + detail = f" stderr: {redacted}" if redacted else "" + raise RuntimeError( + "cursor-agent closed stdin before reading the prompt " + f"(exit {exit_code}).{detail}" + ) from stdin_error + + def _stdout_reader() -> None: + if proc.stdout is None: + 
return + for line in proc.stdout: + line = line.strip() + if not line: + continue + try: + inbox.put(json.loads(line)) + except Exception: + stderr_tail.append("[stdout-non-json] " + line) + + out_thread = threading.Thread(target=_stdout_reader, daemon=True) + out_thread.start() + + accumulator = CursorTurnAccumulator( + on_tool_event=on_tool_event, + on_text_event=on_text_event, + ) + idle_seconds = float(timeout_seconds) + deadline = time.monotonic() + idle_seconds + + while not accumulator.terminal: + if time.monotonic() >= deadline: + terminate_active_proc(proc) + raise TimeoutError( + f"cursor-agent emitted no events for {idle_seconds:.0f}s; " + f"presumed hung. Set HERMES_CURSOR_TIMEOUT_SECONDS to " + f"increase the idle threshold." + ) + if proc.poll() is not None and inbox.empty(): + break + try: + event = inbox.get(timeout=0.25) + except queue.Empty: + continue + deadline = time.monotonic() + idle_seconds + try: + for typed in stream_json_dict_to_events(event): + accumulator.feed(typed) + except Exception: + continue + + if not accumulator.terminal: + stderr_text = "\n".join(stderr_tail).strip() + redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else "" + raise RuntimeError( + "cursor-agent exited before emitting a terminal result. 
" + + (f"stderr tail:\n{redacted}" if redacted else "(no stderr)") + ) + + return accumulator + finally: + terminate_active_proc(proc) + set_active_process(None) + + +__all__ = ["build_argv", "run_prompt_cli"] diff --git a/agent/cursor/client.py b/agent/cursor/client.py new file mode 100644 index 000000000000..62a7dd92944f --- /dev/null +++ b/agent/cursor/client.py @@ -0,0 +1,454 @@ +"""OpenAI-compatible facade for the Cursor provider (CLI or SDK transport).""" + +from __future__ import annotations + +import os +import shutil +import subprocess +import tempfile +import threading +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from agent.copilot_acp_client import _extract_tool_calls_from_text +from agent.cursor.accumulator import CursorTurnAccumulator +from agent.cursor.backend import resolve_cursor_backend +from agent.cursor.cli_backend import run_prompt_cli +from agent.cursor.constants import ( + CURSOR_MARKER_BASE_URL, + DEFAULT_CURSOR_COMMAND, + DEFAULT_CURSOR_MODEL, + DEFAULT_CURSOR_MODE, + _API_KEY_SENTINELS, + _CURSOR_CLI_MODES, + _DEFAULT_TIMEOUT_SECONDS, + _VALID_CURSOR_MODES, +) +from agent.cursor.env import ( + build_subprocess_env, + resolve_command, + resolve_extra_args, + resolve_mode, + resolve_workspace_override, +) +from agent.cursor.prompt import format_messages_as_prompt +from agent.cursor.sdk_backend import SdkSession, run_prompt_via_sdk +from agent.cursor.streaming import CursorChatNamespace +from agent.cursor.tool_events import CursorToolEvent, build_cursor_tool_preview + + +class CursorAgentClient: + """Minimal OpenAI-client-compatible facade for Cursor (CLI or SDK).""" + + def __init__( + self, + *, + api_key: str | None = None, + base_url: str | None = None, + default_headers: dict[str, str] | None = None, + command: str | None = None, + args: list[str] | None = None, + workspace: str | None = None, + mode: str | None = None, + timeout_seconds: float | None = None, + tool_progress_callback: Any = None, 
+ context_estimate_callback: Any = None, + **_: Any, + ): + candidate_key = (api_key or os.getenv("CURSOR_API_KEY", "") or "").strip() + self.api_key = None if candidate_key in _API_KEY_SENTINELS else candidate_key + self.base_url = base_url or CURSOR_MARKER_BASE_URL + self._default_headers = dict(default_headers or {}) + self._command = (command or resolve_command()).strip() or DEFAULT_CURSOR_COMMAND + self._extra_args = list(args) if args else resolve_extra_args() + chosen_mode = (mode or resolve_mode()).strip().lower() or DEFAULT_CURSOR_MODE + if chosen_mode not in _VALID_CURSOR_MODES: + chosen_mode = DEFAULT_CURSOR_MODE + self._mode = chosen_mode + override = workspace or resolve_workspace_override() + self._workspace: str | None = override or None + self._timeout_seconds = float(timeout_seconds) if timeout_seconds else _DEFAULT_TIMEOUT_SECONDS + env_timeout = os.environ.get("HERMES_CURSOR_TIMEOUT_SECONDS", "").strip() + if env_timeout: + try: + env_timeout_val = float(env_timeout) + if env_timeout_val > 0: + self._timeout_seconds = env_timeout_val + except ValueError: + pass + + self._tool_progress_callback = tool_progress_callback + self._context_estimate_callback = context_estimate_callback + self._context_high_water: int = 0 + self._last_user_msg_count: int = 0 + + self.chat = CursorChatNamespace(self) + self.is_closed = False + + self._active_process: subprocess.Popen[str] | None = None + self._active_process_lock = threading.Lock() + self._ephemeral_dirs: list[str] = [] + self._dir_lock = threading.Lock() + self._session_workspace: str | None = None + + self._sdk_session = SdkSession() + self._backend = resolve_cursor_backend(api_key=self.api_key) + + @property + def backend(self) -> str: + """Effective transport: ``sdk`` (cursor-sdk) or ``cli`` (cursor-agent).""" + return getattr(self, "_backend", "cli") + + def close(self) -> None: + proc: subprocess.Popen[str] | None + with self._active_process_lock: + proc = self._active_process + self._active_process 
= None + self.is_closed = True + sdk_session = getattr(self, "_sdk_session", None) + if sdk_session is not None: + try: + sdk_session.close() + except Exception: + pass + self._context_high_water = 0 + if proc is not None: + try: + proc.terminate() + proc.wait(timeout=2) + except Exception: + try: + proc.kill() + except Exception: + pass + with self._dir_lock: + dirs, self._ephemeral_dirs = self._ephemeral_dirs, [] + self._session_workspace = None + for d in dirs: + try: + shutil.rmtree(d, ignore_errors=True) + except Exception: + pass + + def _create_chat_completion( + self, + *, + model: str | None = None, + messages: list[dict[str, Any]] | None = None, + timeout: float | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: Any = None, + **_: Any, + ) -> Any: + try: + user_msg_count = sum( + 1 for m in (messages or []) if (m or {}).get("role") == "user" + ) + except Exception: + user_msg_count = self._last_user_msg_count + is_new_user_turn = user_msg_count > self._last_user_msg_count + if is_new_user_turn: + self._context_high_water = 0 + self._last_user_msg_count = user_msg_count + + try: + from agent.model_metadata import estimate_request_tokens_rough + self._last_messages_estimate = estimate_request_tokens_rough( + messages or [], tools=tools or None + ) + except Exception: + self._last_messages_estimate = 0 + + if self._last_messages_estimate > self._context_high_water: + self._context_high_water = self._last_messages_estimate + if callable(self._context_estimate_callback) and self._last_messages_estimate > 0: + try: + self._context_estimate_callback( + self._last_messages_estimate, reset=is_new_user_turn + ) + except TypeError: + try: + self._context_estimate_callback(self._last_messages_estimate) + except Exception: + pass + except Exception: + pass + + prompt_text = format_messages_as_prompt( + messages or [], + model=model, + tools=tools, + tool_choice=tool_choice, + ) + + if timeout is None: + effective_timeout = self._timeout_seconds 
+ elif isinstance(timeout, (int, float)): + effective_timeout = float(timeout) + else: + candidates = [ + getattr(timeout, attr, None) + for attr in ("read", "write", "connect", "pool", "timeout") + ] + numeric = [float(v) for v in candidates if isinstance(v, (int, float))] + effective_timeout = max(numeric) if numeric else self._timeout_seconds + + chosen_model = (model or DEFAULT_CURSOR_MODEL).strip() or DEFAULT_CURSOR_MODEL + + accumulator = self._run_prompt( + prompt_text=prompt_text, + model=chosen_model, + timeout_seconds=effective_timeout, + ) + + assistant_text = accumulator.synthesis_text() + reasoning_text = accumulator.assembled_reasoning() or None + + if accumulator.is_error: + raise RuntimeError( + f"cursor-agent reported an error: {accumulator.error_message or assistant_text}" + ) + + tool_calls, cleaned_text = _extract_tool_calls_from_text(assistant_text) + cursor_internal_tools = [evt.to_public_dict() for evt in accumulator.tool_events] + cur_estimate = getattr(self, "_last_messages_estimate", 0) or 0 + cursor_per_round = self._estimate_per_round_context(accumulator) + new_high = max(self._context_high_water, cur_estimate, cursor_per_round) + self._context_high_water = new_high + accumulator.messages_estimate = new_high + assistant_message = SimpleNamespace( + content=cleaned_text, + tool_calls=tool_calls, + reasoning=reasoning_text, + reasoning_content=reasoning_text, + reasoning_details=None, + cursor_internal_tools=cursor_internal_tools, + ) + finish_reason = "tool_calls" if tool_calls else "stop" + choice = SimpleNamespace( + message=assistant_message, + finish_reason=finish_reason, + index=0, + ) + return SimpleNamespace( + choices=[choice], + usage=accumulator.openai_usage(), + model=chosen_model, + id=accumulator.request_id or f"cursor-{accumulator.session_id}", + object="chat.completion", + cursor_internal_tools=cursor_internal_tools, + ) + + def _build_argv(self, *, model: str, workspace: str) -> list[str]: + from agent.cursor.cli_backend 
import build_argv + + return build_argv( + command=self._command, + mode=self._mode, + model=model, + workspace=workspace, + api_key=self.api_key, + extra_args=self._extra_args, + ) + + def _allocate_workspace(self) -> tuple[str, bool]: + if self._workspace: + try: + Path(self._workspace).mkdir(parents=True, exist_ok=True) + except Exception: + pass + return self._workspace, False + with self._dir_lock: + if self._session_workspace is None: + tmp = tempfile.mkdtemp(prefix="hermes-cursor-") + self._session_workspace = tmp + self._ephemeral_dirs.append(tmp) + return self._session_workspace, True + + def _run_prompt( + self, + *, + prompt_text: str, + model: str, + timeout_seconds: float, + ) -> CursorTurnAccumulator: + backend = getattr(self, "_backend", "cli") + if backend == "sdk" and self.api_key: + workspace, _ephemeral = self._allocate_workspace() + try: + return run_prompt_via_sdk( + prompt_text=prompt_text, + model=model, + api_key=self.api_key, + workspace=workspace, + mode=self._mode, + timeout_seconds=timeout_seconds, + on_tool_event=self._build_tool_event_bridge(), + on_text_event=self._build_text_event_bridge(), + sdk_session=self._sdk_session, + ) + except RuntimeError as exc: + forced = os.getenv("HERMES_CURSOR_BACKEND", "").strip().lower() + if forced == "sdk": + raise + lowered = str(exc).lower() + if "sdk" in lowered and ( + "preview" in lowered + or "not enabled" in lowered + or "not installed" in lowered + ): + self._backend = "cli" + return self._run_prompt_cli( + prompt_text=prompt_text, + model=model, + timeout_seconds=timeout_seconds, + ) + raise + return self._run_prompt_cli( + prompt_text=prompt_text, + model=model, + timeout_seconds=timeout_seconds, + ) + + def _run_prompt_cli( + self, + *, + prompt_text: str, + model: str, + timeout_seconds: float, + ) -> CursorTurnAccumulator: + workspace, _ephemeral = self._allocate_workspace() + + def _set_active(proc: subprocess.Popen[str] | None) -> None: + with self._active_process_lock: + 
self._active_process = proc + + return run_prompt_cli( + command=self._command, + mode=self._mode, + model=model, + workspace=workspace, + api_key=self.api_key, + extra_args=self._extra_args, + prompt_text=prompt_text, + timeout_seconds=timeout_seconds, + on_tool_event=self._build_tool_event_bridge(), + on_text_event=self._build_text_event_bridge(), + set_active_process=_set_active, + terminate_active_proc=self._terminate_active_proc, + mark_open=lambda: setattr(self, "is_closed", False), + ) + + def _estimate_per_round_context(self, accumulator: CursorTurnAccumulator) -> int: + input_tokens_raw = int(accumulator.usage.get("inputTokens", 0)) + cache_read_raw = int(accumulator.usage.get("cacheReadTokens", 0)) + rounds = max(len(accumulator.tool_events) + 1, 1) + per_round_input = input_tokens_raw // rounds if rounds > 0 else input_tokens_raw + per_round_cache = cache_read_raw // rounds if rounds > 0 else cache_read_raw + return per_round_cache + per_round_input + + def reset_context_baseline(self) -> None: + self._context_high_water = 0 + + def _build_text_event_bridge(self) -> Any: + cb = self._tool_progress_callback + if cb is None: + return None + + def _bridge(text: str) -> None: + try: + preview = text.strip().splitlines()[0] if text else "" + if len(preview) > 240: + preview = preview[:237] + "..." 
+ if not preview: + return + cb("tool.started", "narrate", preview, {"text": text}) + cb( + "tool.completed", "narrate", None, None, + duration=0.0, is_error=False, result=text, + ) + except Exception: + pass + + return _bridge + + def _build_tool_event_bridge(self) -> Any: + cb = self._tool_progress_callback + if cb is None: + return None + + def _bridge(stage: str, evt: CursorToolEvent) -> None: + try: + if stage == "started": + preview = build_cursor_tool_preview(evt) + cb("tool.started", evt.name, preview, evt.args) + elif stage == "completed": + if ( + evt.lines_added is not None + or evt.lines_removed is not None + ) and isinstance(evt.args, dict): + evt.args["_diff_stats"] = { + "added": evt.lines_added or 0, + "removed": evt.lines_removed or 0, + } + if evt.diff_string: + evt.args["_diff_string"] = evt.diff_string + cb( + "tool.completed", + evt.name, + None, + None, + duration=evt.duration_ms / 1000.0, + is_error=evt.is_error, + result=evt.result_text, + ) + except Exception: + try: + cb(f"tool.{stage}", evt.name, evt.result_text or "", evt.args) + except Exception: + pass + + return _bridge + + def _terminate_active_proc(self, proc: subprocess.Popen[str]) -> None: + with self._active_process_lock: + current = self._active_process + if current is proc: + self._active_process = None + if proc.poll() is not None: + return + try: + proc.wait(timeout=0.7) + return + except subprocess.TimeoutExpired: + pass + try: + proc.terminate() + proc.wait(timeout=1.5) + except Exception: + try: + proc.kill() + except Exception: + pass + + def whoami(self) -> dict[str, Any]: + try: + out = subprocess.check_output( + [self._command, "status"], + text=True, + timeout=10, + env=build_subprocess_env(self.api_key), + ) + except Exception: + return {} + info: dict[str, Any] = {"raw": out.strip()} + for line in out.splitlines(): + line = line.strip() + if line.startswith("✓ Logged in as "): + info["email"] = line.removeprefix("✓ Logged in as ").strip() + info["authenticated"] = 
True + return info + + +__all__ = ["CursorAgentClient"] diff --git a/agent/cursor/constants.py b/agent/cursor/constants.py new file mode 100644 index 000000000000..4e5163966ebf --- /dev/null +++ b/agent/cursor/constants.py @@ -0,0 +1,37 @@ +"""Shared constants for the Cursor provider.""" +from __future__ import annotations + +CURSOR_MARKER_BASE_URL = "cursor://agent" +DEFAULT_CURSOR_COMMAND = "cursor-agent" +DEFAULT_CURSOR_MODE = "agent" +DEFAULT_CURSOR_MODEL = "auto" + +_VALID_CURSOR_MODES = frozenset({"ask", "plan", "agent"}) +_CURSOR_CLI_MODES = frozenset({"ask", "plan"}) +_DEFAULT_TIMEOUT_SECONDS = 1800.0 + +_API_KEY_SENTINELS = frozenset({ + "", + "cursor-agent-login", + "cursor-cli-login", + "external-process", + "external_process", +}) + +DEFAULT_CURSOR_BACKEND = "auto" +_VALID_BACKENDS = frozenset({"auto", "cli", "sdk"}) +_SDK_MODES = frozenset({"agent", "plan"}) + +__all__ = [ + "CURSOR_MARKER_BASE_URL", + "DEFAULT_CURSOR_COMMAND", + "DEFAULT_CURSOR_MODE", + "DEFAULT_CURSOR_MODEL", + "DEFAULT_CURSOR_BACKEND", + "_VALID_CURSOR_MODES", + "_CURSOR_CLI_MODES", + "_DEFAULT_TIMEOUT_SECONDS", + "_API_KEY_SENTINELS", + "_VALID_BACKENDS", + "_SDK_MODES", +] diff --git a/agent/cursor/env.py b/agent/cursor/env.py new file mode 100644 index 000000000000..4b2c5bb9a0ec --- /dev/null +++ b/agent/cursor/env.py @@ -0,0 +1,89 @@ +"""Environment and subprocess helpers for Cursor backends.""" + +from __future__ import annotations + +import os +import shlex + +from agent.cursor.constants import ( + DEFAULT_CURSOR_COMMAND, + DEFAULT_CURSOR_MODE, + _VALID_CURSOR_MODES, +) + + +def resolve_command() -> str: + return ( + os.getenv("HERMES_CURSOR_COMMAND", "").strip() + or os.getenv("CURSOR_AGENT_PATH", "").strip() + or DEFAULT_CURSOR_COMMAND + ) + + +def resolve_extra_args() -> list[str]: + raw = os.getenv("HERMES_CURSOR_ARGS", "").strip() + if not raw: + return [] + return shlex.split(raw) + + +def resolve_mode() -> str: + mode = os.getenv("HERMES_CURSOR_MODE", 
"").strip().lower() or DEFAULT_CURSOR_MODE + if mode not in _VALID_CURSOR_MODES: + mode = DEFAULT_CURSOR_MODE + return mode + + +def resolve_workspace_override() -> str: + return os.getenv("HERMES_CURSOR_WORKSPACE", "").strip() + + +def resolve_home_dir() -> str: + """Pick a stable HOME for the child process.""" + try: + from hermes_constants import get_subprocess_home + + profile_home = get_subprocess_home() + if profile_home: + return profile_home + except Exception: + pass + + home = os.environ.get("HOME", "").strip() + if home: + return home + + expanded = os.path.expanduser("~") + if expanded and expanded != "~": + return expanded + + try: + import pwd + + resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() + if resolved: + return resolved + except Exception: + pass + + return "/tmp" + + +def build_subprocess_env(api_key: str | None) -> dict[str, str]: + env = os.environ.copy() + env["HOME"] = resolve_home_dir() + if api_key: + env["CURSOR_API_KEY"] = api_key + env.setdefault("NO_COLOR", "1") + env.setdefault("TERM", "dumb") + return env + + +__all__ = [ + "build_subprocess_env", + "resolve_command", + "resolve_extra_args", + "resolve_home_dir", + "resolve_mode", + "resolve_workspace_override", +] diff --git a/agent/cursor/events.py b/agent/cursor/events.py new file mode 100644 index 000000000000..8d61a2da5625 --- /dev/null +++ b/agent/cursor/events.py @@ -0,0 +1,341 @@ +"""Typed turn events for Cursor CLI and SDK backends.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Mapping, Union + + +@dataclass(frozen=True) +class SystemEvent: + model: str = "" + session_id: str = "" + + +@dataclass(frozen=True) +class ThinkingEvent: + text: str + + +@dataclass(frozen=True) +class AssistantTextEvent: + text: str + + +@dataclass(frozen=True) +class ToolStartedEvent: + call_id: str + envelope_key: str + args: dict[str, Any] + + +@dataclass(frozen=True) +class ToolCompletedEvent: + call_id: str + envelope_key: 
# Lower-case tool names mapped to the ``<x>ToolCall`` envelope key they
# correspond to. Several aliases may share one envelope.
_TOOL_NAME_TO_ENVELOPE: dict[str, str] = {
    alias: envelope
    for envelope, aliases in (
        ("shellToolCall", ("shell",)),
        ("readToolCall", ("read", "read_file")),
        ("listToolCall", ("list", "list_directory")),
        ("editToolCall", ("edit", "edit_file")),
        ("writeToolCall", ("write", "write_file")),
        ("patchToolCall", ("patch",)),
        ("grepToolCall", ("grep",)),
        ("globToolCall", ("glob",)),
        ("searchToolCall", ("search",)),
        ("deleteToolCall", ("delete", "delete_file")),
        ("fetchToolCall", ("web_fetch", "fetch")),
    )
    for alias in aliases
}


def tool_name_to_envelope(name: str) -> str:
    """Return the ``<x>ToolCall`` envelope key for a tool *name*.

    A name that already ends in ``ToolCall`` is returned untouched. Otherwise
    the name is looked up case-insensitively in the alias table; unknown
    names get ``ToolCall`` appended with their original casing.
    """
    if not name.endswith("ToolCall"):
        fallback = f"{name}ToolCall"
        return _TOOL_NAME_TO_ENVELOPE.get(name.lower(), fallback)
    return name
dict[str, Any]) -> list[CursorTurnEvent]: + """Translate one cursor-agent stream-json dict into typed turn events.""" + evt_type = event.get("type") + if not isinstance(evt_type, str): + return [] + + if evt_type == "system": + model = event.get("model") + session = event.get("session_id") + return [SystemEvent( + model=model if isinstance(model, str) else "", + session_id=session if isinstance(session, str) else "", + )] + + if evt_type == "thinking": + text = event.get("text") + if isinstance(text, str) and text: + return [ThinkingEvent(text=text)] + return [] + + if evt_type == "assistant": + out: list[CursorTurnEvent] = [] + message = event.get("message") + if isinstance(message, dict): + content = message.get("content") + if isinstance(content, list): + for block in content: + if not isinstance(block, dict): + continue + if block.get("type") == "text": + text = block.get("text") + if isinstance(text, str) and text: + out.append(AssistantTextEvent(text=text)) + return out + + if evt_type == "tool_call": + sub = event.get("subtype") + call_id = event.get("call_id") + if not isinstance(call_id, str) or not call_id: + return [] + tool_call = event.get("tool_call") + if not isinstance(tool_call, dict) or not tool_call: + return [] + envelope_key = next(iter(tool_call.keys()), "") + payload = tool_call.get(envelope_key) if isinstance(envelope_key, str) else None + if not isinstance(payload, dict): + return [] + args_obj = payload.get("args") + if not isinstance(args_obj, dict): + args_obj = {} + if sub == "started": + return [ToolStartedEvent(call_id=call_id, envelope_key=envelope_key, args=args_obj)] + if sub == "completed": + return [ToolCompletedEvent( + call_id=call_id, + envelope_key=envelope_key, + args=args_obj, + result_payload=payload, + )] + return [] + + if evt_type == "result": + is_error = bool(event.get("is_error", False)) + subtype = event.get("subtype") + if subtype == "error": + is_error = True + duration = event.get("duration_ms") + duration_ms = 
def sdk_message_to_events(message: Any) -> list[CursorTurnEvent]:
    """Translate one SDKMessage into typed turn events.

    Dispatches on ``message.type``:

    * ``"system"``    -> one ``SystemEvent`` (model id and agent/run id).
    * ``"thinking"``  -> one ``ThinkingEvent`` when the text is non-empty.
    * ``"assistant"`` -> one ``AssistantTextEvent`` per non-empty text block.
    * ``"tool_call"`` -> ``ToolStartedEvent`` for running/started status, or
      ``ToolCompletedEvent`` for completed/error/failed status.

    Any other type, or a tool call with an unrecognised status, yields ``[]``.
    All fields are read with ``getattr`` so a missing attribute falls back to
    a default instead of raising.
    """
    msg_type = getattr(message, "type", None)

    if msg_type == "system":
        model = getattr(getattr(message, "model", None), "id", None) or ""
        session_id = (
            getattr(message, "agent_id", "")
            or getattr(message, "run_id", "")
            or ""  # keep the field a str even when both ids are None
        )
        return [SystemEvent(model=model, session_id=session_id)]

    if msg_type == "thinking":
        text = getattr(message, "text", "")
        if text:
            return [ThinkingEvent(text=text)]
        return []

    if msg_type == "assistant":
        out: list[CursorTurnEvent] = []
        msg = getattr(message, "message", None)
        for block in getattr(msg, "content", ()) or ():
            if getattr(block, "type", None) == "text":
                text = getattr(block, "text", "")
                if text:
                    out.append(AssistantTextEvent(text=text))
        return out

    if msg_type == "tool_call":
        envelope = tool_name_to_envelope(getattr(message, "name", "") or "cursor")
        args = getattr(message, "args", None)
        if not isinstance(args, Mapping):
            args = {}
        status = str(getattr(message, "status", "") or "").lower()
        call_id = getattr(message, "call_id", "") or ""

        if status in {"running", "started"}:
            return [ToolStartedEvent(call_id=call_id, envelope_key=envelope, args=dict(args))]

        if status in {"completed", "error", "failed"}:
            result = getattr(message, "result", None)
            payload: dict[str, Any] = {"args": dict(args)}
            if isinstance(result, Mapping):
                payload["result"] = dict(result)
            elif status in {"error", "failed"}:
                # Both failure statuses must be reported as errors; previously
                # "failed" fell through and was wrapped as a success.
                payload["result"] = {"error": result or "tool error"}
            else:
                payload["result"] = {"success": result} if result is not None else {}
            return [ToolCompletedEvent(
                call_id=call_id,
                envelope_key=envelope,
                args=dict(args),
                result_payload=payload,
            )]
        return []

    return []
def run_stream_event_to_events(event: Any) -> list[CursorTurnEvent]:
    """Convert a RunStreamEvent into the typed turn events it carries.

    ``event.kind`` selects the payload: ``"sdk_message"`` and
    ``"interaction_update"`` are delegated to their own translators, while
    ``"result"`` becomes a single ``TurnResultEvent``. An unknown kind, or a
    known kind whose payload is ``None``, produces an empty list.
    """
    kind = getattr(event, "kind", "")

    if kind == "sdk_message":
        sdk_message = event.sdk_message
        return [] if sdk_message is None else sdk_message_to_events(sdk_message)

    if kind == "interaction_update":
        update = event.interaction_update
        return [] if update is None else interaction_update_to_events(update)

    if kind != "result" or event.result is None:
        return []

    fields = dict(event.result)
    failed = str(fields.get("status") or "").lower() in {
        "error", "failed", "cancelled", "canceled",
    }

    # Keep only numeric usage counters, coerced to int.
    raw_usage = fields.get("usage")
    usage_items = raw_usage.items() if isinstance(raw_usage, Mapping) else ()
    numeric_usage = {
        str(key): int(value)
        for key, value in usage_items
        if isinstance(value, (int, float))
    }

    text = str(fields.get("result") or "")
    return [TurnResultEvent(
        is_error=failed,
        result_text=text,
        request_id=str(fields.get("runId") or fields.get("id") or ""),
        duration_ms=int(fields.get("durationMs") or fields.get("duration_ms") or 0),
        usage=numeric_usage,
        # On failure the result text doubles as the error message.
        error_message=text if failed else "",
    )]
agent.copilot_acp_client import _render_message_content + + +def format_messages_as_prompt( + messages: list[dict[str, Any]], + model: str | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: Any = None, +) -> str: + """Build the prompt sent to cursor-agent stdin or SDK ``send()``.""" + sections: list[str] = [] + has_tools = bool(tools) + if has_tools: + sections.extend([ + "You are powering a chat session inside Hermes Agent.", + "You have TWO sets of tools available:", + "(A) Your own built-in cursor-agent tools (shell, read_file, " + "edit_file, write_file, list_directory, grep, glob, web_fetch). " + "Use these DIRECTLY for filesystem/shell/search work — they run " + "on the real workspace, are fast, and Hermes will surface their " + "results to the user automatically.", + "(B) Hermes-side tools listed in the schema below. They cover " + "capabilities your built-in tools do NOT have (skills, MCP " + "servers, browser automation, remote APIs, etc.). To invoke " + "one of THESE, emit a " + "{...} block in OpenAI function-call " + "shape: " + '{"id":"call_","type":"function",' + '"function":{"name":"","arguments":""}}. ' + "``arguments`` MUST be a JSON STRING (escaped), not a nested " + "object.", + "RULES:", + "1. Prefer your built-in tools for any shell command, file " + "read/write/list/edit, grep, or glob operation — they're " + "faster than round-tripping through Hermes. CRITICAL for " + "file creation/modification: ALWAYS use the ``write`` or " + "``edit`` built-in tools, NEVER ``shell`` with ``echo > " + "file`` / ``cat > file`` / ``sed -i`` / ``>>``. Only the " + "write/edit tools report ``linesAdded`` / ``linesRemoved`` " + "/ ``diffString`` to the harness, which is what Hermes " + "renders as the colored ``+``/``-`` diff in the UI. Shell " + "redirections create the file but the user sees no diff " + "and has no idea what changed.", + "2. Only emit blocks for tools listed in the " + "schema below; do NOT invent tool names. 
Multiple tool_calls " + "per turn are allowed.", + "3. Work iteratively (ReAct-style): before each tool batch, " + "emit ONE short line of plain text saying what you're about " + "to check and why. After tool results come back, briefly " + "reflect on what you found before deciding the next step. " + "Hermes surfaces these intermediate lines to the user as " + "live narration so they can follow your reasoning.", + "4. Don't dump every tool call upfront — chain them: think, " + "tool, reflect, tool, reflect, ... then synthesise the final " + "answer at the end. If the task genuinely is independent " + "lookups, parallel tool calls in one batch are fine.", + "5. If no tool is needed (pure conversation, math, " + "summarising content already in the transcript), answer as " + "plain text.", + "6. Never hallucinate file contents or command output — if " + "you say \"Reading the file…\" you MUST actually run the " + "read_file (built-in) or emit a if it's a " + "Hermes-specific tool.", + "7. The Hermes UI already shows file edits to the user as a " + "colored +/- diff right next to each ``edit`` / ``write`` " + "tool call (and tool calls + diffs are streamed live). Do " + "NOT re-dump the before/after content or paste the diff " + "again in your final response — just confirm what was " + "changed at a high level (e.g. \"updated foo.py to fix the " + "off-by-one\"). Same for shell output: it's already visible.", + ]) + else: + sections.append( + "Hermes auxiliary call. Answer the user message below directly " + "and concisely; do not run any tools, do not write files, do " + "not ask follow-up questions. Plain-text reply only." 
+ ) + if model: + sections.append(f"Hermes requested model hint: {model}") + + if isinstance(tools, list) and tools: + tool_specs: list[dict[str, Any]] = [] + for t in tools: + if not isinstance(t, dict): + continue + fn = t.get("function") or {} + if not isinstance(fn, dict): + continue + name = fn.get("name") + if not isinstance(name, str) or not name.strip(): + continue + tool_specs.append( + { + "name": name.strip(), + "description": fn.get("description", ""), + "parameters": fn.get("parameters", {}), + } + ) + if tool_specs: + sections.append( + "Hermes-side tools (OpenAI function schema). Emit " + "{...} blocks to invoke these. " + "For plain shell / file / grep / glob actions prefer your " + "own built-in tools instead (they're faster).\n" + + json.dumps(tool_specs, ensure_ascii=False) + ) + + if tool_choice is not None: + sections.append( + f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}" + ) + + transcript: list[str] = [] + for message in messages: + if not isinstance(message, dict): + continue + role = str(message.get("role") or "unknown").strip().lower() + if role == "tool": + role = "tool" + elif role not in {"system", "user", "assistant"}: + role = "context" + + content = message.get("content") + rendered = _render_message_content(content) + if not rendered: + continue + + label = { + "system": "System", + "user": "User", + "assistant": "Assistant", + "tool": "Tool", + "context": "Context", + }.get(role, role.title()) + transcript.append(f"{label}:\n{rendered}") + + if transcript: + sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript)) + + sections.append("Continue the conversation from the latest user request.") + return "\n\n".join(section.strip() for section in sections if section and section.strip()) + + +__all__ = ["format_messages_as_prompt"] diff --git a/agent/cursor/sdk_backend.py b/agent/cursor/sdk_backend.py new file mode 100644 index 000000000000..caa8921a5272 --- /dev/null +++ 
def run_prompt_via_sdk(
    *,
    prompt_text: str,
    model: str,
    api_key: str,
    workspace: str,
    mode: str,
    timeout_seconds: float,
    on_tool_event: Any,
    on_text_event: Any,
    sdk_session: SdkSession,
) -> CursorTurnAccumulator:
    """Execute one Hermes turn via cursor-sdk; return a populated accumulator.

    Args:
        prompt_text: Prompt passed to ``agent.send()``.
        model: Model identifier placed in ``AgentOptions``.
        api_key: API key placed in ``AgentOptions``.
        workspace: Directory used as the bridge workspace and the agent cwd.
        mode: Hermes mode, mapped through ``map_hermes_mode_to_sdk``.
        timeout_seconds: Idle threshold, i.e. the longest tolerated gap
            between two stream events (not a wall-clock limit for the turn).
        on_tool_event: Callback forwarded to the accumulator.
        on_text_event: Callback forwarded to the accumulator.
        sdk_session: Session object that supplies the bridge client.

    Returns:
        The accumulator after every stream event has been fed into it.

    Raises:
        TimeoutError: If the gap between two stream events exceeded the idle
            threshold.
        RuntimeError: If the run ended in an error state, or SDK access is
            not enabled for the account.
        CursorAgentError: Propagated unchanged from the SDK.
    """
    ensure_cursor_sdk(prompt=False)
    from cursor_sdk import Agent, AgentOptions, LocalAgentOptions
    from cursor_sdk.errors import CursorAgentError, IntegrationNotConnectedError

    sdk_mode = map_hermes_mode_to_sdk(mode)
    client = sdk_session.get_client(workspace=workspace, api_key=api_key)
    options = AgentOptions(
        model=model,
        api_key=api_key,
        mode=sdk_mode,
        local=LocalAgentOptions(cwd=workspace),
    )
    accumulator = CursorTurnAccumulator(
        on_tool_event=on_tool_event,
        on_text_event=on_text_event,
    )
    idle_seconds = float(timeout_seconds)
    deadline = time.monotonic() + idle_seconds
    # Same failure statuses that _finalize_terminal_result treats as errors.
    error_statuses = {"error", "failed", "cancelled", "canceled"}

    agent = Agent.create(options, client=client)
    try:
        run = agent.send(prompt_text)
        for event in run.events():
            # Measure the gap BEFORE re-arming the deadline. Re-arming first
            # (as the code used to) makes the check unreachable.
            # NOTE(review): this only fires once the next event arrives; an
            # iterator that blocks forever would need a watchdog thread.
            now = time.monotonic()
            if now >= deadline:
                if run.supports("cancel"):
                    run.cancel()
                raise TimeoutError(
                    f"cursor-sdk emitted no events for {idle_seconds:.0f}s; "
                    f"presumed hung. Set HERMES_CURSOR_TIMEOUT_SECONDS to "
                    f"increase the idle threshold."
                )
            deadline = now + idle_seconds

            for typed in run_stream_event_to_events(event):
                accumulator.feed(typed)
                if accumulator.terminal:
                    break
            if accumulator.terminal:
                break

        result = run.wait()
        status = str(getattr(result, "status", "") or "").lower()
        result_text = str(getattr(result, "result", "") or "")
        if status in error_statuses:
            # The final status is authoritative: record the failure even when
            # the stream already produced a terminal event.
            failure_text = result_text or "cursor-sdk run failed"
            accumulator.feed(TurnResultEvent(
                is_error=True,
                result_text=failure_text,
                request_id=str(getattr(result, "id", "") or ""),
                duration_ms=int(getattr(result, "duration_ms", 0) or 0),
                usage={},
                error_message=failure_text,
            ))
        elif not accumulator.terminal:
            _finalize_terminal_result(accumulator, result)

        if accumulator.is_error:
            raise RuntimeError(
                f"cursor-sdk reported an error: {accumulator.error_message or result_text}"
            )
        return accumulator
    except IntegrationNotConnectedError as exc:
        raise RuntimeError(
            "cursor-sdk access is not enabled for this account "
            "(sdk_python_preview_access). Set HERMES_CURSOR_BACKEND=cli or "
            "generate a User API Key once SDK access is granted."
        ) from exc
    except CursorAgentError:
        # Deliberately re-raised untouched so callers see the SDK's own type.
        raise
    finally:
        # Best-effort cleanup; a failing close() must not mask the outcome.
        try:
            agent.close()
        except Exception:
            pass
def _stream_chunk(
    model: Any,
    *,
    role: Any = "assistant",
    content: Any = None,
    tool_calls: Any = None,
    reasoning: Any = None,
    reasoning_content: Any = None,
    finish_reason: Any = None,
    usage: Any = None,
) -> SimpleNamespace:
    """Build one OpenAI-style streaming chunk holding a single choice/delta."""
    return SimpleNamespace(
        choices=[
            SimpleNamespace(
                delta=SimpleNamespace(
                    role=role,
                    content=content,
                    tool_calls=tool_calls,
                    reasoning=reasoning,
                    reasoning_content=reasoning_content,
                ),
                finish_reason=finish_reason,
                index=0,
            )
        ],
        model=model,
        usage=usage,
    )


def synthesise_stream_chunks(response: Any):
    """Yield OpenAI-style streaming chunks from a non-streaming response.

    Chunks are emitted in this order, each only when it has content:

    1. reasoning (``reasoning_content`` takes precedence over ``reasoning``),
    2. the assistant text,
    3. one chunk per tool call,
    4. a closing chunk that carries ``finish_reason`` and ``usage``.

    Nothing is yielded when the response has no first choice or that choice
    has no message. A tool call lacking a ``function`` attribute is emitted
    with an empty name and arguments instead of aborting the stream, and a
    missing or empty id falls back to ``call_<index>``.
    """
    try:
        choice = response.choices[0]
    except Exception:
        return  # Best effort: a malformed response produces an empty stream.

    message = getattr(choice, "message", None)
    if message is None:
        return

    content = getattr(message, "content", "") or ""
    tool_calls = getattr(message, "tool_calls", None) or []
    reasoning = getattr(message, "reasoning", None)
    reasoning_content = getattr(message, "reasoning_content", None)
    finish_reason = getattr(choice, "finish_reason", "stop")
    model = getattr(response, "model", "cursor")
    usage = getattr(response, "usage", None)

    if reasoning_content:
        yield _stream_chunk(model, reasoning_content=reasoning_content)
    elif reasoning:
        yield _stream_chunk(model, reasoning=reasoning)

    if content:
        yield _stream_chunk(model, content=content)

    for index, call in enumerate(tool_calls):
        function = getattr(call, "function", None)
        yield _stream_chunk(
            model,
            tool_calls=[
                SimpleNamespace(
                    index=index,
                    id=getattr(call, "id", None) or f"call_{index}",
                    type="function",
                    function=SimpleNamespace(
                        name=getattr(function, "name", ""),
                        arguments=getattr(function, "arguments", ""),
                    ),
                )
            ],
        )

    # Closing chunk: empty delta, real finish_reason, and the usage totals.
    yield _stream_chunk(model, role=None, finish_reason=finish_reason, usage=usage)
"shellToolCall": + cmd = args.get("command") or args.get("cmd") + if isinstance(cmd, list): + cmd = " ".join(str(part) for part in cmd) + if isinstance(cmd, str) and cmd.strip(): + return cmd.strip()[:200] + if evt.envelope_key in ( + "readToolCall", + "editToolCall", + "writeToolCall", + "patchToolCall", + "deleteToolCall", + ): + path = ( + args.get("path") + or args.get("file") + or args.get("filePath") + or args.get("filename") + or args.get("target_file") + or args.get("targetFile") + or args.get("file_path") + or args.get("relative_workspace_path") + or "" + ) + if isinstance(path, str) and path.strip(): + return path.strip()[:200] + if evt.envelope_key == "globToolCall": + pat = args.get("globPattern") or args.get("pattern") or "" + target = args.get("targetDirectory") or args.get("path") or "" + label = " in ".join(p for p in (pat, target) if isinstance(p, str) and p.strip()) + if label: + return label[:200] + if evt.envelope_key in ("grepToolCall", "searchToolCall"): + pat = args.get("pattern") or args.get("query") or args.get("regex") or "" + target = args.get("path") or args.get("targetDirectory") or "" + if isinstance(pat, str) and pat.strip(): + if isinstance(target, str) and target.strip(): + return f"{pat} in {target}"[:200] + return pat.strip()[:200] + if isinstance(target, str) and target.strip(): + return target.strip()[:200] + if evt.envelope_key == "listToolCall": + path = args.get("path") or args.get("directory") or args.get("targetDirectory") or "" + if isinstance(path, str) and path.strip(): + return path.strip()[:200] + return json.dumps(args, ensure_ascii=False)[:200] + except Exception: + return "" + + +def normalize_cursor_tool_name(envelope_key: str) -> str: + """Map cursor's ``ToolCall`` keys to Hermes tool names.""" + if not isinstance(envelope_key, str): + return "cursor_tool" + suffix = "ToolCall" + base = envelope_key[: -len(suffix)] if envelope_key.endswith(suffix) else envelope_key + if not base: + return "cursor_tool" + return { 
def summarise_cursor_tool_result(envelope_key: str, payload: dict[str, Any]) -> str:
    """Return a compact human-readable result string for the UI / log.

    Args:
        envelope_key: Tool envelope key such as ``"shellToolCall"``.
        payload: Tool payload; only its ``"result"`` entry is inspected.

    Returns:
        * ``""`` when there is no dict ``result`` or nothing usable in it.
        * ``"error: ..."`` (max 400 chars) when ``result["error"]`` is a str
          or dict and ``success`` is not a dict.
        * The string itself when ``success`` is a plain string. This is the
          shape ``sdk_message_to_events`` produces for scalar results, which
          was previously summarised as empty.
        * For dict ``success``: shell stdout, read content (prefixed by the
          line count when known), up to 200 list/glob entries one per line,
          otherwise the JSON dump truncated to 1000 chars.
    """
    result = payload.get("result")
    if not isinstance(result, dict):
        return ""

    success = result.get("success")
    if not isinstance(success, dict):
        if "error" in result and isinstance(result["error"], (str, dict)):
            return f"error: {result['error']}"[:400]
        if isinstance(success, str):
            return success
        return ""

    try:
        if envelope_key == "shellToolCall":
            stdout = success.get("stdout") or ""
            return stdout if isinstance(stdout, str) else json.dumps(stdout)

        if envelope_key == "readToolCall":
            content = success.get("content") or ""
            total = success.get("totalLines")
            if total is not None:
                return f"({total} lines)\n{content}" if content else f"({total} lines)"
            return content if isinstance(content, str) else json.dumps(content)

        if envelope_key in ("listToolCall", "globToolCall"):
            files = success.get("files") or success.get("entries") or []
            if isinstance(files, list):
                return "\n".join(str(f) for f in files[:200])
            # A non-list value falls through to the generic JSON dump below.

        return json.dumps(success, ensure_ascii=False)[:1000]
    except Exception:
        # Summaries are cosmetic; never let a formatting problem propagate.
        return ""
= time.monotonic() + self.completed_at: float | None = None + self.result_text: str = "" + self.is_error: bool = False + self.duration_ms: int = 0 + self.lines_added: int | None = None + self.lines_removed: int | None = None + self.diff_string: str = "" + + def to_public_dict(self) -> dict[str, Any]: + return { + "id": self.call_id, + "name": self.name, + "envelope": self.envelope_key, + "arguments": self.args, + "result": self.result_text, + "is_error": self.is_error, + "duration_ms": self.duration_ms, + } + + +# Backward-compat alias for tests and internal imports. +_CursorToolEvent = CursorToolEvent + + +__all__ = [ + "CursorToolEvent", + "_CursorToolEvent", + "build_cursor_tool_preview", + "normalize_cursor_tool_name", + "summarise_cursor_tool_result", +] diff --git a/agent/cursor_agent_client.py b/agent/cursor_agent_client.py index 7a3926ac7a3c..218740c5d5d0 100644 --- a/agent/cursor_agent_client.py +++ b/agent/cursor_agent_client.py @@ -1,1748 +1,31 @@ -"""OpenAI-compatible facade that forwards Hermes requests to ``cursor-agent``. +"""OpenAI-compatible facade that forwards Hermes requests to Cursor (CLI or SDK). -This adapter lets Hermes treat the Cursor Agent CLI as a chat-style backend so -every Cursor user (Hobby/Pro/Pro+/Ultra/Teams) can route Hermes calls through -their existing Cursor subscription / API credits. - -Per request we spawn ``cursor-agent -p`` with ``--output-format stream-json``, -pass the formatted conversation as the prompt (via stdin to avoid the argv -length limit), then parse the line-delimited JSON events into a single OpenAI -chat-completion response. - -Design notes: - -- One subprocess per request (no shared long-running session). Warm sessions - via ``--resume`` are an opt-in path documented below. -- Default ``--mode ask`` keeps Cursor read-only — useful when we just want the - model as an LLM rather than letting it edit files. -- Default workspace is an ephemeral temp dir so the agent never sees the - caller's repo. 
Override via ``HERMES_CURSOR_WORKSPACE`` or the ``workspace`` - ctor arg. -- Tool calls follow the Copilot-ACP convention: tools are described in the - system prompt and the model emits ``{...}`` blocks - that we lift back into OpenAI ``tool_calls``. -- The CLI auth (``cursor-agent login`` or ``CURSOR_API_KEY``) is what governs - identity; we forward ``CURSOR_API_KEY`` to the subprocess and let the CLI - resolve it (same as the IDE does). +This module re-exports the :mod:`agent.cursor` package for backward compatibility. +New code should import from ``agent.cursor`` directly. """ from __future__ import annotations -import json -import os -import queue -import shlex -import shutil -import subprocess -import tempfile -import threading -import time -from collections import deque -from pathlib import Path -from types import SimpleNamespace -from typing import Any - -from agent.redact import redact_sensitive_text - -CURSOR_MARKER_BASE_URL = "cursor://agent" -DEFAULT_CURSOR_COMMAND = "cursor-agent" -# ``agent`` matches cursor-agent's own default permissionMode (the -# behavior you get from ``cursor-agent -p`` with no ``--mode`` flag). -# This is what a user picking ``cursor`` in ``hermes model`` will expect: -# the same write/edit/shell power they'd have from the cursor CLI directly. -# Users who want read-only behavior set ``HERMES_CURSOR_MODE=ask`` (or -# ``plan``); Hermes' own ``approvals.mode`` config additionally gates any -# tool execution (manual / smart / off) on top of this, identical to every -# other provider. -DEFAULT_CURSOR_MODE = "agent" -DEFAULT_CURSOR_MODEL = "auto" - -# cursor-agent CLI accepts only ``ask`` and ``plan`` for ``--mode`` today, -# but ``-p/--print`` *without* ``--mode`` runs in the full-capability -# ``default`` permissionMode (write+shell+everything). We expose that as -# the synthetic ``agent`` value here: -# - ``ask`` : read-only Q&A. Cursor's built-in mutation tools are -# disabled. 
Hermes-side tools still apply for any work -# that needs to touch the user's disk. -# - ``plan`` : read-only planning mode. Produces structured plan output -# from Cursor's planner. -# - ``agent``: omits ``--mode`` so Cursor runs in its IDE-equivalent -# "default" permissionMode — built-in shell, write, edit, -# read, etc. all active. Use this when you want Cursor to -# drive multi-step work end-to-end (it will still emit -# tool_call events that we surface to Hermes UI). -# Anything else falls back to ``ask``. Don't add new values without -# re-checking ``cursor-agent --help``; passing an unknown ``--mode`` -# value causes a hard-crash BrokenPipe with a confusing -# "Allowed choices are plan, ask." stderr. -_VALID_CURSOR_MODES = frozenset({"ask", "plan", "agent"}) -_CURSOR_CLI_MODES = frozenset({"ask", "plan"}) -# Idle threshold (not wall-clock): the deadline resets on every stream-json -# event from cursor-agent. A turn can legitimately run for much longer than -# this in total wall-clock; what matters is that events keep arriving. If -# nothing arrives for this long, the subprocess is presumed hung and is -# force-killed with a clear TimeoutError. Override via -# ``HERMES_CURSOR_TIMEOUT_SECONDS`` env var. Default is 30 minutes; cursor- -# agent's own internal shell ceiling is 10 minutes so a single shell call -# can chew up that much idle time, and chained internal operations (deep -# greps, large reads after a long shell) routinely push past 15 minutes -# without emitting events. 30 min gives comfortable headroom while still -# catching genuine hangs. -_DEFAULT_TIMEOUT_SECONDS = 1800.0 - -# Sentinels that mean "no real api key — use the cursor-agent CLI's own login -# session". Hermes's external_process auth path injects these as placeholders; -# forwarding them to ``cursor-agent --api-key`` makes the CLI reject the -# request and close stdin, manifesting upstream as ``BrokenPipeError``. 
-_API_KEY_SENTINELS = frozenset({ - "", - "cursor-agent-login", - "cursor-cli-login", - "external-process", - "external_process", -}) - -# Reuse the tool-call extraction grammar from copilot_acp_client. We do NOT -# reuse its prompt builder — cursor's model is itself an agentic CLI with its -# own built-in shell/edit/read tools, and the softer ACP wording ("ACP agent -# backend, use ACP capabilities") makes it prefer those built-ins and run the -# work internally, leaving Hermes' tool surface unused. Cursor needs an -# explicit "you are JUST the LLM, do not execute anything yourself" directive -# (see ``_format_messages_as_prompt`` below). -import json as _json # noqa: E402 - -from agent.copilot_acp_client import ( # noqa: E402 - _extract_tool_calls_from_text, - _render_message_content, +from agent.cursor.accumulator import CursorTurnAccumulator, _StreamJsonAccumulator +from agent.cursor.backend import cursor_sdk_installed, resolve_cursor_backend +from agent.cursor.client import CursorAgentClient +from agent.cursor.constants import ( + CURSOR_MARKER_BASE_URL, + DEFAULT_CURSOR_COMMAND, + DEFAULT_CURSOR_MODEL, + DEFAULT_CURSOR_MODE, +) +from agent.cursor.env import ( + build_subprocess_env as _build_subprocess_env, + resolve_command as _resolve_command, +) +from agent.cursor.prompt import format_messages_as_prompt as _format_messages_as_prompt +from agent.cursor.sdk_backend import SdkSession as _SdkSession, run_prompt_via_sdk +from agent.cursor.tool_events import ( + CursorToolEvent as _CursorToolEvent, + build_cursor_tool_preview as _build_cursor_tool_preview, + normalize_cursor_tool_name as _normalize_cursor_tool_name, ) - - -def _format_messages_as_prompt( - messages: list[dict[str, Any]], - model: str | None = None, - tools: list[dict[str, Any]] | None = None, - tool_choice: Any = None, -) -> str: - """Build the prompt sent to ``cursor-agent`` stdin. - - Key differences vs. 
the copilot-acp formatter: - - * Hard "you are the LLM, NOT an agent" framing — without this, cursor's - built-in shell/edit/read tools intercept the request and the agentic - loop runs entirely inside ``cursor-agent``, so Hermes never sees a - ``tool_calls`` response (the symptom: chat sessions show 0 tool - calls even though tools are advertised). - * Explicit "do NOT run ls/cat/edit yourself" line — empirically required - to push cursor's model past its default "I'll just do it" reflex. - * Tool-call grammar identical to copilot-acp so the ``{...} - `` extractor we share keeps working. - """ - # Auxiliary-style calls (title generation, compression, vision, - # mcp router, etc.) come in with NO ``tools`` and just a system+user - # pair. They want a short, direct response — slapping the full - # "you are an agent backend, emit tool_call blocks" preamble on top - # of them makes cursor's harness reply with a verbose multi-paragraph - # answer or even crash on the formatting constraints. So we keep - # the heavy preamble only for the agentic chat path (tools provided). - sections: list[str] = [] - has_tools = bool(tools) - if has_tools: - sections.extend([ - "You are powering a chat session inside Hermes Agent.", - "You have TWO sets of tools available:", - "(A) Your own built-in cursor-agent tools (shell, read_file, " - "edit_file, write_file, list_directory, grep, glob, web_fetch). " - "Use these DIRECTLY for filesystem/shell/search work — they run " - "on the real workspace, are fast, and Hermes will surface their " - "results to the user automatically.", - "(B) Hermes-side tools listed in the schema below. They cover " - "capabilities your built-in tools do NOT have (skills, MCP " - "servers, browser automation, remote APIs, etc.). To invoke " - "one of THESE, emit a " - "{...} block in OpenAI function-call " - "shape: " - '{"id":"call_","type":"function",' - '"function":{"name":"","arguments":""}}. 
' - "``arguments`` MUST be a JSON STRING (escaped), not a nested " - "object.", - "RULES:", - "1. Prefer your built-in tools for any shell command, file " - "read/write/list/edit, grep, or glob operation — they're " - "faster than round-tripping through Hermes. CRITICAL for " - "file creation/modification: ALWAYS use the ``write`` or " - "``edit`` built-in tools, NEVER ``shell`` with ``echo > " - "file`` / ``cat > file`` / ``sed -i`` / ``>>``. Only the " - "write/edit tools report ``linesAdded`` / ``linesRemoved`` " - "/ ``diffString`` to the harness, which is what Hermes " - "renders as the colored ``+``/``-`` diff in the UI. Shell " - "redirections create the file but the user sees no diff " - "and has no idea what changed.", - "2. Only emit blocks for tools listed in the " - "schema below; do NOT invent tool names. Multiple tool_calls " - "per turn are allowed.", - "3. Work iteratively (ReAct-style): before each tool batch, " - "emit ONE short line of plain text saying what you're about " - "to check and why. After tool results come back, briefly " - "reflect on what you found before deciding the next step. " - "Hermes surfaces these intermediate lines to the user as " - "live narration so they can follow your reasoning.", - "4. Don't dump every tool call upfront — chain them: think, " - "tool, reflect, tool, reflect, ... then synthesise the final " - "answer at the end. If the task genuinely is independent " - "lookups, parallel tool calls in one batch are fine.", - "5. If no tool is needed (pure conversation, math, " - "summarising content already in the transcript), answer as " - "plain text.", - "6. Never hallucinate file contents or command output — if " - "you say \"Reading the file…\" you MUST actually run the " - "read_file (built-in) or emit a if it's a " - "Hermes-specific tool.", - "7. 
The Hermes UI already shows file edits to the user as a " - "colored +/- diff right next to each ``edit`` / ``write`` " - "tool call (and tool calls + diffs are streamed live). Do " - "NOT re-dump the before/after content or paste the diff " - "again in your final response — just confirm what was " - "changed at a high level (e.g. \"updated foo.py to fix the " - "off-by-one\"). Same for shell output: it's already visible.", - ]) - else: - # Lite preamble for aux calls — just enough to keep cursor's - # harness from running its own tools / writing files / asking - # clarifying questions when all we want is a single short reply. - sections.append( - "Hermes auxiliary call. Answer the user message below directly " - "and concisely; do not run any tools, do not write files, do " - "not ask follow-up questions. Plain-text reply only." - ) - if model: - sections.append(f"Hermes requested model hint: {model}") - - if isinstance(tools, list) and tools: - tool_specs: list[dict[str, Any]] = [] - for t in tools: - if not isinstance(t, dict): - continue - fn = t.get("function") or {} - if not isinstance(fn, dict): - continue - name = fn.get("name") - if not isinstance(name, str) or not name.strip(): - continue - tool_specs.append( - { - "name": name.strip(), - "description": fn.get("description", ""), - "parameters": fn.get("parameters", {}), - } - ) - if tool_specs: - sections.append( - "Hermes-side tools (OpenAI function schema). Emit " - "{...} blocks to invoke these. 
" - "For plain shell / file / grep / glob actions prefer your " - "own built-in tools instead (they're faster).\n" - + _json.dumps(tool_specs, ensure_ascii=False) - ) - - if tool_choice is not None: - sections.append( - f"Tool choice hint: {_json.dumps(tool_choice, ensure_ascii=False)}" - ) - - transcript: list[str] = [] - for message in messages: - if not isinstance(message, dict): - continue - role = str(message.get("role") or "unknown").strip().lower() - if role == "tool": - role = "tool" - elif role not in {"system", "user", "assistant"}: - role = "context" - - content = message.get("content") - rendered = _render_message_content(content) - if not rendered: - continue - - label = { - "system": "System", - "user": "User", - "assistant": "Assistant", - "tool": "Tool", - "context": "Context", - }.get(role, role.title()) - transcript.append(f"{label}:\n{rendered}") - - if transcript: - sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript)) - - sections.append("Continue the conversation from the latest user request.") - return "\n\n".join(section.strip() for section in sections if section and section.strip()) - - -# --------------------------------------------------------------------------- -# Environment & path helpers -# --------------------------------------------------------------------------- - - -def _resolve_command() -> str: - return ( - os.getenv("HERMES_CURSOR_COMMAND", "").strip() - or os.getenv("CURSOR_AGENT_PATH", "").strip() - or DEFAULT_CURSOR_COMMAND - ) - - -def _resolve_extra_args() -> list[str]: - raw = os.getenv("HERMES_CURSOR_ARGS", "").strip() - if not raw: - return [] - return shlex.split(raw) - - -def _resolve_mode() -> str: - mode = os.getenv("HERMES_CURSOR_MODE", "").strip().lower() or DEFAULT_CURSOR_MODE - if mode not in _VALID_CURSOR_MODES: - mode = DEFAULT_CURSOR_MODE - return mode - - -def _resolve_workspace_override() -> str: - return os.getenv("HERMES_CURSOR_WORKSPACE", "").strip() - - -def _resolve_home_dir() -> 
str: - """Pick a stable HOME for the child process. - - Mirrors ``agent/copilot_acp_client.py:_resolve_home_dir`` so subprocess - behaviour stays predictable across providers. - """ - try: - from hermes_constants import get_subprocess_home - - profile_home = get_subprocess_home() - if profile_home: - return profile_home - except Exception: - pass - - home = os.environ.get("HOME", "").strip() - if home: - return home - - expanded = os.path.expanduser("~") - if expanded and expanded != "~": - return expanded - - # POSIX-only last resort: read the home dir from the password - # database. ``os.getuid`` does not exist on Windows; gate explicitly - # so the import-time footgun checker stays clean. (Windows already - # falls through to the ``USERPROFILE`` / ``HOMEDRIVE+HOMEPATH`` - # branches above; if those failed there is no equivalent password - # database here, so just bail to ``/tmp``.) - if hasattr(os, "getuid"): - try: - import pwd - - resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() # windows-footgun: ok - if resolved: - return resolved - except Exception: - pass - - return "/tmp" - - -def _build_subprocess_env(api_key: str | None) -> dict[str, str]: - env = os.environ.copy() - env["HOME"] = _resolve_home_dir() - if api_key: - env["CURSOR_API_KEY"] = api_key - env.setdefault("NO_COLOR", "1") - env.setdefault("TERM", "dumb") - return env - - -# --------------------------------------------------------------------------- -# Stream-json parser -# --------------------------------------------------------------------------- - - -def _build_cursor_tool_preview(evt: "_CursorToolEvent") -> str: - """Compact one-line description of a cursor tool call for the UI. - - Mirrors the spirit of ``_build_tool_preview`` in ``tool_executor.py`` — - a single short string the spinner / activity feed can show next to - the tool name. Tool-specific extractors fall back to a JSON dump of - arguments when we don't have a hand-written formatter. 
- """ - args = evt.args or {} - try: - if evt.envelope_key == "shellToolCall": - cmd = args.get("command") or args.get("cmd") - if isinstance(cmd, list): - cmd = " ".join(str(part) for part in cmd) - if isinstance(cmd, str) and cmd.strip(): - return cmd.strip()[:200] - if evt.envelope_key in ( - "readToolCall", - "editToolCall", - "writeToolCall", - "patchToolCall", - "deleteToolCall", - ): - # Cursor's wire format isn't fully consistent across tool - # kinds; ``editToolCall.args`` has been seen using - # ``target_file`` / ``targetFile`` / ``file_path`` while - # other tools use ``path``. Try them all so the activity - # feed always shows what was touched. - path = ( - args.get("path") - or args.get("file") - or args.get("filePath") - or args.get("filename") - or args.get("target_file") - or args.get("targetFile") - or args.get("file_path") - or args.get("relative_workspace_path") - or "" - ) - if isinstance(path, str) and path.strip(): - return path.strip()[:200] - if evt.envelope_key == "globToolCall": - pat = args.get("globPattern") or args.get("pattern") or "" - target = args.get("targetDirectory") or args.get("path") or "" - label = " in ".join(p for p in (pat, target) if isinstance(p, str) and p.strip()) - if label: - return label[:200] - if evt.envelope_key in ("grepToolCall", "searchToolCall"): - pat = args.get("pattern") or args.get("query") or args.get("regex") or "" - target = args.get("path") or args.get("targetDirectory") or "" - if isinstance(pat, str) and pat.strip(): - if isinstance(target, str) and target.strip(): - return f"{pat} in {target}"[:200] - return pat.strip()[:200] - if isinstance(target, str) and target.strip(): - return target.strip()[:200] - if evt.envelope_key == "listToolCall": - path = args.get("path") or args.get("directory") or args.get("targetDirectory") or "" - if isinstance(path, str) and path.strip(): - return path.strip()[:200] - return json.dumps(args, ensure_ascii=False)[:200] - except Exception: - return "" - - -def 
_normalize_cursor_tool_name(envelope_key: str) -> str: - """Map cursor's wire-format ``ToolCall`` keys to Hermes tool names. - - cursor-agent's stream-json wraps every internal tool call as - ``"ToolCall"`` (e.g. ``shellToolCall``, ``readToolCall``). We - translate the kind so the activity surfaces in Hermes' UI with names - the user already recognises from other providers. - """ - if not isinstance(envelope_key, str): - return "cursor_tool" - suffix = "ToolCall" - base = envelope_key[: -len(suffix)] if envelope_key.endswith(suffix) else envelope_key - if not base: - return "cursor_tool" - return { - "shell": "shell", - "read": "read_file", - "list": "list_directory", - "edit": "edit_file", - "write": "write_file", - "patch": "patch", - "grep": "grep", - "glob": "glob", - "search": "search", - "todo": "todo", - "delete": "delete_file", - "task": "task", - "fetch": "web_fetch", - }.get(base.lower(), base) - - -def _summarise_cursor_tool_result(envelope_key: str, payload: dict[str, Any]) -> str: - """Return a compact human-readable result string for the UI / log. - - Falls back to a generic JSON dump when we don't have a hand-written - extractor for the tool kind. Best-effort — never raises. 
- """ - result = payload.get("result") - if not isinstance(result, dict): - return "" - success = result.get("success") - if not isinstance(success, dict): - if "error" in result and isinstance(result["error"], (str, dict)): - return f"error: {result['error']}"[:400] - return "" - try: - if envelope_key == "shellToolCall": - stdout = success.get("stdout") or "" - return stdout if isinstance(stdout, str) else json.dumps(stdout) - if envelope_key == "readToolCall": - content = success.get("content") or "" - total = success.get("totalLines") - if total is not None: - return f"({total} lines)\n{content}" if content else f"({total} lines)" - return content if isinstance(content, str) else json.dumps(content) - if envelope_key in ("listToolCall", "globToolCall"): - files = success.get("files") or success.get("entries") or [] - if isinstance(files, list): - return "\n".join(str(f) for f in files[:200]) - return json.dumps(success, ensure_ascii=False)[:1000] - except Exception: - return "" - - -class _CursorToolEvent: - """A captured cursor-agent tool invocation (started + completed states). - - Used both for live progress callbacks (Hermes' ``tool_progress_callback`` - surface) and for the post-hoc audit list returned alongside the - response so sessions can persist what cursor did. - """ - - __slots__ = ( - "call_id", "envelope_key", "name", "args", "started_at", - "completed_at", "result_text", "is_error", "duration_ms", - "lines_added", "lines_removed", "diff_string", - ) - - def __init__(self, call_id: str, envelope_key: str, args: dict[str, Any]) -> None: - self.call_id = call_id - self.envelope_key = envelope_key - self.name = _normalize_cursor_tool_name(envelope_key) - self.args = args - self.started_at = time.monotonic() - self.completed_at: float | None = None - self.result_text: str = "" - self.is_error: bool = False - self.duration_ms: int = 0 - # Edit/write result metadata. 
Cursor's stream-json provides - # ``linesAdded`` / ``linesRemoved`` / ``diffString`` on the - # completion event for edit and write operations. We surface - # the count in the activity feed ("+5 -2") and persist the - # diff for replays / audits. - self.lines_added: int | None = None - self.lines_removed: int | None = None - self.diff_string: str = "" - - def to_public_dict(self) -> dict[str, Any]: - return { - "id": self.call_id, - "name": self.name, - "envelope": self.envelope_key, - "arguments": self.args, - "result": self.result_text, - "is_error": self.is_error, - "duration_ms": self.duration_ms, - } - - -class _StreamJsonAccumulator: - """Accumulates state from a ``cursor-agent --output-format stream-json`` stream. - - Caller feeds parsed JSON events with :meth:`feed`. When a terminal - ``result`` event arrives the accumulator stores the success/failure state - and surface text. The instance is reusable per-call but not thread-safe. - """ - - def __init__(self, on_tool_event: Any = None, on_text_event: Any = None) -> None: - self.text_parts: list[str] = [] - self.reasoning_parts: list[str] = [] - self.session_id: str = "" - self.request_id: str = "" - self.model_label: str = "" - self.duration_ms: int = 0 - self.usage: dict[str, int] = {} - self.terminal: bool = False - self.is_error: bool = False - self.error_message: str = "" - self.final_result_text: str = "" - # Ordered transcript of (kind, payload) events as cursor emitted - # them — used to separate "narrative text between tools" from - # "final synthesis text" when we assemble the response. Without - # this, ``assembled_text()`` glues every intermediate text event - # to the end of the final answer and the user sees a wall of - # planning prose preceding the actual response. - self.event_log: list[tuple[str, Any]] = [] - # ``on_tool_event(stage, event)`` — invoked synchronously from - # ``feed()`` when a ``tool_call`` event arrives. 
- self._on_tool_event = on_tool_event - # ``on_text_event(text)`` — invoked when cursor emits an - # intermediate ``assistant`` text block (cursor often prints a - # 1-2 sentence "let me check X next" between tool batches). - # Surfacing these live as narration events gives the Hermes UI - # the agentic feel of "tool → text → tool → text" that the user - # asked about; without it everything bundles into one final - # answer block. - self._on_text_event = on_text_event - self._tool_events: dict[str, _CursorToolEvent] = {} - self.tool_events: list[_CursorToolEvent] = [] - # Optional caller-provided estimate of "current prompt size" in - # tokens, used to surface a stable number on the Hermes status - # bar. Set by ``_create_chat_completion`` before each call. - self.messages_estimate: int = 0 - # We BUFFER intermediate text instead of dispatching it eagerly - # so the final text-after-last-tool only appears in the - # synthesis (assistant response) and not duplicated as a narrate - # event in the activity feed. Flush rule: when the next tool - # starts we now know the buffered text was "between tools" and - # safe to surface. If no more tools come, the buffer is dropped - # and only ``synthesis_text()`` shows it. 
- self._pending_text: list[str] = [] - - def feed(self, event: dict[str, Any]) -> None: - evt_type = event.get("type") - if not isinstance(evt_type, str): - return - - if evt_type == "system": - model = event.get("model") - if isinstance(model, str): - self.model_label = model - session = event.get("session_id") - if isinstance(session, str): - self.session_id = session - return - - if evt_type == "thinking": - text = event.get("text") - if isinstance(text, str) and text: - self.reasoning_parts.append(text) - return - - if evt_type == "assistant": - message = event.get("message") - if isinstance(message, dict): - content = message.get("content") - if isinstance(content, list): - for block in content: - if not isinstance(block, dict): - continue - if block.get("type") == "text": - text = block.get("text") - if isinstance(text, str) and text: - self.text_parts.append(text) - self.event_log.append(("text", text)) - # Defer the narrate dispatch — see - # ``_pending_text`` docstring above. - self._pending_text.append(text) - return - - if evt_type == "tool_call": - sub = event.get("subtype") - # Before recording a NEW tool start, flush any text we'd - # buffered: by definition that text was "between tools", - # so it's safe (and useful) to show as narration now. - if sub == "started" and self._pending_text: - for buffered in self._pending_text: - self._dispatch_text_event(buffered) - self._pending_text.clear() - self._consume_tool_call_event(event) - # Note the tool event order so ``synthesis_text`` can pick - # the right "final" text. We append only on ``started`` to - # avoid double-counting; the per-tool event timeline is - # already preserved in ``self.tool_events``. 
- if sub == "started": - self.event_log.append(("tool", None)) - return - - if evt_type == "result": - self.terminal = True - self.is_error = bool(event.get("is_error", False)) - subtype = event.get("subtype") - if subtype == "error": - self.is_error = True - duration = event.get("duration_ms") - if isinstance(duration, int): - self.duration_ms = duration - request = event.get("request_id") - if isinstance(request, str): - self.request_id = request - usage = event.get("usage") - if isinstance(usage, dict): - # Cursor emits camelCase keys. - normalized = {} - for k, v in usage.items(): - if isinstance(v, (int, float)): - normalized[str(k)] = int(v) - self.usage = normalized - result_text = event.get("result") - if isinstance(result_text, str): - self.final_result_text = result_text - if not self.text_parts and not self.is_error: - self.text_parts.append(result_text) - if self.is_error and not self.error_message: - self.error_message = result_text or "cursor-agent returned an error" - return - - # Unknown / informational events (e.g. ``user`` echo) — ignore. - - def _consume_tool_call_event(self, event: dict[str, Any]) -> None: - """Translate one cursor stream-json ``tool_call`` event. - - cursor-agent emits one event with ``subtype="started"`` when the LLM - decides to use one of its built-in tools (shell, read, edit, ...), - and a follow-up with ``subtype="completed"`` carrying the result. - We rebuild a ``_CursorToolEvent`` from those, fire the optional - progress callback so Hermes' UI can show the activity in real time, - and stash the final list so the caller can surface "what cursor - actually did" in the response (e.g. for session audit). 
- """ - subtype = event.get("subtype") - call_id = event.get("call_id") - if not isinstance(call_id, str) or not call_id: - return - tool_call = event.get("tool_call") - if not isinstance(tool_call, dict) or not tool_call: - return - envelope_key = next(iter(tool_call.keys()), "") - payload = tool_call.get(envelope_key) if isinstance(envelope_key, str) else None - if not isinstance(payload, dict): - return - args_obj = payload.get("args") - if not isinstance(args_obj, dict): - args_obj = {} - - if subtype == "started": - evt = _CursorToolEvent( - call_id=call_id, - envelope_key=envelope_key, - args=args_obj, - ) - self._tool_events[call_id] = evt - self.tool_events.append(evt) - self._fire_tool_event("started", evt) - return - - if subtype == "completed": - evt = self._tool_events.get(call_id) - if evt is None: - # Cursor sent a completed event we never saw started for — - # synthesise the started state so the audit list still has it. - evt = _CursorToolEvent( - call_id=call_id, - envelope_key=envelope_key, - args=args_obj, - ) - self._tool_events[call_id] = evt - self.tool_events.append(evt) - self._fire_tool_event("started", evt) - evt.completed_at = time.monotonic() - evt.duration_ms = int((evt.completed_at - evt.started_at) * 1000) - result = payload.get("result") - if isinstance(result, dict): - if "error" in result and result.get("error"): - evt.is_error = True - # Pull diff stats off edit/write/patch completion events - # so the activity feed can show "+5 -2" next to the - # path. Cursor only emits these for file-modifying tools. 
- success = result.get("success") if isinstance(result, dict) else None - if isinstance(success, dict): - la = success.get("linesAdded") - lr = success.get("linesRemoved") - ds = success.get("diffString") - if isinstance(la, int): - evt.lines_added = la - if isinstance(lr, int): - evt.lines_removed = lr - if isinstance(ds, str): - evt.diff_string = ds - evt.result_text = _summarise_cursor_tool_result(envelope_key, payload) - self._fire_tool_event("completed", evt) - return - - def _fire_tool_event(self, stage: str, evt: _CursorToolEvent) -> None: - if self._on_tool_event is None: - return - try: - self._on_tool_event(stage, evt) - except Exception: - # A broken UI must never bring down the chat call. - pass - - def _dispatch_text_event(self, text: str) -> None: - """Forward an intermediate assistant text event to the UI bridge. - - Errors are swallowed — a broken callback must never abort the - chat call. - """ - if self._on_text_event is None: - return - try: - self._on_text_event(text) - except Exception: - pass - - def assembled_text(self) -> str: - return "".join(self.text_parts).strip() - - def synthesis_text(self) -> str: - """Return only the synthesis portion of the response. - - Cursor's stream interleaves planning prose ("Searching the - agent directory…") with tool calls, then ends with the actual - synthesised answer. Gluing every text event together leaves - the user staring at a wall of "I'll do X next" lines before - the real answer. This helper returns just the text emitted - AFTER the last tool call — that's the synthesis. - - Falls back to the full ``assembled_text()`` when: - * no tools ran (every text is part of the answer); - * cursor emitted no text after the last tool (rare; we then - use the cursor-supplied ``result.result`` if it differs - from the bundled text, otherwise the full bundle so the - user sees *something*). 
- """ - tool_seen = False - synth: list[str] = [] - for kind, payload in self.event_log: - if kind == "tool": - tool_seen = True - synth.clear() # drop earlier planning text - elif kind == "text": - synth.append(payload) - if synth: - return "".join(synth).strip() - # No text after the last tool. If cursor's ``result.result`` - # carries something useful and distinct, use it; otherwise - # surface the full bundle so the user isn't left empty-handed. - if not tool_seen: - return self.assembled_text() - if self.final_result_text and self.final_result_text.strip(): - return self.final_result_text.strip() - return self.assembled_text() - - def narration_text(self) -> str: - """Return the planning / between-tool prose for transcript replay. - - The live bridge already surfaces each piece individually via - ``on_text_event``. This helper is for tests / debug consumers - that want to inspect what was intermediate vs. final. - """ - narration: list[str] = [] - bucket: list[str] = [] - for kind, payload in self.event_log: - if kind == "tool": - if bucket: - narration.append("".join(bucket).strip()) - bucket = [] - elif kind == "text": - bucket.append(payload) - # The final ``bucket`` is the synthesis — drop it. - return "\n".join(n for n in narration if n) - - def assembled_reasoning(self) -> str: - return "".join(self.reasoning_parts).strip() - - def openai_usage(self) -> SimpleNamespace: - """Translate cursor-agent's per-turn usage into OpenAI-shaped fields. - - Quirk worth knowing: cursor-agent's ``result.usage.inputTokens`` - is the SUM of fresh (non-cached) input tokens across **every - internal LLM round-trip** in the turn. For an agentic turn that - runs N tool calls there are roughly N+1 internal model calls - (one per tool round plus the final text), and each call's input - grows as tool results accumulate. So inputTokens for a deep - multi-tool turn can easily reach 1M+ while the model's actual - context window (e.g. 
200K on composer-2.5-fast) was never - exceeded — cursor reused the cache between calls. - - Hermes' status bar and compressor use ``prompt_tokens`` as a - proxy for "what's currently in the model's context" (used to - drive compression decisions and the % bar). Reporting the raw - cumulative SUM blows the bar past 100% on agentic turns, which - is both visually wrong and triggers spurious compression. - - Fix: divide the cumulative figures by the number of internal - rounds we observed (tool_events + 1) to produce an honest - per-round average that matches the model's actual context use. - The full billing total is still reported separately for cost - tracking via ``session_input_tokens``. - """ - input_tokens_raw = int(self.usage.get("inputTokens", 0)) - output_tokens = int(self.usage.get("outputTokens", 0)) - cache_read_raw = int(self.usage.get("cacheReadTokens", 0)) - - rounds = max(len(self.tool_events) + 1, 1) - per_round_input = input_tokens_raw // rounds if rounds > 0 else input_tokens_raw - per_round_cache = cache_read_raw // rounds if rounds > 0 else cache_read_raw - approx_context_tokens = per_round_cache + per_round_input - - # Hermes' messages-based estimate is the canonical "what's in - # the model's context right now" number (it matches what the - # next-turn prompt will look like). Prefer it for - # ``prompt_tokens`` so the status bar stays consistent before, - # during, and after generation. Fall back to the per-round - # average when no estimate is set (e.g. accumulator used outside - # the client, in unit tests). 
- if self.messages_estimate > 0: - prompt_tokens = self.messages_estimate - else: - prompt_tokens = approx_context_tokens - - return SimpleNamespace( - prompt_tokens=prompt_tokens, - completion_tokens=output_tokens, - total_tokens=prompt_tokens + output_tokens, - prompt_tokens_details=SimpleNamespace(cached_tokens=per_round_cache), - # Preserve raw cursor-side totals for billing / cost tracking - # consumers that need the actual usage figures. - cursor_raw_input_tokens=input_tokens_raw, - cursor_raw_cache_read_tokens=cache_read_raw, - cursor_internal_rounds=rounds, - cursor_per_round_context=approx_context_tokens, - ) - - -# --------------------------------------------------------------------------- -# Inline OpenAI-style namespace shims (mirror copilot_acp_client style) -# --------------------------------------------------------------------------- - - -class _CursorChatCompletions: - def __init__(self, client: "CursorAgentClient"): - self._client = client - - def create(self, **kwargs: Any) -> Any: - # ``cursor-agent`` exposes streaming via stream-json on its own stdout, - # but the synchronous ``_create_chat_completion`` already accumulates - # the full response. If a caller passes ``stream=True`` we synthesise - # an OpenAI-style chunk iterator from the final response so the - # streaming hot path stays iterable. Without this, iterating the - # ``SimpleNamespace`` we return surfaces as ``TypeError: - # 'types.SimpleNamespace' object is not iterable`` (Hermes' chat - # streaming loop did this). 
- stream_requested = bool(kwargs.pop("stream", False)) - kwargs.pop("stream_options", None) # OpenAI SDK extras — irrelevant - response = self._client._create_chat_completion(**kwargs) - if not stream_requested: - return response - return _synthesise_stream_chunks(response) - - -class _CursorChatNamespace: - def __init__(self, client: "CursorAgentClient"): - self.completions = _CursorChatCompletions(client) - - -def _synthesise_stream_chunks(response: Any): - """Yield OpenAI-style streaming chunks from a non-streaming response. - - Hermes' chat streaming loop expects ``for chunk in stream:`` with each - chunk shaped like an OpenAI ``ChatCompletionChunk``: ``chunk.choices[0] - .delta.{content,tool_calls,reasoning,reasoning_content}`` and a final - chunk carrying ``usage``. We can't truly stream from the underlying - subprocess at this layer, but we can split the assembled response into a - small number of chunks that the loop will accept without crashing. - """ - try: - choice = response.choices[0] - except Exception: - return - - message = getattr(choice, "message", None) - if message is None: - return - - role = "assistant" - content = getattr(message, "content", "") or "" - tool_calls = getattr(message, "tool_calls", None) or [] - reasoning = getattr(message, "reasoning", None) - reasoning_content = getattr(message, "reasoning_content", None) - finish_reason = getattr(choice, "finish_reason", "stop") - model = getattr(response, "model", "cursor") - usage = getattr(response, "usage", None) - - if reasoning_content: - yield SimpleNamespace( - choices=[ - SimpleNamespace( - delta=SimpleNamespace( - role=role, - content=None, - tool_calls=None, - reasoning=None, - reasoning_content=reasoning_content, - ), - finish_reason=None, - index=0, - ) - ], - model=model, - usage=None, - ) - elif reasoning: - yield SimpleNamespace( - choices=[ - SimpleNamespace( - delta=SimpleNamespace( - role=role, - content=None, - tool_calls=None, - reasoning=reasoning, - 
reasoning_content=None, - ), - finish_reason=None, - index=0, - ) - ], - model=model, - usage=None, - ) - - if content: - yield SimpleNamespace( - choices=[ - SimpleNamespace( - delta=SimpleNamespace( - role=role, - content=content, - tool_calls=None, - reasoning=None, - reasoning_content=None, - ), - finish_reason=None, - index=0, - ) - ], - model=model, - usage=None, - ) - - if tool_calls: - # Hermes expects streaming tool_calls to include a per-chunk index. - for i, tc in enumerate(tool_calls): - yield SimpleNamespace( - choices=[ - SimpleNamespace( - delta=SimpleNamespace( - role=role, - content=None, - tool_calls=[ - SimpleNamespace( - index=i, - id=getattr(tc, "id", f"call_{i}"), - type="function", - function=SimpleNamespace( - name=getattr(tc.function, "name", ""), - arguments=getattr(tc.function, "arguments", ""), - ), - ) - ], - reasoning=None, - reasoning_content=None, - ), - finish_reason=None, - index=0, - ) - ], - model=model, - usage=None, - ) - - yield SimpleNamespace( - choices=[ - SimpleNamespace( - delta=SimpleNamespace( - role=None, - content=None, - tool_calls=None, - reasoning=None, - reasoning_content=None, - ), - finish_reason=finish_reason, - index=0, - ) - ], - model=model, - usage=usage, - ) - - -# --------------------------------------------------------------------------- -# Main client -# --------------------------------------------------------------------------- - - -class CursorAgentClient: - """Minimal OpenAI-client-compatible facade for the Cursor Agent CLI.""" - - def __init__( - self, - *, - api_key: str | None = None, - base_url: str | None = None, - default_headers: dict[str, str] | None = None, - command: str | None = None, - args: list[str] | None = None, - workspace: str | None = None, - mode: str | None = None, - timeout_seconds: float | None = None, - tool_progress_callback: Any = None, - context_estimate_callback: Any = None, - **_: Any, - ): - candidate_key = (api_key or os.getenv("CURSOR_API_KEY", "") or "").strip() - # 
Treat sentinels ("", "cursor-agent-login", …) as "no key" so we don't - # forward them to ``cursor-agent --api-key`` (which rejects them and - # closes stdin, producing BrokenPipeError on our writes). - self.api_key = None if candidate_key in _API_KEY_SENTINELS else candidate_key - self.base_url = base_url or CURSOR_MARKER_BASE_URL - self._default_headers = dict(default_headers or {}) - self._command = (command or _resolve_command()).strip() or DEFAULT_CURSOR_COMMAND - self._extra_args = list(args) if args else _resolve_extra_args() - chosen_mode = (mode or _resolve_mode()).strip().lower() or DEFAULT_CURSOR_MODE - if chosen_mode not in _VALID_CURSOR_MODES: - chosen_mode = DEFAULT_CURSOR_MODE - self._mode = chosen_mode - override = workspace or _resolve_workspace_override() - self._workspace: str | None = override or None # None ⇒ tmpdir per call - # Idle timeout (resets per event). Env var > explicit arg > default. - self._timeout_seconds = float(timeout_seconds) if timeout_seconds else _DEFAULT_TIMEOUT_SECONDS - env_timeout = os.environ.get("HERMES_CURSOR_TIMEOUT_SECONDS", "").strip() - if env_timeout: - try: - env_timeout_val = float(env_timeout) - if env_timeout_val > 0: - self._timeout_seconds = env_timeout_val - except ValueError: - pass - - self._tool_progress_callback = tool_progress_callback - # Optional hook invoked with the rough messages-based token estimate - # *before* the subprocess spawns. Used by the host agent to bump - # the status-bar (``compressor.last_prompt_tokens``) so the input - # context is visible during long in-flight turns instead of the - # bar sitting at 0 until the result event arrives. - self._context_estimate_callback = context_estimate_callback - - # High-water mark for the Hermes status bar. Held only WITHIN - # a single Hermes user turn: Hermes loops on tool_calls (cursor - # returning ``<tool_call>`` blocks for Hermes to run), making - # multiple cursor calls per user prompt.
Each call's footprint - # can vary (different tools attached, different message slices), - # so the bar must not flicker between those internal calls. - # Reset automatically on every NEW user turn (detected by the - # user-message count growing in the messages list); previously - # this was a session-wide monotonic mark, which incorrectly - # froze the bar at the highest-activity turn's value and - # prevented it from reflecting the actual current input across - # subsequent prompts. - self._context_high_water: int = 0 - # Last seen count of user messages in the prompt list. Used to - # detect new user turns so we can reset the high-water above. - self._last_user_msg_count: int = 0 - - self.chat = _CursorChatNamespace(self) - self.is_closed = False - - self._active_process: subprocess.Popen[str] | None = None - self._active_process_lock = threading.Lock() - self._ephemeral_dirs: list[str] = [] - self._dir_lock = threading.Lock() - # Session-scoped scratch workspace. Lazily minted on first call - # and REUSED across all subsequent calls in the same chat session - # so cursor-agent doesn't pay its ~4.5s "fresh-workspace bootstrap" - # tax on every turn. Cleaned up by ``close()`` along with any - # other ephemeral dirs. When ``self._workspace`` (user override) - # is set we skip this entirely and honour the explicit path. - self._session_workspace: str | None = None - - # ------------------------------------------------------------------ - # Lifecycle - # ------------------------------------------------------------------ - - def close(self) -> None: - proc: subprocess.Popen[str] | None - with self._active_process_lock: - proc = self._active_process - self._active_process = None - self.is_closed = True - # New session starts fresh: drop the high-water floor so the - # status bar reflects current prompt size, not the residual - # of a previously-large conversation. 
- self._context_high_water = 0 - if proc is not None: - try: - proc.terminate() - proc.wait(timeout=2) - except Exception: - try: - proc.kill() - except Exception: - pass - with self._dir_lock: - dirs, self._ephemeral_dirs = self._ephemeral_dirs, [] - # Drop the cached session workspace ref; if this client is - # ever re-used after close() (shouldn't happen, but defensive) - # the next call will lazy-init a fresh dir. - self._session_workspace = None - for d in dirs: - try: - shutil.rmtree(d, ignore_errors=True) - except Exception: - pass - - # ------------------------------------------------------------------ - # OpenAI-compat surface - # ------------------------------------------------------------------ - - def _create_chat_completion( - self, - *, - model: str | None = None, - messages: list[dict[str, Any]] | None = None, - timeout: float | None = None, - tools: list[dict[str, Any]] | None = None, - tool_choice: Any = None, - **_: Any, - ) -> Any: - # Estimate context size from what Hermes is actually sending — - # this is the authoritative answer for the status bar and the - # compressor. Cursor's reported ``inputTokens`` is the SUM - # across internal tool round-trips and undercounts the snapshot - # at end of turn (after averaging) while overcounting at end of - # turn (raw); neither matches what's in the next-turn prompt. - # Estimating from messages keeps the bar consistent before, - # during, and after the call (#cursor-bar-stable). - # Detect a NEW user turn: Hermes adds a user message at the top - # of every fresh prompt cycle, then loops internally on tool_calls - # without adding more user messages. So a strictly increased user- - # message count is the signal that this is a new prompt and the - # high-water mark from the previous turn no longer applies. 
- try: - user_msg_count = sum( - 1 for m in (messages or []) if (m or {}).get("role") == "user" - ) - except Exception: - user_msg_count = self._last_user_msg_count - is_new_user_turn = user_msg_count > self._last_user_msg_count - if is_new_user_turn: - # Drop the floor so the bar can reflect this turn's actual - # input size (which may legitimately be smaller than a prior - # heavy-tool-use turn's per-round average). - self._context_high_water = 0 - self._last_user_msg_count = user_msg_count - - try: - from agent.model_metadata import estimate_request_tokens_rough - self._last_messages_estimate = estimate_request_tokens_rough( - messages or [], tools=tools or None - ) - except Exception: - self._last_messages_estimate = 0 - - # Bump the high-water mark NOW (before subprocess spawn) so the - # status bar reflects input context immediately. Without this the - # bar shows 0/200K throughout a long in-flight FIRST turn because - # the compressor only learns about prompt_tokens from the final - # response. - if self._last_messages_estimate > self._context_high_water: - self._context_high_water = self._last_messages_estimate - if callable(self._context_estimate_callback) and self._last_messages_estimate > 0: - # On a new user turn, signal the host so it can reset its - # compressor bar to this turn's estimate (allowing the bar - # to DROP if appropriate). Otherwise the callback should - # bump monotonically so the in-loop cursor calls don't - # flicker the bar down between iterations. - try: - self._context_estimate_callback( - self._last_messages_estimate, reset=is_new_user_turn - ) - except TypeError: - # Backward-compat: older callbacks without ``reset`` kwarg. - try: - self._context_estimate_callback(self._last_messages_estimate) - except Exception: - pass - except Exception: - # Never let a UI hook break the actual request. 
- pass - - prompt_text = _format_messages_as_prompt( - messages or [], - model=model, - tools=tools, - tool_choice=tool_choice, - ) - - if timeout is None: - effective_timeout = self._timeout_seconds - elif isinstance(timeout, (int, float)): - effective_timeout = float(timeout) - else: - candidates = [ - getattr(timeout, attr, None) - for attr in ("read", "write", "connect", "pool", "timeout") - ] - numeric = [float(v) for v in candidates if isinstance(v, (int, float))] - effective_timeout = max(numeric) if numeric else self._timeout_seconds - - chosen_model = (model or DEFAULT_CURSOR_MODEL).strip() or DEFAULT_CURSOR_MODEL - - accumulator = self._run_prompt( - prompt_text=prompt_text, - model=chosen_model, - timeout_seconds=effective_timeout, - ) - - # Use the synthesis text (post-last-tool) so the user gets the - # actual answer without the wall of "let me check X next" prose - # that cursor's model emits between tool batches. The - # intermediate prose was already surfaced live via the text- - # event bridge. - assistant_text = accumulator.synthesis_text() - reasoning_text = accumulator.assembled_reasoning() or None - - if accumulator.is_error: - raise RuntimeError( - f"cursor-agent reported an error: {accumulator.error_message or assistant_text}" - ) - - tool_calls, cleaned_text = _extract_tool_calls_from_text(assistant_text) - - cursor_internal_tools = [evt.to_public_dict() for evt in accumulator.tool_events] - # Hand cursor's accumulator our messages-based estimate so - # ``openai_usage`` can use it as the canonical ``prompt_tokens`` - # the status bar reads from. Without this the bar shows - # different numbers during vs after generation (cursor's - # per-round average vs Hermes' messages estimate, ~3x apart). 
- # - # We also gate the estimate by the running high-water mark so - # the bar never visibly DROPS within a chat session — a wobble - # we saw when Hermes loops over multiple cursor calls per user - # turn (each call has a different tools/messages footprint). - cur_estimate = getattr(self, "_last_messages_estimate", 0) or 0 - # ``openai_usage`` may also use cursor's per-round average; mix - # it in so the high-water never undercounts when our estimate - # is too low (e.g. tools=[] on a follow-up call). - cursor_per_round = self._estimate_per_round_context(accumulator) - new_high = max(self._context_high_water, cur_estimate, cursor_per_round) - self._context_high_water = new_high - accumulator.messages_estimate = new_high - assistant_message = SimpleNamespace( - content=cleaned_text, - tool_calls=tool_calls, - reasoning=reasoning_text, - reasoning_content=reasoning_text, - reasoning_details=None, - # Audit log of cursor-agent's *internal* tool calls (shell/read/ - # edit/etc. that cursor's harness ran by itself). Hermes' UI is - # already shown them in real time via tool_progress_callback; - # this field lets sessions persist what happened. 
- cursor_internal_tools=cursor_internal_tools, - ) - finish_reason = "tool_calls" if tool_calls else "stop" - choice = SimpleNamespace( - message=assistant_message, - finish_reason=finish_reason, - index=0, - ) - return SimpleNamespace( - choices=[choice], - usage=accumulator.openai_usage(), - model=chosen_model, - id=accumulator.request_id or f"cursor-{accumulator.session_id}", - object="chat.completion", - cursor_internal_tools=cursor_internal_tools, - ) - - # ------------------------------------------------------------------ - # Subprocess plumbing - # ------------------------------------------------------------------ - - def _build_argv(self, *, model: str, workspace: str) -> list[str]: - argv = [ - self._command, - "-p", - "--output-format", - "stream-json", - ] - # Only forward ``--mode`` to the CLI for values it knows about. - # The synthetic ``agent`` value means "use cursor's default - # permissionMode" — achieved by omitting the flag entirely. - if self._mode in _CURSOR_CLI_MODES: - argv.extend(["--mode", self._mode]) - argv.extend( - [ - "--model", - model, - "--workspace", - workspace, - "--force", - "--trust", - ] - ) - if self.api_key: - argv.extend(["--api-key", self.api_key]) - argv.extend(self._extra_args) - return argv - - def _allocate_workspace(self) -> tuple[str, bool]: - """Return ``(workspace, ephemeral)``. - - Strategy: - 1. If the caller pinned an explicit ``workspace`` (env var or kwarg), - always honour it. - 2. Otherwise, lazily mint ONE temp dir for the whole client session - and reuse it across calls. Per-turn fresh dirs cost cursor-agent - ~4.5s of bootstrap overhead each invocation (measured), and there's - no isolation benefit between turns of the SAME chat session - anyway — they're already operating on behalf of the same user. - - The session workspace is tracked in ``_ephemeral_dirs`` so - ``close()`` cleans it up just like the legacy per-call dirs. 
- """ - if self._workspace: - try: - Path(self._workspace).mkdir(parents=True, exist_ok=True) - except Exception: - pass - return self._workspace, False - with self._dir_lock: - if self._session_workspace is None: - tmp = tempfile.mkdtemp(prefix="hermes-cursor-") - self._session_workspace = tmp - self._ephemeral_dirs.append(tmp) - return self._session_workspace, True - - def _run_prompt( - self, - *, - prompt_text: str, - model: str, - timeout_seconds: float, - ) -> _StreamJsonAccumulator: - workspace, _ephemeral = self._allocate_workspace() - argv = self._build_argv(model=model, workspace=workspace) - - try: - proc = subprocess.Popen( - argv, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - bufsize=1, - cwd=workspace, - env=_build_subprocess_env(self.api_key), - ) - except FileNotFoundError as exc: - raise RuntimeError( - f"Could not start Cursor Agent CLI '{self._command}'. " - "Install Cursor CLI (https://cursor.com/dashboard/integrations) " - "or set HERMES_CURSOR_COMMAND / CURSOR_AGENT_PATH." - ) from exc - - if proc.stdin is None or proc.stdout is None: - proc.kill() - raise RuntimeError("cursor-agent process did not expose stdin/stdout pipes.") - - self.is_closed = False - with self._active_process_lock: - self._active_process = proc - - try: - # Drain stderr concurrently while we feed stdin so a fast-exiting - # cursor-agent (e.g. on bad auth) can't deadlock or hide its - # diagnostic message behind our pipe write. 
- stderr_tail: deque[str] = deque(maxlen=80) - inbox: queue.Queue[dict[str, Any]] = queue.Queue() - - def _stderr_reader_early() -> None: - if proc.stderr is None: - return - for line in proc.stderr: - stderr_tail.append(line.rstrip("\n")) - - err_thread = threading.Thread(target=_stderr_reader_early, daemon=True) - err_thread.start() - - stdin_error: BaseException | None = None - try: - proc.stdin.write(prompt_text) - proc.stdin.flush() - except BrokenPipeError as exc: - # cursor-agent closed stdin before consuming the prompt — almost - # always means it rejected auth (e.g. invalid API key) or - # bailed on a flag. Capture the cause; we'll raise after we - # have stderr context. - stdin_error = exc - except Exception as exc: # pragma: no cover - defensive - stdin_error = exc - finally: - try: - proc.stdin.close() - except Exception: - pass - - if stdin_error is not None: - # Give the child a moment to flush its error message, then bail. - try: - proc.wait(timeout=3) - except Exception: - pass - err_thread.join(timeout=1) - exit_code = getattr(proc, "returncode", None) - if exit_code is None: - try: - exit_code = proc.poll() - except Exception: - exit_code = None - stderr_text = "\n".join(stderr_tail).strip() - redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else "" - detail = f" stderr: {redacted}" if redacted else "" - raise RuntimeError( - "cursor-agent closed stdin before reading the prompt " - f"(exit {exit_code}).{detail}" - ) from stdin_error - - def _stdout_reader() -> None: - if proc.stdout is None: - return - for line in proc.stdout: - line = line.strip() - if not line: - continue - try: - inbox.put(json.loads(line)) - except Exception: - # Cursor sometimes prints non-JSON warnings before/after - # the JSON stream — preserve them in stderr_tail-like - # form so timeouts can surface useful diagnostics. 
- stderr_tail.append("[stdout-non-json] " + line) - - out_thread = threading.Thread(target=_stdout_reader, daemon=True) - out_thread.start() - # err_thread is already running from the pre-stdin-write block above. - - accumulator = _StreamJsonAccumulator( - on_tool_event=self._build_tool_event_bridge(), - on_text_event=self._build_text_event_bridge(), - ) - # Idle deadline, not wall-clock. Resets on every successful - # stream-json event. A turn can run arbitrarily long in total - # wall time provided the subprocess keeps emitting events - # (text deltas, tool_calls, tool_results). Only true hangs - # (no events for ``timeout_seconds``) trigger termination. - idle_seconds = float(timeout_seconds) - deadline = time.monotonic() + idle_seconds - - while not accumulator.terminal: - if time.monotonic() >= deadline: - self._terminate_active_proc(proc) - raise TimeoutError( - f"cursor-agent emitted no events for {idle_seconds:.0f}s; " - f"presumed hung. Set HERMES_CURSOR_TIMEOUT_SECONDS to " - f"increase the idle threshold." - ) - if proc.poll() is not None and inbox.empty(): - break - try: - event = inbox.get(timeout=0.25) - except queue.Empty: - continue - # Successful event arrival => subprocess is alive and - # making progress. Reset the idle deadline. - deadline = time.monotonic() + idle_seconds - try: - accumulator.feed(event) - except Exception: - # Don't let a malformed event abort the entire request. - # Keep draining; if the terminal result never comes, - # the idle deadline above will surface the failure. - continue - - if not accumulator.terminal: - stderr_text = "\n".join(stderr_tail).strip() - redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else "" - raise RuntimeError( - "cursor-agent exited before emitting a terminal result. 
" - + (f"stderr tail:\n{redacted}" if redacted else "(no stderr)") - ) - - return accumulator - finally: - self._terminate_active_proc(proc) - - def _estimate_per_round_context(self, accumulator: "_StreamJsonAccumulator") -> int: - """Compute cursor's per-round context estimate without mutating it. - - Mirrors the math in :meth:`_StreamJsonAccumulator.openai_usage` - but returns just the per-round figure so we can feed it into - the high-water mark before swapping in the messages estimate. - """ - input_tokens_raw = int(accumulator.usage.get("inputTokens", 0)) - cache_read_raw = int(accumulator.usage.get("cacheReadTokens", 0)) - rounds = max(len(accumulator.tool_events) + 1, 1) - per_round_input = input_tokens_raw // rounds if rounds > 0 else input_tokens_raw - per_round_cache = cache_read_raw // rounds if rounds > 0 else cache_read_raw - return per_round_cache + per_round_input - - def reset_context_baseline(self) -> None: - """Reset the bar's monotonic floor (e.g. on ``/new`` or compress). - - Hermes' chat session calls ``close()`` on the client when - starting a fresh session; clients spawned with ``shared=True`` - outlive that. This is the explicit hook for any caller that - wants the bar to drop back to current-prompt size after a - deliberate context wipe. - """ - self._context_high_water = 0 - - def _build_text_event_bridge(self) -> Any: - """Adapter for cursor's intermediate ``assistant`` text events. - - Cursor emits "planning text" between tool batches (e.g. - "Searching the agent directory…" → tools → "Reading each - matching file…" → tools → final synthesis). We surface each - intermediate piece as a synthetic ``narrate`` tool-progress - event so the Hermes activity feed shows the agentic chain - live, interleaved with the real tool events — instead of - bundling everything into one wall of text at the end. - - The synthesis text (the final one after the last tool) is - excluded so it doesn't double-up with the response body. 
- """ - cb = self._tool_progress_callback - if cb is None: - return None - - def _bridge(text: str) -> None: - try: - preview = text.strip().splitlines()[0] if text else "" - if len(preview) > 240: - preview = preview[:237] + "..." - if not preview: - return - cb("tool.started", "narrate", preview, {"text": text}) - cb( - "tool.completed", "narrate", None, None, - duration=0.0, is_error=False, result=text, - ) - except Exception: - pass - - return _bridge - - def _build_tool_event_bridge(self) -> Any: - """Adapter from our ``_CursorToolEvent`` stream to Hermes' callback. - - ``tool_progress_callback(event_type, name, preview, args, ...)`` is - the same shape Hermes' built-in tools use (see - ``agent/tool_executor.py``). We translate cursor's "tool_call - started/completed" stream-json events into ``tool.started`` / - ``tool.completed`` callbacks so the user's UI shows cursor's - internal shell/read/edit activity the same way it shows native - tool calls from Grok, GPT, Claude, etc. - - Without this bridge, cursor's tool activity is invisible to Hermes - — the user only sees the model's final text and the session's - ``tool_call_count`` stays at zero even when cursor actually ran - multiple shell/read commands internally. - """ - cb = self._tool_progress_callback - if cb is None: - return None - - def _bridge(stage: str, evt: _CursorToolEvent) -> None: - try: - if stage == "started": - preview = _build_cursor_tool_preview(evt) - cb("tool.started", evt.name, preview, evt.args) - elif stage == "completed": - # cli.py stores ``function_args`` from tool.started in a - # FIFO queue and pops them on tool.completed for display. - # ``evt.args`` is the SAME dict reference, so mutating it - # here surfaces our diff stats to ``get_cute_tool_message`` - # without changing the upstream callback signature. 
- if ( - evt.lines_added is not None - or evt.lines_removed is not None - ) and isinstance(evt.args, dict): - evt.args["_diff_stats"] = { - "added": evt.lines_added or 0, - "removed": evt.lines_removed or 0, - } - if evt.diff_string: - evt.args["_diff_string"] = evt.diff_string - cb( - "tool.completed", - evt.name, - None, - None, - duration=evt.duration_ms / 1000.0, - is_error=evt.is_error, - result=evt.result_text, - ) - except Exception: - # The Hermes callback may not accept all our kwargs (e.g. - # older Hermes builds). Fall back to the simplest form. - try: - cb(f"tool.{stage}", evt.name, evt.result_text or "", evt.args) - except Exception: - pass - - return _bridge - - def _terminate_active_proc(self, proc: subprocess.Popen[str]) -> None: - with self._active_process_lock: - current = self._active_process - if current is proc: - self._active_process = None - if proc.poll() is not None: - return - # cursor-agent exits naturally a few hundred ms after emitting the - # ``result`` event. Give it that grace period BEFORE force-killing — - # SIGTERM forces Node.js to run shutdown hooks which can take - # longer than just letting it exit on its own. - try: - proc.wait(timeout=0.7) - return - except subprocess.TimeoutExpired: - pass - # Still running — force it. - try: - proc.terminate() - proc.wait(timeout=1.5) - except Exception: - try: - proc.kill() - except Exception: - pass - - # ------------------------------------------------------------------ - # Introspection helpers - # ------------------------------------------------------------------ - - def whoami(self) -> dict[str, Any]: - """Return a dict of ``cursor-agent status`` info (best-effort). - - Used by the doctor / setup flow to surface logged-in user + tier. - Returns an empty dict if the CLI is missing or not authenticated. 
- """ - try: - out = subprocess.check_output( - [self._command, "status"], - text=True, - timeout=10, - env=_build_subprocess_env(self.api_key), - ) - except Exception: - return {} - info: dict[str, Any] = {"raw": out.strip()} - for line in out.splitlines(): - line = line.strip() - if line.startswith("✓ Logged in as "): - info["email"] = line.removeprefix("✓ Logged in as ").strip() - info["authenticated"] = True - return info - __all__ = [ "CursorAgentClient", @@ -1750,4 +33,15 @@ def whoami(self) -> dict[str, Any]: "DEFAULT_CURSOR_COMMAND", "DEFAULT_CURSOR_MODE", "DEFAULT_CURSOR_MODEL", + "_CursorToolEvent", + "_StreamJsonAccumulator", + "_SdkSession", + "_build_cursor_tool_preview", + "_build_subprocess_env", + "_format_messages_as_prompt", + "_normalize_cursor_tool_name", + "_resolve_command", + "cursor_sdk_installed", + "resolve_cursor_backend", + "run_prompt_via_sdk", ] diff --git a/agent/error_classifier.py b/agent/error_classifier.py index e8a44866b28e..809a264d55a1 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -433,6 +433,74 @@ def is_auth(self) -> bool: ] +def _classify_cursor_sdk_error(error: Exception, result_fn) -> "ClassifiedError | None": + """Map cursor-sdk CursorAgentError into Hermes failover hints.""" + try: + from cursor_sdk.errors import ( + AuthenticationError, + CursorAgentError, + IntegrationNotConnectedError, + NotFoundError, + RateLimitError, + ) + except ImportError: + return None + + if not isinstance(error, CursorAgentError): + return None + + status = getattr(error, "status", None) or getattr(error, "status_code", None) + retryable = bool(getattr(error, "is_retryable", False)) + + if isinstance(error, IntegrationNotConnectedError): + return result_fn( + FailoverReason.auth, + status_code=status, + retryable=False, + should_fallback=True, + ) + if isinstance(error, AuthenticationError): + return result_fn( + FailoverReason.auth, + status_code=status, + retryable=False, + should_rotate_credential=True, + 
should_fallback=True, + ) + if isinstance(error, RateLimitError): + return result_fn( + FailoverReason.rate_limit, + status_code=status or 429, + retryable=True, + should_rotate_credential=True, + should_fallback=True, + ) + if isinstance(error, NotFoundError): + return result_fn( + FailoverReason.model_not_found, + status_code=status or 404, + retryable=False, + should_fallback=True, + ) + + if status is not None: + by_status = _classify_by_status(int(status), str(getattr(error, "message", error)).lower(), result_fn) + if by_status is not None: + if retryable and not by_status.retryable: + return result_fn(by_status.reason, status_code=status, retryable=True) + return by_status + + if retryable: + return result_fn(FailoverReason.server_error, status_code=status, retryable=True) + + return result_fn( + FailoverReason.format_error, + status_code=status, + retryable=False, + should_fallback=True, + ) + + # ── Classification pipeline ───────────────────────────────────────────── def classify_api_error( @@ -530,6 +598,14 @@ def _result(reason: FailoverReason, **overrides) -> ClassifiedError: defaults.update(overrides) return ClassifiedError(**defaults) + # ── Cursor SDK structured errors ───────────────────────────────── + # cursor-sdk raises typed CursorAgentError subclasses with explicit + # retry hints. Map them before generic string heuristics so Hermes' + # retry loop respects the SDK's is_retryable flag. + _cursor_sdk_err = _classify_cursor_sdk_error(error, _result) + if _cursor_sdk_err is not None: + return _cursor_sdk_err + # ── 1. Provider-specific patterns (highest priority) ──────────── # Provider content-policy / safety-filter block.
The provider has made a diff --git a/docs/cursor_architecture.md b/docs/cursor_architecture.md index 151cbe82e27e..c9096010ac13 100644 --- a/docs/cursor_architecture.md +++ b/docs/cursor_architecture.md @@ -83,7 +83,7 @@ Legend / lifecycle: Hermes observes but doesn't gate them past Hermes' approvals.mode) ``` -**Key files:** `agent/cursor_agent_client.py` (runtime + accumulator + bridge), `plugins/model-providers/cursor/` (provider profile), `hermes_cli/auth.py` (credentials + status), `agent/agent_runtime_helpers.py:create_openai_client()` (client factory), `agent/conversation_compression.py` (compress + duck-typed reset hook), `agent/display.py` (`get_cute_tool_message`, `extract_edit_diff`; unified diff rendering for cursor edits). +**Key files:** `agent/cursor/` (package: CLI + SDK backends, typed events, `CursorTurnAccumulator`, bridge), `agent/cursor_agent_client.py` (backward-compat re-exports), `plugins/model-providers/cursor/` (provider profile), `hermes_cli/auth.py` (credentials + status), `agent/agent_runtime_helpers.py:create_openai_client()` (client factory), `agent/conversation_compression.py` (compress + duck-typed reset hook), `agent/display.py` (`get_cute_tool_message`, `extract_edit_diff`; unified diff rendering for cursor edits). --- @@ -118,7 +118,7 @@ Aliases resolving to `cursor`: `cursor-agent`, `cursor-cli`, `cursor-sub`, `curs **Per-request lifecycle:** -1. **Prompt assembly.** `_format_messages_as_prompt()` flattens the OpenAI message list (system/user/assistant/tool) into a single stdin prompt. Tool schemas are inlined as JSON; the model is instructed to emit Hermes tool calls as `<tool_call>{...}</tool_call>` blocks (grammar shared with `copilot_acp_client`). +1. **Prompt assembly.** `format_messages_as_prompt()` flattens the OpenAI message list (system/user/assistant/tool) into a single stdin prompt. Tool schemas are inlined as JSON; the model is instructed to emit Hermes tool calls as `<tool_call>{...}</tool_call>` blocks (grammar shared with `copilot_acp_client`). 2.
**Workspace.** Session-scoped: one temp dir per `CursorAgentClient` instance, reused for every call. Created lazily on first call as `hermes-cursor-*`, tracked in `_ephemeral_dirs` for cleanup at `close()`. Override with `HERMES_CURSOR_WORKSPACE` or the `workspace` ctor arg. A fresh dir per call previously cost roughly 4 to 5 seconds of "first-time workspace bootstrap" tax on every turn; fixed by reusing the dir across the session. 3. **Argv.** `cursor-agent -p --output-format stream-json --model --workspace --force --trust` plus optional `--mode`, `--api-key`, and `HERMES_CURSOR_ARGS`. 4. **Mode mapping:** @@ -246,6 +246,26 @@ Completed internal events also populate `response.cursor_internal_tools` / `mess | `HERMES_CURSOR_WORKSPACE` | session-scoped temp dir | Pin workspace directory (reused across all turns of one session by default) | | `HERMES_CURSOR_BASE_URL` | `cursor://agent` | Provider marker (not HTTP) | | `HERMES_CURSOR_TIMEOUT_SECONDS` | `1800` | Idle threshold (not wall-clock). Resets on every stream-json event from cursor-agent. A turn may run arbitrarily long in total provided events keep arriving; only true subprocess hangs trigger termination. Default is 30 minutes; cursor-agent's own internal shell ceiling is 10 min so chained long operations can routinely exceed 15 min. Hermes' outer 90s stale-call detector is disabled for cursor so this is the only timeout in effect. | +| `HERMES_CURSOR_BACKEND` | `auto` | Transport: `auto` (SDK when `cursor-sdk` is installed **and** a real `CURSOR_API_KEY` is set; otherwise CLI), `sdk` (force SDK), `cli` (force `cursor-agent` subprocess). 
| + +## Backend Selection (CLI vs SDK) + +Hermes supports two transports for the cursor provider: + +| Backend | When used | Auth | +|---------|-----------|------| +| **CLI** (`cursor-agent`) | Default for browser-OAuth users (`cursor-agent login`) | CLI session or optional `CURSOR_API_KEY` forwarded to subprocess | +| **SDK** (`cursor-sdk`) | `HERMES_CURSOR_BACKEND=auto` with `CURSOR_API_KEY` set, or `HERMES_CURSOR_BACKEND=sdk` | User API Key from [Dashboard → Integrations](https://cursor.com/dashboard/integrations) | + +Install the SDK extra: `uv pip install 'hermes-agent[cursor]'` or `uv pip install cursor-sdk`. + +**Lazy install:** When `HERMES_CURSOR_BACKEND=auto|sdk` and a real `CURSOR_API_KEY` is set, Hermes calls `tools.lazy_deps.ensure("provider.cursor_sdk")` on first SDK use (and offers install in `hermes model` when the key is present but the package is missing). After activation, `hermes update` refreshes `cursor-sdk` via `_refresh_active_lazy_features()` like other lazy backends. + +On `auto`, if the SDK path fails because the account lacks `sdk_python_preview_access`, Hermes falls back to the CLI transparently. + +Implementation: `agent/cursor/cli_backend.py` and `agent/cursor/sdk_backend.py` both emit typed `CursorTurnEvent`s into `CursorTurnAccumulator` (`agent/cursor/accumulator.py`), so the UI bridge, compression hooks, and status bar stay unchanged regardless of transport. + +**Hermes streaming:** True token-by-token Hermes streaming for cursor via the SDK is deferred; both transports accumulate a full turn before returning an OpenAI-shaped response (with optional chunk synthesis for callers that pass `stream=True`). 
## Turn-Level Timeout Semantics @@ -280,23 +300,21 @@ Cursor released a Python SDK (`cursor-sdk`, public beta, v0.1.5 as of 2026-05-23) which exposes a higher-level agent API with native streaming, typed events (`run.messages()`), proper cancellation (`run.cancel()`), and a structured error model (`CursorAgentError` with `is_retryable` / -`retry_after`). It is the architecturally better target than the -subprocess shim. - -We intentionally did **not** adopt it in this PR for three reasons: - -1. API access via the SDK is currently allowlist-gated. Users without - `sdk_python_preview_access` get `IntegrationNotConnectedError`, which - breaks the "any Cursor subscriber can use this" promise. -2. SDK auth requires manually generating a User API Key - (Dashboard → Integrations) and exporting `CURSOR_API_KEY`. The CLI's - browser OAuth flow is one-time and friction-free; replacing it would - regress the onboarding experience. -3. v0.1.5 in two weeks with documented "APIs may change before GA" - warnings makes upstream pinning risky for a foundational integration. - -When all three constraints lift (SDK GA + allowlist removed + auth -flow supports either API key or CLI-derived token), the inner subprocess -layer should be replaced by an SDK-backed implementation. The outer -layer (`auth_type="external_process"`, provider registration, model -catalog) stays as-is; only `agent/cursor_agent_client.py` changes. +`retry_after`). + +**Status (2026-05-28):** Hermes now supports the SDK as an opt-in/auto +backend via `HERMES_CURSOR_BACKEND` and `agent/cursor/sdk_backend.py`. +The CLI subprocess remains the default for users who only have browser +OAuth (`cursor-agent login`) without a User API Key. + +Remaining gaps before SDK becomes the unconditional default: + +1. API access via the SDK is still allowlist-gated for some accounts + (`sdk_python_preview_access`). Auto mode falls back to CLI when this + blocks. +2. 
SDK auth still requires manually generating a User API Key — no + browser OAuth flow yet. +3. v0.1.5 ships with "APIs may change before GA" warnings; pin carefully. + +When all three constraints lift, `auto` can flip its default to SDK-first +without changing the outer provider registration layer. diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 97d15142ce04..fa488637c220 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -5472,6 +5472,40 @@ def _model_flow_cursor(config, current_model=""): }.get(_cursor_mode, _cursor_mode) print(f" Cursor mode: {_cursor_mode} — {_mode_blurb}") + try: + from hermes_cli.status import _cursor_transport_label + + transport = _cursor_transport_label() + if transport: + print(f" Transport: {transport}") + except Exception: + pass + + _cursor_api_key = (os.environ.get("CURSOR_API_KEY") or "").strip() + if _cursor_api_key and _cursor_api_key not in { + "cursor-agent-login", + "cursor-cli-login", + "external-process", + "external_process", + }: + try: + from agent.cursor.backend import cursor_sdk_installed, ensure_cursor_sdk + + if not cursor_sdk_installed(): + try: + from hermes_cli.cli_output import prompt_yes_no + + if prompt_yes_no( + " Install cursor-sdk for faster SDK transport? [Y/n] ", + default=True, + ): + ensure_cursor_sdk(prompt=True) + print(" ✓ cursor-sdk installed") + except Exception as exc: + print(f" ⚠ cursor-sdk install skipped: {exc}") + except Exception: + pass + # Live catalog first (115+ models incl. composer-2.5-fast default); # fall back to the curated snapshot if the CLI call fails. 
def _cursor_transport_label() -> str | None:
    """Return the cursor transport label, or ``None`` if cursor is not the provider.

    The label is a short human-readable string for status output, e.g.
    ``"sdk (cursor-sdk ✓)"`` or ``"cli (cursor-agent, forced)"``. For the CLI
    transport it also hints at what is missing for the SDK path (package or
    ``CURSOR_API_KEY``).

    This is a display helper, so it never raises on backend-resolution
    problems; those are folded into the returned label instead.
    """
    # Bind up front: if resolve_requested_provider() itself raises AuthError,
    # the handler below would otherwise hit an unbound local.
    requested = None
    try:
        requested = resolve_requested_provider()
        effective = resolve_provider(requested)
    except AuthError:
        effective = requested or "auto"
    if effective != "cursor":
        return None

    try:
        from agent.cursor.backend import cursor_sdk_installed, resolve_cursor_backend
    except Exception:
        # Cursor package unavailable: the CLI subprocess is the only option.
        return "cli (cursor-agent)"

    forced = os.getenv("HERMES_CURSOR_BACKEND", "").strip().lower() or "auto"
    try:
        backend = resolve_cursor_backend()
    except Exception:
        # resolve_cursor_backend() raises when the SDK is forced but cannot be
        # made available. Status output must still render, so fall back to
        # the transport the user asked for and let the label explain.
        backend = "sdk" if forced == "sdk" else "cli"

    if backend == "sdk":
        installed = cursor_sdk_installed()
        return f"sdk (cursor-sdk{' ✓' if installed else ', package missing'})"
    if forced == "cli":
        return "cli (cursor-agent, forced)"
    if not cursor_sdk_installed():
        return "cli (cursor-agent — install cursor-sdk for SDK)"
    if not (get_env_value("CURSOR_API_KEY") or "").strip():
        return "cli (cursor-agent — set CURSOR_API_KEY for SDK)"
    return "cli (cursor-agent)"
5092f48ecb1a..6d62f5b95a1f 100644 --- a/plugins/model-providers/cursor/__init__.py +++ b/plugins/model-providers/cursor/__init__.py @@ -1,14 +1,11 @@ -"""Cursor provider profile — runs through the ``cursor-agent`` CLI. +"""Cursor provider profile — CLI or SDK transport. -Cursor doesn't expose a chat completions endpoint; it ships an agent. We -spawn ``cursor-agent -p --output-format stream-json --mode ask`` per request -and translate the line-delimited events into an OpenAI chat-completion -response. Auth piggybacks on the user's existing ``cursor-agent login`` (or -``CURSOR_API_KEY``) so every Cursor tier — Hobby, Pro, Pro+, Ultra, Teams — -can use Hermes through their existing subscription / credits. +Cursor doesn't expose a chat completions endpoint; it ships an agent. Hermes +routes requests through ``cursor-agent`` (CLI subprocess) or ``cursor-sdk`` +(Python SDK) depending on ``HERMES_CURSOR_BACKEND`` and ``CURSOR_API_KEY``. -See ``agent/cursor_agent_client.py`` for the runtime client and -``docs/plans/2026-05-25-cursor-provider-integration.md`` for the design. +See ``agent/cursor/`` for the runtime client and +``docs/cursor_architecture.md`` for the design. """ from __future__ import annotations diff --git a/pyproject.toml b/pyproject.toml index e1fe62b6d0c5..0b09323bfa18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,6 +117,7 @@ sms = ["aiohttp==3.13.3"] # to it, which is already provided by the `mcp` extra. computer-use = ["mcp==1.26.0"] acp = ["agent-client-protocol==0.9.0"] +cursor = ["cursor-sdk==0.1.5"] # mistral: extra REMOVED 2026-05-12 — `mistralai` PyPI project quarantined # after malicious 2.4.6 release (Mini Shai-Hulud worm). 
Every version of # `mistralai` returns 404 on PyPI right now, so any pin we'd write is diff --git a/tests/agent/test_cursor_agent_client.py b/tests/agent/test_cursor_agent_client.py index a7ffca1e6fe2..a8ed956bcd79 100644 --- a/tests/agent/test_cursor_agent_client.py +++ b/tests/agent/test_cursor_agent_client.py @@ -491,6 +491,7 @@ def setUp(self) -> None: # Make env predictable. keys = [k for k in os.environ if k.startswith("HERMES_CURSOR_") or k == "CURSOR_API_KEY" or k == "CURSOR_AGENT_PATH"] self._saved = {k: os.environ.pop(k) for k in keys} + os.environ["HERMES_CURSOR_BACKEND"] = "cli" def tearDown(self) -> None: for k, v in self._saved.items(): @@ -503,7 +504,7 @@ def _fake_popen(argv, **kwargs): fake_proc.env_seen = kwargs.get("env") return fake_proc - return patch("agent.cursor_agent_client.subprocess.Popen", side_effect=_fake_popen) + return patch("agent.cursor.cli_backend.subprocess.Popen", side_effect=_fake_popen) def test_happy_path_returns_openai_shaped_response(self) -> None: proc = _FakeProcess(SUCCESS_STREAM) @@ -1453,6 +1454,16 @@ def test_tool_call_block_is_extracted(self) -> None: finally: client.close() + def test_backend_property_reflects_resolution(self) -> None: + os.environ["HERMES_CURSOR_BACKEND"] = "auto" + with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True): + cli_client = CursorAgentClient() + self.assertEqual(cli_client.backend, "cli") + cli_client.close() + sdk_client = CursorAgentClient(api_key="crsr_real_test_key") + self.assertEqual(sdk_client.backend, "sdk") + sdk_client.close() + def test_subprocess_env_has_cursor_api_key_when_provided(self) -> None: proc = _FakeProcess(SUCCESS_STREAM) client = CursorAgentClient(api_key="crsr_test_42") @@ -1642,7 +1653,7 @@ def _fake_popen(argv, **kwargs): fake_proc.env_seen = kwargs.get("env") return fake_proc - return patch("agent.cursor_agent_client.subprocess.Popen", side_effect=_fake_popen) + return patch("agent.cursor.cli_backend.subprocess.Popen", 
class TestStreamJsonConversion(unittest.TestCase):
    """Conversion of raw stream-json dicts into typed Cursor turn events."""

    def test_assistant_text(self):
        # One text part in an assistant message yields exactly one text event.
        payload = {
            "type": "assistant",
            "message": {"content": [{"type": "text", "text": "hello"}]},
        }
        converted = stream_json_dict_to_events(payload)
        self.assertEqual(len(converted), 1)
        first = converted[0]
        self.assertIsInstance(first, AssistantTextEvent)
        self.assertEqual(first.text, "hello")

    def test_tool_started_and_completed(self):
        # The same call id is reported first as "started", then "completed".
        started_payload = {
            "type": "tool_call",
            "subtype": "started",
            "call_id": "c1",
            "tool_call": {"shellToolCall": {"args": {"command": "ls"}}},
        }
        started_events = stream_json_dict_to_events(started_payload)
        self.assertIsInstance(started_events[0], ToolStartedEvent)

        completed_payload = {
            "type": "tool_call",
            "subtype": "completed",
            "call_id": "c1",
            "tool_call": {
                "shellToolCall": {
                    "args": {"command": "ls"},
                    "result": {"success": {"stdout": "ok"}},
                }
            },
        }
        completed_events = stream_json_dict_to_events(completed_payload)
        self.assertIsInstance(completed_events[0], ToolCompletedEvent)
class TestBackendResolution(unittest.TestCase):
    """Transport chosen by ``resolve_cursor_backend`` for various env/key setups."""

    def setUp(self):
        # Snapshot and clear the variables that influence backend selection,
        # so every test starts from the "auto, no key" baseline.
        self._saved = {}
        for name in ("HERMES_CURSOR_BACKEND", "CURSOR_API_KEY"):
            self._saved[name] = os.environ.pop(name, None)

    def tearDown(self):
        # Restore the pre-test environment, removing anything a test added.
        for name, previous in self._saved.items():
            if previous is not None:
                os.environ[name] = previous
                continue
            os.environ.pop(name, None)

    def test_auto_without_key_uses_cli(self):
        with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True):
            chosen = resolve_cursor_backend(api_key=None)
            self.assertEqual(chosen, "cli")

    def test_auto_with_key_uses_sdk_when_installed(self):
        with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True):
            chosen = resolve_cursor_backend(api_key="crsr_real_key_12345")
            self.assertEqual(chosen, "sdk")

    def test_auto_with_sentinel_uses_cli(self):
        # "cursor-agent-login" is a placeholder credential, not a real API key.
        with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True):
            chosen = resolve_cursor_backend(api_key="cursor-agent-login")
            self.assertEqual(chosen, "cli")

    def test_forced_cli(self):
        os.environ["HERMES_CURSOR_BACKEND"] = "cli"
        chosen = resolve_cursor_backend(api_key="crsr_x")
        self.assertEqual(chosen, "cli")

    def test_forced_sdk_requires_package(self):
        os.environ["HERMES_CURSOR_BACKEND"] = "sdk"
        os.environ["CURSOR_API_KEY"] = "crsr_x"
        sdk_missing = patch(
            "agent.cursor.backend.cursor_sdk_installed",
            return_value=False,
        )
        install_fails = patch(
            "agent.cursor.backend.ensure_cursor_sdk",
            side_effect=RuntimeError("cursor-sdk is not installed"),
        )
        with sdk_missing, install_fails, self.assertRaises(RuntimeError):
            resolve_cursor_backend(api_key="crsr_x")
class TestRunPromptViaSdk(unittest.TestCase):
    """End-to-end check of ``run_prompt_via_sdk`` against a faked SDK run."""

    def test_streams_events_into_accumulator(self):
        """A faked assistant message plus result event yields a non-error turn."""
        # patch("cursor_sdk.Agent.create") imports cursor_sdk to resolve its
        # target. The package is an optional, lazily installed extra, so skip
        # instead of erroring when it is absent.
        import importlib.util

        if importlib.util.find_spec("cursor_sdk") is None:
            self.skipTest("cursor-sdk is not installed")

        sdk_session = SdkSession()
        fake_agent = MagicMock()
        fake_run = MagicMock()
        fake_result = SimpleNamespace(status="finished", result="done", id="r1", duration_ms=10)

        def _events():
            # First an assistant text message...
            yield SimpleNamespace(
                kind="sdk_message",
                sdk_message=SimpleNamespace(
                    type="assistant",
                    message=SimpleNamespace(
                        content=[SimpleNamespace(type="text", text="hi")]
                    ),
                ),
                interaction_update=None,
                result=None,
            )
            # ...then the terminal result carrying usage.
            yield SimpleNamespace(
                kind="result",
                sdk_message=None,
                interaction_update=None,
                result={
                    "status": "finished",
                    "result": "done",
                    "runId": "r1",
                    "durationMs": 10,
                    "usage": {"inputTokens": 10, "outputTokens": 2},
                },
            )

        fake_run.events.side_effect = _events
        fake_run.wait.return_value = fake_result
        fake_run.supports.return_value = True
        fake_agent.send.return_value = fake_run

        with patch("cursor_sdk.Agent.create", return_value=fake_agent), patch.object(
            SdkSession,
            "get_client",
            return_value=MagicMock(),
        ), patch("agent.cursor.sdk_backend.ensure_cursor_sdk"):
            acc = run_prompt_via_sdk(
                prompt_text="ping",
                model="composer-2.5",
                api_key="crsr_test_key",
                workspace="/tmp/ws",
                mode="agent",
                timeout_seconds=30,
                on_tool_event=None,
                on_text_event=None,
                sdk_session=sdk_session,
            )
        self.assertFalse(acc.is_error)
        fake_agent.close.assert_called_once()
b/tools/lazy_deps.py @@ -86,6 +86,9 @@ # when model.auth_mode=entra_id is selected; key-based azure-foundry # users never pay this import. "provider.azure_identity": ("azure-identity==1.25.3",), + # Cursor SDK transport — lazy-installed when HERMES_CURSOR_BACKEND=auto|sdk + # and CURSOR_API_KEY is set. CLI-only OAuth users never pull this in. + "provider.cursor_sdk": ("cursor-sdk==0.1.5",), # ─── Web search backends ─────────────────────────────────────────────── "search.exa": ("exa-py==2.10.2",),