diff --git a/agent/cursor/__init__.py b/agent/cursor/__init__.py
new file mode 100644
index 000000000000..cdcf327f2c77
--- /dev/null
+++ b/agent/cursor/__init__.py
@@ -0,0 +1,44 @@
+"""Cursor provider runtime package (CLI + SDK backends)."""
+
+from agent.cursor.accumulator import CursorTurnAccumulator, _StreamJsonAccumulator
+from agent.cursor.backend import (
+ cursor_sdk_installed,
+ ensure_cursor_sdk,
+ resolve_cursor_backend,
+)
+from agent.cursor.client import CursorAgentClient
+from agent.cursor.constants import (
+ CURSOR_MARKER_BASE_URL,
+ DEFAULT_CURSOR_COMMAND,
+ DEFAULT_CURSOR_MODEL,
+ DEFAULT_CURSOR_MODE,
+)
+from agent.cursor.events import (
+ run_stream_event_to_events,
+ sdk_message_to_events,
+ stream_json_dict_to_events,
+)
+from agent.cursor.prompt import format_messages_as_prompt
+from agent.cursor.sdk_backend import SdkSession, run_prompt_via_sdk
+from agent.cursor.tool_events import CursorToolEvent, _CursorToolEvent
+
+__all__ = [
+ "CURSOR_MARKER_BASE_URL",
+ "CursorAgentClient",
+ "CursorToolEvent",
+ "CursorTurnAccumulator",
+ "DEFAULT_CURSOR_COMMAND",
+ "DEFAULT_CURSOR_MODEL",
+ "DEFAULT_CURSOR_MODE",
+ "SdkSession",
+ "_CursorToolEvent",
+ "_StreamJsonAccumulator",
+ "cursor_sdk_installed",
+ "ensure_cursor_sdk",
+ "format_messages_as_prompt",
+ "resolve_cursor_backend",
+ "run_prompt_via_sdk",
+ "run_stream_event_to_events",
+ "sdk_message_to_events",
+ "stream_json_dict_to_events",
+]
diff --git a/agent/cursor/accumulator.py b/agent/cursor/accumulator.py
new file mode 100644
index 000000000000..9f2c25a9665f
--- /dev/null
+++ b/agent/cursor/accumulator.py
@@ -0,0 +1,235 @@
+"""Accumulates Cursor turn state from typed events (CLI or SDK)."""
+
+from __future__ import annotations
+
+import time
+from types import SimpleNamespace
+from typing import Any
+
+from agent.cursor.events import (
+ AssistantTextEvent,
+ CursorTurnEvent,
+ SystemEvent,
+ ThinkingEvent,
+ ToolCompletedEvent,
+ ToolStartedEvent,
+ TurnResultEvent,
+ stream_json_dict_to_events,
+)
+from agent.cursor.tool_events import (
+ CursorToolEvent,
+ summarise_cursor_tool_result,
+)
+
+
+class CursorTurnAccumulator:
+ """Accumulates state from a cursor turn event feed.
+
+ Caller feeds typed events with :meth:`feed`. When a terminal
+ :class:`TurnResultEvent` arrives the accumulator stores success/failure
+ state and surface text. The instance is reusable per-call but not
+ thread-safe.
+ """
+
+    def __init__(self, on_tool_event: Any = None, on_text_event: Any = None) -> None:
+        """Start with an empty turn and remember the optional observers.
+
+        ``on_tool_event`` is later invoked as ``(stage, tool_event)`` and
+        ``on_text_event`` as ``(text)``; passing ``None`` disables either.
+        """
+        # Observer hooks.
+        self._on_tool_event = on_tool_event
+        self._on_text_event = on_text_event
+
+        # Streamed content, in arrival order.
+        self.text_parts: list[str] = []
+        self.reasoning_parts: list[str] = []
+        self.event_log: list[tuple[str, Any]] = []
+        self._pending_text: list[str] = []
+
+        # Tool calls: lookup by call id plus an ordered list.
+        self._tool_events: dict[str, CursorToolEvent] = {}
+        self.tool_events: list[CursorToolEvent] = []
+
+        # Identification and accounting.
+        self.session_id: str = ""
+        self.request_id: str = ""
+        self.model_label: str = ""
+        self.duration_ms: int = 0
+        self.usage: dict[str, int] = {}
+        self.messages_estimate: int = 0
+
+        # Terminal outcome.
+        self.terminal: bool = False
+        self.is_error: bool = False
+        self.error_message: str = ""
+        self.final_result_text: str = ""
+
+ def feed(self, event: CursorTurnEvent | dict[str, Any]) -> None:
+ """Fold one event into the accumulated turn state.
+
+ A ``dict`` is expanded with ``stream_json_dict_to_events`` and every
+ resulting typed event is fed back through this method. Objects that
+ match none of the branches below are ignored.
+ """
+ if isinstance(event, dict):
+ for typed in stream_json_dict_to_events(event):
+ self.feed(typed)
+ return
+
+ # Session metadata: only non-empty fields overwrite what is stored.
+ if isinstance(event, SystemEvent):
+ if event.model:
+ self.model_label = event.model
+ if event.session_id:
+ self.session_id = event.session_id
+ return
+
+ if isinstance(event, ThinkingEvent):
+ if event.text:
+ self.reasoning_parts.append(event.text)
+ return
+
+ # Assistant text is recorded immediately but only buffered for the text
+ # callback; the buffer is flushed when the next tool call starts.
+ if isinstance(event, AssistantTextEvent):
+ if event.text:
+ self.text_parts.append(event.text)
+ self.event_log.append(("text", event.text))
+ self._pending_text.append(event.text)
+ return
+
+ # A tool start first hands any buffered text to the text callback, then
+ # registers the tool and leaves a "tool" marker in the event log.
+ if isinstance(event, ToolStartedEvent):
+ if self._pending_text:
+ for buffered in self._pending_text:
+ self._dispatch_text_event(buffered)
+ self._pending_text.clear()
+ self._consume_tool_started(event)
+ self.event_log.append(("tool", None))
+ return
+
+ if isinstance(event, ToolCompletedEvent):
+ self._consume_tool_completed(event)
+ return
+
+ # Terminal result: record the outcome. The result text becomes the
+ # assistant text only when nothing was streamed and the turn succeeded.
+ if isinstance(event, TurnResultEvent):
+ self.terminal = True
+ self.is_error = event.is_error
+ self.duration_ms = event.duration_ms
+ if event.request_id:
+ self.request_id = event.request_id
+ if event.usage:
+ self.usage = dict(event.usage)
+ if event.result_text:
+ self.final_result_text = event.result_text
+ if not self.text_parts and not self.is_error:
+ self.text_parts.append(event.result_text)
+ # Keep an error message that was already set; otherwise fall back from
+ # the explicit message to the result text to a generic string.
+ if self.is_error and not self.error_message:
+ self.error_message = event.error_message or event.result_text or "cursor-agent returned an error"
+ return
+
+    def _consume_tool_started(self, event: ToolStartedEvent) -> None:
+        """Create the record for a newly started tool call and announce it."""
+        record = CursorToolEvent(
+            call_id=event.call_id,
+            envelope_key=event.envelope_key,
+            args=event.args,
+        )
+        # Keep the ordered list and the by-id index in step.
+        self.tool_events.append(record)
+        self._tool_events[event.call_id] = record
+        self._fire_tool_event("started", record)
+
+    def _consume_tool_completed(self, event: ToolCompletedEvent) -> None:
+        """Finalize a tool call record from its completion event.
+
+        If no matching start was seen, a record is created here and a
+        "started" notification is fired before the "completed" one.
+        """
+        record = self._tool_events.get(event.call_id)
+        if record is None:
+            record = CursorToolEvent(
+                call_id=event.call_id,
+                envelope_key=event.envelope_key,
+                args=event.args,
+            )
+            self._tool_events[event.call_id] = record
+            self.tool_events.append(record)
+            self._fire_tool_event("started", record)
+
+        finished = time.monotonic()
+        record.completed_at = finished
+        record.duration_ms = int((finished - record.started_at) * 1000)
+
+        outcome = event.result_payload.get("result")
+        if isinstance(outcome, dict):
+            if outcome.get("error"):
+                record.is_error = True
+            stats = outcome.get("success")
+            if isinstance(stats, dict):
+                # Copy edit statistics only when they have the expected type.
+                added = stats.get("linesAdded")
+                removed = stats.get("linesRemoved")
+                diff_text = stats.get("diffString")
+                if isinstance(added, int):
+                    record.lines_added = added
+                if isinstance(removed, int):
+                    record.lines_removed = removed
+                if isinstance(diff_text, str):
+                    record.diff_string = diff_text
+
+        record.result_text = summarise_cursor_tool_result(
+            event.envelope_key, event.result_payload
+        )
+        self._fire_tool_event("completed", record)
+
+    def _fire_tool_event(self, stage: str, evt: CursorToolEvent) -> None:
+        """Notify the tool observer, if one is set; its failures are ignored."""
+        if self._on_tool_event is not None:
+            try:
+                self._on_tool_event(stage, evt)
+            except Exception:
+                # Observer errors must not interrupt event processing.
+                pass
+
+    def _dispatch_text_event(self, text: str) -> None:
+        """Pass ``text`` to the text observer, if one is set; failures are ignored."""
+        if self._on_text_event is not None:
+            try:
+                self._on_text_event(text)
+            except Exception:
+                # Observer errors must not interrupt event processing.
+                pass
+
+    def assembled_text(self) -> str:
+        """Return all collected assistant text concatenated and stripped."""
+        combined = "".join(self.text_parts)
+        return combined.strip()
+
+    def synthesis_text(self) -> str:
+        """Return the text that follows the last tool call.
+
+        Fallbacks when no text follows the last tool marker: the full
+        assembled text if no tool ran, otherwise the non-blank final result
+        text, otherwise the full assembled text.
+        """
+        last_tool = -1
+        for position, (kind, _payload) in enumerate(self.event_log):
+            if kind == "tool":
+                last_tool = position
+
+        trailing = [
+            payload
+            for kind, payload in self.event_log[last_tool + 1:]
+            if kind == "text"
+        ]
+        if trailing:
+            return "".join(trailing).strip()
+
+        if last_tool >= 0 and self.final_result_text:
+            final = self.final_result_text.strip()
+            if final:
+                return final
+        return self.assembled_text()
+
+    def narration_text(self) -> str:
+        """Return the text emitted before each tool call, one segment per line.
+
+        Text after the last tool marker is not included, and segments that
+        are blank after stripping are dropped.
+        """
+        segments: list[str] = []
+        pending: list[str] = []
+        for kind, payload in self.event_log:
+            if kind == "text":
+                pending.append(payload)
+            elif kind == "tool" and pending:
+                segments.append("".join(pending).strip())
+                pending = []
+        return "\n".join(filter(None, segments))
+
+    def assembled_reasoning(self) -> str:
+        """Return all collected reasoning text concatenated and stripped."""
+        combined = "".join(self.reasoning_parts)
+        return combined.strip()
+
+    def openai_usage(self) -> SimpleNamespace:
+        """Build an OpenAI-style usage object from the recorded token counts.
+
+        The raw input and cache-read totals are divided by the number of
+        rounds (tool calls + 1) to approximate a single round's context size.
+        When ``messages_estimate`` is positive it is reported as
+        ``prompt_tokens``; otherwise the per-round approximation is used. The
+        raw totals are exposed under ``cursor_*`` attributes.
+
+        Missing, ``None`` or non-numeric counters are treated as 0 instead of
+        raising, so a malformed usage payload cannot fail the whole turn.
+        """
+
+        def _count(key: str) -> int:
+            # Coerce one usage counter to int, tolerating null/garbage values.
+            try:
+                return int(self.usage.get(key, 0))
+            except (TypeError, ValueError):
+                return 0
+
+        input_tokens_raw = _count("inputTokens")
+        output_tokens = _count("outputTokens")
+        cache_read_raw = _count("cacheReadTokens")
+
+        # Always >= 1, so the divisions below are safe.
+        rounds = len(self.tool_events) + 1
+        per_round_input = input_tokens_raw // rounds
+        per_round_cache = cache_read_raw // rounds
+        approx_context_tokens = per_round_cache + per_round_input
+
+        if self.messages_estimate > 0:
+            prompt_tokens = self.messages_estimate
+        else:
+            prompt_tokens = approx_context_tokens
+
+        return SimpleNamespace(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=output_tokens,
+            total_tokens=prompt_tokens + output_tokens,
+            prompt_tokens_details=SimpleNamespace(cached_tokens=per_round_cache),
+            cursor_raw_input_tokens=input_tokens_raw,
+            cursor_raw_cache_read_tokens=cache_read_raw,
+            cursor_internal_rounds=rounds,
+            cursor_per_round_context=approx_context_tokens,
+        )
+
+
+# Backward-compat alias for tests and legacy imports.
+_StreamJsonAccumulator = CursorTurnAccumulator
+
+
+__all__ = [
+ "CursorTurnAccumulator",
+ "_StreamJsonAccumulator",
+]
diff --git a/agent/cursor/backend.py b/agent/cursor/backend.py
new file mode 100644
index 000000000000..2f490a9e98e6
--- /dev/null
+++ b/agent/cursor/backend.py
@@ -0,0 +1,82 @@
+"""Backend selection and cursor-sdk lazy-install helpers."""
+
+from __future__ import annotations
+
+import os
+
+from agent.cursor.constants import (
+ DEFAULT_CURSOR_BACKEND,
+ _API_KEY_SENTINELS,
+ _SDK_MODES,
+ _VALID_BACKENDS,
+)
+
+
+def cursor_sdk_installed() -> bool:
+    """Report whether the ``cursor_sdk`` package can be imported."""
+    try:
+        import cursor_sdk  # noqa: F401
+    except ImportError:
+        available = False
+    else:
+        available = True
+    return available
+
+
+def ensure_cursor_sdk(*, prompt: bool = False) -> None:
+    """Lazy-install cursor-sdk when the SDK backend is selected.
+
+    Returns immediately when ``cursor_sdk`` is already importable. Otherwise
+    asks ``tools.lazy_deps`` to provide the ``provider.cursor_sdk`` feature.
+
+    Args:
+        prompt: Forwarded to ``lazy_deps.ensure``.
+
+    Raises:
+        RuntimeError: If the lazy-install helper itself cannot be imported or
+            reports the feature as unavailable.
+    """
+    if cursor_sdk_installed():
+        return
+
+    install_hint = (
+        "cursor-sdk is not installed. "
+        "Run: uv pip install cursor-sdk (or pip install 'hermes-agent[cursor]')"
+    )
+
+    # Import outside the guarded call: with the imports inside the same ``try``
+    # as ``except FeatureUnavailable``, a failed import left that name unbound
+    # and the handler raised UnboundLocalError instead of a useful message.
+    try:
+        from tools import lazy_deps
+        from tools.lazy_deps import FeatureUnavailable
+    except ImportError as exc:
+        raise RuntimeError(install_hint) from exc
+
+    try:
+        lazy_deps.ensure("provider.cursor_sdk", prompt=prompt)
+    except FeatureUnavailable as exc:
+        raise RuntimeError(install_hint) from exc
+
+
+def real_api_key(api_key: str | None) -> str | None:
+    """Return a usable API key, or ``None`` when only a placeholder is set.
+
+    Falls back to the ``CURSOR_API_KEY`` environment variable when ``api_key``
+    is empty. Blank values and the known sentinel strings yield ``None``.
+    """
+    candidate = api_key or os.getenv("CURSOR_API_KEY", "") or ""
+    candidate = candidate.strip()
+    if candidate and candidate not in _API_KEY_SENTINELS:
+        return candidate
+    return None
+
+
+def map_hermes_mode_to_sdk(mode: str) -> str | None:
+    """Translate a Hermes mode name into the mode passed to the SDK.
+
+    ``ask`` becomes ``plan``; names listed in ``_SDK_MODES`` pass through
+    (lower-cased and stripped); anything else, including an empty value,
+    becomes ``agent``.
+    """
+    wanted = (mode or "agent").strip().lower()
+    if wanted == "ask":
+        return "plan"
+    return wanted if wanted in _SDK_MODES else "agent"
+
+
+def resolve_cursor_backend(*, api_key: str | None = None) -> str:
+    """Return the effective backend: ``cli`` or ``sdk``.
+
+    The choice comes from ``HERMES_CURSOR_BACKEND``; unset or unrecognized
+    values mean the default. ``cli`` is returned as is. ``sdk`` requires the
+    SDK to be installable and a real API key, and raises ``RuntimeError``
+    otherwise. In automatic mode the SDK is chosen only when it is already
+    installed and a real API key is available.
+    """
+    choice = os.getenv("HERMES_CURSOR_BACKEND", "").strip().lower()
+    if not choice or choice not in _VALID_BACKENDS:
+        choice = DEFAULT_CURSOR_BACKEND
+
+    if choice == "cli":
+        return "cli"
+
+    if choice == "sdk":
+        ensure_cursor_sdk(prompt=False)
+        if real_api_key(api_key):
+            return "sdk"
+        raise RuntimeError(
+            "HERMES_CURSOR_BACKEND=sdk requires CURSOR_API_KEY "
+            "(Dashboard → Integrations → User API Keys)."
+        )
+
+    # auto
+    sdk_usable = cursor_sdk_installed() and real_api_key(api_key)
+    return "sdk" if sdk_usable else "cli"
+
+
+__all__ = [
+ "cursor_sdk_installed",
+ "ensure_cursor_sdk",
+ "map_hermes_mode_to_sdk",
+ "real_api_key",
+ "resolve_cursor_backend",
+]
diff --git a/agent/cursor/cli_backend.py b/agent/cursor/cli_backend.py
new file mode 100644
index 000000000000..3b5dc54be860
--- /dev/null
+++ b/agent/cursor/cli_backend.py
@@ -0,0 +1,208 @@
+"""CLI subprocess backend for the Cursor provider."""
+
+from __future__ import annotations
+
+import json
+import queue
+import subprocess
+import threading
+import time
+from collections import deque
+from typing import Any, Callable
+
+from agent.cursor.accumulator import CursorTurnAccumulator
+from agent.cursor.constants import _CURSOR_CLI_MODES
+from agent.cursor.env import build_subprocess_env
+from agent.cursor.events import stream_json_dict_to_events
+from agent.redact import redact_sensitive_text
+
+
+def build_argv(
+    *,
+    command: str,
+    mode: str,
+    model: str,
+    workspace: str,
+    api_key: str | None,
+    extra_args: list[str],
+) -> list[str]:
+    """Assemble the cursor-agent command line for one prompt run.
+
+    ``--mode`` is added only for modes listed in ``_CURSOR_CLI_MODES``, and
+    ``--api-key`` only when a key is given. ``extra_args`` are appended last.
+    """
+    # NOTE(review): the key is passed on the command line, where other local
+    # users can usually see it in process listings; callers in this package
+    # also export it through the child environment - confirm the flag is needed.
+    mode_flags = ["--mode", mode] if mode in _CURSOR_CLI_MODES else []
+    key_flags = ["--api-key", api_key] if api_key else []
+    return [
+        command,
+        "-p",
+        "--output-format",
+        "stream-json",
+        *mode_flags,
+        "--model",
+        model,
+        "--workspace",
+        workspace,
+        "--force",
+        "--trust",
+        *key_flags,
+        *extra_args,
+    ]
+
+
+def run_prompt_cli(
+    *,
+    command: str,
+    mode: str,
+    model: str,
+    workspace: str,
+    api_key: str | None,
+    extra_args: list[str],
+    prompt_text: str,
+    timeout_seconds: float,
+    on_tool_event: Any,
+    on_text_event: Any,
+    set_active_process: Callable[[subprocess.Popen[str] | None], None],
+    terminate_active_proc: Callable[[subprocess.Popen[str]], None],
+    mark_open: Callable[[], None],
+) -> CursorTurnAccumulator:
+    """Execute one Hermes turn via cursor-agent subprocess.
+
+    The prompt is written to the child's stdin, then stdout is read line by
+    line as JSON events which are fed into a ``CursorTurnAccumulator`` until a
+    terminal result arrives. ``timeout_seconds`` is an idle limit: it restarts
+    every time an event is received.
+
+    Returns:
+        The accumulator, once it has seen a terminal result.
+
+    Raises:
+        RuntimeError: The command could not be started, the pipes are
+            missing, stdin closed before the prompt was written, or the
+            process ended without a terminal result.
+        TimeoutError: No event arrived within ``timeout_seconds``.
+    """
+    argv = build_argv(
+        command=command,
+        mode=mode,
+        model=model,
+        workspace=workspace,
+        api_key=api_key,
+        extra_args=extra_args,
+    )
+
+    try:
+        proc = subprocess.Popen(
+            argv,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            bufsize=1,
+            cwd=workspace,
+            env=build_subprocess_env(api_key),
+        )
+    except FileNotFoundError as exc:
+        raise RuntimeError(
+            f"Could not start Cursor Agent CLI '{command}'. "
+            "Install Cursor CLI (https://cursor.com/dashboard/integrations) "
+            "or set HERMES_CURSOR_COMMAND / CURSOR_AGENT_PATH."
+        ) from exc
+
+    if proc.stdin is None or proc.stdout is None:
+        proc.kill()
+        raise RuntimeError("cursor-agent process did not expose stdin/stdout pipes.")
+
+    mark_open()
+    set_active_process(proc)
+
+    try:
+        # Last stderr lines, kept for error messages.
+        stderr_tail: deque[str] = deque(maxlen=80)
+        inbox: queue.Queue[dict[str, Any]] = queue.Queue()
+
+        def _stderr_reader_early() -> None:
+            if proc.stderr is None:
+                return
+            for line in proc.stderr:
+                stderr_tail.append(line.rstrip("\n"))
+
+        # Started before the prompt is written so stderr is already being
+        # collected if the child fails early.
+        err_thread = threading.Thread(target=_stderr_reader_early, daemon=True)
+        err_thread.start()
+
+        stdin_error: BaseException | None = None
+        try:
+            proc.stdin.write(prompt_text)
+            proc.stdin.flush()
+        except BrokenPipeError as exc:
+            stdin_error = exc
+        except Exception as exc:  # pragma: no cover - defensive
+            stdin_error = exc
+        finally:
+            try:
+                proc.stdin.close()
+            except Exception:
+                pass
+
+        if stdin_error is not None:
+            # Give the child a moment to exit so the exit code and stderr
+            # can be included in the error.
+            try:
+                proc.wait(timeout=3)
+            except Exception:
+                pass
+            err_thread.join(timeout=1)
+            exit_code = getattr(proc, "returncode", None)
+            if exit_code is None:
+                try:
+                    exit_code = proc.poll()
+                except Exception:
+                    exit_code = None
+            stderr_text = "\n".join(stderr_tail).strip()
+            redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else ""
+            detail = f" stderr: {redacted}" if redacted else ""
+            raise RuntimeError(
+                "cursor-agent closed stdin before reading the prompt "
+                f"(exit {exit_code}).{detail}"
+            ) from stdin_error
+
+        def _stdout_reader() -> None:
+            if proc.stdout is None:
+                return
+            for line in proc.stdout:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    inbox.put(json.loads(line))
+                except Exception:
+                    # Non-JSON output is kept alongside stderr for diagnostics.
+                    stderr_tail.append("[stdout-non-json] " + line)
+
+        out_thread = threading.Thread(target=_stdout_reader, daemon=True)
+        out_thread.start()
+
+        accumulator = CursorTurnAccumulator(
+            on_tool_event=on_tool_event,
+            on_text_event=on_text_event,
+        )
+        idle_seconds = float(timeout_seconds)
+        deadline = time.monotonic() + idle_seconds
+
+        while not accumulator.terminal:
+            if time.monotonic() >= deadline:
+                terminate_active_proc(proc)
+                raise TimeoutError(
+                    f"cursor-agent emitted no events for {idle_seconds:.0f}s; "
+                    f"presumed hung. Set HERMES_CURSOR_TIMEOUT_SECONDS to "
+                    f"increase the idle threshold."
+                )
+            if proc.poll() is not None and inbox.empty():
+                # The child has exited, but the reader thread may still be
+                # parsing output it has not queued yet. Let it finish (bounded
+                # wait) before concluding that nothing more will arrive;
+                # otherwise a result written just before exit can be lost.
+                out_thread.join(timeout=1.0)
+                if inbox.empty():
+                    break
+            try:
+                event = inbox.get(timeout=0.25)
+            except queue.Empty:
+                continue
+            # Any received event counts as activity.
+            deadline = time.monotonic() + idle_seconds
+            try:
+                for typed in stream_json_dict_to_events(event):
+                    accumulator.feed(typed)
+            except Exception:
+                # A malformed event is skipped rather than failing the turn.
+                continue
+
+        if not accumulator.terminal:
+            stderr_text = "\n".join(stderr_tail).strip()
+            redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else ""
+            raise RuntimeError(
+                "cursor-agent exited before emitting a terminal result. "
+                + (f"stderr tail:\n{redacted}" if redacted else "(no stderr)")
+            )
+
+        return accumulator
+    finally:
+        terminate_active_proc(proc)
+        set_active_process(None)
+
+
+__all__ = ["build_argv", "run_prompt_cli"]
diff --git a/agent/cursor/client.py b/agent/cursor/client.py
new file mode 100644
index 000000000000..62a7dd92944f
--- /dev/null
+++ b/agent/cursor/client.py
@@ -0,0 +1,454 @@
+"""OpenAI-compatible facade for the Cursor provider (CLI or SDK transport)."""
+
+from __future__ import annotations
+
+import os
+import shutil
+import subprocess
+import tempfile
+import threading
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+from agent.copilot_acp_client import _extract_tool_calls_from_text
+from agent.cursor.accumulator import CursorTurnAccumulator
+from agent.cursor.backend import resolve_cursor_backend
+from agent.cursor.cli_backend import run_prompt_cli
+from agent.cursor.constants import (
+ CURSOR_MARKER_BASE_URL,
+ DEFAULT_CURSOR_COMMAND,
+ DEFAULT_CURSOR_MODEL,
+ DEFAULT_CURSOR_MODE,
+ _API_KEY_SENTINELS,
+ _CURSOR_CLI_MODES,
+ _DEFAULT_TIMEOUT_SECONDS,
+ _VALID_CURSOR_MODES,
+)
+from agent.cursor.env import (
+ build_subprocess_env,
+ resolve_command,
+ resolve_extra_args,
+ resolve_mode,
+ resolve_workspace_override,
+)
+from agent.cursor.prompt import format_messages_as_prompt
+from agent.cursor.sdk_backend import SdkSession, run_prompt_via_sdk
+from agent.cursor.streaming import CursorChatNamespace
+from agent.cursor.tool_events import CursorToolEvent, build_cursor_tool_preview
+
+
+class CursorAgentClient:
+ """Minimal OpenAI-client-compatible facade for Cursor (CLI or SDK)."""
+
+ def __init__(
+ self,
+ *,
+ api_key: str | None = None,
+ base_url: str | None = None,
+ default_headers: dict[str, str] | None = None,
+ command: str | None = None,
+ args: list[str] | None = None,
+ workspace: str | None = None,
+ mode: str | None = None,
+ timeout_seconds: float | None = None,
+ tool_progress_callback: Any = None,
+ context_estimate_callback: Any = None,
+ **_: Any,
+ ):
+ """Configure the client from arguments, falling back to the environment.
+
+ Explicit ``command``, ``args``, ``workspace`` and ``mode`` win over the
+ values resolved from the environment helpers. Unrecognized keyword
+ arguments are accepted and ignored. The transport backend is resolved
+ last, so ``resolve_cursor_backend`` may raise from here.
+ """
+ # Sentinel placeholder keys (and the empty string) are stored as None.
+ candidate_key = (api_key or os.getenv("CURSOR_API_KEY", "") or "").strip()
+ self.api_key = None if candidate_key in _API_KEY_SENTINELS else candidate_key
+ self.base_url = base_url or CURSOR_MARKER_BASE_URL
+ self._default_headers = dict(default_headers or {})
+ self._command = (command or resolve_command()).strip() or DEFAULT_CURSOR_COMMAND
+ self._extra_args = list(args) if args else resolve_extra_args()
+ # Unknown mode names fall back to the default mode.
+ chosen_mode = (mode or resolve_mode()).strip().lower() or DEFAULT_CURSOR_MODE
+ if chosen_mode not in _VALID_CURSOR_MODES:
+ chosen_mode = DEFAULT_CURSOR_MODE
+ self._mode = chosen_mode
+ override = workspace or resolve_workspace_override()
+ self._workspace: str | None = override or None
+ self._timeout_seconds = float(timeout_seconds) if timeout_seconds else _DEFAULT_TIMEOUT_SECONDS
+ # A positive HERMES_CURSOR_TIMEOUT_SECONDS replaces the value chosen above,
+ # including an explicit ``timeout_seconds`` argument; unparsable or
+ # non-positive values are ignored.
+ env_timeout = os.environ.get("HERMES_CURSOR_TIMEOUT_SECONDS", "").strip()
+ if env_timeout:
+ try:
+ env_timeout_val = float(env_timeout)
+ if env_timeout_val > 0:
+ self._timeout_seconds = env_timeout_val
+ except ValueError:
+ pass
+
+ self._tool_progress_callback = tool_progress_callback
+ self._context_estimate_callback = context_estimate_callback
+ # Largest context estimate seen so far, and the user-message count of the
+ # previous request (used to detect a new user turn).
+ self._context_high_water: int = 0
+ self._last_user_msg_count: int = 0
+
+ self.chat = CursorChatNamespace(self)
+ self.is_closed = False
+
+ # Currently running CLI process, and temporary workspaces created by this
+ # client; each has its own lock.
+ self._active_process: subprocess.Popen[str] | None = None
+ self._active_process_lock = threading.Lock()
+ self._ephemeral_dirs: list[str] = []
+ self._dir_lock = threading.Lock()
+ self._session_workspace: str | None = None
+
+ self._sdk_session = SdkSession()
+ self._backend = resolve_cursor_backend(api_key=self.api_key)
+
+    @property
+    def backend(self) -> str:
+        """Effective transport: ``sdk`` (cursor-sdk) or ``cli`` (cursor-agent)."""
+        # Falls back to "cli" when the attribute was never set.
+        try:
+            return self._backend
+        except AttributeError:
+            return "cli"
+
+ def close(self) -> None:
+ """Release everything held by the client.
+
+ Marks the client closed, closes the SDK session, stops the active CLI
+ process if there is one, and deletes the temporary workspaces this
+ client created. Failures during cleanup are ignored.
+ """
+ proc: subprocess.Popen[str] | None
+ # Detach the active process first so it is stopped at most once from here.
+ with self._active_process_lock:
+ proc = self._active_process
+ self._active_process = None
+ self.is_closed = True
+ # getattr: tolerate instances on which the SDK session was never set.
+ sdk_session = getattr(self, "_sdk_session", None)
+ if sdk_session is not None:
+ try:
+ sdk_session.close()
+ except Exception:
+ pass
+ self._context_high_water = 0
+ # Ask the process to terminate; kill it if that fails or takes too long.
+ if proc is not None:
+ try:
+ proc.terminate()
+ proc.wait(timeout=2)
+ except Exception:
+ try:
+ proc.kill()
+ except Exception:
+ pass
+ # Take ownership of the directory list, then delete outside the bookkeeping.
+ with self._dir_lock:
+ dirs, self._ephemeral_dirs = self._ephemeral_dirs, []
+ self._session_workspace = None
+ for d in dirs:
+ try:
+ shutil.rmtree(d, ignore_errors=True)
+ except Exception:
+ pass
+
+ def _create_chat_completion(
+ self,
+ *,
+ model: str | None = None,
+ messages: list[dict[str, Any]] | None = None,
+ timeout: float | None = None,
+ tools: list[dict[str, Any]] | None = None,
+ tool_choice: Any = None,
+ **_: Any,
+ ) -> Any:
+ """Run one prompt and return an OpenAI-style chat completion object.
+
+ The messages are flattened into a single prompt, executed through the
+ selected backend, and the outcome is wrapped in ``SimpleNamespace``
+ objects with ``choices``, ``usage``, ``model``, ``id`` and ``object``.
+ Extra keyword arguments are ignored.
+
+ Raises:
+ RuntimeError: If the accumulated turn reports an error.
+ """
+ # A growing number of user messages marks a new user turn, which resets
+ # the context high-water mark.
+ try:
+ user_msg_count = sum(
+ 1 for m in (messages or []) if (m or {}).get("role") == "user"
+ )
+ except Exception:
+ user_msg_count = self._last_user_msg_count
+ is_new_user_turn = user_msg_count > self._last_user_msg_count
+ if is_new_user_turn:
+ self._context_high_water = 0
+ self._last_user_msg_count = user_msg_count
+
+ # Rough token estimate of the request; 0 when the estimator is unavailable
+ # or fails.
+ try:
+ from agent.model_metadata import estimate_request_tokens_rough
+ self._last_messages_estimate = estimate_request_tokens_rough(
+ messages or [], tools=tools or None
+ )
+ except Exception:
+ self._last_messages_estimate = 0
+
+ if self._last_messages_estimate > self._context_high_water:
+ self._context_high_water = self._last_messages_estimate
+ # Report the estimate. Callbacks that do not accept ``reset`` are retried
+ # with the estimate only; any other callback failure is ignored.
+ if callable(self._context_estimate_callback) and self._last_messages_estimate > 0:
+ try:
+ self._context_estimate_callback(
+ self._last_messages_estimate, reset=is_new_user_turn
+ )
+ except TypeError:
+ try:
+ self._context_estimate_callback(self._last_messages_estimate)
+ except Exception:
+ pass
+ except Exception:
+ pass
+
+ prompt_text = format_messages_as_prompt(
+ messages or [],
+ model=model,
+ tools=tools,
+ tool_choice=tool_choice,
+ )
+
+ # ``timeout`` may be a number or an object carrying per-phase timeouts; in
+ # the latter case the largest numeric attribute is used.
+ if timeout is None:
+ effective_timeout = self._timeout_seconds
+ elif isinstance(timeout, (int, float)):
+ effective_timeout = float(timeout)
+ else:
+ candidates = [
+ getattr(timeout, attr, None)
+ for attr in ("read", "write", "connect", "pool", "timeout")
+ ]
+ numeric = [float(v) for v in candidates if isinstance(v, (int, float))]
+ effective_timeout = max(numeric) if numeric else self._timeout_seconds
+
+ chosen_model = (model or DEFAULT_CURSOR_MODEL).strip() or DEFAULT_CURSOR_MODEL
+
+ accumulator = self._run_prompt(
+ prompt_text=prompt_text,
+ model=chosen_model,
+ timeout_seconds=effective_timeout,
+ )
+
+ assistant_text = accumulator.synthesis_text()
+ reasoning_text = accumulator.assembled_reasoning() or None
+
+ if accumulator.is_error:
+ raise RuntimeError(
+ f"cursor-agent reported an error: {accumulator.error_message or assistant_text}"
+ )
+
+ tool_calls, cleaned_text = _extract_tool_calls_from_text(assistant_text)
+ cursor_internal_tools = [evt.to_public_dict() for evt in accumulator.tool_events]
+ # The reported prompt size is the maximum of the previous high-water mark,
+ # this request's estimate and the per-round figure derived from usage.
+ cur_estimate = getattr(self, "_last_messages_estimate", 0) or 0
+ cursor_per_round = self._estimate_per_round_context(accumulator)
+ new_high = max(self._context_high_water, cur_estimate, cursor_per_round)
+ self._context_high_water = new_high
+ accumulator.messages_estimate = new_high
+ assistant_message = SimpleNamespace(
+ content=cleaned_text,
+ tool_calls=tool_calls,
+ reasoning=reasoning_text,
+ reasoning_content=reasoning_text,
+ reasoning_details=None,
+ cursor_internal_tools=cursor_internal_tools,
+ )
+ finish_reason = "tool_calls" if tool_calls else "stop"
+ choice = SimpleNamespace(
+ message=assistant_message,
+ finish_reason=finish_reason,
+ index=0,
+ )
+ return SimpleNamespace(
+ choices=[choice],
+ usage=accumulator.openai_usage(),
+ model=chosen_model,
+ id=accumulator.request_id or f"cursor-{accumulator.session_id}",
+ object="chat.completion",
+ cursor_internal_tools=cursor_internal_tools,
+ )
+
+    def _build_argv(self, *, model: str, workspace: str) -> list[str]:
+        """Build the CLI argument list from this client's settings."""
+        from agent.cursor.cli_backend import build_argv
+
+        settings = {
+            "command": self._command,
+            "mode": self._mode,
+            "model": model,
+            "workspace": workspace,
+            "api_key": self.api_key,
+            "extra_args": self._extra_args,
+        }
+        return build_argv(**settings)
+
+    def _allocate_workspace(self) -> tuple[str, bool]:
+        """Return ``(workspace_path, is_ephemeral)`` for the next run.
+
+        A configured workspace is created if missing (creation errors are
+        ignored) and reported as non-ephemeral. Otherwise one temporary
+        directory is created on first use and reused on later calls.
+        """
+        configured = self._workspace
+        if not configured:
+            with self._dir_lock:
+                shared = self._session_workspace
+                if shared is None:
+                    shared = tempfile.mkdtemp(prefix="hermes-cursor-")
+                    self._session_workspace = shared
+                    # Remembered so it can be removed later.
+                    self._ephemeral_dirs.append(shared)
+                return shared, True
+
+        try:
+            Path(configured).mkdir(parents=True, exist_ok=True)
+        except Exception:
+            pass
+        return configured, False
+
+ def _run_prompt(
+ self,
+ *,
+ prompt_text: str,
+ model: str,
+ timeout_seconds: float,
+ ) -> CursorTurnAccumulator:
+ """Run the prompt on the selected backend and return the accumulator.
+
+ The SDK path is used only when the backend is ``sdk`` and an API key is
+ set; everything else goes through the CLI. Certain SDK failures switch
+ this client to the CLI for this and later calls.
+ """
+ backend = getattr(self, "_backend", "cli")
+ if backend == "sdk" and self.api_key:
+ workspace, _ephemeral = self._allocate_workspace()
+ try:
+ return run_prompt_via_sdk(
+ prompt_text=prompt_text,
+ model=model,
+ api_key=self.api_key,
+ workspace=workspace,
+ mode=self._mode,
+ timeout_seconds=timeout_seconds,
+ on_tool_event=self._build_tool_event_bridge(),
+ on_text_event=self._build_text_event_bridge(),
+ sdk_session=self._sdk_session,
+ )
+ except RuntimeError as exc:
+ # When the SDK was requested explicitly, never fall back silently.
+ forced = os.getenv("HERMES_CURSOR_BACKEND", "").strip().lower()
+ if forced == "sdk":
+ raise
+ # Fall back only for errors whose message says the SDK itself is
+ # unavailable (matched on message text).
+ lowered = str(exc).lower()
+ if "sdk" in lowered and (
+ "preview" in lowered
+ or "not enabled" in lowered
+ or "not installed" in lowered
+ ):
+ self._backend = "cli"
+ return self._run_prompt_cli(
+ prompt_text=prompt_text,
+ model=model,
+ timeout_seconds=timeout_seconds,
+ )
+ raise
+ return self._run_prompt_cli(
+ prompt_text=prompt_text,
+ model=model,
+ timeout_seconds=timeout_seconds,
+ )
+
+    def _run_prompt_cli(
+        self,
+        *,
+        prompt_text: str,
+        model: str,
+        timeout_seconds: float,
+    ) -> CursorTurnAccumulator:
+        """Run the prompt through the CLI backend using this client's settings."""
+        workspace, _ephemeral = self._allocate_workspace()
+
+        def _set_active(proc: subprocess.Popen[str] | None) -> None:
+            # Record (or clear) the running process under the lock.
+            with self._active_process_lock:
+                self._active_process = proc
+
+        def _mark_open() -> None:
+            self.is_closed = False
+
+        tool_bridge = self._build_tool_event_bridge()
+        text_bridge = self._build_text_event_bridge()
+        return run_prompt_cli(
+            command=self._command,
+            mode=self._mode,
+            model=model,
+            workspace=workspace,
+            api_key=self.api_key,
+            extra_args=self._extra_args,
+            prompt_text=prompt_text,
+            timeout_seconds=timeout_seconds,
+            on_tool_event=tool_bridge,
+            on_text_event=text_bridge,
+            set_active_process=_set_active,
+            terminate_active_proc=self._terminate_active_proc,
+            mark_open=_mark_open,
+        )
+
+    def _estimate_per_round_context(self, accumulator: CursorTurnAccumulator) -> int:
+        """Approximate the context size of a single round of the turn.
+
+        The raw input and cache-read token totals are each divided by the
+        number of rounds (tool calls + 1) and the two quotients are summed.
+        Missing, ``None`` or non-numeric counters count as 0 instead of
+        raising.
+        """
+
+        def _count(key: str) -> int:
+            # Coerce one usage counter to int, tolerating null/garbage values.
+            try:
+                return int(accumulator.usage.get(key, 0))
+            except (TypeError, ValueError):
+                return 0
+
+        # Always >= 1, so the divisions are safe.
+        rounds = len(accumulator.tool_events) + 1
+        per_round_input = _count("inputTokens") // rounds
+        per_round_cache = _count("cacheReadTokens") // rounds
+        return per_round_cache + per_round_input
+
+ def reset_context_baseline(self) -> None:
+ """Reset the stored context high-water mark to zero."""
+ self._context_high_water = 0
+
+    def _build_text_event_bridge(self) -> Any:
+        """Return a text callback that reports narration as a ``narrate`` tool.
+
+        Returns ``None`` when no progress callback is configured. The
+        returned function emits a started/completed pair whose preview is the
+        first line of the stripped text, shortened to 240 characters; blank
+        text emits nothing. All errors are swallowed.
+        """
+        progress = self._tool_progress_callback
+        if progress is None:
+            return None
+
+        def _bridge(text: str) -> None:
+            try:
+                lines = text.strip().splitlines() if text else []
+                if not lines:
+                    return
+                headline = lines[0]
+                if len(headline) > 240:
+                    headline = headline[:237] + "..."
+                progress("tool.started", "narrate", headline, {"text": text})
+                progress(
+                    "tool.completed",
+                    "narrate",
+                    None,
+                    None,
+                    duration=0.0,
+                    is_error=False,
+                    result=text,
+                )
+            except Exception:
+                pass
+
+        return _bridge
+
+ def _build_tool_event_bridge(self) -> Any:
+ """Return a tool-event callback that forwards to the progress callback.
+
+ Returns ``None`` when no progress callback is configured. The returned
+ function maps the ``started`` / ``completed`` stages to ``tool.started``
+ / ``tool.completed`` calls; other stages do nothing.
+ """
+ cb = self._tool_progress_callback
+ if cb is None:
+ return None
+
+ def _bridge(stage: str, evt: CursorToolEvent) -> None:
+ try:
+ if stage == "started":
+ preview = build_cursor_tool_preview(evt)
+ cb("tool.started", evt.name, preview, evt.args)
+ elif stage == "completed":
+ # Attach edit statistics to the (dict) args before reporting
+ # completion; this mutates ``evt.args`` in place.
+ if (
+ evt.lines_added is not None
+ or evt.lines_removed is not None
+ ) and isinstance(evt.args, dict):
+ evt.args["_diff_stats"] = {
+ "added": evt.lines_added or 0,
+ "removed": evt.lines_removed or 0,
+ }
+ if evt.diff_string:
+ evt.args["_diff_string"] = evt.diff_string
+ cb(
+ "tool.completed",
+ evt.name,
+ None,
+ None,
+ duration=evt.duration_ms / 1000.0,
+ is_error=evt.is_error,
+ result=evt.result_text,
+ )
+ except Exception:
+ # Retry once with a plain positional call; give up silently if that
+ # fails too.
+ try:
+ cb(f"tool.{stage}", evt.name, evt.result_text or "", evt.args)
+ except Exception:
+ pass
+
+ return _bridge
+
+    def _terminate_active_proc(self, proc: subprocess.Popen[str]) -> None:
+        """Stop ``proc`` and forget it if it is the tracked active process.
+
+        A process that has already exited is left alone. Otherwise it gets a
+        short grace period to exit by itself, then ``terminate()``, and
+        ``kill()`` if termination fails or does not finish in time.
+        """
+        with self._active_process_lock:
+            if self._active_process is proc:
+                self._active_process = None
+
+        if proc.poll() is None:
+            try:
+                proc.wait(timeout=0.7)
+            except subprocess.TimeoutExpired:
+                # Still running after the grace period: escalate.
+                try:
+                    proc.terminate()
+                    proc.wait(timeout=1.5)
+                except Exception:
+                    try:
+                        proc.kill()
+                    except Exception:
+                        pass
+
+    def whoami(self) -> dict[str, Any]:
+        """Run ``<command> status`` and extract the login information.
+
+        Returns an empty dict if the command fails. Otherwise the dict holds
+        the stripped output under ``raw`` and, when a login line is present,
+        ``email`` and ``authenticated``.
+        """
+        login_prefix = "✓ Logged in as "
+        try:
+            output = subprocess.check_output(
+                [self._command, "status"],
+                text=True,
+                timeout=10,
+                env=build_subprocess_env(self.api_key),
+            )
+        except Exception:
+            return {}
+
+        details: dict[str, Any] = {"raw": output.strip()}
+        for raw_line in output.splitlines():
+            entry = raw_line.strip()
+            if not entry.startswith(login_prefix):
+                continue
+            details["email"] = entry.removeprefix(login_prefix).strip()
+            details["authenticated"] = True
+        return details
+
+
+__all__ = ["CursorAgentClient"]
diff --git a/agent/cursor/constants.py b/agent/cursor/constants.py
new file mode 100644
index 000000000000..4e5163966ebf
--- /dev/null
+++ b/agent/cursor/constants.py
@@ -0,0 +1,37 @@
+"""Shared constants for the Cursor provider."""
+from __future__ import annotations
+
+CURSOR_MARKER_BASE_URL = "cursor://agent"
+DEFAULT_CURSOR_COMMAND = "cursor-agent"
+DEFAULT_CURSOR_MODE = "agent"
+DEFAULT_CURSOR_MODEL = "auto"
+
+_VALID_CURSOR_MODES = frozenset({"ask", "plan", "agent"})
+_CURSOR_CLI_MODES = frozenset({"ask", "plan"})
+_DEFAULT_TIMEOUT_SECONDS = 1800.0
+
+_API_KEY_SENTINELS = frozenset({
+ "",
+ "cursor-agent-login",
+ "cursor-cli-login",
+ "external-process",
+ "external_process",
+})
+
+DEFAULT_CURSOR_BACKEND = "auto"
+_VALID_BACKENDS = frozenset({"auto", "cli", "sdk"})
+_SDK_MODES = frozenset({"agent", "plan"})
+
+__all__ = [
+ "CURSOR_MARKER_BASE_URL",
+ "DEFAULT_CURSOR_COMMAND",
+ "DEFAULT_CURSOR_MODE",
+ "DEFAULT_CURSOR_MODEL",
+ "DEFAULT_CURSOR_BACKEND",
+ "_VALID_CURSOR_MODES",
+ "_CURSOR_CLI_MODES",
+ "_DEFAULT_TIMEOUT_SECONDS",
+ "_API_KEY_SENTINELS",
+ "_VALID_BACKENDS",
+ "_SDK_MODES",
+]
diff --git a/agent/cursor/env.py b/agent/cursor/env.py
new file mode 100644
index 000000000000..4b2c5bb9a0ec
--- /dev/null
+++ b/agent/cursor/env.py
@@ -0,0 +1,89 @@
+"""Environment and subprocess helpers for Cursor backends."""
+
+from __future__ import annotations
+
+import os
+import shlex
+
+from agent.cursor.constants import (
+ DEFAULT_CURSOR_COMMAND,
+ DEFAULT_CURSOR_MODE,
+ _VALID_CURSOR_MODES,
+)
+
+
+def resolve_command() -> str:
+    """Return the CLI command, preferring environment overrides.
+
+    ``HERMES_CURSOR_COMMAND`` is checked first, then ``CURSOR_AGENT_PATH``;
+    blank values are skipped and the default command is the last resort.
+    """
+    for variable in ("HERMES_CURSOR_COMMAND", "CURSOR_AGENT_PATH"):
+        value = os.getenv(variable, "").strip()
+        if value:
+            return value
+    return DEFAULT_CURSOR_COMMAND
+
+
+def resolve_extra_args() -> list[str]:
+    """Return extra CLI arguments parsed from ``HERMES_CURSOR_ARGS``.
+
+    The value is split with shell-like quoting rules. An unset or blank
+    variable yields an empty list.
+
+    Raises:
+        ValueError: If the value is not valid shell syntax (for example an
+            unterminated quote). The message names the environment variable.
+    """
+    raw = os.getenv("HERMES_CURSOR_ARGS", "").strip()
+    if not raw:
+        return []
+    try:
+        return shlex.split(raw)
+    except ValueError as exc:
+        # Same exception type as before, but say where the bad input came from.
+        raise ValueError(
+            f"HERMES_CURSOR_ARGS is not valid shell syntax: {exc}"
+        ) from exc
+
+
+def resolve_mode() -> str:
+    """Return the mode from ``HERMES_CURSOR_MODE``, or the default mode.
+
+    The value is stripped and lower-cased; blank or unrecognized values fall
+    back to the default.
+    """
+    requested = os.getenv("HERMES_CURSOR_MODE", "").strip().lower()
+    if requested and requested in _VALID_CURSOR_MODES:
+        return requested
+    return DEFAULT_CURSOR_MODE
+
+
+def resolve_workspace_override() -> str:
+    """Return the stripped ``HERMES_CURSOR_WORKSPACE`` value, or ``""`` if unset."""
+    configured = os.getenv("HERMES_CURSOR_WORKSPACE", "")
+    return configured.strip()
+
+
+def resolve_home_dir() -> str:
+    """Pick a stable HOME for the child process.
+
+    Sources are tried in order: ``hermes_constants.get_subprocess_home()``,
+    the ``HOME`` environment variable, ``~`` expansion, and the password
+    database entry for the current user. ``/tmp`` is returned if none of
+    them yields a value.
+    """
+    try:
+        from hermes_constants import get_subprocess_home
+
+        configured = get_subprocess_home()
+    except Exception:
+        configured = None
+    if configured:
+        return configured
+
+    from_env = os.environ.get("HOME", "").strip()
+    if from_env:
+        return from_env
+
+    tilde = os.path.expanduser("~")
+    # expanduser returns its input unchanged when it cannot expand it.
+    if tilde and tilde != "~":
+        return tilde
+
+    try:
+        import pwd
+
+        from_passwd = pwd.getpwuid(os.getuid()).pw_dir.strip()
+    except Exception:
+        from_passwd = ""
+    return from_passwd or "/tmp"
+
+
+def build_subprocess_env(api_key: str | None) -> dict[str, str]:
+    """Return a copy of the current environment prepared for the child process.
+
+    ``HOME`` is always set from ``resolve_home_dir()``. ``CURSOR_API_KEY`` is
+    set when a key is given. ``NO_COLOR`` and ``TERM`` receive defaults only
+    if they are not already present.
+    """
+    child_env = os.environ.copy()
+    child_env["HOME"] = resolve_home_dir()
+    if api_key:
+        child_env["CURSOR_API_KEY"] = api_key
+    for name, fallback in (("NO_COLOR", "1"), ("TERM", "dumb")):
+        child_env.setdefault(name, fallback)
+    return child_env
+
+
+__all__ = [
+ "build_subprocess_env",
+ "resolve_command",
+ "resolve_extra_args",
+ "resolve_home_dir",
+ "resolve_mode",
+ "resolve_workspace_override",
+]
diff --git a/agent/cursor/events.py b/agent/cursor/events.py
new file mode 100644
index 000000000000..8d61a2da5625
--- /dev/null
+++ b/agent/cursor/events.py
@@ -0,0 +1,341 @@
+"""Typed turn events for Cursor CLI and SDK backends."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Mapping, Union
+
+
+@dataclass(frozen=True)
+class SystemEvent:
+    """Immutable event carrying session metadata.
+
+    Both fields default to the empty string.
+    """
+
+    model: str = ""
+    session_id: str = ""
+
+
+@dataclass(frozen=True)
+class ThinkingEvent:
+    """Immutable event holding one piece of thinking text."""
+
+    text: str
+
+
+@dataclass(frozen=True)
+class AssistantTextEvent:
+    """Immutable event holding one piece of assistant-visible text."""
+
+    text: str
+
+
+@dataclass(frozen=True)
+class ToolStartedEvent:
+    """Immutable event emitted when a tool call begins.
+
+    ``envelope_key`` is the ``...ToolCall`` style key identifying the tool and
+    ``args`` is the argument mapping passed to it.
+    """
+
+    call_id: str
+    envelope_key: str
+    args: dict[str, Any]
+
+
+@dataclass(frozen=True)
+class ToolCompletedEvent:
+    """Immutable event emitted when a tool call finishes.
+
+    Carries the same identification as the start event plus
+    ``result_payload``, the payload dict that accompanies the completion.
+    """
+
+    call_id: str
+    envelope_key: str
+    args: dict[str, Any]
+    result_payload: dict[str, Any]
+
+
+@dataclass(frozen=True)
+class TurnResultEvent:
+    """Immutable terminal event describing how a turn ended.
+
+    Only ``is_error`` is required; text fields default to ``""``,
+    ``duration_ms`` to ``0`` and ``usage`` to a fresh empty dict per instance.
+    """
+
+    is_error: bool
+    result_text: str = ""
+    request_id: str = ""
+    duration_ms: int = 0
+    usage: dict[str, int] = field(default_factory=dict)
+    error_message: str = ""
+
+
+# Union of all typed event classes defined above; used as the element type of
+# the lists returned by the translator functions in this module.
+CursorTurnEvent = Union[
+ SystemEvent,
+ ThinkingEvent,
+ AssistantTextEvent,
+ ToolStartedEvent,
+ ToolCompletedEvent,
+ TurnResultEvent,
+]
+
+# Lower-case tool name -> envelope key. Several aliases share one envelope.
+_TOOL_NAME_TO_ENVELOPE: dict[str, str] = {
+    alias: envelope
+    for envelope, aliases in (
+        ("shellToolCall", ("shell",)),
+        ("readToolCall", ("read", "read_file")),
+        ("listToolCall", ("list", "list_directory")),
+        ("editToolCall", ("edit", "edit_file")),
+        ("writeToolCall", ("write", "write_file")),
+        ("patchToolCall", ("patch",)),
+        ("grepToolCall", ("grep",)),
+        ("globToolCall", ("glob",)),
+        ("searchToolCall", ("search",)),
+        ("deleteToolCall", ("delete", "delete_file")),
+        ("fetchToolCall", ("web_fetch", "fetch")),
+    )
+    for alias in aliases
+}
+
+
+def tool_name_to_envelope(name: str) -> str:
+    """Return the ``...ToolCall`` envelope key for a tool *name*.
+
+    A name that already ends in ``ToolCall`` is returned untouched. Otherwise
+    the lower-cased name is looked up in ``_TOOL_NAME_TO_ENVELOPE``; unknown
+    names get ``ToolCall`` appended to the name as given (case preserved).
+    """
+    if name.endswith("ToolCall"):
+        return name
+    try:
+        return _TOOL_NAME_TO_ENVELOPE[name.lower()]
+    except KeyError:
+        return f"{name}ToolCall"
+
+
+def normalize_tool_call_envelope(tool_call: Mapping[str, Any]) -> tuple[str, dict[str, Any]]:
+    """Return ``(envelope_key, payload)`` from SDK or stream-json tool_call dict.
+
+    Two input shapes are understood:
+
+    * wrapped: some key ending in ``ToolCall`` maps to a mapping; the first
+      such entry is returned as ``(key, dict(value))``;
+    * flat: ``name``/``toolName`` plus ``args`` (or ``input``); the payload
+      is rebuilt as ``{"args": ...}`` with ``result`` copied over if present.
+
+    An empty or falsy *tool_call* yields ``("", {})``.
+    """
+    if not tool_call:
+        return "", {}
+
+    wrapped = next(
+        (
+            (key, value)
+            for key, value in tool_call.items()
+            if isinstance(key, str)
+            and key.endswith("ToolCall")
+            and isinstance(value, Mapping)
+        ),
+        None,
+    )
+    if wrapped is not None:
+        return wrapped[0], dict(wrapped[1])
+
+    tool_name = str(tool_call.get("name") or tool_call.get("toolName") or "")
+    raw_args = tool_call.get("args")
+    if not isinstance(raw_args, Mapping):
+        alternative = tool_call.get("input")
+        raw_args = alternative if isinstance(alternative, Mapping) else {}
+
+    flat_payload: dict[str, Any] = {"args": dict(raw_args or {})}
+    if "result" in tool_call:
+        flat_payload["result"] = tool_call["result"]
+    # Nameless calls are filed under the generic "cursor" tool.
+    return tool_name_to_envelope(tool_name or "cursor"), flat_payload
+
+
+def _text_or_blank(value: Any) -> str:
+    """Return *value* when it is a ``str``; otherwise the empty string."""
+    return value if isinstance(value, str) else ""
+
+
+def stream_json_dict_to_events(event: dict[str, Any]) -> list[CursorTurnEvent]:
+    """Translate one cursor-agent stream-json dict into typed turn events.
+
+    The ``type`` field selects the translation (``system``, ``thinking``,
+    ``assistant``, ``tool_call``, ``result``). Malformed or unrecognised
+    input never raises here; it simply produces an empty list.
+    """
+    kind = event.get("type")
+    if not isinstance(kind, str):
+        return []
+
+    if kind == "system":
+        return [SystemEvent(
+            model=_text_or_blank(event.get("model")),
+            session_id=_text_or_blank(event.get("session_id")),
+        )]
+
+    if kind == "thinking":
+        thought = _text_or_blank(event.get("text"))
+        return [ThinkingEvent(text=thought)] if thought else []
+
+    if kind == "assistant":
+        envelope = event.get("message")
+        blocks = envelope.get("content") if isinstance(envelope, dict) else None
+        if not isinstance(blocks, list):
+            return []
+        # Only non-empty string text blocks become events.
+        return [
+            AssistantTextEvent(text=piece["text"])
+            for piece in blocks
+            if isinstance(piece, dict)
+            and piece.get("type") == "text"
+            and isinstance(piece.get("text"), str)
+            and piece["text"]
+        ]
+
+    if kind == "tool_call":
+        call_id = event.get("call_id")
+        if not (isinstance(call_id, str) and call_id):
+            return []
+        wrapper = event.get("tool_call")
+        if not (isinstance(wrapper, dict) and wrapper):
+            return []
+        # The first key of the wrapper is taken as the envelope key.
+        envelope_key = next(iter(wrapper))
+        body = wrapper[envelope_key] if isinstance(envelope_key, str) else None
+        if not isinstance(body, dict):
+            return []
+        call_args = body.get("args")
+        if not isinstance(call_args, dict):
+            call_args = {}
+        phase = event.get("subtype")
+        if phase == "started":
+            return [ToolStartedEvent(call_id=call_id, envelope_key=envelope_key, args=call_args)]
+        if phase == "completed":
+            return [ToolCompletedEvent(
+                call_id=call_id,
+                envelope_key=envelope_key,
+                args=call_args,
+                result_payload=body,
+            )]
+        return []
+
+    if kind == "result":
+        # Either an explicit truthy ``is_error`` or subtype "error" marks failure.
+        failed = bool(event.get("is_error", False)) or event.get("subtype") == "error"
+        elapsed = event.get("duration_ms")
+        raw_usage = event.get("usage")
+        counters: dict[str, int] = {}
+        if isinstance(raw_usage, dict):
+            counters = {
+                str(key): int(amount)
+                for key, amount in raw_usage.items()
+                if isinstance(amount, (int, float))
+            }
+        summary = _text_or_blank(event.get("result"))
+        failure_text = (summary or "cursor-agent returned an error") if failed else ""
+        return [TurnResultEvent(
+            is_error=failed,
+            result_text=summary,
+            request_id=_text_or_blank(event.get("request_id")),
+            duration_ms=elapsed if isinstance(elapsed, int) else 0,
+            usage=counters,
+            error_message=failure_text,
+        )]
+
+    return []
+
+
+def sdk_message_to_events(message: Any) -> list[CursorTurnEvent]:
+    """Translate one SDKMessage into typed turn events.
+
+    Dispatches on ``message.type``:
+
+    * ``system``    -> one ``SystemEvent`` (model id and agent/run id),
+    * ``thinking``  -> one ``ThinkingEvent`` when the text is non-empty,
+    * ``assistant`` -> one ``AssistantTextEvent`` per non-empty text block,
+    * ``tool_call`` -> ``ToolStartedEvent`` for status running/started, or
+      ``ToolCompletedEvent`` for status completed/error/failed.
+
+    Anything else, including tool calls with an unrecognised status, yields
+    an empty list. All attributes are read with ``getattr`` defaults, so a
+    partially populated message does not raise.
+    """
+    msg_type = getattr(message, "type", None)
+    if msg_type == "system":
+        model = getattr(getattr(message, "model", None), "id", None) or ""
+        # The trailing ``or ""`` keeps session_id a string even when both
+        # attributes exist but are None.
+        session_id = (
+            getattr(message, "agent_id", "")
+            or getattr(message, "run_id", "")
+            or ""
+        )
+        return [SystemEvent(model=model, session_id=session_id)]
+    if msg_type == "thinking":
+        text = getattr(message, "text", "")
+        if text:
+            return [ThinkingEvent(text=text)]
+        return []
+    if msg_type == "assistant":
+        out: list[CursorTurnEvent] = []
+        msg = getattr(message, "message", None)
+        for block in getattr(msg, "content", ()) or ():
+            if getattr(block, "type", None) == "text":
+                text = getattr(block, "text", "")
+                if text:
+                    out.append(AssistantTextEvent(text=text))
+        return out
+    if msg_type == "tool_call":
+        envelope = tool_name_to_envelope(getattr(message, "name", "") or "cursor")
+        args = getattr(message, "args", None)
+        if not isinstance(args, Mapping):
+            args = {}
+        status = str(getattr(message, "status", "") or "").lower()
+        call_id = getattr(message, "call_id", "") or ""
+        if status in {"running", "started"}:
+            return [ToolStartedEvent(call_id=call_id, envelope_key=envelope, args=dict(args))]
+        if status in {"completed", "error", "failed"}:
+            result = getattr(message, "result", None)
+            payload: dict[str, Any] = {"args": dict(args)}
+            if isinstance(result, Mapping):
+                payload["result"] = dict(result)
+            elif status in {"error", "failed"}:
+                # Both failure statuses must land under "error"; previously a
+                # "failed" call with a plain (non-mapping) result was stored
+                # under "success".
+                payload["result"] = {"error": result or "tool error"}
+            else:
+                payload["result"] = {"success": result} if result is not None else {}
+            return [ToolCompletedEvent(
+                call_id=call_id,
+                envelope_key=envelope,
+                args=dict(args),
+                result_payload=payload,
+            )]
+        return []
+    return []
+
+
+def interaction_update_to_events(update: Any) -> list[CursorTurnEvent]:
+    """Translate InteractionUpdate events into typed turn events.
+
+    Handled ``update.type`` values:
+
+    * ``text-delta`` / ``thinking-delta`` -> text or thinking event, skipped
+      when the text is empty;
+    * ``tool-call-started`` / ``tool-call-completed`` -> tool events built
+      from the normalised ``tool_call`` envelope;
+    * ``turn-ended`` -> a non-error ``TurnResultEvent``, but only when a
+      non-empty usage mapping is attached.
+
+    Everything else produces an empty list.
+    """
+    kind = getattr(update, "type", None)
+
+    if kind in ("text-delta", "thinking-delta"):
+        chunk = getattr(update, "text", "")
+        if not chunk:
+            return []
+        if kind == "text-delta":
+            return [AssistantTextEvent(text=chunk)]
+        return [ThinkingEvent(text=chunk)]
+
+    if kind in ("tool-call-started", "tool-call-completed"):
+        raw_call = getattr(update, "tool_call", {}) or {}
+        envelope, payload = normalize_tool_call_envelope(raw_call)
+        if not envelope:
+            return []
+        call_args = payload.get("args")
+        if not isinstance(call_args, dict):
+            call_args = {}
+        call_id = getattr(update, "call_id", "") or ""
+        if kind == "tool-call-started":
+            return [ToolStartedEvent(
+                call_id=call_id,
+                envelope_key=envelope,
+                args=call_args,
+            )]
+        return [ToolCompletedEvent(
+            call_id=call_id,
+            envelope_key=envelope,
+            args=call_args,
+            result_payload=payload,
+        )]
+
+    if kind == "turn-ended":
+        usage = getattr(update, "usage", None)
+        if not (isinstance(usage, Mapping) and usage):
+            return []
+        # Keep numeric counters only, coerced to int.
+        counters = {
+            str(key): int(amount)
+            for key, amount in usage.items()
+            if isinstance(amount, (int, float))
+        }
+        elapsed = int(usage.get("durationMs") or usage.get("duration_ms") or 0)
+        return [TurnResultEvent(
+            is_error=False,
+            duration_ms=elapsed,
+            usage=counters,
+        )]
+
+    return []
+
+
+def run_stream_event_to_events(event: Any) -> list[CursorTurnEvent]:
+    """Translate a RunStreamEvent into zero or more typed turn events.
+
+    ``event.kind`` picks the payload attribute: ``sdk_message`` and
+    ``interaction_update`` are delegated to their translators, ``result`` is
+    converted into a single ``TurnResultEvent``. A missing (``None``) payload
+    or an unknown kind gives an empty list.
+    """
+    kind = getattr(event, "kind", "")
+
+    if kind == "sdk_message":
+        if event.sdk_message is None:
+            return []
+        return sdk_message_to_events(event.sdk_message)
+
+    if kind == "interaction_update":
+        if event.interaction_update is None:
+            return []
+        return interaction_update_to_events(event.interaction_update)
+
+    if kind != "result" or event.result is None:
+        return []
+
+    data = dict(event.result)
+    outcome = str(data.get("status") or "").lower()
+    failed = outcome in {"error", "failed", "cancelled", "canceled"}
+
+    raw_usage = data.get("usage")
+    if not isinstance(raw_usage, Mapping):
+        raw_usage = {}
+    counters = {
+        str(key): int(amount)
+        for key, amount in raw_usage.items()
+        if isinstance(amount, (int, float))
+    }
+
+    text = str(data.get("result") or "")
+    return [TurnResultEvent(
+        is_error=failed,
+        result_text=text,
+        request_id=str(data.get("runId") or data.get("id") or ""),
+        duration_ms=int(data.get("durationMs") or data.get("duration_ms") or 0),
+        usage=counters,
+        # On failure the result text doubles as the error message.
+        error_message=text if failed else "",
+    )]
+
+
+# Public names of this module: the event dataclasses, their union alias, and
+# the translator / envelope helper functions defined above.
+__all__ = [
+ "AssistantTextEvent",
+ "CursorTurnEvent",
+ "SystemEvent",
+ "ThinkingEvent",
+ "ToolCompletedEvent",
+ "ToolStartedEvent",
+ "TurnResultEvent",
+ "interaction_update_to_events",
+ "normalize_tool_call_envelope",
+ "run_stream_event_to_events",
+ "sdk_message_to_events",
+ "stream_json_dict_to_events",
+ "tool_name_to_envelope",
+]
diff --git a/agent/cursor/prompt.py b/agent/cursor/prompt.py
new file mode 100644
index 000000000000..8ecc3bcf5b0b
--- /dev/null
+++ b/agent/cursor/prompt.py
@@ -0,0 +1,151 @@
+"""Prompt formatting shared by CLI and SDK Cursor backends."""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from agent.copilot_acp_client import _render_message_content
+
+
+# Builds one plain-text prompt out of: an instruction preamble, an optional
+# model hint, the JSON tool schema, an optional tool-choice hint, the rendered
+# conversation transcript and a closing instruction.
+#
+# Parameters:
+#   messages    - chat messages; non-dict entries and entries whose rendered
+#                 content is empty are skipped.
+#   model       - optional model name, added as a "model hint" line.
+#   tools       - optional OpenAI-style tool definitions; a truthy value
+#                 selects the long tool-aware preamble.
+#   tool_choice - optional value serialised with json.dumps as a hint.
+# Returns the sections joined by blank lines, each stripped; blank sections
+# are dropped.
+def format_messages_as_prompt(
+ messages: list[dict[str, Any]],
+ model: str | None = None,
+ tools: list[dict[str, Any]] | None = None,
+ tool_choice: Any = None,
+) -> str:
+ """Build the prompt sent to cursor-agent stdin or SDK ``send()``."""
+ sections: list[str] = []
+ has_tools = bool(tools)
+ if has_tools:
+ # Tool-aware preamble: explains the built-in vs Hermes-side tool split
+ # and the rules the model should follow.
+ sections.extend([
+ "You are powering a chat session inside Hermes Agent.",
+ "You have TWO sets of tools available:",
+ "(A) Your own built-in cursor-agent tools (shell, read_file, "
+ "edit_file, write_file, list_directory, grep, glob, web_fetch). "
+ "Use these DIRECTLY for filesystem/shell/search work — they run "
+ "on the real workspace, are fast, and Hermes will surface their "
+ "results to the user automatically.",
+ "(B) Hermes-side tools listed in the schema below. They cover "
+ "capabilities your built-in tools do NOT have (skills, MCP "
+ "servers, browser automation, remote APIs, etc.). To invoke "
+ # NOTE(review): the text below reads "emit a {...} block" and shows
+ # empty id/name/arguments values, as if wrapper markup or placeholder
+ # names were lost — confirm the literal against the intended prompt.
+ "one of THESE, emit a "
+ "{...} block in OpenAI function-call "
+ "shape: "
+ '{"id":"call_","type":"function",'
+ '"function":{"name":"","arguments":""}}. '
+ "``arguments`` MUST be a JSON STRING (escaped), not a nested "
+ "object.",
+ "RULES:",
+ "1. Prefer your built-in tools for any shell command, file "
+ "read/write/list/edit, grep, or glob operation — they're "
+ "faster than round-tripping through Hermes. CRITICAL for "
+ "file creation/modification: ALWAYS use the ``write`` or "
+ "``edit`` built-in tools, NEVER ``shell`` with ``echo > "
+ "file`` / ``cat > file`` / ``sed -i`` / ``>>``. Only the "
+ "write/edit tools report ``linesAdded`` / ``linesRemoved`` "
+ "/ ``diffString`` to the harness, which is what Hermes "
+ "renders as the colored ``+``/``-`` diff in the UI. Shell "
+ "redirections create the file but the user sees no diff "
+ "and has no idea what changed.",
+ "2. Only emit blocks for tools listed in the "
+ "schema below; do NOT invent tool names. Multiple tool_calls "
+ "per turn are allowed.",
+ "3. Work iteratively (ReAct-style): before each tool batch, "
+ "emit ONE short line of plain text saying what you're about "
+ "to check and why. After tool results come back, briefly "
+ "reflect on what you found before deciding the next step. "
+ "Hermes surfaces these intermediate lines to the user as "
+ "live narration so they can follow your reasoning.",
+ "4. Don't dump every tool call upfront — chain them: think, "
+ "tool, reflect, tool, reflect, ... then synthesise the final "
+ "answer at the end. If the task genuinely is independent "
+ "lookups, parallel tool calls in one batch are fine.",
+ "5. If no tool is needed (pure conversation, math, "
+ "summarising content already in the transcript), answer as "
+ "plain text.",
+ "6. Never hallucinate file contents or command output — if "
+ "you say \"Reading the file…\" you MUST actually run the "
+ "read_file (built-in) or emit a if it's a "
+ "Hermes-specific tool.",
+ "7. The Hermes UI already shows file edits to the user as a "
+ "colored +/- diff right next to each ``edit`` / ``write`` "
+ "tool call (and tool calls + diffs are streamed live). Do "
+ "NOT re-dump the before/after content or paste the diff "
+ "again in your final response — just confirm what was "
+ "changed at a high level (e.g. \"updated foo.py to fix the "
+ "off-by-one\"). Same for shell output: it's already visible.",
+ ])
+ else:
+ # No tools supplied: short preamble asking for a direct plain-text answer.
+ sections.append(
+ "Hermes auxiliary call. Answer the user message below directly "
+ "and concisely; do not run any tools, do not write files, do "
+ "not ask follow-up questions. Plain-text reply only."
+ )
+ if model:
+ sections.append(f"Hermes requested model hint: {model}")
+
+ if isinstance(tools, list) and tools:
+ # Reduce each tool to name/description/parameters; entries that are not
+ # dicts, have a non-dict "function", or lack a non-blank string name
+ # are skipped.
+ tool_specs: list[dict[str, Any]] = []
+ for t in tools:
+ if not isinstance(t, dict):
+ continue
+ fn = t.get("function") or {}
+ if not isinstance(fn, dict):
+ continue
+ name = fn.get("name")
+ if not isinstance(name, str) or not name.strip():
+ continue
+ tool_specs.append(
+ {
+ "name": name.strip(),
+ "description": fn.get("description", ""),
+ "parameters": fn.get("parameters", {}),
+ }
+ )
+ if tool_specs:
+ # The schema is embedded as a single JSON array after the heading.
+ sections.append(
+ "Hermes-side tools (OpenAI function schema). Emit "
+ "{...} blocks to invoke these. "
+ "For plain shell / file / grep / glob actions prefer your "
+ "own built-in tools instead (they're faster).\n"
+ + json.dumps(tool_specs, ensure_ascii=False)
+ )
+
+ if tool_choice is not None:
+ sections.append(
+ f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
+ )
+
+ transcript: list[str] = []
+ for message in messages:
+ if not isinstance(message, dict):
+ continue
+ role = str(message.get("role") or "unknown").strip().lower()
+ # NOTE: the first branch reassigns the same value; its only effect is to
+ # keep "tool" out of the "context" fallback in the elif below.
+ if role == "tool":
+ role = "tool"
+ elif role not in {"system", "user", "assistant"}:
+ role = "context"
+
+ content = message.get("content")
+ # _render_message_content is imported from agent.copilot_acp_client;
+ # presumably it flattens the content to a string — messages for which
+ # it returns a falsy value are left out of the transcript.
+ rendered = _render_message_content(content)
+ if not rendered:
+ continue
+
+ label = {
+ "system": "System",
+ "user": "User",
+ "assistant": "Assistant",
+ "tool": "Tool",
+ "context": "Context",
+ }.get(role, role.title())
+ transcript.append(f"{label}:\n{rendered}")
+
+ if transcript:
+ sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
+
+ sections.append("Continue the conversation from the latest user request.")
+ # Final assembly: strip every section and drop the blank ones.
+ return "\n\n".join(section.strip() for section in sections if section and section.strip())
+
+
+# Only the prompt builder is part of this module's public API.
+__all__ = ["format_messages_as_prompt"]
diff --git a/agent/cursor/sdk_backend.py b/agent/cursor/sdk_backend.py
new file mode 100644
index 000000000000..caa8921a5272
--- /dev/null
+++ b/agent/cursor/sdk_backend.py
@@ -0,0 +1,154 @@
+"""cursor-sdk backend for the Cursor provider."""
+
+from __future__ import annotations
+
+import time
+from typing import Any
+
+from agent.cursor.accumulator import CursorTurnAccumulator
+from agent.cursor.backend import ensure_cursor_sdk, map_hermes_mode_to_sdk
+from agent.cursor.events import TurnResultEvent, run_stream_event_to_events
+
+
+def _finalize_terminal_result(accumulator: CursorTurnAccumulator, result: Any) -> None:
+    """Feed a closing ``TurnResultEvent`` built from *result*, if still needed.
+
+    Does nothing when the accumulator is already terminal. Otherwise the
+    event is derived from the ``status``, ``result``, ``id`` and
+    ``duration_ms`` attributes of *result* (each optional); the statuses
+    error/failed/cancelled/canceled mark the turn as failed and reuse the
+    result text as the error message.
+    """
+    if accumulator.terminal:
+        return
+
+    outcome = str(getattr(result, "status", "") or "").lower()
+    failed = outcome in {"error", "failed", "cancelled", "canceled"}
+    text = str(getattr(result, "result", "") or "")
+    closing_event = TurnResultEvent(
+        is_error=failed,
+        result_text=text,
+        request_id=str(getattr(result, "id", "") or ""),
+        duration_ms=int(getattr(result, "duration_ms", 0) or 0),
+        usage={},
+        error_message=text if failed else "",
+    )
+    accumulator.feed(closing_event)
+
+
+class SdkSession:
+    """Reused SDK bridge client scoped to one Hermes chat session.
+
+    Holds at most one bridge client together with the workspace it was
+    launched for; asking for a different workspace replaces it.
+    """
+
+    def __init__(self) -> None:
+        self._client: Any = None
+        self._workspace: str | None = None
+
+    def get_client(self, *, workspace: str, api_key: str) -> Any:
+        """Return a bridge client for *workspace*, launching one if required.
+
+        The cached client is reused only when it exists and was launched for
+        the same workspace; otherwise the old one is closed first.
+        *api_key* is accepted but not used by this method.
+        """
+        ensure_cursor_sdk(prompt=False)
+        from cursor_sdk import CursorClient
+
+        reusable = self._client is not None and self._workspace == workspace
+        if not reusable:
+            self.close()
+            self._client = CursorClient.launch_bridge(
+                workspace=workspace,
+                allow_api_key_env_fallback=False,
+            )
+            self._workspace = workspace
+        return self._client
+
+    def close(self) -> None:
+        """Forget the cached client and close it, ignoring close errors."""
+        # Clear the state first so the session is reset even if close() fails.
+        stale, self._client, self._workspace = self._client, None, None
+        if stale is not None:
+            try:
+                stale.close()
+            except Exception:
+                pass
+
+
+# Runs a single prompt through the cursor-sdk and collects every streamed
+# event into a CursorTurnAccumulator.
+#
+# All parameters are keyword-only:
+#   prompt_text     - text passed to agent.send().
+#   model / api_key - forwarded to AgentOptions.
+#   workspace       - used for the bridge client and as the agent cwd.
+#   mode            - Hermes mode, mapped through map_hermes_mode_to_sdk().
+#   timeout_seconds - idle threshold in seconds (see review note below).
+#   on_tool_event / on_text_event - callbacks handed to the accumulator.
+#   sdk_session     - supplies (and caches) the bridge client.
+# Returns the populated accumulator.
+# Raises TimeoutError from the idle check and RuntimeError when the turn ends
+# in an error or SDK access is not enabled; CursorAgentError propagates as is.
+def run_prompt_via_sdk(
+ *,
+ prompt_text: str,
+ model: str,
+ api_key: str,
+ workspace: str,
+ mode: str,
+ timeout_seconds: float,
+ on_tool_event: Any,
+ on_text_event: Any,
+ sdk_session: SdkSession,
+) -> CursorTurnAccumulator:
+ """Execute one Hermes turn via cursor-sdk; return a populated accumulator."""
+ ensure_cursor_sdk(prompt=False)
+ from cursor_sdk import Agent, AgentOptions, LocalAgentOptions
+ from cursor_sdk.errors import CursorAgentError, IntegrationNotConnectedError
+
+ sdk_mode = map_hermes_mode_to_sdk(mode)
+ client = sdk_session.get_client(workspace=workspace, api_key=api_key)
+ options = AgentOptions(
+ model=model,
+ api_key=api_key,
+ mode=sdk_mode,
+ local=LocalAgentOptions(cwd=workspace),
+ )
+ accumulator = CursorTurnAccumulator(
+ on_tool_event=on_tool_event,
+ on_text_event=on_text_event,
+ )
+ idle_seconds = float(timeout_seconds)
+ deadline = time.monotonic() + idle_seconds
+
+ # The agent is created outside the try block, so the finally clause below
+ # only runs (and closes it) once creation has succeeded.
+ agent = Agent.create(options, client=client)
+ try:
+ run = agent.send(prompt_text)
+ for event in run.events():
+ # NOTE(review): the deadline is pushed forward here and compared at the
+ # bottom of this same iteration, so the timeout below can only trigger
+ # when handling one event takes longer than idle_seconds. If
+ # run.events() blocks while waiting for the next event, a silent stall
+ # is not detected by this loop — confirm whether the SDK has its own
+ # timeout or whether a watchdog is needed.
+ deadline = time.monotonic() + idle_seconds
+ for typed in run_stream_event_to_events(event):
+ accumulator.feed(typed)
+ if accumulator.terminal:
+ break
+ if accumulator.terminal:
+ break
+ if time.monotonic() >= deadline:
+ if run.supports("cancel"):
+ run.cancel()
+ raise TimeoutError(
+ f"cursor-sdk emitted no events for {idle_seconds:.0f}s; "
+ f"presumed hung. Set HERMES_CURSOR_TIMEOUT_SECONDS to "
+ f"increase the idle threshold."
+ )
+
+ result = run.wait()
+ # A final status of "error" is always fed to the accumulator, even if a
+ # terminal event was already seen; other statuses are only used to
+ # finalise a turn that has no terminal event yet.
+ if str(getattr(result, "status", "") or "").lower() == "error":
+ accumulator.feed(TurnResultEvent(
+ is_error=True,
+ result_text=str(getattr(result, "result", "") or "cursor-sdk run failed"),
+ request_id=str(getattr(result, "id", "") or ""),
+ duration_ms=int(getattr(result, "duration_ms", 0) or 0),
+ usage={},
+ error_message=str(getattr(result, "result", "") or "cursor-sdk run failed"),
+ ))
+ elif not accumulator.terminal:
+ _finalize_terminal_result(accumulator, result)
+
+ if accumulator.is_error:
+ raise RuntimeError(
+ f"cursor-sdk reported an error: {accumulator.error_message or result.result}"
+ )
+ return accumulator
+ except IntegrationNotConnectedError as exc:
+ # Translated into a RuntimeError with guidance, chained to the original.
+ raise RuntimeError(
+ "cursor-sdk access is not enabled for this account "
+ "(sdk_python_preview_access). Set HERMES_CURSOR_BACKEND=cli or "
+ "generate a User API Key once SDK access is granted."
+ ) from exc
+ except CursorAgentError:
+ # Re-raised unchanged.
+ raise
+ finally:
+ # Best-effort cleanup: errors while closing the agent are ignored.
+ try:
+ agent.close()
+ except Exception:
+ pass
+
+
+# Backward-compat alias: the underscore-prefixed name refers to the same class
+# object as ``SdkSession``.
+_SdkSession = SdkSession
+
+
+# Public names of this module, including the compatibility alias.
+__all__ = [
+ "SdkSession",
+ "_SdkSession",
+ "run_prompt_via_sdk",
+]
diff --git a/agent/cursor/streaming.py b/agent/cursor/streaming.py
new file mode 100644
index 000000000000..d0e447b2a55c
--- /dev/null
+++ b/agent/cursor/streaming.py
@@ -0,0 +1,159 @@
+"""OpenAI-style streaming shims for the Cursor provider facade."""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+from typing import Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from agent.cursor.client import CursorAgentClient
+
+
+class CursorChatCompletions:
+    """``chat.completions``-style facade over a ``CursorAgentClient``."""
+
+    def __init__(self, client: "CursorAgentClient"):
+        self._client = client
+
+    def create(self, **kwargs: Any) -> Any:
+        """Run one completion through the wrapped client.
+
+        ``stream`` and ``stream_options`` are removed from *kwargs* before
+        the remaining arguments are forwarded. The full response is always
+        produced first; with ``stream`` truthy it is then replayed as a
+        generator of synthetic chunks, otherwise it is returned as is.
+        """
+        wants_stream = bool(kwargs.pop("stream", False))
+        kwargs.pop("stream_options", None)
+        completion = self._client._create_chat_completion(**kwargs)
+        return synthesise_stream_chunks(completion) if wants_stream else completion
+
+
+class CursorChatNamespace:
+    """Namespace object exposing ``.completions`` for a client."""
+
+    def __init__(self, client: "CursorAgentClient"):
+        completions_facade = CursorChatCompletions(client)
+        self.completions = completions_facade
+
+
+def synthesise_stream_chunks(response: Any):
+    """Yield OpenAI-style streaming chunks from a non-streaming response.
+
+    Chunk order: one reasoning chunk (``reasoning_content`` is preferred over
+    ``reasoning``), one content chunk, one chunk per tool call, and a final
+    chunk that carries ``finish_reason`` and the response ``usage``. Empty
+    parts are skipped. Nothing is yielded when the response has no first
+    choice or that choice has no message.
+    """
+    try:
+        first_choice = response.choices[0]
+    except Exception:
+        return
+
+    msg = getattr(first_choice, "message", None)
+    if msg is None:
+        return
+
+    text = getattr(msg, "content", "") or ""
+    calls = getattr(msg, "tool_calls", None) or []
+    reasoning = getattr(msg, "reasoning", None)
+    reasoning_content = getattr(msg, "reasoning_content", None)
+    finish_reason = getattr(first_choice, "finish_reason", "stop")
+    model = getattr(response, "model", "cursor")
+    final_usage = getattr(response, "usage", None)
+
+    def make_chunk(*, role: Any = "assistant", finish: Any = None, usage: Any = None, **delta_fields: Any) -> SimpleNamespace:
+        """Build one chunk; delta fields that are not given stay ``None``."""
+        delta = SimpleNamespace(
+            role=role,
+            content=delta_fields.get("content"),
+            tool_calls=delta_fields.get("tool_calls"),
+            reasoning=delta_fields.get("reasoning"),
+            reasoning_content=delta_fields.get("reasoning_content"),
+        )
+        return SimpleNamespace(
+            choices=[SimpleNamespace(delta=delta, finish_reason=finish, index=0)],
+            model=model,
+            usage=usage,
+        )
+
+    if reasoning_content:
+        yield make_chunk(reasoning_content=reasoning_content)
+    elif reasoning:
+        yield make_chunk(reasoning=reasoning)
+
+    if text:
+        yield make_chunk(content=text)
+
+    for position, call in enumerate(calls):
+        call_delta = SimpleNamespace(
+            index=position,
+            # Calls without an id get a positional placeholder.
+            id=getattr(call, "id", f"call_{position}"),
+            type="function",
+            function=SimpleNamespace(
+                name=getattr(call.function, "name", ""),
+                arguments=getattr(call.function, "arguments", ""),
+            ),
+        )
+        yield make_chunk(tool_calls=[call_delta])
+
+    # Closing chunk: no role, carries the finish reason and usage.
+    yield make_chunk(role=None, finish=finish_reason, usage=final_usage)
+
+
+# Public names of this module: the two facade classes and the chunk generator.
+__all__ = [
+ "CursorChatCompletions",
+ "CursorChatNamespace",
+ "synthesise_stream_chunks",
+]
diff --git a/agent/cursor/tool_events.py b/agent/cursor/tool_events.py
new file mode 100644
index 000000000000..86ca8a5594dd
--- /dev/null
+++ b/agent/cursor/tool_events.py
@@ -0,0 +1,163 @@
+"""Cursor internal tool event types and helpers."""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import Any
+
+
+def build_cursor_tool_preview(evt: "CursorToolEvent") -> str:
+    """Compact one-line description of a cursor tool call for the UI.
+
+    Depending on ``evt.envelope_key`` the preview is the shell command, the
+    file path, or a ``pattern in target`` label. When no tool-specific field
+    is usable the arguments are dumped as JSON. Every result is capped at
+    200 characters; any exception while building it yields ``""``.
+    """
+    args = evt.args or {}
+
+    def pick(*names: str) -> Any:
+        """Return the first truthy ``args`` value among *names*, else ``""``."""
+        for candidate in names:
+            value = args.get(candidate)
+            if value:
+                return value
+        return ""
+
+    try:
+        kind = evt.envelope_key
+        if kind == "shellToolCall":
+            command = pick("command", "cmd")
+            if isinstance(command, list):
+                command = " ".join(str(part) for part in command)
+            if isinstance(command, str) and command.strip():
+                return command.strip()[:200]
+        elif kind in (
+            "readToolCall",
+            "editToolCall",
+            "writeToolCall",
+            "patchToolCall",
+            "deleteToolCall",
+        ):
+            location = pick(
+                "path",
+                "file",
+                "filePath",
+                "filename",
+                "target_file",
+                "targetFile",
+                "file_path",
+                "relative_workspace_path",
+            )
+            if isinstance(location, str) and location.strip():
+                return location.strip()[:200]
+        elif kind == "globToolCall":
+            pieces = [
+                piece
+                for piece in (pick("globPattern", "pattern"), pick("targetDirectory", "path"))
+                if isinstance(piece, str) and piece.strip()
+            ]
+            if pieces:
+                return " in ".join(pieces)[:200]
+        elif kind in ("grepToolCall", "searchToolCall"):
+            needle = pick("pattern", "query", "regex")
+            scope = pick("path", "targetDirectory")
+            has_scope = isinstance(scope, str) and bool(scope.strip())
+            if isinstance(needle, str) and needle.strip():
+                if has_scope:
+                    return f"{needle} in {scope}"[:200]
+                return needle.strip()[:200]
+            if has_scope:
+                return scope.strip()[:200]
+        elif kind == "listToolCall":
+            folder = pick("path", "directory", "targetDirectory")
+            if isinstance(folder, str) and folder.strip():
+                return folder.strip()[:200]
+        # Generic fallback for unknown tools or unusable arguments.
+        return json.dumps(args, ensure_ascii=False)[:200]
+    except Exception:
+        return ""
+
+
+def normalize_cursor_tool_name(envelope_key: str) -> str:
+    """Map cursor's ``ToolCall`` keys to Hermes tool names.
+
+    The ``ToolCall`` suffix is removed and the remainder is looked up
+    case-insensitively. Unknown bases are returned unchanged (original
+    case); a non-string key or an empty base gives ``"cursor_tool"``.
+    """
+    if not isinstance(envelope_key, str):
+        return "cursor_tool"
+
+    stem = envelope_key
+    if stem.endswith("ToolCall"):
+        stem = stem[: -len("ToolCall")]
+    if not stem:
+        return "cursor_tool"
+
+    hermes_names = {
+        "shell": "shell",
+        "read": "read_file",
+        "list": "list_directory",
+        "edit": "edit_file",
+        "write": "write_file",
+        "patch": "patch",
+        "grep": "grep",
+        "glob": "glob",
+        "search": "search",
+        "todo": "todo",
+        "delete": "delete_file",
+        "task": "task",
+        "fetch": "web_fetch",
+    }
+    try:
+        return hermes_names[stem.lower()]
+    except KeyError:
+        return stem
+
+
+def summarise_cursor_tool_result(envelope_key: str, payload: dict[str, Any]) -> str:
+    """Return a compact human-readable result string for the UI / log.
+
+    Reads ``payload["result"]``. Without a dict under ``success`` the only
+    possible output is an ``error: ...`` line (max 400 chars) taken from a
+    string or dict ``error`` entry. With one, the summary depends on the
+    tool: shell stdout, file content with an optional line count, a file
+    listing (max 200 entries), or a JSON dump (max 1000 chars) for anything
+    else. Missing data or any exception gives ``""``.
+    """
+    outcome = payload.get("result")
+    if not isinstance(outcome, dict):
+        return ""
+
+    ok = outcome.get("success")
+    if not isinstance(ok, dict):
+        failure = outcome.get("error") if "error" in outcome else None
+        if isinstance(failure, (str, dict)):
+            return f"error: {failure}"[:400]
+        return ""
+
+    try:
+        if envelope_key == "shellToolCall":
+            stdout = ok.get("stdout") or ""
+            if isinstance(stdout, str):
+                return stdout
+            return json.dumps(stdout)
+        elif envelope_key == "readToolCall":
+            body = ok.get("content") or ""
+            line_count = ok.get("totalLines")
+            if line_count is not None:
+                header = f"({line_count} lines)"
+                return f"{header}\n{body}" if body else header
+            if isinstance(body, str):
+                return body
+            return json.dumps(body)
+        elif envelope_key in ("listToolCall", "globToolCall"):
+            listing = ok.get("files") or ok.get("entries") or []
+            if isinstance(listing, list):
+                return "\n".join(str(entry) for entry in listing[:200])
+        # Unknown tool, or a listing that is not a list: dump the success dict.
+        return json.dumps(ok, ensure_ascii=False)[:1000]
+    except Exception:
+        return ""
+
+
+class CursorToolEvent:
+    """A captured cursor-agent tool invocation (started + completed states).
+
+    Created when a call starts: ``started_at`` is stamped with the monotonic
+    clock and ``name`` is derived from the envelope key. The completion
+    fields start out empty (``None``, ``""``, ``False`` or ``0``).
+    """
+
+    __slots__ = (
+        "call_id",
+        "envelope_key",
+        "name",
+        "args",
+        "started_at",
+        "completed_at",
+        "result_text",
+        "is_error",
+        "duration_ms",
+        "lines_added",
+        "lines_removed",
+        "diff_string",
+    )
+
+    def __init__(self, call_id: str, envelope_key: str, args: dict[str, Any]) -> None:
+        # Identity of the call.
+        self.call_id = call_id
+        self.envelope_key = envelope_key
+        self.name = normalize_cursor_tool_name(envelope_key)
+        self.args = args
+        # Timing.
+        self.started_at = time.monotonic()
+        self.completed_at: float | None = None
+        # Outcome, empty until completion data is recorded.
+        self.result_text: str = ""
+        self.is_error: bool = False
+        self.duration_ms: int = 0
+        # Edit statistics.
+        self.lines_added: int | None = None
+        self.lines_removed: int | None = None
+        self.diff_string: str = ""
+
+    def to_public_dict(self) -> dict[str, Any]:
+        """Return the externally visible fields as a plain dict.
+
+        Timing stamps and edit statistics are not included.
+        """
+        return dict(
+            id=self.call_id,
+            name=self.name,
+            envelope=self.envelope_key,
+            arguments=self.args,
+            result=self.result_text,
+            is_error=self.is_error,
+            duration_ms=self.duration_ms,
+        )
+
+
+# Backward-compat alias for tests and internal imports; refers to the same
+# class object as ``CursorToolEvent``.
+_CursorToolEvent = CursorToolEvent
+
+
+# Public names of this module, including the compatibility alias.
+__all__ = [
+ "CursorToolEvent",
+ "_CursorToolEvent",
+ "build_cursor_tool_preview",
+ "normalize_cursor_tool_name",
+ "summarise_cursor_tool_result",
+]
diff --git a/agent/cursor_agent_client.py b/agent/cursor_agent_client.py
index 7a3926ac7a3c..218740c5d5d0 100644
--- a/agent/cursor_agent_client.py
+++ b/agent/cursor_agent_client.py
@@ -1,1748 +1,31 @@
-"""OpenAI-compatible facade that forwards Hermes requests to ``cursor-agent``.
+"""OpenAI-compatible facade that forwards Hermes requests to Cursor (CLI or SDK).
-This adapter lets Hermes treat the Cursor Agent CLI as a chat-style backend so
-every Cursor user (Hobby/Pro/Pro+/Ultra/Teams) can route Hermes calls through
-their existing Cursor subscription / API credits.
-
-Per request we spawn ``cursor-agent -p`` with ``--output-format stream-json``,
-pass the formatted conversation as the prompt (via stdin to avoid the argv
-length limit), then parse the line-delimited JSON events into a single OpenAI
-chat-completion response.
-
-Design notes:
-
-- One subprocess per request (no shared long-running session). Warm sessions
- via ``--resume`` are an opt-in path documented below.
-- Default ``--mode ask`` keeps Cursor read-only — useful when we just want the
- model as an LLM rather than letting it edit files.
-- Default workspace is an ephemeral temp dir so the agent never sees the
- caller's repo. Override via ``HERMES_CURSOR_WORKSPACE`` or the ``workspace``
- ctor arg.
-- Tool calls follow the Copilot-ACP convention: tools are described in the
- system prompt and the model emits ``{...}`` blocks
- that we lift back into OpenAI ``tool_calls``.
-- The CLI auth (``cursor-agent login`` or ``CURSOR_API_KEY``) is what governs
- identity; we forward ``CURSOR_API_KEY`` to the subprocess and let the CLI
- resolve it (same as the IDE does).
+This module re-exports the :mod:`agent.cursor` package for backward compatibility.
+New code should import from ``agent.cursor`` directly.
"""
from __future__ import annotations
-import json
-import os
-import queue
-import shlex
-import shutil
-import subprocess
-import tempfile
-import threading
-import time
-from collections import deque
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Any
-
-from agent.redact import redact_sensitive_text
-
-CURSOR_MARKER_BASE_URL = "cursor://agent"
-DEFAULT_CURSOR_COMMAND = "cursor-agent"
-# ``agent`` matches cursor-agent's own default permissionMode (the
-# behavior you get from ``cursor-agent -p`` with no ``--mode`` flag).
-# This is what a user picking ``cursor`` in ``hermes model`` will expect:
-# the same write/edit/shell power they'd have from the cursor CLI directly.
-# Users who want read-only behavior set ``HERMES_CURSOR_MODE=ask`` (or
-# ``plan``); Hermes' own ``approvals.mode`` config additionally gates any
-# tool execution (manual / smart / off) on top of this, identical to every
-# other provider.
-DEFAULT_CURSOR_MODE = "agent"
-DEFAULT_CURSOR_MODEL = "auto"
-
-# cursor-agent CLI accepts only ``ask`` and ``plan`` for ``--mode`` today,
-# but ``-p/--print`` *without* ``--mode`` runs in the full-capability
-# ``default`` permissionMode (write+shell+everything). We expose that as
-# the synthetic ``agent`` value here:
-# - ``ask`` : read-only Q&A. Cursor's built-in mutation tools are
-# disabled. Hermes-side tools still apply for any work
-# that needs to touch the user's disk.
-# - ``plan`` : read-only planning mode. Produces structured plan output
-# from Cursor's planner.
-# - ``agent``: omits ``--mode`` so Cursor runs in its IDE-equivalent
-# "default" permissionMode — built-in shell, write, edit,
-# read, etc. all active. Use this when you want Cursor to
-# drive multi-step work end-to-end (it will still emit
-# tool_call events that we surface to Hermes UI).
-# Anything else falls back to ``ask``. Don't add new values without
-# re-checking ``cursor-agent --help``; passing an unknown ``--mode``
-# value causes a hard-crash BrokenPipe with a confusing
-# "Allowed choices are plan, ask." stderr.
-_VALID_CURSOR_MODES = frozenset({"ask", "plan", "agent"})
-_CURSOR_CLI_MODES = frozenset({"ask", "plan"})
-# Idle threshold (not wall-clock): the deadline resets on every stream-json
-# event from cursor-agent. A turn can legitimately run for much longer than
-# this in total wall-clock; what matters is that events keep arriving. If
-# nothing arrives for this long, the subprocess is presumed hung and is
-# force-killed with a clear TimeoutError. Override via
-# ``HERMES_CURSOR_TIMEOUT_SECONDS`` env var. Default is 30 minutes; cursor-
-# agent's own internal shell ceiling is 10 minutes so a single shell call
-# can chew up that much idle time, and chained internal operations (deep
-# greps, large reads after a long shell) routinely push past 15 minutes
-# without emitting events. 30 min gives comfortable headroom while still
-# catching genuine hangs.
-_DEFAULT_TIMEOUT_SECONDS = 1800.0
-
-# Sentinels that mean "no real api key — use the cursor-agent CLI's own login
-# session". Hermes's external_process auth path injects these as placeholders;
-# forwarding them to ``cursor-agent --api-key`` makes the CLI reject the
-# request and close stdin, manifesting upstream as ``BrokenPipeError``.
-_API_KEY_SENTINELS = frozenset({
- "",
- "cursor-agent-login",
- "cursor-cli-login",
- "external-process",
- "external_process",
-})
-
-# Reuse the tool-call extraction grammar from copilot_acp_client. We do NOT
-# reuse its prompt builder — cursor's model is itself an agentic CLI with its
-# own built-in shell/edit/read tools, and the softer ACP wording ("ACP agent
-# backend, use ACP capabilities") makes it prefer those built-ins and run the
-# work internally, leaving Hermes' tool surface unused. Cursor needs an
-# explicit "you are JUST the LLM, do not execute anything yourself" directive
-# (see ``_format_messages_as_prompt`` below).
-import json as _json # noqa: E402
-
-from agent.copilot_acp_client import ( # noqa: E402
- _extract_tool_calls_from_text,
- _render_message_content,
+from agent.cursor.accumulator import CursorTurnAccumulator, _StreamJsonAccumulator
+from agent.cursor.backend import cursor_sdk_installed, resolve_cursor_backend
+from agent.cursor.client import CursorAgentClient
+from agent.cursor.constants import (
+ CURSOR_MARKER_BASE_URL,
+ DEFAULT_CURSOR_COMMAND,
+ DEFAULT_CURSOR_MODEL,
+ DEFAULT_CURSOR_MODE,
+)
+from agent.cursor.env import (
+ build_subprocess_env as _build_subprocess_env,
+ resolve_command as _resolve_command,
+)
+from agent.cursor.prompt import format_messages_as_prompt as _format_messages_as_prompt
+from agent.cursor.sdk_backend import SdkSession as _SdkSession, run_prompt_via_sdk
+from agent.cursor.tool_events import (
+ CursorToolEvent as _CursorToolEvent,
+ build_cursor_tool_preview as _build_cursor_tool_preview,
+ normalize_cursor_tool_name as _normalize_cursor_tool_name,
)
-
-
-def _format_messages_as_prompt(
- messages: list[dict[str, Any]],
- model: str | None = None,
- tools: list[dict[str, Any]] | None = None,
- tool_choice: Any = None,
-) -> str:
- """Build the prompt sent to ``cursor-agent`` stdin.
-
- Key differences vs. the copilot-acp formatter:
-
- * Hard "you are the LLM, NOT an agent" framing — without this, cursor's
- built-in shell/edit/read tools intercept the request and the agentic
- loop runs entirely inside ``cursor-agent``, so Hermes never sees a
- ``tool_calls`` response (the symptom: chat sessions show 0 tool
- calls even though tools are advertised).
- * Explicit "do NOT run ls/cat/edit yourself" line — empirically required
- to push cursor's model past its default "I'll just do it" reflex.
- * Tool-call grammar identical to copilot-acp so the ``{...}
- `` extractor we share keeps working.
- """
- # Auxiliary-style calls (title generation, compression, vision,
- # mcp router, etc.) come in with NO ``tools`` and just a system+user
- # pair. They want a short, direct response — slapping the full
- # "you are an agent backend, emit tool_call blocks" preamble on top
- # of them makes cursor's harness reply with a verbose multi-paragraph
- # answer or even crash on the formatting constraints. So we keep
- # the heavy preamble only for the agentic chat path (tools provided).
- sections: list[str] = []
- has_tools = bool(tools)
- if has_tools:
- sections.extend([
- "You are powering a chat session inside Hermes Agent.",
- "You have TWO sets of tools available:",
- "(A) Your own built-in cursor-agent tools (shell, read_file, "
- "edit_file, write_file, list_directory, grep, glob, web_fetch). "
- "Use these DIRECTLY for filesystem/shell/search work — they run "
- "on the real workspace, are fast, and Hermes will surface their "
- "results to the user automatically.",
- "(B) Hermes-side tools listed in the schema below. They cover "
- "capabilities your built-in tools do NOT have (skills, MCP "
- "servers, browser automation, remote APIs, etc.). To invoke "
- "one of THESE, emit a "
- "{...} block in OpenAI function-call "
- "shape: "
- '{"id":"call_","type":"function",'
- '"function":{"name":"","arguments":""}}. '
- "``arguments`` MUST be a JSON STRING (escaped), not a nested "
- "object.",
- "RULES:",
- "1. Prefer your built-in tools for any shell command, file "
- "read/write/list/edit, grep, or glob operation — they're "
- "faster than round-tripping through Hermes. CRITICAL for "
- "file creation/modification: ALWAYS use the ``write`` or "
- "``edit`` built-in tools, NEVER ``shell`` with ``echo > "
- "file`` / ``cat > file`` / ``sed -i`` / ``>>``. Only the "
- "write/edit tools report ``linesAdded`` / ``linesRemoved`` "
- "/ ``diffString`` to the harness, which is what Hermes "
- "renders as the colored ``+``/``-`` diff in the UI. Shell "
- "redirections create the file but the user sees no diff "
- "and has no idea what changed.",
- "2. Only emit blocks for tools listed in the "
- "schema below; do NOT invent tool names. Multiple tool_calls "
- "per turn are allowed.",
- "3. Work iteratively (ReAct-style): before each tool batch, "
- "emit ONE short line of plain text saying what you're about "
- "to check and why. After tool results come back, briefly "
- "reflect on what you found before deciding the next step. "
- "Hermes surfaces these intermediate lines to the user as "
- "live narration so they can follow your reasoning.",
- "4. Don't dump every tool call upfront — chain them: think, "
- "tool, reflect, tool, reflect, ... then synthesise the final "
- "answer at the end. If the task genuinely is independent "
- "lookups, parallel tool calls in one batch are fine.",
- "5. If no tool is needed (pure conversation, math, "
- "summarising content already in the transcript), answer as "
- "plain text.",
- "6. Never hallucinate file contents or command output — if "
- "you say \"Reading the file…\" you MUST actually run the "
- "read_file (built-in) or emit a if it's a "
- "Hermes-specific tool.",
- "7. The Hermes UI already shows file edits to the user as a "
- "colored +/- diff right next to each ``edit`` / ``write`` "
- "tool call (and tool calls + diffs are streamed live). Do "
- "NOT re-dump the before/after content or paste the diff "
- "again in your final response — just confirm what was "
- "changed at a high level (e.g. \"updated foo.py to fix the "
- "off-by-one\"). Same for shell output: it's already visible.",
- ])
- else:
- # Lite preamble for aux calls — just enough to keep cursor's
- # harness from running its own tools / writing files / asking
- # clarifying questions when all we want is a single short reply.
- sections.append(
- "Hermes auxiliary call. Answer the user message below directly "
- "and concisely; do not run any tools, do not write files, do "
- "not ask follow-up questions. Plain-text reply only."
- )
- if model:
- sections.append(f"Hermes requested model hint: {model}")
-
- if isinstance(tools, list) and tools:
- tool_specs: list[dict[str, Any]] = []
- for t in tools:
- if not isinstance(t, dict):
- continue
- fn = t.get("function") or {}
- if not isinstance(fn, dict):
- continue
- name = fn.get("name")
- if not isinstance(name, str) or not name.strip():
- continue
- tool_specs.append(
- {
- "name": name.strip(),
- "description": fn.get("description", ""),
- "parameters": fn.get("parameters", {}),
- }
- )
- if tool_specs:
- sections.append(
- "Hermes-side tools (OpenAI function schema). Emit "
- "{...} blocks to invoke these. "
- "For plain shell / file / grep / glob actions prefer your "
- "own built-in tools instead (they're faster).\n"
- + _json.dumps(tool_specs, ensure_ascii=False)
- )
-
- if tool_choice is not None:
- sections.append(
- f"Tool choice hint: {_json.dumps(tool_choice, ensure_ascii=False)}"
- )
-
- transcript: list[str] = []
- for message in messages:
- if not isinstance(message, dict):
- continue
- role = str(message.get("role") or "unknown").strip().lower()
- if role == "tool":
- role = "tool"
- elif role not in {"system", "user", "assistant"}:
- role = "context"
-
- content = message.get("content")
- rendered = _render_message_content(content)
- if not rendered:
- continue
-
- label = {
- "system": "System",
- "user": "User",
- "assistant": "Assistant",
- "tool": "Tool",
- "context": "Context",
- }.get(role, role.title())
- transcript.append(f"{label}:\n{rendered}")
-
- if transcript:
- sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
-
- sections.append("Continue the conversation from the latest user request.")
- return "\n\n".join(section.strip() for section in sections if section and section.strip())
-
-
-# ---------------------------------------------------------------------------
-# Environment & path helpers
-# ---------------------------------------------------------------------------
-
-
-def _resolve_command() -> str:
- return (
- os.getenv("HERMES_CURSOR_COMMAND", "").strip()
- or os.getenv("CURSOR_AGENT_PATH", "").strip()
- or DEFAULT_CURSOR_COMMAND
- )
-
-
-def _resolve_extra_args() -> list[str]:
- raw = os.getenv("HERMES_CURSOR_ARGS", "").strip()
- if not raw:
- return []
- return shlex.split(raw)
-
-
-def _resolve_mode() -> str:
- mode = os.getenv("HERMES_CURSOR_MODE", "").strip().lower() or DEFAULT_CURSOR_MODE
- if mode not in _VALID_CURSOR_MODES:
- mode = DEFAULT_CURSOR_MODE
- return mode
-
-
-def _resolve_workspace_override() -> str:
- return os.getenv("HERMES_CURSOR_WORKSPACE", "").strip()
-
-
-def _resolve_home_dir() -> str:
- """Pick a stable HOME for the child process.
-
- Mirrors ``agent/copilot_acp_client.py:_resolve_home_dir`` so subprocess
- behaviour stays predictable across providers.
- """
- try:
- from hermes_constants import get_subprocess_home
-
- profile_home = get_subprocess_home()
- if profile_home:
- return profile_home
- except Exception:
- pass
-
- home = os.environ.get("HOME", "").strip()
- if home:
- return home
-
- expanded = os.path.expanduser("~")
- if expanded and expanded != "~":
- return expanded
-
- # POSIX-only last resort: read the home dir from the password
- # database. ``os.getuid`` does not exist on Windows; gate explicitly
- # so the import-time footgun checker stays clean. (Windows already
- # falls through to the ``USERPROFILE`` / ``HOMEDRIVE+HOMEPATH``
- # branches above; if those failed there is no equivalent password
- # database here, so just bail to ``/tmp``.)
- if hasattr(os, "getuid"):
- try:
- import pwd
-
- resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() # windows-footgun: ok
- if resolved:
- return resolved
- except Exception:
- pass
-
- return "/tmp"
-
-
-def _build_subprocess_env(api_key: str | None) -> dict[str, str]:
- env = os.environ.copy()
- env["HOME"] = _resolve_home_dir()
- if api_key:
- env["CURSOR_API_KEY"] = api_key
- env.setdefault("NO_COLOR", "1")
- env.setdefault("TERM", "dumb")
- return env
-
-
-# ---------------------------------------------------------------------------
-# Stream-json parser
-# ---------------------------------------------------------------------------
-
-
-def _build_cursor_tool_preview(evt: "_CursorToolEvent") -> str:
- """Compact one-line description of a cursor tool call for the UI.
-
- Mirrors the spirit of ``_build_tool_preview`` in ``tool_executor.py`` —
- a single short string the spinner / activity feed can show next to
- the tool name. Tool-specific extractors fall back to a JSON dump of
- arguments when we don't have a hand-written formatter.
- """
- args = evt.args or {}
- try:
- if evt.envelope_key == "shellToolCall":
- cmd = args.get("command") or args.get("cmd")
- if isinstance(cmd, list):
- cmd = " ".join(str(part) for part in cmd)
- if isinstance(cmd, str) and cmd.strip():
- return cmd.strip()[:200]
- if evt.envelope_key in (
- "readToolCall",
- "editToolCall",
- "writeToolCall",
- "patchToolCall",
- "deleteToolCall",
- ):
- # Cursor's wire format isn't fully consistent across tool
- # kinds; ``editToolCall.args`` has been seen using
- # ``target_file`` / ``targetFile`` / ``file_path`` while
- # other tools use ``path``. Try them all so the activity
- # feed always shows what was touched.
- path = (
- args.get("path")
- or args.get("file")
- or args.get("filePath")
- or args.get("filename")
- or args.get("target_file")
- or args.get("targetFile")
- or args.get("file_path")
- or args.get("relative_workspace_path")
- or ""
- )
- if isinstance(path, str) and path.strip():
- return path.strip()[:200]
- if evt.envelope_key == "globToolCall":
- pat = args.get("globPattern") or args.get("pattern") or ""
- target = args.get("targetDirectory") or args.get("path") or ""
- label = " in ".join(p for p in (pat, target) if isinstance(p, str) and p.strip())
- if label:
- return label[:200]
- if evt.envelope_key in ("grepToolCall", "searchToolCall"):
- pat = args.get("pattern") or args.get("query") or args.get("regex") or ""
- target = args.get("path") or args.get("targetDirectory") or ""
- if isinstance(pat, str) and pat.strip():
- if isinstance(target, str) and target.strip():
- return f"{pat} in {target}"[:200]
- return pat.strip()[:200]
- if isinstance(target, str) and target.strip():
- return target.strip()[:200]
- if evt.envelope_key == "listToolCall":
- path = args.get("path") or args.get("directory") or args.get("targetDirectory") or ""
- if isinstance(path, str) and path.strip():
- return path.strip()[:200]
- return json.dumps(args, ensure_ascii=False)[:200]
- except Exception:
- return ""
-
-
-def _normalize_cursor_tool_name(envelope_key: str) -> str:
- """Map cursor's wire-format ``ToolCall`` keys to Hermes tool names.
-
- cursor-agent's stream-json wraps every internal tool call as
- ``"ToolCall"`` (e.g. ``shellToolCall``, ``readToolCall``). We
- translate the kind so the activity surfaces in Hermes' UI with names
- the user already recognises from other providers.
- """
- if not isinstance(envelope_key, str):
- return "cursor_tool"
- suffix = "ToolCall"
- base = envelope_key[: -len(suffix)] if envelope_key.endswith(suffix) else envelope_key
- if not base:
- return "cursor_tool"
- return {
- "shell": "shell",
- "read": "read_file",
- "list": "list_directory",
- "edit": "edit_file",
- "write": "write_file",
- "patch": "patch",
- "grep": "grep",
- "glob": "glob",
- "search": "search",
- "todo": "todo",
- "delete": "delete_file",
- "task": "task",
- "fetch": "web_fetch",
- }.get(base.lower(), base)
-
-
-def _summarise_cursor_tool_result(envelope_key: str, payload: dict[str, Any]) -> str:
- """Return a compact human-readable result string for the UI / log.
-
- Falls back to a generic JSON dump when we don't have a hand-written
- extractor for the tool kind. Best-effort — never raises.
- """
- result = payload.get("result")
- if not isinstance(result, dict):
- return ""
- success = result.get("success")
- if not isinstance(success, dict):
- if "error" in result and isinstance(result["error"], (str, dict)):
- return f"error: {result['error']}"[:400]
- return ""
- try:
- if envelope_key == "shellToolCall":
- stdout = success.get("stdout") or ""
- return stdout if isinstance(stdout, str) else json.dumps(stdout)
- if envelope_key == "readToolCall":
- content = success.get("content") or ""
- total = success.get("totalLines")
- if total is not None:
- return f"({total} lines)\n{content}" if content else f"({total} lines)"
- return content if isinstance(content, str) else json.dumps(content)
- if envelope_key in ("listToolCall", "globToolCall"):
- files = success.get("files") or success.get("entries") or []
- if isinstance(files, list):
- return "\n".join(str(f) for f in files[:200])
- return json.dumps(success, ensure_ascii=False)[:1000]
- except Exception:
- return ""
-
-
-class _CursorToolEvent:
- """A captured cursor-agent tool invocation (started + completed states).
-
- Used both for live progress callbacks (Hermes' ``tool_progress_callback``
- surface) and for the post-hoc audit list returned alongside the
- response so sessions can persist what cursor did.
- """
-
- __slots__ = (
- "call_id", "envelope_key", "name", "args", "started_at",
- "completed_at", "result_text", "is_error", "duration_ms",
- "lines_added", "lines_removed", "diff_string",
- )
-
- def __init__(self, call_id: str, envelope_key: str, args: dict[str, Any]) -> None:
- self.call_id = call_id
- self.envelope_key = envelope_key
- self.name = _normalize_cursor_tool_name(envelope_key)
- self.args = args
- self.started_at = time.monotonic()
- self.completed_at: float | None = None
- self.result_text: str = ""
- self.is_error: bool = False
- self.duration_ms: int = 0
- # Edit/write result metadata. Cursor's stream-json provides
- # ``linesAdded`` / ``linesRemoved`` / ``diffString`` on the
- # completion event for edit and write operations. We surface
- # the count in the activity feed ("+5 -2") and persist the
- # diff for replays / audits.
- self.lines_added: int | None = None
- self.lines_removed: int | None = None
- self.diff_string: str = ""
-
- def to_public_dict(self) -> dict[str, Any]:
- return {
- "id": self.call_id,
- "name": self.name,
- "envelope": self.envelope_key,
- "arguments": self.args,
- "result": self.result_text,
- "is_error": self.is_error,
- "duration_ms": self.duration_ms,
- }
-
-
-class _StreamJsonAccumulator:
- """Accumulates state from a ``cursor-agent --output-format stream-json`` stream.
-
- Caller feeds parsed JSON events with :meth:`feed`. When a terminal
- ``result`` event arrives the accumulator stores the success/failure state
- and surface text. The instance is reusable per-call but not thread-safe.
- """
-
- def __init__(self, on_tool_event: Any = None, on_text_event: Any = None) -> None:
- self.text_parts: list[str] = []
- self.reasoning_parts: list[str] = []
- self.session_id: str = ""
- self.request_id: str = ""
- self.model_label: str = ""
- self.duration_ms: int = 0
- self.usage: dict[str, int] = {}
- self.terminal: bool = False
- self.is_error: bool = False
- self.error_message: str = ""
- self.final_result_text: str = ""
- # Ordered transcript of (kind, payload) events as cursor emitted
- # them — used to separate "narrative text between tools" from
- # "final synthesis text" when we assemble the response. Without
- # this, ``assembled_text()`` glues every intermediate text event
- # to the end of the final answer and the user sees a wall of
- # planning prose preceding the actual response.
- self.event_log: list[tuple[str, Any]] = []
- # ``on_tool_event(stage, event)`` — invoked synchronously from
- # ``feed()`` when a ``tool_call`` event arrives.
- self._on_tool_event = on_tool_event
- # ``on_text_event(text)`` — invoked when cursor emits an
- # intermediate ``assistant`` text block (cursor often prints a
- # 1-2 sentence "let me check X next" between tool batches).
- # Surfacing these live as narration events gives the Hermes UI
- # the agentic feel of "tool → text → tool → text" that the user
- # asked about; without it everything bundles into one final
- # answer block.
- self._on_text_event = on_text_event
- self._tool_events: dict[str, _CursorToolEvent] = {}
- self.tool_events: list[_CursorToolEvent] = []
- # Optional caller-provided estimate of "current prompt size" in
- # tokens, used to surface a stable number on the Hermes status
- # bar. Set by ``_create_chat_completion`` before each call.
- self.messages_estimate: int = 0
- # We BUFFER intermediate text instead of dispatching it eagerly
- # so the final text-after-last-tool only appears in the
- # synthesis (assistant response) and not duplicated as a narrate
- # event in the activity feed. Flush rule: when the next tool
- # starts we now know the buffered text was "between tools" and
- # safe to surface. If no more tools come, the buffer is dropped
- # and only ``synthesis_text()`` shows it.
- self._pending_text: list[str] = []
-
- def feed(self, event: dict[str, Any]) -> None:
- evt_type = event.get("type")
- if not isinstance(evt_type, str):
- return
-
- if evt_type == "system":
- model = event.get("model")
- if isinstance(model, str):
- self.model_label = model
- session = event.get("session_id")
- if isinstance(session, str):
- self.session_id = session
- return
-
- if evt_type == "thinking":
- text = event.get("text")
- if isinstance(text, str) and text:
- self.reasoning_parts.append(text)
- return
-
- if evt_type == "assistant":
- message = event.get("message")
- if isinstance(message, dict):
- content = message.get("content")
- if isinstance(content, list):
- for block in content:
- if not isinstance(block, dict):
- continue
- if block.get("type") == "text":
- text = block.get("text")
- if isinstance(text, str) and text:
- self.text_parts.append(text)
- self.event_log.append(("text", text))
- # Defer the narrate dispatch — see
- # ``_pending_text`` docstring above.
- self._pending_text.append(text)
- return
-
- if evt_type == "tool_call":
- sub = event.get("subtype")
- # Before recording a NEW tool start, flush any text we'd
- # buffered: by definition that text was "between tools",
- # so it's safe (and useful) to show as narration now.
- if sub == "started" and self._pending_text:
- for buffered in self._pending_text:
- self._dispatch_text_event(buffered)
- self._pending_text.clear()
- self._consume_tool_call_event(event)
- # Note the tool event order so ``synthesis_text`` can pick
- # the right "final" text. We append only on ``started`` to
- # avoid double-counting; the per-tool event timeline is
- # already preserved in ``self.tool_events``.
- if sub == "started":
- self.event_log.append(("tool", None))
- return
-
- if evt_type == "result":
- self.terminal = True
- self.is_error = bool(event.get("is_error", False))
- subtype = event.get("subtype")
- if subtype == "error":
- self.is_error = True
- duration = event.get("duration_ms")
- if isinstance(duration, int):
- self.duration_ms = duration
- request = event.get("request_id")
- if isinstance(request, str):
- self.request_id = request
- usage = event.get("usage")
- if isinstance(usage, dict):
- # Cursor emits camelCase keys.
- normalized = {}
- for k, v in usage.items():
- if isinstance(v, (int, float)):
- normalized[str(k)] = int(v)
- self.usage = normalized
- result_text = event.get("result")
- if isinstance(result_text, str):
- self.final_result_text = result_text
- if not self.text_parts and not self.is_error:
- self.text_parts.append(result_text)
- if self.is_error and not self.error_message:
- self.error_message = result_text or "cursor-agent returned an error"
- return
-
- # Unknown / informational events (e.g. ``user`` echo) — ignore.
-
- def _consume_tool_call_event(self, event: dict[str, Any]) -> None:
- """Translate one cursor stream-json ``tool_call`` event.
-
- cursor-agent emits one event with ``subtype="started"`` when the LLM
- decides to use one of its built-in tools (shell, read, edit, ...),
- and a follow-up with ``subtype="completed"`` carrying the result.
- We rebuild a ``_CursorToolEvent`` from those, fire the optional
- progress callback so Hermes' UI can show the activity in real time,
- and stash the final list so the caller can surface "what cursor
- actually did" in the response (e.g. for session audit).
- """
- subtype = event.get("subtype")
- call_id = event.get("call_id")
- if not isinstance(call_id, str) or not call_id:
- return
- tool_call = event.get("tool_call")
- if not isinstance(tool_call, dict) or not tool_call:
- return
- envelope_key = next(iter(tool_call.keys()), "")
- payload = tool_call.get(envelope_key) if isinstance(envelope_key, str) else None
- if not isinstance(payload, dict):
- return
- args_obj = payload.get("args")
- if not isinstance(args_obj, dict):
- args_obj = {}
-
- if subtype == "started":
- evt = _CursorToolEvent(
- call_id=call_id,
- envelope_key=envelope_key,
- args=args_obj,
- )
- self._tool_events[call_id] = evt
- self.tool_events.append(evt)
- self._fire_tool_event("started", evt)
- return
-
- if subtype == "completed":
- evt = self._tool_events.get(call_id)
- if evt is None:
- # Cursor sent a completed event we never saw started for —
- # synthesise the started state so the audit list still has it.
- evt = _CursorToolEvent(
- call_id=call_id,
- envelope_key=envelope_key,
- args=args_obj,
- )
- self._tool_events[call_id] = evt
- self.tool_events.append(evt)
- self._fire_tool_event("started", evt)
- evt.completed_at = time.monotonic()
- evt.duration_ms = int((evt.completed_at - evt.started_at) * 1000)
- result = payload.get("result")
- if isinstance(result, dict):
- if "error" in result and result.get("error"):
- evt.is_error = True
- # Pull diff stats off edit/write/patch completion events
- # so the activity feed can show "+5 -2" next to the
- # path. Cursor only emits these for file-modifying tools.
- success = result.get("success") if isinstance(result, dict) else None
- if isinstance(success, dict):
- la = success.get("linesAdded")
- lr = success.get("linesRemoved")
- ds = success.get("diffString")
- if isinstance(la, int):
- evt.lines_added = la
- if isinstance(lr, int):
- evt.lines_removed = lr
- if isinstance(ds, str):
- evt.diff_string = ds
- evt.result_text = _summarise_cursor_tool_result(envelope_key, payload)
- self._fire_tool_event("completed", evt)
- return
-
- def _fire_tool_event(self, stage: str, evt: _CursorToolEvent) -> None:
- if self._on_tool_event is None:
- return
- try:
- self._on_tool_event(stage, evt)
- except Exception:
- # A broken UI must never bring down the chat call.
- pass
-
- def _dispatch_text_event(self, text: str) -> None:
- """Forward an intermediate assistant text event to the UI bridge.
-
- Errors are swallowed — a broken callback must never abort the
- chat call.
- """
- if self._on_text_event is None:
- return
- try:
- self._on_text_event(text)
- except Exception:
- pass
-
- def assembled_text(self) -> str:
- return "".join(self.text_parts).strip()
-
- def synthesis_text(self) -> str:
- """Return only the synthesis portion of the response.
-
- Cursor's stream interleaves planning prose ("Searching the
- agent directory…") with tool calls, then ends with the actual
- synthesised answer. Gluing every text event together leaves
- the user staring at a wall of "I'll do X next" lines before
- the real answer. This helper returns just the text emitted
- AFTER the last tool call — that's the synthesis.
-
- Falls back to the full ``assembled_text()`` when:
- * no tools ran (every text is part of the answer);
- * cursor emitted no text after the last tool (rare; we then
- use the cursor-supplied ``result.result`` if it differs
- from the bundled text, otherwise the full bundle so the
- user sees *something*).
- """
- tool_seen = False
- synth: list[str] = []
- for kind, payload in self.event_log:
- if kind == "tool":
- tool_seen = True
- synth.clear() # drop earlier planning text
- elif kind == "text":
- synth.append(payload)
- if synth:
- return "".join(synth).strip()
- # No text after the last tool. If cursor's ``result.result``
- # carries something useful and distinct, use it; otherwise
- # surface the full bundle so the user isn't left empty-handed.
- if not tool_seen:
- return self.assembled_text()
- if self.final_result_text and self.final_result_text.strip():
- return self.final_result_text.strip()
- return self.assembled_text()
-
- def narration_text(self) -> str:
- """Return the planning / between-tool prose for transcript replay.
-
- The live bridge already surfaces each piece individually via
- ``on_text_event``. This helper is for tests / debug consumers
- that want to inspect what was intermediate vs. final.
- """
- narration: list[str] = []
- bucket: list[str] = []
- for kind, payload in self.event_log:
- if kind == "tool":
- if bucket:
- narration.append("".join(bucket).strip())
- bucket = []
- elif kind == "text":
- bucket.append(payload)
- # The final ``bucket`` is the synthesis — drop it.
- return "\n".join(n for n in narration if n)
-
- def assembled_reasoning(self) -> str:
- return "".join(self.reasoning_parts).strip()
-
- def openai_usage(self) -> SimpleNamespace:
- """Translate cursor-agent's per-turn usage into OpenAI-shaped fields.
-
- Quirk worth knowing: cursor-agent's ``result.usage.inputTokens``
- is the SUM of fresh (non-cached) input tokens across **every
- internal LLM round-trip** in the turn. For an agentic turn that
- runs N tool calls there are roughly N+1 internal model calls
- (one per tool round plus the final text), and each call's input
- grows as tool results accumulate. So inputTokens for a deep
- multi-tool turn can easily reach 1M+ while the model's actual
- context window (e.g. 200K on composer-2.5-fast) was never
- exceeded — cursor reused the cache between calls.
-
- Hermes' status bar and compressor use ``prompt_tokens`` as a
- proxy for "what's currently in the model's context" (used to
- drive compression decisions and the % bar). Reporting the raw
- cumulative SUM blows the bar past 100% on agentic turns, which
- is both visually wrong and triggers spurious compression.
-
- Fix: divide the cumulative figures by the number of internal
- rounds we observed (tool_events + 1) to produce an honest
- per-round average that matches the model's actual context use.
- The full billing total is still reported separately for cost
- tracking via ``session_input_tokens``.
- """
- input_tokens_raw = int(self.usage.get("inputTokens", 0))
- output_tokens = int(self.usage.get("outputTokens", 0))
- cache_read_raw = int(self.usage.get("cacheReadTokens", 0))
-
- rounds = max(len(self.tool_events) + 1, 1)
- per_round_input = input_tokens_raw // rounds if rounds > 0 else input_tokens_raw
- per_round_cache = cache_read_raw // rounds if rounds > 0 else cache_read_raw
- approx_context_tokens = per_round_cache + per_round_input
-
- # Hermes' messages-based estimate is the canonical "what's in
- # the model's context right now" number (it matches what the
- # next-turn prompt will look like). Prefer it for
- # ``prompt_tokens`` so the status bar stays consistent before,
- # during, and after generation. Fall back to the per-round
- # average when no estimate is set (e.g. accumulator used outside
- # the client, in unit tests).
- if self.messages_estimate > 0:
- prompt_tokens = self.messages_estimate
- else:
- prompt_tokens = approx_context_tokens
-
- return SimpleNamespace(
- prompt_tokens=prompt_tokens,
- completion_tokens=output_tokens,
- total_tokens=prompt_tokens + output_tokens,
- prompt_tokens_details=SimpleNamespace(cached_tokens=per_round_cache),
- # Preserve raw cursor-side totals for billing / cost tracking
- # consumers that need the actual usage figures.
- cursor_raw_input_tokens=input_tokens_raw,
- cursor_raw_cache_read_tokens=cache_read_raw,
- cursor_internal_rounds=rounds,
- cursor_per_round_context=approx_context_tokens,
- )
-
-
-# ---------------------------------------------------------------------------
-# Inline OpenAI-style namespace shims (mirror copilot_acp_client style)
-# ---------------------------------------------------------------------------
-
-
-class _CursorChatCompletions:
- def __init__(self, client: "CursorAgentClient"):
- self._client = client
-
- def create(self, **kwargs: Any) -> Any:
- # ``cursor-agent`` exposes streaming via stream-json on its own stdout,
- # but the synchronous ``_create_chat_completion`` already accumulates
- # the full response. If a caller passes ``stream=True`` we synthesise
- # an OpenAI-style chunk iterator from the final response so the
- # streaming hot path stays iterable. Without this, iterating the
- # ``SimpleNamespace`` we return surfaces as ``TypeError:
- # 'types.SimpleNamespace' object is not iterable`` (Hermes' chat
- # streaming loop did this).
- stream_requested = bool(kwargs.pop("stream", False))
- kwargs.pop("stream_options", None) # OpenAI SDK extras — irrelevant
- response = self._client._create_chat_completion(**kwargs)
- if not stream_requested:
- return response
- return _synthesise_stream_chunks(response)
-
-
-class _CursorChatNamespace:
- def __init__(self, client: "CursorAgentClient"):
- self.completions = _CursorChatCompletions(client)
-
-
-def _synthesise_stream_chunks(response: Any):
- """Yield OpenAI-style streaming chunks from a non-streaming response.
-
- Hermes' chat streaming loop expects ``for chunk in stream:`` with each
- chunk shaped like an OpenAI ``ChatCompletionChunk``: ``chunk.choices[0]
- .delta.{content,tool_calls,reasoning,reasoning_content}`` and a final
- chunk carrying ``usage``. We can't truly stream from the underlying
- subprocess at this layer, but we can split the assembled response into a
- small number of chunks that the loop will accept without crashing.
- """
- try:
- choice = response.choices[0]
- except Exception:
- return
-
- message = getattr(choice, "message", None)
- if message is None:
- return
-
- role = "assistant"
- content = getattr(message, "content", "") or ""
- tool_calls = getattr(message, "tool_calls", None) or []
- reasoning = getattr(message, "reasoning", None)
- reasoning_content = getattr(message, "reasoning_content", None)
- finish_reason = getattr(choice, "finish_reason", "stop")
- model = getattr(response, "model", "cursor")
- usage = getattr(response, "usage", None)
-
- if reasoning_content:
- yield SimpleNamespace(
- choices=[
- SimpleNamespace(
- delta=SimpleNamespace(
- role=role,
- content=None,
- tool_calls=None,
- reasoning=None,
- reasoning_content=reasoning_content,
- ),
- finish_reason=None,
- index=0,
- )
- ],
- model=model,
- usage=None,
- )
- elif reasoning:
- yield SimpleNamespace(
- choices=[
- SimpleNamespace(
- delta=SimpleNamespace(
- role=role,
- content=None,
- tool_calls=None,
- reasoning=reasoning,
- reasoning_content=None,
- ),
- finish_reason=None,
- index=0,
- )
- ],
- model=model,
- usage=None,
- )
-
- if content:
- yield SimpleNamespace(
- choices=[
- SimpleNamespace(
- delta=SimpleNamespace(
- role=role,
- content=content,
- tool_calls=None,
- reasoning=None,
- reasoning_content=None,
- ),
- finish_reason=None,
- index=0,
- )
- ],
- model=model,
- usage=None,
- )
-
- if tool_calls:
- # Hermes expects streaming tool_calls to include a per-chunk index.
- for i, tc in enumerate(tool_calls):
- yield SimpleNamespace(
- choices=[
- SimpleNamespace(
- delta=SimpleNamespace(
- role=role,
- content=None,
- tool_calls=[
- SimpleNamespace(
- index=i,
- id=getattr(tc, "id", f"call_{i}"),
- type="function",
- function=SimpleNamespace(
- name=getattr(tc.function, "name", ""),
- arguments=getattr(tc.function, "arguments", ""),
- ),
- )
- ],
- reasoning=None,
- reasoning_content=None,
- ),
- finish_reason=None,
- index=0,
- )
- ],
- model=model,
- usage=None,
- )
-
- yield SimpleNamespace(
- choices=[
- SimpleNamespace(
- delta=SimpleNamespace(
- role=None,
- content=None,
- tool_calls=None,
- reasoning=None,
- reasoning_content=None,
- ),
- finish_reason=finish_reason,
- index=0,
- )
- ],
- model=model,
- usage=usage,
- )
-
-
-# ---------------------------------------------------------------------------
-# Main client
-# ---------------------------------------------------------------------------
-
-
-class CursorAgentClient:
- """Minimal OpenAI-client-compatible facade for the Cursor Agent CLI."""
-
- def __init__(
- self,
- *,
- api_key: str | None = None,
- base_url: str | None = None,
- default_headers: dict[str, str] | None = None,
- command: str | None = None,
- args: list[str] | None = None,
- workspace: str | None = None,
- mode: str | None = None,
- timeout_seconds: float | None = None,
- tool_progress_callback: Any = None,
- context_estimate_callback: Any = None,
- **_: Any,
- ):
- candidate_key = (api_key or os.getenv("CURSOR_API_KEY", "") or "").strip()
- # Treat sentinels ("", "cursor-agent-login", …) as "no key" so we don't
- # forward them to ``cursor-agent --api-key`` (which rejects them and
- # closes stdin, producing BrokenPipeError on our writes).
- self.api_key = None if candidate_key in _API_KEY_SENTINELS else candidate_key
- self.base_url = base_url or CURSOR_MARKER_BASE_URL
- self._default_headers = dict(default_headers or {})
- self._command = (command or _resolve_command()).strip() or DEFAULT_CURSOR_COMMAND
- self._extra_args = list(args) if args else _resolve_extra_args()
- chosen_mode = (mode or _resolve_mode()).strip().lower() or DEFAULT_CURSOR_MODE
- if chosen_mode not in _VALID_CURSOR_MODES:
- chosen_mode = DEFAULT_CURSOR_MODE
- self._mode = chosen_mode
- override = workspace or _resolve_workspace_override()
- self._workspace: str | None = override or None # None ⇒ tmpdir per call
- # Idle timeout (resets per event). Env var > explicit arg > default.
- self._timeout_seconds = float(timeout_seconds) if timeout_seconds else _DEFAULT_TIMEOUT_SECONDS
- env_timeout = os.environ.get("HERMES_CURSOR_TIMEOUT_SECONDS", "").strip()
- if env_timeout:
- try:
- env_timeout_val = float(env_timeout)
- if env_timeout_val > 0:
- self._timeout_seconds = env_timeout_val
- except ValueError:
- pass
-
- self._tool_progress_callback = tool_progress_callback
- # Optional hook invoked with the rough messages-based token estimate
- # *before* the subprocess spawns. Used by the host agent to bump
- # the status-bar (``compressor.last_prompt_tokens``) so the input
- # context is visible during long in-flight turns instead of the
- # bar sitting at 0 until the result event arrives.
- self._context_estimate_callback = context_estimate_callback
-
- # High-water mark for the Hermes status bar. Held only WITHIN
- # a single Hermes user turn: Hermes loops on tool_calls (cursor
- # returning ```` blocks for Hermes to run), making
- # multiple cursor calls per user prompt. Each call's footprint
- # can vary (different tools attached, different message slices),
- # so the bar must not flicker between those internal calls.
- # Reset automatically on every NEW user turn (detected by the
- # user-message count growing in the messages list); previously
- # this was a session-wide monotonic mark, which incorrectly
- # froze the bar at the highest-activity turn's value and
- # prevented it from reflecting the actual current input across
- # subsequent prompts.
- self._context_high_water: int = 0
- # Last seen count of user messages in the prompt list. Used to
- # detect new user turns so we can reset the high-water above.
- self._last_user_msg_count: int = 0
-
- self.chat = _CursorChatNamespace(self)
- self.is_closed = False
-
- self._active_process: subprocess.Popen[str] | None = None
- self._active_process_lock = threading.Lock()
- self._ephemeral_dirs: list[str] = []
- self._dir_lock = threading.Lock()
- # Session-scoped scratch workspace. Lazily minted on first call
- # and REUSED across all subsequent calls in the same chat session
- # so cursor-agent doesn't pay its ~4.5s "fresh-workspace bootstrap"
- # tax on every turn. Cleaned up by ``close()`` along with any
- # other ephemeral dirs. When ``self._workspace`` (user override)
- # is set we skip this entirely and honour the explicit path.
- self._session_workspace: str | None = None
-
- # ------------------------------------------------------------------
- # Lifecycle
- # ------------------------------------------------------------------
-
- def close(self) -> None:
- proc: subprocess.Popen[str] | None
- with self._active_process_lock:
- proc = self._active_process
- self._active_process = None
- self.is_closed = True
- # New session starts fresh: drop the high-water floor so the
- # status bar reflects current prompt size, not the residual
- # of a previously-large conversation.
- self._context_high_water = 0
- if proc is not None:
- try:
- proc.terminate()
- proc.wait(timeout=2)
- except Exception:
- try:
- proc.kill()
- except Exception:
- pass
- with self._dir_lock:
- dirs, self._ephemeral_dirs = self._ephemeral_dirs, []
- # Drop the cached session workspace ref; if this client is
- # ever re-used after close() (shouldn't happen, but defensive)
- # the next call will lazy-init a fresh dir.
- self._session_workspace = None
- for d in dirs:
- try:
- shutil.rmtree(d, ignore_errors=True)
- except Exception:
- pass
-
- # ------------------------------------------------------------------
- # OpenAI-compat surface
- # ------------------------------------------------------------------
-
- def _create_chat_completion(
- self,
- *,
- model: str | None = None,
- messages: list[dict[str, Any]] | None = None,
- timeout: float | None = None,
- tools: list[dict[str, Any]] | None = None,
- tool_choice: Any = None,
- **_: Any,
- ) -> Any:
- # Estimate context size from what Hermes is actually sending —
- # this is the authoritative answer for the status bar and the
- # compressor. Cursor's reported ``inputTokens`` is the SUM
- # across internal tool round-trips and undercounts the snapshot
- # at end of turn (after averaging) while overcounting at end of
- # turn (raw); neither matches what's in the next-turn prompt.
- # Estimating from messages keeps the bar consistent before,
- # during, and after the call (#cursor-bar-stable).
- # Detect a NEW user turn: Hermes adds a user message at the top
- # of every fresh prompt cycle, then loops internally on tool_calls
- # without adding more user messages. So a strictly increased user-
- # message count is the signal that this is a new prompt and the
- # high-water mark from the previous turn no longer applies.
- try:
- user_msg_count = sum(
- 1 for m in (messages or []) if (m or {}).get("role") == "user"
- )
- except Exception:
- user_msg_count = self._last_user_msg_count
- is_new_user_turn = user_msg_count > self._last_user_msg_count
- if is_new_user_turn:
- # Drop the floor so the bar can reflect this turn's actual
- # input size (which may legitimately be smaller than a prior
- # heavy-tool-use turn's per-round average).
- self._context_high_water = 0
- self._last_user_msg_count = user_msg_count
-
- try:
- from agent.model_metadata import estimate_request_tokens_rough
- self._last_messages_estimate = estimate_request_tokens_rough(
- messages or [], tools=tools or None
- )
- except Exception:
- self._last_messages_estimate = 0
-
- # Bump the high-water mark NOW (before subprocess spawn) so the
- # status bar reflects input context immediately. Without this the
- # bar shows 0/200K throughout a long in-flight FIRST turn because
- # the compressor only learns about prompt_tokens from the final
- # response.
- if self._last_messages_estimate > self._context_high_water:
- self._context_high_water = self._last_messages_estimate
- if callable(self._context_estimate_callback) and self._last_messages_estimate > 0:
- # On a new user turn, signal the host so it can reset its
- # compressor bar to this turn's estimate (allowing the bar
- # to DROP if appropriate). Otherwise the callback should
- # bump monotonically so the in-loop cursor calls don't
- # flicker the bar down between iterations.
- try:
- self._context_estimate_callback(
- self._last_messages_estimate, reset=is_new_user_turn
- )
- except TypeError:
- # Backward-compat: older callbacks without ``reset`` kwarg.
- try:
- self._context_estimate_callback(self._last_messages_estimate)
- except Exception:
- pass
- except Exception:
- # Never let a UI hook break the actual request.
- pass
-
- prompt_text = _format_messages_as_prompt(
- messages or [],
- model=model,
- tools=tools,
- tool_choice=tool_choice,
- )
-
- if timeout is None:
- effective_timeout = self._timeout_seconds
- elif isinstance(timeout, (int, float)):
- effective_timeout = float(timeout)
- else:
- candidates = [
- getattr(timeout, attr, None)
- for attr in ("read", "write", "connect", "pool", "timeout")
- ]
- numeric = [float(v) for v in candidates if isinstance(v, (int, float))]
- effective_timeout = max(numeric) if numeric else self._timeout_seconds
-
- chosen_model = (model or DEFAULT_CURSOR_MODEL).strip() or DEFAULT_CURSOR_MODEL
-
- accumulator = self._run_prompt(
- prompt_text=prompt_text,
- model=chosen_model,
- timeout_seconds=effective_timeout,
- )
-
- # Use the synthesis text (post-last-tool) so the user gets the
- # actual answer without the wall of "let me check X next" prose
- # that cursor's model emits between tool batches. The
- # intermediate prose was already surfaced live via the text-
- # event bridge.
- assistant_text = accumulator.synthesis_text()
- reasoning_text = accumulator.assembled_reasoning() or None
-
- if accumulator.is_error:
- raise RuntimeError(
- f"cursor-agent reported an error: {accumulator.error_message or assistant_text}"
- )
-
- tool_calls, cleaned_text = _extract_tool_calls_from_text(assistant_text)
-
- cursor_internal_tools = [evt.to_public_dict() for evt in accumulator.tool_events]
- # Hand cursor's accumulator our messages-based estimate so
- # ``openai_usage`` can use it as the canonical ``prompt_tokens``
- # the status bar reads from. Without this the bar shows
- # different numbers during vs after generation (cursor's
- # per-round average vs Hermes' messages estimate, ~3x apart).
- #
- # We also gate the estimate by the running high-water mark so
- # the bar never visibly DROPS within a chat session — a wobble
- # we saw when Hermes loops over multiple cursor calls per user
- # turn (each call has a different tools/messages footprint).
- cur_estimate = getattr(self, "_last_messages_estimate", 0) or 0
- # ``openai_usage`` may also use cursor's per-round average; mix
- # it in so the high-water never undercounts when our estimate
- # is too low (e.g. tools=[] on a follow-up call).
- cursor_per_round = self._estimate_per_round_context(accumulator)
- new_high = max(self._context_high_water, cur_estimate, cursor_per_round)
- self._context_high_water = new_high
- accumulator.messages_estimate = new_high
- assistant_message = SimpleNamespace(
- content=cleaned_text,
- tool_calls=tool_calls,
- reasoning=reasoning_text,
- reasoning_content=reasoning_text,
- reasoning_details=None,
- # Audit log of cursor-agent's *internal* tool calls (shell/read/
- # edit/etc. that cursor's harness ran by itself). Hermes' UI is
- # already shown them in real time via tool_progress_callback;
- # this field lets sessions persist what happened.
- cursor_internal_tools=cursor_internal_tools,
- )
- finish_reason = "tool_calls" if tool_calls else "stop"
- choice = SimpleNamespace(
- message=assistant_message,
- finish_reason=finish_reason,
- index=0,
- )
- return SimpleNamespace(
- choices=[choice],
- usage=accumulator.openai_usage(),
- model=chosen_model,
- id=accumulator.request_id or f"cursor-{accumulator.session_id}",
- object="chat.completion",
- cursor_internal_tools=cursor_internal_tools,
- )
-
- # ------------------------------------------------------------------
- # Subprocess plumbing
- # ------------------------------------------------------------------
-
- def _build_argv(self, *, model: str, workspace: str) -> list[str]:
- argv = [
- self._command,
- "-p",
- "--output-format",
- "stream-json",
- ]
- # Only forward ``--mode`` to the CLI for values it knows about.
- # The synthetic ``agent`` value means "use cursor's default
- # permissionMode" — achieved by omitting the flag entirely.
- if self._mode in _CURSOR_CLI_MODES:
- argv.extend(["--mode", self._mode])
- argv.extend(
- [
- "--model",
- model,
- "--workspace",
- workspace,
- "--force",
- "--trust",
- ]
- )
- if self.api_key:
- argv.extend(["--api-key", self.api_key])
- argv.extend(self._extra_args)
- return argv
-
- def _allocate_workspace(self) -> tuple[str, bool]:
- """Return ``(workspace, ephemeral)``.
-
- Strategy:
- 1. If the caller pinned an explicit ``workspace`` (env var or kwarg),
- always honour it.
- 2. Otherwise, lazily mint ONE temp dir for the whole client session
- and reuse it across calls. Per-turn fresh dirs cost cursor-agent
- ~4.5s of bootstrap overhead each invocation (measured), and there's
- no isolation benefit between turns of the SAME chat session
- anyway — they're already operating on behalf of the same user.
-
- The session workspace is tracked in ``_ephemeral_dirs`` so
- ``close()`` cleans it up just like the legacy per-call dirs.
- """
- if self._workspace:
- try:
- Path(self._workspace).mkdir(parents=True, exist_ok=True)
- except Exception:
- pass
- return self._workspace, False
- with self._dir_lock:
- if self._session_workspace is None:
- tmp = tempfile.mkdtemp(prefix="hermes-cursor-")
- self._session_workspace = tmp
- self._ephemeral_dirs.append(tmp)
- return self._session_workspace, True
-
- def _run_prompt(
- self,
- *,
- prompt_text: str,
- model: str,
- timeout_seconds: float,
- ) -> _StreamJsonAccumulator:
- workspace, _ephemeral = self._allocate_workspace()
- argv = self._build_argv(model=model, workspace=workspace)
-
- try:
- proc = subprocess.Popen(
- argv,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- text=True,
- bufsize=1,
- cwd=workspace,
- env=_build_subprocess_env(self.api_key),
- )
- except FileNotFoundError as exc:
- raise RuntimeError(
- f"Could not start Cursor Agent CLI '{self._command}'. "
- "Install Cursor CLI (https://cursor.com/dashboard/integrations) "
- "or set HERMES_CURSOR_COMMAND / CURSOR_AGENT_PATH."
- ) from exc
-
- if proc.stdin is None or proc.stdout is None:
- proc.kill()
- raise RuntimeError("cursor-agent process did not expose stdin/stdout pipes.")
-
- self.is_closed = False
- with self._active_process_lock:
- self._active_process = proc
-
- try:
- # Drain stderr concurrently while we feed stdin so a fast-exiting
- # cursor-agent (e.g. on bad auth) can't deadlock or hide its
- # diagnostic message behind our pipe write.
- stderr_tail: deque[str] = deque(maxlen=80)
- inbox: queue.Queue[dict[str, Any]] = queue.Queue()
-
- def _stderr_reader_early() -> None:
- if proc.stderr is None:
- return
- for line in proc.stderr:
- stderr_tail.append(line.rstrip("\n"))
-
- err_thread = threading.Thread(target=_stderr_reader_early, daemon=True)
- err_thread.start()
-
- stdin_error: BaseException | None = None
- try:
- proc.stdin.write(prompt_text)
- proc.stdin.flush()
- except BrokenPipeError as exc:
- # cursor-agent closed stdin before consuming the prompt — almost
- # always means it rejected auth (e.g. invalid API key) or
- # bailed on a flag. Capture the cause; we'll raise after we
- # have stderr context.
- stdin_error = exc
- except Exception as exc: # pragma: no cover - defensive
- stdin_error = exc
- finally:
- try:
- proc.stdin.close()
- except Exception:
- pass
-
- if stdin_error is not None:
- # Give the child a moment to flush its error message, then bail.
- try:
- proc.wait(timeout=3)
- except Exception:
- pass
- err_thread.join(timeout=1)
- exit_code = getattr(proc, "returncode", None)
- if exit_code is None:
- try:
- exit_code = proc.poll()
- except Exception:
- exit_code = None
- stderr_text = "\n".join(stderr_tail).strip()
- redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else ""
- detail = f" stderr: {redacted}" if redacted else ""
- raise RuntimeError(
- "cursor-agent closed stdin before reading the prompt "
- f"(exit {exit_code}).{detail}"
- ) from stdin_error
-
- def _stdout_reader() -> None:
- if proc.stdout is None:
- return
- for line in proc.stdout:
- line = line.strip()
- if not line:
- continue
- try:
- inbox.put(json.loads(line))
- except Exception:
- # Cursor sometimes prints non-JSON warnings before/after
- # the JSON stream — preserve them in stderr_tail-like
- # form so timeouts can surface useful diagnostics.
- stderr_tail.append("[stdout-non-json] " + line)
-
- out_thread = threading.Thread(target=_stdout_reader, daemon=True)
- out_thread.start()
- # err_thread is already running from the pre-stdin-write block above.
-
- accumulator = _StreamJsonAccumulator(
- on_tool_event=self._build_tool_event_bridge(),
- on_text_event=self._build_text_event_bridge(),
- )
- # Idle deadline, not wall-clock. Resets on every successful
- # stream-json event. A turn can run arbitrarily long in total
- # wall time provided the subprocess keeps emitting events
- # (text deltas, tool_calls, tool_results). Only true hangs
- # (no events for ``timeout_seconds``) trigger termination.
- idle_seconds = float(timeout_seconds)
- deadline = time.monotonic() + idle_seconds
-
- while not accumulator.terminal:
- if time.monotonic() >= deadline:
- self._terminate_active_proc(proc)
- raise TimeoutError(
- f"cursor-agent emitted no events for {idle_seconds:.0f}s; "
- f"presumed hung. Set HERMES_CURSOR_TIMEOUT_SECONDS to "
- f"increase the idle threshold."
- )
- if proc.poll() is not None and inbox.empty():
- break
- try:
- event = inbox.get(timeout=0.25)
- except queue.Empty:
- continue
- # Successful event arrival => subprocess is alive and
- # making progress. Reset the idle deadline.
- deadline = time.monotonic() + idle_seconds
- try:
- accumulator.feed(event)
- except Exception:
- # Don't let a malformed event abort the entire request.
- # Keep draining; if the terminal result never comes,
- # the idle deadline above will surface the failure.
- continue
-
- if not accumulator.terminal:
- stderr_text = "\n".join(stderr_tail).strip()
- redacted = redact_sensitive_text(stderr_text, force=True) if stderr_text else ""
- raise RuntimeError(
- "cursor-agent exited before emitting a terminal result. "
- + (f"stderr tail:\n{redacted}" if redacted else "(no stderr)")
- )
-
- return accumulator
- finally:
- self._terminate_active_proc(proc)
-
- def _estimate_per_round_context(self, accumulator: "_StreamJsonAccumulator") -> int:
- """Compute cursor's per-round context estimate without mutating it.
-
- Mirrors the math in :meth:`_StreamJsonAccumulator.openai_usage`
- but returns just the per-round figure so we can feed it into
- the high-water mark before swapping in the messages estimate.
- """
- input_tokens_raw = int(accumulator.usage.get("inputTokens", 0))
- cache_read_raw = int(accumulator.usage.get("cacheReadTokens", 0))
- rounds = max(len(accumulator.tool_events) + 1, 1)
- per_round_input = input_tokens_raw // rounds if rounds > 0 else input_tokens_raw
- per_round_cache = cache_read_raw // rounds if rounds > 0 else cache_read_raw
- return per_round_cache + per_round_input
-
- def reset_context_baseline(self) -> None:
- """Reset the bar's monotonic floor (e.g. on ``/new`` or compress).
-
- Hermes' chat session calls ``close()`` on the client when
- starting a fresh session; clients spawned with ``shared=True``
- outlive that. This is the explicit hook for any caller that
- wants the bar to drop back to current-prompt size after a
- deliberate context wipe.
- """
- self._context_high_water = 0
-
- def _build_text_event_bridge(self) -> Any:
- """Adapter for cursor's intermediate ``assistant`` text events.
-
- Cursor emits "planning text" between tool batches (e.g.
- "Searching the agent directory…" → tools → "Reading each
- matching file…" → tools → final synthesis). We surface each
- intermediate piece as a synthetic ``narrate`` tool-progress
- event so the Hermes activity feed shows the agentic chain
- live, interleaved with the real tool events — instead of
- bundling everything into one wall of text at the end.
-
- The synthesis text (the final one after the last tool) is
- excluded so it doesn't double-up with the response body.
- """
- cb = self._tool_progress_callback
- if cb is None:
- return None
-
- def _bridge(text: str) -> None:
- try:
- preview = text.strip().splitlines()[0] if text else ""
- if len(preview) > 240:
- preview = preview[:237] + "..."
- if not preview:
- return
- cb("tool.started", "narrate", preview, {"text": text})
- cb(
- "tool.completed", "narrate", None, None,
- duration=0.0, is_error=False, result=text,
- )
- except Exception:
- pass
-
- return _bridge
-
- def _build_tool_event_bridge(self) -> Any:
- """Adapter from our ``_CursorToolEvent`` stream to Hermes' callback.
-
- ``tool_progress_callback(event_type, name, preview, args, ...)`` is
- the same shape Hermes' built-in tools use (see
- ``agent/tool_executor.py``). We translate cursor's "tool_call
- started/completed" stream-json events into ``tool.started`` /
- ``tool.completed`` callbacks so the user's UI shows cursor's
- internal shell/read/edit activity the same way it shows native
- tool calls from Grok, GPT, Claude, etc.
-
- Without this bridge, cursor's tool activity is invisible to Hermes
- — the user only sees the model's final text and the session's
- ``tool_call_count`` stays at zero even when cursor actually ran
- multiple shell/read commands internally.
- """
- cb = self._tool_progress_callback
- if cb is None:
- return None
-
- def _bridge(stage: str, evt: _CursorToolEvent) -> None:
- try:
- if stage == "started":
- preview = _build_cursor_tool_preview(evt)
- cb("tool.started", evt.name, preview, evt.args)
- elif stage == "completed":
- # cli.py stores ``function_args`` from tool.started in a
- # FIFO queue and pops them on tool.completed for display.
- # ``evt.args`` is the SAME dict reference, so mutating it
- # here surfaces our diff stats to ``get_cute_tool_message``
- # without changing the upstream callback signature.
- if (
- evt.lines_added is not None
- or evt.lines_removed is not None
- ) and isinstance(evt.args, dict):
- evt.args["_diff_stats"] = {
- "added": evt.lines_added or 0,
- "removed": evt.lines_removed or 0,
- }
- if evt.diff_string:
- evt.args["_diff_string"] = evt.diff_string
- cb(
- "tool.completed",
- evt.name,
- None,
- None,
- duration=evt.duration_ms / 1000.0,
- is_error=evt.is_error,
- result=evt.result_text,
- )
- except Exception:
- # The Hermes callback may not accept all our kwargs (e.g.
- # older Hermes builds). Fall back to the simplest form.
- try:
- cb(f"tool.{stage}", evt.name, evt.result_text or "", evt.args)
- except Exception:
- pass
-
- return _bridge
-
- def _terminate_active_proc(self, proc: subprocess.Popen[str]) -> None:
- with self._active_process_lock:
- current = self._active_process
- if current is proc:
- self._active_process = None
- if proc.poll() is not None:
- return
- # cursor-agent exits naturally a few hundred ms after emitting the
- # ``result`` event. Give it that grace period BEFORE force-killing —
- # SIGTERM forces Node.js to run shutdown hooks which can take
- # longer than just letting it exit on its own.
- try:
- proc.wait(timeout=0.7)
- return
- except subprocess.TimeoutExpired:
- pass
- # Still running — force it.
- try:
- proc.terminate()
- proc.wait(timeout=1.5)
- except Exception:
- try:
- proc.kill()
- except Exception:
- pass
-
- # ------------------------------------------------------------------
- # Introspection helpers
- # ------------------------------------------------------------------
-
- def whoami(self) -> dict[str, Any]:
- """Return a dict of ``cursor-agent status`` info (best-effort).
-
- Used by the doctor / setup flow to surface logged-in user + tier.
- Returns an empty dict if the CLI is missing or not authenticated.
- """
- try:
- out = subprocess.check_output(
- [self._command, "status"],
- text=True,
- timeout=10,
- env=_build_subprocess_env(self.api_key),
- )
- except Exception:
- return {}
- info: dict[str, Any] = {"raw": out.strip()}
- for line in out.splitlines():
- line = line.strip()
- if line.startswith("✓ Logged in as "):
- info["email"] = line.removeprefix("✓ Logged in as ").strip()
- info["authenticated"] = True
- return info
-
__all__ = [
"CursorAgentClient",
@@ -1750,4 +33,15 @@ def whoami(self) -> dict[str, Any]:
"DEFAULT_CURSOR_COMMAND",
"DEFAULT_CURSOR_MODE",
"DEFAULT_CURSOR_MODEL",
+ "_CursorToolEvent",
+ "_StreamJsonAccumulator",
+ "_SdkSession",
+ "_build_cursor_tool_preview",
+ "_build_subprocess_env",
+ "_format_messages_as_prompt",
+ "_normalize_cursor_tool_name",
+ "_resolve_command",
+ "cursor_sdk_installed",
+ "resolve_cursor_backend",
+ "run_prompt_via_sdk",
]
diff --git a/agent/error_classifier.py b/agent/error_classifier.py
index e8a44866b28e..809a264d55a1 100644
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -433,6 +433,74 @@ def is_auth(self) -> bool:
]
+def _classify_cursor_sdk_error(error: Exception, result_fn) -> "ClassifiedError | None":
+    """Map a cursor-sdk ``CursorAgentError`` onto Hermes failover hints.
+
+    Args:
+        error: Exception to classify.
+        result_fn: Callable ``result_fn(reason, **overrides)`` that builds the result.
+
+    Returns:
+        ``None`` if cursor-sdk cannot be imported or ``error`` is not a
+        ``CursorAgentError``; otherwise the classification for ``error``.
+    """
+    try:
+        from cursor_sdk.errors import (
+            AuthenticationError,
+            CursorAgentError,
+            IntegrationNotConnectedError,
+            NotFoundError,
+            RateLimitError,
+        )
+    except ImportError:
+        return None  # cursor-sdk is not importable, so nothing here can match
+
+    if not isinstance(error, CursorAgentError):
+        return None
+
+    raw_status = getattr(error, "status", None) or getattr(error, "status_code", None)
+    try:
+        status = int(raw_status) if raw_status is not None else None
+    except (TypeError, ValueError):
+        # A non-numeric status must not crash the classifier; treat it as absent.
+        status = None
+    retryable = bool(getattr(error, "is_retryable", False))
+
+    # Typed SDK errors are checked before falling back to status-code heuristics.
+    if isinstance(error, IntegrationNotConnectedError):
+        return result_fn(FailoverReason.auth, status_code=status, retryable=False, should_fallback=True)
+    if isinstance(error, AuthenticationError):
+        return result_fn(
+            FailoverReason.auth, status_code=status, retryable=False,
+            should_rotate_credential=True, should_fallback=True,
+        )
+    if isinstance(error, RateLimitError):
+        return result_fn(
+            FailoverReason.rate_limit, status_code=status or 429, retryable=True,
+            should_rotate_credential=True, should_fallback=True,
+        )
+    if isinstance(error, NotFoundError):
+        return result_fn(
+            FailoverReason.model_not_found, status_code=status or 404,
+            retryable=False, should_fallback=True,
+        )
+
+    if status is not None:
+        message = str(getattr(error, "message", error)).lower()
+        by_status = _classify_by_status(status, message, result_fn)
+        if by_status is not None:
+            if retryable and not by_status.retryable:
+                # The SDK's explicit retry hint overrides a non-retryable status verdict.
+                return result_fn(by_status.reason, status_code=status, retryable=True)
+            return by_status
+
+    if retryable:
+        return result_fn(FailoverReason.server_error, status_code=status, retryable=True)
+
+    # No typed match, no status verdict, no retry hint: non-retryable, allow fallback.
+    return result_fn(FailoverReason.format_error, status_code=status, retryable=False, should_fallback=True)
+
+
# ── Classification pipeline ─────────────────────────────────────────────
def classify_api_error(
@@ -530,6 +598,14 @@ def _result(reason: FailoverReason, **overrides) -> ClassifiedError:
defaults.update(overrides)
return ClassifiedError(**defaults)
+ # ── Cursor SDK structured errors ─────────────────────────────────
+ # cursor-sdk raises typed CursorAgentError subclasses with explicit
+ # retry hints. Map them before generic string heuristics so Hermes'
+ # retry loop respects the SDK's is_retryable flag (retry_after is not consumed yet).
+ _cursor_sdk_err = _classify_cursor_sdk_error(error, _result)
+ if _cursor_sdk_err is not None:
+ return _cursor_sdk_err
+
# ── 1. Provider-specific patterns (highest priority) ────────────
# Provider content-policy / safety-filter block. The provider has made a
diff --git a/docs/cursor_architecture.md b/docs/cursor_architecture.md
index 151cbe82e27e..c9096010ac13 100644
--- a/docs/cursor_architecture.md
+++ b/docs/cursor_architecture.md
@@ -83,7 +83,7 @@ Legend / lifecycle:
Hermes observes but doesn't gate them past Hermes' approvals.mode)
```
-**Key files:** `agent/cursor_agent_client.py` (runtime + accumulator + bridge), `plugins/model-providers/cursor/` (provider profile), `hermes_cli/auth.py` (credentials + status), `agent/agent_runtime_helpers.py:create_openai_client()` (client factory), `agent/conversation_compression.py` (compress + duck-typed reset hook), `agent/display.py` (`get_cute_tool_message`, `extract_edit_diff`; unified diff rendering for cursor edits).
+**Key files:** `agent/cursor/` (package: CLI + SDK backends, typed events, `CursorTurnAccumulator`, bridge), `agent/cursor_agent_client.py` (backward-compat re-exports), `plugins/model-providers/cursor/` (provider profile), `hermes_cli/auth.py` (credentials + status), `agent/agent_runtime_helpers.py:create_openai_client()` (client factory), `agent/conversation_compression.py` (compress + duck-typed reset hook), `agent/display.py` (`get_cute_tool_message`, `extract_edit_diff`; unified diff rendering for cursor edits).
---
@@ -118,7 +118,7 @@ Aliases resolving to `cursor`: `cursor-agent`, `cursor-cli`, `cursor-sub`, `curs
**Per-request lifecycle:**
-1. **Prompt assembly.** `_format_messages_as_prompt()` flattens the OpenAI message list (system/user/assistant/tool) into a single stdin prompt. Tool schemas are inlined as JSON; the model is instructed to emit Hermes tool calls as `{...}` blocks (grammar shared with `copilot_acp_client`).
+1. **Prompt assembly.** `format_messages_as_prompt()` flattens the OpenAI message list (system/user/assistant/tool) into a single stdin prompt. Tool schemas are inlined as JSON; the model is instructed to emit Hermes tool calls as `{...}` blocks (grammar shared with `copilot_acp_client`).
2. **Workspace.** Session-scoped: one temp dir per `CursorAgentClient` instance, reused for every call. Created lazily on first call as `hermes-cursor-*`, tracked in `_ephemeral_dirs` for cleanup at `close()`. Override with `HERMES_CURSOR_WORKSPACE` or the `workspace` ctor arg. A fresh dir per call previously cost roughly 4 to 5 seconds of "first-time workspace bootstrap" tax on every turn; fixed by reusing the dir across the session.
3. **Argv.** `cursor-agent -p --output-format stream-json --model --workspace --force --trust` plus optional `--mode`, `--api-key`, and `HERMES_CURSOR_ARGS`.
4. **Mode mapping:**
@@ -246,6 +246,26 @@ Completed internal events also populate `response.cursor_internal_tools` / `mess
| `HERMES_CURSOR_WORKSPACE` | session-scoped temp dir | Pin workspace directory (reused across all turns of one session by default) |
| `HERMES_CURSOR_BASE_URL` | `cursor://agent` | Provider marker (not HTTP) |
| `HERMES_CURSOR_TIMEOUT_SECONDS` | `1800` | Idle threshold (not wall-clock). Resets on every stream-json event from cursor-agent. A turn may run arbitrarily long in total provided events keep arriving; only true subprocess hangs trigger termination. Default is 30 minutes; cursor-agent's own internal shell ceiling is 10 min so chained long operations can routinely exceed 15 min. Hermes' outer 90s stale-call detector is disabled for cursor so this is the only timeout in effect. |
+| `HERMES_CURSOR_BACKEND` | `auto` | Transport: `auto` (SDK when `cursor-sdk` is installed **and** a real `CURSOR_API_KEY` is set; otherwise CLI), `sdk` (force SDK), `cli` (force `cursor-agent` subprocess). |
+
+## Backend Selection (CLI vs SDK)
+
+Hermes supports two transports for the cursor provider:
+
+| Backend | When used | Auth |
+|---------|-----------|------|
+| **CLI** (`cursor-agent`) | Default for browser-OAuth users (`cursor-agent login`) | CLI session or optional `CURSOR_API_KEY` forwarded to subprocess |
+| **SDK** (`cursor-sdk`) | `HERMES_CURSOR_BACKEND=auto` with `CURSOR_API_KEY` set, or `HERMES_CURSOR_BACKEND=sdk` | User API Key from [Dashboard → Integrations](https://cursor.com/dashboard/integrations) |
+
+Install the SDK extra: `uv pip install 'hermes-agent[cursor]'` or `uv pip install cursor-sdk`.
+
+**Lazy install:** When `HERMES_CURSOR_BACKEND=auto|sdk` and a real `CURSOR_API_KEY` is set, Hermes calls `tools.lazy_deps.ensure("provider.cursor_sdk")` on first SDK use (and offers install in `hermes model` when the key is present but the package is missing). After activation, `hermes update` refreshes `cursor-sdk` via `_refresh_active_lazy_features()` like other lazy backends.
+
+On `auto`, if the SDK path fails because the account lacks `sdk_python_preview_access`, Hermes falls back to the CLI transparently.
+
+Implementation: `agent/cursor/cli_backend.py` and `agent/cursor/sdk_backend.py` both emit typed `CursorTurnEvent`s into `CursorTurnAccumulator` (`agent/cursor/accumulator.py`), so the UI bridge, compression hooks, and status bar stay unchanged regardless of transport.
+
+**Hermes streaming:** Token-by-token streaming from the SDK into Hermes is not implemented yet; both transports accumulate a full turn before returning an OpenAI-shaped response (with optional chunk synthesis for callers that pass `stream=True`).
## Turn-Level Timeout Semantics
@@ -280,23 +300,21 @@ Cursor released a Python SDK (`cursor-sdk`, public beta, v0.1.5 as of
2026-05-23) which exposes a higher-level agent API with native streaming,
typed events (`run.messages()`), proper cancellation (`run.cancel()`),
and a structured error model (`CursorAgentError` with `is_retryable` /
-`retry_after`). It is the architecturally better target than the
-subprocess shim.
-
-We intentionally did **not** adopt it in this PR for three reasons:
-
-1. API access via the SDK is currently allowlist-gated. Users without
- `sdk_python_preview_access` get `IntegrationNotConnectedError`, which
- breaks the "any Cursor subscriber can use this" promise.
-2. SDK auth requires manually generating a User API Key
- (Dashboard → Integrations) and exporting `CURSOR_API_KEY`. The CLI's
- browser OAuth flow is one-time and friction-free; replacing it would
- regress the onboarding experience.
-3. v0.1.5 in two weeks with documented "APIs may change before GA"
- warnings makes upstream pinning risky for a foundational integration.
-
-When all three constraints lift (SDK GA + allowlist removed + auth
-flow supports either API key or CLI-derived token), the inner subprocess
-layer should be replaced by an SDK-backed implementation. The outer
-layer (`auth_type="external_process"`, provider registration, model
-catalog) stays as-is; only `agent/cursor_agent_client.py` changes.
+`retry_after`).
+
+**Status (2026-05-28):** Hermes now supports the SDK as an opt-in/auto
+backend via `HERMES_CURSOR_BACKEND` and `agent/cursor/sdk_backend.py`.
+The CLI subprocess remains the default for users who only have browser
+OAuth (`cursor-agent login`) without a User API Key.
+
+Remaining gaps before SDK becomes the unconditional default:
+
+1. API access via the SDK is still allowlist-gated for some accounts
+ (`sdk_python_preview_access`). Auto mode falls back to CLI when this
+ blocks.
+2. SDK auth still requires manually generating a User API Key — no
+ browser OAuth flow yet.
+3. The SDK is still pre-GA: v0.1.5 ships with "APIs may change before GA" warnings, so Hermes pins an exact version (`cursor-sdk==0.1.5`).
+
+When all three constraints lift, `auto` can flip its default to SDK-first
+without changing the outer provider registration layer.
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 97d15142ce04..fa488637c220 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -5472,6 +5472,40 @@ def _model_flow_cursor(config, current_model=""):
}.get(_cursor_mode, _cursor_mode)
print(f" Cursor mode: {_cursor_mode} — {_mode_blurb}")
+ try:
+ from hermes_cli.status import _cursor_transport_label
+
+ transport = _cursor_transport_label()
+ if transport:
+ print(f" Transport: {transport}")
+ except Exception:
+ pass
+
+ _cursor_api_key = (os.environ.get("CURSOR_API_KEY") or "").strip()
+ if _cursor_api_key and _cursor_api_key not in {
+ "cursor-agent-login",
+ "cursor-cli-login",
+ "external-process",
+ "external_process",
+ }:
+ try:
+ from agent.cursor.backend import cursor_sdk_installed, ensure_cursor_sdk
+
+ if not cursor_sdk_installed():
+ try:
+ from hermes_cli.cli_output import prompt_yes_no
+
+ if prompt_yes_no(
+ " Install cursor-sdk for faster SDK transport? [Y/n] ",
+ default=True,
+ ):
+ ensure_cursor_sdk(prompt=True)
+ print(" ✓ cursor-sdk installed")
+ except Exception as exc:
+ print(f" ⚠ cursor-sdk install skipped: {exc}")
+ except Exception:
+ pass
+
# Live catalog first (115+ models incl. composer-2.5-fast default);
# fall back to the curated snapshot if the CLI call fails.
model_list = provider_model_ids(provider_id)
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
index 2cce67b9c1de..7e7e52c640bd 100644
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -87,6 +87,33 @@ def _effective_provider_label() -> str:
return provider_label(effective)
+def _cursor_transport_label() -> str | None:
+    """Return the cursor backend label (sdk vs cli) when cursor is the provider.
+
+    Returns:
+        ``None`` when the effective provider is not ``"cursor"``; otherwise a
+        short human-readable label naming the transport and, for the CLI
+        case, why the SDK is not in use.
+    """
+    # Bind up front so the AuthError handler can read it even when
+    # resolve_requested_provider() itself is the call that raised.
+    requested = None
+    try:
+        requested = resolve_requested_provider()
+        effective = resolve_provider(requested)
+    except AuthError:
+        effective = requested or "auto"
+    if effective != "cursor":
+        return None
+    try:
+        from agent.cursor.backend import cursor_sdk_installed, resolve_cursor_backend
+    except Exception:
+        return "cli (cursor-agent)"
+    forced = os.getenv("HERMES_CURSOR_BACKEND", "").strip().lower() or "auto"
+    try:
+        backend = resolve_cursor_backend()
+    except Exception:
+        # Backend resolution can raise (e.g. a forced SDK backend whose
+        # package cannot be provided). Status output must not crash, so
+        # report the requested transport and let the label show what is missing.
+        backend = "sdk" if forced == "sdk" else "cli"
+    if backend == "sdk":
+        installed = cursor_sdk_installed()
+        return f"sdk (cursor-sdk{' ✓' if installed else ', package missing'})"
+    if forced == "cli":
+        return "cli (cursor-agent, forced)"
+    if not cursor_sdk_installed():
+        return "cli (cursor-agent — install cursor-sdk for SDK)"
+    if not (get_env_value("CURSOR_API_KEY") or "").strip():
+        return "cli (cursor-agent — set CURSOR_API_KEY for SDK)"
+    return "cli (cursor-agent)"
+
+
from hermes_constants import is_termux as _is_termux
@@ -118,6 +145,9 @@ def show_status(args):
print(f" Model: {_configured_model_label(config)}")
print(f" Provider: {_effective_provider_label()}")
+ cursor_transport = _cursor_transport_label()
+ if cursor_transport:
+ print(f" Transport: {cursor_transport}")
# =========================================================================
# API Keys
@@ -146,6 +176,7 @@ def show_status(args):
"FAL": "FAL_KEY",
"ElevenLabs": "ELEVENLABS_API_KEY",
"GitHub": "GITHUB_TOKEN",
+ "Cursor": "CURSOR_API_KEY",
}
def _resolve_env(env_ref) -> str:
diff --git a/plugins/model-providers/cursor/__init__.py b/plugins/model-providers/cursor/__init__.py
index 5092f48ecb1a..6d62f5b95a1f 100644
--- a/plugins/model-providers/cursor/__init__.py
+++ b/plugins/model-providers/cursor/__init__.py
@@ -1,14 +1,11 @@
-"""Cursor provider profile — runs through the ``cursor-agent`` CLI.
+"""Cursor provider profile — CLI or SDK transport.
-Cursor doesn't expose a chat completions endpoint; it ships an agent. We
-spawn ``cursor-agent -p --output-format stream-json --mode ask`` per request
-and translate the line-delimited events into an OpenAI chat-completion
-response. Auth piggybacks on the user's existing ``cursor-agent login`` (or
-``CURSOR_API_KEY``) so every Cursor tier — Hobby, Pro, Pro+, Ultra, Teams —
-can use Hermes through their existing subscription / credits.
+Cursor doesn't expose a chat completions endpoint; it ships an agent. Hermes
+routes requests through ``cursor-agent`` (CLI subprocess) or ``cursor-sdk``
+(Python SDK) depending on ``HERMES_CURSOR_BACKEND`` and ``CURSOR_API_KEY``.
-See ``agent/cursor_agent_client.py`` for the runtime client and
-``docs/plans/2026-05-25-cursor-provider-integration.md`` for the design.
+See ``agent/cursor/`` for the runtime client and
+``docs/cursor_architecture.md`` for the design.
"""
from __future__ import annotations
diff --git a/pyproject.toml b/pyproject.toml
index e1fe62b6d0c5..0b09323bfa18 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -117,6 +117,7 @@ sms = ["aiohttp==3.13.3"]
# to it, which is already provided by the `mcp` extra.
computer-use = ["mcp==1.26.0"]
acp = ["agent-client-protocol==0.9.0"]
+cursor = ["cursor-sdk==0.1.5"]
# mistral: extra REMOVED 2026-05-12 — `mistralai` PyPI project quarantined
# after malicious 2.4.6 release (Mini Shai-Hulud worm). Every version of
# `mistralai` returns 404 on PyPI right now, so any pin we'd write is
diff --git a/tests/agent/test_cursor_agent_client.py b/tests/agent/test_cursor_agent_client.py
index a7ffca1e6fe2..a8ed956bcd79 100644
--- a/tests/agent/test_cursor_agent_client.py
+++ b/tests/agent/test_cursor_agent_client.py
@@ -491,6 +491,7 @@ def setUp(self) -> None:
# Make env predictable.
keys = [k for k in os.environ if k.startswith("HERMES_CURSOR_") or k == "CURSOR_API_KEY" or k == "CURSOR_AGENT_PATH"]
self._saved = {k: os.environ.pop(k) for k in keys}
+ os.environ["HERMES_CURSOR_BACKEND"] = "cli"
def tearDown(self) -> None:
for k, v in self._saved.items():
@@ -503,7 +504,7 @@ def _fake_popen(argv, **kwargs):
fake_proc.env_seen = kwargs.get("env")
return fake_proc
- return patch("agent.cursor_agent_client.subprocess.Popen", side_effect=_fake_popen)
+ return patch("agent.cursor.cli_backend.subprocess.Popen", side_effect=_fake_popen)
def test_happy_path_returns_openai_shaped_response(self) -> None:
proc = _FakeProcess(SUCCESS_STREAM)
@@ -1453,6 +1454,16 @@ def test_tool_call_block_is_extracted(self) -> None:
finally:
client.close()
+    def test_backend_property_reflects_resolution(self) -> None:
+        """Under ``auto``, ``backend`` is "cli" without a key and "sdk" with a real key."""
+        os.environ["HERMES_CURSOR_BACKEND"] = "auto"
+        with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True):
+            cli_client = CursorAgentClient()
+            # try/finally so a failing assertion cannot leak the client.
+            try:
+                self.assertEqual(cli_client.backend, "cli")
+            finally:
+                cli_client.close()
+            sdk_client = CursorAgentClient(api_key="crsr_real_test_key")
+            try:
+                self.assertEqual(sdk_client.backend, "sdk")
+            finally:
+                sdk_client.close()
+
def test_subprocess_env_has_cursor_api_key_when_provided(self) -> None:
proc = _FakeProcess(SUCCESS_STREAM)
client = CursorAgentClient(api_key="crsr_test_42")
@@ -1642,7 +1653,7 @@ def _fake_popen(argv, **kwargs):
fake_proc.env_seen = kwargs.get("env")
return fake_proc
- return patch("agent.cursor_agent_client.subprocess.Popen", side_effect=_fake_popen)
+ return patch("agent.cursor.cli_backend.subprocess.Popen", side_effect=_fake_popen)
def test_context_estimate_callback_fires_before_subprocess(self) -> None:
# Regression: the status bar sat at 0/200K throughout a long
@@ -1764,7 +1775,7 @@ def _fake_popen(argv, **kwargs):
return proc
try:
- with patch("agent.cursor_agent_client.subprocess.Popen", side_effect=_fake_popen):
+ with patch("agent.cursor.cli_backend.subprocess.Popen", side_effect=_fake_popen):
resp = client.chat.completions.create(
model="composer-2.5",
messages=[{"role": "user", "content": "Hi"}],
diff --git a/tests/agent/test_cursor_events.py b/tests/agent/test_cursor_events.py
new file mode 100644
index 000000000000..3ebc7bcbee26
--- /dev/null
+++ b/tests/agent/test_cursor_events.py
@@ -0,0 +1,84 @@
+"""Tests for typed Cursor turn events and CursorTurnAccumulator."""
+
+from __future__ import annotations
+
+import unittest
+
+from agent.cursor.accumulator import CursorTurnAccumulator
+from agent.cursor.events import (
+ AssistantTextEvent,
+ ToolCompletedEvent,
+ ToolStartedEvent,
+ TurnResultEvent,
+ stream_json_dict_to_events,
+)
+
+
+class TestStreamJsonConversion(unittest.TestCase):
+    """stream-json dicts are converted into typed turn events."""
+
+    @staticmethod
+    def _shell_tool_call(subtype, shell_call):
+        """Build a ``tool_call`` stream-json dict for call id ``c1``."""
+        return {
+            "type": "tool_call",
+            "subtype": subtype,
+            "call_id": "c1",
+            "tool_call": {"shellToolCall": shell_call},
+        }
+
+    def test_assistant_text(self):
+        payload = {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "hello"}]},
+        }
+        converted = stream_json_dict_to_events(payload)
+        self.assertEqual(len(converted), 1)
+        first = converted[0]
+        self.assertIsInstance(first, AssistantTextEvent)
+        self.assertEqual(first.text, "hello")
+
+    def test_tool_started_and_completed(self):
+        started_payload = self._shell_tool_call("started", {"args": {"command": "ls"}})
+        started = stream_json_dict_to_events(started_payload)
+        self.assertIsInstance(started[0], ToolStartedEvent)
+
+        completed_payload = self._shell_tool_call(
+            "completed",
+            {
+                "args": {"command": "ls"},
+                "result": {"success": {"stdout": "ok"}},
+            },
+        )
+        completed = stream_json_dict_to_events(completed_payload)
+        self.assertIsInstance(completed[0], ToolCompletedEvent)
+
+
+class TestCursorTurnAccumulator(unittest.TestCase):
+    """Accumulator behaviour for typed events and legacy stream-json dicts."""
+
+    def test_synthesis_after_tools(self):
+        accumulator = CursorTurnAccumulator()
+        turn = [
+            AssistantTextEvent(text="Searching…"),
+            ToolStartedEvent(call_id="c1", envelope_key="grepToolCall", args={}),
+            ToolCompletedEvent(
+                call_id="c1",
+                envelope_key="grepToolCall",
+                args={},
+                result_payload={"result": {"success": {}}},
+            ),
+            AssistantTextEvent(text="Found it."),
+            TurnResultEvent(is_error=False, result_text="Found it."),
+        ]
+        for event in turn:
+            accumulator.feed(event)
+        self.assertEqual(accumulator.synthesis_text(), "Found it.")
+        self.assertIn("Searching", accumulator.narration_text())
+
+    def test_legacy_dict_feed(self):
+        accumulator = CursorTurnAccumulator()
+        assistant_line = {
+            "type": "assistant",
+            "message": {"content": [{"type": "text", "text": "plain"}]},
+        }
+        result_line = {
+            "type": "result",
+            "subtype": "success",
+            "is_error": False,
+            "result": "plain",
+            "usage": {"inputTokens": 10, "outputTokens": 2},
+        }
+        accumulator.feed(assistant_line)
+        accumulator.feed(result_line)
+        self.assertEqual(accumulator.synthesis_text(), "plain")
+        self.assertTrue(accumulator.terminal)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/agent/test_cursor_sdk_backend.py b/tests/agent/test_cursor_sdk_backend.py
new file mode 100644
index 000000000000..dfd22d521f3a
--- /dev/null
+++ b/tests/agent/test_cursor_sdk_backend.py
@@ -0,0 +1,160 @@
+"""Unit tests for cursor-sdk backend selection and typed event translation."""
+
+from __future__ import annotations
+
+import os
+import unittest
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+from agent.cursor.backend import cursor_sdk_installed, resolve_cursor_backend
+from agent.cursor.events import (
+ AssistantTextEvent,
+ ToolStartedEvent,
+ interaction_update_to_events,
+ sdk_message_to_events,
+)
+from agent.cursor.sdk_backend import SdkSession, run_prompt_via_sdk
+
+
+class TestBackendResolution(unittest.TestCase):
+    """``resolve_cursor_backend`` under different env / API-key combinations."""
+
+    def setUp(self):
+        # Remember and clear the variables that influence backend selection.
+        self._saved = {}
+        for name in ("HERMES_CURSOR_BACKEND", "CURSOR_API_KEY"):
+            self._saved[name] = os.environ.pop(name, None)
+
+    def tearDown(self):
+        for name, previous in self._saved.items():
+            if previous is not None:
+                os.environ[name] = previous
+            else:
+                os.environ.pop(name, None)
+
+    def test_auto_without_key_uses_cli(self):
+        with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True):
+            chosen = resolve_cursor_backend(api_key=None)
+            self.assertEqual(chosen, "cli")
+
+    def test_auto_with_key_uses_sdk_when_installed(self):
+        with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True):
+            chosen = resolve_cursor_backend(api_key="crsr_real_key_12345")
+            self.assertEqual(chosen, "sdk")
+
+    def test_auto_with_sentinel_uses_cli(self):
+        with patch("agent.cursor.backend.cursor_sdk_installed", return_value=True):
+            chosen = resolve_cursor_backend(api_key="cursor-agent-login")
+            self.assertEqual(chosen, "cli")
+
+    def test_forced_cli(self):
+        os.environ["HERMES_CURSOR_BACKEND"] = "cli"
+        chosen = resolve_cursor_backend(api_key="crsr_x")
+        self.assertEqual(chosen, "cli")
+
+    def test_forced_sdk_requires_package(self):
+        os.environ["HERMES_CURSOR_BACKEND"] = "sdk"
+        os.environ["CURSOR_API_KEY"] = "crsr_x"
+        package_missing = patch(
+            "agent.cursor.backend.cursor_sdk_installed", return_value=False
+        )
+        install_fails = patch(
+            "agent.cursor.backend.ensure_cursor_sdk",
+            side_effect=RuntimeError("cursor-sdk is not installed"),
+        )
+        with package_missing, install_fails, self.assertRaises(RuntimeError):
+            resolve_cursor_backend(api_key="crsr_x")
+
+
+class TestEventTranslation(unittest.TestCase):
+    """SDK messages and interaction updates translate into typed events."""
+
+    def test_assistant_sdk_message(self):
+        text_part = SimpleNamespace(type="text", text="hello")
+        sdk_message = SimpleNamespace(
+            type="assistant",
+            message=SimpleNamespace(content=[text_part]),
+        )
+        translated = sdk_message_to_events(sdk_message)
+        self.assertEqual(len(translated), 1)
+        first = translated[0]
+        self.assertIsInstance(first, AssistantTextEvent)
+        self.assertEqual(first.text, "hello")
+
+    def test_tool_call_started(self):
+        fields = {
+            "type": "tool_call",
+            "call_id": "c1",
+            "name": "shell",
+            "status": "running",
+            "args": {"command": "ls"},
+            "result": None,
+        }
+        translated = sdk_message_to_events(SimpleNamespace(**fields))
+        first = translated[0]
+        self.assertIsInstance(first, ToolStartedEvent)
+        self.assertEqual(first.envelope_key, "shellToolCall")
+
+    def test_text_delta_interaction(self):
+        delta = SimpleNamespace(type="text-delta", text="partial ")
+        translated = interaction_update_to_events(delta)
+        first = translated[0]
+        self.assertIsInstance(first, AssistantTextEvent)
+        self.assertEqual(first.text, "partial ")
+
+
+# patch("cursor_sdk.Agent.create") imports ``cursor_sdk``; the package is an
+# optional extra, so skip instead of erroring when it is absent.
+@unittest.skipUnless(cursor_sdk_installed(), "cursor-sdk is not installed")
+class TestRunPromptViaSdk(unittest.TestCase):
+    """``run_prompt_via_sdk`` driven by a fully mocked cursor-sdk agent."""
+
+    def test_streams_events_into_accumulator(self):
+        sdk_session = SdkSession()
+        fake_agent = MagicMock()
+        fake_run = MagicMock()
+        fake_result = SimpleNamespace(status="finished", result="done", id="r1", duration_ms=10)
+
+        def _events():
+            # One assistant text message followed by the terminal result.
+            yield SimpleNamespace(
+                kind="sdk_message",
+                sdk_message=SimpleNamespace(
+                    type="assistant",
+                    message=SimpleNamespace(
+                        content=[SimpleNamespace(type="text", text="hi")]
+                    ),
+                ),
+                interaction_update=None,
+                result=None,
+            )
+            yield SimpleNamespace(
+                kind="result",
+                sdk_message=None,
+                interaction_update=None,
+                result={
+                    "status": "finished",
+                    "result": "done",
+                    "runId": "r1",
+                    "durationMs": 10,
+                    "usage": {"inputTokens": 10, "outputTokens": 2},
+                },
+            )
+
+        fake_run.events.side_effect = _events
+        fake_run.wait.return_value = fake_result
+        fake_run.supports.return_value = True
+        fake_agent.send.return_value = fake_run
+
+        with patch("cursor_sdk.Agent.create", return_value=fake_agent), patch.object(
+            SdkSession,
+            "get_client",
+            return_value=MagicMock(),
+        ), patch("agent.cursor.sdk_backend.ensure_cursor_sdk"):
+            acc = run_prompt_via_sdk(
+                prompt_text="ping",
+                model="composer-2.5",
+                api_key="crsr_test_key",
+                workspace="/tmp/ws",
+                mode="agent",
+                timeout_seconds=30,
+                on_tool_event=None,
+                on_text_event=None,
+                sdk_session=sdk_session,
+            )
+        self.assertFalse(acc.is_error)
+        fake_agent.close.assert_called_once()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
index 393397349d81..f5e4bbcde4be 100644
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@@ -86,6 +86,9 @@
# when model.auth_mode=entra_id is selected; key-based azure-foundry
# users never pay this import.
"provider.azure_identity": ("azure-identity==1.25.3",),
+ # Cursor SDK transport — lazy-installed when HERMES_CURSOR_BACKEND=auto|sdk
+ # and CURSOR_API_KEY is set. CLI-only OAuth users never pull this in.
+ "provider.cursor_sdk": ("cursor-sdk==0.1.5",),
# ─── Web search backends ───────────────────────────────────────────────
"search.exa": ("exa-py==2.10.2",),