diff --git a/src/conductor/providers/_event_format.py b/src/conductor/providers/_event_format.py new file mode 100644 index 0000000..b2cdf31 --- /dev/null +++ b/src/conductor/providers/_event_format.py @@ -0,0 +1,97 @@ +"""Helpers for formatting tool-call event payloads emitted to subscribers. + +The console renderer, JSONL event logger, and web dashboard all consume +``agent_tool_start`` / ``agent_tool_complete`` events from both providers. +These helpers ensure the payloads are human-readable strings rather than +Python ``repr()`` output of structured SDK objects. + +See https://github.com/microsoft/conductor/issues/93. +""" + +from __future__ import annotations + +import json +from typing import Any + + +def format_tool_arguments(arguments: Any, max_length: int = 500) -> str | None: + """Render tool-call arguments as a compact, human-readable string. + + Dict-like arguments are JSON-encoded so the dashboard sees ``{"k": "v"}`` + rather than Python repr (``{'k': 'v'}`` with single quotes and doubled + backslashes on Windows paths). Falls back to ``str(arguments)`` for + inputs that aren't JSON-serializable. + + When the rendered string exceeds ``max_length``, it is truncated to + exactly ``max_length`` characters and a single-character ellipsis + (``"…"``) is appended, so the returned string is at most + ``max_length + 1`` characters long. + + Args: + arguments: The tool-call arguments object (typically a dict). + max_length: Maximum length of the rendered string before an + ellipsis is appended. Note: the returned string may be + ``max_length + 1`` characters long when truncation occurs. + + Returns: + A formatted string, or ``None`` when ``arguments`` is falsy + (including ``None``, ``""``, and ``{}``). + """ + if not arguments: + return None + + try: + # ``default=str`` lets us serialize objects (e.g. Path) without crashing. + rendered = json.dumps(arguments, default=str, ensure_ascii=False) + except (TypeError, ValueError): + rendered = str(arguments) + + if len(rendered) > max_length: + return rendered[:max_length] + "…" + return rendered + + +def extract_tool_result_text(result: Any, max_length: int = 500) -> str | None: + """Extract human-readable text from a tool-call result. + + The Copilot SDK emits structured ``Result(content=..., detailed_content=..., + contents=..., kind=...)`` objects whose ``str()`` is the unhelpful + Python repr — newlines escaped, paths doubled, wrapper visible. This + helper unwraps the text payload as follows: + + 1. Plain strings (e.g. from the Claude provider's ``MCPManager``) are + returned unchanged. + 2. For other objects, the helper reads the ``content`` attribute; if + absent or ``None``, it falls back to ``detailed_content``. + 3. If neither attribute yields a non-empty string, ``str(result)`` is + used as a last resort for unknown shapes. + + When the extracted text exceeds ``max_length``, it is truncated to + exactly ``max_length`` characters and a single-character ellipsis + (``"…"``) is appended, so the returned string is at most + ``max_length + 1`` characters long. + + Args: + result: The tool result object emitted by the SDK. + max_length: Maximum length of the extracted text before an + ellipsis is appended. Note: the returned string may be + ``max_length + 1`` characters long when truncation occurs. + + Returns: + A formatted string, or ``None`` when ``result`` is falsy + (including ``None`` and ``""``). + """ + if not result: + return None + + if isinstance(result, str): + text: str = result + else: + text_attr = getattr(result, "content", None) + if text_attr is None: + text_attr = getattr(result, "detailed_content", None) + text = text_attr if isinstance(text_attr, str) and text_attr else str(result) + + if len(text) > max_length: + return text[:max_length] + "…" + return text diff --git a/src/conductor/providers/claude.py b/src/conductor/providers/claude.py index cd4307c..955d180 100644 --- a/src/conductor/providers/claude.py +++ b/src/conductor/providers/claude.py @@ -30,6 +30,10 @@ from conductor.exceptions import ProviderError, ValidationError from conductor.executor.output import validate_output +from conductor.providers._event_format import ( + extract_tool_result_text, + format_tool_arguments, +) from conductor.providers.base import AgentOutput, AgentProvider, EventCallback, match_model_id from conductor.providers.reasoning import ( ReasoningEffort, @@ -1539,7 +1543,7 @@ async def _execute_agentic_loop( if event_callback: try: arguments = ( - str(dict(tool_use.input))[:500] + format_tool_arguments(dict(tool_use.input)) if hasattr(tool_use, "input") and tool_use.input else None ) @@ -1570,7 +1574,7 @@ async def _execute_agentic_loop( "agent_tool_complete", { "tool_name": tool_use.name, - "result": str(result)[:500] if result else None, + "result": extract_tool_result_text(result), }, ) except Exception: diff --git a/src/conductor/providers/copilot.py b/src/conductor/providers/copilot.py index 0a624a5..03c3304 100644 --- a/src/conductor/providers/copilot.py +++ b/src/conductor/providers/copilot.py @@ -18,6 +18,10 @@ from typing import TYPE_CHECKING, Any from conductor.exceptions import ProviderError, ValidationError +from conductor.providers._event_format import ( + extract_tool_result_text, + format_tool_arguments, +) from conductor.providers.base import AgentOutput, AgentProvider, EventCallback, match_model_id from conductor.providers.reasoning import ReasoningEffort, resolve_reasoning_effort @@ -1219,8 +1223,7 @@ def _print(renderable: Any) -> None: if full_mode: args = getattr(event.data, "arguments", None) or getattr(event.data, "args", None) if args: - args_str = str(args) - args_preview = args_str[:200] + "..." if len(args_str) > 200 else args_str + args_preview = format_tool_arguments(args, max_length=200) or "" arg_text = Text() arg_text.append(" │ ", style="dim") arg_text.append("args: ", style="dim italic") @@ -1241,11 +1244,7 @@ def _print(renderable: Any) -> None: if full_mode: result = getattr(event.data, "result", None) or getattr(event.data, "output", None) if result: - result_str = str(result) - if len(result_str) > 200: - result_preview = result_str[:200] + "..." - else: - result_preview = result_str + result_preview = extract_tool_result_text(result, max_length=200) or "" result_text = Text() result_text.append(" │ ", style="dim") result_text.append("result: ", style="dim italic") @@ -1327,7 +1326,7 @@ def _forward_event(event_type: str, event: Any, callback: EventCallback) -> None "agent_tool_start", { "tool_name": str(tool_name), - "arguments": str(arguments)[:500] if arguments else None, + "arguments": format_tool_arguments(arguments), }, ) @@ -1340,7 +1339,7 @@ def _forward_event(event_type: str, event: Any, callback: EventCallback) -> None "agent_tool_complete", { "tool_name": str(tool_name) if tool_name else None, - "result": str(result)[:500] if result else None, + "result": extract_tool_result_text(result), }, ) diff --git a/tests/test_providers/test_claude_event_callback.py b/tests/test_providers/test_claude_event_callback.py index fb4c764..521afeb 100644 --- a/tests/test_providers/test_claude_event_callback.py +++ b/tests/test_providers/test_claude_event_callback.py @@ -392,7 +392,9 @@ async def test_tool_arguments_truncated(self) -> None: start_events = [(t, d) for t, d in events if t == "agent_tool_start"] assert len(start_events) == 1 - assert len(start_events[0][1]["arguments"]) <= 500 + # Truncated to 500 chars + a single-char "…" ellipsis when long. + assert len(start_events[0][1]["arguments"]) <= 501 + assert start_events[0][1]["arguments"].endswith("…") @pytest.mark.asyncio async def test_tool_result_truncated(self) -> None: @@ -422,7 +424,9 @@ async def test_tool_result_truncated(self) -> None: complete_events = [(t, d) for t, d in events if t == "agent_tool_complete"] assert len(complete_events) == 1 - assert len(complete_events[0][1]["result"]) <= 500 + # Truncated to 500 chars + a single-char "…" ellipsis when long. + assert len(complete_events[0][1]["result"]) <= 501 + assert complete_events[0][1]["result"].endswith("…") # --------------------------------------------------------------------------- diff --git a/tests/test_providers/test_copilot_event_callback.py b/tests/test_providers/test_copilot_event_callback.py new file mode 100644 index 0000000..97a8251 --- /dev/null +++ b/tests/test_providers/test_copilot_event_callback.py @@ -0,0 +1,221 @@ +"""Tests for event_callback wiring in the Copilot provider. + +Mirrors ``test_claude_event_callback.py`` so both providers verify that +``agent_tool_start`` / ``agent_tool_complete`` payloads are formatted as +human-readable strings (JSON for arguments, unwrapped text for results) +rather than Python ``repr()`` of structured SDK objects. + +Related issues: +- https://github.com/microsoft/conductor/issues/93 (formatting) +- https://github.com/microsoft/conductor/issues/39 (provider parity) +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any +from unittest.mock import MagicMock + +from conductor.providers.copilot import CopilotProvider + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_event(event_type: str, **data_attrs: Any) -> MagicMock: + """Create a mock SDK event with the given event_type and data attributes.""" + event = MagicMock() + event.event_type = event_type + event.data = MagicMock(spec_set=list(data_attrs.keys())) + for key, value in data_attrs.items(): + setattr(event.data, key, value) + return event + + +@dataclass +class _FakeResult: + """Stand-in for the Copilot SDK's structured Result object.""" + + content: str | None = None + contents: object | None = None + detailed_content: str | None = None + kind: object | None = None + + +# --------------------------------------------------------------------------- +# tool.execution_start → agent_tool_start +# --------------------------------------------------------------------------- + + +class TestForwardToolStart: + """Verify _forward_event maps tool.execution_start correctly.""" + + def test_dict_arguments_render_as_json(self) -> None: + """Dict args must serialize as JSON, not Python repr (issue #93).""" + events: list[tuple[str, dict[str, Any]]] = [] + + event = _make_event( + "tool.execution_start", + tool_name="my_tool", + arguments={"path": r"C:\Users\dev", "limit": 10}, + ) + CopilotProvider._forward_event( + "tool.execution_start", event, lambda t, d: events.append((t, d)) + ) + + assert len(events) == 1 + evt_type, evt_data = events[0] + assert evt_type == "agent_tool_start" + assert evt_data["tool_name"] == "my_tool" + # JSON quoting (double quotes, not single quotes from Python repr). + assert evt_data["arguments"] is not None + assert '"path"' in evt_data["arguments"] + assert "'path'" not in evt_data["arguments"] + # Windows path single-escaped (JSON escapes once: \\ source = \ display). + assert r'"C:\\Users\\dev"' in evt_data["arguments"] + + def test_arguments_truncated_with_ellipsis(self) -> None: + """Long arguments must be truncated and end with the … sentinel.""" + events: list[tuple[str, dict[str, Any]]] = [] + + event = _make_event( + "tool.execution_start", + tool_name="my_tool", + arguments={"data": "x" * 600}, + ) + CopilotProvider._forward_event( + "tool.execution_start", event, lambda t, d: events.append((t, d)) + ) + + assert len(events) == 1 + arguments = events[0][1]["arguments"] + assert arguments is not None + # Truncated to 500 chars + a single-char "…" ellipsis when long. + assert len(arguments) <= 501 + assert arguments.endswith("…") + + def test_args_attribute_alias_supported(self) -> None: + """The SDK may emit either .arguments or .args; both must work.""" + events: list[tuple[str, dict[str, Any]]] = [] + + event = _make_event( + "tool.execution_start", + tool_name="my_tool", + args={"alias": True}, + ) + CopilotProvider._forward_event( + "tool.execution_start", event, lambda t, d: events.append((t, d)) + ) + + assert events[0][1]["arguments"] is not None + assert '"alias"' in events[0][1]["arguments"] + + def test_missing_arguments_yields_none(self) -> None: + events: list[tuple[str, dict[str, Any]]] = [] + + event = _make_event("tool.execution_start", tool_name="my_tool", arguments=None) + CopilotProvider._forward_event( + "tool.execution_start", event, lambda t, d: events.append((t, d)) + ) + + assert events[0][1]["arguments"] is None + + +# --------------------------------------------------------------------------- +# tool.execution_complete → agent_tool_complete +# --------------------------------------------------------------------------- + + +class TestForwardToolComplete: + """Verify _forward_event maps tool.execution_complete correctly.""" + + def test_structured_result_unwraps_content(self) -> None: + """SDK Result objects must be unwrapped to plain text (issue #93).""" + events: list[tuple[str, dict[str, Any]]] = [] + + result = _FakeResult( + content="line one\nline two", + detailed_content="line one\nline two (full)", + ) + event = _make_event( + "tool.execution_complete", + tool_name="my_tool", + result=result, + ) + CopilotProvider._forward_event( + "tool.execution_complete", event, lambda t, d: events.append((t, d)) + ) + + assert len(events) == 1 + evt_type, evt_data = events[0] + assert evt_type == "agent_tool_complete" + assert evt_data["tool_name"] == "my_tool" + # Real newline preserved, not literal backslash-n. + assert evt_data["result"] == "line one\nline two" + # Crucially NOT the Python repr of the Result wrapper. + assert "_FakeResult(" not in evt_data["result"] + assert "\\n" not in evt_data["result"] + + def test_falls_back_to_detailed_content(self) -> None: + events: list[tuple[str, dict[str, Any]]] = [] + + result = _FakeResult(content=None, detailed_content="full payload") + event = _make_event( + "tool.execution_complete", + tool_name="my_tool", + result=result, + ) + CopilotProvider._forward_event( + "tool.execution_complete", event, lambda t, d: events.append((t, d)) + ) + + assert events[0][1]["result"] == "full payload" + + def test_result_truncated_with_ellipsis(self) -> None: + events: list[tuple[str, dict[str, Any]]] = [] + + result = _FakeResult(content="y" * 600) + event = _make_event( + "tool.execution_complete", + tool_name="my_tool", + result=result, + ) + CopilotProvider._forward_event( + "tool.execution_complete", event, lambda t, d: events.append((t, d)) + ) + + result_text = events[0][1]["result"] + assert result_text is not None + # Truncated to 500 chars + a single-char "…" ellipsis when long. + assert len(result_text) <= 501 + assert result_text.endswith("…") + + def test_output_attribute_alias_supported(self) -> None: + """The SDK may emit either .result or .output; both must work.""" + events: list[tuple[str, dict[str, Any]]] = [] + + event = _make_event( + "tool.execution_complete", + tool_name="my_tool", + output="aliased text", + ) + CopilotProvider._forward_event( + "tool.execution_complete", event, lambda t, d: events.append((t, d)) + ) + + assert events[0][1]["result"] == "aliased text" + + def test_missing_result_yields_none(self) -> None: + events: list[tuple[str, dict[str, Any]]] = [] + + event = _make_event( + "tool.execution_complete", + tool_name="my_tool", + result=None, + ) + CopilotProvider._forward_event( + "tool.execution_complete", event, lambda t, d: events.append((t, d)) + ) + + assert events[0][1]["result"] is None diff --git a/tests/test_providers/test_event_format.py b/tests/test_providers/test_event_format.py new file mode 100644 index 0000000..bae0642 --- /dev/null +++ b/tests/test_providers/test_event_format.py @@ -0,0 +1,200 @@ +"""Tests for tool-event payload formatting helpers (issue #93).""" + +from __future__ import annotations + +from dataclasses import dataclass + +from conductor.providers._event_format import ( + extract_tool_result_text, + format_tool_arguments, +) + + +class TestFormatToolArguments: + """Tests for format_tool_arguments.""" + + def test_returns_none_for_empty(self) -> None: + assert format_tool_arguments(None) is None + assert format_tool_arguments({}) is None + assert format_tool_arguments("") is None + + def test_dict_renders_as_json(self) -> None: + """Dict args render as JSON, not Python repr (issue #93).""" + result = format_tool_arguments({"path": "/tmp/file", "limit": 10}) + assert result is not None + # JSON uses double quotes; Python repr uses single quotes. + assert '"path"' in result + assert "'path'" not in result + + def test_windows_path_not_double_escaped(self) -> None: + """Windows paths shouldn't show doubled backslashes.""" + result = format_tool_arguments({"path": r"C:\Users\dev"}) + # JSON escapes backslashes, but only once: \\ in source = \ on display + assert result is not None + assert r'"C:\\Users\\dev"' in result + + def test_truncation_appends_ellipsis(self) -> None: + long_value = "x" * 600 + result = format_tool_arguments({"v": long_value}, max_length=100) + assert result is not None + assert len(result) == 101 # 100 chars + 1-char ellipsis "…" + assert result.endswith("…") + + def test_no_truncation_at_or_below_max_length(self) -> None: + """Inputs at or below max_length must be returned unchanged.""" + # The JSON-encoded form is '{"v": "xxxx"}' = 13 chars total. + # Pick max_length so the encoded string is exactly max_length. + encoded = format_tool_arguments({"v": "xxxx"}) + assert encoded == '{"v": "xxxx"}' + exact = format_tool_arguments({"v": "xxxx"}, max_length=len(encoded)) + assert exact == encoded + assert not exact.endswith("…") + + below = format_tool_arguments({"v": "xxxx"}, max_length=len(encoded) + 1) + assert below == encoded + assert not below.endswith("…") + + def test_truncation_at_one_over_max_length(self) -> None: + """Inputs one char over max_length must be truncated to max_length + 1.""" + encoded = format_tool_arguments({"v": "xxxx"}) + assert encoded == '{"v": "xxxx"}' + truncated = format_tool_arguments({"v": "xxxx"}, max_length=len(encoded) - 1) + assert truncated is not None + assert len(truncated) == len(encoded) # (max_length-1) + 1 ellipsis = max_length + assert truncated.endswith("…") + + def test_non_serializable_falls_back_to_str(self) -> None: + class Custom: + def __str__(self) -> str: + return "custom-repr" + + # Custom objects in the dict are stringified by default=str. + result = format_tool_arguments({"obj": Custom()}) + assert result is not None + assert "custom-repr" in result + + def test_string_argument_passes_through(self) -> None: + # Some SDKs may pass string args; JSON-encode preserves them. + result = format_tool_arguments("hello") + assert result == '"hello"' + + +class TestExtractToolResultText: + """Tests for extract_tool_result_text.""" + + def test_returns_none_for_empty(self) -> None: + assert extract_tool_result_text(None) is None + assert extract_tool_result_text("") is None + + def test_plain_string_passes_through(self) -> None: + """Claude's MCPManager already returns strings.""" + assert extract_tool_result_text("file contents") == "file contents" + + def test_extracts_content_from_structured_result(self) -> None: + """The Copilot SDK's Result object exposes a .content field.""" + + @dataclass + class Result: + content: str | None = None + contents: object | None = None + detailed_content: str | None = None + kind: object | None = None + + r = Result(content="hello world", detailed_content="hello world (full)") + out = extract_tool_result_text(r) + # Prefers content over detailed_content + assert out == "hello world" + + def test_falls_back_to_detailed_content(self) -> None: + @dataclass + class Result: + content: str | None = None + detailed_content: str | None = None + + r = Result(content=None, detailed_content="full payload") + assert extract_tool_result_text(r) == "full payload" + + def test_newlines_preserved_not_escaped(self) -> None: + """Real \\n stays as a newline, not a literal backslash-n (issue #93).""" + + @dataclass + class Result: + content: str | None = None + + r = Result(content="line one\nline two") + out = extract_tool_result_text(r) + assert out == "line one\nline two" + # Crucially NOT the Python repr "Result(content='line one\\nline two')" + assert "Result(" not in (out or "") + assert "\\n" not in (out or "") + + def test_unknown_object_falls_back_to_str(self) -> None: + class Mystery: + def __str__(self) -> str: + return "mystery-output" + + assert extract_tool_result_text(Mystery()) == "mystery-output" + + def test_truncation_appends_ellipsis(self) -> None: + out = extract_tool_result_text("x" * 600, max_length=100) + assert out is not None + assert len(out) == 101 + assert out.endswith("…") + + def test_no_truncation_at_or_below_max_length(self) -> None: + """Strings at or below max_length must be returned unchanged.""" + exact = extract_tool_result_text("x" * 100, max_length=100) + assert exact == "x" * 100 + assert exact is not None + assert not exact.endswith("…") + + below = extract_tool_result_text("x" * 99, max_length=100) + assert below == "x" * 99 + + def test_truncation_at_one_over_max_length(self) -> None: + """Strings one char over max_length get truncated to max_length + 1.""" + out = extract_tool_result_text("x" * 101, max_length=100) + assert out is not None + assert len(out) == 101 + assert out.endswith("…") + assert out[:-1] == "x" * 100 + + def test_non_string_content_falls_back_to_str(self) -> None: + """A non-string content attribute should not be returned as-is.""" + + @dataclass + class Result: + content: object = None + detailed_content: str | None = None + + def __str__(self) -> str: # pragma: no cover - exercised by helper + return f"Result(content={self.content!r})" + + # int content: falls through the isinstance(str) guard to str(result). + r_int = Result(content=42) + assert extract_tool_result_text(r_int) == "Result(content=42)" + + # dict content: same fallback. + r_dict = Result(content={"k": "v"}) + assert extract_tool_result_text(r_dict) == "Result(content={'k': 'v'})" + + def test_non_string_content_falls_back_to_detailed_content(self) -> None: + """When content is a non-string non-None value, the helper currently + treats it as 'present but unusable' and falls back to str(result), + not to detailed_content. Lock that contract in so a future refactor + is forced to think about it. + """ + + @dataclass + class Result: + content: object = None + detailed_content: str | None = None + + def __str__(self) -> str: + return "" + + r = Result(content=42, detailed_content="useful text") + # content is not None, so detailed_content is never consulted; the + # non-string content fails the isinstance(str) guard, so str(result) + # is used. + assert extract_tool_result_text(r) == ""