Parse XML-like tool calls from response content in agent mode (especially useful for local Qwen models)

Your Name · Your Name · commit ecafc65ab5e5 · 2026-03-10T02:11:48.000-04:00
diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
@@ -32,15 +32,15 @@
 
 import httpx
 from litellm import experimental_mcp_client
-from litellm.types.utils import ChatCompletionMessageToolCall, Function, ModelResponse
+from litellm.types.utils import ModelResponse
 from prompt_toolkit.patch_stdout import patch_stdout
 from rich.console import Console
 
 import cecli.prompts.utils.system as prompts
 from cecli import __version__, models, urls, utils
 from cecli.commands import Commands, SwitchCoderSignal
 from cecli.exceptions import LiteLLMExceptions
-from cecli.helpers import command_parser, coroutines, nested
+from cecli.helpers import command_parser, coroutines, nested, responses
 from cecli.helpers.conversation import (
     ConversationChunks,
     ConversationManager,
@@ -1581,8 +1581,8 @@ async def run_one(self, user_message, preproc):
             self.reflected_message = None
             self.tool_reflection = False
 
-            if float(self.total_cost) > self.cost_multiplier * nested.getter(
-                self.args, "cost_limit", float("inf")
+            if float(self.total_cost) > self.cost_multiplier * (
+                nested.getter(self.args, "cost_limit", float("inf")) or float("inf")
             ):
                 if await self.io.confirm_ask(
                     "You have reached your configured cost limit. Continue?",
@@ -3311,66 +3311,16 @@ def consolidate_chunks(self):
         # If no native tool calls, check if the content contains JSON tool calls
         # This handles models that write JSON in text instead of using native calling
         if not self.partial_response_tool_calls and self.partial_response_content:
-            try:
-                # Simple extraction of JSON-like structures that look like tool calls
-                # Only look for tool calls if it looks like JSON
-                if "{" in self.partial_response_content or "[" in self.partial_response_content:
-                    json_chunks = utils.split_concatenated_json(self.partial_response_content)
-                    extracted_calls = []
-                    chunk_index = 0
-
-                    for chunk in json_chunks:
-                        chunk_index += 1
-                        try:
-                            json_obj = json.loads(chunk)
-                            if (
-                                isinstance(json_obj, dict)
-                                and "name" in json_obj
-                                and "arguments" in json_obj
-                            ):
-                                # Create a Pydantic model for the tool call
-                                function_obj = Function(
-                                    name=json_obj["name"],
-                                    arguments=(
-                                        json.dumps(json_obj["arguments"])
-                                        if isinstance(json_obj["arguments"], (dict, list))
-                                        else str(json_obj["arguments"])
-                                    ),
-                                )
-                                tool_call_obj = ChatCompletionMessageToolCall(
-                                    type="function",
-                                    function=function_obj,
-                                    id=f"call_{len(extracted_calls)}_{int(time.time())}_{chunk_index}",
-                                )
-                                extracted_calls.append(tool_call_obj)
-                            elif isinstance(json_obj, list):
-                                for item in json_obj:
-                                    if (
-                                        isinstance(item, dict)
-                                        and "name" in item
-                                        and "arguments" in item
-                                    ):
-                                        function_obj = Function(
-                                            name=item["name"],
-                                            arguments=(
-                                                json.dumps(item["arguments"])
-                                                if isinstance(item["arguments"], (dict, list))
-                                                else str(item["arguments"])
-                                            ),
-                                        )
-                                        tool_call_obj = ChatCompletionMessageToolCall(
-                                            type="function",
-                                            function=function_obj,
-                                            id=f"call_{len(extracted_calls)}_{int(time.time())}_{chunk_index}",
-                                        )
-                                        extracted_calls.append(tool_call_obj)
-                        except json.JSONDecodeError:
-                            continue
-
-                    if extracted_calls:
-                        self.partial_response_tool_calls = extracted_calls
-            except Exception:
-                pass
+            extracted_calls = responses.extract_tools_from_content_json(
+                self.partial_response_content
+            )
+            if not extracted_calls:
+                extracted_calls = responses.extract_tools_from_content_xml(
+                    self.partial_response_content
+                )
+
+            if extracted_calls:
+                self.partial_response_tool_calls = extracted_calls
 
         return response, func_err, content_err
 
diff --git a/cecli/helpers/responses.py b/cecli/helpers/responses.py
@@ -1,4 +1,11 @@
+import json
 import re
+import time
+from typing import List, Optional
+
+from litellm.types.utils import ChatCompletionMessageToolCall, Function
+
+from cecli import utils
 
 
 def preprocess_json(response: str) -> str:
@@ -19,3 +26,107 @@ def normalize(match):
         return "\\\\" + suffix
 
     return re.sub(pattern, normalize, response)
+
+
+def extract_tools_from_content_json(content: str) -> Optional[List[ChatCompletionMessageToolCall]]:
+    """
+    Simple extraction of JSON-like structures that look like tool calls.
+    This handles models that write JSON in text instead of using native calling.
+    """
+    if not content or ("{" not in content and "[" not in content):
+        return None
+
+    try:
+        json_chunks = utils.split_concatenated_json(content)
+        extracted_calls = []
+        chunk_index = 0
+
+        for chunk in json_chunks:
+            chunk_index += 1
+            try:
+                json_obj = json.loads(chunk)
+                if isinstance(json_obj, dict) and "name" in json_obj and "arguments" in json_obj:
+                    # Create a Pydantic model for the tool call
+                    function_obj = Function(
+                        name=json_obj["name"],
+                        arguments=(
+                            json.dumps(json_obj["arguments"])
+                            if isinstance(json_obj["arguments"], (dict, list))
+                            else str(json_obj["arguments"])
+                        ),
+                    )
+                    tool_call_obj = ChatCompletionMessageToolCall(
+                        type="function",
+                        function=function_obj,
+                        id=f"call_{len(extracted_calls)}_{int(time.time())}_{chunk_index}",
+                    )
+                    extracted_calls.append(tool_call_obj)
+                elif isinstance(json_obj, list):
+                    for item in json_obj:
+                        if isinstance(item, dict) and "name" in item and "arguments" in item:
+                            function_obj = Function(
+                                name=item["name"],
+                                arguments=(
+                                    json.dumps(item["arguments"])
+                                    if isinstance(item["arguments"], (dict, list))
+                                    else str(item["arguments"])
+                                ),
+                            )
+                            tool_call_obj = ChatCompletionMessageToolCall(
+                                type="function",
+                                function=function_obj,
+                                id=f"call_{len(extracted_calls)}_{int(time.time())}_{chunk_index}",
+                            )
+                            extracted_calls.append(tool_call_obj)
+            except json.JSONDecodeError:
+                continue
+
+        return extracted_calls if extracted_calls else None
+    except Exception:
+        return None
+
+
+def extract_tools_from_content_xml(content: str) -> Optional[List[ChatCompletionMessageToolCall]]:
+    """
+    Extract Qwen-style XML tool calls from text content; returns None if none are found.
+    Example:
+    <function=UpdateTodoList>
+    <parameter=tasks>
+    [{"task": "Update task list", "done": false, "current": true}]
+    </parameter>
+    </function>
+    """
+    if not content or "<function=" not in content:
+        return None
+
+    try:
+        extracted_calls = []
+        # Find all blocks between <function=...> and </function>
+        func_blocks = re.finditer(r"<function=(.*?)>(.*?)</function>", content, re.DOTALL)
+
+        for i, block_match in enumerate(func_blocks):
+            func_name = block_match.group(1).strip()
+            block_content = block_match.group(2).strip()
+
+            params_dict = {}
+            param_pattern = r"<parameter=(.*?)>(.*?)</parameter>"
+            for param_match in re.finditer(param_pattern, block_content, re.DOTALL):
+                key = param_match.group(1).strip()
+                value_str = param_match.group(2).strip()
+                try:
+                    params_dict[key] = json.loads(value_str)
+                except json.JSONDecodeError:
+                    params_dict[key] = value_str
+
+            function_obj = Function(name=func_name, arguments=json.dumps(params_dict))
+
+            tool_call_obj = ChatCompletionMessageToolCall(
+                type="function",
+                function=function_obj,
+                id=f"xml_call_{i}_{int(time.time())}",
+            )
+            extracted_calls.append(tool_call_obj)
+
+        return extracted_calls if extracted_calls else None
+    except Exception:
+        return None