From e9d6068285140d9c8485f69bd6c3abb0474be0dc Mon Sep 17 00:00:00 2001
From: Claude
Date: Wed, 11 Mar 2026 07:36:31 +0000
Subject: [PATCH 01/10] Add E2BSandboxToolset integration for Haystack agents

Introduces `E2BSandboxToolset`, a Haystack `Toolset` subclass that connects to
an E2B cloud sandbox and exposes four tools to any Haystack Agent:
`run_bash_command`, `read_file`, `write_file`, and `list_directory`.

Key design points:
- Sandbox connection is established lazily via `warm_up()`, which is called
  automatically by the Haystack pipeline/agent before the first tool
  invocation and is idempotent.
- `close()` shuts down the sandbox and releases resources.
- API key is managed via Haystack's `Secret` (defaults to the `E2B_API_KEY`
  environment variable).
- Full `to_dict` / `from_dict` serialisation support; the live sandbox
  instance is not serialised and is re-created on `warm_up()`.
- `e2b` added as an optional test dependency in `pyproject.toml`.
- 38 unit tests covering init, warm-up lifecycle, each tool operation, error
  handling, and round-trip serialisation.

https://claude.ai/code/session_01DwDqKPEtssXgxqEaArcXiN --- haystack_experimental/tools/__init__.py | 3 + haystack_experimental/tools/e2b/__init__.py | 16 + .../tools/e2b/sandbox_toolset.py | 358 +++++++++++++++ pyproject.toml | 1 + test/tools/__init__.py | 3 + test/tools/e2b/__init__.py | 3 + test/tools/e2b/test_sandbox_toolset.py | 421 ++++++++++++++++++ 7 files changed, 805 insertions(+) create mode 100644 haystack_experimental/tools/__init__.py create mode 100644 haystack_experimental/tools/e2b/__init__.py create mode 100644 haystack_experimental/tools/e2b/sandbox_toolset.py create mode 100644 test/tools/__init__.py create mode 100644 test/tools/e2b/__init__.py create mode 100644 test/tools/e2b/test_sandbox_toolset.py diff --git a/haystack_experimental/tools/__init__.py b/haystack_experimental/tools/__init__.py new file mode 100644 index 00000000..c1764a6e --- /dev/null +++ b/haystack_experimental/tools/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/haystack_experimental/tools/e2b/__init__.py b/haystack_experimental/tools/e2b/__init__.py new file mode 100644 index 00000000..5742d872 --- /dev/null +++ b/haystack_experimental/tools/e2b/__init__.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +import sys +from typing import TYPE_CHECKING + +from lazy_imports import LazyImporter + +_import_structure = {"sandbox_toolset": ["E2BSandboxToolset"]} + +if TYPE_CHECKING: + from .sandbox_toolset import E2BSandboxToolset as E2BSandboxToolset + +else: + sys.modules[__name__] = LazyImporter(name=__name__, module_file=__file__, import_structure=_import_structure) diff --git a/haystack_experimental/tools/e2b/sandbox_toolset.py b/haystack_experimental/tools/e2b/sandbox_toolset.py new file mode 100644 index 00000000..0b5c1c9d --- /dev/null +++ b/haystack_experimental/tools/e2b/sandbox_toolset.py @@ -0,0 +1,358 @@ +# 
SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import dataclass, field +from typing import Any + +from haystack import logging +from haystack.lazy_imports import LazyImport +from haystack.tools import Tool, Toolset +from haystack.utils import Secret, deserialize_secrets_inplace + +with LazyImport(message="Run 'pip install e2b'") as e2b_import: + from e2b import Sandbox + +logger = logging.getLogger(__name__) + + +@dataclass +class E2BSandboxToolset(Toolset): + """ + A Haystack Toolset that provides bash command execution and filesystem access + inside an E2B cloud sandbox environment. + + E2BSandboxToolset creates and manages a connection to an E2B sandbox, exposing + the following tools to a Haystack Agent: + + - **run_bash_command**: Execute arbitrary bash commands and capture stdout/stderr. + - **read_file**: Read the content of a file from the sandbox filesystem. + - **write_file**: Write content to a file in the sandbox filesystem. + - **list_directory**: List the contents of a directory in the sandbox. + + The sandbox connection is established lazily via `warm_up()`, which is called + automatically when the toolset is used within a Haystack pipeline or agent. The + sandbox stays open for the configured `timeout` period of inactivity. 
+ + ### Usage example + + ```python + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.dataclasses import ChatMessage + + from haystack_experimental.components.agents import Agent + from haystack_experimental.tools.e2b import E2BSandboxToolset + + # Create the toolset – the sandbox connection is established during warm_up + sandbox_toolset = E2BSandboxToolset( + api_key=Secret.from_env_var("E2B_API_KEY"), + sandbox_template="base", + timeout=300, + ) + + agent = Agent( + chat_generator=OpenAIChatGenerator(model="gpt-4o"), + tools=[sandbox_toolset], + ) + ``` + + The `warm_up()` call is handled automatically when the agent's pipeline starts, so + you generally do not need to call it manually. If you use the toolset standalone, + call `warm_up()` before the first tool invocation: + + ```python + sandbox_toolset.warm_up() + result = sandbox_toolset["run_bash_command"].invoke(command="echo hello") + sandbox_toolset.close() + ``` + """ + + api_key: Secret = field(default_factory=lambda: Secret.from_env_var("E2B_API_KEY")) + sandbox_template: str = field(default="base") + timeout: int = field(default=300) + environment_vars: dict[str, str] = field(default_factory=dict) + + # Private – not part of the public interface / serialized state + _sandbox: Any = field(default=None, init=False, repr=False, compare=False) + + def __post_init__(self) -> None: + """ + Build the Tool objects that wrap the sandbox operations. + + The actual sandbox connection is deferred to `warm_up()`. + """ + # Build tool list referencing bound methods on this instance so that + # every tool shares the same sandbox connection. + tools = [ + Tool( + name="run_bash_command", + description=( + "Execute a bash command inside the E2B sandbox and return the combined stdout, " + "stderr, and exit code. Use this to run shell scripts, install packages, compile " + "code, or perform any system-level operation." 
+ ), + parameters={ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The bash command to execute.", + }, + "timeout": { + "type": "integer", + "description": ( + "Maximum number of seconds to wait for the command to finish. " + "Defaults to 60 seconds." + ), + }, + }, + "required": ["command"], + }, + function=self._run_bash_command, + ), + Tool( + name="read_file", + description=( + "Read the text content of a file from the E2B sandbox filesystem and return it " + "as a string. The file must exist; use list_directory to verify paths first." + ), + parameters={ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Absolute or relative path of the file to read.", + }, + }, + "required": ["path"], + }, + function=self._read_file, + ), + Tool( + name="write_file", + description=( + "Write text content to a file in the E2B sandbox filesystem. " + "Parent directories are created automatically if they do not exist. " + "Existing files are overwritten." + ), + parameters={ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Absolute or relative path of the file to write.", + }, + "content": { + "type": "string", + "description": "Text content to write into the file.", + }, + }, + "required": ["path", "content"], + }, + function=self._write_file, + ), + Tool( + name="list_directory", + description=( + "List the files and subdirectories inside a directory in the E2B sandbox " + "filesystem. Returns a newline-separated list of names with a trailing '/' " + "appended to subdirectory names." + ), + parameters={ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Absolute or relative path of the directory to list.", + }, + }, + "required": ["path"], + }, + function=self._list_directory, + ), + ] + # Initialise the parent Toolset with our tools. 
+ # We bypass super().__post_init__ duplicate-name check by assigning directly so + # that we can call the parent __post_init__ with the correct list in place. + self.tools = tools + super().__post_init__() + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def warm_up(self) -> None: + """ + Establish the connection to the E2B sandbox. + + This method is called automatically by the Haystack pipeline before the first + tool invocation. It is idempotent – calling it multiple times has no effect if + the sandbox is already running. + + :raises RuntimeError: If the E2B sandbox cannot be created (e.g. invalid API key + or network error). + """ + if self._sandbox is not None: + return + + e2b_import.check() + resolved_key = self.api_key.resolve_value() + try: + logger.info( + "Starting E2B sandbox (template={template}, timeout={timeout}s)", + template=self.sandbox_template, + timeout=self.timeout, + ) + self._sandbox = Sandbox( + api_key=resolved_key, + template=self.sandbox_template, + timeout=self.timeout, + envs=self.environment_vars if self.environment_vars else None, + ) + logger.info("E2B sandbox started (id={sandbox_id})", sandbox_id=self._sandbox.sandbox_id) + except Exception as e: + raise RuntimeError(f"Failed to start E2B sandbox: {e}") from e + + def close(self) -> None: + """ + Shut down the E2B sandbox and release all associated resources. + + Call this method when you are done using the toolset to avoid leaving + idle sandboxes running and incurring unnecessary costs. 
+ """ + if self._sandbox is None: + return + try: + self._sandbox.kill() + logger.info("E2B sandbox closed") + except Exception as e: + logger.warning("Failed to close E2B sandbox: {error}", error=e) + finally: + self._sandbox = None + + # ------------------------------------------------------------------ + # Serialisation + # ------------------------------------------------------------------ + + def to_dict(self) -> dict[str, Any]: + """ + Serialize the toolset configuration to a dictionary. + + The sandbox instance itself is not serialised; a fresh connection is + established when `warm_up()` is called after deserialisation. + + :returns: Dictionary containing the serialised toolset configuration. + """ + from haystack.core.serialization import generate_qualified_class_name + + return { + "type": generate_qualified_class_name(type(self)), + "data": { + "api_key": self.api_key.to_dict(), + "sandbox_template": self.sandbox_template, + "timeout": self.timeout, + "environment_vars": self.environment_vars, + }, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "E2BSandboxToolset": + """ + Deserialize an E2BSandboxToolset from a dictionary. + + :param data: Dictionary created by `to_dict()`. + :returns: A new E2BSandboxToolset instance ready to be warmed up. + """ + inner = data["data"] + deserialize_secrets_inplace(inner, keys=["api_key"]) + return cls( + api_key=inner["api_key"], + sandbox_template=inner.get("sandbox_template", "base"), + timeout=inner.get("timeout", 300), + environment_vars=inner.get("environment_vars", {}), + ) + + # ------------------------------------------------------------------ + # Private tool implementations + # ------------------------------------------------------------------ + + def _require_sandbox(self) -> "Sandbox": + """Return the active sandbox or raise a helpful error if warm_up was not called.""" + if self._sandbox is None: + raise RuntimeError( + "E2B sandbox is not running. 
Call warm_up() before using the toolset, " + "or add the toolset to a Haystack pipeline/agent which calls warm_up() automatically." + ) + return self._sandbox + + def _run_bash_command(self, command: str, timeout: int = 60) -> str: + """ + Execute a bash command in the sandbox. + + :param command: The bash command to run. + :param timeout: Seconds to wait before killing the process. + :returns: A formatted string containing exit_code, stdout and stderr. + """ + sandbox = self._require_sandbox() + try: + result = sandbox.commands.run(command, timeout=timeout) + return ( + f"exit_code: {result.exit_code}\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + except Exception as e: + raise RuntimeError(f"Failed to run bash command: {e}") from e + + def _read_file(self, path: str) -> str: + """ + Read a file from the sandbox filesystem. + + :param path: Path to the file. + :returns: The text content of the file. + """ + sandbox = self._require_sandbox() + try: + content = sandbox.files.read(path) + # e2b may return bytes; decode if necessary + if isinstance(content, bytes): + return content.decode("utf-8", errors="replace") + return str(content) + except Exception as e: + raise RuntimeError(f"Failed to read file '{path}': {e}") from e + + def _write_file(self, path: str, content: str) -> str: + """ + Write content to a file in the sandbox filesystem. + + :param path: Destination path inside the sandbox. + :param content: Text to write. + :returns: A confirmation message with the file path. + """ + sandbox = self._require_sandbox() + try: + sandbox.files.write(path, content) + return f"File written successfully: {path}" + except Exception as e: + raise RuntimeError(f"Failed to write file '{path}': {e}") from e + + def _list_directory(self, path: str) -> str: + """ + List the contents of a directory in the sandbox filesystem. + + :param path: Directory path to list. + :returns: Newline-separated list of entries (directories end with '/'). 
+ """ + sandbox = self._require_sandbox() + try: + entries = sandbox.files.list(path) + lines = [] + for entry in entries: + name = entry.name + # Mark directories with a trailing slash for clarity + if getattr(entry, "is_dir", False) or getattr(entry, "type", "") == "dir": + name = name + "/" + lines.append(name) + return "\n".join(lines) if lines else "(empty directory)" + except Exception as e: + raise RuntimeError(f"Failed to list directory '{path}': {e}") from e diff --git a/pyproject.toml b/pyproject.toml index d91ccfdd..624d56db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ extra-dependencies = [ "tiktoken", # LLM-based Summarizer "nltk>=3.9.1", # LLM-based Summarizer "mem0ai", # Mem0MemoryStore + "e2b", # E2BSandboxToolset "amazon-bedrock-haystack", "google-genai-haystack", "cohere-haystack", diff --git a/test/tools/__init__.py b/test/tools/__init__.py new file mode 100644 index 00000000..c1764a6e --- /dev/null +++ b/test/tools/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/test/tools/e2b/__init__.py b/test/tools/e2b/__init__.py new file mode 100644 index 00000000..c1764a6e --- /dev/null +++ b/test/tools/e2b/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/test/tools/e2b/test_sandbox_toolset.py b/test/tools/e2b/test_sandbox_toolset.py new file mode 100644 index 00000000..6e6c2889 --- /dev/null +++ b/test/tools/e2b/test_sandbox_toolset.py @@ -0,0 +1,421 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from unittest.mock import MagicMock, patch + +import pytest +from haystack.utils import Secret + +from haystack_experimental.tools.e2b.sandbox_toolset import E2BSandboxToolset + + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + + +def _make_toolset(**kwargs) -> E2BSandboxToolset: + """Create an E2BSandboxToolset with a dummy API key for testing.""" + defaults = { + "api_key": Secret.from_token("test-api-key"), + "sandbox_template": "base", + "timeout": 120, + "environment_vars": {}, + } + defaults.update(kwargs) + return E2BSandboxToolset(**defaults) + + +def _make_sandbox_mock() -> MagicMock: + """Return a MagicMock that mimics the e2b Sandbox object.""" + sandbox = MagicMock() + sandbox.sandbox_id = "sandbox-test-123" + return sandbox + + +# --------------------------------------------------------------------------- +# Initialisation +# --------------------------------------------------------------------------- + + +class TestE2BSandboxToolsetInit: + def test_default_parameters(self): + toolset = _make_toolset() + assert toolset.sandbox_template == "base" + assert toolset.timeout == 120 + assert toolset.environment_vars == {} + assert toolset._sandbox is None + + def test_custom_parameters(self): + toolset = _make_toolset( + sandbox_template="my-template", + timeout=600, + environment_vars={"FOO": "bar"}, + ) + assert toolset.sandbox_template == "my-template" + assert toolset.timeout == 600 + assert toolset.environment_vars == {"FOO": "bar"} + + def test_tools_are_created(self): + toolset = _make_toolset() + tool_names = {tool.name for tool in toolset} + assert tool_names == {"run_bash_command", "read_file", "write_file", "list_directory"} + + def test_tools_have_descriptions(self): + toolset = _make_toolset() + for tool in toolset: + assert tool.description, f"Tool '{tool.name}' has no description" + + def test_tools_have_valid_parameters_schema(self): + toolset = _make_toolset() + for tool in toolset: + assert "type" in tool.parameters + assert "properties" in tool.parameters + + def test_run_bash_command_required_parameter(self): + toolset = _make_toolset() + bash_tool = next(t for t in toolset if t.name == 
"run_bash_command") + assert "command" in bash_tool.parameters["required"] + + def test_read_file_required_parameter(self): + toolset = _make_toolset() + read_tool = next(t for t in toolset if t.name == "read_file") + assert "path" in read_tool.parameters["required"] + + def test_write_file_required_parameters(self): + toolset = _make_toolset() + write_tool = next(t for t in toolset if t.name == "write_file") + assert "path" in write_tool.parameters["required"] + assert "content" in write_tool.parameters["required"] + + def test_list_directory_required_parameter(self): + toolset = _make_toolset() + list_tool = next(t for t in toolset if t.name == "list_directory") + assert "path" in list_tool.parameters["required"] + + def test_toolset_len(self): + toolset = _make_toolset() + assert len(toolset) == 4 + + def test_toolset_contains_by_name(self): + toolset = _make_toolset() + assert "run_bash_command" in toolset + assert "read_file" in toolset + assert "write_file" in toolset + assert "list_directory" in toolset + assert "nonexistent_tool" not in toolset + + +# --------------------------------------------------------------------------- +# warm_up +# --------------------------------------------------------------------------- + + +class TestE2BSandboxToolsetWarmUp: + @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") + @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + def test_warm_up_creates_sandbox(self, mock_sandbox_cls, mock_e2b_import): + mock_e2b_import.check.return_value = None + mock_sandbox_instance = _make_sandbox_mock() + mock_sandbox_cls.return_value = mock_sandbox_instance + + toolset = _make_toolset() + toolset.warm_up() + + mock_sandbox_cls.assert_called_once_with( + api_key="test-api-key", + template="base", + timeout=120, + envs=None, + ) + assert toolset._sandbox is mock_sandbox_instance + + @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") + 
@patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + def test_warm_up_passes_environment_vars(self, mock_sandbox_cls, mock_e2b_import): + mock_e2b_import.check.return_value = None + mock_sandbox_cls.return_value = _make_sandbox_mock() + + toolset = _make_toolset(environment_vars={"MY_VAR": "value"}) + toolset.warm_up() + + _, kwargs = mock_sandbox_cls.call_args + assert kwargs["envs"] == {"MY_VAR": "value"} + + @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") + @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + def test_warm_up_is_idempotent(self, mock_sandbox_cls, mock_e2b_import): + mock_e2b_import.check.return_value = None + mock_sandbox_cls.return_value = _make_sandbox_mock() + + toolset = _make_toolset() + toolset.warm_up() + toolset.warm_up() + + # Sandbox should only be created once + mock_sandbox_cls.assert_called_once() + + @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") + @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + def test_warm_up_raises_on_sandbox_error(self, mock_sandbox_cls, mock_e2b_import): + mock_e2b_import.check.return_value = None + mock_sandbox_cls.side_effect = Exception("connection refused") + + toolset = _make_toolset() + with pytest.raises(RuntimeError, match="Failed to start E2B sandbox"): + toolset.warm_up() + + +# --------------------------------------------------------------------------- +# close +# --------------------------------------------------------------------------- + + +class TestE2BSandboxToolsetClose: + def test_close_without_warm_up_is_noop(self): + toolset = _make_toolset() + toolset.close() # must not raise + assert toolset._sandbox is None + + def test_close_kills_sandbox(self): + toolset = _make_toolset() + mock_sandbox = _make_sandbox_mock() + toolset._sandbox = mock_sandbox + + toolset.close() + + mock_sandbox.kill.assert_called_once() + assert toolset._sandbox is None + + def test_close_clears_sandbox_on_kill_error(self): + 
toolset = _make_toolset() + mock_sandbox = _make_sandbox_mock() + mock_sandbox.kill.side_effect = Exception("kill failed") + toolset._sandbox = mock_sandbox + + toolset.close() # must not raise + + assert toolset._sandbox is None + + +# --------------------------------------------------------------------------- +# Tool invocations +# --------------------------------------------------------------------------- + + +class TestE2BSandboxToolsetRunBashCommand: + def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: + toolset = _make_toolset() + mock_sandbox = _make_sandbox_mock() + toolset._sandbox = mock_sandbox + return toolset, mock_sandbox + + def test_run_bash_command_returns_formatted_output(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_result = MagicMock() + mock_result.exit_code = 0 + mock_result.stdout = "hello world\n" + mock_result.stderr = "" + mock_sandbox.commands.run.return_value = mock_result + + output = toolset._run_bash_command("echo hello world") + + assert "exit_code: 0" in output + assert "hello world" in output + mock_sandbox.commands.run.assert_called_once_with("echo hello world", timeout=60) + + def test_run_bash_command_passes_custom_timeout(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.commands.run.return_value = MagicMock(exit_code=0, stdout="", stderr="") + + toolset._run_bash_command("sleep 5", timeout=30) + + mock_sandbox.commands.run.assert_called_once_with("sleep 5", timeout=30) + + def test_run_bash_command_raises_when_no_sandbox(self): + toolset = _make_toolset() + with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + toolset._run_bash_command("ls") + + def test_run_bash_command_wraps_sandbox_exception(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.commands.run.side_effect = Exception("timeout") + + with pytest.raises(RuntimeError, match="Failed to run bash command"): + toolset._run_bash_command("sleep 1000") + + 
+class TestE2BSandboxToolsetReadFile: + def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: + toolset = _make_toolset() + mock_sandbox = _make_sandbox_mock() + toolset._sandbox = mock_sandbox + return toolset, mock_sandbox + + def test_read_file_returns_string(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.files.read.return_value = "file content" + + result = toolset._read_file("/some/file.txt") + + assert result == "file content" + mock_sandbox.files.read.assert_called_once_with("/some/file.txt") + + def test_read_file_decodes_bytes(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.files.read.return_value = b"binary content" + + result = toolset._read_file("/binary.bin") + + assert result == "binary content" + + def test_read_file_raises_when_no_sandbox(self): + toolset = _make_toolset() + with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + toolset._read_file("/some/file.txt") + + def test_read_file_wraps_sandbox_exception(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.files.read.side_effect = Exception("file not found") + + with pytest.raises(RuntimeError, match="Failed to read file"): + toolset._read_file("/nonexistent.txt") + + +class TestE2BSandboxToolsetWriteFile: + def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: + toolset = _make_toolset() + mock_sandbox = _make_sandbox_mock() + toolset._sandbox = mock_sandbox + return toolset, mock_sandbox + + def test_write_file_returns_confirmation(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + + result = toolset._write_file("/output/result.txt", "hello") + + assert "/output/result.txt" in result + mock_sandbox.files.write.assert_called_once_with("/output/result.txt", "hello") + + def test_write_file_raises_when_no_sandbox(self): + toolset = _make_toolset() + with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + 
toolset._write_file("/some/path.txt", "content") + + def test_write_file_wraps_sandbox_exception(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.files.write.side_effect = Exception("permission denied") + + with pytest.raises(RuntimeError, match="Failed to write file"): + toolset._write_file("/protected/file.txt", "data") + + +class TestE2BSandboxToolsetListDirectory: + def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: + toolset = _make_toolset() + mock_sandbox = _make_sandbox_mock() + toolset._sandbox = mock_sandbox + return toolset, mock_sandbox + + def _make_entry(self, name: str, is_dir: bool = False) -> MagicMock: + entry = MagicMock() + entry.name = name + entry.is_dir = is_dir + return entry + + def test_list_directory_returns_names(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.files.list.return_value = [ + self._make_entry("file.txt"), + self._make_entry("subdir", is_dir=True), + ] + + result = toolset._list_directory("/home/user") + + assert "file.txt" in result + assert "subdir/" in result + mock_sandbox.files.list.assert_called_once_with("/home/user") + + def test_list_directory_empty(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.files.list.return_value = [] + + result = toolset._list_directory("/empty") + + assert result == "(empty directory)" + + def test_list_directory_raises_when_no_sandbox(self): + toolset = _make_toolset() + with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + toolset._list_directory("/home") + + def test_list_directory_wraps_sandbox_exception(self): + toolset, mock_sandbox = self._toolset_with_sandbox() + mock_sandbox.files.list.side_effect = Exception("not a directory") + + with pytest.raises(RuntimeError, match="Failed to list directory"): + toolset._list_directory("/nonexistent") + + +# --------------------------------------------------------------------------- +# Serialisation +# 
--------------------------------------------------------------------------- + + +class TestE2BSandboxToolsetSerialisation: + """Serialisation tests use env-var secrets (the only serialisable Secret type).""" + + def _make_env_toolset(self, **kwargs) -> E2BSandboxToolset: + defaults = { + "api_key": Secret.from_env_var("E2B_API_KEY"), + "sandbox_template": "base", + "timeout": 120, + "environment_vars": {}, + } + defaults.update(kwargs) + return E2BSandboxToolset(**defaults) + + def test_to_dict_contains_expected_keys(self): + toolset = self._make_env_toolset(sandbox_template="my-template", timeout=600) + data = toolset.to_dict() + + assert "type" in data + assert "data" in data + assert data["data"]["sandbox_template"] == "my-template" + assert data["data"]["timeout"] == 600 + + def test_to_dict_does_not_include_sandbox_instance(self): + toolset = self._make_env_toolset() + toolset._sandbox = _make_sandbox_mock() # simulate warm-up + data = toolset.to_dict() + + assert "_sandbox" not in data["data"] + assert "sandbox" not in data["data"] + + def test_from_dict_round_trip(self): + original = self._make_env_toolset( + sandbox_template="custom", + timeout=900, + environment_vars={"KEY": "value"}, + ) + data = original.to_dict() + restored = E2BSandboxToolset.from_dict(data) + + assert restored.sandbox_template == "custom" + assert restored.timeout == 900 + assert restored.environment_vars == {"KEY": "value"} + assert restored._sandbox is None # sandbox not restored + + def test_from_dict_creates_tools(self): + original = self._make_env_toolset() + data = original.to_dict() + restored = E2BSandboxToolset.from_dict(data) + + assert len(restored) == 4 + assert "run_bash_command" in restored + + def test_to_dict_type_is_qualified_class_name(self): + toolset = self._make_env_toolset() + data = toolset.to_dict() + assert "E2BSandboxToolset" in data["type"] From ca293f90d46b05839cac83e994d36d36296aa5d8 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 07:49:26 
+0000 Subject: [PATCH 02/10] fix: D205 docstring summary line for E2BSandboxToolset https://claude.ai/code/session_01DwDqKPEtssXgxqEaArcXiN --- haystack_experimental/tools/e2b/sandbox_toolset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/haystack_experimental/tools/e2b/sandbox_toolset.py b/haystack_experimental/tools/e2b/sandbox_toolset.py index 0b5c1c9d..9c9dfb1e 100644 --- a/haystack_experimental/tools/e2b/sandbox_toolset.py +++ b/haystack_experimental/tools/e2b/sandbox_toolset.py @@ -19,8 +19,7 @@ @dataclass class E2BSandboxToolset(Toolset): """ - A Haystack Toolset that provides bash command execution and filesystem access - inside an E2B cloud sandbox environment. + A Haystack Toolset that provides bash command execution and filesystem access inside an E2B sandbox. E2BSandboxToolset creates and manages a connection to an E2B sandbox, exposing the following tools to a Haystack Agent: From d053ff65b8718e3529d5b0d20c234c6e09d5072d Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 07:51:45 +0000 Subject: [PATCH 03/10] fix: apply ruff format to sandbox_toolset.py https://claude.ai/code/session_01DwDqKPEtssXgxqEaArcXiN --- .../tools/e2b/sandbox_toolset.py | 34 ++++--------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/haystack_experimental/tools/e2b/sandbox_toolset.py b/haystack_experimental/tools/e2b/sandbox_toolset.py index 9c9dfb1e..c53ac01f 100644 --- a/haystack_experimental/tools/e2b/sandbox_toolset.py +++ b/haystack_experimental/tools/e2b/sandbox_toolset.py @@ -93,15 +93,11 @@ def __post_init__(self) -> None: parameters={ "type": "object", "properties": { - "command": { - "type": "string", - "description": "The bash command to execute.", - }, + "command": {"type": "string", "description": "The bash command to execute."}, "timeout": { "type": "integer", "description": ( - "Maximum number of seconds to wait for the command to finish. " - "Defaults to 60 seconds." 
+ "Maximum number of seconds to wait for the command to finish. Defaults to 60 seconds." ), }, }, @@ -118,10 +114,7 @@ def __post_init__(self) -> None: parameters={ "type": "object", "properties": { - "path": { - "type": "string", - "description": "Absolute or relative path of the file to read.", - }, + "path": {"type": "string", "description": "Absolute or relative path of the file to read."} }, "required": ["path"], }, @@ -137,14 +130,8 @@ def __post_init__(self) -> None: parameters={ "type": "object", "properties": { - "path": { - "type": "string", - "description": "Absolute or relative path of the file to write.", - }, - "content": { - "type": "string", - "description": "Text content to write into the file.", - }, + "path": {"type": "string", "description": "Absolute or relative path of the file to write."}, + "content": {"type": "string", "description": "Text content to write into the file."}, }, "required": ["path", "content"], }, @@ -160,10 +147,7 @@ def __post_init__(self) -> None: parameters={ "type": "object", "properties": { - "path": { - "type": "string", - "description": "Absolute or relative path of the directory to list.", - }, + "path": {"type": "string", "description": "Absolute or relative path of the directory to list."} }, "required": ["path"], }, @@ -295,11 +279,7 @@ def _run_bash_command(self, command: str, timeout: int = 60) -> str: sandbox = self._require_sandbox() try: result = sandbox.commands.run(command, timeout=timeout) - return ( - f"exit_code: {result.exit_code}\n" - f"stdout:\n{result.stdout}\n" - f"stderr:\n{result.stderr}" - ) + return f"exit_code: {result.exit_code}\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}" except Exception as e: raise RuntimeError(f"Failed to run bash command: {e}") from e From 2a25f85085860181613fcc94d6bb95c5759f9a36 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 08:31:01 +0000 Subject: [PATCH 04/10] refactor: replace E2BSandboxToolset with separate standalone tools Address reviewer 
feedback (sjrl, tholor) to expose individual pre-made Tool objects instead of a monolithic Toolset, so users can load any subset of the four tools into their agent. Changes: - Replace E2BSandboxToolset (Toolset subclass) with E2BSandbox (plain dataclass) that manages the sandbox lifecycle (warm_up / close / to_dict / from_dict). - Add four individual tool factory functions: create_run_bash_command_tool(sandbox) create_read_file_tool(sandbox) create_write_file_tool(sandbox) create_list_directory_tool(sandbox) - Add create_e2b_tools() convenience factory that returns (sandbox, tools) so callers can pass any subset; all tools share the same E2BSandbox instance, preserving filesystem / process state across invocations. - Update __init__.py to export the new public names. - Rewrite tests to match the new API and fix the Copilot review comment: test_class_defaults now instantiates E2BSandbox with only api_key to validate the real class defaults rather than helper-overridden values. https://claude.ai/code/session_01DwDqKPEtssXgxqEaArcXiN --- haystack_experimental/tools/e2b/__init__.py | 18 +- .../tools/e2b/sandbox_toolset.py | 378 +++++++------ test/tools/e2b/test_sandbox_toolset.py | 522 +++++++++--------- 3 files changed, 488 insertions(+), 430 deletions(-) diff --git a/haystack_experimental/tools/e2b/__init__.py b/haystack_experimental/tools/e2b/__init__.py index 5742d872..be52e122 100644 --- a/haystack_experimental/tools/e2b/__init__.py +++ b/haystack_experimental/tools/e2b/__init__.py @@ -7,10 +7,24 @@ from lazy_imports import LazyImporter -_import_structure = {"sandbox_toolset": ["E2BSandboxToolset"]} +_import_structure = { + "sandbox_toolset": [ + "E2BSandbox", + "create_e2b_tools", + "create_run_bash_command_tool", + "create_read_file_tool", + "create_write_file_tool", + "create_list_directory_tool", + ] +} if TYPE_CHECKING: - from .sandbox_toolset import E2BSandboxToolset as E2BSandboxToolset + from .sandbox_toolset import E2BSandbox as E2BSandbox + from 
.sandbox_toolset import create_e2b_tools as create_e2b_tools + from .sandbox_toolset import create_list_directory_tool as create_list_directory_tool + from .sandbox_toolset import create_read_file_tool as create_read_file_tool + from .sandbox_toolset import create_run_bash_command_tool as create_run_bash_command_tool + from .sandbox_toolset import create_write_file_tool as create_write_file_tool else: sys.modules[__name__] = LazyImporter(name=__name__, module_file=__file__, import_structure=_import_structure) diff --git a/haystack_experimental/tools/e2b/sandbox_toolset.py b/haystack_experimental/tools/e2b/sandbox_toolset.py index c53ac01f..09c5625a 100644 --- a/haystack_experimental/tools/e2b/sandbox_toolset.py +++ b/haystack_experimental/tools/e2b/sandbox_toolset.py @@ -7,7 +7,7 @@ from haystack import logging from haystack.lazy_imports import LazyImport -from haystack.tools import Tool, Toolset +from haystack.tools import Tool from haystack.utils import Secret, deserialize_secrets_inplace with LazyImport(message="Run 'pip install e2b'") as e2b_import: @@ -17,52 +17,43 @@ @dataclass -class E2BSandboxToolset(Toolset): +class E2BSandbox: """ - A Haystack Toolset that provides bash command execution and filesystem access inside an E2B sandbox. + Manages the lifecycle of an E2B cloud sandbox. - E2BSandboxToolset creates and manages a connection to an E2B sandbox, exposing - the following tools to a Haystack Agent: - - - **run_bash_command**: Execute arbitrary bash commands and capture stdout/stderr. - - **read_file**: Read the content of a file from the sandbox filesystem. - - **write_file**: Write content to a file in the sandbox filesystem. - - **list_directory**: List the contents of a directory in the sandbox. - - The sandbox connection is established lazily via `warm_up()`, which is called - automatically when the toolset is used within a Haystack pipeline or agent. The - sandbox stays open for the configured `timeout` period of inactivity. 
+ Use :func:`create_e2b_tools` to obtain both an ``E2BSandbox`` and the + individual :class:`~haystack.tools.Tool` objects at once, or instantiate + this class directly and pass it to the individual ``create_*_tool`` + factory functions to build a custom subset of tools. ### Usage example ```python from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage + from haystack.utils import Secret from haystack_experimental.components.agents import Agent - from haystack_experimental.tools.e2b import E2BSandboxToolset + from haystack_experimental.tools.e2b import create_e2b_tools - # Create the toolset – the sandbox connection is established during warm_up - sandbox_toolset = E2BSandboxToolset( + sandbox, tools = create_e2b_tools( api_key=Secret.from_env_var("E2B_API_KEY"), sandbox_template="base", timeout=300, ) - agent = Agent( chat_generator=OpenAIChatGenerator(model="gpt-4o"), - tools=[sandbox_toolset], + tools=tools, ) ``` - The `warm_up()` call is handled automatically when the agent's pipeline starts, so - you generally do not need to call it manually. If you use the toolset standalone, - call `warm_up()` before the first tool invocation: + Lifecycle is handled automatically by the Agent's pipeline. If you use the + tools standalone, call :meth:`warm_up` before the first tool invocation: ```python - sandbox_toolset.warm_up() - result = sandbox_toolset["run_bash_command"].invoke(command="echo hello") - sandbox_toolset.close() + sandbox.warm_up() + # … use tools … + sandbox.close() ``` """ @@ -71,95 +62,9 @@ class E2BSandboxToolset(Toolset): timeout: int = field(default=300) environment_vars: dict[str, str] = field(default_factory=dict) - # Private – not part of the public interface / serialized state + # Private – not serialised _sandbox: Any = field(default=None, init=False, repr=False, compare=False) - def __post_init__(self) -> None: - """ - Build the Tool objects that wrap the sandbox operations. 
- - The actual sandbox connection is deferred to `warm_up()`. - """ - # Build tool list referencing bound methods on this instance so that - # every tool shares the same sandbox connection. - tools = [ - Tool( - name="run_bash_command", - description=( - "Execute a bash command inside the E2B sandbox and return the combined stdout, " - "stderr, and exit code. Use this to run shell scripts, install packages, compile " - "code, or perform any system-level operation." - ), - parameters={ - "type": "object", - "properties": { - "command": {"type": "string", "description": "The bash command to execute."}, - "timeout": { - "type": "integer", - "description": ( - "Maximum number of seconds to wait for the command to finish. Defaults to 60 seconds." - ), - }, - }, - "required": ["command"], - }, - function=self._run_bash_command, - ), - Tool( - name="read_file", - description=( - "Read the text content of a file from the E2B sandbox filesystem and return it " - "as a string. The file must exist; use list_directory to verify paths first." - ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the file to read."} - }, - "required": ["path"], - }, - function=self._read_file, - ), - Tool( - name="write_file", - description=( - "Write text content to a file in the E2B sandbox filesystem. " - "Parent directories are created automatically if they do not exist. " - "Existing files are overwritten." - ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the file to write."}, - "content": {"type": "string", "description": "Text content to write into the file."}, - }, - "required": ["path", "content"], - }, - function=self._write_file, - ), - Tool( - name="list_directory", - description=( - "List the files and subdirectories inside a directory in the E2B sandbox " - "filesystem. 
Returns a newline-separated list of names with a trailing '/' " - "appended to subdirectory names." - ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the directory to list."} - }, - "required": ["path"], - }, - function=self._list_directory, - ), - ] - # Initialise the parent Toolset with our tools. - # We bypass super().__post_init__ duplicate-name check by assigning directly so - # that we can call the parent __post_init__ with the correct list in place. - self.tools = tools - super().__post_init__() - # ------------------------------------------------------------------ # Lifecycle # ------------------------------------------------------------------ @@ -168,12 +73,10 @@ def warm_up(self) -> None: """ Establish the connection to the E2B sandbox. - This method is called automatically by the Haystack pipeline before the first - tool invocation. It is idempotent – calling it multiple times has no effect if - the sandbox is already running. + Idempotent – calling it multiple times has no effect if the sandbox is + already running. - :raises RuntimeError: If the E2B sandbox cannot be created (e.g. invalid API key - or network error). + :raises RuntimeError: If the E2B sandbox cannot be created. """ if self._sandbox is not None: return @@ -200,8 +103,7 @@ def close(self) -> None: """ Shut down the E2B sandbox and release all associated resources. - Call this method when you are done using the toolset to avoid leaving - idle sandboxes running and incurring unnecessary costs. + Call this when you are done to avoid leaving idle sandboxes running. """ if self._sandbox is None: return @@ -219,12 +121,9 @@ def close(self) -> None: def to_dict(self) -> dict[str, Any]: """ - Serialize the toolset configuration to a dictionary. - - The sandbox instance itself is not serialised; a fresh connection is - established when `warm_up()` is called after deserialisation. 
+ Serialize the sandbox configuration to a dictionary. - :returns: Dictionary containing the serialised toolset configuration. + :returns: Dictionary containing the serialised configuration. """ from haystack.core.serialization import generate_qualified_class_name @@ -239,12 +138,12 @@ def to_dict(self) -> dict[str, Any]: } @classmethod - def from_dict(cls, data: dict[str, Any]) -> "E2BSandboxToolset": + def from_dict(cls, data: dict[str, Any]) -> "E2BSandbox": """ - Deserialize an E2BSandboxToolset from a dictionary. + Deserialize an :class:`E2BSandbox` from a dictionary. - :param data: Dictionary created by `to_dict()`. - :returns: A new E2BSandboxToolset instance ready to be warmed up. + :param data: Dictionary created by :meth:`to_dict`. + :returns: A new :class:`E2BSandbox` instance ready to be warmed up. """ inner = data["data"] deserialize_secrets_inplace(inner, keys=["api_key"]) @@ -256,82 +155,225 @@ def from_dict(cls, data: dict[str, Any]) -> "E2BSandboxToolset": ) # ------------------------------------------------------------------ - # Private tool implementations + # Internal helpers (used by the tool factories) # ------------------------------------------------------------------ def _require_sandbox(self) -> "Sandbox": - """Return the active sandbox or raise a helpful error if warm_up was not called.""" + """Return the active sandbox or raise a helpful error.""" if self._sandbox is None: raise RuntimeError( - "E2B sandbox is not running. Call warm_up() before using the toolset, " - "or add the toolset to a Haystack pipeline/agent which calls warm_up() automatically." + "E2B sandbox is not running. Call warm_up() before using the tools, " + "or add the sandbox to a Haystack pipeline/agent which calls warm_up() automatically." ) return self._sandbox - def _run_bash_command(self, command: str, timeout: int = 60) -> str: - """ - Execute a bash command in the sandbox. - :param command: The bash command to run. 
- :param timeout: Seconds to wait before killing the process. - :returns: A formatted string containing exit_code, stdout and stderr. - """ - sandbox = self._require_sandbox() +# --------------------------------------------------------------------------- +# Individual tool factories +# --------------------------------------------------------------------------- + + +def create_run_bash_command_tool(sandbox: E2BSandbox) -> Tool: + """ + Create a ``run_bash_command`` :class:`~haystack.tools.Tool` bound to *sandbox*. + + :param sandbox: The :class:`E2BSandbox` instance that will execute commands. + :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. + """ + + def run_bash_command(command: str, timeout: int = 60) -> str: + sb = sandbox._require_sandbox() try: - result = sandbox.commands.run(command, timeout=timeout) + result = sb.commands.run(command, timeout=timeout) return f"exit_code: {result.exit_code}\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}" except Exception as e: raise RuntimeError(f"Failed to run bash command: {e}") from e - def _read_file(self, path: str) -> str: - """ - Read a file from the sandbox filesystem. + return Tool( + name="run_bash_command", + description=( + "Execute a bash command inside the E2B sandbox and return the combined stdout, " + "stderr, and exit code. Use this to run shell scripts, install packages, compile " + "code, or perform any system-level operation." + ), + parameters={ + "type": "object", + "properties": { + "command": {"type": "string", "description": "The bash command to execute."}, + "timeout": { + "type": "integer", + "description": ( + "Maximum number of seconds to wait for the command to finish. Defaults to 60 seconds." + ), + }, + }, + "required": ["command"], + }, + function=run_bash_command, + ) - :param path: Path to the file. - :returns: The text content of the file. 
- """ - sandbox = self._require_sandbox() + +def create_read_file_tool(sandbox: E2BSandbox) -> Tool: + """ + Create a ``read_file`` :class:`~haystack.tools.Tool` bound to *sandbox*. + + :param sandbox: The :class:`E2BSandbox` instance to read files from. + :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. + """ + + def read_file(path: str) -> str: + sb = sandbox._require_sandbox() try: - content = sandbox.files.read(path) - # e2b may return bytes; decode if necessary + content = sb.files.read(path) if isinstance(content, bytes): return content.decode("utf-8", errors="replace") return str(content) except Exception as e: raise RuntimeError(f"Failed to read file '{path}': {e}") from e - def _write_file(self, path: str, content: str) -> str: - """ - Write content to a file in the sandbox filesystem. + return Tool( + name="read_file", + description=( + "Read the text content of a file from the E2B sandbox filesystem and return it " + "as a string. The file must exist; use list_directory to verify paths first." + ), + parameters={ + "type": "object", + "properties": {"path": {"type": "string", "description": "Absolute or relative path of the file to read."}}, + "required": ["path"], + }, + function=read_file, + ) - :param path: Destination path inside the sandbox. - :param content: Text to write. - :returns: A confirmation message with the file path. - """ - sandbox = self._require_sandbox() + +def create_write_file_tool(sandbox: E2BSandbox) -> Tool: + """ + Create a ``write_file`` :class:`~haystack.tools.Tool` bound to *sandbox*. + + :param sandbox: The :class:`E2BSandbox` instance to write files to. + :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. 
+ """ + + def write_file(path: str, content: str) -> str: + sb = sandbox._require_sandbox() try: - sandbox.files.write(path, content) + sb.files.write(path, content) return f"File written successfully: {path}" except Exception as e: raise RuntimeError(f"Failed to write file '{path}': {e}") from e - def _list_directory(self, path: str) -> str: - """ - List the contents of a directory in the sandbox filesystem. + return Tool( + name="write_file", + description=( + "Write text content to a file in the E2B sandbox filesystem. " + "Parent directories are created automatically if they do not exist. " + "Existing files are overwritten." + ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the file to write."}, + "content": {"type": "string", "description": "Text content to write into the file."}, + }, + "required": ["path", "content"], + }, + function=write_file, + ) - :param path: Directory path to list. - :returns: Newline-separated list of entries (directories end with '/'). - """ - sandbox = self._require_sandbox() + +def create_list_directory_tool(sandbox: E2BSandbox) -> Tool: + """ + Create a ``list_directory`` :class:`~haystack.tools.Tool` bound to *sandbox*. + + :param sandbox: The :class:`E2BSandbox` instance to list directories from. + :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. 
+ """ + + def list_directory(path: str) -> str: + sb = sandbox._require_sandbox() try: - entries = sandbox.files.list(path) + entries = sb.files.list(path) lines = [] for entry in entries: name = entry.name - # Mark directories with a trailing slash for clarity if getattr(entry, "is_dir", False) or getattr(entry, "type", "") == "dir": name = name + "/" lines.append(name) return "\n".join(lines) if lines else "(empty directory)" except Exception as e: raise RuntimeError(f"Failed to list directory '{path}': {e}") from e + + return Tool( + name="list_directory", + description=( + "List the files and subdirectories inside a directory in the E2B sandbox " + "filesystem. Returns a newline-separated list of names with a trailing '/' " + "appended to subdirectory names." + ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the directory to list."} + }, + "required": ["path"], + }, + function=list_directory, + ) + + +# --------------------------------------------------------------------------- +# Convenience factory +# --------------------------------------------------------------------------- + + +def create_e2b_tools( + api_key: Secret | None = None, + sandbox_template: str = "base", + timeout: int = 300, + environment_vars: dict[str, str] | None = None, +) -> tuple["E2BSandbox", list[Tool]]: + """ + Create an :class:`E2BSandbox` and all four E2B tools in one call. + + Returns both the sandbox (for lifecycle management) and the list of tools + so that callers can pass any subset of the tools to an Agent. + + :param api_key: E2B API key. Defaults to ``Secret.from_env_var("E2B_API_KEY")``. + :param sandbox_template: E2B sandbox template name. Defaults to ``"base"``. + :param timeout: Sandbox inactivity timeout in seconds. Defaults to ``300``. + :param environment_vars: Optional environment variables to inject into the sandbox. 
+ :returns: A ``(sandbox, tools)`` tuple where *tools* is a list of four + :class:`~haystack.tools.Tool` objects: ``run_bash_command``, ``read_file``, + ``write_file``, and ``list_directory``. + + ### Usage example + + ```python + from haystack.utils import Secret + from haystack_experimental.tools.e2b import create_e2b_tools + + sandbox, tools = create_e2b_tools( + api_key=Secret.from_env_var("E2B_API_KEY"), + ) + + # Use all four tools: + agent = Agent(chat_generator=..., tools=tools) + + # Or only a subset – they still share the same sandbox connection, + # so run_bash_command and read_file operate inside the same environment: + bash_tool, read_tool = tools[0], tools[1] + agent = Agent(chat_generator=..., tools=[bash_tool, read_tool]) + ``` + """ + if api_key is None: + api_key = Secret.from_env_var("E2B_API_KEY") + sandbox = E2BSandbox( + api_key=api_key, sandbox_template=sandbox_template, timeout=timeout, environment_vars=environment_vars or {} + ) + tools = [ + create_run_bash_command_tool(sandbox), + create_read_file_tool(sandbox), + create_write_file_tool(sandbox), + create_list_directory_tool(sandbox), + ] + return sandbox, tools diff --git a/test/tools/e2b/test_sandbox_toolset.py b/test/tools/e2b/test_sandbox_toolset.py index 6e6c2889..e3cde290 100644 --- a/test/tools/e2b/test_sandbox_toolset.py +++ b/test/tools/e2b/test_sandbox_toolset.py @@ -7,7 +7,14 @@ import pytest from haystack.utils import Secret -from haystack_experimental.tools.e2b.sandbox_toolset import E2BSandboxToolset +from haystack_experimental.tools.e2b.sandbox_toolset import ( + E2BSandbox, + create_e2b_tools, + create_list_directory_tool, + create_read_file_tool, + create_run_bash_command_tool, + create_write_file_tool, +) # --------------------------------------------------------------------------- @@ -15,16 +22,11 @@ # --------------------------------------------------------------------------- -def _make_toolset(**kwargs) -> E2BSandboxToolset: - """Create an E2BSandboxToolset with a 
dummy API key for testing.""" - defaults = { - "api_key": Secret.from_token("test-api-key"), - "sandbox_template": "base", - "timeout": 120, - "environment_vars": {}, - } +def _make_sandbox(**kwargs) -> E2BSandbox: + """Create an E2BSandbox with a dummy API key for testing.""" + defaults = {"api_key": Secret.from_token("test-api-key")} defaults.update(kwargs) - return E2BSandboxToolset(**defaults) + return E2BSandbox(**defaults) def _make_sandbox_mock() -> MagicMock: @@ -34,94 +36,54 @@ def _make_sandbox_mock() -> MagicMock: return sandbox +def _sandbox_with_mock() -> tuple[E2BSandbox, MagicMock]: + """Return an E2BSandbox that already has a mocked underlying sandbox.""" + sb = _make_sandbox() + mock = _make_sandbox_mock() + sb._sandbox = mock + return sb, mock + + # --------------------------------------------------------------------------- -# Initialisation +# E2BSandbox – initialisation # --------------------------------------------------------------------------- -class TestE2BSandboxToolsetInit: - def test_default_parameters(self): - toolset = _make_toolset() - assert toolset.sandbox_template == "base" - assert toolset.timeout == 120 - assert toolset.environment_vars == {} - assert toolset._sandbox is None +class TestE2BSandboxInit: + def test_class_defaults(self): + """Verify the real class defaults, not values set by a helper.""" + sandbox = E2BSandbox(api_key=Secret.from_token("test-api-key")) + assert sandbox.sandbox_template == "base" + assert sandbox.timeout == 300 + assert sandbox.environment_vars == {} + assert sandbox._sandbox is None def test_custom_parameters(self): - toolset = _make_toolset( + sandbox = _make_sandbox( sandbox_template="my-template", timeout=600, environment_vars={"FOO": "bar"}, ) - assert toolset.sandbox_template == "my-template" - assert toolset.timeout == 600 - assert toolset.environment_vars == {"FOO": "bar"} - - def test_tools_are_created(self): - toolset = _make_toolset() - tool_names = {tool.name for tool in toolset} - assert 
tool_names == {"run_bash_command", "read_file", "write_file", "list_directory"} - - def test_tools_have_descriptions(self): - toolset = _make_toolset() - for tool in toolset: - assert tool.description, f"Tool '{tool.name}' has no description" - - def test_tools_have_valid_parameters_schema(self): - toolset = _make_toolset() - for tool in toolset: - assert "type" in tool.parameters - assert "properties" in tool.parameters - - def test_run_bash_command_required_parameter(self): - toolset = _make_toolset() - bash_tool = next(t for t in toolset if t.name == "run_bash_command") - assert "command" in bash_tool.parameters["required"] - - def test_read_file_required_parameter(self): - toolset = _make_toolset() - read_tool = next(t for t in toolset if t.name == "read_file") - assert "path" in read_tool.parameters["required"] - - def test_write_file_required_parameters(self): - toolset = _make_toolset() - write_tool = next(t for t in toolset if t.name == "write_file") - assert "path" in write_tool.parameters["required"] - assert "content" in write_tool.parameters["required"] - - def test_list_directory_required_parameter(self): - toolset = _make_toolset() - list_tool = next(t for t in toolset if t.name == "list_directory") - assert "path" in list_tool.parameters["required"] - - def test_toolset_len(self): - toolset = _make_toolset() - assert len(toolset) == 4 - - def test_toolset_contains_by_name(self): - toolset = _make_toolset() - assert "run_bash_command" in toolset - assert "read_file" in toolset - assert "write_file" in toolset - assert "list_directory" in toolset - assert "nonexistent_tool" not in toolset + assert sandbox.sandbox_template == "my-template" + assert sandbox.timeout == 600 + assert sandbox.environment_vars == {"FOO": "bar"} # --------------------------------------------------------------------------- -# warm_up +# E2BSandbox – warm_up # --------------------------------------------------------------------------- -class TestE2BSandboxToolsetWarmUp: +class 
TestE2BSandboxWarmUp: @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") def test_warm_up_creates_sandbox(self, mock_sandbox_cls, mock_e2b_import): mock_e2b_import.check.return_value = None - mock_sandbox_instance = _make_sandbox_mock() - mock_sandbox_cls.return_value = mock_sandbox_instance + mock_instance = _make_sandbox_mock() + mock_sandbox_cls.return_value = mock_instance - toolset = _make_toolset() - toolset.warm_up() + sb = _make_sandbox(sandbox_template="base", timeout=120) + sb.warm_up() mock_sandbox_cls.assert_called_once_with( api_key="test-api-key", @@ -129,7 +91,7 @@ def test_warm_up_creates_sandbox(self, mock_sandbox_cls, mock_e2b_import): timeout=120, envs=None, ) - assert toolset._sandbox is mock_sandbox_instance + assert sb._sandbox is mock_instance @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") @@ -137,8 +99,8 @@ def test_warm_up_passes_environment_vars(self, mock_sandbox_cls, mock_e2b_import mock_e2b_import.check.return_value = None mock_sandbox_cls.return_value = _make_sandbox_mock() - toolset = _make_toolset(environment_vars={"MY_VAR": "value"}) - toolset.warm_up() + sb = _make_sandbox(environment_vars={"MY_VAR": "value"}) + sb.warm_up() _, kwargs = mock_sandbox_cls.call_args assert kwargs["envs"] == {"MY_VAR": "value"} @@ -149,11 +111,10 @@ def test_warm_up_is_idempotent(self, mock_sandbox_cls, mock_e2b_import): mock_e2b_import.check.return_value = None mock_sandbox_cls.return_value = _make_sandbox_mock() - toolset = _make_toolset() - toolset.warm_up() - toolset.warm_up() + sb = _make_sandbox() + sb.warm_up() + sb.warm_up() - # Sandbox should only be created once mock_sandbox_cls.assert_called_once() @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") @@ -162,260 +123,301 @@ def test_warm_up_raises_on_sandbox_error(self, mock_sandbox_cls, mock_e2b_import 
mock_e2b_import.check.return_value = None mock_sandbox_cls.side_effect = Exception("connection refused") - toolset = _make_toolset() + sb = _make_sandbox() with pytest.raises(RuntimeError, match="Failed to start E2B sandbox"): - toolset.warm_up() + sb.warm_up() # --------------------------------------------------------------------------- -# close +# E2BSandbox – close # --------------------------------------------------------------------------- -class TestE2BSandboxToolsetClose: +class TestE2BSandboxClose: def test_close_without_warm_up_is_noop(self): - toolset = _make_toolset() - toolset.close() # must not raise - assert toolset._sandbox is None + sb = _make_sandbox() + sb.close() + assert sb._sandbox is None def test_close_kills_sandbox(self): - toolset = _make_toolset() - mock_sandbox = _make_sandbox_mock() - toolset._sandbox = mock_sandbox + sb, mock = _sandbox_with_mock() + sb.close() + mock.kill.assert_called_once() + assert sb._sandbox is None - toolset.close() + def test_close_clears_sandbox_on_kill_error(self): + sb, mock = _sandbox_with_mock() + mock.kill.side_effect = Exception("kill failed") + sb.close() # must not raise + assert sb._sandbox is None - mock_sandbox.kill.assert_called_once() - assert toolset._sandbox is None - def test_close_clears_sandbox_on_kill_error(self): - toolset = _make_toolset() - mock_sandbox = _make_sandbox_mock() - mock_sandbox.kill.side_effect = Exception("kill failed") - toolset._sandbox = mock_sandbox +# --------------------------------------------------------------------------- +# E2BSandbox – serialisation +# --------------------------------------------------------------------------- + + +class TestE2BSandboxSerialisation: + def _make_env_sandbox(self, **kwargs) -> E2BSandbox: + defaults = {"api_key": Secret.from_env_var("E2B_API_KEY")} + defaults.update(kwargs) + return E2BSandbox(**defaults) + + def test_to_dict_contains_expected_keys(self): + sb = self._make_env_sandbox(sandbox_template="my-template", timeout=600) + 
data = sb.to_dict() - toolset.close() # must not raise + assert "type" in data + assert "data" in data + assert data["data"]["sandbox_template"] == "my-template" + assert data["data"]["timeout"] == 600 - assert toolset._sandbox is None + def test_to_dict_does_not_include_sandbox_instance(self): + sb = self._make_env_sandbox() + sb._sandbox = _make_sandbox_mock() + data = sb.to_dict() + + assert "_sandbox" not in data["data"] + assert "sandbox" not in data["data"] + + def test_from_dict_round_trip(self): + original = self._make_env_sandbox( + sandbox_template="custom", + timeout=900, + environment_vars={"KEY": "value"}, + ) + data = original.to_dict() + restored = E2BSandbox.from_dict(data) + + assert restored.sandbox_template == "custom" + assert restored.timeout == 900 + assert restored.environment_vars == {"KEY": "value"} + assert restored._sandbox is None + + def test_to_dict_type_is_qualified_class_name(self): + sb = self._make_env_sandbox() + data = sb.to_dict() + assert "E2BSandbox" in data["type"] # --------------------------------------------------------------------------- -# Tool invocations +# Individual tool factories – structure # --------------------------------------------------------------------------- -class TestE2BSandboxToolsetRunBashCommand: - def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: - toolset = _make_toolset() - mock_sandbox = _make_sandbox_mock() - toolset._sandbox = mock_sandbox - return toolset, mock_sandbox +class TestToolFactories: + def test_create_run_bash_command_tool_name_and_schema(self): + sb = _make_sandbox() + tool = create_run_bash_command_tool(sb) + assert tool.name == "run_bash_command" + assert tool.description + assert "command" in tool.parameters["required"] + + def test_create_read_file_tool_name_and_schema(self): + sb = _make_sandbox() + tool = create_read_file_tool(sb) + assert tool.name == "read_file" + assert tool.description + assert "path" in tool.parameters["required"] + + def 
test_create_write_file_tool_name_and_schema(self): + sb = _make_sandbox() + tool = create_write_file_tool(sb) + assert tool.name == "write_file" + assert tool.description + assert "path" in tool.parameters["required"] + assert "content" in tool.parameters["required"] + + def test_create_list_directory_tool_name_and_schema(self): + sb = _make_sandbox() + tool = create_list_directory_tool(sb) + assert tool.name == "list_directory" + assert tool.description + assert "path" in tool.parameters["required"] + + def test_create_e2b_tools_returns_four_tools(self): + sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) + assert len(tools) == 4 + names = {t.name for t in tools} + assert names == {"run_bash_command", "read_file", "write_file", "list_directory"} + + def test_create_e2b_tools_shares_same_sandbox(self): + sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) + # Inject a mock sandbox to verify the tools reference the same E2BSandbox + mock = _make_sandbox_mock() + mock.commands.run.return_value = MagicMock(exit_code=0, stdout="ok", stderr="") + sb._sandbox = mock + + bash_tool = next(t for t in tools if t.name == "run_bash_command") + bash_tool.invoke(command="echo ok") + + mock.commands.run.assert_called_once() + + def test_tools_are_independent_subsets(self): + """Users can select any subset of tools.""" + sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) + bash_only = [t for t in tools if t.name == "run_bash_command"] + assert len(bash_only) == 1 + + def test_create_e2b_tools_default_api_key(self): + """create_e2b_tools uses E2B_API_KEY env var when api_key is omitted.""" + sb, _ = create_e2b_tools() + assert sb.api_key is not None - def test_run_bash_command_returns_formatted_output(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_result = MagicMock() - mock_result.exit_code = 0 - mock_result.stdout = "hello world\n" - mock_result.stderr = "" - 
mock_sandbox.commands.run.return_value = mock_result - output = toolset._run_bash_command("echo hello world") +# --------------------------------------------------------------------------- +# run_bash_command tool behaviour +# --------------------------------------------------------------------------- + + +class TestRunBashCommandTool: + def test_returns_formatted_output(self): + sb, mock = _sandbox_with_mock() + mock_result = MagicMock(exit_code=0, stdout="hello world\n", stderr="") + mock.commands.run.return_value = mock_result + tool = create_run_bash_command_tool(sb) + + output = tool.invoke(command="echo hello world") assert "exit_code: 0" in output assert "hello world" in output - mock_sandbox.commands.run.assert_called_once_with("echo hello world", timeout=60) + mock.commands.run.assert_called_once_with("echo hello world", timeout=60) - def test_run_bash_command_passes_custom_timeout(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.commands.run.return_value = MagicMock(exit_code=0, stdout="", stderr="") + def test_passes_custom_timeout(self): + sb, mock = _sandbox_with_mock() + mock.commands.run.return_value = MagicMock(exit_code=0, stdout="", stderr="") + tool = create_run_bash_command_tool(sb) - toolset._run_bash_command("sleep 5", timeout=30) + tool.invoke(command="sleep 5", timeout=30) - mock_sandbox.commands.run.assert_called_once_with("sleep 5", timeout=30) + mock.commands.run.assert_called_once_with("sleep 5", timeout=30) - def test_run_bash_command_raises_when_no_sandbox(self): - toolset = _make_toolset() + def test_raises_when_no_sandbox(self): + sb = _make_sandbox() + tool = create_run_bash_command_tool(sb) with pytest.raises(RuntimeError, match="E2B sandbox is not running"): - toolset._run_bash_command("ls") - - def test_run_bash_command_wraps_sandbox_exception(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.commands.run.side_effect = Exception("timeout") + tool.invoke(command="ls") + def 
test_wraps_sandbox_exception(self): + sb, mock = _sandbox_with_mock() + mock.commands.run.side_effect = Exception("timeout") + tool = create_run_bash_command_tool(sb) with pytest.raises(RuntimeError, match="Failed to run bash command"): - toolset._run_bash_command("sleep 1000") + tool.invoke(command="sleep 1000") -class TestE2BSandboxToolsetReadFile: - def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: - toolset = _make_toolset() - mock_sandbox = _make_sandbox_mock() - toolset._sandbox = mock_sandbox - return toolset, mock_sandbox +# --------------------------------------------------------------------------- +# read_file tool behaviour +# --------------------------------------------------------------------------- + - def test_read_file_returns_string(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.files.read.return_value = "file content" +class TestReadFileTool: + def test_returns_string(self): + sb, mock = _sandbox_with_mock() + mock.files.read.return_value = "file content" + tool = create_read_file_tool(sb) - result = toolset._read_file("/some/file.txt") + result = tool.invoke(path="/some/file.txt") assert result == "file content" - mock_sandbox.files.read.assert_called_once_with("/some/file.txt") + mock.files.read.assert_called_once_with("/some/file.txt") - def test_read_file_decodes_bytes(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.files.read.return_value = b"binary content" + def test_decodes_bytes(self): + sb, mock = _sandbox_with_mock() + mock.files.read.return_value = b"binary content" + tool = create_read_file_tool(sb) - result = toolset._read_file("/binary.bin") + result = tool.invoke(path="/binary.bin") assert result == "binary content" - def test_read_file_raises_when_no_sandbox(self): - toolset = _make_toolset() + def test_raises_when_no_sandbox(self): + sb = _make_sandbox() + tool = create_read_file_tool(sb) with pytest.raises(RuntimeError, match="E2B sandbox is not 
running"): - toolset._read_file("/some/file.txt") - - def test_read_file_wraps_sandbox_exception(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.files.read.side_effect = Exception("file not found") + tool.invoke(path="/some/file.txt") + def test_wraps_sandbox_exception(self): + sb, mock = _sandbox_with_mock() + mock.files.read.side_effect = Exception("file not found") + tool = create_read_file_tool(sb) with pytest.raises(RuntimeError, match="Failed to read file"): - toolset._read_file("/nonexistent.txt") + tool.invoke(path="/nonexistent.txt") + +# --------------------------------------------------------------------------- +# write_file tool behaviour +# --------------------------------------------------------------------------- -class TestE2BSandboxToolsetWriteFile: - def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: - toolset = _make_toolset() - mock_sandbox = _make_sandbox_mock() - toolset._sandbox = mock_sandbox - return toolset, mock_sandbox - def test_write_file_returns_confirmation(self): - toolset, mock_sandbox = self._toolset_with_sandbox() +class TestWriteFileTool: + def test_returns_confirmation(self): + sb, mock = _sandbox_with_mock() + tool = create_write_file_tool(sb) - result = toolset._write_file("/output/result.txt", "hello") + result = tool.invoke(path="/output/result.txt", content="hello") assert "/output/result.txt" in result - mock_sandbox.files.write.assert_called_once_with("/output/result.txt", "hello") + mock.files.write.assert_called_once_with("/output/result.txt", "hello") - def test_write_file_raises_when_no_sandbox(self): - toolset = _make_toolset() + def test_raises_when_no_sandbox(self): + sb = _make_sandbox() + tool = create_write_file_tool(sb) with pytest.raises(RuntimeError, match="E2B sandbox is not running"): - toolset._write_file("/some/path.txt", "content") - - def test_write_file_wraps_sandbox_exception(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - 
mock_sandbox.files.write.side_effect = Exception("permission denied") + tool.invoke(path="/some/path.txt", content="content") + def test_wraps_sandbox_exception(self): + sb, mock = _sandbox_with_mock() + mock.files.write.side_effect = Exception("permission denied") + tool = create_write_file_tool(sb) with pytest.raises(RuntimeError, match="Failed to write file"): - toolset._write_file("/protected/file.txt", "data") + tool.invoke(path="/protected/file.txt", content="data") -class TestE2BSandboxToolsetListDirectory: - def _toolset_with_sandbox(self) -> tuple[E2BSandboxToolset, MagicMock]: - toolset = _make_toolset() - mock_sandbox = _make_sandbox_mock() - toolset._sandbox = mock_sandbox - return toolset, mock_sandbox +# --------------------------------------------------------------------------- +# list_directory tool behaviour +# --------------------------------------------------------------------------- + +class TestListDirectoryTool: def _make_entry(self, name: str, is_dir: bool = False) -> MagicMock: entry = MagicMock() entry.name = name entry.is_dir = is_dir return entry - def test_list_directory_returns_names(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.files.list.return_value = [ + def test_returns_names(self): + sb, mock = _sandbox_with_mock() + mock.files.list.return_value = [ self._make_entry("file.txt"), self._make_entry("subdir", is_dir=True), ] + tool = create_list_directory_tool(sb) - result = toolset._list_directory("/home/user") + result = tool.invoke(path="/home/user") assert "file.txt" in result assert "subdir/" in result - mock_sandbox.files.list.assert_called_once_with("/home/user") + mock.files.list.assert_called_once_with("/home/user") - def test_list_directory_empty(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.files.list.return_value = [] + def test_empty_directory(self): + sb, mock = _sandbox_with_mock() + mock.files.list.return_value = [] + tool = create_list_directory_tool(sb) - 
result = toolset._list_directory("/empty") + result = tool.invoke(path="/empty") assert result == "(empty directory)" - def test_list_directory_raises_when_no_sandbox(self): - toolset = _make_toolset() + def test_raises_when_no_sandbox(self): + sb = _make_sandbox() + tool = create_list_directory_tool(sb) with pytest.raises(RuntimeError, match="E2B sandbox is not running"): - toolset._list_directory("/home") - - def test_list_directory_wraps_sandbox_exception(self): - toolset, mock_sandbox = self._toolset_with_sandbox() - mock_sandbox.files.list.side_effect = Exception("not a directory") + tool.invoke(path="/home") + def test_wraps_sandbox_exception(self): + sb, mock = _sandbox_with_mock() + mock.files.list.side_effect = Exception("not a directory") + tool = create_list_directory_tool(sb) with pytest.raises(RuntimeError, match="Failed to list directory"): - toolset._list_directory("/nonexistent") - - -# --------------------------------------------------------------------------- -# Serialisation -# --------------------------------------------------------------------------- - - -class TestE2BSandboxToolsetSerialisation: - """Serialisation tests use env-var secrets (the only serialisable Secret type).""" - - def _make_env_toolset(self, **kwargs) -> E2BSandboxToolset: - defaults = { - "api_key": Secret.from_env_var("E2B_API_KEY"), - "sandbox_template": "base", - "timeout": 120, - "environment_vars": {}, - } - defaults.update(kwargs) - return E2BSandboxToolset(**defaults) - - def test_to_dict_contains_expected_keys(self): - toolset = self._make_env_toolset(sandbox_template="my-template", timeout=600) - data = toolset.to_dict() - - assert "type" in data - assert "data" in data - assert data["data"]["sandbox_template"] == "my-template" - assert data["data"]["timeout"] == 600 - - def test_to_dict_does_not_include_sandbox_instance(self): - toolset = self._make_env_toolset() - toolset._sandbox = _make_sandbox_mock() # simulate warm-up - data = toolset.to_dict() - - assert 
"_sandbox" not in data["data"] - assert "sandbox" not in data["data"] - - def test_from_dict_round_trip(self): - original = self._make_env_toolset( - sandbox_template="custom", - timeout=900, - environment_vars={"KEY": "value"}, - ) - data = original.to_dict() - restored = E2BSandboxToolset.from_dict(data) - - assert restored.sandbox_template == "custom" - assert restored.timeout == 900 - assert restored.environment_vars == {"KEY": "value"} - assert restored._sandbox is None # sandbox not restored - - def test_from_dict_creates_tools(self): - original = self._make_env_toolset() - data = original.to_dict() - restored = E2BSandboxToolset.from_dict(data) - - assert len(restored) == 4 - assert "run_bash_command" in restored - - def test_to_dict_type_is_qualified_class_name(self): - toolset = self._make_env_toolset() - data = toolset.to_dict() - assert "E2BSandboxToolset" in data["type"] + tool.invoke(path="/nonexistent") From 2a061a6ef24159956140844ca4ce48d4d5480dc7 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 08:45:58 +0000 Subject: [PATCH 05/10] Add E2B agent example script demonstrating shared sandbox across tools --- e2b_agent_example.py | 91 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 e2b_agent_example.py diff --git a/e2b_agent_example.py b/e2b_agent_example.py new file mode 100644 index 00000000..86014c27 --- /dev/null +++ b/e2b_agent_example.py @@ -0,0 +1,91 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Example: Haystack Agent with E2B sandbox tools. + +Demonstrates that all four tools (run_bash_command, read_file, write_file, +list_directory) share the same sandbox instance, so the agent can write a +file in one step and read it back / execute it in the next. 
+ +Requirements: + pip install haystack-ai e2b openai + +Environment variables: + E2B_API_KEY – your E2B API key + OPENAI_API_KEY – your OpenAI API key (or swap the generator below) +""" + +import sys + +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.dataclasses import ChatMessage + +from haystack_experimental.components.agents import Agent +from haystack_experimental.tools.e2b import create_e2b_tools + +# --------------------------------------------------------------------------- +# Example queries that exercise cross-tool state sharing: +# 1. The agent writes a Python script to the sandbox filesystem. +# 2. It executes the script via bash and captures stdout. +# 3. It reads the output file back (or lists a directory) to verify results. +# --------------------------------------------------------------------------- +EXAMPLE_QUERIES = [ + # Simple: purely bash-based data wrangling + ( + "Generate the first 10 Fibonacci numbers using a bash one-liner " + "and show me the results." + ), + # Cross-tool: write → execute → read + ( + "Write a Python script to /tmp/primes.py that prints all prime numbers " + "up to 50, run it, and then read the file back so I can see both the " + "script and its output." + ), + # Multi-step: write → list → bash + ( + "Create a directory /tmp/workspace, write three small text files into it " + "with different content, list the directory to confirm they exist, and " + "then use bash to count the total number of words across all three files." + ), +] + + +def run(query: str, model: str = "gpt-4o-mini") -> None: + print("\n" + "=" * 70) + print(f"Query: {query}") + print("=" * 70) + + # One sandbox, four tools – all sharing the same live sandbox process. + sandbox, tools = create_e2b_tools() + + agent = Agent( + chat_generator=OpenAIChatGenerator(model=model), + tools=tools, + system_prompt=( + "You are a helpful coding assistant with access to a live Linux sandbox. 
" + "Use the available tools freely to explore, write files, and run commands. " + "All tools operate inside the same sandbox environment, so files written " + "with write_file are immediately available to run_bash_command and read_file." + ), + max_agent_steps=15, + ) + + try: + result = agent.run(messages=[ChatMessage.from_user(query)]) + print("\n--- Agent response ---") + print(result["last_message"].text) + finally: + # Always close the sandbox to release cloud resources. + sandbox.close() + + +if __name__ == "__main__": + # Run a specific query index (0/1/2) or all of them by default. + if len(sys.argv) > 1: + idx = int(sys.argv[1]) + run(EXAMPLE_QUERIES[idx]) + else: + for query in EXAMPLE_QUERIES: + run(query) From 4060320cb5314cddea9582d533a0c89b6077ad7b Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 08:57:27 +0000 Subject: [PATCH 06/10] Replace factory functions with Tool subclasses for E2B tools RunBashCommandTool, ReadFileTool, WriteFileTool, ListDirectoryTool now subclass haystack.tools.Tool directly. Users instantiate them with a shared E2BSandbox instance, mirroring how chat generators are passed to Agent. The create_e2b_tools() convenience function is kept and updated to use the new classes. 
--- e2b_agent_example.py | 18 +- haystack_experimental/tools/e2b/__init__.py | 16 +- .../tools/e2b/sandbox_toolset.py | 344 +++++++++++------- test/tools/e2b/test_sandbox_toolset.py | 105 +++--- 4 files changed, 291 insertions(+), 192 deletions(-) diff --git a/e2b_agent_example.py b/e2b_agent_example.py index 86014c27..c19a6f88 100644 --- a/e2b_agent_example.py +++ b/e2b_agent_example.py @@ -23,7 +23,13 @@ from haystack.dataclasses import ChatMessage from haystack_experimental.components.agents import Agent -from haystack_experimental.tools.e2b import create_e2b_tools +from haystack_experimental.tools.e2b import ( + E2BSandbox, + ListDirectoryTool, + ReadFileTool, + RunBashCommandTool, + WriteFileTool, +) # --------------------------------------------------------------------------- # Example queries that exercise cross-tool state sharing: @@ -57,8 +63,14 @@ def run(query: str, model: str = "gpt-4o-mini") -> None: print(f"Query: {query}") print("=" * 70) - # One sandbox, four tools – all sharing the same live sandbox process. - sandbox, tools = create_e2b_tools() + # One sandbox passed to each tool class – they all share the same live sandbox process. 
+ sandbox = E2BSandbox() + tools = [ + RunBashCommandTool(sandbox=sandbox), + ReadFileTool(sandbox=sandbox), + WriteFileTool(sandbox=sandbox), + ListDirectoryTool(sandbox=sandbox), + ] agent = Agent( chat_generator=OpenAIChatGenerator(model=model), diff --git a/haystack_experimental/tools/e2b/__init__.py b/haystack_experimental/tools/e2b/__init__.py index be52e122..c5d1eef1 100644 --- a/haystack_experimental/tools/e2b/__init__.py +++ b/haystack_experimental/tools/e2b/__init__.py @@ -10,21 +10,21 @@ _import_structure = { "sandbox_toolset": [ "E2BSandbox", + "RunBashCommandTool", + "ReadFileTool", + "WriteFileTool", + "ListDirectoryTool", "create_e2b_tools", - "create_run_bash_command_tool", - "create_read_file_tool", - "create_write_file_tool", - "create_list_directory_tool", ] } if TYPE_CHECKING: from .sandbox_toolset import E2BSandbox as E2BSandbox + from .sandbox_toolset import ListDirectoryTool as ListDirectoryTool + from .sandbox_toolset import ReadFileTool as ReadFileTool + from .sandbox_toolset import RunBashCommandTool as RunBashCommandTool + from .sandbox_toolset import WriteFileTool as WriteFileTool from .sandbox_toolset import create_e2b_tools as create_e2b_tools - from .sandbox_toolset import create_list_directory_tool as create_list_directory_tool - from .sandbox_toolset import create_read_file_tool as create_read_file_tool - from .sandbox_toolset import create_run_bash_command_tool as create_run_bash_command_tool - from .sandbox_toolset import create_write_file_tool as create_write_file_tool else: sys.modules[__name__] = LazyImporter(name=__name__, module_file=__file__, import_structure=_import_structure) diff --git a/haystack_experimental/tools/e2b/sandbox_toolset.py b/haystack_experimental/tools/e2b/sandbox_toolset.py index 09c5625a..5f5a12ea 100644 --- a/haystack_experimental/tools/e2b/sandbox_toolset.py +++ b/haystack_experimental/tools/e2b/sandbox_toolset.py @@ -21,29 +21,36 @@ class E2BSandbox: """ Manages the lifecycle of an E2B cloud sandbox. 
- Use :func:`create_e2b_tools` to obtain both an ``E2BSandbox`` and the - individual :class:`~haystack.tools.Tool` objects at once, or instantiate - this class directly and pass it to the individual ``create_*_tool`` - factory functions to build a custom subset of tools. + Instantiate this class and pass it to one or more E2B tool classes + (``RunBashCommandTool``, ``ReadFileTool``, ``WriteFileTool``, + ``ListDirectoryTool``) to share a single sandbox environment across all + tools. All tools that receive the same ``E2BSandbox`` instance operate + inside the same live sandbox process. ### Usage example ```python from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage - from haystack.utils import Secret from haystack_experimental.components.agents import Agent - from haystack_experimental.tools.e2b import create_e2b_tools - - sandbox, tools = create_e2b_tools( - api_key=Secret.from_env_var("E2B_API_KEY"), - sandbox_template="base", - timeout=300, + from haystack_experimental.tools.e2b import ( + E2BSandbox, + RunBashCommandTool, + ReadFileTool, + WriteFileTool, + ListDirectoryTool, ) + + sandbox = E2BSandbox() agent = Agent( chat_generator=OpenAIChatGenerator(model="gpt-4o"), - tools=tools, + tools=[ + RunBashCommandTool(sandbox=sandbox), + ReadFileTool(sandbox=sandbox), + WriteFileTool(sandbox=sandbox), + ListDirectoryTool(sandbox=sandbox), + ], ) ``` @@ -155,7 +162,7 @@ def from_dict(cls, data: dict[str, Any]) -> "E2BSandbox": ) # ------------------------------------------------------------------ - # Internal helpers (used by the tool factories) + # Internal helpers (used by the tool classes) # ------------------------------------------------------------------ def _require_sandbox(self) -> "Sandbox": @@ -169,156 +176,215 @@ def _require_sandbox(self) -> "Sandbox": # --------------------------------------------------------------------------- -# Individual tool factories +# Tool classes # 
--------------------------------------------------------------------------- -def create_run_bash_command_tool(sandbox: E2BSandbox) -> Tool: +class RunBashCommandTool(Tool): """ - Create a ``run_bash_command`` :class:`~haystack.tools.Tool` bound to *sandbox*. + A :class:`~haystack.tools.Tool` that executes bash commands inside an E2B sandbox. - :param sandbox: The :class:`E2BSandbox` instance that will execute commands. - :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. + + ### Usage example + + ```python + sandbox = E2BSandbox() + bash_tool = RunBashCommandTool(sandbox=sandbox) + read_tool = ReadFileTool(sandbox=sandbox) + agent = Agent(chat_generator=..., tools=[bash_tool, read_tool]) + ``` """ - def run_bash_command(command: str, timeout: int = 60) -> str: - sb = sandbox._require_sandbox() - try: - result = sb.commands.run(command, timeout=timeout) - return f"exit_code: {result.exit_code}\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}" - except Exception as e: - raise RuntimeError(f"Failed to run bash command: {e}") from e - - return Tool( - name="run_bash_command", - description=( - "Execute a bash command inside the E2B sandbox and return the combined stdout, " - "stderr, and exit code. Use this to run shell scripts, install packages, compile " - "code, or perform any system-level operation." - ), - parameters={ - "type": "object", - "properties": { - "command": {"type": "string", "description": "The bash command to execute."}, - "timeout": { - "type": "integer", - "description": ( - "Maximum number of seconds to wait for the command to finish. Defaults to 60 seconds." - ), + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance that will execute commands. 
+ """ + + def run_bash_command(command: str, timeout: int = 60) -> str: + sb = sandbox._require_sandbox() + try: + result = sb.commands.run(command, timeout=timeout) + return f"exit_code: {result.exit_code}\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}" + except Exception as e: + raise RuntimeError(f"Failed to run bash command: {e}") from e + + super().__init__( + name="run_bash_command", + description=( + "Execute a bash command inside the E2B sandbox and return the combined stdout, " + "stderr, and exit code. Use this to run shell scripts, install packages, compile " + "code, or perform any system-level operation." + ), + parameters={ + "type": "object", + "properties": { + "command": {"type": "string", "description": "The bash command to execute."}, + "timeout": { + "type": "integer", + "description": ( + "Maximum number of seconds to wait for the command to finish. Defaults to 60 seconds." + ), + }, }, + "required": ["command"], }, - "required": ["command"], - }, - function=run_bash_command, - ) + function=run_bash_command, + ) + self._e2b_sandbox = sandbox -def create_read_file_tool(sandbox: E2BSandbox) -> Tool: +class ReadFileTool(Tool): """ - Create a ``read_file`` :class:`~haystack.tools.Tool` bound to *sandbox*. + A :class:`~haystack.tools.Tool` that reads files from an E2B sandbox filesystem. - :param sandbox: The :class:`E2BSandbox` instance to read files from. - :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. 
+ + ### Usage example + + ```python + sandbox = E2BSandbox() + read_tool = ReadFileTool(sandbox=sandbox) + agent = Agent(chat_generator=..., tools=[read_tool]) + ``` """ - def read_file(path: str) -> str: - sb = sandbox._require_sandbox() - try: - content = sb.files.read(path) - if isinstance(content, bytes): - return content.decode("utf-8", errors="replace") - return str(content) - except Exception as e: - raise RuntimeError(f"Failed to read file '{path}': {e}") from e - - return Tool( - name="read_file", - description=( - "Read the text content of a file from the E2B sandbox filesystem and return it " - "as a string. The file must exist; use list_directory to verify paths first." - ), - parameters={ - "type": "object", - "properties": {"path": {"type": "string", "description": "Absolute or relative path of the file to read."}}, - "required": ["path"], - }, - function=read_file, - ) + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance to read files from. + """ + def read_file(path: str) -> str: + sb = sandbox._require_sandbox() + try: + content = sb.files.read(path) + if isinstance(content, bytes): + return content.decode("utf-8", errors="replace") + return str(content) + except Exception as e: + raise RuntimeError(f"Failed to read file '{path}': {e}") from e + + super().__init__( + name="read_file", + description=( + "Read the text content of a file from the E2B sandbox filesystem and return it " + "as a string. The file must exist; use list_directory to verify paths first." + ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the file to read."} + }, + "required": ["path"], + }, + function=read_file, + ) + self._e2b_sandbox = sandbox -def create_write_file_tool(sandbox: E2BSandbox) -> Tool: + +class WriteFileTool(Tool): """ - Create a ``write_file`` :class:`~haystack.tools.Tool` bound to *sandbox*. 
+ A :class:`~haystack.tools.Tool` that writes files to an E2B sandbox filesystem. + + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. - :param sandbox: The :class:`E2BSandbox` instance to write files to. - :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. + ### Usage example + + ```python + sandbox = E2BSandbox() + write_tool = WriteFileTool(sandbox=sandbox) + agent = Agent(chat_generator=..., tools=[write_tool]) + ``` """ - def write_file(path: str, content: str) -> str: - sb = sandbox._require_sandbox() - try: - sb.files.write(path, content) - return f"File written successfully: {path}" - except Exception as e: - raise RuntimeError(f"Failed to write file '{path}': {e}") from e - - return Tool( - name="write_file", - description=( - "Write text content to a file in the E2B sandbox filesystem. " - "Parent directories are created automatically if they do not exist. " - "Existing files are overwritten." - ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the file to write."}, - "content": {"type": "string", "description": "Text content to write into the file."}, + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance to write files to. + """ + + def write_file(path: str, content: str) -> str: + sb = sandbox._require_sandbox() + try: + sb.files.write(path, content) + return f"File written successfully: {path}" + except Exception as e: + raise RuntimeError(f"Failed to write file '{path}': {e}") from e + + super().__init__( + name="write_file", + description=( + "Write text content to a file in the E2B sandbox filesystem. " + "Parent directories are created automatically if they do not exist. " + "Existing files are overwritten." 
+ ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the file to write."}, + "content": {"type": "string", "description": "Text content to write into the file."}, + }, + "required": ["path", "content"], }, - "required": ["path", "content"], - }, - function=write_file, - ) + function=write_file, + ) + self._e2b_sandbox = sandbox -def create_list_directory_tool(sandbox: E2BSandbox) -> Tool: +class ListDirectoryTool(Tool): """ - Create a ``list_directory`` :class:`~haystack.tools.Tool` bound to *sandbox*. + A :class:`~haystack.tools.Tool` that lists directory contents in an E2B sandbox. + + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. + + ### Usage example - :param sandbox: The :class:`E2BSandbox` instance to list directories from. - :returns: A :class:`~haystack.tools.Tool` ready to be passed to an Agent. + ```python + sandbox = E2BSandbox() + list_tool = ListDirectoryTool(sandbox=sandbox) + agent = Agent(chat_generator=..., tools=[list_tool]) + ``` """ - def list_directory(path: str) -> str: - sb = sandbox._require_sandbox() - try: - entries = sb.files.list(path) - lines = [] - for entry in entries: - name = entry.name - if getattr(entry, "is_dir", False) or getattr(entry, "type", "") == "dir": - name = name + "/" - lines.append(name) - return "\n".join(lines) if lines else "(empty directory)" - except Exception as e: - raise RuntimeError(f"Failed to list directory '{path}': {e}") from e - - return Tool( - name="list_directory", - description=( - "List the files and subdirectories inside a directory in the E2B sandbox " - "filesystem. Returns a newline-separated list of names with a trailing '/' " - "appended to subdirectory names." 
- ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the directory to list."} + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance to list directories from. + """ + + def list_directory(path: str) -> str: + sb = sandbox._require_sandbox() + try: + entries = sb.files.list(path) + lines = [] + for entry in entries: + name = entry.name + if getattr(entry, "is_dir", False) or getattr(entry, "type", "") == "dir": + name = name + "/" + lines.append(name) + return "\n".join(lines) if lines else "(empty directory)" + except Exception as e: + raise RuntimeError(f"Failed to list directory '{path}': {e}") from e + + super().__init__( + name="list_directory", + description=( + "List the files and subdirectories inside a directory in the E2B sandbox " + "filesystem. Returns a newline-separated list of names with a trailing '/' " + "appended to subdirectory names." + ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the directory to list."} + }, + "required": ["path"], }, - "required": ["path"], - }, - function=list_directory, - ) + function=list_directory, + ) + self._e2b_sandbox = sandbox # --------------------------------------------------------------------------- @@ -370,10 +436,10 @@ def create_e2b_tools( sandbox = E2BSandbox( api_key=api_key, sandbox_template=sandbox_template, timeout=timeout, environment_vars=environment_vars or {} ) - tools = [ - create_run_bash_command_tool(sandbox), - create_read_file_tool(sandbox), - create_write_file_tool(sandbox), - create_list_directory_tool(sandbox), + tools: list[Tool] = [ + RunBashCommandTool(sandbox=sandbox), + ReadFileTool(sandbox=sandbox), + WriteFileTool(sandbox=sandbox), + ListDirectoryTool(sandbox=sandbox), ] return sandbox, tools diff --git a/test/tools/e2b/test_sandbox_toolset.py 
b/test/tools/e2b/test_sandbox_toolset.py index e3cde290..e1b71f28 100644 --- a/test/tools/e2b/test_sandbox_toolset.py +++ b/test/tools/e2b/test_sandbox_toolset.py @@ -5,15 +5,16 @@ from unittest.mock import MagicMock, patch import pytest +from haystack.tools.errors import ToolInvocationError from haystack.utils import Secret from haystack_experimental.tools.e2b.sandbox_toolset import ( E2BSandbox, + ListDirectoryTool, + ReadFileTool, + RunBashCommandTool, + WriteFileTool, create_e2b_tools, - create_list_directory_tool, - create_read_file_tool, - create_run_bash_command_tool, - create_write_file_tool, ) @@ -201,49 +202,62 @@ def test_to_dict_type_is_qualified_class_name(self): # --------------------------------------------------------------------------- -# Individual tool factories – structure +# Tool classes – structure # --------------------------------------------------------------------------- -class TestToolFactories: - def test_create_run_bash_command_tool_name_and_schema(self): +class TestToolClasses: + def test_run_bash_command_tool_name_and_schema(self): sb = _make_sandbox() - tool = create_run_bash_command_tool(sb) + tool = RunBashCommandTool(sandbox=sb) assert tool.name == "run_bash_command" assert tool.description assert "command" in tool.parameters["required"] - def test_create_read_file_tool_name_and_schema(self): + def test_read_file_tool_name_and_schema(self): sb = _make_sandbox() - tool = create_read_file_tool(sb) + tool = ReadFileTool(sandbox=sb) assert tool.name == "read_file" assert tool.description assert "path" in tool.parameters["required"] - def test_create_write_file_tool_name_and_schema(self): + def test_write_file_tool_name_and_schema(self): sb = _make_sandbox() - tool = create_write_file_tool(sb) + tool = WriteFileTool(sandbox=sb) assert tool.name == "write_file" assert tool.description assert "path" in tool.parameters["required"] assert "content" in tool.parameters["required"] - def test_create_list_directory_tool_name_and_schema(self): 
+ def test_list_directory_tool_name_and_schema(self): sb = _make_sandbox() - tool = create_list_directory_tool(sb) + tool = ListDirectoryTool(sandbox=sb) assert tool.name == "list_directory" assert tool.description assert "path" in tool.parameters["required"] + def test_tool_stores_sandbox_reference(self): + sb = _make_sandbox() + tool = RunBashCommandTool(sandbox=sb) + assert tool._e2b_sandbox is sb + def test_create_e2b_tools_returns_four_tools(self): sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) assert len(tools) == 4 names = {t.name for t in tools} assert names == {"run_bash_command", "read_file", "write_file", "list_directory"} + def test_create_e2b_tools_returns_correct_types(self): + sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) + tool_types = {type(t) for t in tools} + assert tool_types == {RunBashCommandTool, ReadFileTool, WriteFileTool, ListDirectoryTool} + def test_create_e2b_tools_shares_same_sandbox(self): sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) - # Inject a mock sandbox to verify the tools reference the same E2BSandbox + # All tools must reference the same E2BSandbox instance + assert all(t._e2b_sandbox is sb for t in tools) + + # Inject a mock and verify the tool actually calls through it mock = _make_sandbox_mock() mock.commands.run.return_value = MagicMock(exit_code=0, stdout="ok", stderr="") sb._sandbox = mock @@ -264,9 +278,16 @@ def test_create_e2b_tools_default_api_key(self): sb, _ = create_e2b_tools() assert sb.api_key is not None + def test_tools_from_same_sandbox_share_state(self): + """Tools instantiated with the same sandbox share state.""" + sb = _make_sandbox() + bash_tool = RunBashCommandTool(sandbox=sb) + read_tool = ReadFileTool(sandbox=sb) + assert bash_tool._e2b_sandbox is read_tool._e2b_sandbox + # --------------------------------------------------------------------------- -# run_bash_command tool behaviour +# RunBashCommandTool behaviour # 
--------------------------------------------------------------------------- @@ -275,7 +296,7 @@ def test_returns_formatted_output(self): sb, mock = _sandbox_with_mock() mock_result = MagicMock(exit_code=0, stdout="hello world\n", stderr="") mock.commands.run.return_value = mock_result - tool = create_run_bash_command_tool(sb) + tool = RunBashCommandTool(sandbox=sb) output = tool.invoke(command="echo hello world") @@ -286,7 +307,7 @@ def test_returns_formatted_output(self): def test_passes_custom_timeout(self): sb, mock = _sandbox_with_mock() mock.commands.run.return_value = MagicMock(exit_code=0, stdout="", stderr="") - tool = create_run_bash_command_tool(sb) + tool = RunBashCommandTool(sandbox=sb) tool.invoke(command="sleep 5", timeout=30) @@ -294,20 +315,20 @@ def test_passes_custom_timeout(self): def test_raises_when_no_sandbox(self): sb = _make_sandbox() - tool = create_run_bash_command_tool(sb) - with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + tool = RunBashCommandTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="E2B sandbox is not running"): tool.invoke(command="ls") def test_wraps_sandbox_exception(self): sb, mock = _sandbox_with_mock() mock.commands.run.side_effect = Exception("timeout") - tool = create_run_bash_command_tool(sb) - with pytest.raises(RuntimeError, match="Failed to run bash command"): + tool = RunBashCommandTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="Failed to run bash command"): tool.invoke(command="sleep 1000") # --------------------------------------------------------------------------- -# read_file tool behaviour +# ReadFileTool behaviour # --------------------------------------------------------------------------- @@ -315,7 +336,7 @@ class TestReadFileTool: def test_returns_string(self): sb, mock = _sandbox_with_mock() mock.files.read.return_value = "file content" - tool = create_read_file_tool(sb) + tool = ReadFileTool(sandbox=sb) result = tool.invoke(path="/some/file.txt") 
@@ -325,7 +346,7 @@ def test_returns_string(self): def test_decodes_bytes(self): sb, mock = _sandbox_with_mock() mock.files.read.return_value = b"binary content" - tool = create_read_file_tool(sb) + tool = ReadFileTool(sandbox=sb) result = tool.invoke(path="/binary.bin") @@ -333,27 +354,27 @@ def test_decodes_bytes(self): def test_raises_when_no_sandbox(self): sb = _make_sandbox() - tool = create_read_file_tool(sb) - with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + tool = ReadFileTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="E2B sandbox is not running"): tool.invoke(path="/some/file.txt") def test_wraps_sandbox_exception(self): sb, mock = _sandbox_with_mock() mock.files.read.side_effect = Exception("file not found") - tool = create_read_file_tool(sb) - with pytest.raises(RuntimeError, match="Failed to read file"): + tool = ReadFileTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="Failed to read file"): tool.invoke(path="/nonexistent.txt") # --------------------------------------------------------------------------- -# write_file tool behaviour +# WriteFileTool behaviour # --------------------------------------------------------------------------- class TestWriteFileTool: def test_returns_confirmation(self): sb, mock = _sandbox_with_mock() - tool = create_write_file_tool(sb) + tool = WriteFileTool(sandbox=sb) result = tool.invoke(path="/output/result.txt", content="hello") @@ -362,20 +383,20 @@ def test_returns_confirmation(self): def test_raises_when_no_sandbox(self): sb = _make_sandbox() - tool = create_write_file_tool(sb) - with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + tool = WriteFileTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="E2B sandbox is not running"): tool.invoke(path="/some/path.txt", content="content") def test_wraps_sandbox_exception(self): sb, mock = _sandbox_with_mock() mock.files.write.side_effect = Exception("permission denied") - tool = 
create_write_file_tool(sb) - with pytest.raises(RuntimeError, match="Failed to write file"): + tool = WriteFileTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="Failed to write file"): tool.invoke(path="/protected/file.txt", content="data") # --------------------------------------------------------------------------- -# list_directory tool behaviour +# ListDirectoryTool behaviour # --------------------------------------------------------------------------- @@ -392,7 +413,7 @@ def test_returns_names(self): self._make_entry("file.txt"), self._make_entry("subdir", is_dir=True), ] - tool = create_list_directory_tool(sb) + tool = ListDirectoryTool(sandbox=sb) result = tool.invoke(path="/home/user") @@ -403,7 +424,7 @@ def test_returns_names(self): def test_empty_directory(self): sb, mock = _sandbox_with_mock() mock.files.list.return_value = [] - tool = create_list_directory_tool(sb) + tool = ListDirectoryTool(sandbox=sb) result = tool.invoke(path="/empty") @@ -411,13 +432,13 @@ def test_empty_directory(self): def test_raises_when_no_sandbox(self): sb = _make_sandbox() - tool = create_list_directory_tool(sb) - with pytest.raises(RuntimeError, match="E2B sandbox is not running"): + tool = ListDirectoryTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="E2B sandbox is not running"): tool.invoke(path="/home") def test_wraps_sandbox_exception(self): sb, mock = _sandbox_with_mock() mock.files.list.side_effect = Exception("not a directory") - tool = create_list_directory_tool(sb) - with pytest.raises(RuntimeError, match="Failed to list directory"): + tool = ListDirectoryTool(sandbox=sb) + with pytest.raises(ToolInvocationError, match="Failed to list directory"): tool.invoke(path="/nonexistent") From 4e2491e570034adc93b7ab7d4a77c00c7bbc3d1f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 08:58:03 +0000 Subject: [PATCH 07/10] Move e2b_agent_example.py into haystack_experimental/tools/e2b/ --- .../tools/e2b/e2b_agent_example.py | 0 1 
file changed, 0 insertions(+), 0 deletions(-) rename e2b_agent_example.py => haystack_experimental/tools/e2b/e2b_agent_example.py (100%) diff --git a/e2b_agent_example.py b/haystack_experimental/tools/e2b/e2b_agent_example.py similarity index 100% rename from e2b_agent_example.py rename to haystack_experimental/tools/e2b/e2b_agent_example.py From 6dce4b39310db354fbade2f4058a14a9e868aed2 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 11 Mar 2026 09:03:08 +0000 Subject: [PATCH 08/10] Split E2B tools into dedicated files per class - e2b_sandbox.py: E2BSandbox - bash_tool.py: RunBashCommandTool - read_file_tool.py: ReadFileTool - write_file_tool.py: WriteFileTool - list_directory_tool.py: ListDirectoryTool - sandbox_toolset.py: create_e2b_tools (convenience function only) --- haystack_experimental/tools/e2b/__init__.py | 24 +- haystack_experimental/tools/e2b/bash_tool.py | 68 +++ .../tools/e2b/e2b_sandbox.py | 173 ++++++++ .../tools/e2b/list_directory_tool.py | 62 +++ .../tools/e2b/read_file_tool.py | 57 +++ .../tools/e2b/sandbox_toolset.py | 394 +----------------- .../tools/e2b/write_file_tool.py | 57 +++ test/tools/e2b/test_sandbox_toolset.py | 30 +- 8 files changed, 449 insertions(+), 416 deletions(-) create mode 100644 haystack_experimental/tools/e2b/bash_tool.py create mode 100644 haystack_experimental/tools/e2b/e2b_sandbox.py create mode 100644 haystack_experimental/tools/e2b/list_directory_tool.py create mode 100644 haystack_experimental/tools/e2b/read_file_tool.py create mode 100644 haystack_experimental/tools/e2b/write_file_tool.py diff --git a/haystack_experimental/tools/e2b/__init__.py b/haystack_experimental/tools/e2b/__init__.py index c5d1eef1..e8392743 100644 --- a/haystack_experimental/tools/e2b/__init__.py +++ b/haystack_experimental/tools/e2b/__init__.py @@ -8,23 +8,21 @@ from lazy_imports import LazyImporter _import_structure = { - "sandbox_toolset": [ - "E2BSandbox", - "RunBashCommandTool", - "ReadFileTool", - "WriteFileTool", - 
"ListDirectoryTool", - "create_e2b_tools", - ] + "e2b_sandbox": ["E2BSandbox"], + "bash_tool": ["RunBashCommandTool"], + "read_file_tool": ["ReadFileTool"], + "write_file_tool": ["WriteFileTool"], + "list_directory_tool": ["ListDirectoryTool"], + "sandbox_toolset": ["create_e2b_tools"], } if TYPE_CHECKING: - from .sandbox_toolset import E2BSandbox as E2BSandbox - from .sandbox_toolset import ListDirectoryTool as ListDirectoryTool - from .sandbox_toolset import ReadFileTool as ReadFileTool - from .sandbox_toolset import RunBashCommandTool as RunBashCommandTool - from .sandbox_toolset import WriteFileTool as WriteFileTool + from .bash_tool import RunBashCommandTool as RunBashCommandTool + from .e2b_sandbox import E2BSandbox as E2BSandbox + from .list_directory_tool import ListDirectoryTool as ListDirectoryTool + from .read_file_tool import ReadFileTool as ReadFileTool from .sandbox_toolset import create_e2b_tools as create_e2b_tools + from .write_file_tool import WriteFileTool as WriteFileTool else: sys.modules[__name__] = LazyImporter(name=__name__, module_file=__file__, import_structure=_import_structure) diff --git a/haystack_experimental/tools/e2b/bash_tool.py b/haystack_experimental/tools/e2b/bash_tool.py new file mode 100644 index 00000000..e3e3491f --- /dev/null +++ b/haystack_experimental/tools/e2b/bash_tool.py @@ -0,0 +1,68 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack.tools import Tool + +from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox + + +class RunBashCommandTool(Tool): + """ + A :class:`~haystack.tools.Tool` that executes bash commands inside an E2B sandbox. + + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. 
+ + ### Usage example + + ```python + from haystack_experimental.tools.e2b import E2BSandbox, RunBashCommandTool, ReadFileTool + + sandbox = E2BSandbox() + agent = Agent( + chat_generator=..., + tools=[ + RunBashCommandTool(sandbox=sandbox), + ReadFileTool(sandbox=sandbox), + ], + ) + ``` + """ + + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance that will execute commands. + """ + + def run_bash_command(command: str, timeout: int = 60) -> str: + sb = sandbox._require_sandbox() + try: + result = sb.commands.run(command, timeout=timeout) + return f"exit_code: {result.exit_code}\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}" + except Exception as e: + raise RuntimeError(f"Failed to run bash command: {e}") from e + + super().__init__( + name="run_bash_command", + description=( + "Execute a bash command inside the E2B sandbox and return the combined stdout, " + "stderr, and exit code. Use this to run shell scripts, install packages, compile " + "code, or perform any system-level operation." + ), + parameters={ + "type": "object", + "properties": { + "command": {"type": "string", "description": "The bash command to execute."}, + "timeout": { + "type": "integer", + "description": ( + "Maximum number of seconds to wait for the command to finish. Defaults to 60 seconds." 
+ ), + }, + }, + "required": ["command"], + }, + function=run_bash_command, + ) + self._e2b_sandbox = sandbox diff --git a/haystack_experimental/tools/e2b/e2b_sandbox.py b/haystack_experimental/tools/e2b/e2b_sandbox.py new file mode 100644 index 00000000..710f83c1 --- /dev/null +++ b/haystack_experimental/tools/e2b/e2b_sandbox.py @@ -0,0 +1,173 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from dataclasses import dataclass, field +from typing import Any + +from haystack import logging +from haystack.lazy_imports import LazyImport +from haystack.utils import Secret, deserialize_secrets_inplace + +with LazyImport(message="Run 'pip install e2b'") as e2b_import: + from e2b import Sandbox + +logger = logging.getLogger(__name__) + + +@dataclass +class E2BSandbox: + """ + Manages the lifecycle of an E2B cloud sandbox. + + Instantiate this class and pass it to one or more E2B tool classes + (``RunBashCommandTool``, ``ReadFileTool``, ``WriteFileTool``, + ``ListDirectoryTool``) to share a single sandbox environment across all + tools. All tools that receive the same ``E2BSandbox`` instance operate + inside the same live sandbox process. + + ### Usage example + + ```python + from haystack.components.generators.chat import OpenAIChatGenerator + + from haystack_experimental.components.agents import Agent + from haystack_experimental.tools.e2b import ( + E2BSandbox, + RunBashCommandTool, + ReadFileTool, + WriteFileTool, + ListDirectoryTool, + ) + + sandbox = E2BSandbox() + agent = Agent( + chat_generator=OpenAIChatGenerator(model="gpt-4o"), + tools=[ + RunBashCommandTool(sandbox=sandbox), + ReadFileTool(sandbox=sandbox), + WriteFileTool(sandbox=sandbox), + ListDirectoryTool(sandbox=sandbox), + ], + ) + ``` + + Lifecycle is handled automatically by the Agent's pipeline. 
If you use the + tools standalone, call :meth:`warm_up` before the first tool invocation: + + ```python + sandbox.warm_up() + # … use tools … + sandbox.close() + ``` + """ + + api_key: Secret = field(default_factory=lambda: Secret.from_env_var("E2B_API_KEY")) + sandbox_template: str = field(default="base") + timeout: int = field(default=300) + environment_vars: dict[str, str] = field(default_factory=dict) + + # Private – not serialised + _sandbox: Any = field(default=None, init=False, repr=False, compare=False) + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def warm_up(self) -> None: + """ + Establish the connection to the E2B sandbox. + + Idempotent – calling it multiple times has no effect if the sandbox is + already running. + + :raises RuntimeError: If the E2B sandbox cannot be created. + """ + if self._sandbox is not None: + return + + e2b_import.check() + resolved_key = self.api_key.resolve_value() + try: + logger.info( + "Starting E2B sandbox (template={template}, timeout={timeout}s)", + template=self.sandbox_template, + timeout=self.timeout, + ) + self._sandbox = Sandbox( + api_key=resolved_key, + template=self.sandbox_template, + timeout=self.timeout, + envs=self.environment_vars if self.environment_vars else None, + ) + logger.info("E2B sandbox started (id={sandbox_id})", sandbox_id=self._sandbox.sandbox_id) + except Exception as e: + raise RuntimeError(f"Failed to start E2B sandbox: {e}") from e + + def close(self) -> None: + """ + Shut down the E2B sandbox and release all associated resources. + + Call this when you are done to avoid leaving idle sandboxes running. 
+ """ + if self._sandbox is None: + return + try: + self._sandbox.kill() + logger.info("E2B sandbox closed") + except Exception as e: + logger.warning("Failed to close E2B sandbox: {error}", error=e) + finally: + self._sandbox = None + + # ------------------------------------------------------------------ + # Serialisation + # ------------------------------------------------------------------ + + def to_dict(self) -> dict[str, Any]: + """ + Serialize the sandbox configuration to a dictionary. + + :returns: Dictionary containing the serialised configuration. + """ + from haystack.core.serialization import generate_qualified_class_name + + return { + "type": generate_qualified_class_name(type(self)), + "data": { + "api_key": self.api_key.to_dict(), + "sandbox_template": self.sandbox_template, + "timeout": self.timeout, + "environment_vars": self.environment_vars, + }, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "E2BSandbox": + """ + Deserialize an :class:`E2BSandbox` from a dictionary. + + :param data: Dictionary created by :meth:`to_dict`. + :returns: A new :class:`E2BSandbox` instance ready to be warmed up. + """ + inner = data["data"] + deserialize_secrets_inplace(inner, keys=["api_key"]) + return cls( + api_key=inner["api_key"], + sandbox_template=inner.get("sandbox_template", "base"), + timeout=inner.get("timeout", 300), + environment_vars=inner.get("environment_vars", {}), + ) + + # ------------------------------------------------------------------ + # Internal helpers (used by the tool classes) + # ------------------------------------------------------------------ + + def _require_sandbox(self) -> "Sandbox": + """Return the active sandbox or raise a helpful error.""" + if self._sandbox is None: + raise RuntimeError( + "E2B sandbox is not running. Call warm_up() before using the tools, " + "or add the sandbox to a Haystack pipeline/agent which calls warm_up() automatically." 
+ ) + return self._sandbox diff --git a/haystack_experimental/tools/e2b/list_directory_tool.py b/haystack_experimental/tools/e2b/list_directory_tool.py new file mode 100644 index 00000000..0081761d --- /dev/null +++ b/haystack_experimental/tools/e2b/list_directory_tool.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack.tools import Tool + +from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox + + +class ListDirectoryTool(Tool): + """ + A :class:`~haystack.tools.Tool` that lists directory contents in an E2B sandbox. + + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. + + ### Usage example + + ```python + from haystack_experimental.tools.e2b import E2BSandbox, ListDirectoryTool + + sandbox = E2BSandbox() + agent = Agent(chat_generator=..., tools=[ListDirectoryTool(sandbox=sandbox)]) + ``` + """ + + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance to list directories from. + """ + + def list_directory(path: str) -> str: + sb = sandbox._require_sandbox() + try: + entries = sb.files.list(path) + lines = [] + for entry in entries: + name = entry.name + if getattr(entry, "is_dir", False) or getattr(entry, "type", "") == "dir": + name = name + "/" + lines.append(name) + return "\n".join(lines) if lines else "(empty directory)" + except Exception as e: + raise RuntimeError(f"Failed to list directory '{path}': {e}") from e + + super().__init__( + name="list_directory", + description=( + "List the files and subdirectories inside a directory in the E2B sandbox " + "filesystem. Returns a newline-separated list of names with a trailing '/' " + "appended to subdirectory names." 
+ ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the directory to list."} + }, + "required": ["path"], + }, + function=list_directory, + ) + self._e2b_sandbox = sandbox diff --git a/haystack_experimental/tools/e2b/read_file_tool.py b/haystack_experimental/tools/e2b/read_file_tool.py new file mode 100644 index 00000000..ce07924a --- /dev/null +++ b/haystack_experimental/tools/e2b/read_file_tool.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack.tools import Tool + +from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox + + +class ReadFileTool(Tool): + """ + A :class:`~haystack.tools.Tool` that reads files from an E2B sandbox filesystem. + + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. + + ### Usage example + + ```python + from haystack_experimental.tools.e2b import E2BSandbox, ReadFileTool + + sandbox = E2BSandbox() + agent = Agent(chat_generator=..., tools=[ReadFileTool(sandbox=sandbox)]) + ``` + """ + + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance to read files from. + """ + + def read_file(path: str) -> str: + sb = sandbox._require_sandbox() + try: + content = sb.files.read(path) + if isinstance(content, bytes): + return content.decode("utf-8", errors="replace") + return str(content) + except Exception as e: + raise RuntimeError(f"Failed to read file '{path}': {e}") from e + + super().__init__( + name="read_file", + description=( + "Read the text content of a file from the E2B sandbox filesystem and return it " + "as a string. The file must exist; use list_directory to verify paths first." 
+ ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the file to read."} + }, + "required": ["path"], + }, + function=read_file, + ) + self._e2b_sandbox = sandbox diff --git a/haystack_experimental/tools/e2b/sandbox_toolset.py b/haystack_experimental/tools/e2b/sandbox_toolset.py index 5f5a12ea..ef35d768 100644 --- a/haystack_experimental/tools/e2b/sandbox_toolset.py +++ b/haystack_experimental/tools/e2b/sandbox_toolset.py @@ -2,394 +2,14 @@ # # SPDX-License-Identifier: Apache-2.0 -from dataclasses import dataclass, field -from typing import Any - -from haystack import logging -from haystack.lazy_imports import LazyImport from haystack.tools import Tool -from haystack.utils import Secret, deserialize_secrets_inplace - -with LazyImport(message="Run 'pip install e2b'") as e2b_import: - from e2b import Sandbox - -logger = logging.getLogger(__name__) - - -@dataclass -class E2BSandbox: - """ - Manages the lifecycle of an E2B cloud sandbox. - - Instantiate this class and pass it to one or more E2B tool classes - (``RunBashCommandTool``, ``ReadFileTool``, ``WriteFileTool``, - ``ListDirectoryTool``) to share a single sandbox environment across all - tools. All tools that receive the same ``E2BSandbox`` instance operate - inside the same live sandbox process. 
- - ### Usage example - - ```python - from haystack.components.generators.chat import OpenAIChatGenerator - from haystack.dataclasses import ChatMessage - - from haystack_experimental.components.agents import Agent - from haystack_experimental.tools.e2b import ( - E2BSandbox, - RunBashCommandTool, - ReadFileTool, - WriteFileTool, - ListDirectoryTool, - ) - - sandbox = E2BSandbox() - agent = Agent( - chat_generator=OpenAIChatGenerator(model="gpt-4o"), - tools=[ - RunBashCommandTool(sandbox=sandbox), - ReadFileTool(sandbox=sandbox), - WriteFileTool(sandbox=sandbox), - ListDirectoryTool(sandbox=sandbox), - ], - ) - ``` - - Lifecycle is handled automatically by the Agent's pipeline. If you use the - tools standalone, call :meth:`warm_up` before the first tool invocation: - - ```python - sandbox.warm_up() - # … use tools … - sandbox.close() - ``` - """ - - api_key: Secret = field(default_factory=lambda: Secret.from_env_var("E2B_API_KEY")) - sandbox_template: str = field(default="base") - timeout: int = field(default=300) - environment_vars: dict[str, str] = field(default_factory=dict) - - # Private – not serialised - _sandbox: Any = field(default=None, init=False, repr=False, compare=False) - - # ------------------------------------------------------------------ - # Lifecycle - # ------------------------------------------------------------------ - - def warm_up(self) -> None: - """ - Establish the connection to the E2B sandbox. - - Idempotent – calling it multiple times has no effect if the sandbox is - already running. - - :raises RuntimeError: If the E2B sandbox cannot be created. 
- """ - if self._sandbox is not None: - return - - e2b_import.check() - resolved_key = self.api_key.resolve_value() - try: - logger.info( - "Starting E2B sandbox (template={template}, timeout={timeout}s)", - template=self.sandbox_template, - timeout=self.timeout, - ) - self._sandbox = Sandbox( - api_key=resolved_key, - template=self.sandbox_template, - timeout=self.timeout, - envs=self.environment_vars if self.environment_vars else None, - ) - logger.info("E2B sandbox started (id={sandbox_id})", sandbox_id=self._sandbox.sandbox_id) - except Exception as e: - raise RuntimeError(f"Failed to start E2B sandbox: {e}") from e - - def close(self) -> None: - """ - Shut down the E2B sandbox and release all associated resources. - - Call this when you are done to avoid leaving idle sandboxes running. - """ - if self._sandbox is None: - return - try: - self._sandbox.kill() - logger.info("E2B sandbox closed") - except Exception as e: - logger.warning("Failed to close E2B sandbox: {error}", error=e) - finally: - self._sandbox = None - - # ------------------------------------------------------------------ - # Serialisation - # ------------------------------------------------------------------ - - def to_dict(self) -> dict[str, Any]: - """ - Serialize the sandbox configuration to a dictionary. - - :returns: Dictionary containing the serialised configuration. - """ - from haystack.core.serialization import generate_qualified_class_name - - return { - "type": generate_qualified_class_name(type(self)), - "data": { - "api_key": self.api_key.to_dict(), - "sandbox_template": self.sandbox_template, - "timeout": self.timeout, - "environment_vars": self.environment_vars, - }, - } - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> "E2BSandbox": - """ - Deserialize an :class:`E2BSandbox` from a dictionary. - - :param data: Dictionary created by :meth:`to_dict`. - :returns: A new :class:`E2BSandbox` instance ready to be warmed up. 
- """ - inner = data["data"] - deserialize_secrets_inplace(inner, keys=["api_key"]) - return cls( - api_key=inner["api_key"], - sandbox_template=inner.get("sandbox_template", "base"), - timeout=inner.get("timeout", 300), - environment_vars=inner.get("environment_vars", {}), - ) - - # ------------------------------------------------------------------ - # Internal helpers (used by the tool classes) - # ------------------------------------------------------------------ - - def _require_sandbox(self) -> "Sandbox": - """Return the active sandbox or raise a helpful error.""" - if self._sandbox is None: - raise RuntimeError( - "E2B sandbox is not running. Call warm_up() before using the tools, " - "or add the sandbox to a Haystack pipeline/agent which calls warm_up() automatically." - ) - return self._sandbox - - -# --------------------------------------------------------------------------- -# Tool classes -# --------------------------------------------------------------------------- - - -class RunBashCommandTool(Tool): - """ - A :class:`~haystack.tools.Tool` that executes bash commands inside an E2B sandbox. - - Pass the same :class:`E2BSandbox` instance to multiple tool classes so they - all operate in the same live sandbox environment. - - ### Usage example - - ```python - sandbox = E2BSandbox() - bash_tool = RunBashCommandTool(sandbox=sandbox) - read_tool = ReadFileTool(sandbox=sandbox) - agent = Agent(chat_generator=..., tools=[bash_tool, read_tool]) - ``` - """ - - def __init__(self, sandbox: E2BSandbox) -> None: - """ - :param sandbox: The :class:`E2BSandbox` instance that will execute commands. 
- """ - - def run_bash_command(command: str, timeout: int = 60) -> str: - sb = sandbox._require_sandbox() - try: - result = sb.commands.run(command, timeout=timeout) - return f"exit_code: {result.exit_code}\nstdout:\n{result.stdout}\nstderr:\n{result.stderr}" - except Exception as e: - raise RuntimeError(f"Failed to run bash command: {e}") from e - - super().__init__( - name="run_bash_command", - description=( - "Execute a bash command inside the E2B sandbox and return the combined stdout, " - "stderr, and exit code. Use this to run shell scripts, install packages, compile " - "code, or perform any system-level operation." - ), - parameters={ - "type": "object", - "properties": { - "command": {"type": "string", "description": "The bash command to execute."}, - "timeout": { - "type": "integer", - "description": ( - "Maximum number of seconds to wait for the command to finish. Defaults to 60 seconds." - ), - }, - }, - "required": ["command"], - }, - function=run_bash_command, - ) - self._e2b_sandbox = sandbox - - -class ReadFileTool(Tool): - """ - A :class:`~haystack.tools.Tool` that reads files from an E2B sandbox filesystem. - - Pass the same :class:`E2BSandbox` instance to multiple tool classes so they - all operate in the same live sandbox environment. - - ### Usage example - - ```python - sandbox = E2BSandbox() - read_tool = ReadFileTool(sandbox=sandbox) - agent = Agent(chat_generator=..., tools=[read_tool]) - ``` - """ - - def __init__(self, sandbox: E2BSandbox) -> None: - """ - :param sandbox: The :class:`E2BSandbox` instance to read files from. 
- """ - - def read_file(path: str) -> str: - sb = sandbox._require_sandbox() - try: - content = sb.files.read(path) - if isinstance(content, bytes): - return content.decode("utf-8", errors="replace") - return str(content) - except Exception as e: - raise RuntimeError(f"Failed to read file '{path}': {e}") from e - - super().__init__( - name="read_file", - description=( - "Read the text content of a file from the E2B sandbox filesystem and return it " - "as a string. The file must exist; use list_directory to verify paths first." - ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the file to read."} - }, - "required": ["path"], - }, - function=read_file, - ) - self._e2b_sandbox = sandbox - - -class WriteFileTool(Tool): - """ - A :class:`~haystack.tools.Tool` that writes files to an E2B sandbox filesystem. - - Pass the same :class:`E2BSandbox` instance to multiple tool classes so they - all operate in the same live sandbox environment. - - ### Usage example - - ```python - sandbox = E2BSandbox() - write_tool = WriteFileTool(sandbox=sandbox) - agent = Agent(chat_generator=..., tools=[write_tool]) - ``` - """ - - def __init__(self, sandbox: E2BSandbox) -> None: - """ - :param sandbox: The :class:`E2BSandbox` instance to write files to. - """ - - def write_file(path: str, content: str) -> str: - sb = sandbox._require_sandbox() - try: - sb.files.write(path, content) - return f"File written successfully: {path}" - except Exception as e: - raise RuntimeError(f"Failed to write file '{path}': {e}") from e - - super().__init__( - name="write_file", - description=( - "Write text content to a file in the E2B sandbox filesystem. " - "Parent directories are created automatically if they do not exist. " - "Existing files are overwritten." 
- ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the file to write."}, - "content": {"type": "string", "description": "Text content to write into the file."}, - }, - "required": ["path", "content"], - }, - function=write_file, - ) - self._e2b_sandbox = sandbox - - -class ListDirectoryTool(Tool): - """ - A :class:`~haystack.tools.Tool` that lists directory contents in an E2B sandbox. - - Pass the same :class:`E2BSandbox` instance to multiple tool classes so they - all operate in the same live sandbox environment. - - ### Usage example - - ```python - sandbox = E2BSandbox() - list_tool = ListDirectoryTool(sandbox=sandbox) - agent = Agent(chat_generator=..., tools=[list_tool]) - ``` - """ - - def __init__(self, sandbox: E2BSandbox) -> None: - """ - :param sandbox: The :class:`E2BSandbox` instance to list directories from. - """ - - def list_directory(path: str) -> str: - sb = sandbox._require_sandbox() - try: - entries = sb.files.list(path) - lines = [] - for entry in entries: - name = entry.name - if getattr(entry, "is_dir", False) or getattr(entry, "type", "") == "dir": - name = name + "/" - lines.append(name) - return "\n".join(lines) if lines else "(empty directory)" - except Exception as e: - raise RuntimeError(f"Failed to list directory '{path}': {e}") from e - - super().__init__( - name="list_directory", - description=( - "List the files and subdirectories inside a directory in the E2B sandbox " - "filesystem. Returns a newline-separated list of names with a trailing '/' " - "appended to subdirectory names." 
- ), - parameters={ - "type": "object", - "properties": { - "path": {"type": "string", "description": "Absolute or relative path of the directory to list."} - }, - "required": ["path"], - }, - function=list_directory, - ) - self._e2b_sandbox = sandbox - +from haystack.utils import Secret -# --------------------------------------------------------------------------- -# Convenience factory -# --------------------------------------------------------------------------- +from haystack_experimental.tools.e2b.bash_tool import RunBashCommandTool +from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox +from haystack_experimental.tools.e2b.list_directory_tool import ListDirectoryTool +from haystack_experimental.tools.e2b.read_file_tool import ReadFileTool +from haystack_experimental.tools.e2b.write_file_tool import WriteFileTool def create_e2b_tools( @@ -397,7 +17,7 @@ def create_e2b_tools( sandbox_template: str = "base", timeout: int = 300, environment_vars: dict[str, str] | None = None, -) -> tuple["E2BSandbox", list[Tool]]: +) -> tuple[E2BSandbox, list[Tool]]: """ Create an :class:`E2BSandbox` and all four E2B tools in one call. diff --git a/haystack_experimental/tools/e2b/write_file_tool.py b/haystack_experimental/tools/e2b/write_file_tool.py new file mode 100644 index 00000000..38f6cb26 --- /dev/null +++ b/haystack_experimental/tools/e2b/write_file_tool.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack.tools import Tool + +from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox + + +class WriteFileTool(Tool): + """ + A :class:`~haystack.tools.Tool` that writes files to an E2B sandbox filesystem. + + Pass the same :class:`E2BSandbox` instance to multiple tool classes so they + all operate in the same live sandbox environment. 
+ + ### Usage example + + ```python + from haystack_experimental.tools.e2b import E2BSandbox, WriteFileTool + + sandbox = E2BSandbox() + agent = Agent(chat_generator=..., tools=[WriteFileTool(sandbox=sandbox)]) + ``` + """ + + def __init__(self, sandbox: E2BSandbox) -> None: + """ + :param sandbox: The :class:`E2BSandbox` instance to write files to. + """ + + def write_file(path: str, content: str) -> str: + sb = sandbox._require_sandbox() + try: + sb.files.write(path, content) + return f"File written successfully: {path}" + except Exception as e: + raise RuntimeError(f"Failed to write file '{path}': {e}") from e + + super().__init__( + name="write_file", + description=( + "Write text content to a file in the E2B sandbox filesystem. " + "Parent directories are created automatically if they do not exist. " + "Existing files are overwritten." + ), + parameters={ + "type": "object", + "properties": { + "path": {"type": "string", "description": "Absolute or relative path of the file to write."}, + "content": {"type": "string", "description": "Text content to write into the file."}, + }, + "required": ["path", "content"], + }, + function=write_file, + ) + self._e2b_sandbox = sandbox diff --git a/test/tools/e2b/test_sandbox_toolset.py b/test/tools/e2b/test_sandbox_toolset.py index e1b71f28..24d0b14f 100644 --- a/test/tools/e2b/test_sandbox_toolset.py +++ b/test/tools/e2b/test_sandbox_toolset.py @@ -8,14 +8,12 @@ from haystack.tools.errors import ToolInvocationError from haystack.utils import Secret -from haystack_experimental.tools.e2b.sandbox_toolset import ( - E2BSandbox, - ListDirectoryTool, - ReadFileTool, - RunBashCommandTool, - WriteFileTool, - create_e2b_tools, -) +from haystack_experimental.tools.e2b.bash_tool import RunBashCommandTool +from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox +from haystack_experimental.tools.e2b.list_directory_tool import ListDirectoryTool +from haystack_experimental.tools.e2b.read_file_tool import ReadFileTool +from 
haystack_experimental.tools.e2b.sandbox_toolset import create_e2b_tools +from haystack_experimental.tools.e2b.write_file_tool import WriteFileTool # --------------------------------------------------------------------------- @@ -76,8 +74,8 @@ def test_custom_parameters(self): class TestE2BSandboxWarmUp: - @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") - @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") def test_warm_up_creates_sandbox(self, mock_sandbox_cls, mock_e2b_import): mock_e2b_import.check.return_value = None mock_instance = _make_sandbox_mock() @@ -94,8 +92,8 @@ def test_warm_up_creates_sandbox(self, mock_sandbox_cls, mock_e2b_import): ) assert sb._sandbox is mock_instance - @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") - @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") def test_warm_up_passes_environment_vars(self, mock_sandbox_cls, mock_e2b_import): mock_e2b_import.check.return_value = None mock_sandbox_cls.return_value = _make_sandbox_mock() @@ -106,8 +104,8 @@ def test_warm_up_passes_environment_vars(self, mock_sandbox_cls, mock_e2b_import _, kwargs = mock_sandbox_cls.call_args assert kwargs["envs"] == {"MY_VAR": "value"} - @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") - @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") def test_warm_up_is_idempotent(self, mock_sandbox_cls, mock_e2b_import): mock_e2b_import.check.return_value = None mock_sandbox_cls.return_value = _make_sandbox_mock() @@ -118,8 +116,8 @@ def test_warm_up_is_idempotent(self, 
mock_sandbox_cls, mock_e2b_import): mock_sandbox_cls.assert_called_once() - @patch("haystack_experimental.tools.e2b.sandbox_toolset.e2b_import") - @patch("haystack_experimental.tools.e2b.sandbox_toolset.Sandbox") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") + @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") def test_warm_up_raises_on_sandbox_error(self, mock_sandbox_cls, mock_e2b_import): mock_e2b_import.check.return_value = None mock_sandbox_cls.side_effect = Exception("connection refused") From 714aa5c05e3bb1465ab5828a5d1ca83784c9c931 Mon Sep 17 00:00:00 2001 From: tholor Date: Wed, 11 Mar 2026 18:06:47 +0100 Subject: [PATCH 09/10] adjust sandbox class and example script --- .../tools/e2b/e2b_agent_example.py | 14 +++++------ .../tools/e2b/e2b_sandbox.py | 23 +++++++++++-------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/haystack_experimental/tools/e2b/e2b_agent_example.py b/haystack_experimental/tools/e2b/e2b_agent_example.py index c19a6f88..85436860 100644 --- a/haystack_experimental/tools/e2b/e2b_agent_example.py +++ b/haystack_experimental/tools/e2b/e2b_agent_example.py @@ -21,7 +21,7 @@ from haystack.components.generators.chat import OpenAIChatGenerator from haystack.dataclasses import ChatMessage - +from haystack.utils import Secret from haystack_experimental.components.agents import Agent from haystack_experimental.tools.e2b import ( E2BSandbox, @@ -65,6 +65,7 @@ def run(query: str, model: str = "gpt-4o-mini") -> None: # One sandbox passed to each tool class – they all share the same live sandbox process. 
sandbox = E2BSandbox() + sandbox.warm_up() tools = [ RunBashCommandTool(sandbox=sandbox), ReadFileTool(sandbox=sandbox), @@ -84,13 +85,10 @@ def run(query: str, model: str = "gpt-4o-mini") -> None: max_agent_steps=15, ) - try: - result = agent.run(messages=[ChatMessage.from_user(query)]) - print("\n--- Agent response ---") - print(result["last_message"].text) - finally: - # Always close the sandbox to release cloud resources. - sandbox.close() + result = agent.run(messages=[ChatMessage.from_user(query)]) + print("\n--- Agent response ---") + print(result["last_message"].text) + if __name__ == "__main__": diff --git a/haystack_experimental/tools/e2b/e2b_sandbox.py b/haystack_experimental/tools/e2b/e2b_sandbox.py index 710f83c1..201bbb08 100644 --- a/haystack_experimental/tools/e2b/e2b_sandbox.py +++ b/haystack_experimental/tools/e2b/e2b_sandbox.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from dataclasses import dataclass, field from typing import Any from haystack import logging @@ -15,7 +14,6 @@ logger = logging.getLogger(__name__) -@dataclass class E2BSandbox: """ Manages the lifecycle of an E2B cloud sandbox. 
@@ -62,13 +60,18 @@ class E2BSandbox: ``` """ - api_key: Secret = field(default_factory=lambda: Secret.from_env_var("E2B_API_KEY")) - sandbox_template: str = field(default="base") - timeout: int = field(default=300) - environment_vars: dict[str, str] = field(default_factory=dict) - - # Private – not serialised - _sandbox: Any = field(default=None, init=False, repr=False, compare=False) + def __init__( + self, + api_key: Secret | None = None, + sandbox_template: str = "base", + timeout: int = 120, + environment_vars: dict[str, str] | None = None, + ): + self.api_key = api_key or Secret.from_env_var("E2B_API_KEY") + self.sandbox_template = sandbox_template + self.timeout = timeout + self.environment_vars = environment_vars or {} + self._sandbox: Any = None # ------------------------------------------------------------------ # Lifecycle @@ -94,7 +97,7 @@ def warm_up(self) -> None: template=self.sandbox_template, timeout=self.timeout, ) - self._sandbox = Sandbox( + self._sandbox = Sandbox.create( api_key=resolved_key, template=self.sandbox_template, timeout=self.timeout, From 4ab141aebf388775f57c923fcc0e67a244952c26 Mon Sep 17 00:00:00 2001 From: tholor Date: Wed, 11 Mar 2026 18:17:04 +0100 Subject: [PATCH 10/10] fix serialization --- haystack_experimental/tools/e2b/__init__.py | 4 +- haystack_experimental/tools/e2b/bash_tool.py | 14 +++ .../tools/e2b/e2b_pipeline_example.py | 99 +++++++++++++++ .../tools/e2b/e2b_sandbox.py | 2 +- .../tools/e2b/list_directory_tool.py | 14 +++ .../tools/e2b/read_file_tool.py | 14 +++ .../tools/e2b/sandbox_toolset.py | 114 +++++++++++------- .../tools/e2b/write_file_tool.py | 14 +++ test/tools/e2b/test_sandbox_toolset.py | 74 +++++------- 9 files changed, 263 insertions(+), 86 deletions(-) create mode 100644 haystack_experimental/tools/e2b/e2b_pipeline_example.py diff --git a/haystack_experimental/tools/e2b/__init__.py b/haystack_experimental/tools/e2b/__init__.py index e8392743..fa0a970b 100644 --- 
a/haystack_experimental/tools/e2b/__init__.py +++ b/haystack_experimental/tools/e2b/__init__.py @@ -13,7 +13,7 @@ "read_file_tool": ["ReadFileTool"], "write_file_tool": ["WriteFileTool"], "list_directory_tool": ["ListDirectoryTool"], - "sandbox_toolset": ["create_e2b_tools"], + "sandbox_toolset": ["E2BToolset"], } if TYPE_CHECKING: @@ -21,7 +21,7 @@ from .e2b_sandbox import E2BSandbox as E2BSandbox from .list_directory_tool import ListDirectoryTool as ListDirectoryTool from .read_file_tool import ReadFileTool as ReadFileTool - from .sandbox_toolset import create_e2b_tools as create_e2b_tools + from .sandbox_toolset import E2BToolset as E2BToolset from .write_file_tool import WriteFileTool as WriteFileTool else: diff --git a/haystack_experimental/tools/e2b/bash_tool.py b/haystack_experimental/tools/e2b/bash_tool.py index e3e3491f..67f85c38 100644 --- a/haystack_experimental/tools/e2b/bash_tool.py +++ b/haystack_experimental/tools/e2b/bash_tool.py @@ -2,6 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 +from typing import Any + +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import Tool from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox @@ -66,3 +69,14 @@ def run_bash_command(command: str, timeout: int = 60) -> str: function=run_bash_command, ) self._e2b_sandbox = sandbox + + def to_dict(self) -> dict[str, Any]: + return { + "type": generate_qualified_class_name(type(self)), + "data": {"sandbox": self._e2b_sandbox.to_dict()}, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RunBashCommandTool": + sandbox = E2BSandbox.from_dict(data["data"]["sandbox"]) + return cls(sandbox=sandbox) diff --git a/haystack_experimental/tools/e2b/e2b_pipeline_example.py b/haystack_experimental/tools/e2b/e2b_pipeline_example.py new file mode 100644 index 00000000..27c9b999 --- /dev/null +++ b/haystack_experimental/tools/e2b/e2b_pipeline_example.py @@ -0,0 +1,99 @@ +# SPDX-FileCopyrightText: 2022-present deepset 
GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Example: Haystack Pipeline with an Agent and E2B sandbox tools. + +Demonstrates that a Pipeline containing an Agent with E2BToolset can be: + 1. Serialised to YAML + 2. Written to disk + 3. Loaded back from YAML with full sandbox config intact + +All four tools (run_bash_command, read_file, write_file, list_directory) +share a single E2BSandbox after the round-trip, so the agent operates +in one live sandbox environment. + +Requirements: + pip install haystack-ai e2b openai + +Environment variables: + E2B_API_KEY – your E2B API key + OPENAI_API_KEY – your OpenAI API key +""" + +import tempfile +from pathlib import Path + +from haystack.components.generators.chat import OpenAIChatGenerator +from haystack.core.pipeline import Pipeline +from haystack.dataclasses import ChatMessage + +from haystack_experimental.components.agents import Agent +from haystack_experimental.tools.e2b import E2BToolset + + +def build_pipeline() -> Pipeline: + agent = Agent( + chat_generator=OpenAIChatGenerator(model="gpt-4o-mini"), + tools=E2BToolset(sandbox_template="base", timeout=120), + system_prompt=( + "You are a helpful coding assistant with access to a live Linux sandbox. " + "Use the available tools freely to explore, write files, and run commands." 
+ ), + max_agent_steps=10, + ) + pipeline = Pipeline() + pipeline.add_component("agent", agent) + return pipeline + + +def roundtrip_yaml(pipeline: Pipeline) -> Pipeline: + """Serialise to YAML, save to a temp file, load it back.""" + yaml_str = pipeline.dumps() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + f.write(yaml_str) + yaml_path = Path(f.name) + + print(f"Pipeline YAML written to {yaml_path}\n") + print(yaml_str) + print("---\n") + + restored = Pipeline.loads(yaml_path.read_text()) + return restored + + +def verify_roundtrip(original: Pipeline, restored: Pipeline) -> None: + """Check that the restored pipeline has the same structure.""" + orig_agent: Agent = original.get_component("agent") + rest_agent: Agent = restored.get_component("agent") + + orig_ts: E2BToolset = orig_agent.tools # type: ignore[assignment] + rest_ts: E2BToolset = rest_agent.tools # type: ignore[assignment] + + assert type(rest_ts).__name__ == "E2BToolset", "Toolset type mismatch" + assert [t.name for t in rest_ts] == [t.name for t in orig_ts], "Tool names mismatch" + assert rest_ts.sandbox.sandbox_template == orig_ts.sandbox.sandbox_template + assert rest_ts.sandbox.timeout == orig_ts.sandbox.timeout + + sandbox_ids = {id(t._e2b_sandbox) for t in rest_ts} + assert len(sandbox_ids) == 1, "Tools should share a single sandbox after round-trip" + + print("All assertions passed: YAML round-trip preserves pipeline structure.\n") + + +def run_agent(pipeline: Pipeline, query: str) -> None: + """Run the agent with a query (requires live API keys).""" + print(f"Query: {query}\n") + result = pipeline.run(data={"agent": {"messages": [ChatMessage.from_user(query)]}}) + print("--- Agent response ---") + print(result["agent"]["last_message"].text) + + +if __name__ == "__main__": + pipeline = build_pipeline() + restored = roundtrip_yaml(pipeline) + verify_roundtrip(pipeline, restored) + + run_agent(restored, "Write a Python one-liner to /tmp/hello.py that prints 
'Hello from E2B!', run it, then show me the output.") diff --git a/haystack_experimental/tools/e2b/e2b_sandbox.py b/haystack_experimental/tools/e2b/e2b_sandbox.py index 201bbb08..1693a81c 100644 --- a/haystack_experimental/tools/e2b/e2b_sandbox.py +++ b/haystack_experimental/tools/e2b/e2b_sandbox.py @@ -158,7 +158,7 @@ def from_dict(cls, data: dict[str, Any]) -> "E2BSandbox": return cls( api_key=inner["api_key"], sandbox_template=inner.get("sandbox_template", "base"), - timeout=inner.get("timeout", 300), + timeout=inner.get("timeout", 120), environment_vars=inner.get("environment_vars", {}), ) diff --git a/haystack_experimental/tools/e2b/list_directory_tool.py b/haystack_experimental/tools/e2b/list_directory_tool.py index 0081761d..38d5d114 100644 --- a/haystack_experimental/tools/e2b/list_directory_tool.py +++ b/haystack_experimental/tools/e2b/list_directory_tool.py @@ -2,6 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 +from typing import Any + +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import Tool from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox @@ -60,3 +63,14 @@ def list_directory(path: str) -> str: function=list_directory, ) self._e2b_sandbox = sandbox + + def to_dict(self) -> dict[str, Any]: + return { + "type": generate_qualified_class_name(type(self)), + "data": {"sandbox": self._e2b_sandbox.to_dict()}, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ListDirectoryTool": + sandbox = E2BSandbox.from_dict(data["data"]["sandbox"]) + return cls(sandbox=sandbox) diff --git a/haystack_experimental/tools/e2b/read_file_tool.py b/haystack_experimental/tools/e2b/read_file_tool.py index ce07924a..f2e676d4 100644 --- a/haystack_experimental/tools/e2b/read_file_tool.py +++ b/haystack_experimental/tools/e2b/read_file_tool.py @@ -2,6 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 +from typing import Any + +from haystack.core.serialization import generate_qualified_class_name from 
haystack.tools import Tool from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox @@ -55,3 +58,14 @@ def read_file(path: str) -> str: function=read_file, ) self._e2b_sandbox = sandbox + + def to_dict(self) -> dict[str, Any]: + return { + "type": generate_qualified_class_name(type(self)), + "data": {"sandbox": self._e2b_sandbox.to_dict()}, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "ReadFileTool": + sandbox = E2BSandbox.from_dict(data["data"]["sandbox"]) + return cls(sandbox=sandbox) diff --git a/haystack_experimental/tools/e2b/sandbox_toolset.py b/haystack_experimental/tools/e2b/sandbox_toolset.py index ef35d768..04344efc 100644 --- a/haystack_experimental/tools/e2b/sandbox_toolset.py +++ b/haystack_experimental/tools/e2b/sandbox_toolset.py @@ -2,7 +2,10 @@ # # SPDX-License-Identifier: Apache-2.0 -from haystack.tools import Tool +from typing import Any + +from haystack.core.serialization import generate_qualified_class_name +from haystack.tools import Toolset from haystack.utils import Secret from haystack_experimental.tools.e2b.bash_tool import RunBashCommandTool @@ -12,54 +15,81 @@ from haystack_experimental.tools.e2b.write_file_tool import WriteFileTool -def create_e2b_tools( - api_key: Secret | None = None, - sandbox_template: str = "base", - timeout: int = 300, - environment_vars: dict[str, str] | None = None, -) -> tuple[E2BSandbox, list[Tool]]: +class E2BToolset(Toolset): """ - Create an :class:`E2BSandbox` and all four E2B tools in one call. - - Returns both the sandbox (for lifecycle management) and the list of tools - so that callers can pass any subset of the tools to an Agent. + A :class:`~haystack.tools.Toolset` that bundles all E2B sandbox tools. - :param api_key: E2B API key. Defaults to ``Secret.from_env_var("E2B_API_KEY")``. - :param sandbox_template: E2B sandbox template name. Defaults to ``"base"``. - :param timeout: Sandbox inactivity timeout in seconds. Defaults to ``300``. 
- :param environment_vars: Optional environment variables to inject into the sandbox. - :returns: A ``(sandbox, tools)`` tuple where *tools* is a list of four - :class:`~haystack.tools.Tool` objects: ``run_bash_command``, ``read_file``, - ``write_file``, and ``list_directory``. + All tools in the set share a single :class:`E2BSandbox` instance so they + operate inside the same live sandbox process. The toolset owns the sandbox + lifecycle: calling :meth:`warm_up` starts the sandbox, and serialisation + round-trips preserve the shared-sandbox relationship. ### Usage example ```python - from haystack.utils import Secret - from haystack_experimental.tools.e2b import create_e2b_tools - - sandbox, tools = create_e2b_tools( - api_key=Secret.from_env_var("E2B_API_KEY"), - ) + from haystack.components.generators.chat import OpenAIChatGenerator - # Use all four tools: - agent = Agent(chat_generator=..., tools=tools) + from haystack_experimental.components.agents import Agent + from haystack_experimental.tools.e2b import E2BToolset - # Or only a subset – they still share the same sandbox connection, - # so run_bash_command and read_file operate inside the same environment: - bash_tool, read_tool = tools[0], tools[1] - agent = Agent(chat_generator=..., tools=[bash_tool, read_tool]) + agent = Agent( + chat_generator=OpenAIChatGenerator(model="gpt-4o"), + tools=E2BToolset(), + ) ``` """ - if api_key is None: - api_key = Secret.from_env_var("E2B_API_KEY") - sandbox = E2BSandbox( - api_key=api_key, sandbox_template=sandbox_template, timeout=timeout, environment_vars=environment_vars or {} - ) - tools: list[Tool] = [ - RunBashCommandTool(sandbox=sandbox), - ReadFileTool(sandbox=sandbox), - WriteFileTool(sandbox=sandbox), - ListDirectoryTool(sandbox=sandbox), - ] - return sandbox, tools + + def __init__( + self, + api_key: Secret | None = None, + sandbox_template: str = "base", + timeout: int = 120, + environment_vars: dict[str, str] | None = None, + ): + """ + :param api_key: E2B 
API key. Defaults to ``Secret.from_env_var("E2B_API_KEY")``. + :param sandbox_template: E2B sandbox template name. Defaults to ``"base"``. + :param timeout: Sandbox inactivity timeout in seconds. Defaults to ``120``. + :param environment_vars: Optional environment variables to inject into the sandbox. + """ + self.sandbox = E2BSandbox( + api_key=api_key, + sandbox_template=sandbox_template, + timeout=timeout, + environment_vars=environment_vars, + ) + super().__init__( + tools=[ + RunBashCommandTool(sandbox=self.sandbox), + ReadFileTool(sandbox=self.sandbox), + WriteFileTool(sandbox=self.sandbox), + ListDirectoryTool(sandbox=self.sandbox), + ] + ) + + def warm_up(self) -> None: + """Start the shared E2B sandbox (idempotent).""" + self.sandbox.warm_up() + + def close(self) -> None: + """Shut down the shared E2B sandbox and release cloud resources.""" + self.sandbox.close() + + def to_dict(self) -> dict[str, Any]: + return { + "type": generate_qualified_class_name(type(self)), + "data": self.sandbox.to_dict()["data"], + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "E2BToolset": + from haystack.utils import deserialize_secrets_inplace + + inner = data["data"] + deserialize_secrets_inplace(inner, keys=["api_key"]) + return cls( + api_key=inner["api_key"], + sandbox_template=inner.get("sandbox_template", "base"), + timeout=inner.get("timeout", 120), + environment_vars=inner.get("environment_vars", {}), + ) diff --git a/haystack_experimental/tools/e2b/write_file_tool.py b/haystack_experimental/tools/e2b/write_file_tool.py index 38f6cb26..304d2b95 100644 --- a/haystack_experimental/tools/e2b/write_file_tool.py +++ b/haystack_experimental/tools/e2b/write_file_tool.py @@ -2,6 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 +from typing import Any + +from haystack.core.serialization import generate_qualified_class_name from haystack.tools import Tool from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox @@ -55,3 +58,14 @@ def write_file(path:
str, content: str) -> str: function=write_file, ) self._e2b_sandbox = sandbox + + def to_dict(self) -> dict[str, Any]: + return { + "type": generate_qualified_class_name(type(self)), + "data": {"sandbox": self._e2b_sandbox.to_dict()}, + } + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "WriteFileTool": + sandbox = E2BSandbox.from_dict(data["data"]["sandbox"]) + return cls(sandbox=sandbox) diff --git a/test/tools/e2b/test_sandbox_toolset.py b/test/tools/e2b/test_sandbox_toolset.py index 24d0b14f..84f4f728 100644 --- a/test/tools/e2b/test_sandbox_toolset.py +++ b/test/tools/e2b/test_sandbox_toolset.py @@ -12,7 +12,7 @@ from haystack_experimental.tools.e2b.e2b_sandbox import E2BSandbox from haystack_experimental.tools.e2b.list_directory_tool import ListDirectoryTool from haystack_experimental.tools.e2b.read_file_tool import ReadFileTool -from haystack_experimental.tools.e2b.sandbox_toolset import create_e2b_tools +from haystack_experimental.tools.e2b.sandbox_toolset import E2BToolset from haystack_experimental.tools.e2b.write_file_tool import WriteFileTool @@ -53,7 +53,7 @@ def test_class_defaults(self): """Verify the real class defaults, not values set by a helper.""" sandbox = E2BSandbox(api_key=Secret.from_token("test-api-key")) assert sandbox.sandbox_template == "base" - assert sandbox.timeout == 300 + assert sandbox.timeout == 120 assert sandbox.environment_vars == {} assert sandbox._sandbox is None @@ -75,16 +75,16 @@ def test_custom_parameters(self): class TestE2BSandboxWarmUp: @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") - @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") - def test_warm_up_creates_sandbox(self, mock_sandbox_cls, mock_e2b_import): + @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox.create") + def test_warm_up_creates_sandbox(self, mock_sandbox_create, mock_e2b_import): mock_e2b_import.check.return_value = None mock_instance = _make_sandbox_mock() - mock_sandbox_cls.return_value = 
mock_instance + mock_sandbox_create.return_value = mock_instance sb = _make_sandbox(sandbox_template="base", timeout=120) sb.warm_up() - mock_sandbox_cls.assert_called_once_with( + mock_sandbox_create.assert_called_once_with( api_key="test-api-key", template="base", timeout=120, @@ -93,34 +93,34 @@ def test_warm_up_creates_sandbox(self, mock_sandbox_cls, mock_e2b_import): assert sb._sandbox is mock_instance @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") - @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") - def test_warm_up_passes_environment_vars(self, mock_sandbox_cls, mock_e2b_import): + @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox.create") + def test_warm_up_passes_environment_vars(self, mock_sandbox_create, mock_e2b_import): mock_e2b_import.check.return_value = None - mock_sandbox_cls.return_value = _make_sandbox_mock() + mock_sandbox_create.return_value = _make_sandbox_mock() sb = _make_sandbox(environment_vars={"MY_VAR": "value"}) sb.warm_up() - _, kwargs = mock_sandbox_cls.call_args + _, kwargs = mock_sandbox_create.call_args assert kwargs["envs"] == {"MY_VAR": "value"} @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") - @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") - def test_warm_up_is_idempotent(self, mock_sandbox_cls, mock_e2b_import): + @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox.create") + def test_warm_up_is_idempotent(self, mock_sandbox_create, mock_e2b_import): mock_e2b_import.check.return_value = None - mock_sandbox_cls.return_value = _make_sandbox_mock() + mock_sandbox_create.return_value = _make_sandbox_mock() sb = _make_sandbox() sb.warm_up() sb.warm_up() - mock_sandbox_cls.assert_called_once() + mock_sandbox_create.assert_called_once() @patch("haystack_experimental.tools.e2b.e2b_sandbox.e2b_import") - @patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox") - def test_warm_up_raises_on_sandbox_error(self, mock_sandbox_cls, mock_e2b_import): + 
@patch("haystack_experimental.tools.e2b.e2b_sandbox.Sandbox.create") + def test_warm_up_raises_on_sandbox_error(self, mock_sandbox_create, mock_e2b_import): mock_e2b_import.check.return_value = None - mock_sandbox_cls.side_effect = Exception("connection refused") + mock_sandbox_create.side_effect = Exception("connection refused") sb = _make_sandbox() with pytest.raises(RuntimeError, match="Failed to start E2B sandbox"): @@ -239,42 +239,34 @@ def test_tool_stores_sandbox_reference(self): tool = RunBashCommandTool(sandbox=sb) assert tool._e2b_sandbox is sb - def test_create_e2b_tools_returns_four_tools(self): - sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) - assert len(tools) == 4 - names = {t.name for t in tools} + def test_e2b_toolset_contains_four_tools(self): + ts = E2BToolset(api_key=Secret.from_token("test-api-key")) + assert len(ts) == 4 + names = {t.name for t in ts} assert names == {"run_bash_command", "read_file", "write_file", "list_directory"} - def test_create_e2b_tools_returns_correct_types(self): - sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) - tool_types = {type(t) for t in tools} + def test_e2b_toolset_has_correct_tool_types(self): + ts = E2BToolset(api_key=Secret.from_token("test-api-key")) + tool_types = {type(t) for t in ts} assert tool_types == {RunBashCommandTool, ReadFileTool, WriteFileTool, ListDirectoryTool} - def test_create_e2b_tools_shares_same_sandbox(self): - sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) - # All tools must reference the same E2BSandbox instance - assert all(t._e2b_sandbox is sb for t in tools) + def test_e2b_toolset_shares_same_sandbox(self): + ts = E2BToolset(api_key=Secret.from_token("test-api-key")) + assert all(t._e2b_sandbox is ts.sandbox for t in ts) - # Inject a mock and verify the tool actually calls through it mock = _make_sandbox_mock() mock.commands.run.return_value = MagicMock(exit_code=0, stdout="ok", stderr="") - sb._sandbox = 
mock + ts.sandbox._sandbox = mock - bash_tool = next(t for t in tools if t.name == "run_bash_command") + bash_tool = next(t for t in ts if t.name == "run_bash_command") bash_tool.invoke(command="echo ok") mock.commands.run.assert_called_once() - def test_tools_are_independent_subsets(self): - """Users can select any subset of tools.""" - sb, tools = create_e2b_tools(api_key=Secret.from_token("test-api-key")) - bash_only = [t for t in tools if t.name == "run_bash_command"] - assert len(bash_only) == 1 - - def test_create_e2b_tools_default_api_key(self): - """create_e2b_tools uses E2B_API_KEY env var when api_key is omitted.""" - sb, _ = create_e2b_tools() - assert sb.api_key is not None + def test_e2b_toolset_default_api_key(self): + """E2BToolset uses E2B_API_KEY env var when api_key is omitted.""" + ts = E2BToolset() + assert ts.sandbox.api_key is not None def test_tools_from_same_sandbox_share_state(self): """Tools instantiated with the same sandbox share state."""