diff --git a/pyproject.toml b/pyproject.toml index f11bc59..53582a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "toolguard" -version = "0.2.13" +version = "0.2.14" description = "Policy adherence code generation for guarding AI agent tools" readme = "README.md" @@ -20,7 +20,6 @@ dependencies = [ "langchain-core>=0.3.72", "litellm<=1.82.6", # https://github.com/BerriAI/litellm/issues/24512 "markdown>=3.7", - "mellea<0.4.0", # mellea 0.4.0 requires python >=3.11 "pydantic>=2.11.0", "pytest>=8.3.3", "pytest-asyncio>=1.3.0", diff --git a/src/toolguard/buildtime/gen_py/gen_toolguards.py b/src/toolguard/buildtime/gen_py/gen_toolguards.py index d3876bf..fd6cc17 100644 --- a/src/toolguard/buildtime/gen_py/gen_toolguards.py +++ b/src/toolguard/buildtime/gen_py/gen_toolguards.py @@ -2,12 +2,10 @@ from pathlib import Path from typing import Callable, List, Optional -import mellea from loguru import logger from toolguard.buildtime.gen_py.domain_from_funcs import generate_domain_from_functions from toolguard.buildtime.gen_py.domain_from_openapi import generate_domain_from_openapi -from toolguard.buildtime.gen_py.mellea_simple import SimpleBackend from toolguard.buildtime.gen_py.tool_guard_generator import ToolGuardGenerator from toolguard.buildtime.llm import I_TG_LLM from toolguard.buildtime.utils import py, pyright, pytest @@ -85,13 +83,8 @@ async def generate_toolguards_from_domain( if len(spec.policy_items) > 0 ] - # mellea_workaround = {"model_options": {"reasoning_effort": "medium"}}#FIXME https://github.com/generative-computing/mellea/issues/270 - # kw_args = llm.kw_args - # kw_args.update(mellea_workaround) - mellea_backend = SimpleBackend(llm) - m = mellea.MelleaSession(mellea_backend) tools_generator = [ - ToolGuardGenerator(app_name, tool_policy, py_root, domain, m) + ToolGuardGenerator(app_name, tool_policy, py_root, domain, llm) for tool_policy in not_empty_specs ] with 
py.temp_python_path(py_root): diff --git a/src/toolguard/buildtime/gen_py/mellea_simple.py b/src/toolguard/buildtime/gen_py/mellea_simple.py deleted file mode 100644 index f2a8f48..0000000 --- a/src/toolguard/buildtime/gen_py/mellea_simple.py +++ /dev/null @@ -1,50 +0,0 @@ -"""This module holds shim backends used for smoke tests.""" - -from mellea.backends import Backend, BaseModelSubclass -from mellea.core import CBlock, Component, Context, GenerateLog, ModelOutputThunk -from mellea.formatters import Formatter -from mellea.formatters.template_formatter import TemplateFormatter - -from toolguard.buildtime.llm import I_TG_LLM - - -class SimpleBackend(Backend): - formatter: Formatter - llm: I_TG_LLM - - def __init__(self, llm: I_TG_LLM): - self.llm = llm - self.formatter = TemplateFormatter(model_id="") - - # _generate_from_context() for mellea > 0.4.0 - async def generate_from_context( - self, - action: Component | CBlock, - ctx: Context, - *, - format: type[BaseModelSubclass] | None = None, - model_options: dict | None = None, - tool_calls: bool = False, - ) -> tuple[ModelOutputThunk, Context]: - prompt = self.formatter.print(action) - msg = { - "role": "user", - "content": [{"type": "text", "text": prompt}], - } - - resp = await self.llm.generate([msg]) - - mot = ModelOutputThunk(value=resp, parsed_repr=resp) - mot._generate_log = GenerateLog() - return mot, ctx.add(action).add(mot) - - async def generate_from_raw( - self, - actions: list[Component | CBlock], - ctx: Context, - *, - format: type[BaseModelSubclass] | None = None, - model_options: dict | None = None, - tool_calls: bool = False, - ) -> list[ModelOutputThunk]: - raise NotImplementedError() diff --git a/src/toolguard/buildtime/gen_py/prompt_runner.py b/src/toolguard/buildtime/gen_py/prompt_runner.py new file mode 100644 index 0000000..a92ab05 --- /dev/null +++ b/src/toolguard/buildtime/gen_py/prompt_runner.py @@ -0,0 +1,65 @@ +"""Lightweight replacement for mellea's @generative decorator. 
+ +Builds a prompt from a function's name, signature, docstring, and bound +keyword arguments, then sends it to an I_TG_LLM backend and returns the +raw text response. +""" + +import inspect +from typing import Any, Callable, Dict, List + +from toolguard.buildtime.llm import I_TG_LLM + + +def _format_arg(func: Callable, key: str, val: Any) -> str: + """Format a single argument line like mellea's Arguments component.""" + sig = inspect.signature(func) + param = sig.parameters.get(key) + if param and param.annotation is not inspect.Parameter.empty: + param_type = param.annotation + else: + param_type = type(val) + + if param_type is str: + display_val = f'"{val!s}"' + else: + display_val = str(val) + + return f"- {key}: {display_val} (type: {param_type})" + + +def build_prompt(func: Callable, **kwargs: Any) -> str: + """Build the same prompt that mellea's GenerativeSlot + TemplateFormatter produces.""" + sig_str = str(inspect.signature(func)) + docstring = inspect.getdoc(func) or "No documentation provided." 
+ + lines = [ + "Your task is to imitate the output of the following function for the given arguments.", + "Reply Nothing else but the output of the function.", + "", + "Function:", + f"def {func.__name__}{sig_str}:", + f' """{docstring}"""', + ] + + if kwargs: + arg_lines = [_format_arg(func, k, v) for k, v in kwargs.items()] + lines.append("") + lines.append("Arguments:") + lines.extend(arg_lines) + + return "\n".join(lines) + + +async def run_prompt( + llm: I_TG_LLM, + func: Callable, + **kwargs: Any, +) -> str: + """Build a prompt from *func*'s metadata + *kwargs*, send it to *llm*, return the response.""" + prompt = build_prompt(func, **kwargs) + msg: Dict = { + "role": "user", + "content": [{"type": "text", "text": prompt}], + } + return await llm.generate([msg]) diff --git a/src/toolguard/buildtime/gen_py/prompts/gen_tests.py b/src/toolguard/buildtime/gen_py/prompts/gen_tests.py index b4cd140..083ed61 100644 --- a/src/toolguard/buildtime/gen_py/prompts/gen_tests.py +++ b/src/toolguard/buildtime/gen_py/prompts/gen_tests.py @@ -2,13 +2,12 @@ from typing import List -from mellea import generative - +from toolguard.buildtime.gen_py.prompt_runner import run_prompt +from toolguard.buildtime.llm import I_TG_LLM from toolguard.runtime.data_types import Domain, FileTwin, ToolGuardSpecItem -@generative -async def generate_init_tests( +async def _generate_init_tests_template( fn_src: FileTwin, policy_item: ToolGuardSpecItem, domain: Domain, @@ -113,8 +112,7 @@ async def test_violation_book_room_in_the_past(): ... -@generative -async def improve_tests( +async def _improve_tests_template( prev_impl: str, domain: Domain, policy_item: ToolGuardSpecItem, @@ -139,3 +137,41 @@ async def improve_tests( - You can add import statements, but dont remove them. """ ... 
+ + +async def generate_init_tests( + llm: I_TG_LLM, + *, + fn_src: FileTwin, + policy_item: ToolGuardSpecItem, + domain: Domain, + dependent_tool_names: List[str], +) -> str: + return await run_prompt( + llm, + _generate_init_tests_template, + fn_src=fn_src, + policy_item=policy_item, + domain=domain, + dependent_tool_names=dependent_tool_names, + ) + + +async def improve_tests( + llm: I_TG_LLM, + *, + prev_impl: str, + domain: Domain, + policy_item: ToolGuardSpecItem, + review_comments: List[str], + dependent_tool_names: List[str], +) -> str: + return await run_prompt( + llm, + _improve_tests_template, + prev_impl=prev_impl, + domain=domain, + policy_item=policy_item, + review_comments=review_comments, + dependent_tool_names=dependent_tool_names, + ) diff --git a/src/toolguard/buildtime/gen_py/prompts/improve_guard.py b/src/toolguard/buildtime/gen_py/prompts/improve_guard.py index 1ed2be2..31de1c3 100644 --- a/src/toolguard/buildtime/gen_py/prompts/improve_guard.py +++ b/src/toolguard/buildtime/gen_py/prompts/improve_guard.py @@ -2,13 +2,12 @@ from typing import List -from mellea import generative - +from toolguard.buildtime.gen_py.prompt_runner import run_prompt +from toolguard.buildtime.llm import I_TG_LLM from toolguard.runtime.data_types import FileTwin -@generative -async def improve_tool_guard( +async def _improve_tool_guard_template( policy_txt: str, dependent_tool_names: List[str], prev_impl: str, @@ -168,3 +167,25 @@ async def airline_cancelled(): ``` """ ... 
+ + +async def improve_tool_guard( + llm: I_TG_LLM, + *, + policy_txt: str, + dependent_tool_names: List[str], + prev_impl: str, + review_comments: List[str], + api: FileTwin, + data_types: FileTwin, +) -> str: + return await run_prompt( + llm, + _improve_tool_guard_template, + policy_txt=policy_txt, + dependent_tool_names=dependent_tool_names, + prev_impl=prev_impl, + review_comments=review_comments, + api=api, + data_types=data_types, + ) diff --git a/src/toolguard/buildtime/gen_py/prompts/pseudo_code.py b/src/toolguard/buildtime/gen_py/prompts/pseudo_code.py index a1ea895..083af42 100644 --- a/src/toolguard/buildtime/gen_py/prompts/pseudo_code.py +++ b/src/toolguard/buildtime/gen_py/prompts/pseudo_code.py @@ -1,13 +1,12 @@ # mypy: ignore-errors -from mellea import generative - +from toolguard.buildtime.gen_py.prompt_runner import run_prompt +from toolguard.buildtime.llm import I_TG_LLM from toolguard.runtime.data_types import FileTwin -@generative -async def tool_policy_pseudo_code( +async def _pseudo_code_template( policy_txt: str, fn_to_analyze: str, data_types: FileTwin, api: FileTwin ) -> str: """ @@ -169,3 +168,22 @@ def are_relatives(self, person1_id: str, person2_id: str) -> bool: pass ``` """ ... 
+ + +async def tool_policy_pseudo_code( + llm: I_TG_LLM, + *, + policy_txt: str, + fn_to_analyze: str, + data_types: FileTwin, + api: FileTwin, + model_options: dict | None = None, +) -> str: + return await run_prompt( + llm, + _pseudo_code_template, + policy_txt=policy_txt, + fn_to_analyze=fn_to_analyze, + data_types=data_types, + api=api, + ) diff --git a/src/toolguard/buildtime/gen_py/tool_dependencies.py b/src/toolguard/buildtime/gen_py/tool_dependencies.py index 5fd5fd9..edbd593 100644 --- a/src/toolguard/buildtime/gen_py/tool_dependencies.py +++ b/src/toolguard/buildtime/gen_py/tool_dependencies.py @@ -1,9 +1,8 @@ import re from typing import Any, Dict, Set -from mellea import MelleaSession - from toolguard.buildtime.gen_py import prompts +from toolguard.buildtime.llm import I_TG_LLM from toolguard.runtime.data_types import Domain MAX_TRIALS = 3 @@ -13,12 +12,12 @@ async def tool_dependencies( policy_txt: str, tool_signature: str, domain: Domain, - m: MelleaSession, + llm: I_TG_LLM, trial=0, ) -> Set[str]: model_options: Dict[str, Any] = {} # {ModelOption.TEMPERATURE: 0.8} pseudo_code = await prompts.tool_policy_pseudo_code( - m, + llm, policy_txt=policy_txt, fn_to_analyze=tool_signature, data_types=domain.app_types, @@ -30,7 +29,7 @@ async def tool_dependencies( return fn_names if trial <= MAX_TRIALS: # as tool_policy_pseudo_code has some temerature, we retry hoping next time the pseudo code will be correct - return await tool_dependencies(policy_txt, tool_signature, domain, m, trial + 1) + return await tool_dependencies(policy_txt, tool_signature, domain, llm, trial + 1) raise Exception("Failed to analyze api dependencies") diff --git a/src/toolguard/buildtime/gen_py/tool_guard_generator.py b/src/toolguard/buildtime/gen_py/tool_guard_generator.py index 29454a9..f223bbe 100644 --- a/src/toolguard/buildtime/gen_py/tool_guard_generator.py +++ b/src/toolguard/buildtime/gen_py/tool_guard_generator.py @@ -7,9 +7,9 @@ from typing import Callable, List, Optional, 
Tuple, Type from loguru import logger -from mellea import MelleaSession from toolguard.buildtime.gen_py import prompts +from toolguard.buildtime.llm import I_TG_LLM from toolguard.buildtime.gen_py.naming_conv import ( guard_fn_module_name, guard_fn_name, @@ -50,13 +50,13 @@ def __init__( tool_policy: ToolGuardSpec, py_path: Path, domain: RuntimeDomain, - m: MelleaSession, + llm: I_TG_LLM, ) -> None: self.py_path = py_path self.app_name = app_name self.tool_policy = tool_policy self.domain = domain - self.m = m + self.llm = llm def _create_dirs(self): app_path = self.py_path / py.to_py_module_name(self.app_name) @@ -111,7 +111,7 @@ async def _generate_item_tests_and_guard( dep_tools = [] if self.domain.app_api_size > 1: dep_tools = list( - await tool_dependencies(item.description, sig_str, self.domain, self.m) + await tool_dependencies(item.description, sig_str, self.domain, self.llm) ) logger.debug(f"Dependencies of '{item.name}': {dep_tools}") @@ -162,7 +162,7 @@ async def _generate_tests( first_time = trial_no == "a" if first_time: res = await prompts.generate_init_tests( - self.m, + self.llm, fn_src=guard, policy_item=item, domain=domain, # noqa: B023 @@ -171,7 +171,7 @@ async def _generate_tests( else: assert test_file res = await prompts.improve_tests( - self.m, + self.llm, prev_impl=test_file.content, # noqa: B023 domain=domain, # noqa: B023 policy_item=item, @@ -261,7 +261,7 @@ async def _improve_guard( domain = self.domain.get_definitions_only() # omit runtime fields prev_python = get_code_content(prev_guard.content) res = await prompts.improve_tool_guard( - self.m, + self.llm, policy_txt=item.description, dependent_tool_names=dep_tools, prev_impl=prev_python, # noqa: B023 diff --git a/tests/buildtime/gen_py/dependencies/test_appointment_dependencies.py b/tests/buildtime/gen_py/dependencies/test_appointment_dependencies.py index d8ac748..1b85507 100644 --- a/tests/buildtime/gen_py/dependencies/test_appointment_dependencies.py +++ 
b/tests/buildtime/gen_py/dependencies/test_appointment_dependencies.py @@ -1,11 +1,9 @@ import os from pathlib import Path -import mellea import pytest from toolguard.buildtime.gen_py.domain_from_openapi import generate_domain_from_openapi -from toolguard.buildtime.gen_py.mellea_simple import SimpleBackend from toolguard.buildtime.gen_py.tool_dependencies import tool_dependencies from toolguard.buildtime.llm import I_TG_LLM from toolguard.buildtime.llm.tg_litellm import LitellmModel @@ -36,10 +34,7 @@ async def test_appointment_slot_fee_dependency(litellm_llm: I_TG_LLM): policy_txt = "Gold members receive a 10% discount on the slot visit fee." tool_signature = "schedule_appointment(self, args:ScheduleAppointmentArgs)" - mellea_backend = SimpleBackend(litellm_llm) - mellea_session = mellea.MelleaSession(mellea_backend) - - deps = await tool_dependencies(policy_txt, tool_signature, domain, mellea_session) + deps = await tool_dependencies(policy_txt, tool_signature, domain, litellm_llm) assert len(deps) == 2 assert "get_user" in deps diff --git a/tests/buildtime/gen_py/dependencies/test_tau2_dependencies.py b/tests/buildtime/gen_py/dependencies/test_tau2_dependencies.py index cbcc7f4..d1ac328 100644 --- a/tests/buildtime/gen_py/dependencies/test_tau2_dependencies.py +++ b/tests/buildtime/gen_py/dependencies/test_tau2_dependencies.py @@ -2,15 +2,14 @@ import os from pathlib import Path -import mellea import pytest from tau2.domains.airline.data_model import FlightBase from tau2.domains.airline.tools import AirlineTools from tau2.environment.toolkit import ToolType, is_tool from toolguard.buildtime.gen_py.domain_from_funcs import generate_domain_from_functions -from toolguard.buildtime.gen_py.mellea_simple import SimpleBackend from toolguard.buildtime.gen_py.tool_dependencies import tool_dependencies +from toolguard.buildtime.llm import I_TG_LLM from toolguard.buildtime.llm.tg_litellm import LitellmModel current_dir = str(Path(__file__).parent) @@ -58,8 +57,8 @@ def 
teardown_class(cls): print("Tearing down class resources") @pytest.fixture(autouse=True) - def session(self): - llm = LitellmModel( + def llm(self) -> I_TG_LLM: + return LitellmModel( model_name=os.getenv("MODEL_NAME") or "Azure/gpt-5-2025-08-07", provider=os.getenv("LLM_PROVIDER") or "azure", kw_args={ @@ -68,67 +67,65 @@ def session(self): "api_key": os.getenv("LLM_API_KEY"), }, ) - mellea_backend = SimpleBackend(llm) - return mellea.MelleaSession(mellea_backend) @pytest.mark.asyncio - async def test_args_only(self, session): + async def test_args_only(self, llm): policy = "The total number of passengers in a reservation does not exceed five." assert ( await tool_dependencies( - policy, book_reservation_signature, self.domain, session + policy, book_reservation_signature, self.domain, llm ) == set() ) @pytest.mark.asyncio - async def test_payment_in_user(self, session): + async def test_payment_in_user(self, llm): policy = """All payment methods used are already present in the user's profile. Each reservation can use at most one travel certificate, one credit card, and three gift cards. """ assert await tool_dependencies( - policy, book_reservation_signature, self.domain, session + policy, book_reservation_signature, self.domain, llm ) == {"get_user_details"} @pytest.mark.asyncio - async def test_payment_in_args(self, session): + async def test_payment_in_args(self, llm): policy = "Each reservation can use at most one travel certificate, one credit card, and three gift cards." deps = await tool_dependencies( - policy, book_reservation_signature, self.domain, session + policy, book_reservation_signature, self.domain, llm ) assert deps == {"get_user_details"} @pytest.mark.asyncio - async def test_membership(self, session): + async def test_membership(self, llm): policy = """ If the booking user is a regular member, 0 free checked bag for each basic economy passenger, 1 free checked bag for each economy passenger, and 2 free checked bags for each business passenger. 
If the booking user is a silver member, 1 free checked bag for each basic economy passenger, 2 free checked bag for each economy passenger, and 3 free checked bags for each business passenger. If the booking user is a gold member, 2 free checked bag for each basic economy passenger, 3 free checked bag for each economy passenger, and 3 free checked bags for each business passenger. """ assert await tool_dependencies( - policy, book_reservation_signature, self.domain, session + policy, book_reservation_signature, self.domain, llm ) == {"get_user_details"} @pytest.mark.asyncio - async def test_flight_status(self, session): + async def test_flight_status(self, llm): policy = """The agent must ensure that the flight status is 'available' before booking. Flights with status 'delayed', 'on time', or 'flying' cannot be booked. """ assert await tool_dependencies( - policy, book_reservation_signature, self.domain, session + policy, book_reservation_signature, self.domain, llm ) == {"get_flight_status"} @pytest.mark.asyncio - async def test_update_flight_basic_economy(self, session): + async def test_update_flight_basic_economy(self, llm): policy = "Basic economy flights cannot be modified. The agent must verify the reservation's cabin class before calling the flight update API." assert await tool_dependencies( - policy, update_flights_signature, self.domain, session + policy, update_flights_signature, self.domain, llm ) == {"get_reservation_details"} # This test succeeds only with advanced models (eg, o1. but not gpt-4o) @pytest.mark.asyncio - async def test_indirect_api(self, session): + async def test_indirect_api(self, llm): policy = "When changing flights in a reservation, the agent must ensure that the origin, destination, and trip type remain unchanged."
deps = await tool_dependencies( - policy, update_flights_signature, self.domain, session + policy, update_flights_signature, self.domain, llm ) assert deps == {"get_reservation_details", "get_scheduled_flight"} diff --git a/uv.lock b/uv.lock index b8b04e1..c5a340d 100644 --- a/uv.lock +++ b/uv.lock @@ -8,15 +8,6 @@ resolution-markers = [ "python_full_version < '3.11'", ] -[[package]] -name = "absl-py" -version = "2.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/10/2a/c93173ffa1b39c1d0395b7e842bbdc62e556ca9d8d3b5572926f3e4ca752/absl_py-2.3.1.tar.gz", hash = "sha256:a97820526f7fbfd2ec1bce83f3f25e3a14840dac0d8e02a0b71cd75db3f77fc9", size = 116588, upload-time = "2025-07-03T09:31:44.05Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl", hash = "sha256:eeecf07f0c2a93ace0772c92e596ace6d3d3996c042b2128459aaae2a76de11d", size = 135811, upload-time = "2025-07-03T09:31:42.253Z" }, -] - [[package]] name = "addict" version = "2.4.0" @@ -152,24 +143,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] -[[package]] -name = "ansicolors" -version = "1.1.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/31/7faed52088732704523c259e24c26ce6f2f33fbeff2ff59274560c27628e/ansicolors-1.1.8.zip", hash = "sha256:99f94f5e3348a0bcd43c82e5fc4414013ccc19d70bd939ad71e0133ce9c372e0", size = 23027, upload-time = "2017-06-02T21:22:10.729Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/53/18/a56e2fe47b259bb52201093a3a9d4a32014f9d85071ad07e9d60600890ca/ansicolors-1.1.8-py2.py3-none-any.whl", hash = "sha256:00d2dde5a675579325902536738dd27e4fac1fd68f773fe36c21044eb559e187", size = 13847, upload-time = "2017-06-02T21:22:12.67Z" }, -] - -[[package]] -name = "antlr4-python3-runtime" -version = "4.13.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/33/5f/2cdf6f7aca3b20d3f316e9f505292e1f256a32089bd702034c29ebde6242/antlr4_python3_runtime-4.13.2.tar.gz", hash = "sha256:909b647e1d2fc2b70180ac586df3933e38919c85f98ccc656a96cd3f25ef3916", size = 117467, upload-time = "2024-08-03T19:00:12.757Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/89/03/a851e84fcbb85214dc637b6378121ef9a0dd61b4c65264675d8a5c9b1ae7/antlr4_python3_runtime-4.13.2-py3-none-any.whl", hash = "sha256:fe3835eb8d33daece0e799090eda89719dbccee7aa39ef94eed3818cafa5a7e8", size = 144462, upload-time = "2024-08-03T19:00:11.134Z" }, -] - [[package]] name = "anyio" version = "4.12.1" @@ -777,20 +750,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, ] -[[package]] -name = "docker" -version = "7.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "requests" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, -] - [[package]] name = "docstring-parser" version = "0.17.0" @@ -1150,19 +1109,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" }, ] -[[package]] -name = "granite-common" -version = "0.4.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jsonschema" }, - { name = "pydantic" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/cd/10/ca8f59c644a3574a443bb85ff807f1ebbe726a6ad75bd471e092ab002f37/granite_common-0.4.1.tar.gz", hash = "sha256:5290e03d43e2962218aaf13c9c43877af6fb7869332a4ea35983c4f6a206d801", size = 714066, upload-time = "2026-02-25T01:10:45.253Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/ee/c52f5ddb073c111c19f15889646fd65e0be2b4e0b01764237d1ecbcd5bfe/granite_common-0.4.1-py3-none-any.whl", hash = "sha256:e82df48f69a98b46dbff8a36c10a64a13d4400fed2425f2ad9a6981031544062", size = 86633, upload-time = "2026-02-25T01:10:43.845Z" }, -] - [[package]] name = "grpcio" version = "1.67.1" @@ -1509,15 +1455,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, ] -[[package]] -name = "json5" -version = "0.14.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/9c/4b/6f8906aaf67d501e259b0adab4d312945bb7211e8b8d4dcc77c92320edaa/json5-0.14.0.tar.gz", hash = "sha256:b3f492fad9f6cdbced8b7d40b28b9b1c9701c5f561bef0d33b81c2ff433fefcb", size = 52656, upload-time = "2026-03-27T22:50:48.108Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/42/cf027b4ac873b076189d935b135397675dac80cb29acb13e1ab86ad6c631/json5-0.14.0-py3-none-any.whl", hash = "sha256:56cf861bab076b1178eb8c92e1311d273a9b9acea2ccc82c276abf839ebaef3a", size = 36271, upload-time = "2026-03-27T22:50:47.073Z" }, -] - [[package]] name = "jsonpatch" version = "1.33" @@ -1740,19 +1677,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/34/01/9a3f0ff60afcb30383ea9775e9f9a233c0127bad7c786d878f78b487bebb/langsmith-0.6.1-py3-none-any.whl", hash = "sha256:cad1f0a5cb8baf01490d2d90b7515d2cecc31648237bf070d2e6c0e7d58a2079", size = 282977, upload-time = "2026-01-06T20:15:36.579Z" }, ] -[[package]] -name = "latex2sympy2-extended" -version = "1.10.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "antlr4-python3-runtime" }, - { name = "sympy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f4/de/472f9115c14c6f6d8a5889cabe3418283d708bde62ce00402c29441deed4/latex2sympy2_extended-1.10.2.tar.gz", hash = "sha256:41a517ffcc5a140e910a7d1646ce6ff440817e5f9d48fc8279d88bd0925bc389", size = 206188, upload-time = "2025-07-02T15:26:06.225Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/60/dfbbf40e3a371388c0e03ff65b01319b7d4023e883df6d7261125772ffdc/latex2sympy2_extended-1.10.2-py3-none-any.whl", hash = "sha256:f910442c5b02a466c1046f47d05cc5285181068b882399281f30102715337fb7", size = 207855, upload-time = "2025-07-02T15:26:04.88Z" }, -] - [[package]] name = "litellm" version = "1.80.11" @@ -1777,23 +1701,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/97/0b/9e637344f24f3fe0e8039cd2337389fe05e0d31f518bc3e0a5cdbe45784a/litellm-1.80.11-py3-none-any.whl", hash = "sha256:406283d66ead77dc7ff0e0b2559c80e9e497d8e7c2257efb1cb9210a20d09d54", size = 11456346, upload-time = "2025-12-22T12:47:26.469Z" }, ] -[[package]] -name = "llm-sandbox" -version = "0.3.37" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ff/96/fa676c1551ed96a6d4814c1a5ad8561d723be7344e03bc293d9ee53b3db7/llm_sandbox-0.3.37.tar.gz", hash = "sha256:bec2d2d2b6eb5311fd70e4aa6a21e60c3c7e8dee36f89aac47a1fd072485adc6", size = 605861, upload-time = "2026-03-02T06:45:28.168Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/d0/63e594b6ae23ed4b63d57d5e442308f9c70f61f16002da4c41a4436939a6/llm_sandbox-0.3.37-py3-none-any.whl", hash = "sha256:8c0a01a79e8db45bcf709a349aee2983cc3ed22d56f3110ec9fbcdeef43c78f3", size = 106853, upload-time = "2026-03-02T06:45:26.766Z" }, -] - -[package.optional-dependencies] -docker = [ - { name = "docker" }, -] - [[package]] name = "loguru" version = "0.7.3" @@ -1943,18 +1850,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, ] -[[package]] -name = "math-verify" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "latex2sympy2-extended" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/35/b5/b1db6fa6b6c28ebbe1889ee11a4703a72a2ca7750ec415f4559c758cf01a/math_verify-0.8.0.tar.gz", hash = "sha256:3295e0adb94bfe553ff6e3189c44f1916a85aa24ab5d1900f2086a706e28f7c4", size = 60191, upload-time = "2025-07-02T15:52:07.209Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/fe/9f/59979f699b5c97334298f1295bc9fcdc9904d98d2276479bffff863d23b1/math_verify-0.8.0-py3-none-any.whl", hash = "sha256:31ca651296d817a9bb3fd58ca1fd0d192dcea709b1e5ecf2d0a4514c16f89087", size = 29994, upload-time = "2025-07-02T15:52:05.023Z" }, -] - [[package]] name = "matplotlib" version = "3.10.8" @@ -2050,46 +1945,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] -[[package]] -name = "mellea" -version = "0.3.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ansicolors" }, - { name = "click" }, - { name = "fastapi" }, - { name = "granite-common" }, - { name = "huggingface-hub" }, - { name = "jinja2" }, - { name = "json5" }, - { name = "llm-sandbox", extra = ["docker"] }, - { name = "math-verify" }, - { name = "mistletoe" }, - { name = "ollama" }, - { name = "openai" }, - { name = "pillow" }, - { name = "pydantic" }, - { name = "requests" }, - { name = "rouge-score" }, - { name = "typer" }, - { name = "types-requests" }, - { name = "types-tqdm" }, - { name = "uvicorn" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ca/e9/25d87d92064b9781ef3e15d032f6f50deb2ff70b35d3c27f21ff595666ca/mellea-0.3.2.tar.gz", hash = "sha256:b73c5c1da473891e85005042a8e1ac26eae4026f448306884de3c8ba56df1965", size = 3583161, upload-time = "2026-02-26T13:43:30.979Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/d8/4bcf1174810518d7f9fb27ae8d77a9001bd975ef26d24f734f89df051ddc/mellea-0.3.2-py3-none-any.whl", hash = "sha256:ef9beb4b5b3f8c2099d17df592067646ecac2bc47b8f7f8b19951c3acbe37174", size = 3864719, upload-time = "2026-02-26T13:43:29.118Z" }, -] - -[[package]] -name = "mistletoe" -version = "1.5.1" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/31/ae/d33647e2a26a8899224f36afc5e7b7a670af30f1fd87231e9f07ca19d673/mistletoe-1.5.1.tar.gz", hash = "sha256:c5571ce6ca9cfdc7ce9151c3ae79acb418e067812000907616427197648030a3", size = 111769, upload-time = "2025-12-07T16:19:01.066Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/60/0980fefdc4d12c18c1bbab9d62852f27aded8839233c7b0a9827aaf395f5/mistletoe-1.5.1-py3-none-any.whl", hash = "sha256:d3e97664798261503f685f6a6281b092628367cf3128fc68a015a993b0c4feb3", size = 55331, upload-time = "2025-12-07T16:18:59.65Z" }, -] - [[package]] name = "more-itertools" version = "10.8.0" @@ -2099,15 +1954,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" }, ] -[[package]] -name = "mpmath" -version = "1.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, -] - [[package]] name = "multidict" version = "6.7.0" @@ -2228,21 +2074,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3d/2e/cf2ffeb386ac3763526151163ad7da9f1b586aac96d2b4f7de1eaebf0c61/narwhals-2.15.0-py3-none-any.whl", hash = 
"sha256:cbfe21ca19d260d9fd67f995ec75c44592d1f106933b03ddd375df7ac841f9d6", size = 432856, upload-time = "2026-01-06T08:10:11.511Z" }, ] -[[package]] -name = "nltk" -version = "3.9.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "joblib" }, - { name = "regex" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, upload-time = "2025-10-01T07:19:23.764Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload-time = "2025-10-01T07:19:21.648Z" }, -] - [[package]] name = "nodeenv" version = "1.10.0" @@ -2380,19 +2211,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/73/edeacba3167b1ca66d51b1a5a14697c2c40098b5ffa01811c67b1785a5ab/numpy-2.4.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a39fb973a726e63223287adc6dafe444ce75af952d711e400f3bf2b36ef55a7b", size = 12489376, upload-time = "2025-12-20T16:18:16.524Z" }, ] -[[package]] -name = "ollama" -version = "0.6.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "httpx" }, - { name = "pydantic" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9d/5a/652dac4b7affc2b37b95386f8ae78f22808af09d720689e3d7a86b6ed98e/ollama-0.6.1.tar.gz", hash = "sha256:478c67546836430034b415ed64fa890fd3d1ff91781a9d548b3325274e69d7c6", size = 51620, upload-time = "2025-11-13T23:02:17.416Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/47/4f/4a617ee93d8208d2bcf26b2d8b9402ceaed03e3853c754940e2290fed063/ollama-0.6.1-py3-none-any.whl", hash = 
"sha256:fc4c984b345735c5486faeee67d8a265214a31cbb828167782dc642ce0a2bf8c", size = 14354, upload-time = "2025-11-13T23:02:16.292Z" }, -] - [[package]] name = "openai" version = "2.14.0" @@ -3606,19 +3424,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" }, ] -[[package]] -name = "rouge-score" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "absl-py" }, - { name = "nltk" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e2/c5/9136736c37022a6ad27fea38f3111eb8f02fe75d067f9a985cc358653102/rouge_score-0.1.2.tar.gz", hash = "sha256:c7d4da2683e68c9abf0135ef915d63a46643666f848e558a1b9f7ead17ff0f04", size = 17400, upload-time = "2022-07-22T22:46:22.909Z" } - [[package]] name = "rpds-py" version = "0.30.0" @@ -4055,18 +3860,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, ] -[[package]] -name = "sympy" -version = "1.14.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mpmath" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = 
"sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, -] - [[package]] name = "tabulate" version = "0.9.0" @@ -4259,7 +4052,6 @@ dependencies = [ { name = "litellm" }, { name = "loguru" }, { name = "markdown" }, - { name = "mellea" }, { name = "pydantic" }, { name = "pyright" }, { name = "pytest" }, @@ -4283,7 +4075,6 @@ requires-dist = [ { name = "litellm", specifier = "<=1.82.6" }, { name = "loguru" }, { name = "markdown", specifier = ">=3.7" }, - { name = "mellea", specifier = "<0.4.0" }, { name = "pydantic", specifier = ">=2.11.0" }, { name = "pyright", specifier = ">=1.1.408" }, { name = "pytest", specifier = ">=8.3.3" }, @@ -4338,30 +4129,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" }, ] -[[package]] -name = "types-requests" -version = "2.33.0.20260327" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/02/5f/2e3dbae6e21be6ae026563bad96cbf76602d73aa85ea09f13419ddbdabb4/types_requests-2.33.0.20260327.tar.gz", hash = "sha256:f4f74f0b44f059e3db420ff17bd1966e3587cdd34062fe38a23cda97868f8dd8", size = 23804, upload-time = "2026-03-27T04:23:38.737Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/55/951e733616c92cb96b57554746d2f65f4464d080cc2cc093605f897aba89/types_requests-2.33.0.20260327-py3-none-any.whl", hash = 
"sha256:fde0712be6d7c9a4d490042d6323115baf872d9a71a22900809d0432de15776e", size = 20737, upload-time = "2026-03-27T04:23:37.813Z" }, -] - -[[package]] -name = "types-tqdm" -version = "4.67.3.20260303" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "types-requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e1/64/3e7cb0f40c4bf9578098b6873df33a96f7e0de90f3a039e614d22bfde40a/types_tqdm-4.67.3.20260303.tar.gz", hash = "sha256:7bfddb506a75aedb4030fabf4f05c5638c9a3bbdf900d54ec6c82be9034bfb96", size = 18117, upload-time = "2026-03-03T04:03:49.679Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/37/32/e4a1fce59155c74082f1a42d0ffafa59652bfb8cff35b04d56333877748e/types_tqdm-4.67.3.20260303-py3-none-any.whl", hash = "sha256:459decf677e4b05cef36f9012ef8d6e20578edefb6b78c15bd0b546247eda62d", size = 24572, upload-time = "2026-03-03T04:03:48.913Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0"