From 84f6bed2e1b061a4ba4a2be69d21e78fc89bfdfd Mon Sep 17 00:00:00 2001 From: lmcalvo Date: Mon, 6 Apr 2026 12:55:38 +0200 Subject: [PATCH 1/4] Add token usage tracking to OpenAI requests --- .../test/utils/ai_utils/test_openai_usage.py | 109 ++++++++++++++++++ toolium/utils/ai_utils/evaluate_answer.py | 2 +- toolium/utils/ai_utils/openai.py | 17 ++- toolium/utils/ai_utils/text_analysis.py | 3 +- toolium/utils/ai_utils/text_similarity.py | 2 +- 5 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 toolium/test/utils/ai_utils/test_openai_usage.py diff --git a/toolium/test/utils/ai_utils/test_openai_usage.py b/toolium/test/utils/ai_utils/test_openai_usage.py new file mode 100644 index 00000000..efca115d --- /dev/null +++ b/toolium/test/utils/ai_utils/test_openai_usage.py @@ -0,0 +1,109 @@ +""" +Copyright 2026 Telefónica Innovación Digital, S.L. +This file is part of Toolium. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import os +from unittest import mock + +import pytest + +from toolium.test.utils.ai_utils.common import ( + configure_default_openai_model, # noqa: F401, fixture needed to set the OpenAI model for all tests in this module +) +from toolium.utils.ai_utils.openai import openai_request + + +def _build_mock_completion(content='test response', prompt_tokens=10, completion_tokens=5, total_tokens=15): + """Build a mock OpenAI completion object with usage data""" + mock_usage = mock.MagicMock() + mock_usage.prompt_tokens = prompt_tokens + mock_usage.completion_tokens = completion_tokens + mock_usage.total_tokens = total_tokens + + mock_message = mock.MagicMock() + mock_message.content = content + + mock_choice = mock.MagicMock() + mock_choice.message = mock_message + + mock_completion = mock.MagicMock() + mock_completion.choices = [mock_choice] + mock_completion.usage = mock_usage + return mock_completion + + +@mock.patch('toolium.utils.ai_utils.openai.OpenAI') +def test_openai_request_returns_token_usage(mock_openai_class): + mock_client = mock.MagicMock() + mock_openai_class.return_value = mock_client + mock_client.chat.completions.create.return_value = _build_mock_completion( + content='hello', prompt_tokens=20, completion_tokens=10, total_tokens=30 + ) + + response, token_usage = openai_request('system', 'user') + + assert response == 'hello' + assert token_usage == {'prompt_tokens': 20, 'completion_tokens': 10, 'total_tokens': 30} + + +@mock.patch('toolium.utils.ai_utils.openai.OpenAI') +def test_openai_request_returns_empty_token_usage_when_no_usage(mock_openai_class): + mock_client = mock.MagicMock() + mock_openai_class.return_value = mock_client + mock_completion = _build_mock_completion() + mock_completion.usage = None + mock_client.chat.completions.create.return_value = mock_completion + + response, token_usage = openai_request('system', 'user') + + assert response == 'test response' + assert token_usage == {} + + +@mock.patch('toolium.utils.ai_utils.openai.OpenAI') +def test_openai_request_with_response_format_returns_token_usage(mock_openai_class): + mock_client = mock.MagicMock() + mock_openai_class.return_value = mock_client + + mock_parsed = mock.MagicMock() + mock_message = mock.MagicMock() + mock_message.parsed = mock_parsed + mock_choice = mock.MagicMock() + mock_choice.message = mock_message + mock_completion = mock.MagicMock() + mock_completion.choices = [mock_choice] + mock_usage = mock.MagicMock() + mock_usage.prompt_tokens = 50 + mock_usage.completion_tokens = 25 + mock_usage.total_tokens = 75 + mock_completion.usage = mock_usage + mock_client.beta.chat.completions.parse.return_value = mock_completion + + response, token_usage = openai_request('system', 'user', response_format=mock.MagicMock()) + + assert response is mock_parsed + assert token_usage == {'prompt_tokens': 50, 'completion_tokens': 25, 'total_tokens': 75} + + +@pytest.mark.skipif(not os.getenv('AZURE_OPENAI_API_KEY'), reason='AZURE_OPENAI_API_KEY environment variable not set') +def test_openai_request_returns_token_usage_with_azure(): + response, token_usage = openai_request('You are a helpful assistant.', 'Say hello.', azure=True) + + assert isinstance(response, str) + assert len(response) > 0 + assert token_usage['prompt_tokens'] > 0 + assert token_usage['completion_tokens'] > 0 + assert token_usage['total_tokens'] > 0 diff --git a/toolium/utils/ai_utils/evaluate_answer.py b/toolium/utils/ai_utils/evaluate_answer.py index 3bc664f1..623822f6 100644 --- a/toolium/utils/ai_utils/evaluate_answer.py +++ b/toolium/utils/ai_utils/evaluate_answer.py @@ -116,7 +116,7 @@ def get_answer_evaluation_with_openai( if response_format: kwargs['response_format'] = response_format - response = openai_request(system_message, user_message, model_name, azure, **kwargs) + response, _ = openai_request(system_message, user_message, model_name, azure, **kwargs) try: if response_format and hasattr(response, 'similarity'): diff --git a/toolium/utils/ai_utils/openai.py b/toolium/utils/ai_utils/openai.py index 6c20d7a7..9116cda3 100644 --- a/toolium/utils/ai_utils/openai.py +++ b/toolium/utils/ai_utils/openai.py @@ -38,7 +38,7 @@ def openai_request(system_message, user_message, model_name=None, azure=False, * :param model_name: name of the model to use :param azure: whether to use Azure OpenAI or standard OpenAI :param kwargs: additional parameters to be passed to the OpenAI client (azure_endpoint, timeout, etc.) - :returns: response from OpenAI + :returns: tuple with response from OpenAI and token usage dict """ if OpenAI is None: raise ImportError("OpenAI is not installed. Please run 'pip install toolium[ai]' to use OpenAI features") @@ -67,5 +67,18 @@ def openai_request(system_message, user_message, model_name=None, azure=False, * else: completion = client.chat.completions.create(model=model_name, messages=messages) response = completion.choices[0].message.content + token_usage = {} + if completion.usage: + token_usage = { + 'prompt_tokens': completion.usage.prompt_tokens, + 'completion_tokens': completion.usage.completion_tokens, + 'total_tokens': completion.usage.total_tokens, + } + logger.info( + 'OpenAI token usage: prompt_tokens=%d, completion_tokens=%d, total_tokens=%d', + completion.usage.prompt_tokens, + completion.usage.completion_tokens, + completion.usage.total_tokens, + ) logger.debug('OpenAI response: %s', response) - return response + return response, token_usage diff --git a/toolium/utils/ai_utils/text_analysis.py b/toolium/utils/ai_utils/text_analysis.py index 0a4535ba..89900421 100644 --- a/toolium/utils/ai_utils/text_analysis.py +++ b/toolium/utils/ai_utils/text_analysis.py @@ -83,7 +83,8 @@ def get_text_criteria_analysis(text_input, text_criteria, model_name=None, azure """ # Build prompt using base prompt and target features system_message = build_system_message(text_criteria) - return openai_request(system_message, text_input, model_name, azure, **kwargs) + response, _ = openai_request(system_message, text_input, model_name, azure, **kwargs) + return response def assert_text_criteria(text_input, text_criteria, threshold, model_name=None, azure=False, **kwargs): diff --git a/toolium/utils/ai_utils/text_similarity.py b/toolium/utils/ai_utils/text_similarity.py index 0390beb0..9ff7d1f3 100644 --- a/toolium/utils/ai_utils/text_similarity.py +++ b/toolium/utils/ai_utils/text_similarity.py @@ -102,7 +102,7 @@ def get_text_similarity_with_openai(text, expected_text, model_name=None, azure= ' but its meaning should be similar.' ) user_message = f'The expected answer is: {expected_text}. The LLM answer is: {text}.' - response = openai_request(system_message, user_message, model_name, azure, **kwargs) + response, _ = openai_request(system_message, user_message, model_name, azure, **kwargs) try: response = json.loads(response) similarity = float(response['similarity']) From ba8bab2907525d36c3cda0ba5130be453a295f21 Mon Sep 17 00:00:00 2001 From: lmcalvo Date: Mon, 6 Apr 2026 13:10:49 +0200 Subject: [PATCH 2/4] Use model_dump() for complete token usage details --- .../test/utils/ai_utils/test_openai_usage.py | 36 ++++++++++++++----- toolium/utils/ai_utils/openai.py | 6 +--- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/toolium/test/utils/ai_utils/test_openai_usage.py b/toolium/test/utils/ai_utils/test_openai_usage.py index efca115d..5f304a5b 100644 --- a/toolium/test/utils/ai_utils/test_openai_usage.py +++ b/toolium/test/utils/ai_utils/test_openai_usage.py @@ -28,10 +28,27 @@ def _build_mock_completion(content='test response', prompt_tokens=10, completion_tokens=5, total_tokens=15): """Build a mock OpenAI completion object with usage data""" + usage_dump = { + 'prompt_tokens': prompt_tokens, + 'completion_tokens': completion_tokens, + 'total_tokens': total_tokens, + 'completion_tokens_details': { + 'accepted_prediction_tokens': 0, + 'audio_tokens': 0, + 'reasoning_tokens': 0, + 'rejected_prediction_tokens': 0, + }, + 'prompt_tokens_details': { + 'audio_tokens': 0, + 'cached_tokens': 0, + }, + } + mock_usage = mock.MagicMock() mock_usage.prompt_tokens = prompt_tokens mock_usage.completion_tokens = completion_tokens mock_usage.total_tokens = total_tokens + mock_usage.model_dump.return_value = usage_dump mock_message = mock.MagicMock() mock_message.content = content @@ -56,7 +73,11 @@ def test_openai_request_returns_token_usage(mock_openai_class): response, token_usage = openai_request('system', 'user') assert response == 'hello' - assert token_usage == {'prompt_tokens': 20, 'completion_tokens': 10, 'total_tokens': 30} + assert token_usage['prompt_tokens'] == 20 + assert token_usage['completion_tokens'] == 10 + assert token_usage['total_tokens'] == 30 + assert 'completion_tokens_details' in token_usage + assert 'prompt_tokens_details' in token_usage @mock.patch('toolium.utils.ai_utils.openai.OpenAI') @@ -83,19 +104,16 @@ def test_openai_request_with_response_format_returns_token_usage(mock_openai_cla mock_message.parsed = mock_parsed mock_choice = mock.MagicMock() mock_choice.message = mock_message - mock_completion = mock.MagicMock() + mock_completion = _build_mock_completion(prompt_tokens=50, completion_tokens=25, total_tokens=75) mock_completion.choices = [mock_choice] - mock_usage = mock.MagicMock() - mock_usage.prompt_tokens = 50 - mock_usage.completion_tokens = 25 - mock_usage.total_tokens = 75 - mock_completion.usage = mock_usage mock_client.beta.chat.completions.parse.return_value = mock_completion response, token_usage = openai_request('system', 'user', response_format=mock.MagicMock()) assert response is mock_parsed - assert token_usage == {'prompt_tokens': 50, 'completion_tokens': 25, 'total_tokens': 75} + assert token_usage['prompt_tokens'] == 50 + assert token_usage['completion_tokens'] == 25 + assert token_usage['total_tokens'] == 75 @pytest.mark.skipif(not os.getenv('AZURE_OPENAI_API_KEY'), reason='AZURE_OPENAI_API_KEY environment variable not set') @@ -107,3 +125,5 @@ def test_openai_request_returns_token_usage_with_azure(): assert token_usage['prompt_tokens'] > 0 assert token_usage['completion_tokens'] > 0 assert token_usage['total_tokens'] > 0 + assert 'completion_tokens_details' in token_usage + assert 'prompt_tokens_details' in token_usage diff --git a/toolium/utils/ai_utils/openai.py b/toolium/utils/ai_utils/openai.py index 9116cda3..0974f16c 100644 --- a/toolium/utils/ai_utils/openai.py +++ b/toolium/utils/ai_utils/openai.py @@ -69,11 +69,7 @@ def openai_request(system_message, user_message, model_name=None, azure=False, * response = completion.choices[0].message.content token_usage = {} if completion.usage: - token_usage = { - 'prompt_tokens': completion.usage.prompt_tokens, - 'completion_tokens': completion.usage.completion_tokens, - 'total_tokens': completion.usage.total_tokens, - } + token_usage = completion.usage.model_dump() logger.info( 'OpenAI token usage: prompt_tokens=%d, completion_tokens=%d, total_tokens=%d', completion.usage.prompt_tokens, From 08664e606e11b7e1d9656ae42323638da2233916 Mon Sep 17 00:00:00 2001 From: lmcalvo Date: Mon, 6 Apr 2026 14:35:22 +0200 Subject: [PATCH 3/4] Changelog update --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7ebc7667..7d44ff25 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,8 @@ Toolium Changelog v3.8.1 ------ +- Add token usage tracking to OpenAI requests + *Release date: In development* v3.8.0 From e53ab523c4e0e507d2116da3784959d7b2308704 Mon Sep 17 00:00:00 2001 From: lmcalvo Date: Tue, 7 Apr 2026 12:35:35 +0200 Subject: [PATCH 4/4] CHANGELOG update --- CHANGELOG.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7d44ff25..76094019 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,10 +4,10 @@ Toolium Changelog v3.8.1 ------ -- Add token usage tracking to OpenAI requests - *Release date: In development* +- Add token usage tracking to OpenAI requests + v3.8.0 ------