diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 7ebc7667..76094019 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,8 @@ v3.8.1
 
 *Release date: In development*
 
+- Add token usage tracking to OpenAI requests (openai_request now returns a response and token usage tuple)
+
 v3.8.0
 ------
 
diff --git a/toolium/test/utils/ai_utils/test_openai_usage.py b/toolium/test/utils/ai_utils/test_openai_usage.py
new file mode 100644
index 00000000..5f304a5b
--- /dev/null
+++ b/toolium/test/utils/ai_utils/test_openai_usage.py
@@ -0,0 +1,129 @@
+"""
+Copyright 2026 Telefónica Innovación Digital, S.L.
+This file is part of Toolium.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+from unittest import mock
+
+import pytest
+
+from toolium.test.utils.ai_utils.common import (
+    configure_default_openai_model,  # noqa: F401, fixture needed to set the OpenAI model for all tests in this module
+)
+from toolium.utils.ai_utils.openai import openai_request
+
+
+def _build_mock_completion(content='test response', prompt_tokens=10, completion_tokens=5, total_tokens=15):
+    """Build a mock OpenAI completion object with usage data"""
+    usage_dump = {
+        'prompt_tokens': prompt_tokens,
+        'completion_tokens': completion_tokens,
+        'total_tokens': total_tokens,
+        'completion_tokens_details': {
+            'accepted_prediction_tokens': 0,
+            'audio_tokens': 0,
+            'reasoning_tokens': 0,
+            'rejected_prediction_tokens': 0,
+        },
+        'prompt_tokens_details': {
+            'audio_tokens': 0,
+            'cached_tokens': 0,
+        },
+    }
+
+    mock_usage = mock.MagicMock()
+    mock_usage.prompt_tokens = prompt_tokens
+    mock_usage.completion_tokens = completion_tokens
+    mock_usage.total_tokens = total_tokens
+    mock_usage.model_dump.return_value = usage_dump
+
+    mock_message = mock.MagicMock()
+    mock_message.content = content
+
+    mock_choice = mock.MagicMock()
+    mock_choice.message = mock_message
+
+    mock_completion = mock.MagicMock()
+    mock_completion.choices = [mock_choice]
+    mock_completion.usage = mock_usage
+    return mock_completion
+
+
+@mock.patch('toolium.utils.ai_utils.openai.OpenAI')
+def test_openai_request_returns_token_usage(mock_openai_class):
+    mock_client = mock.MagicMock()
+    mock_openai_class.return_value = mock_client
+    mock_client.chat.completions.create.return_value = _build_mock_completion(
+        content='hello', prompt_tokens=20, completion_tokens=10, total_tokens=30
+    )
+
+    response, token_usage = openai_request('system', 'user')
+
+    assert response == 'hello'
+    assert token_usage['prompt_tokens'] == 20
+    assert token_usage['completion_tokens'] == 10
+    assert token_usage['total_tokens'] == 30
+    assert 'completion_tokens_details' in token_usage
+    assert 'prompt_tokens_details' in token_usage
+
+
+@mock.patch('toolium.utils.ai_utils.openai.OpenAI')
+def test_openai_request_returns_empty_token_usage_when_no_usage(mock_openai_class):
+    mock_client = mock.MagicMock()
+    mock_openai_class.return_value = mock_client
+    mock_completion = _build_mock_completion()
+    mock_completion.usage = None
+    mock_client.chat.completions.create.return_value = mock_completion
+
+    response, token_usage = openai_request('system', 'user')
+
+    assert response == 'test response'
+    assert token_usage == {}
+
+
+@mock.patch('toolium.utils.ai_utils.openai.OpenAI')
+def test_openai_request_with_response_format_returns_token_usage(mock_openai_class):
+    mock_client = mock.MagicMock()
+    mock_openai_class.return_value = mock_client
+
+    mock_parsed = mock.MagicMock()
+    mock_message = mock.MagicMock()
+    mock_message.parsed = mock_parsed
+    mock_choice = mock.MagicMock()
+    mock_choice.message = mock_message
+    mock_completion = _build_mock_completion(prompt_tokens=50, completion_tokens=25, total_tokens=75)
+    mock_completion.choices = [mock_choice]
+    mock_client.beta.chat.completions.parse.return_value = mock_completion
+
+    response, token_usage = openai_request('system', 'user', response_format=mock.MagicMock())
+
+    assert response is mock_parsed
+    assert token_usage['prompt_tokens'] == 50
+    assert token_usage['completion_tokens'] == 25
+    assert token_usage['total_tokens'] == 75
+
+
+@pytest.mark.skipif(not os.getenv('AZURE_OPENAI_API_KEY'), reason='AZURE_OPENAI_API_KEY environment variable not set')
+def test_openai_request_returns_token_usage_with_azure():
+    response, token_usage = openai_request('You are a helpful assistant.', 'Say hello.', azure=True)
+
+    assert isinstance(response, str)
+    assert len(response) > 0
+    assert token_usage['prompt_tokens'] > 0
+    assert token_usage['completion_tokens'] > 0
+    assert token_usage['total_tokens'] > 0
+    assert 'completion_tokens_details' in token_usage
+    assert 'prompt_tokens_details' in token_usage
diff --git a/toolium/utils/ai_utils/evaluate_answer.py b/toolium/utils/ai_utils/evaluate_answer.py
index 3bc664f1..623822f6 100644
--- a/toolium/utils/ai_utils/evaluate_answer.py
+++ b/toolium/utils/ai_utils/evaluate_answer.py
@@ -116,7 +116,7 @@ def get_answer_evaluation_with_openai(
     if response_format:
         kwargs['response_format'] = response_format
 
-    response = openai_request(system_message, user_message, model_name, azure, **kwargs)
+    response, _ = openai_request(system_message, user_message, model_name, azure, **kwargs)
 
     try:
         if response_format and hasattr(response, 'similarity'):
diff --git a/toolium/utils/ai_utils/openai.py b/toolium/utils/ai_utils/openai.py
index 6c20d7a7..0974f16c 100644
--- a/toolium/utils/ai_utils/openai.py
+++ b/toolium/utils/ai_utils/openai.py
@@ -38,7 +38,7 @@ def openai_request(system_message, user_message, model_name=None, azure=False, *
     :param model_name: name of the model to use
     :param azure: whether to use Azure OpenAI or standard OpenAI
     :param kwargs: additional parameters to be passed to the OpenAI client (azure_endpoint, timeout, etc.)
-    :returns: response from OpenAI
+    :returns: tuple with response from OpenAI and token usage dict (empty dict when usage is not reported)
     """
     if OpenAI is None:
         raise ImportError("OpenAI is not installed. Please run 'pip install toolium[ai]' to use OpenAI features")
@@ -67,5 +67,16 @@ def openai_request(system_message, user_message, model_name=None, azure=False, *
     else:
         completion = client.chat.completions.create(model=model_name, messages=messages)
         response = completion.choices[0].message.content
+    # completion.usage can be None; callers then receive an empty dict
+    token_usage = {}
+    if completion.usage:
+        token_usage = completion.usage.model_dump()
+        # %s instead of %d: a counter missing from the response must not break log formatting
+        logger.info(
+            'OpenAI token usage: prompt_tokens=%s, completion_tokens=%s, total_tokens=%s',
+            completion.usage.prompt_tokens,
+            completion.usage.completion_tokens,
+            completion.usage.total_tokens,
+        )
     logger.debug('OpenAI response: %s', response)
-    return response
+    return response, token_usage
diff --git a/toolium/utils/ai_utils/text_analysis.py b/toolium/utils/ai_utils/text_analysis.py
index 0a4535ba..89900421 100644
--- a/toolium/utils/ai_utils/text_analysis.py
+++ b/toolium/utils/ai_utils/text_analysis.py
@@ -83,7 +83,8 @@ def get_text_criteria_analysis(text_input, text_criteria, model_name=None, azure
     """
     # Build prompt using base prompt and target features
     system_message = build_system_message(text_criteria)
-    return openai_request(system_message, text_input, model_name, azure, **kwargs)
+    response, _ = openai_request(system_message, text_input, model_name, azure, **kwargs)
+    return response
 
 
 def assert_text_criteria(text_input, text_criteria, threshold, model_name=None, azure=False, **kwargs):
diff --git a/toolium/utils/ai_utils/text_similarity.py b/toolium/utils/ai_utils/text_similarity.py
index 0390beb0..9ff7d1f3 100644
--- a/toolium/utils/ai_utils/text_similarity.py
+++ b/toolium/utils/ai_utils/text_similarity.py
@@ -102,7 +102,7 @@ def get_text_similarity_with_openai(text, expected_text, model_name=None, azure=
         ' but its meaning should be similar.'
     )
     user_message = f'The expected answer is: {expected_text}. The LLM answer is: {text}.'
-    response = openai_request(system_message, user_message, model_name, azure, **kwargs)
+    response, _ = openai_request(system_message, user_message, model_name, azure, **kwargs)
     try:
         response = json.loads(response)
         similarity = float(response['similarity'])