diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/data_folder/sample_evaluations_score_model_grader_with_audio.wav b/sdk/ai/azure-ai-projects/samples/evaluations/data_folder/sample_evaluations_score_model_grader_with_audio.wav new file mode 100644 index 000000000000..6ce78c4215b3 Binary files /dev/null and b/sdk/ai/azure-ai-projects/samples/evaluations/data_folder/sample_evaluations_score_model_grader_with_audio.wav differ diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_audio.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_audio.py new file mode 100644 index 000000000000..aaed771b3774 --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_audio.py @@ -0,0 +1,216 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +DESCRIPTION: + Given an AIProjectClient, this sample demonstrates how to use the synchronous + `openai.evals.*` methods to create, get, and list evaluations and eval runs. + + The OpenAI official tutorial is here: https://cookbook.openai.com/examples/evaluation/use-cases/evalsapi_audio_inputs + +USAGE: + python sample_evaluations_score_model_grader_with_audio.py + + Before running the sample: + + pip install "azure-ai-projects>=2.0.0" azure-identity python-dotenv + + Set these environment variables with your own values: + 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your + Microsoft Foundry project. It has the form: https://.services.ai.azure.com/api/projects/. + 2) AZURE_AI_MODEL_DEPLOYMENT_NAME - Required. The name of the model deployment to use for evaluation. + 3) AZURE_AI_MODEL_DEPLOYMENT_NAME_FOR_AUDIO - Required. The name of the model deployment for audio to use for evaluation. We recommend using "gpt-4o-audio-preview". +""" + +import os +import base64 + +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +import time +from pprint import pprint +from openai.types.evals.create_eval_completions_run_data_source_param import ( + CreateEvalCompletionsRunDataSourceParam, + SourceFileContent, + SourceFileContentContent, + InputMessagesTemplate, + InputMessagesTemplateTemplateEvalItem, +) +from openai.types.responses import EasyInputMessageParam, ResponseInputAudioParam +from openai.types.eval_create_params import DataSourceConfigCustom +from dotenv import load_dotenv + + +load_dotenv() +file_path = os.path.abspath(__file__) +folder_path = os.path.dirname(file_path) + +endpoint = os.getenv("AZURE_AI_PROJECT_ENDPOINT") +model_deployment_name = os.getenv("AZURE_AI_MODEL_DEPLOYMENT_NAME") +model_deployment_name_for_audio = os.getenv("AZURE_AI_MODEL_DEPLOYMENT_NAME_FOR_AUDIO") + +missing_env_vars = [ + name + for name, value in { + "AZURE_AI_PROJECT_ENDPOINT": endpoint, + "AZURE_AI_MODEL_DEPLOYMENT_NAME": model_deployment_name, + "AZURE_AI_MODEL_DEPLOYMENT_NAME_FOR_AUDIO": model_deployment_name_for_audio, + }.items() + if not value +] + +if missing_env_vars: + raise RuntimeError( + "Missing required environment variables for this sample: " + + ", ".join(missing_env_vars) + + ". Please set them as described in the sample docstring."
+ ) + + +def audio_to_base64(audio_path: str) -> str: + """Read an audio file and return its base64-encoded content.""" + with open(audio_path, "rb") as f: + return base64.b64encode(f.read()).decode() + + +with ( + DefaultAzureCredential() as credential, + AIProjectClient(endpoint=endpoint, credential=credential) as project_client, + project_client.get_openai_client() as client, +): + + data_source_config = DataSourceConfigCustom( + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "audio_data": { + "type": "string", + "description": "Base64-encoded WAV audio data." + }, + "expected": { + "type": "string", + "description": "The expected content in the audio." + } + }, + "required": [ + "audio_data", + "expected", + ], + }, + "include_sample_schema": True, + } + ) + + testing_criteria = [ + { + "type": "score_model", + "name": "score_grader", + "model": model_deployment_name, + "input": [ + { + "role": "system", + "content": "You are an audio analyzer. Listen to the audio, return a float score in [0,1] where 1 means the audio has the same meaning as {{item.expected}}.", + }, + { + "role": "user", + "content": "{{sample.output_text}}", + } + ], + "range": [ + 0.0, + 1.0 + ], + "pass_threshold": 0.5, + }, + ] + + print("Creating evaluation") + eval_object = client.evals.create( + name="OpenAI graders test", + data_source_config=data_source_config, + testing_criteria=testing_criteria, # type: ignore + ) + print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") + + print("Get evaluation by Id") + eval_object_response = client.evals.retrieve(eval_object.id) + print("Evaluation Response:") + pprint(eval_object_response) + + source_file_content_content = SourceFileContentContent( + item={ + "audio_data": audio_to_base64(os.path.join(folder_path, "data_folder/sample_evaluations_score_model_grader_with_audio.wav")), + "expected": "Don't forget a jacket", + }, + ) + source_file_content = SourceFileContent( + type="file_content", + content=[source_file_content_content], + ) + input_messages = InputMessagesTemplate( + type="template", + template=[ + EasyInputMessageParam( + role="system", + content="You are an assistant that can analyze audio input. 
You will be given an audio input to analyze.", + ), + InputMessagesTemplateTemplateEvalItem( + role="user", + type="message", + content="Listen to the following audio and convert to text.", + ), + InputMessagesTemplateTemplateEvalItem( + role="user", + type="message", + content=ResponseInputAudioParam( + type="input_audio", + input_audio={ + "data": "{{item.audio_data}}", + "format": "wav", + } + ) + ) + ], + ) + + print("Creating Eval Run") + eval_run_object = client.evals.runs.create( + eval_id=eval_object.id, + name="Eval", + metadata={"team": "eval-exp", "scenario": "notifications-v1"}, + data_source=CreateEvalCompletionsRunDataSourceParam( + type="completions", + source=source_file_content, + model=model_deployment_name_for_audio, + input_messages=input_messages, + sampling_params={ + "temperature": 0.8, + }, + ), + ) + print(f"Eval Run created (id: {eval_run_object.id}, name: {eval_run_object.name})") + pprint(eval_run_object) + + print("Get Eval Run by Id") + eval_run_response = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) + print("Eval Run Response:") + pprint(eval_run_response) + + while True: + run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id) + if run.status == "completed" or run.status == "failed": + output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) + pprint(output_items) + print(f"Eval Run Report URL: {run.report_url}") + + break + time.sleep(5) + print("Waiting for eval run to complete...") + + client.evals.delete(eval_id=eval_object.id) + print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_audio_model_target.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_audio_model_target.py new file mode 100644 index 000000000000..7c038fdec59f --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_audio_model_target.py @@ -0,0 +1,204 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +DESCRIPTION: + Given an AIProjectClient, this sample demonstrates how to use the synchronous + `openai.evals.*` methods to create, get, and list evaluations and eval runs. + + The OpenAI official tutorial is here: https://cookbook.openai.com/examples/evaluation/use-cases/evalsapi_audio_inputs + +USAGE: + python sample_evaluations_score_model_grader_with_audio_model_target.py + + Before running the sample: + + pip install "azure-ai-projects>=2.0.0b2" azure-identity python-dotenv + + Set these environment variables with your own values: + 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your + Microsoft Foundry project. It has the form: https://.services.ai.azure.com/api/projects/. + 2) AZURE_AI_MODEL_DEPLOYMENT_NAME - Required. The name of the model deployment to use for evaluation. + 3) AZURE_AI_MODEL_DEPLOYMENT_NAME_FOR_AUDIO - Required. 
The name of the model deployment for audio to use for evaluation, recommend to use "gpt-4o-audio-preview" +""" + +import os +import base64 + +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +import time +from pprint import pprint +from openai.types.evals.create_eval_completions_run_data_source_param import ( + SourceFileContent, + SourceFileContentContent, + InputMessagesTemplate, + InputMessagesTemplateTemplateEvalItem, +) +from openai.types.responses import EasyInputMessageParam, ResponseInputAudioParam +from openai.types.eval_create_params import DataSourceConfigCustom +from dotenv import load_dotenv + + +load_dotenv() +file_path = os.path.abspath(__file__) +folder_path = os.path.dirname(file_path) + +endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT", "") +model_deployment_name = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME", "") +model_deployment_name_for_audio = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME_FOR_AUDIO", "") + + +def audio_to_base64(audio_path: str) -> str: + """Read an audio file and return its base64-encoded content.""" + with open(audio_path, "rb") as f: + return base64.b64encode(f.read()).decode() + + +with ( + DefaultAzureCredential() as credential, + AIProjectClient(endpoint=endpoint, credential=credential) as project_client, + project_client.get_openai_client() as client, +): + + data_source_config = DataSourceConfigCustom( + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "audio_data": { + "type": "string", + "description": "Base64-encoded WAV audio data." + }, + "expected": { + "type": "string", + "description": "The expected content in the audio." + } + }, + "required": [ + "audio_data", + "expected", + ], + }, + "include_sample_schema": True, + } + ) + + testing_criteria = [ + { + "type": "score_model", + "name": "score_grader", + "model": model_deployment_name, + "input": [ + { + "role": "system", + "content": "You are an audio analyzer. Listen to the audio, return a float score in [0,1] where 1 means the audio has the same meaning as {{item.expected}}.", + }, + { + "role": "user", + "content": "{{sample.output_text}}", + } + ], + "range": [ + 0.0, + 1.0 + ], + "pass_threshold": 0.5, + }, + ] + + print("Creating evaluation") + eval_object = client.evals.create( + name="OpenAI graders test - model target", + data_source_config=data_source_config, + testing_criteria=testing_criteria, # type: ignore + ) + print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") + + print("Get evaluation by Id") + eval_object_response = client.evals.retrieve(eval_object.id) + print("Evaluation Response:") + pprint(eval_object_response) + + source_file_content_content = SourceFileContentContent( + item={ + "audio_data": audio_to_base64(os.path.join(folder_path, "data_folder/sample_evaluations_score_model_grader_with_audio.wav")), + "expected": "Don't forget a jacket", + }, + ) + source_file_content = SourceFileContent( + type="file_content", + content=[source_file_content_content], + ) + input_messages = InputMessagesTemplate( + type="template", + template=[ + EasyInputMessageParam( + role="system", + content="You are an assistant that can analyze audio input. 
You will be given an audio input to analyze.", + ), + InputMessagesTemplateTemplateEvalItem( + role="user", + type="message", + content="Listen to the following audio and convert to text.", + ), + InputMessagesTemplateTemplateEvalItem( + role="user", + type="message", + content=ResponseInputAudioParam( + type="input_audio", + input_audio={ + "data": "{{item.audio_data}}", + "format": "wav", + }, + ), + ), + ], + ) + + data_source = { + "type": "azure_ai_target_completions", + "source": source_file_content, + "input_messages": input_messages, + "target": { + "type": "azure_ai_model", + "model": model_deployment_name_for_audio, + "sampling_params": { # Note: model sampling parameters are optional and can differ per model + "top_p": 1.0, + "max_completion_tokens": 5000, + }, + }, + } + + print("Creating Eval Run") + eval_run_object = client.evals.runs.create( + eval_id=eval_object.id, + name="Eval", + metadata={"team": "eval-exp", "scenario": "notifications-v1"}, + data_source=data_source, # type: ignore + ) + print(f"Eval Run created (id: {eval_run_object.id}, name: {eval_run_object.name})") + pprint(eval_run_object) + + print("Get Eval Run by Id") + eval_run_response = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) + print("Eval Run Response:") + pprint(eval_run_response) + + while True: + run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id) + if run.status == "completed" or run.status == "failed": + output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) + pprint(output_items) + print(f"Eval Run Report URL: {run.report_url}") + + break + time.sleep(5) + print("Waiting for eval run to complete...") + + client.evals.delete(eval_id=eval_object.id) + print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_image_agent_target.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_image_agent_target.py new file mode 100644 index 000000000000..526a98e1f463 --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_image_agent_target.py @@ -0,0 +1,201 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +DESCRIPTION: + Given an AIProjectClient, this sample demonstrates how to use the synchronous + `openai.evals.*` methods to create, get, and list evaluations and eval runs. + +USAGE: + python sample_evaluations_score_model_grader_with_image_agent_target.py + + Before running the sample: + + pip install "azure-ai-projects>=2.0.0b2" azure-identity python-dotenv Pillow + + Set these environment variables with your own values: + 1) FOUNDRY_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your + Microsoft Foundry project. It has the form: https://.services.ai.azure.com/api/projects/. + 2) FOUNDRY_MODEL_NAME - Required. The name of the model deployment to use for the score model grader. + 3) AGENT_NAME - Required. The name of the agent to evaluate. + 4) AGENT_VERSION - Optional. The version of the agent to evaluate. Defaults to the latest version if not specified. 
+""" + +import os +import base64 +from PIL import Image +from io import BytesIO + +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +import time +from pprint import pprint +from openai.types.evals.create_eval_completions_run_data_source_param import ( + CreateEvalCompletionsRunDataSourceParam, + SourceFileContent, + SourceFileContentContent, + InputMessagesTemplate, + InputMessagesTemplateTemplateEvalItem, + InputMessagesTemplateTemplateEvalItemContentInputImage, +) +from openai.types.responses import EasyInputMessageParam +from openai.types.eval_create_params import DataSourceConfigCustom +from dotenv import load_dotenv + + +load_dotenv() +file_path = os.path.abspath(__file__) +folder_path = os.path.dirname(file_path) + +endpoint = os.environ.get("FOUNDRY_PROJECT_ENDPOINT", "") +model_deployment_name = os.environ.get("FOUNDRY_MODEL_NAME", "") +agent_name = os.environ.get("AGENT_NAME", "") +agent_version = os.environ.get("AGENT_VERSION", "") + + +def image_to_data_uri(image_path: str) -> str: + with Image.open(image_path) as img: + buffered = BytesIO() + img.save(buffered, format=img.format or "PNG") + img_str = base64.b64encode(buffered.getvalue()).decode() + mime_type = f"image/{img.format.lower()}" if img.format else "image/png" + return f"data:{mime_type};base64,{img_str}" + + +with ( + DefaultAzureCredential() as credential, + AIProjectClient(endpoint=endpoint, credential=credential) as project_client, + project_client.get_openai_client() as client, +): + + data_source_config = DataSourceConfigCustom( + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "image_url": {"type": "string", "description": "The URL of the image to be evaluated."}, + "caption": {"type": "string", "description": "The caption describing the image."}, + }, + "required": [ + "image_url", + "caption", + ], + }, + "include_sample_schema": True, + } + ) + + testing_criteria = [ + { + "type": "score_model", + "name": "score_grader", + "model": model_deployment_name, + "input": [ + { + "role": "system", + "content": "You are an expert grader. Judge how well the model response {{sample.output_text}} describes the image as well as matches the caption {{item.caption}}. Output a score of 1 if it's an excellent match with both. If it's somewhat compatible, output a score around 0.5. 
Otherwise, give a score of 0.", + }, + { + "role": "user", + "content": { + "type": "input_image", + "image_url": "{{item.image_url}}", + "detail": "auto", + }, + }, + ], + "range": [0.0, 1.0], + "pass_threshold": 0.5, + }, + ] + + print("Creating evaluation") + eval_object = client.evals.create( + name="OpenAI graders test - agent target", + data_source_config=data_source_config, + testing_criteria=testing_criteria, # type: ignore + ) + print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") + + print("Get evaluation by Id") + eval_object_response = client.evals.retrieve(eval_object.id) + print("Evaluation Response:") + pprint(eval_object_response) + + image_path = os.path.join(folder_path, "data_folder/sample_evaluations_score_model_grader_with_image.jpg") + source_file_content_content1 = SourceFileContentContent( + item={ + "image_url": image_to_data_uri(image_path), + "caption": "industrial plants in the distance at night", + }, + ) + source_file_content_content2 = SourceFileContentContent( + item={ + "image_url": "https://ep1.pinkbike.org/p4pb6973204/p4pb6973204.jpg", + "caption": "all shots by person and rider shots can be found on his website.", + }, + ) + source_file_content = SourceFileContent( + type="file_content", + content=[source_file_content_content1, source_file_content_content2], + ) + input_messages = InputMessagesTemplate( + type="template", + template=[ + EasyInputMessageParam( + role="system", + content="You are an assistant that analyzes images and provides captions that accurately describe the content of the image.", + ), + InputMessagesTemplateTemplateEvalItem( + role="user", + type="message", + content=InputMessagesTemplateTemplateEvalItemContentInputImage( + type="input_image", + image_url="{{item.image_url}}", + detail="auto", + ), + ), + ], + ) + + data_source = { + "type": "azure_ai_target_completions", + "source": source_file_content, + "input_messages": input_messages, + "target": { + "type": "azure_ai_agent", + "name": agent_name, + "version": agent_version, # Version is optional. 
Defaults to latest version if not specified + }, + } + + print("Creating Eval Run") + eval_run_object = client.evals.runs.create( + eval_id=eval_object.id, + name="Eval", + metadata={"team": "eval-exp", "scenario": "notifications-v1"}, + data_source=data_source # type: ignore + ) + print(f"Eval Run created (id: {eval_run_object.id}, name: {eval_run_object.name})") + pprint(eval_run_object) + + print("Get Eval Run by Id") + eval_run_response = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) + print("Eval Run Response:") + pprint(eval_run_response) + + while True: + run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id) + if run.status == "completed" or run.status == "failed": + output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) + pprint(output_items) + print(f"Eval Run Report URL: {run.report_url}") + + break + time.sleep(5) + print("Waiting for eval run to complete...") + + client.evals.delete(eval_id=eval_object.id) + print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_image_model_target.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_image_model_target.py new file mode 100644 index 000000000000..3ab96504a2a8 --- /dev/null +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluations_score_model_grader_with_image_model_target.py @@ -0,0 +1,214 @@ +# pylint: disable=line-too-long,useless-suppression +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +DESCRIPTION: + Given an AIProjectClient, this sample demonstrates how to use the synchronous + `openai.evals.*` methods to create, get and list evaluation and eval runs. + +USAGE: + python sample_evaluations_score_model_grader_with_image_model_target.py + + Before running the sample: + + pip install "azure-ai-projects>=2.0.0b2" azure-identity python-dotenv Pillow + + Set these environment variables with your own values: + 1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your + Microsoft Foundry project. It has the form: https://.services.ai.azure.com/api/projects/. + 2) AZURE_AI_MODEL_DEPLOYMENT_NAME - Required. The name of the model deployment to use for evaluation. +""" + +import os +import base64 +from PIL import Image +from io import BytesIO + +from azure.identity import DefaultAzureCredential +from azure.ai.projects import AIProjectClient +import time +from pprint import pprint +from openai.types.evals.create_eval_completions_run_data_source_param import ( + SourceFileContent, + SourceFileContentContent, + InputMessagesTemplate, + InputMessagesTemplateTemplateEvalItem, + InputMessagesTemplateTemplateEvalItemContentInputImage, +) +from openai.types.responses import EasyInputMessageParam +from openai.types.eval_create_params import DataSourceConfigCustom +from dotenv import load_dotenv + + +load_dotenv() +file_path = os.path.abspath(__file__) +folder_path = os.path.dirname(file_path) + +endpoint = os.environ.get("AZURE_AI_PROJECT_ENDPOINT") +model_deployment_name = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME") + +if not endpoint: + raise EnvironmentError( + "AZURE_AI_PROJECT_ENDPOINT environment variable is required but was not found or is empty. " + "Set it to your Azure AI Project endpoint, e.g. 
" + "https://.services.ai.azure.com/api/projects/." + ) + +if not model_deployment_name: + raise EnvironmentError( + "AZURE_AI_MODEL_DEPLOYMENT_NAME environment variable is required but was not found or is empty. " + "Set it to the name of the model deployment to use for evaluation." + ) + + +def image_to_data_uri(image_path: str) -> str: + with Image.open(image_path) as img: + buffered = BytesIO() + img.save(buffered, format=img.format or "PNG") + img_str = base64.b64encode(buffered.getvalue()).decode() + mime_type = f"image/{img.format.lower()}" if img.format else "image/png" + return f"data:{mime_type};base64,{img_str}" + + +with ( + DefaultAzureCredential() as credential, + AIProjectClient(endpoint=endpoint, credential=credential) as project_client, + project_client.get_openai_client() as client, +): + + data_source_config = DataSourceConfigCustom( + { + "type": "custom", + "item_schema": { + "type": "object", + "properties": { + "image_url": {"type": "string", "description": "The URL of the image to be evaluated."}, + "caption": {"type": "string", "description": "The caption describing the image."}, + }, + "required": [ + "image_url", + "caption", + ], + }, + "include_sample_schema": True, + } + ) + + testing_criteria = [ + { + "type": "score_model", + "name": "score_grader", + "model": model_deployment_name, + "input": [ + { + "role": "system", + "content": "You are an expert grader. Judge how well the model response {{sample.output_text}} describes the image as well as matches the caption {{item.caption}}. Output a score of 1 if it's an excellent match with both. If it's somewhat compatible, output a score around 0.5. Otherwise, give a score of 0.", + }, + { + "role": "user", + "content": { + "type": "input_image", + "image_url": "{{item.image_url}}", + "detail": "auto", + }, + }, + ], + "range": [0.0, 1.0], + "pass_threshold": 0.5, + }, + ] + + print("Creating evaluation") + eval_object = client.evals.create( + name="OpenAI graders test - agent target", + data_source_config=data_source_config, + testing_criteria=testing_criteria, # type: ignore + ) + print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})") + + print("Get evaluation by Id") + eval_object_response = client.evals.retrieve(eval_object.id) + print("Evaluation Response:") + pprint(eval_object_response) + + image_path = os.path.join(folder_path, "data_folder/sample_evaluations_score_model_grader_with_image.jpg") + source_file_content_content1 = SourceFileContentContent( + item={ + "image_url": image_to_data_uri(image_path), + "caption": "industrial plants in the distance at night", + }, + ) + source_file_content_content2 = SourceFileContentContent( + item={ + "image_url": "https://ep1.pinkbike.org/p4pb6973204/p4pb6973204.jpg", + "caption": "all shots by person and rider shots can be found on his website.", + }, + ) + source_file_content = SourceFileContent( + type="file_content", + content=[source_file_content_content1, source_file_content_content2], + ) + input_messages = InputMessagesTemplate( + type="template", + template=[ + EasyInputMessageParam( + role="system", + content="You are an assistant that analyzes images and provides captions that accurately describe the content of the image.", + ), + InputMessagesTemplateTemplateEvalItem( + role="user", + type="message", + content=InputMessagesTemplateTemplateEvalItemContentInputImage( + type="input_image", + image_url="{{item.image_url}}", + detail="auto", + ), + ), + ], + ) + + data_source = { + "type": "azure_ai_target_completions", + "source": 
source_file_content, + "input_messages": input_messages, + "target": { + "type": "azure_ai_model", + "model": model_deployment_name, + "sampling_params": { # Note: model sampling parameters are optional and can differ per model + "top_p": 1.0, + "max_completion_tokens": 5000, + }, + }, + } + + print("Creating Eval Run") + eval_run_object = client.evals.runs.create( + eval_id=eval_object.id, + name="Eval", + metadata={"team": "eval-exp", "scenario": "notifications-v1"}, + data_source=data_source, # type: ignore + ) + print(f"Eval Run created (id: {eval_run_object.id}, name: {eval_run_object.name})") + pprint(eval_run_object) + + print("Get Eval Run by Id") + eval_run_response = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id) + print("Eval Run Response:") + pprint(eval_run_response) + + while True: + run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id) + if run.status == "completed" or run.status == "failed": + output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) + pprint(output_items) + print(f"Eval Run Report URL: {run.report_url}") + + break + time.sleep(5) + print("Waiting for eval run to complete...") + + client.evals.delete(eval_id=eval_object.id) + print("Evaluation deleted")
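Note (not part of the diff above): all four samples repeat the same polling loop after creating the eval run. A minimal sketch of how that loop could be factored into a shared helper follows; the helper name wait_for_eval_run and the poll_interval_seconds parameter are illustrative assumptions, while the client.evals.runs.* calls are exactly the ones already used in the samples.

import time
from pprint import pprint


def wait_for_eval_run(client, eval_id: str, run_id: str, poll_interval_seconds: float = 5.0):
    # Hypothetical helper (illustration only): mirrors the inline polling loop used in each sample above.
    while True:
        run = client.evals.runs.retrieve(run_id=run_id, eval_id=eval_id)
        if run.status in ("completed", "failed"):
            # Same reporting as the samples: dump the output items and the report URL.
            output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_id))
            pprint(output_items)
            print(f"Eval Run Report URL: {run.report_url}")
            return run
        print("Waiting for eval run to complete...")
        time.sleep(poll_interval_seconds)

Each sample would then call wait_for_eval_run(client, eval_object.id, eval_run_object.id) in place of its inline while loop.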