Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions functioncalling_finetuning/evals/evaluation_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,14 +171,10 @@ def load_hermes_dataset(max_samples=500):
return samples


async def get_model_response_async(model, system_prompt, user_query, semaphore):
async def get_model_response_async(model, system_prompt, user_query, semaphore, client):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For better clarity and maintainability, please add a type hint for the new `client` parameter. This function currently has no type hints; adding them — starting with the new parameter — would improve readability.

Suggested change
async def get_model_response_async(model, system_prompt, user_query, semaphore, client):
async def get_model_response_async(model, system_prompt, user_query, semaphore, client: AsyncOpenAI):

"""Get OpenAI model response asynchronously with rate limiting."""

async with semaphore:
client = AsyncOpenAI(
api_key=API_KEY,
base_url=BASE_URL, # Adjust if using a different endpoint
)
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
Expand Down Expand Up @@ -346,12 +342,12 @@ def print_model_summary(model, results):
print(f"Format Valid: {format_valid:.3f}")


async def process_single_sample(sample: Dict[str, Any], model: str, semaphore: asyncio.Semaphore) -> Dict[str, Any]:
async def process_single_sample(sample: Dict[str, Any], model: str, semaphore: asyncio.Semaphore, client) -> Dict[str, Any]:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The new `client` parameter is missing a type hint. Since the other parameters in this function are type-hinted, adding one for `client` would maintain consistency and improve code clarity.

Suggested change
async def process_single_sample(sample: Dict[str, Any], model: str, semaphore: asyncio.Semaphore, client) -> Dict[str, Any]:
async def process_single_sample(sample: Dict[str, Any], model: str, semaphore: asyncio.Semaphore, client: AsyncOpenAI) -> Dict[str, Any]:

"""Process a single sample asynchronously."""

# Get model response
response = await get_model_response_async(
model, sample["system_prompt"], sample["user_query"], semaphore
model, sample["system_prompt"], sample["user_query"], semaphore, client
)

# print(f"Sample {sample['id']} response: {response}")
Expand Down Expand Up @@ -386,6 +382,12 @@ async def run_benchmark_async(eval_samples: List[Dict[str, Any]], models: List[s

all_results = []

# Create shared client for all requests
client = AsyncOpenAI(
api_key=API_KEY,
base_url=BASE_URL
)
Comment on lines +385 to +389

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The newly created shared `AsyncOpenAI` client is never closed after use. This can leak resources (such as unclosed connections), which undermines the goal of this PR. Best practice is to use the client as an asynchronous context manager (`async with`) so that it is always closed properly. The subsequent `for` loop (starting on line 391) should then be indented so that it sits inside this `async with` block.

    async with AsyncOpenAI(
        api_key=API_KEY,
        base_url=BASE_URL
    ) as client:


for model in models:
print(f"\nEvaluating {model}...")
start_time = time.time()
Expand All @@ -397,7 +399,7 @@ async def run_benchmark_async(eval_samples: List[Dict[str, Any]], models: List[s
tasks = []
for sample in eval_samples:
task = asyncio.create_task(
process_single_sample(sample, model, semaphore)
process_single_sample(sample, model, semaphore, client)
)
tasks.append(task)

Expand Down