Reproduce Code in Nestjs
import { Inject, Injectable } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import type { Cache } from 'cache-manager';
import { createAiGateway } from 'ai-gateway-provider';
import { createAzure } from 'ai-gateway-provider/providers/azure';
import { wrapLanguageModel } from 'ai';
import { createHash } from 'crypto';
import { CACHE_MANAGER } from '@nestjs/cache-manager';
import { EnvironmentVariables } from 'src/system/config';
export enum AIModel {
GPT_4_1 = 'GPT_4_1',
GPT_5_1_MINI = 'GPT_5_1_MINI',
GPT_5_2 = 'GPT_5_2',
GPT_5 = 'GPT_5',
}
@Injectable()
export class VercelAIService {
private readonly aiGateway: ReturnType<typeof createAiGateway>;
private readonly azure: ReturnType<typeof createAzure>;
private readonly deploymentNames: Record<AIModel, string>;
constructor(
private configService: ConfigService<EnvironmentVariables>,
@Inject(CACHE_MANAGER) private readonly cacheManager: Cache,
) {
// Initialize Cloudflare AI Gateway
this.aiGateway = createAiGateway({
accountId: this.configService.getOrThrow<string>(
'CLOUDFLARE_AI_GATEWAY_ACCOUNT_ID',
),
gateway: this.configService.getOrThrow<string>(
'CLOUDFLARE_AI_GATEWAY_ID',
),
apiKey: this.configService.getOrThrow<string>(
'CLOUDFLARE_AI_GATEWAY_TOKEN',
),
});
// Initialize Azure OpenAI provider with Cloudflare AI Gateway
this.azure = createAzure({
resourceName: this.configService.getOrThrow<string>(
'AZURE_OPENAI_RESOURCE_NAME',
),
apiKey: this.configService.getOrThrow<string>('AZURE_OPENAI_API_KEY'),
useDeploymentBasedUrls: true,
apiVersion: this.configService.getOrThrow<string>(
'AZURE_OPENAI_API_VERSION',
),
});
this.deploymentNames = {
[AIModel.GPT_4_1]: this.configService.getOrThrow<string>(
'AZURE_OPENAI_GPT41_DEPLOYMENT_NAME',
),
[AIModel.GPT_5_1_MINI]: this.configService.getOrThrow<string>(
'AZURE_OPENAI_GPT51_MINI_DEPLOYMENT_NAME',
),
[AIModel.GPT_5_2]: this.configService.getOrThrow<string>(
'AZURE_OPENAI_GPT52_DEPLOYMENT_NAME',
),
[AIModel.GPT_5]: this.configService.getOrThrow<string>(
'AZURE_OPENAI_GPT5_DEPLOYMENT_NAME',
),
};
}
getModel(model: AIModel): ReturnType<typeof wrapLanguageModel> {
const deploymentName =
this.deploymentNames[model] || this.deploymentNames[AIModel.GPT_4_1];
// Use .chat() for chat completions endpoint
const baseModel = this.azure.chat(deploymentName);
// Route through Cloudflare AI Gateway
const gatewayModel = this.aiGateway(baseModel);
return gatewayModel;
}
}
Explanation:
The above code works. However, when we change this.azure.chat to this.azure.responses, azure returns
{
"error": {
"code": "404",
"message": "Resource not found"
}
}
On chat completion api, we are not able to set maxCompletionToken on a chat api, so we run into
{
"error": {
"message": "Could not finish the message because max_tokens or model output limit was reached. Please try again with higher max_tokens.",
"type": "invalid_request_error",
"param": null,
"code": null
}
}
(Note that max_tokens is not supported on GPT5 model.)
The way for it to work is to enable responses api, and call the azure no deployment specific endpoint from cloudflare server. See AI SDK code (!options.useDeploymentBasedUrls codepath)
const url = ({ path, modelId }: { path: string; modelId: string }) => {
const baseUrlPrefix =
options.baseURL ?? `https://${getResourceName()}.openai.azure.com/openai`;
let fullUrl: URL;
if (options.useDeploymentBasedUrls) {
// Use deployment-based format for compatibility with certain Azure OpenAI models
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path}`);
} else {
// Use v1 API format - no deployment ID in URL
fullUrl = new URL(`${baseUrlPrefix}/v1${path}`);
}
fullUrl.searchParams.set('api-version', apiVersion);
return fullUrl.toString();
};
Reproduce Code in Nestjs
Explanation:
The above code works. However, when we change
this.azure.chattothis.azure.responses, azure returnsOn chat completion api, we are not able to set maxCompletionToken on a chat api, so we run into
(Note that max_tokens is not supported on GPT5 model.)
The way for it to work is to enable responses api, and call the azure no deployment specific endpoint from cloudflare server. See AI SDK code (!options.useDeploymentBasedUrls codepath)