From 0342290a898dcfc7c03089e7988036d82daecc27 Mon Sep 17 00:00:00 2001 From: chan000518 Date: Sat, 4 Oct 2025 11:32:50 +0900 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20LLM=20=EB=AA=A8=EB=93=88?= =?UTF-8?q?=ED=99=94=20=EB=B0=8F=20Gemini=20=ED=86=B5=ED=95=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TASK.md | 211 +++++++++++++++++++++++--- package.json | 1 + src/config.ts | 5 +- src/controllers/ai.controller.ts | 4 +- src/llm/index.ts | 28 ++++ src/llm/model-registry.ts | 12 ++ src/llm/providers/gemini.ts | 75 +++++++++ src/llm/providers/openai-responses.ts | 158 +++++++++++++++++++ src/llm/types.ts | 33 ++++ src/services/qa.service.ts | 75 ++++----- src/types/ai.types.ts | 13 ++ 11 files changed, 555 insertions(+), 60 deletions(-) create mode 100644 src/llm/index.ts create mode 100644 src/llm/model-registry.ts create mode 100644 src/llm/providers/gemini.ts create mode 100644 src/llm/providers/openai-responses.ts create mode 100644 src/llm/types.ts diff --git a/TASK.md b/TASK.md index 803edc7..3bb0eca 100644 --- a/TASK.md +++ b/TASK.md @@ -1,23 +1,188 @@ -## 리팩토링 계획 - -### Phase 1: Repository 계층 도입 (데이터 로직 분리) - -1. **[Repo] `src/repositories` 디렉토리 생성**: 데이터베이스 쿼리 로직을 모아둘 디렉토리를 생성합니다. -2. **[Repo] `post.repository.ts` 생성 및 이전**: - * `blog_post`, `post_chunks`, `post_title_embeddings` 테이블 관련 쿼리를 이 파일로 옮깁니다. - * `qa.service.ts`의 `findPostById`, `findSimilarChunks` 로직을 이전합니다. - * `embedding.service.ts`의 `storeTitleEmbedding`, `storeContentEmbeddings` 로직을 이전합니다. -3. **[Repo] `persona.repository.ts` 생성 및 이전**: - * `persona` 테이블 관련 쿼리를 이 파일로 옮깁니다. - * `qa.service.ts`의 `getSpeechTonePrompt` 내부 DB 조회 로직을 `findPersonaById`와 같은 함수로 분리하여 이전합니다. -4. **[Service] 서비스 계층 수정**: - * `qa.service.ts`와 `embedding.service.ts`가 DB에 직접 접근하는 대신, 새로 만든 Repository의 함수를 호출하도록 코드를 수정합니다. - -### Phase 2: 프롬프트 관리 분리 - -5. **[Prompt] `src/prompts` 디렉토리 생성**: 프롬프트 템플릿을 관리할 디렉토리를 생성합니다. -6. **[Prompt] `qa.prompts.ts` 파일 생성**: - * `qa.service.ts`에 하드코딩된 시스템 프롬프트와 사용자 메시지 생성 로직을 이 파일로 옮깁니다. - * `createRagPrompt`, `createPostContextPrompt`와 같이 동적으로 프롬프트를 생성하는 함수를 만듭니다. -7. **[Service] `qa.service.ts` 수정**: - * `qa.prompts.ts`에서 프롬프트 생성 함수를 가져와(import) 사용하도록 수정합니다. \ No newline at end of file +## 작업 계획 + +작업 순서 +- 1) LLM 모듈화(퍼사드/프로바이더/모델 레지스트리) + GPT-5 mini 기본 적용 +- 2) Gemini 도입: 병행 사용(전용) + 대체 사용(퍼사드) +- 3) 토큰 카운트 및 비용 로깅 추가(양 프로바이더 공통) + +참고: 아래 문서의 섹션 순서와 무관하게 실제 구현 순서는 위의 "작업 순서"를 따릅니다. + + +### 1) LLM 모듈화(퍼사드/프로바이더/모델 레지스트리) + GPT-5 mini 기본 적용 + +목적: LLM 호출을 모듈화하여 옵션 기반으로 모델/프로바이더를 교체 가능하게 만들고, 기본 모델을 `gpt-5-mini`로 전환합니다. 토크나이저/가격표는 3단계에서 처리합니다. + +1. [구조] 파일/모듈 구성 + - 디렉토리: `src/llm/` + - `src/llm/types.ts` — 공통 인터페이스 정의 + - `GenerateRequest`: `{ provider?: 'openai'|'gemini', model?: string, messages?: OpenAIStyleMessages, contents?: GeminiStyleContents, stream?: boolean, tools?, options?: { temperature?, top_p?, max_output_tokens?, reasoning?, text? }, meta?: { userId?, categoryId?, postId? } }` + - `GenerateStream`: `onToken(text)`, `onToolCall(json)`, `onEnd()`, `onError(err)`(또는 AsyncIterable) + - `src/llm/modelRegistry.ts` — 모델 레지스트리/기본값 + - 논리 모델 키 → `{ provider, modelId, defaults, tokenizerKey?, pricingKey? }` + - 기본값: `defaultChat = { provider: 'openai', modelId: 'gpt-5-mini' }` + - `src/llm/providers/openaiResponses.ts` — OpenAI Responses API 구현 + - `src/llm/providers/gemini.ts` — @google/gemini 구현 + - `src/llm/index.ts` — 퍼사드: `generate(req: GenerateRequest): GenerateStream` 선택 라우팅 + - 기존 서비스(`qa.service.ts`)는 퍼사드만 사용하도록 변경 + +2. [기본 모델] GPT-5 mini 적용(Responses API) + - `src/config.ts`의 `CHAT_MODEL` 기본값을 `gpt-5-mini`로 변경 + - OpenAI 경로: `openai.responses.create/stream`로 마이그레이션(SSE 어댑터 포함) + - 기존 Chat Completions 경로는 임시 백업/옵션으로 유지 가능(필요 시) + +3. [옵션 기반 모델/프로바이더 선택] + - 요청 바디에 `llm?: { provider?: 'openai'|'gemini', model?: string, options?: {...} }` 허용 + - 미지정 시 레지스트리의 기본값 사용(`gpt-5-mini` on OpenAI) + - 향후 기능(Reasoning/Text 옵션, tool calls, timeout 등) 확장 용이 + +4. [검증/수용 기준] + - `/ai/ask` SSE 정상 동작(중단/지연 없음) + - 기존 프롬프트/툴 호출이 동일하게 동작(필요 시 어댑터) + - 로그/오류 처리 기존 수준 유지 + +### 2) Gemini 도입: 병행 사용(전용) + 대체 사용(퍼사드) + +목적: Gemini를 독립 엔드포인트로 직접 쓰는 경로와, 기존 GPT 경로의 대체 제공자로 모두 사용할 수 있게 합니다(퍼사드 경유). 이후 3단계에서 토큰/비용 로깅을 공통 적용합니다. + +1. [Config] Gemini 키/모델 설정 + - `.env` + - `GEMINI_API_KEY=...` + - `GEMINI_CHAT_MODEL=gemini-2.5-flash` (예: 변경 가능) + - `src/config.ts`에 항목 반영 및 기본값/검증 추가(Provider 고정 ENV는 사용하지 않음) + +2. [Provider] 퍼사드에 Gemini 구현 추가 + - 1단계에서 만든 LLM 퍼사드(`src/llm/index.ts`)에 Gemini 프로바이더를 추가 + - 구현 위치: `src/llm/providers/gemini.ts` (OpenAI 구현은 `src/llm/providers/openaiResponses.ts`) + - 퍼사드 인터페이스로 라우팅되어 기존 `qa.service.ts`는 퍼사드만 사용(교체 투명) + +3. [Gemini 호출] @google/genai SDK 적용 및 스트리밍 + - 의존성: `@google/genai` 추가 (설치 커맨드: `npm i @google/genai`) + - 클라이언트: `import { GoogleGenAI } from "@google/genai"; const ai = new GoogleGenAI({});` (`GEMINI_API_KEY`는 환경변수에서 자동 주입) + - 비스트리밍(우선 적용): + - `ai.models.generateContent({ model: GEMINI_CHAT_MODEL, contents, config: { thinkingConfig: { thinkingBudget }}})` + - 기본값으로 `thinkingBudget=0`(생각 비활성화) 적용, `.env`에서 오버라이드 가능 + - 응답 텍스트를 한번에 수신한 뒤 SSE로 순차 chunk 분할하여 `answer` 이벤트로 전송(간단 구현) + - 스트리밍(선택 적용): + - SDK 제공 시 스트리밍 API 사용(예: `generateContentStream` 유사 기능)으로 델타를 받아 즉시 SSE로 전달 + - SDK에서 미지원일 경우, 비스트리밍으로 우선 릴리즈 후 스트리밍 전환 + - (옵션) Safety 설정, generationConfig(temperature/topP/maxOutputTokens) 파라미터는 설정값으로 노출 + +5. [토큰 카운팅] Gemini 대응 + - 사전 카운트(가능 시): SDK의 토큰 카운트 API(`tokens:count`/`countTokens`)가 제공되면 이를 사용해 프롬프트 토큰 계산 → 비용 선로깅 + - 네트워크 요청이므로 로깅 토글이 켜져 있을 때만 수행하도록 옵션화 + - 사후 카운트: 응답 텍스트 기준 동일 API로 출력 토큰 계산(또는 비가용 시 근사치) + - 폴백 전략: 카운트 API가 불가한 환경에서는 근사치 사용(문자수/4), 추후 정확도 개선 시 교체 + +6. [가격 정책] Gemini 추가 + - `src/config/pricing.ts`의 `PRICING_TABLE`에 Gemini 모델(`gemini-2.5-flash`, 임베딩 모델 등) 단가 추가 + - 동일한 `calcCost`, `formatCost` 로직 재사용 + +7. [생각(Thinking) 설정] 기본 비활성화 + - Gemini 2.5 Flash의 생각 기능은 응답 품질 대신 비용/지연이 증가하므로 기본 `thinkingBudget=0`으로 비활성화 + - `.env`에 `GEMINI_THINKING_BUDGET`를 두어 필요 시 활성화(정수값) + +8. [도구/함수 호출] 호환성 계획(선택) + - 현재 OpenAI `tool_calls`를 사용 중. Gemini는 `functionDeclarations`/`toolConfig` 형태로 유사 기능 제공 + - 1단계: Gemini 경로에서는 도구 호출 비활성화(빠른 도입) + - 2단계: 필요 시 `report_content_insufficient`를 Gemini `functionDeclarations`로 매핑하여 동일 동작 구현 + +9. [Wiring] 사용 패턴 + - 독립 사용(A): `POST /ai/gemini/ask`로 직접 호출(옵션: thinkingBudget 등) + - 대체 사용(B): 기존 `POST /ai/ask`에 `llm.provider?: 'openai'|'gemini'`, `llm.model?` 허용 → 퍼사드가 라우팅 + - 로깅 시 `provider` 필드를 포함(3단계에서 적용) + +10. [검증/수용 기준] + - OpenAI/Gemini 각각에서 동일한 SSE 응답 형식으로 동작 + - 요청 전/후 토큰·비용 로그가 두 프로바이더 모두에서 출력 + - 로깅 토글이 정상 작동, 스트리밍 성능 저하 없음 + +설정 확정 +- `GEMINI_CHAT_MODEL=gemini-2.5-flash` +- `GEMINI_THINKING_BUDGET=0` (기본값으로 비활성화) + +### 토큰 카운트 및 비용 로깅 추가 + +목적: LLM에 요청을 보내기 직전에 프롬프트(메시지) 토큰 수를 계산해 예상 입력 비용을 콘솔로 로깅하고, 스트리밍 응답 완료 후 실제 출력 토큰 수 기반 최종 비용을 추가 로깅합니다. 초기에는 `console.log`만 사용합니다. + +1. [Utils] 토크나이저 유틸 추가 + - 파일: `src/utils/tokenizer.ts` + - 내용: + - `getTokenizerForModel(model: string)` → 모델명에 따라 적절한 인코딩을 선택 + - `gpt-5*` → 자료 제공 전까지 임시로 `o200k_base` 사용(TBD, 전환 시 교체) + - `gpt-4o`, `gpt-4o-mini`, 기타 `o`계열 → `o200k_base` + - `countTextTokens(text: string, model: string): number` + - `countChatMessagesTokens(messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[], model: string): number` + - 메시지 `content`들을 토크나이즈하여 합산하고, 채팅 포맷 오버헤드(메시지당 소량, 모델별 상수)를 보정치로 가산 + - 주의: 보정치는 근사치이며, 정확한 정산은 응답 토큰 합산으로 후처리 + - 비고: 이미 프로젝트에 `@dqbd/tiktoken`이 포함되어 있으므로 이를 사용합니다. + +2. [Config] 가격 정책 맵 구조 설계 (임시 하드코딩 + ENV 오버라이드) + - 파일: `src/config/pricing.ts` + - 내용: + - `export type Pricing = { input_per_1k: number; output_per_1k: number; cached_input_per_1k?: number; currency: 'USD' | 'KRW' }` + - `PRICING_TABLE: Record`: 모델명 키에 따른 단가 설정 + - 선택: `LLM_PRICING_OVERRIDES`(JSON) 환경변수로 런타임 오버라이드 허용 + - 초기값은 사용자 제공 정책으로 채울 예정. 제공 전까지는 로깅에 `N/A` 표기 또는 0 처리. + + - 초기 PRICING_TABLE(제공 정책 반영, 단위: per 1K tokens, 통화: USD) + - `gpt-5`: { input_per_1k: 0.00125, cached_input_per_1k: 0.000125, output_per_1k: 0.01, currency: 'USD' } + - `gpt-5-mini`: { input_per_1k: 0.00025, cached_input_per_1k: 0.000025, output_per_1k: 0.002, currency: 'USD' } + - `gpt-5-nano`: { input_per_1k: 0.00005, cached_input_per_1k: 0.000005, output_per_1k: 0.0004, currency: 'USD' } + +3. [Utils] 비용 계산 유틸 추가 + - 파일: `src/utils/cost.ts` + - 내용: + - `getModelPricing(model: string): Pricing | null` + - `calcCost(tokens: number, per_1k: number): number` → 반올림 1~4자리(옵션) + - 화폐 표기 함수(선택): `formatCost(amount: number, currency: string)` + +4. [Facade] LLM 퍼사드에 비용 로깅 통합 + - 위치: `src/llm/index.ts` 퍼사드 내부에서 공통 로깅 수행 + - 기능 흐름(공통): + 1) 요청 전: 메시지/콘텐츠 토큰 카운트 → `promptTokens` + - OpenAI: `countChatMessagesTokens`(토크나이저) + - Gemini: `countTokens` API 가능 시 사용(불가 시 근사치) + 2) 단가 조회: `getModelPricing(model)` → `estInputCost` + 3) 선로깅: `{type:'llm.request', provider, model, promptTokens, estInputCost, corrId, userId, categoryId, postId}` + 4) 실제 호출: 등록된 프로바이더(OpenAI Responses 또는 Gemini)로 위임, 스트림은 그대로 중계 + 5) 스트림 종료 후: 출력 텍스트/함수인자 토큰 합산 → `completionTokens` + 6) 비용 계산: 입력/출력(+cached 입력이 있으면 분리) → `totalCost` + 7) 후로깅: `{type:'llm.response', provider, model, promptTokens, completionTokens, inputCost, outputCost, totalCost, durationMs, corrId, cachedInputTokens}` + - 주의: 기존 SSE 흐름(이벤트명/포맷) 불변 유지. 퍼사드는 원본 델타를 그대로 전달. + - 상관관계 ID(`corrId`)는 `uuid` 생성(또는 요청별 식별자 전달 시 사용). + +5. [Wiring] `qa.service.ts`에서 퍼사드 사용 + - 기존 직접 호출부를 LLM 퍼사드로 교체(`generate(req)`) + - 요청 바디의 `llm` 옵션을 퍼사드에 그대로 전달(provider/model/options) + - 출력 토큰 카운트/비용 로깅은 퍼사드 내부에서 처리 + +6. [옵션] 임베딩 호출 비용 로깅(확장) + - 파일: `src/services/embedding.service.ts` + - `createEmbeddings` 호출 직전 `input` 텍스트 전체 토큰 수 계산(`countTextTokens` 누적) → 입력 비용 로깅 + - 임베딩 모델 단가(`text-embedding-3-*`)도 `PRICING_TABLE`에 포함 + +7. [환경변수] 로깅 토글 및 라운딩 + - `.env` 키 추가(기본값은 off) + - `LLM_COST_LOG=true|false` (기본: true로 해도 무방) + - `LLM_COST_ROUND=2` (소수점 자리수, 선택) + - 로깅은 토글 꺼져 있으면 수행하지 않음 + +8. [로그 포맷] 예시(JSON 라인) + - 요청 전: `{ "type": "llm.request", "corrId": "...", "provider": "openai", "model": "gpt-5-mini", "promptTokens": 1234, "estInputCost": 0.00031, "userId": "...", "categoryId": 1, "postId": 42 }` + - 응답 후: `{ "type": "llm.response", "corrId": "...", "provider": "openai", "model": "gpt-5-mini", "promptTokens": 1234, "completionTokens": 456, "inputCost": 0.00031, "outputCost": 0.00091, "totalCost": 0.00122, "durationMs": 987, "cachedInputTokens": 0 }` + +9. [검증/수용 기준] + - `POST /ai/ask` 호출 시 콘솔에 요청 전/후 로그 각각 1회 출력 + - 모델/프롬프트/토큰 수/예상 비용/총 비용/시간(ms)이 포함되어야 함 + - 로깅 on/off 토글 동작, 라운딩 반영 확인 + - 기존 SSE 동작(끊김/지연) 변화 없음 + +10. [주의/한계] + - 채팅 포맷 오버헤드는 모델별로 상이하며 근사치 사용. 최종 비용은 출력 토큰 카운트까지 반영해 오차 최소화 + - 스트리밍 API는 서버에서 사용량 메타를 즉시 제공하지 않으므로(비스트리밍과 달리), 응답 텍스트 기반 자체 카운트 수행 + - 함수 호출(tool_calls) 토큰은 인자 길이에 비례하여 증가. 누적 텍스트/인자 기반으로 동일하게 카운트 + - Cached Input 과금: 제공 API에서 캐시 히트 토큰 정보를 명시적으로 제공하는 경우에만 `cachedInputTokens`로 분리 산정. 그렇지 않으면 일반 입력으로 계산(보수적) + +11. [다음 단계(선택)] + - `console.log` → 구조화 로거(Pino/Winston)로 교체, 샘플링·보존 기간 설정 + - DB 또는 시계열(예: ClickHouse/Prometheus) 적재로 사용자별 비용 대시보드 구성 diff --git a/package.json b/package.json index 712d202..2911813 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ }, "dependencies": { "@dqbd/tiktoken": "^1.0.13", + "@google/genai": "^0.2.0", "cors": "^2.8.5", "dotenv": "^16.4.5", "express": "^4.19.2", diff --git a/src/config.ts b/src/config.ts index 18993cc..1be73a1 100644 --- a/src/config.ts +++ b/src/config.ts @@ -12,7 +12,10 @@ const configSchema = z.object({ TOKEN_AUDIENCE: z.string().default('bubblog'), ALGORITHM: z.string().default('HS256'), EMBED_MODEL: z.string().default('text-embedding-3-small'), - CHAT_MODEL: z.string().default('gpt-4o'), + CHAT_MODEL: z.string().default('gpt-5-mini'), + GEMINI_API_KEY: z.string().optional(), + GEMINI_CHAT_MODEL: z.string().default('gemini-2.5-flash'), + GEMINI_THINKING_BUDGET: z.string().optional(), }); const config = configSchema.parse(process.env); diff --git a/src/controllers/ai.controller.ts b/src/controllers/ai.controller.ts index e6ab029..1b9a593 100644 --- a/src/controllers/ai.controller.ts +++ b/src/controllers/ai.controller.ts @@ -49,13 +49,13 @@ export const askHandler = async ( next: NextFunction ) => { try { - const { question, user_id, category_id, speech_tone, post_id } = req.body; + const { question, user_id, category_id, speech_tone, post_id, llm } = req.body as any; res.setHeader('Content-Type', 'text/event-stream'); res.setHeader('Cache-Control', 'no-cache'); res.setHeader('Connection', 'keep-alive'); - const stream = await answerStream(question, user_id, category_id, speech_tone, post_id); + const stream = await answerStream(question, user_id, category_id, speech_tone, post_id, llm); stream.pipe(res); } catch (error) { diff --git a/src/llm/index.ts b/src/llm/index.ts new file mode 100644 index 0000000..9c39531 --- /dev/null +++ b/src/llm/index.ts @@ -0,0 +1,28 @@ +import { PassThrough } from 'stream'; +import { GenerateRequest } from './types'; +import { getDefaultChat } from './model-registry'; +import { generateOpenAIStream } from './providers/openai-responses'; +import { generateGeminiStream } from './providers/gemini'; + +export const generate = async (req: GenerateRequest): Promise => { + const merged = { ...req }; + if (!merged.provider || !merged.model) { + const def = getDefaultChat(); + merged.provider = merged.provider || def.provider; + merged.model = merged.model || def.modelId; + } + + if (merged.provider === 'openai') { + return generateOpenAIStream(merged); + } + + if (merged.provider === 'gemini') { + return generateGeminiStream(merged); + } + + const stream = new PassThrough(); + stream.write(`event: error\n`); + stream.write(`data: ${JSON.stringify({ message: 'Unknown provider' })}\n\n`); + stream.end(); + return stream; +}; diff --git a/src/llm/model-registry.ts b/src/llm/model-registry.ts new file mode 100644 index 0000000..14cbdfe --- /dev/null +++ b/src/llm/model-registry.ts @@ -0,0 +1,12 @@ +import { ProviderName } from './types'; + +type ModelEntry = { + provider: ProviderName; + modelId: string; +}; + +// Minimal registry for now; can expand with tokenizer/pricing later. +const DEFAULT_CHAT: ModelEntry = { provider: 'openai', modelId: 'gpt-5-mini' }; + +export const getDefaultChat = (): ModelEntry => DEFAULT_CHAT; + diff --git a/src/llm/providers/gemini.ts b/src/llm/providers/gemini.ts new file mode 100644 index 0000000..a399317 --- /dev/null +++ b/src/llm/providers/gemini.ts @@ -0,0 +1,75 @@ +import { PassThrough } from 'stream'; +import config from '../../config'; +import { GenerateRequest } from '../types'; + +// Using @google/genai per project plan; keep types loose for compatibility +// eslint-disable-next-line @typescript-eslint/no-var-requires +const { GoogleGenAI } = require('@google/genai'); + +const buildPromptFromMessages = (messages: { role: string; content: string }[]) => { + // Simple concatenation preserving roles + return messages + .map((m) => `[${m.role}]\n${m.content}`) + .join('\n\n'); +}; + +export const generateGeminiStream = async (req: GenerateRequest): Promise => { + const stream = new PassThrough(); + try { + const modelId = req.model || process.env.GEMINI_CHAT_MODEL || 'gemini-2.5-flash'; + const apiKey = (config as any).GEMINI_API_KEY || process.env.GEMINI_API_KEY; + if (!apiKey) { + stream.write(`event: error\n`); + stream.write(`data: ${JSON.stringify({ message: 'Gemini API key not configured' })}\n\n`); + stream.end(); + return stream; + } + + const ai = new GoogleGenAI({ apiKey }); + + const text = buildPromptFromMessages(req.messages || []); + + const generationConfig: any = {}; + if (req.options?.temperature != null) generationConfig.temperature = req.options.temperature; + if (req.options?.top_p != null) generationConfig.topP = req.options.top_p; + if (req.options?.max_output_tokens != null) generationConfig.maxOutputTokens = req.options.max_output_tokens; + + const thinkingBudget = parseInt(process.env.GEMINI_THINKING_BUDGET || '0', 10) || 0; + const configBlock: any = thinkingBudget > 0 ? { thinkingConfig: { thinkingBudget } } : {}; + + // Non-streaming first, then chunk SSE + const result = await ai.models.generateContent({ + model: modelId, + contents: [ + { + role: 'user', + parts: [{ text }], + }, + ], + generationConfig, + config: configBlock, + }); + + // Try common text access paths + const outputText = (result?.response?.text && result.response.text()) || (result?.text && result.text()) || ''; + + const finalText = typeof outputText === 'string' ? outputText : ''; + + const chunkSize = 400; + for (let i = 0; i < finalText.length; i += chunkSize) { + const chunk = finalText.slice(i, i + chunkSize); + stream.write(`event: answer\n`); + stream.write(`data: ${JSON.stringify(chunk)}\n\n`); + } + stream.write(`event: end\n`); + stream.write(`data: [DONE]\n\n`); + stream.end(); + return stream; + } catch (err) { + stream.write(`event: error\n`); + stream.write(`data: ${JSON.stringify({ message: 'Internal server error' })}\n\n`); + stream.end(); + return stream; + } +}; + diff --git a/src/llm/providers/openai-responses.ts b/src/llm/providers/openai-responses.ts new file mode 100644 index 0000000..1100db6 --- /dev/null +++ b/src/llm/providers/openai-responses.ts @@ -0,0 +1,158 @@ +import { PassThrough } from 'stream'; +import OpenAI from 'openai'; +import config from '../../config'; +import { GenerateRequest, OpenAIStyleMessage, OpenAIStyleTool } from '../types'; + +const openai = new OpenAI({ apiKey: config.OPENAI_API_KEY }); + +const toResponsesInput = (messages: OpenAIStyleMessage[] = []) => { + // Convert simple chat-style messages to Responses API input format + return messages.map((m) => ({ + role: m.role, + content: [{ type: 'text', text: m.content }], + })); +}; + +export const generateOpenAIStream = async (req: GenerateRequest): Promise => { + const stream = new PassThrough(); + const model = req.model || 'gpt-5-mini'; + const messages = req.messages || []; + const tools = (req.tools || []) as unknown as OpenAI.Responses.ResponseCreateParams['tools']; + + // For gpt-5-* prefer Responses API. For other models, fall back to Chat Completions streaming. + const isGpt5Family = /(^|\b)gpt-5/i.test(model); + + try { + if (isGpt5Family) { + // Prefer Responses API streaming for gpt-5 + try { + const responsesStream: any = await (openai as any).responses.stream({ + model, + input: toResponsesInput(messages) as any, + tools: tools as any, + temperature: req.options?.temperature, + top_p: req.options?.top_p, + max_output_tokens: req.options?.max_output_tokens, + }); + + responsesStream.on('response.output_text.delta', (delta: string) => { + if (delta) { + stream.write(`event: answer\n`); + stream.write(`data: ${JSON.stringify(delta)}\n\n`); + } + }); + + // Stream tool-call arguments as answer chunks to maintain SSE shape + responsesStream.on('response.tool_call.delta', (toolDelta: any) => { + const argsDelta = toolDelta?.arguments_delta || toolDelta?.arguments || ''; + if (argsDelta) { + stream.write(`event: answer\n`); + stream.write(`data: ${JSON.stringify(argsDelta)}\n\n`); + } + }); + + responsesStream.on('response.completed', () => { + stream.write(`event: end\n`); + stream.write(`data: [DONE]\n\n`); + stream.end(); + }); + + responsesStream.on('error', (e: any) => { + stream.write(`event: error\n`); + stream.write(`data: ${JSON.stringify({ message: 'Internal server error' })}\n\n`); + stream.end(); + }); + + // Ensure the stream starts and we await its completion + await responsesStream.done(); + return stream; + } catch (e) { + // Fallback to non-streaming Responses if streaming path fails + try { + const response = await openai.responses.create({ + model, + input: toResponsesInput(messages) as any, + // Avoid tools in non-streaming mode to ensure text output + temperature: req.options?.temperature, + top_p: req.options?.top_p, + max_output_tokens: req.options?.max_output_tokens, + }); + const text = (response as any).output_text ?? ''; + const answerText = typeof text === 'string' ? text : ''; + const fallbackText = (() => { + try { + const outputs = (response as any).output || []; + if (Array.isArray(outputs) && outputs.length > 0) { + const parts = outputs + .flatMap((o: any) => o.content || []) + .filter((c: any) => c.type === 'output_text') + .map((c: any) => c.text) + .join(''); + return parts || ''; + } + } catch { + // ignore + } + return ''; + })(); + const finalText = answerText || fallbackText; + const chunkSize = 400; + for (let i = 0; i < finalText.length; i += chunkSize) { + const chunk = finalText.slice(i, i + chunkSize); + stream.write(`event: answer\n`); + stream.write(`data: ${JSON.stringify(chunk)}\n\n`); + } + stream.write(`event: end\n`); + stream.write(`data: [DONE]\n\n`); + stream.end(); + return stream; + } catch (e2) { + // fall through to chat completions streaming below + } + } + } + + // Chat Completions streaming as universal fallback + const chatStream = await openai.chat.completions.create({ + model, + messages: messages as any, + tools: (req.tools as OpenAI.Chat.Completions.ChatCompletionTool[]) || undefined, + tool_choice: req.tools && req.tools.length > 0 ? 'auto' : undefined, + stream: true, + temperature: req.options?.temperature, + top_p: req.options?.top_p, + max_tokens: req.options?.max_output_tokens as any, + }); + + for await (const chunk of chatStream) { + const content = chunk.choices[0]?.delta?.content || ''; + const toolCalls = chunk.choices[0]?.delta?.tool_calls; + + if (toolCalls) { + for (const toolCall of toolCalls) { + if (toolCall.function?.arguments) { + stream.write(`event: answer\n`); + stream.write(`data: ${JSON.stringify(toolCall.function.arguments)}\n\n`); + } + } + } else if (content) { + stream.write(`event: answer\n`); + stream.write(`data: ${JSON.stringify(content)}\n\n`); + } + + if (chunk.choices[0]?.finish_reason) { + stream.write(`event: end\n`); + stream.write(`data: [DONE]\n\n`); + stream.end(); + break; + } + } + + return stream; + } catch (err) { + stream.write(`event: error\n`); + stream.write(`data: ${JSON.stringify({ message: 'Internal server error' })}\n\n`); + stream.end(); + return stream; + } +}; diff --git a/src/llm/types.ts b/src/llm/types.ts new file mode 100644 index 0000000..b17bc54 --- /dev/null +++ b/src/llm/types.ts @@ -0,0 +1,33 @@ +export type ProviderName = 'openai' | 'gemini'; + +export type OpenAIStyleMessage = { + role: 'system' | 'user' | 'assistant' | 'tool' | 'function'; + content: string; +}; + +export type OpenAIStyleTool = { + type: 'function'; + function: { + name: string; + description?: string; + parameters?: Record; + }; +}; + +export type GenerateRequest = { + provider?: ProviderName; + model?: string; + messages?: OpenAIStyleMessage[]; + tools?: OpenAIStyleTool[]; + options?: { + temperature?: number; + top_p?: number; + max_output_tokens?: number; + }; + meta?: { + userId?: string; + categoryId?: number; + postId?: number; + }; +}; + diff --git a/src/services/qa.service.ts b/src/services/qa.service.ts index 2361bbc..549a919 100644 --- a/src/services/qa.service.ts +++ b/src/services/qa.service.ts @@ -1,14 +1,10 @@ import { createEmbeddings } from './embedding.service'; import { PassThrough } from 'stream'; -import OpenAI from 'openai'; import config from '../config'; import * as postRepository from '../repositories/post.repository'; import * as personaRepository from '../repositories/persona.repository'; import * as qaPrompts from '../prompts/qa.prompts'; - -const openai = new OpenAI({ - apiKey: config.OPENAI_API_KEY, -}); +import { generate } from '../llm'; const preprocessContent = (content: string): string => { const plainText = content.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim(); @@ -27,20 +23,40 @@ const getSpeechTonePrompt = async (speechTone: number, userId: string): Promise< return "간결하고 명확한 말투로 답변해"; // Default } +type LlmOverride = { + provider?: 'openai' | 'gemini'; + model?: string; + options?: { temperature?: number; top_p?: number; max_output_tokens?: number }; +}; + export const answerStream = async ( question: string, userId: string, categoryId?: number, speechTone: number = -1, - postId?: number + postId?: number, + llm?: LlmOverride ): Promise => { const stream = new PassThrough(); - let messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = []; - let tools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined = undefined; + let messages: { role: 'system' | 'user' | 'assistant' | 'tool' | 'function'; content: string }[] = []; + let tools: + | { + type: 'function'; + function: { name: string; description?: string; parameters?: Record }; + }[] + | undefined = undefined; (async () => { const speechTonePrompt = await getSpeechTonePrompt(speechTone, userId); + const toSimpleMessages = ( + raw: any[] + ): { role: 'system' | 'user' | 'assistant' | 'tool' | 'function'; content: string }[] => { + return (raw || []).map((m: any) => ({ + role: m.role, + content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content), + })); + }; if (postId) { const post = await postRepository.findPostById(postId); @@ -61,7 +77,9 @@ export const answerStream = async ( stream.write(`event: exist_in_post_status\ndata: true\n\n`); stream.write(`event: context\ndata: ${JSON.stringify([{ postId: post.id, postTitle: post.title }])}\n\n`); - messages = qaPrompts.createPostContextPrompt(post, processedContent, question, speechTonePrompt); + messages = toSimpleMessages( + qaPrompts.createPostContextPrompt(post, processedContent, question, speechTonePrompt) + ); } else { const [questionEmbedding] = await createEmbeddings([question]); @@ -73,7 +91,9 @@ export const answerStream = async ( const context = similarChunks.map(chunk => ({ postId: chunk.postId, postTitle: chunk.postTitle })); stream.write(`event: context\ndata: ${JSON.stringify(context)}\n\n`); - messages = qaPrompts.createRagPrompt(question, similarChunks, speechTonePrompt); + messages = toSimpleMessages( + qaPrompts.createRagPrompt(question, similarChunks, speechTonePrompt) + ); tools = [ { type: "function", @@ -92,34 +112,21 @@ export const answerStream = async ( ]; } - const responseStream = await openai.chat.completions.create({ - model: config.CHAT_MODEL, + const llmStream = await generate({ + provider: llm?.provider || 'openai', + model: llm?.model || config.CHAT_MODEL, messages, tools, - tool_choice: tools ? 'auto' : undefined, - stream: true, + options: llm?.options, + meta: { userId, categoryId, postId }, }); - for await (const chunk of responseStream) { - const content = chunk.choices[0]?.delta?.content || ""; - const toolCalls = chunk.choices[0]?.delta?.tool_calls; - - if (toolCalls) { - for (const toolCall of toolCalls) { - if (toolCall.function?.arguments) { - stream.write(`event: answer\ndata: ${JSON.stringify(toolCall.function.arguments)}\n\n`); - } - } - } else if (content) { - stream.write(`event: answer\ndata: ${JSON.stringify(content)}\n\n`); - } - - if (chunk.choices[0]?.finish_reason) { - stream.write(`event: end\ndata: [DONE]\n\n`); - stream.end(); - break; - } - } + llmStream.on('data', (chunk) => { + stream.write(chunk); + }); + llmStream.on('end', () => { + stream.end(); + }); })().catch(err => { console.error('Stream process error:', err); diff --git a/src/types/ai.types.ts b/src/types/ai.types.ts index e596d1a..b9ed539 100644 --- a/src/types/ai.types.ts +++ b/src/types/ai.types.ts @@ -28,6 +28,19 @@ export const askSchema = z.object({ category_id: z.number().optional(), post_id: z.number().optional(), speech_tone: z.number().optional(), + llm: z + .object({ + provider: z.enum(['openai', 'gemini']).optional(), + model: z.string().optional(), + options: z + .object({ + temperature: z.number().optional(), + top_p: z.number().optional(), + max_output_tokens: z.number().optional(), + }) + .optional(), + }) + .optional(), }), }); From a3d6079c797c7eee1af582a070e1ad9bd909a67f Mon Sep 17 00:00:00 2001 From: chan000518 Date: Sat, 4 Oct 2025 11:35:22 +0900 Subject: [PATCH 2/3] =?UTF-8?q?=E2=9C=A8=20LLM=20=EB=B9=84=EC=9A=A9=20?= =?UTF-8?q?=EB=A1=9C=EA=B9=85=20=EB=B0=8F=20=EA=B0=80=EA=B2=A9=20=EA=B3=84?= =?UTF-8?q?=EC=82=B0=20=EA=B8=B0=EB=8A=A5=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config.ts | 2 + src/llm/index.ts | 137 ++++++++++++++++++++++++++++++++++++++--- src/utils/cost.ts | 40 ++++++++++++ src/utils/tokenizer.ts | 32 ++++++++++ 4 files changed, 201 insertions(+), 10 deletions(-) create mode 100644 src/utils/cost.ts create mode 100644 src/utils/tokenizer.ts diff --git a/src/config.ts b/src/config.ts index 1be73a1..ba81564 100644 --- a/src/config.ts +++ b/src/config.ts @@ -16,6 +16,8 @@ const configSchema = z.object({ GEMINI_API_KEY: z.string().optional(), GEMINI_CHAT_MODEL: z.string().default('gemini-2.5-flash'), GEMINI_THINKING_BUDGET: z.string().optional(), + LLM_COST_LOG: z.string().default('false'), + LLM_COST_ROUND: z.coerce.number().default(4), }); const config = configSchema.parse(process.env); diff --git a/src/llm/index.ts b/src/llm/index.ts index 9c39531..ba30ce4 100644 --- a/src/llm/index.ts +++ b/src/llm/index.ts @@ -3,6 +3,10 @@ import { GenerateRequest } from './types'; import { getDefaultChat } from './model-registry'; import { generateOpenAIStream } from './providers/openai-responses'; import { generateGeminiStream } from './providers/gemini'; +import { countChatMessagesTokens, countTextTokens } from '../utils/tokenizer'; +import { calcCost, getModelPricing } from '../utils/cost'; +import config from '../config'; +import { randomUUID } from 'crypto'; export const generate = async (req: GenerateRequest): Promise => { const merged = { ...req }; @@ -12,17 +16,130 @@ export const generate = async (req: GenerateRequest): Promise => { merged.model = merged.model || def.modelId; } - if (merged.provider === 'openai') { - return generateOpenAIStream(merged); - } + const doLog = (config.LLM_COST_LOG || '').toString().toLowerCase() === 'true'; + const round = config.LLM_COST_ROUND ?? 4; + const corrId = randomUUID(); + const model = merged.model as string; + const provider = merged.provider as string; - if (merged.provider === 'gemini') { - return generateGeminiStream(merged); + // Pre-call logging: prompt tokens + estimated input cost + const messages = merged.messages || []; + let promptTokens = 0; + try { + promptTokens = countChatMessagesTokens(messages as any, model); + } catch { + // ignore + } + const pricing = getModelPricing(model); + const estInputCost = pricing ? calcCost(promptTokens, pricing.input_per_1k) : 0; + if (doLog) { + const pre = { + type: 'llm.request', + corrId, + provider, + model, + promptTokens, + estInputCost, + userId: merged.meta?.userId, + categoryId: merged.meta?.categoryId, + postId: merged.meta?.postId, + }; + console.log(JSON.stringify(pre)); } - const stream = new PassThrough(); - stream.write(`event: error\n`); - stream.write(`data: ${JSON.stringify({ message: 'Unknown provider' })}\n\n`); - stream.end(); - return stream; + const startedAt = Date.now(); + + const providerStream = + merged.provider === 'openai' + ? await generateOpenAIStream(merged) + : merged.provider === 'gemini' + ? await generateGeminiStream(merged) + : (() => { + const s = new PassThrough(); + s.write(`event: error\n`); + s.write(`data: ${JSON.stringify({ message: 'Unknown provider' })}\n\n`); + s.end(); + return s; + })(); + + // Wrap provider stream to accumulate output tokens + const outer = new PassThrough(); + let buffer = ''; + let outputText = ''; + + const flushBuffer = () => { + // Split by double newline to get SSE events + const chunks = buffer.split('\n\n'); + // Keep last partial + buffer = chunks.pop() || ''; + for (const block of chunks) { + const lines = block.split('\n'); + let evt: string | null = null; + let dataLine: string | null = null; + for (const line of lines) { + if (line.startsWith('event:')) evt = line.slice(6).trim(); + if (line.startsWith('data:')) dataLine = line.slice(5).trim(); + } + if (evt === 'answer' && dataLine) { + try { + const parsed = JSON.parse(dataLine); + if (typeof parsed === 'string') outputText += parsed; + else outputText += JSON.stringify(parsed); + } catch { + outputText += dataLine; + } + } + outer.write(block + '\n\n'); + } + }; + + providerStream.on('data', (chunk) => { + const str = Buffer.isBuffer(chunk) ? chunk.toString('utf8') : String(chunk); + buffer += str; + flushBuffer(); + }); + providerStream.on('end', () => { + if (buffer.length > 0) { + outer.write(buffer); + buffer = ''; + } + + const completionTokens = (() => { + try { + return countTextTokens(outputText, model); + } catch { + return 0; + } + })(); + const durationMs = Date.now() - startedAt; + if (doLog) { + const inputCost = pricing ? calcCost(promptTokens, pricing.input_per_1k) : 0; + const outputCost = pricing ? calcCost(completionTokens, pricing.output_per_1k) : 0; + const totalCost = inputCost + outputCost; + const post = { + type: 'llm.response', + corrId, + provider, + model, + promptTokens, + completionTokens, + inputCost, + outputCost, + totalCost, + durationMs, + }; + console.log(JSON.stringify(post)); + } + outer.end(); + }); + providerStream.on('error', (e) => { + if (doLog) { + console.log( + JSON.stringify({ type: 'llm.error', corrId, provider, model, message: (e as any)?.message || 'error' }) + ); + } + outer.emit('error', e); + }); + + return outer; }; diff --git a/src/utils/cost.ts b/src/utils/cost.ts new file mode 100644 index 0000000..62334db --- /dev/null +++ b/src/utils/cost.ts @@ -0,0 +1,40 @@ +export type Pricing = { + input_per_1k: number; + output_per_1k: number; + cached_input_per_1k?: number; + currency: 'USD' | 'KRW' | string; +}; + +const PRICING_TABLE: Record = { + // OpenAI + 'gpt-5-mini': { input_per_1k: 0.00025, output_per_1k: 0.002, cached_input_per_1k: 0.000025, currency: 'USD' }, + 'gpt-5-nano': { input_per_1k: 0.00005, output_per_1k: 0.0004, cached_input_per_1k: 0.000005, currency: 'USD' }, + 'gpt-4o': { input_per_1k: 0.005, output_per_1k: 0.015, currency: 'USD' }, + 'gpt-4o-mini': { input_per_1k: 0.0005, output_per_1k: 0.0015, currency: 'USD' }, + // Embeddings + 'text-embedding-3-small': { input_per_1k: 0.00002, output_per_1k: 0, currency: 'USD' }, + // Gemini (example values — update per official pricing if needed) + 'gemini-2.5-flash': { input_per_1k: 0.0001, output_per_1k: 0.0004, currency: 'USD' }, +}; + +export const getModelPricing = (model: string): Pricing | null => { + if (!model) return null; + const key = model.toLowerCase(); + if (PRICING_TABLE[key]) return PRICING_TABLE[key]; + // naive aliasing for common variants + if (key.startsWith('gpt-4o')) return PRICING_TABLE['gpt-4o']; + if (key.startsWith('gpt-5-mini')) return PRICING_TABLE['gpt-5-mini']; + return null; +}; + +export const calcCost = (tokens: number, per_1k: number): number => { + if (!per_1k) return 0; + return (tokens / 1000) * per_1k; +}; + +export const formatCost = (amount: number, currency: string, round: number = 4): string => { + const factor = Math.pow(10, Math.max(0, round)); + const rounded = Math.round(amount * factor) / factor; + return `${rounded} ${currency}`; +}; + diff --git a/src/utils/tokenizer.ts b/src/utils/tokenizer.ts new file mode 100644 index 0000000..48f8fbf --- /dev/null +++ b/src/utils/tokenizer.ts @@ -0,0 +1,32 @@ +import { get_encoding } from '@dqbd/tiktoken'; + +const encodingForModel = (model: string): string => { + const lower = (model || '').toLowerCase(); + if (lower.includes('gpt-5') || lower.includes('gpt-4o') || lower.includes('o1') || lower.includes('o3')) { + return 'o200k_base'; + } + return 'cl100k_base'; +}; + +export const countTextTokens = (text: string, model: string): number => { + const encKey = encodingForModel(model); + const enc = get_encoding(encKey); + try { + const tokens = enc.encode(text || ''); + return tokens.length; + } finally { + // no explicit free in @dqbd/tiktoken browser build; safe to let GC handle + } +}; + +type SimpleMessage = { role: string; content: string }; + +export const countChatMessagesTokens = (messages: SimpleMessage[], model: string): number => { + // Approximate: sum content token counts + minimal role overhead + const overheadPerMsg = 3; // rough + const roleOverhead = 1; + return messages.reduce((sum, m) => { + return sum + countTextTokens(m.content || '', model) + overheadPerMsg + roleOverhead; + }, 0); +}; + From 77c033fcb1cad0961aedab78b662575895a6f81a Mon Sep 17 00:00:00 2001 From: chan000518 Date: Sun, 5 Oct 2025 10:33:47 +0900 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=90=9B=20=ED=83=80=EC=9E=85=20?= =?UTF-8?q?=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package-lock.json | 219 ++++++++++++++++++++++++++++++++++++++++- src/utils/tokenizer.ts | 10 +- 2 files changed, 223 insertions(+), 6 deletions(-) diff --git a/package-lock.json b/package-lock.json index 65109c4..5814cd7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "license": "ISC", "dependencies": { "@dqbd/tiktoken": "^1.0.13", + "@google/genai": "^0.2.0", "cors": "^2.8.5", "dotenv": "^16.4.5", "express": "^4.19.2", @@ -47,6 +48,18 @@ "resolved": "https://registry.npmjs.org/@dqbd/tiktoken/-/tiktoken-1.0.22.tgz", "integrity": "sha512-RYhO8xeHkMNX5Ixqf4M1Ve3siCYJY/dI0yLnlX4M4oIEDOvjMIQ+E+3OUpAaZcWTaMtQJzGcDAghYfllpx3i/w==" }, + "node_modules/@google/genai": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-0.2.0.tgz", + "integrity": "sha512-r7EiRHSqc6D1lDIMvM4OemjUwPpUbYb9jTxe1eLCiFbooHrmPc6U9z3n56E/iWzigkZmjRh4IC0CMzoB1aql9w==", + "dependencies": { + "google-auth-library": "^9.14.2", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@jridgewell/resolve-uri": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", @@ -284,6 +297,14 @@ "node": ">=0.4.0" } }, + "node_modules/agent-base": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz", + "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==", + "engines": { + "node": ">= 14" + } + }, "node_modules/agentkeepalive": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", @@ -330,6 +351,33 @@ "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "dev": true }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/bignumber.js": { + "version": "9.3.1", + "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", + "integrity": "sha512-Ko0uX15oIUS7wJ3Rb30Fs6SkVbLmPBAKdlm7q9+ak9bbIeFf0MwuBsQV6z7+X768/cHsfg+WlysDWJcmthjsjQ==", + "engines": { + "node": "*" + } + }, "node_modules/binary-extensions": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", @@ -535,7 +583,6 @@ "version": "4.4.1", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", - "dev": true, "dependencies": { "ms": "^2.1.3" }, @@ -747,6 +794,11 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, "node_modules/fill-range": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", @@ -859,6 +911,34 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gcp-metadata": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz", + "integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, "node_modules/get-intrinsic": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", @@ -906,6 +986,49 @@ "node": ">= 6" } }, + "node_modules/google-auth-library": { + "version": "9.15.1", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz", + "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/google-auth-library/node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/google-auth-library/node_modules/jws": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz", + "integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", + "dependencies": { + "jwa": "^2.0.0", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/google-logging-utils": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz", + "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==", + "engines": { + "node": ">=14" + } + }, "node_modules/gopd": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", @@ -917,6 +1040,37 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/gtoken/node_modules/jwa": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", + "integrity": "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg==", + "dependencies": { + "buffer-equal-constant-time": "^1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/gtoken/node_modules/jws": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz", + "integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==", + "dependencies": { + "jwa": "^2.0.0", + "safe-buffer": "^5.0.1" + } + }, "node_modules/has-flag": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", @@ -977,6 +1131,18 @@ "node": ">= 0.8" } }, + "node_modules/https-proxy-agent": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", + "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/humanize-ms": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", @@ -1057,6 +1223,25 @@ "node": ">=0.12.0" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/json-bigint": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz", + "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==", + "dependencies": { + "bignumber.js": "^9.0.0" + } + }, "node_modules/jsonwebtoken": { "version": "9.0.2", "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz", @@ -1922,6 +2107,18 @@ "node": ">= 0.4.0" } }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", @@ -1958,6 +2155,26 @@ "webidl-conversions": "^3.0.0" } }, + "node_modules/ws": { + "version": "8.18.3", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", + "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/xtend": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", diff --git a/src/utils/tokenizer.ts b/src/utils/tokenizer.ts index 48f8fbf..bc662f4 100644 --- a/src/utils/tokenizer.ts +++ b/src/utils/tokenizer.ts @@ -1,11 +1,11 @@ -import { get_encoding } from '@dqbd/tiktoken'; +import { get_encoding, type TiktokenEncoding } from '@dqbd/tiktoken'; -const encodingForModel = (model: string): string => { - const lower = (model || '').toLowerCase(); +const encodingForModel = (model?: string): TiktokenEncoding => { + const lower = (model ?? '').toLowerCase(); if (lower.includes('gpt-5') || lower.includes('gpt-4o') || lower.includes('o1') || lower.includes('o3')) { - return 'o200k_base'; + return 'o200k_base' as TiktokenEncoding; } - return 'cl100k_base'; + return 'cl100k_base' as TiktokenEncoding; }; export const countTextTokens = (text: string, model: string): number => {