diff --git a/.changeset/README.md b/.changeset/README.md new file mode 100644 index 0000000..e5b6d8d --- /dev/null +++ b/.changeset/README.md @@ -0,0 +1,8 @@ +# Changesets + +Hello and welcome! This folder has been automatically generated by `@changesets/cli`, a build tool that works +with multi-package repos, or single-package repos to help you version and publish your code. You can +find the full documentation for it [in our repository](https://github.com/changesets/changesets) + +We have a quick list of common questions to get you started engaging with this project in +[our documentation](https://github.com/changesets/changesets/blob/main/docs/common-questions.md) diff --git a/.changeset/config.json b/.changeset/config.json new file mode 100644 index 0000000..c96f647 --- /dev/null +++ b/.changeset/config.json @@ -0,0 +1,11 @@ +{ + "$schema": "https://unpkg.com/@changesets/config@3.1.4/schema.json", + "changelog": ["@changesets/changelog-github", { "repo": "reaatech/llm-cache" }], + "commit": false, + "fixed": [], + "linked": [], + "access": "public", + "baseBranch": "main", + "updateInternalDependencies": "patch", + "ignore": [] +} diff --git a/.eslintrc.cjs b/.eslintrc.cjs deleted file mode 100644 index 3495716..0000000 --- a/.eslintrc.cjs +++ /dev/null @@ -1,38 +0,0 @@ -module.exports = { - root: true, - parser: '@typescript-eslint/parser', - parserOptions: { - ecmaVersion: 2022, - sourceType: 'module', - project: ['./tsconfig.json', './examples/tsconfig.json', './packages/*/tsconfig.json', './packages/adapters/*/tsconfig.json'], - }, - plugins: ['@typescript-eslint'], - extends: [ - 'eslint:recommended', - 'plugin:@typescript-eslint/recommended', - 'plugin:@typescript-eslint/recommended-requiring-type-checking', - 'prettier', - ], - rules: { - '@typescript-eslint/no-unused-vars': ['error', { argsIgnorePattern: '^_' }], - '@typescript-eslint/explicit-function-return-type': 'off', - '@typescript-eslint/explicit-module-boundary-types': 'off', - 
'@typescript-eslint/no-explicit-any': 'error', - '@typescript-eslint/no-floating-promises': 'error', - '@typescript-eslint/await-thenable': 'error', - '@typescript-eslint/no-misused-promises': 'error', - }, - overrides: [ - { - files: ['*.test.ts'], - rules: { - '@typescript-eslint/no-unsafe-assignment': 'off', - '@typescript-eslint/no-unsafe-member-access': 'off', - '@typescript-eslint/no-unsafe-call': 'off', - '@typescript-eslint/no-unsafe-argument': 'off', - '@typescript-eslint/no-redundant-type-constituents': 'off', - }, - }, - ], - ignorePatterns: ['dist/', 'node_modules/', '*.js', '**/*.d.ts'], -}; diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05b8cc9..27b7894 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,9 +18,7 @@ jobs: uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v3 - with: - version: 8 + uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v4 @@ -29,10 +27,7 @@ jobs: cache: 'pnpm' - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Build workspace - run: pnpm build + run: pnpm install - name: Lint run: pnpm lint @@ -48,9 +43,7 @@ jobs: uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v3 - with: - version: 8 + uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v4 @@ -59,7 +52,7 @@ jobs: cache: 'pnpm' - name: Install dependencies - run: pnpm install --frozen-lockfile + run: pnpm install - name: Dependency audit run: pnpm audit --audit-level=high @@ -71,6 +64,7 @@ jobs: strategy: fail-fast: false matrix: + node-version: ['20', '22'] package: - packages/core - packages/adapters/redis @@ -84,21 +78,16 @@ jobs: uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v3 - with: - version: 8 + uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v4 with: - node-version: ${{ env.NODE_VERSION }} + node-version: ${{ matrix.node-version }} cache: 'pnpm' - name: 
Install dependencies - run: pnpm install --frozen-lockfile - - - name: Build workspace - run: pnpm build + run: pnpm install - name: Test ${{ matrix.package }} run: pnpm --filter ${{ matrix.package }} test @@ -112,9 +101,7 @@ jobs: uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v3 - with: - version: 8 + uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v4 @@ -123,19 +110,19 @@ jobs: cache: 'pnpm' - name: Install dependencies - run: pnpm install --frozen-lockfile + run: pnpm install - - name: Build workspace + - name: Build packages run: pnpm build - name: Run coverage (core) - run: pnpm --filter @llm-cache/core test:coverage + run: pnpm --filter @reaatech/llm-cache test:coverage - name: Run coverage (cost-tracker) - run: pnpm --filter @llm-cache/cost-tracker test:coverage + run: pnpm --filter @reaatech/llm-cache-cost-tracker test:coverage - name: Run coverage (observability) - run: pnpm --filter @llm-cache/observability test:coverage + run: pnpm --filter @reaatech/llm-cache-observability test:coverage build: name: Build All Packages @@ -146,74 +133,16 @@ jobs: uses: actions/checkout@v4 - name: Setup pnpm - uses: pnpm/action-setup@v3 - with: - version: 8 - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: ${{ env.NODE_VERSION }} - cache: 'pnpm' - - - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Build all packages - run: pnpm build - - deploy-staging: - name: Deploy to Staging - needs: [build, coverage] - if: github.ref == 'refs/heads/develop' - runs-on: ubuntu-latest - environment: staging - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Deploy - run: echo "Deploy to staging would run here" - - deploy-production: - name: Deploy to Production - needs: [build, coverage] - if: github.ref == 'refs/heads/main' - runs-on: ubuntu-latest - environment: production - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Deploy - run: echo 
"Deploy to production would run here" - - publish: - name: Publish to npm - needs: [build, coverage] - if: startsWith(github.ref, 'refs/tags/v') - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup pnpm - uses: pnpm/action-setup@v3 - with: - version: 8 + uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v4 with: node-version: ${{ env.NODE_VERSION }} - registry-url: 'https://registry.npmjs.org' cache: 'pnpm' - name: Install dependencies - run: pnpm install --frozen-lockfile + run: pnpm install - name: Build all packages run: pnpm build - - - name: Publish packages - run: pnpm -r publish --access public --no-git-checks - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..a44c2da --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,82 @@ +name: Release + +on: + workflow_dispatch: + # push: + # branches: [main] + # Uncomment the push trigger AFTER first publish succeeds (see GITHUB_TO_NPM.md section 7) + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +env: + NODE_VERSION: 20 + +jobs: + release: + name: Release + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + id-token: write + packages: write + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: 'pnpm' + registry-url: 'https://registry.npmjs.org' + + - name: Install dependencies + run: pnpm install + + - name: Build packages + run: pnpm build + + - name: Create release PR or publish to npm + id: changesets + uses: changesets/action@v1 + with: + publish: pnpm release + version: pnpm version-packages + commit: 'chore(release): version packages' + title: 'chore(release): 
version packages' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + NPM_CONFIG_PROVENANCE: 'true' + + - name: Mirror published packages to GitHub Packages + if: steps.changesets.outputs.published == 'true' + env: + NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PUBLISHED_PACKAGES: ${{ steps.changesets.outputs.publishedPackages }} + run: | + cat > .npmrc < $dir" + (cd "$dir" && npm publish --registry=https://npm.pkg.github.com) + done diff --git a/.gitignore b/.gitignore index 155083d..da0ac0f 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,12 @@ Thumbs.db # Logs logs/ + +# Stray build output in src/ (prevents tsup confusion and tarball bloat) +packages/*/src/**/*.js +packages/*/src/**/*.js.map +packages/*/src/**/*.d.ts +packages/*/src/**/*.d.ts.map *.log npm-debug.log* pnpm-debug.log* diff --git a/.npmrc b/.npmrc new file mode 100644 index 0000000..f7bafc7 --- /dev/null +++ b/.npmrc @@ -0,0 +1,2 @@ +shamefully-hoist=false +strict-peer-dependencies=true diff --git a/.prettierrc b/.prettierrc deleted file mode 100644 index 29b9d1f..0000000 --- a/.prettierrc +++ /dev/null @@ -1,8 +0,0 @@ -{ - "semi": true, - "trailingComma": "es5", - "singleQuote": true, - "printWidth": 100, - "tabWidth": 2, - "useTabs": false -} diff --git a/ABC_REMEDIATION.md b/ABC_REMEDIATION.md new file mode 100644 index 0000000..7b6e96d --- /dev/null +++ b/ABC_REMEDIATION.md @@ -0,0 +1,578 @@ +# agent-budget-controller — Conformance Remediation + +A checklist of gaps between this repo and the reference repo +[`a2a-reference-ts`](https://github.com/reaatech/a2a-reference-ts), with the goal of bringing +`agent-budget-controller` into conformity before its first publish to npm + GitHub Packages. + +**Reference state (compared against):** `~/dev/2026-04/a2a-reference-ts` @ `0196561` +**Current state (this repo):** pre-first-publish; queued changeset bumping all packages to `0.1.0`. + +Items are grouped by impact. 
**Critical** items will block or break a publish if not fixed. **High** +items affect end-user compatibility. **Medium** items are conformity / DX. **Low** items are stylistic. + +--- + +## Legend + +- [ ] = not done +- ⚠️ = will block publish or break consumers +- 🔁 = mechanical change +- 🧹 = invasive — touches many files + +--- + +## CRITICAL — fix before first publish + +### [ ] C1. Stop ignoring `CHANGELOG.md` in `.gitignore` ⚠️ + +`.gitignore:46` contains: + +``` +# Changeset CHANGELOG files (auto-generated) +**/CHANGELOG.md +``` + +This breaks the entire changesets release flow. `changesets/action@v1` writes per-package +`CHANGELOG.md` files when it runs `pnpm version-packages`, and the **release PR** it opens contains +those files staged for commit. With them gitignored: + +- The PR's diff will appear empty for changelogs. +- npm's "Releases" tab and GitHub's release notes will have nothing to display. +- Consumers can't see what changed between versions. + +**Fix:** delete the two lines above from `.gitignore`. Changelogs should be tracked. + +`a2a-reference-ts/.gitignore` does not ignore them. (Compare line counts: ABC has the rule, A2A +does not.) + +--- + +### [ ] C2. Reorder `exports` conditional keys — `types` MUST come first ⚠️ + +Every package's `package.json` currently has: + +```json +"exports": { + ".": { + "import": "./dist/index.js", + "types": "./dist/index.d.ts" + } +} +``` + +Per the [Node.js conditional-exports spec](https://nodejs.org/api/packages.html#conditional-exports) +and TypeScript's `--moduleResolution node16/nodenext`, **`types` must be the first key**. Otherwise +TS picks up the `.js` resolution before the `.d.ts` resolution, and consumers get +`Could not find a declaration file for module ...` errors under modern resolution modes. 
+ +**Fix in all 8 packages** (`packages/{budget-engine,cli,llm-router-plugin,middleware,otel-bridge,pricing,spend-tracker,types}/package.json`): + +```json +"exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } +} +``` + +(After H1 below, this becomes a 3-key block with `require` last.) + +Reference: `packages/core/package.json:20-26` in `a2a-reference-ts`. + +--- + +## HIGH — end-user compatibility + +### [ ] H1. Publish dual ESM/CJS (currently ESM-only) 🔁 + +`a2a-reference-ts` ships every package as both ESM and CJS: + +```ts +// tsup.config.ts +format: ['cjs', 'esm'] +``` + +```json +// package.json +"main": "./dist/index.cjs", +"module": "./dist/index.js", +"types": "./dist/index.d.ts", +"exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "require": "./dist/index.cjs" + } +} +``` + +Every ABC package is ESM-only. Anyone with `module: "commonjs"` in their `tsconfig` (still common) +or running Node tooling that does `require()` cannot consume the package without `.mjs`/dynamic-import +gymnastics. For library packages this is a real adoption barrier. + +**Fix per package:** + +1. `tsup.config.ts`: `format: ['esm']` → `format: ['cjs', 'esm']` +2. `package.json`: + - Add `"module": "./dist/index.js"` + - Change `"main": "./dist/index.js"` → `"main": "./dist/index.cjs"` + - Update `exports.import` stays `./dist/index.js`, add `"require": "./dist/index.cjs"` + +Applies to all 8 packages. + +> Consider if any package is intentionally ESM-only (e.g. uses top-level await or pure-ESM deps). +> The CLI is a candidate to stay ESM-only since it's executed, not imported. Decide per-package. + +--- + +## MEDIUM — toolchain & conformity + +### [ ] M1. Migrate from ESLint+Prettier+husky+lint-staged to Biome 🧹 + +`a2a-reference-ts` uses **Biome** for both lint and format (single tool, ~10× faster, no plugin +config). 
ABC has the older multi-tool stack: + +| Concern | a2a-reference-ts | agent-budget-controller | +|--|--|--| +| Lint | `@biomejs/biome` | `eslint` + `@typescript-eslint/*` + `eslint.config.js` | +| Format | `@biomejs/biome` | `prettier` + `.prettierrc` + `.prettierignore` | +| Pre-commit | (none) | `husky` + `lint-staged` + `.husky/pre-commit` + `.lintstagedrc.json` | +| Style metadata | (none) | `.editorconfig` | + +**Migration steps:** + +1. Copy `biome.json` from `a2a-reference-ts` to repo root. +2. Delete: `eslint.config.js`, `.prettierrc`, `.prettierignore`, `.husky/`, `.lintstagedrc.json`. +3. (Optional) Keep `.editorconfig` — it's IDE-level, harmless. A2A doesn't have one. +4. Update root `package.json`: + - Remove devDeps: `eslint`, `@typescript-eslint/eslint-plugin`, `@typescript-eslint/parser`, + `prettier`, `husky`, `lint-staged`. + - Add devDep: `@biomejs/biome` (^1.9.4 to match A2A). + - Replace scripts: + - `"format": "prettier --write ."` → `"format": "biome format --write ."` + - `"format:check": "prettier --check ."` → drop (use `lint` script instead) OR `"format:check": "biome format ."` + - `"lint": "eslint ."` → `"lint": "biome check ."` + - `"lint:fix": "eslint . --fix"` → `"lint:fix": "biome check --write ."` + - Remove `"prepare": "husky"` entirely (no husky anymore). +5. Run `pnpm install`, then `pnpm lint` to surface any code that biome flags but eslint did not + (typically `noNonNullAssertion`, `noExplicitAny`, etc. are stricter under A2A's `biome.json`). +6. Update `.github/workflows/ci.yml` — drop `pnpm format:check` step (or rename to use biome). + +This is invasive but is the single biggest "conformity" win. + +> If you want to keep husky for some reason (e.g. running tests pre-commit), fine — but the lint +> stack itself should converge on Biome. + +--- + +### [ ] M2. 
Add `tsconfig.typecheck.json` with workspace path aliases + +`a2a-reference-ts/tsconfig.typecheck.json` lets `pnpm typecheck` resolve cross-workspace imports +**without** building first, by aliasing `@reaatech/...` to each package's `src/index.ts`. Contents: + +```json +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "baseUrl": ".", + "paths": { + "@reaatech/agent-budget-types": ["./packages/types/src/index.ts"], + "@reaatech/agent-budget-pricing": ["./packages/pricing/src/index.ts"], + "@reaatech/agent-budget-spend-tracker": ["./packages/spend-tracker/src/index.ts"], + "@reaatech/agent-budget-engine": ["./packages/budget-engine/src/index.ts"], + "@reaatech/agent-budget-middleware": ["./packages/middleware/src/index.ts"], + "@reaatech/agent-budget-otel-bridge": ["./packages/otel-bridge/src/index.ts"], + "@reaatech/agent-budget-llm-router-plugin": ["./packages/llm-router-plugin/src/index.ts"], + "@reaatech/agent-budget-cli": ["./packages/cli/src/index.ts"] + } + } +} +``` + +Then update root `package.json`: + +```diff +- "typecheck": "tsc --noEmit" ++ "typecheck": "tsc --noEmit -p tsconfig.typecheck.json" +``` + +This eliminates the `^build` dep that ABC's typecheck currently has via composite references. + +--- + +### [ ] M3. Simplify root and per-package `tsconfig.json` (drop composite project mode) + +ABC currently uses TS composite project references. A2A does not — it relies on tsup for output and +on `tsconfig.typecheck.json` (M2) for cross-package typecheck. Simplifying: + +**Root `tsconfig.json`** — match A2A's set of strict flags. 
ABC is missing several: + +```diff ++ "noImplicitAny": true, ++ "strictNullChecks": true, ++ "strictFunctionTypes": true, ++ "strictBindCallApply": true, ++ "strictPropertyInitialization": true, ++ "noImplicitThis": true, ++ "alwaysStrict": true, ++ "isolatedModules": true, ++ "verbatimModuleSyntax": true, +- "composite": true, +- "rootDir": ".", +- "outDir": "./dist" +``` + +Drop the `references` array (or keep it if you still want `tsc -b --watch` to work — see below). + +Drop `lib: ["ES2022"]` is already there — match A2A's. Add `lib: ["ES2022"]` if missing +(it's already in ABC, fine). + +**Per-package `tsconfig.json`** — collapse from: + +```json +{ + "extends": "../../tsconfig.json", + "compilerOptions": { "outDir": "./dist", "rootDir": "./src", "composite": false }, + "include": ["src/**/*"], + "exclude": ["dist", "node_modules", "tests"], + "references": [{ "path": "../types" }, { "path": "../spend-tracker" }] +} +``` + +to: + +```json +{ + "extends": "../../tsconfig.json", + "compilerOptions": { "outDir": "./dist", "rootDir": "./src" }, + "include": ["src/**/*"] +} +``` + +Match A2A's `packages/core/tsconfig.json`. + +> If you genuinely need `tsc -b --watch` for incremental dev, keep references — but then also keep +> the `tsconfig.tsbuildinfo` clutter (M4). A2A picks the simpler path because tsup handles builds +> and watch is rare. + +--- + +### [ ] M4. Clean up stray `tsconfig.tsbuildinfo` files + +These exist on disk (left over from composite-mode builds): + +``` +./dist/tsconfig.tsbuildinfo +./packages/budget-engine/tsconfig.tsbuildinfo +./packages/cli/tsconfig.tsbuildinfo +./packages/llm-router-plugin/tsconfig.tsbuildinfo +./packages/middleware/tsconfig.tsbuildinfo +./packages/otel-bridge/tsconfig.tsbuildinfo +./packages/pricing/tsconfig.tsbuildinfo +./packages/spend-tracker/tsconfig.tsbuildinfo +./packages/types/tsconfig.tsbuildinfo +``` + +`git ls-files | grep tsbuildinfo` shows none are tracked — good. But: + +1. 
They live in `dist/` (already gitignored) and per-package roots. +2. `.gitignore` already has `*.tsbuildinfo` (line 8) → safe. +3. After M3 (no composite mode), nothing will regenerate them. Delete them now: `find . -name tsconfig.tsbuildinfo -not -path './node_modules/*' -delete`. + +--- + +### [ ] M5. Bump dev tooling versions to match A2A + +ABC's tooling has drifted: + +| Dep | a2a-reference-ts | agent-budget-controller | Action | +|--|--|--|--| +| `typescript` | `^5.8.3` | `^5.7.2` | bump | +| `vitest` | `^3.1.1` | `^2.1.8` | **major bump** — review breaking changes | +| `@vitest/coverage-v8` | `3.2.4` | `^2.1.9` | bump with vitest | +| `turbo` | `^2.5.0` | `^2.5.0` | ok | +| `tsup` (per-pkg) | `^8.4.0` | `^8.4.0` | ok | +| `@changesets/cli` | `^2.28.1` | `^2.27.11` | bump | +| `@changesets/changelog-github` | `^0.6.0` | `^0.6.0` | ok | + +**Action:** `pnpm up -r typescript vitest @vitest/coverage-v8 @changesets/cli` and verify tests still pass. + +Vitest 2 → 3 is a major and may surface deprecations. Validate `pnpm test` clean before publish. + +--- + +### [ ] M6. Bump `packageManager` to pnpm 10 + +```diff +- "packageManager": "pnpm@9.15.0" ++ "packageManager": "pnpm@10.22.0" +``` + +A2A is on pnpm 10. Newer pnpm versions have safer auto-install behavior and faster resolution. +You'll likely need to regenerate `pnpm-lock.yaml` (`rm pnpm-lock.yaml && pnpm install`) — review +the lockfile diff carefully before committing. + +> CI uses `pnpm/action-setup@v6` which reads `packageManager` from `package.json`, so this propagates +> automatically to GitHub Actions. + +--- + +### [ ] M7. Add `examples/*` to `pnpm-workspace.yaml` + +ABC has `examples/standalone`, `examples/with-llm-router`, `examples/with-otel-cost-exporter` on +disk but `pnpm-workspace.yaml` only contains: + +```yaml +packages: + - 'packages/*' +``` + +Examples can't currently use `workspace:*` to depend on their sibling packages. 
Match A2A: + +```yaml +packages: + - 'packages/*' + - 'examples/*' +``` + +(A2A also has `'e2e'` — ABC has no `e2e/` directory, so skip that line.) + +After this change, examples that want to dogfood the workspace's own packages can declare e.g.: + +```json +"@reaatech/agent-budget-engine": "workspace:*" +``` + +instead of pinning to the published version. + +--- + +### [ ] M8. Reconcile `.npmrc` + +Different intents: + +```diff +-# agent-budget-controller/.npmrc +-registry=https://registry.npmjs.org/ +-save-exact=true ++# a2a-reference-ts/.npmrc ++shamefully-hoist=false ++strict-peer-dependencies=true +``` + +`save-exact=true` makes new `pnpm add` calls write `1.2.3` instead of `^1.2.3` — fine for apps, +**bad for libraries** since it means consumers can't dedupe. Drop it. + +`strict-peer-dependencies=true` (A2A) catches missing peers at install time — useful in a workspace. + +**Recommended merged content:** + +``` +shamefully-hoist=false +strict-peer-dependencies=true +``` + +(Drop the explicit `registry=` — pnpm defaults to npmjs.org anyway, and per-scope GitHub Packages +overrides go in CI's transient `.npmrc`, not the committed one.) + +--- + +### [ ] M9. Align CI workflow structure + +ABC `ci.yml` is 70 lines, single `build` job. A2A `ci.yml` is 382 lines, separated into: + +``` +install (cache) → audit, format, lint, typecheck, build → test (matrix), coverage → all-checks +``` + +The A2A version is heavier but gives: +- Faster signal: lint failures don't wait for build to finish. +- Discrete required-status-checks for branch protection. +- A "barrel" `all-checks` job to gate merges on a single check. + +ABC has `audit` already (called `security`); the rest is missing. A2A also runs `docker-build` and +`docker-compose` — skip those for ABC (no Dockerfile). + +**Decision needed:** if you want strict conformity, port A2A's `ci.yml`. If you just want +fast/correct CI and don't need split status checks, ABC's current setup is fine. 
Marking as +**optional** but flagged. + +Also: ABC tests on Node `[22, 23]`, A2A tests on `[20, 22]`. Pick a target. Convention: test the +oldest LTS you support (`20`) plus current (`22`). `23` is non-LTS and rarely worth gating on. + +```diff +- node-version: ['22', '23'] ++ node-version: ['20', '22'] +``` + +--- + +### [ ] M10. Action versions: decide pin v4 vs v6 + +ABC was bumped to `v6` by Dependabot (`actions/checkout@v6`, `pnpm/action-setup@v6`, +`actions/setup-node@v6`, `actions/upload-artifact@v7`). A2A is on `v4` across the board. + +Both work. Recommendation: **stay on v6** in ABC (Dependabot will keep it current) and consider +bumping A2A on its next maintenance pass — don't downgrade ABC for conformity's sake. + +--- + +## LOW — stylistic / housekeeping + +### [ ] L1. Align `package.json` field ordering across packages + +A2A's per-package field order: + +``` +name, version, description, license, author, repository, homepage, bugs, +type, main, module, types, exports, files, publishConfig, +scripts, dependencies, devDependencies +``` + +ABC's order has `repository`, `bugs`, `homepage` before `license`, `author`. Pure cosmetic — no +functional impact. Skip if you don't care. + +--- + +### [ ] L2. Drop `git+` prefix in repository URLs (or add to A2A) + +ABC's `package.json`: `"url": "git+https://github.com/..."` (works, technically more correct). +A2A's `package.json`: `"url": "https://github.com/..."` (also works). + +npm normalizes both. Pick one and align. Recommendation: **keep ABC's form** — `git+` is the spec. + +--- + +### [ ] L3. Bump `version` on root `package.json` from `0.0.0` → `0.1.0` + +`agent-budget-controller/package.json:3` is `"version": "0.0.0"`. Doesn't affect publishing (root +is `private: true`), but visually inconsistent with the per-package versions and with A2A's root +(`0.1.0`). + +Cosmetic. Bump when convenient. + +--- + +### [ ] L4. 
Resolve the `budget-engine` directory/package name mismatch + +Current state: + +``` +packages/budget-engine/ → @reaatech/agent-budget-engine +``` + +vs. every other package: + +``` +packages/types/ → @reaatech/agent-budget-types +packages/pricing/ → @reaatech/agent-budget-pricing +... +``` + +The directory name should be the package-name suffix (i.e., what comes after `agent-budget-`). +This is the convention A2A follows perfectly (e.g. `packages/core` → `@reaatech/a2a-reference-core`). + +The release workflow already papers over this with a fallback: + +```bash +dir="packages/${name#@reaatech/agent-budget-}" +[ -d "$dir" ] || dir="packages/budget-${name#@reaatech/agent-budget-}" +``` + +**Two options:** + +- **Option A (proper fix):** Rename `packages/budget-engine` → `packages/engine`. Update: + - The directory name (`git mv`). + - `tsconfig.typecheck.json` path (after M2). + - Root `tsconfig.json` `references` (if M3 not yet applied). + - Any `import` path that references `packages/budget-engine` (search before/after). + - Remove the fallback line in `.github/workflows/release.yml`. +- **Option B (accept):** Leave it. The fallback handles it. But it's an asymmetry future contributors + will trip over. + +**Recommended: Option A**, before first publish. After publish, the package name `@reaatech/agent-budget-engine` +is permanent on npm, but the directory is internal and free to rename anytime. + +--- + +### [ ] L5. Decide whether to keep `dev: "tsc -b --watch"` script + +Root `package.json` has `"dev": "tsc -b --watch"` which only works under composite project mode. +After M3 (drop composite), this script breaks. + +- If keeping watch-mode dev: replace with `"dev": "turbo run dev"` and add `"dev": "tsup --watch"` + to each package. +- If not actively using it: delete the script. + +A2A has no `dev` script. + +--- + +### [ ] L6. 
Sweep miscellaneous `.gitignore` differences + +Items in ABC's `.gitignore` not in A2A's (all harmless, can keep): + +- `.nyc_output/` +- `tmp/`, `temp/`, `*.tmp` +- `*~` + +No action required unless you want strict alignment. + +--- + +### [ ] L7. Engines field consistency + +ABC root `package.json`: + +```json +"engines": { "node": ">=22.0.0", "pnpm": ">=9.0.0" } +``` + +A2A root has no `engines` field (each package may declare its own; only A2A's CLI-ish packages do). + +Engines fields are advisory in pnpm by default but warn on `npm install`. Recommendation: **keep +ABC's** — it's a useful guard. After M6, bump pnpm floor: + +```diff +- "pnpm": ">=9.0.0" ++ "pnpm": ">=10.0.0" +``` + +--- + +## Suggested execution order + +The cheapest path that minimizes rebase pain: + +1. **C1, C2** — both small, both block publish. Fix immediately, single commit. +2. **L4 (rename `budget-engine`)** — do early before other refactors touch the dir. +3. **M7 (workspace yaml)**, **M8 (.npmrc)**, **L3 (root version)** — trivial one-liners. +4. **H1 (dual ESM/CJS)** — touches every package's `tsup.config.ts` and `package.json`. Single + focused commit. +5. **M5, M6 (dep bumps)** — separate commit. Run full test suite. +6. **M3, M4 (tsconfig simplify + cleanup)** — coupled. Drop composite mode and clean buildinfo. +7. **M2 (tsconfig.typecheck.json)** — depends on M3. +8. **M1 (Biome migration)** — biggest blast radius. Save for last in its own PR. Expect Biome + to flag style issues ESLint missed. +9. **M9, L1, L2, L5, L6, L7** — polish, only if you want strict conformity. + +Expected total: 4–6 PRs / commits, ~1 day of work end-to-end. 
+ +--- + +## Verification checklist (post-remediation) + +- [ ] `pnpm install` clean, `pnpm-lock.yaml` regenerated +- [ ] `pnpm typecheck` passes +- [ ] `pnpm lint` passes (Biome, after M1) +- [ ] `pnpm test` green on Node 20 and 22 +- [ ] `pnpm build` produces both `dist/index.js` AND `dist/index.cjs` for each package (after H1) +- [ ] `pnpm changeset status` shows the queued `initial-release.md` is intact +- [ ] CI green on the remediation PR(s) +- [ ] First publish from main triggers the release workflow successfully +- [ ] All 8 packages appear on npm with `latest` tag at `0.1.0` +- [ ] Mirror step writes all 8 packages to `https://github.com/reaatech?tab=packages` +- [ ] `npm view @reaatech/agent-budget-types` shows correct repo, homepage, exports + +When this list is fully checked, ABC will be at structural parity with `a2a-reference-ts`. diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index a7d578b..ba4444e 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -49,7 +49,7 @@ llm-cache is a sophisticated semantic caching layer designed to reduce LLM API c ## Core Components -### 1. Cache Engine (`@llm-cache/core`) +### 1. Cache Engine (`@reaatech/llm-cache`) The heart of the system, responsible for orchestrating cache operations. @@ -100,7 +100,7 @@ class SimilarityMatcher { - Result ranking and scoring - Use case and model filtering -### 2. Embedding Service (`@llm-cache/core/embedding`) +### 2. Embedding Service (`@reaatech/llm-cache/embedding`) Handles all embedding-related operations with cost optimization. @@ -140,7 +140,7 @@ class EmbeddingCache { } ``` -### 3. Storage Adapters (`@llm-cache/adapters/*`) +### 3. Storage Adapters (`@reaatech/llm-cache-adapters/*`) Abstract storage layer with multiple implementations. @@ -257,7 +257,7 @@ class InMemoryAdapter implements StorageAdapter { } ``` -### 4. Cost Tracker (`@llm-cache/cost-tracker`) +### 4. Cost Tracker (`@reaatech/llm-cache-cost-tracker`) Comprehensive cost tracking and savings calculation. 
@@ -303,7 +303,7 @@ interface ModelPricing { } ``` -### 5. Observability Service (`@llm-cache/observability`) +### 5. Observability Service (`@reaatech/llm-cache-observability`) Enterprise-grade monitoring and logging. @@ -700,15 +700,15 @@ class AuditLogger { **Primary Distribution: npm Library** -llm-cache is primarily distributed as a set of npm packages (`@llm-cache/core`, `@llm-cache/adapters-redis`, etc.) that developers import into their applications. This provides the tightest integration with existing LLM client code and the lowest latency (no network hop to a separate service). +llm-cache is primarily distributed as a set of npm packages (`@reaatech/llm-cache`, `@reaatech/llm-cache-adapters-redis`, etc.) that developers import into their applications. This provides the tightest integration with existing LLM client code and the lowest latency (no network hop to a separate service). **Optional for Users, Required to Develop: HTTP Service Wrapper** -A thin HTTP wrapper is provided as `@llm-cache/server`. Users can choose to import `@llm-cache/core` directly into their application (lowest latency, tightest integration) OR deploy `@llm-cache/server` as a sidecar/centralized service (polyglot environments, service-oriented architectures). +A thin HTTP wrapper is provided as `@reaatech/llm-cache-server`. Users can choose to import `@reaatech/llm-cache` directly into their application (lowest latency, tightest integration) OR deploy `@reaatech/llm-cache-server` as a sidecar/centralized service (polyglot environments, service-oriented architectures). The server package is **optional for end users** but is a **required workspace package to develop and maintain** — it must be built, tested, and released in lockstep with core releases. -The Docker, Kubernetes, and Helm configurations described below apply to the `@llm-cache/server` service wrapper. 
+The Docker, Kubernetes, and Helm configurations described below apply to the `@reaatech/llm-cache-server` service wrapper. --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b6a312..1d960ec 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -161,7 +161,7 @@ pnpm test pnpm test:coverage # Specific package tests -pnpm test --filter=@llm-cache/core +pnpm test --filter=@reaatech/llm-cache # Watch mode pnpm test:watch diff --git a/GITHUB_TO_NPM.md b/GITHUB_TO_NPM.md new file mode 100644 index 0000000..1d5f4ec --- /dev/null +++ b/GITHUB_TO_NPM.md @@ -0,0 +1,797 @@ +# GitHub → npm Publishing Setup + +A comprehensive runbook for taking a TypeScript pnpm-monorepo from "code on disk" to +"published on npm via GitHub Actions." Written after a real 3-hour-painful first run. +Read this end-to-end before starting; the **GOTCHA** callouts are the time-savers. + +--- + +## 0. Audience and assumptions + +This document targets a Claude Code agent (or human) setting up: + +- A **pnpm monorepo** (workspaces under `packages/*`, optional `examples/*`, optional `e2e`) +- Multiple **scoped public** npm packages under a single `@scope/*` namespace +- **Changesets** for versioning + CHANGELOG generation +- **GitHub Actions** for automated releases via `changesets/action` +- A single npm account that owns the scope (user account or org — both work) + +If the repo doesn't match all of this, adapt — but the **first-publish bootstrap** section +applies regardless, because that's an npm-side problem, not a tooling problem. + +--- + +## 1. The flow at a glance + +``` +Repo prep → Changesets init → CI wired up (manual trigger only) + → npm token + GitHub secrets + Actions permissions + → MANUAL FIRST PUBLISH from local laptop (the painful part) + → Verify packages live on npm + → Re-enable push-to-main trigger + → Done. Future releases run via the Version Packages PR flow. +``` + +The reason for the manual first publish is explained in section 7. 
Do not skip ahead; +trying to do first publish from CI **does not work** with default token permissions. + +--- + +## 2. Repository preparation + +### 2.1 Per-package `package.json` requirements + +Every package in `packages/*` that you intend to publish must have: + +```json +{ + "name": "@scope/package-name", + "version": "0.1.0", + "description": "...", + "license": "MIT", + "author": "Name (url)", + "repository": { + "type": "git", + "url": "https://github.com/owner/repo.git", + "directory": "packages/package-name" + }, + "homepage": "https://github.com/owner/repo/tree/main/packages/package-name#readme", + "bugs": { + "url": "https://github.com/owner/repo/issues" + }, + "type": "module", + "main": "./dist/index.cjs", + "module": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "require": "./dist/index.cjs" + } + }, + "files": ["dist"], + "publishConfig": { + "access": "public" + } +} +``` + +**Why each field matters:** + +| Field | Why | +|---|---| +| `repository.url` | npm shows "Repository" link on package page; required by GitHub Packages for owner verification. **Must end in `.git`.** npm CLI may auto-correct `https://...github.com/...` to `git+https://...github.com/.../.git` and emit a warning. | +| `repository.directory` | Tells npm which subdir the package lives in within the monorepo. | +| `publishConfig.access: "public"` | **Required for scoped packages.** Without this, the first publish silently tries to push as private, which fails on a free account with `402 Payment Required`. | +| `files: ["dist"]` | Limits the published tarball to the build output. npm always also includes `package.json`, `README*`, `LICENSE*`, `CHANGELOG*` regardless of this list. | +| `main` / `module` / `types` / `exports` | Standard dual-ESM/CJS shape if you build with tsup. Adjust if you use a different bundler. 
| + +> **GOTCHA:** Don't set `repository.url` without `.git` — npm's auto-correction +> works but pollutes every publish with a warning. Just include `.git` upfront. + +### 2.2 Per-package files that ship in every tarball + +Make sure each `packages//` contains: + +- `README.md` — **must be publish-quality.** Stub READMEs are what your npmjs.com + page will display. Include: title, badges, install, quick start, API reference, + related packages, license link. +- `LICENSE` — copy of the project's root LICENSE. Without this, the MIT text won't + ship in the published tarball even though `package.json` declares the license. +- `dist/` — generated by your build, **gitignored**. Don't commit it. + +> **GOTCHA:** If you previously had build output committed in `packages/*/src/` (e.g. +> tsup misconfigured to write to `src/` instead of `dist/`), purge those stray +> `*.js` / `*.d.ts` / `*.map` files. They confuse tsup on rebuild and bloat tarballs. +> Add this to `.gitignore`: +> ``` +> packages/*/src/**/*.js +> packages/*/src/**/*.js.map +> packages/*/src/**/*.d.ts +> packages/*/src/**/*.d.ts.map +> ``` + +### 2.3 Root `package.json` + +Mark it private so it can never be accidentally published: + +```json +{ + "name": "your-monorepo-name", + "version": "0.1.0", + "private": true, + ... +} +``` + +The root is the workspace orchestrator (turbo, biome, changesets, vitest), not a +publishable package. + +### 2.4 Private packages (examples, e2e tests) + +Anything in `examples/*` and `e2e/` that imports from your packages should be +marked `"private": true`. Changesets respects this and skips them. Verify: + +```bash +for f in examples/*/package.json e2e/package.json; do + echo -n "$f: "; grep -c '"private"' "$f" +done +``` + +--- + +## 3. 
Changesets initialization + +### 3.1 Install + init + +If `@changesets/cli` isn't already in root `devDependencies`: + +```bash +pnpm add -D -w @changesets/cli @changesets/changelog-github +pnpm changeset init +``` + +This creates `.changeset/config.json` and `.changeset/README.md`. + +### 3.2 Configure `.changeset/config.json` + +The default config is restricted-access (private). Update to: + +```json +{ + "$schema": "https://unpkg.com/@changesets/config@3.1.4/schema.json", + "changelog": [ + "@changesets/changelog-github", + { "repo": "owner/repo" } + ], + "commit": false, + "fixed": [], + "linked": [], + "access": "public", + "baseBranch": "main", + "updateInternalDependencies": "patch", + "ignore": [] +} +``` + +**Field notes:** + +- `changelog` with `@changesets/changelog-github` produces CHANGELOG entries that + hyperlink PRs and contributors. Replace `owner/repo` with your actual repo. +- `access: "public"` is **redundant with per-package `publishConfig`** but + defends against omissions; both should be set. +- `linked: []` means each package versions independently. If you want all packages + to bump together, use `linked: [["@scope/*"]]` — but only if they truly should + always release in lockstep. +- `ignore: []` — leave empty. Private packages are auto-skipped, you don't need + to list them here. + +### 3.3 Add release scripts to root `package.json` + +```json +{ + "scripts": { + "changeset": "changeset", + "version-packages": "changeset version", + "release": "turbo run build && changeset publish" + } +} +``` + +`turbo run build` ensures `dist/` is fresh before publishing. If you don't use +turbo, swap for whatever your build command is. + +--- + +## 4. GitHub Actions release workflow + +### 4.1 Create `.github/workflows/release.yml` + +> **GOTCHA:** Start with `workflow_dispatch` only. Do **not** include +> `push: branches: [main]` until after the first manual publish (section 7). 
+> Otherwise every push to main will attempt to auto-publish and fail with +> 404s for the entire setup phase. + +```yaml +name: Release + +on: + workflow_dispatch: + # Add `push: branches: [main]` AFTER the first manual publish succeeds. + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +env: + NODE_VERSION: 22 + +jobs: + release: + name: Release + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + id-token: write + packages: write + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup pnpm + uses: pnpm/action-setup@v4 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: 'pnpm' + registry-url: 'https://registry.npmjs.org' + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build packages + run: pnpm build + + - name: Create release PR or publish to npm + id: changesets + uses: changesets/action@v1 + with: + publish: pnpm release + version: pnpm version-packages + commit: 'chore(release): version packages' + title: 'chore(release): version packages' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NPM_TOKEN: ${{ secrets.NPM_TOKEN }} + NPM_CONFIG_PROVENANCE: 'true' + + # Optional: mirror to GitHub Packages so the repo sidebar shows them. + # Remove this step entirely if you don't care about the sidebar. + - name: Mirror published packages to GitHub Packages + if: steps.changesets.outputs.published == 'true' + env: + NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PUBLISHED_PACKAGES: ${{ steps.changesets.outputs.publishedPackages }} + run: | + cat > .npmrc < $dir" + (cd "$dir" && npm publish --registry=https://npm.pkg.github.com) + done +``` + +**Replace `@scope/prefix-` in the mirror step** with whatever maps your package +name to its directory. For `@reaatech/a2a-reference-core` in `packages/core/`, +that's `@reaatech/a2a-reference-`. 
+ +### 4.2 Permissions explainer + +| Permission | Why | +|---|---| +| `contents: write` | changesets/action commits version bumps and creates the Version Packages PR | +| `pull-requests: write` | Same — opening/updating the Version Packages PR | +| `id-token: write` | Required for npm provenance (OIDC signing) | +| `packages: write` | Required for the GitHub Packages mirror step | + +--- + +## 5. npm account setup + +### 5.1 Account state + +You publish under a scope. The scope must match either: +- An npm **user** account name (e.g., username `reaatech` → `@reaatech/...`), or +- An npm **organization** name (created at npmjs.com/org/create — free for unlimited public packages) + +Either works the same way. **You do not need an org.** If your username is the +scope you want, you're set. + +### 5.2 2FA configuration — **read this carefully** + +Enable 2FA. There is no excuse not to for accounts publishing public libraries +(supply chain attacks). But the configuration here matters for CI/CD. + +**Two settings on the npm 2FA page:** + +1. **2FA enrollment method** (Security Key vs Authenticator App) +2. **"Require 2FA for write actions"** checkbox + +> **GOTCHA — the most painful one in this whole setup:** +> +> The npm CLI cannot use security keys (WebAuthn) for publish operations. +> `npm publish` requires either a TOTP code from an authenticator app, or +> a single-use recovery code. +> +> If you only enroll a security key, you have **no way** to type a 6-digit +> code at the npm OTP prompt. You'll be stuck using recovery codes, each +> of which is single-use. +> +> **Recommendation: enroll BOTH a security key AND a TOTP authenticator app** +> (1Password TOTP, Authy, Google Authenticator). The security key handles +> browser-based auth (web UI, npm login), the TOTP handles CLI publish prompts. 
+>
+> If you've already enrolled only a security key and the npm UI doesn't show
+> an option to add a TOTP authenticator, you may need to disable 2FA and
+> re-enable starting with TOTP, then add the security key alongside.
+
+**"Require 2FA for write actions" — uncheck this.** When enabled, every publish
+(including from automation tokens) requires interactive 2FA, which CI cannot
+provide. With it disabled, you're in **"Authorization only"** mode: 2FA protects
+account login, but tokens can publish without OTP. This is the standard config
+for solo maintainers with CI-driven releases.
+
+### 5.3 Generate the npm token
+
+Go to https://www.npmjs.com/settings/<username>/tokens → **Generate New Token** →
+**Granular Access Token**.
+
+| Setting | Value |
+|---|---|
+| Token name | `<repo> CI` (e.g., `a2a-reference-ts CI`) |
+| Expiration | 1 year (set a calendar reminder to rotate) |
+| Allowed IP ranges | Blank |
+| Packages and scopes | **All packages and scopes** with **Read and write** |
+| Bypass 2FA | Leave **unchecked** (only check this if "Require 2FA for write" is on, and that requires an IP allowlist) |
+
+> **GOTCHA — token scope choice:**
+>
+> The "Only select packages and scopes" option restricts the token to packages
+> **already in the scope**. It cannot create new packages. For first publish,
+> this fails with E404. Use "All packages and scopes" instead — it can create
+> new packages and write to anything you own.
+>
+> After first publish completes, you can regenerate with the tighter
+> "Only select packages and scopes" option if you prefer least-privilege.
+
+> **GOTCHA — classic tokens are gone for many accounts:**
+>
+> npm has been deprecating classic tokens. If you don't see a "Classic Token"
+> option, you only get granular tokens. That's fine — granular with "All
+> packages and scopes" works.
+
+Copy the `npm_…` value immediately; npm only shows it once.
+
+---
+
+## 6.
GitHub repository configuration + +### 6.1 Add the npm token as a secret + +https://github.com/owner/repo/settings/secrets/actions + +- Click **New repository secret** (use repository, not environment, secret — + see why in section 6.3) +- Name: **`NPM_TOKEN`** (exact spelling) +- Value: paste the `npm_…` token + +### 6.2 Enable Actions to write to the repo and create PRs + +https://github.com/owner/repo/settings/actions + +In the **Workflow permissions** section: +- Set radio to **"Read and write permissions"** +- Check **"Allow GitHub Actions to create and approve pull requests"** +- Save + +Without these, `changesets/action` cannot open the Version Packages PR. + +### 6.3 Repository secret vs environment secret + +Use a **repository secret** unless you have a specific need for environment +gating. Repository secrets are available to all workflows in the repo. +Environment secrets require defining a deployment environment and adding +`environment: ` to the workflow job — extra ceremony you don't need +for a solo public library. + +--- + +## 7. The first-publish bootstrap (the painful part) + +This is the section that took 3 hours to figure out. Read it carefully. + +### 7.1 Why CI cannot do the first publish + +When `changesets/action` runs with no pending changeset files, it falls through +to `changeset publish`. That command checks each non-private package: if its +local version isn't published on npm, it tries to publish. + +For a brand-new scope with **zero published packages**, npm rejects the publish +with `404 Not Found - PUT https://registry.npmjs.org/@scope/package-name`. +This 404 message is misleading — what npm is actually saying is "this package +doesn't exist and the token doesn't have permission to create it." + +Even with a granular token set to "All packages and scopes" with read/write, +**CI publishes can fail to create the first version of a brand-new package**. 
+This appears to be a registry-side gate that distinguishes interactive user +sessions from token-based machine sessions for first publishes. The behavior +isn't well documented; the workaround is reliable. + +### 7.2 The bootstrap procedure + +You will publish each package **once** from your local laptop using your full +account credentials. After that, CI takes over for all subsequent versions. + +**Prerequisites:** +- All packages built (`pnpm build`) +- `git status` clean +- Logged into npm: `npm whoami` returns your username + +**Step 1: log in to npm CLI:** + +```bash +npm login +``` + +Follow the browser prompt — security key challenge fires, you complete it, +the CLI gets a session token. + +**Step 2: have OTP codes ready.** + +Every `npm publish` will prompt for an OTP. You need one **per publish** — +they're single-use. Get them from one of: + +- TOTP authenticator app (preferred): a fresh 6-digit code per publish +- npm recovery codes: 64-char hex strings, single-use, generated on the + npm 2FA settings page + +> **GOTCHA — recovery code format:** +> +> npm recovery codes are 64-character hex (e.g., +> `a5c41c1d7aa6f33650de2b5a0c3fd1b1ddc651638719b70888ae52afe46e6996`), +> NOT the `XXXXX-XXXXX` short format some tools use. Use them as-is with +> `--otp=`. + +> **GOTCHA — recovery code burn rate:** +> +> Each recovery code is single-use. If you have 7 packages to publish, +> you need 7 codes. Recovery codes come in batches of 5 or 10 — generate +> a fresh batch first if your remaining count is short. **Generating new +> codes invalidates the old set.** + +**Step 3: publish each package.** + +Use `pnpm publish` (not `npm publish` directly) so `workspace:*` deps get +rewritten to real semver: + +```bash +cd packages/ +pnpm publish --access public --no-git-checks --otp= +``` + +`--access public` is required for scoped packages on a free account (also set +via `publishConfig`, but pass it on the CLI as belt-and-suspenders). 
+`--no-git-checks` skips pnpm's pre-publish branch/tag checks (which require +specific git state that's irrelevant for first publish). + +Repeat for each of the 7 (or N) packages with a **different OTP each time**. + +> **GOTCHA — `pnpm -r publish` with one `--otp` flag does NOT work for >1 package:** +> +> Recovery codes are single-use. Passing `--otp=X` to `pnpm -r publish` uses +> the same code for every package; only the first succeeds and the rest fail +> with EOTP. Run `pnpm publish` per-package, each with its own code. + +> **GOTCHA — pnpm's `+ @` line is not a success guarantee:** +> +> When `pnpm publish` echoes `+ @scope/foo@0.1.0`, that means the **upload** +> happened, not that the registry accepted it. If npm responds with 4xx after +> tarball upload (E403, E404), the publish failed but pnpm's progress line +> still shows the package name. Always verify with `npm view` or curl. + +### 7.3 Verify the publishes + +```bash +for pkg in core server client auth ...; do + echo -n "@scope/prefix-$pkg: " + curl -s -o /dev/null -w "%{http_code}\n" https://registry.npmjs.org/@scope%2fprefix-$pkg +done +``` + +Expect `200` for each. Some may show `404` for **60–90 seconds** after publish +due to CDN propagation — re-check after waiting. If `npm view @scope/foo version` +returns the expected version, it's truly live regardless of what curl says. + +> **GOTCHA — `npm view` cache:** +> +> If `npm view` returns 404 right after publish, also try +> `npm view @scope/foo --registry=https://registry.npmjs.org/` to bypass any +> local npm cache. + +### 7.4 Mirror existing versions to GitHub Packages (one-time backfill) + +The release workflow's "Mirror published packages to GitHub Packages" step only +fires when **CI** publishes (it keys off `changesets/action`'s `publishedPackages` +output). Your manual first publishes from local in section 7.2 went only to +npmjs.com, so the GitHub repo's **Packages** sidebar will be empty. 
+
+To backfill the existing versions to GitHub Packages, do a one-time local mirror.
+All future versions will mirror automatically via CI.
+
+**Step 1: refresh `gh` token with packages scopes.**
+
+The default `gh auth login` token does **not** include `write:packages`. Refresh
+with the additional scopes:
+
+```bash
+gh auth refresh -h github.com -s write:packages,read:packages
+```
+
+This prints a one-time code, opens your browser, you confirm. Returns when done.
+
+> **GOTCHA — `gh auth refresh` is interactive only:**
+>
+> The refresh command needs a browser/device flow and won't run inside a
+> non-interactive shell (Claude Code's bash, automation scripts, etc.). Run it
+> in your own terminal.
+
+> **GOTCHA — CI doesn't have this problem:**
+>
+> The release workflow's `GITHUB_TOKEN` automatically has `write:packages`
+> because the workflow's `permissions:` block declares `packages: write`.
+> Only the local backfill needs the manual `gh auth refresh`.
+
+**Step 2: publish each package to `npm.pkg.github.com`.**
+
+```bash
+TOKEN=$(gh auth token)
+cat > .npmrc.gh <<EOF
+@scope:registry=https://npm.pkg.github.com
+//npm.pkg.github.com/:_authToken=${TOKEN}
+EOF
+
+for dir in packages/*; do
+  (cd "$dir" && NPM_CONFIG_USERCONFIG=../../.npmrc.gh \
+    npm publish --registry=https://npm.pkg.github.com)
+done
+
+rm .npmrc.gh
+```
+
+> **GOTCHA — `repository.url` must point to the same owner:**
+>
+> GitHub Packages refuses publishes when `package.json`'s `repository.url`
+> points to a repo owned by someone else. The `directory` subfield is fine;
+> only the owner check matters. If you skipped section 2.1's `repository`
+> field setup, you'll get a 422 here.
+
+**Step 3: verify on GitHub.**
+
+```bash
+gh api "/users/<username>/packages?package_type=npm" --jq '.[].name'
+```
+
+Expect each package name listed. They'll appear in the repo sidebar at
+`https://github.com/<owner>/<repo>` within a few seconds.
+
+For an organization-owned repo, swap `/users/` → `/orgs/`.
+
+### 7.5 Re-enable the auto-publish trigger
+
+Once all packages are live on npm, edit `.github/workflows/release.yml`:
+
+```yaml
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:
+```
+
+Commit and push. The next push to `main` will run the workflow.
With no pending +changesets and all packages already at the published versions, `changeset publish` +sees nothing to do and the workflow no-ops successfully. Future releases publish +to npm and auto-mirror to GitHub Packages in the same workflow run. + +--- + +## 8. Day-to-day release flow + +After bootstrap is complete, the standard flow is: + +```bash +# while working on a feature +pnpm changeset # interactive: pick packages, bump type, summary +git add .changeset/*.md +git commit -m "feat: ..." +git push # opens your PR + +# after PR merges to main: +# - GH Actions opens (or updates) a "Version Packages" PR automatically +# - review the version bumps + auto-generated CHANGELOGs +# - merge it → packages publish to npm + mirror to GitHub Packages +``` + +No manual OTPs. No laptop dependency. CI handles everything. + +--- + +## 9. Verification checklist + +Before declaring "done," confirm: + +- [ ] `pnpm install` clean (no warnings about `workspace:*` deps) +- [ ] `pnpm typecheck` passes +- [ ] `pnpm test` passes +- [ ] `pnpm build` produces `dist/` in each package, no stray output in `src/` +- [ ] `git status` clean +- [ ] All publishable packages have `repository`, `homepage`, `bugs`, `license`, + `author`, `publishConfig.access`, and a copied `LICENSE` file +- [ ] All examples/e2e marked `"private": true` +- [ ] Root `package.json` marked `"private": true` +- [ ] `.changeset/config.json` has `access: "public"` and your repo in the changelog config +- [ ] `NPM_TOKEN` secret set in GitHub +- [ ] GitHub Actions permissions: read/write + allow PR creation +- [ ] All packages return `200` from `https://registry.npmjs.org/@scope%2fpkg` +- [ ] `release.yml` trigger is `push: branches: [main]` (after first publish) +- [ ] First test push to main runs the workflow as a no-op + +--- + +## 10. Troubleshooting + +### `404 Not Found - PUT` during CI publish + +The token cannot create new packages. 
Either:
+- Token is "Only select packages and scopes" → regenerate with "All packages and scopes"
+- Or this is a brand-new scope and CI cannot bootstrap → do manual first publish
+
+### `EOTP — This operation requires a one-time password`
+
+`npm publish` from CLI is asking for OTP. You need to pass `--otp=<code>` with a
+TOTP code or recovery code. Security key alone is not sufficient at the CLI.
+
+### `403 You cannot publish over the previously published versions`
+
+That version already exists on npm. Either:
+- The previous publish succeeded silently (check with `npm view`)
+- Or you need to bump the version first (`pnpm changeset` + version PR)
+
+### `402 Payment Required`
+
+Scoped package published as private without `--access public`. Add
+`"publishConfig": { "access": "public" }` to `package.json` and republish a new
+version (you can't change a published version's access).
+
+### `npm error code E401` / `ENEEDAUTH` in CI
+
+The `NPM_TOKEN` secret is missing, expired, or doesn't have write access. Verify
+in repo settings, regenerate if needed.
+
+### Workflow can't open Version Packages PR
+
+GitHub Actions doesn't have permission. Settings → Actions → General → Workflow
+permissions → "Read and write" + "Allow GitHub Actions to create and approve PRs."
+
+### `pnpm -r publish` says "+ <pkg>" but `npm view` returns 404
+
+Either CDN propagation lag (wait 60–90 seconds) or the upload happened but the
+registry rejected after. Try `npm view --registry=https://registry.npmjs.org/`.
+If still 404, re-publish the same version — if it returns E403 "cannot publish
+over previously published," the package is actually live.
+
+### Recovery codes look like 64-char hex, not `XXXXX-XXXXX`
+
+That's correct. Use them as-is with `--otp=<code>`.
+
+### Stale `.js` / `.d.ts` files in `packages/*/src/`
+
+Old tsup output that wrote into `src/` instead of `dist/`. Delete them and add
+the gitignore patterns from section 2.2.
+ +### `403 permission_denied: The token provided does not match expected scopes` on GitHub Packages publish + +The `gh` token doesn't have `write:packages`. Refresh with: +```bash +gh auth refresh -h github.com -s write:packages,read:packages +``` +This is local-only — CI's `GITHUB_TOKEN` is fine because the workflow's +`permissions:` block grants `packages: write`. + +### `422 Unprocessable Entity` on GitHub Packages publish + +`package.json`'s `repository.url` points to a different GitHub owner than the +one publishing. Make sure `repository.url` points to a repo owned by the same +account/org as the package scope. + +### GitHub repo sidebar "Packages" section is empty after CI publish + +Either: +- The mirror step hasn't run yet — check that the workflow has the + `Mirror published packages to GitHub Packages` step and `packages: write` in + permissions +- The mirror step's package-name → directory mapping is wrong (e.g., + `packages/${name#@scope/prefix-}` doesn't match your naming) +- The CI run published nothing (`steps.changesets.outputs.published == 'true'` + was false), so the mirror step was correctly skipped + +If you've published versions only manually to npm and never via CI, you need +the one-time backfill from section 7.4 — the mirror step keys off CI publishes. + +--- + +## 11. 
Quick command reference
+
+```bash
+# Build everything
+pnpm build
+
+# Run tests + typecheck + lint
+pnpm test && pnpm typecheck && pnpm lint
+
+# Add a changeset (interactive)
+pnpm changeset
+
+# Bump versions per pending changesets
+pnpm version-packages
+
+# Local publish (per package, with OTP)
+cd packages/<package>
+pnpm publish --access public --no-git-checks --otp=<code>
+
+# Verify a package is live
+curl -s -o /dev/null -w "%{http_code}\n" https://registry.npmjs.org/@scope%2fpkg
+npm view @scope/pkg version
+
+# List all published packages in your scope
+npm access list packages @scope
+
+# Refresh local gh token to allow GitHub Packages publishing (one-time)
+gh auth refresh -h github.com -s write:packages,read:packages
+
+# Manually mirror a package to GitHub Packages (one-time backfill)
+TOKEN=$(gh auth token)
+cd packages/<package>
+echo "@scope:registry=https://npm.pkg.github.com
+//npm.pkg.github.com/:_authToken=${TOKEN}" > /tmp/.npmrc.gh
+NPM_CONFIG_USERCONFIG=/tmp/.npmrc.gh \
+  npm publish --registry=https://npm.pkg.github.com
+rm /tmp/.npmrc.gh
+
+# List packages on GitHub
+gh api "/users/<username>/packages?package_type=npm" --jq '.[].name'
+```
+
+---
+
+## 12. References
+
+- Changesets: https://github.com/changesets/changesets
+- changesets/action: https://github.com/changesets/action
+- npm scoped packages: https://docs.npmjs.com/about-scopes
+- npm provenance: https://docs.npmjs.com/generating-provenance-statements
+- pnpm publish: https://pnpm.io/cli/publish
diff --git a/README.md b/README.md
index fe292a3..0c03d30 100644
--- a/README.md
+++ b/README.md
@@ -1,74 +1,95 @@
 # llm-cache
 
-

- version - CI - license - node - pnpm -

+[![CI](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml/badge.svg)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) +[![TypeScript](https://img.shields.io/badge/TypeScript-5.4-blue)](https://www.typescriptlang.org/) -Semantic caching layer for LLM calls — embedding-based similarity matching, not exact-match alone. Reduce latency, cut API costs, and maintain consistent responses across semantically equivalent prompts. +> Semantic caching layer for LLM calls — embedding-based similarity matching with model-aware fingerprinting, use-case segmentation, adaptive TTL, and cost tracking. Reduce latency, cut API costs, and maintain consistent responses across semantically equivalent prompts. + +This monorepo provides the core caching engine, pluggable storage adapters, an HTTP server wrapper, and supporting packages for cost tracking and observability. ## Features -- **Exact-match cache** — SHA-256 hash of the full prompt for zero-latency cache hits -- **Semantic cache** — Embed prompts and search for similar cached entries above a configurable cosine similarity threshold -- **Cache fingerprinting** — Model, temperature, top_p, system prompt, and tools are hashed together so different generation configurations never collide +- **Exact-match cache** — SHA-256 hash of the full prompt for sub-millisecond cache hits +- **Semantic cache** — Embed prompts and search for similar entries above a configurable cosine similarity threshold +- **Generation config fingerprinting** — Model, temperature, top_p, system prompt, and tools are hashed so different configurations never collide - **Multi-adapter storage** — Pluggable backends for metadata (Memory, Redis, DynamoDB) and vector search (Memory, Qdrant) -- **Use case segmentation** — Isolate caches by use case (e.g., summarization vs. 
classification) to prevent cross-contamination -- **Cost tracking** — Built-in pricing for 40+ models across OpenAI, Anthropic, and Google; calculate dollars saved per cache hit +- **Use-case segmentation** — Isolate caches by use case to prevent cross-contamination - **Adaptive TTL** — Factual queries expire faster than creative ones; sensitive data gets the shortest TTL -- **Observability** — Structured JSON logging, Prometheus-compatible metrics, and optional distributed tracing support +- **Cost tracking** — Built-in pricing for 40+ models across OpenAI, Anthropic, and Google with savings calculation +- **Observability** — Structured JSON logging with automatic PII redaction and Prometheus-compatible metrics - **Encryption-ready** — AES-256-GCM for prompts, responses, and embeddings at the storage layer -- **HTTP server** — Optional REST API wrapper for polyglot and service-oriented architectures +- **HTTP server** — REST API wrapper for polyglot and service-oriented architectures ## Installation +### Using the packages + +Packages are published under the `@reaatech` scope and can be installed individually: + ```bash # Core library (required) -pnpm add @llm-cache/core +pnpm add @reaatech/llm-cache -# Storage adapters (optional — pick what you need) -pnpm add @llm-cache/adapters-redis # Redis for exact-match metadata -pnpm add @llm-cache/adapters-dynamodb # DynamoDB for exact-match metadata -pnpm add @llm-cache/adapters-qdrant # Qdrant for vector search +# Storage adapters (pick what you need) +pnpm add @reaatech/llm-cache-adapters-redis # Redis for exact-match metadata +pnpm add @reaatech/llm-cache-adapters-dynamodb # DynamoDB for exact-match metadata +pnpm add @reaatech/llm-cache-adapters-qdrant # Qdrant for vector search # Utilities (optional) -pnpm add @llm-cache/cost-tracker # Cost calculation and pricing data -pnpm add @llm-cache/observability # Metrics, logging, and tracing -pnpm add @llm-cache/server # HTTP server wrapper +pnpm add 
@reaatech/llm-cache-cost-tracker # Cost calculation and pricing data +pnpm add @reaatech/llm-cache-observability # Metrics, logging, and tracing +pnpm add @reaatech/llm-cache-server # HTTP server wrapper ``` -**Requirements:** Node.js >= 20.0.0, pnpm >= 8.0.0 +### Contributing -## Quick Start +```bash +# Clone the repository +git clone https://github.com/reaatech/llm-cache.git +cd llm-cache + +# Install dependencies +pnpm install + +# Build all packages +pnpm build + +# Run the test suite +pnpm test -### Library Usage +# Run linting +pnpm lint + +# Run type check +pnpm typecheck +``` + +## Quick Start ```typescript -import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@llm-cache/core'; +import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from "@reaatech/llm-cache"; const cache = new CacheEngine({ storage: new InMemoryAdapter(), vectorStorage: new InMemoryAdapter(), embedder: new OpenAIEmbedder({ - provider: 'openai', - model: 'text-embedding-3-small', + provider: "openai", + model: "text-embedding-3-small", dimensions: 1536, apiKey: process.env.OPENAI_API_KEY, }), config: { - storage: { adapter: 'memory' }, - vectorStorage: { adapter: 'memory' }, + storage: { adapter: "memory" }, + vectorStorage: { adapter: "memory" }, embedding: { - provider: 'openai', - model: 'text-embedding-3-small', + provider: "openai", + model: "text-embedding-3-small", dimensions: 1536, batchSize: 100, maxRetries: 3, }, - similarity: { threshold: 0.8, metric: 'cosine', maxResults: 10 }, + similarity: { threshold: 0.8, metric: "cosine", maxResults: 10 }, ttl: { default: 3600, factual: 1800, @@ -77,59 +98,57 @@ const cache = new CacheEngine({ sensitive: 600, byUseCase: {}, }, - segmentation: { enabled: true, defaultUseCase: 'general' }, - cost: { enabled: true, currency: 'USD' }, - observability: { metrics: true, tracing: false, logging: 'info' }, + segmentation: { enabled: true, defaultUseCase: "general" }, + cost: { enabled: true, currency: "USD" }, + observability: { metrics: 
true, tracing: false, logging: "info" }, }, }); // Store a response await cache.set( - 'What is TypeScript?', - { choices: [{ message: { content: 'A typed superset of JavaScript' } }] }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, + "What is TypeScript?", + { choices: [{ message: { content: "A typed superset of JavaScript" } }] }, + { model: "gpt-4", modelVersion: "gpt-4-0613" }, ); // Exact match — < 1ms -const exact = await cache.get('What is TypeScript?', { - model: 'gpt-4', - modelVersion: 'gpt-4-0613', +const exact = await cache.get("What is TypeScript?", { + model: "gpt-4", + modelVersion: "gpt-4-0613", }); -// { hit: true, type: 'exact', entry: {...} } +// → { hit: true, type: "exact", entry: {...} } // Semantic match — < 50ms with Qdrant -const semantic = await cache.get('Tell me about TypeScript', { - model: 'gpt-4', - modelVersion: 'gpt-4-0613', +const semantic = await cache.get("Tell me about TypeScript", { + model: "gpt-4", + modelVersion: "gpt-4-0613", }); -// { hit: true, type: 'semantic', confidence: 0.92, entry: {...} } - -// Miss — forward to your LLM provider, then cache the response -const miss = await cache.get('What is Rust?', { - model: 'gpt-4', - modelVersion: 'gpt-4-0613', -}); -// { hit: false } +// → { hit: true, type: "semantic", confidence: 0.92, entry: {...} } ``` ### Server Usage (Docker) ```bash -# Start Qdrant + Redis + cache server docker compose up -# Check cache curl -X POST http://localhost:3000/cache/get \ - -H 'Content-Type: application/json' \ + -H "Content-Type: application/json" \ -d '{"prompt": "What is TypeScript?", "options": {"model": "gpt-4", "modelVersion": "gpt-4-0613"}}' - -# Store a response -curl -X POST http://localhost:3000/cache/set \ - -H 'Content-Type: application/json' \ - -d '{"prompt": "What is TypeScript?", "response": {"choices": [{"message": {"content": "A typed superset of JavaScript"}}]}, "options": {"model": "gpt-4", "modelVersion": "gpt-4-0613"}}' ``` -For end-to-end examples with Redis, 
Qdrant, and DynamoDB, see the [`examples/`](examples/) directory. +See the [`examples/`](examples/) directory for end-to-end examples with Redis, Qdrant, and DynamoDB. + +## Packages + +| Package | Description | +| ------- | ----------- | +| [`@reaatech/llm-cache`](./packages/core) | Core caching engine, adapters (InMemory), embedder (OpenAI), similarity matcher, and all shared types | +| [`@reaatech/llm-cache-adapters-redis`](./packages/adapters/redis) | Redis storage adapter with automatic TTL, connection pooling, and batch operations | +| [`@reaatech/llm-cache-adapters-dynamodb`](./packages/adapters/dynamodb) | DynamoDB adapter with native TTL, GSIs for metadata queries, and batch operations | +| [`@reaatech/llm-cache-adapters-qdrant`](./packages/adapters/qdrant) | Qdrant vector database adapter for low-latency semantic search via HNSW | +| [`@reaatech/llm-cache-cost-tracker`](./packages/cost-tracker) | Cost calculator with built-in pricing for 40+ models and savings computation | +| [`@reaatech/llm-cache-observability`](./packages/observability) | Structured JSON logger with PII redaction and Prometheus metrics collector | +| [`@reaatech/llm-cache-server`](./packages/server) | HTTP server wrapper with configurable storage and vector adapters | ## Architecture @@ -157,108 +176,29 @@ CacheEngine 3. **Semantic search** — Embed the prompt, query the vector store for similar entries above the configured threshold (< 50ms with Qdrant) 4. 
**Cache miss** — Forward to your LLM provider, then store the result for future hits -## Packages - -| Package | npm | Description | -|---|---|---| -| `@llm-cache/core` | [![npm](https://img.shields.io/badge/npm-core-blue)](https://www.npmjs.com/package/@llm-cache/core) | CacheEngine, adapters (InMemory), embedder (OpenAI), similarity matcher, and all shared types | -| `@llm-cache/adapters-redis` | [![npm](https://img.shields.io/badge/npm-redis-blue)](https://www.npmjs.com/package/@llm-cache/adapters-redis) | Redis storage adapter with connection pooling, SETEX TTL, and key-space scanning | -| `@llm-cache/adapters-dynamodb` | [![npm](https://img.shields.io/badge/npm-dynamodb-blue)](https://www.npmjs.com/package/@llm-cache/adapters-dynamodb) | DynamoDB adapter with native TTL, GSIs for useCase and modelVersion queries, batch operations | -| `@llm-cache/adapters-qdrant` | [![npm](https://img.shields.io/badge/npm-qdrant-blue)](https://www.npmjs.com/package/@llm-cache/adapters-qdrant) | Qdrant vector database adapter for low-latency semantic search via HNSW | -| `@llm-cache/cost-tracker` | [![npm](https://img.shields.io/badge/npm-cost--tracker-blue)](https://www.npmjs.com/package/@llm-cache/cost-tracker) | Cost calculator with built-in pricing for 40+ models (OpenAI, Anthropic, Google) | -| `@llm-cache/observability` | [![npm](https://img.shields.io/badge/npm-observability-blue)](https://www.npmjs.com/package/@llm-cache/observability) | Structured JSON logger, Prometheus metrics collector, optional tracing hooks | -| `@llm-cache/server` | [![npm](https://img.shields.io/badge/npm-server-blue)](https://www.npmjs.com/package/@llm-cache/server) | HTTP server wrapper with configurable storage and vector adapters | - ## Configuration -### Environment Variables +See [`.env.example`](.env.example) for the full annotated configuration reference. 
Core environment variables: | Variable | Description | Default | -|---|---|---| +|----------|-------------|---------| | `OPENAI_API_KEY` | OpenAI API key for embeddings | — | -| `OPENAI_ORGANIZATION` | OpenAI organization ID (optional) | — | -| `EMBEDDING_PROVIDER` | Embedding provider (`openai`) | `openai` | -| `OPENAI_EMBEDDING_MODEL` | Embedding model name | `text-embedding-3-small` | -| `OPENAI_EMBEDDING_DIMENSIONS` | Embedding vector dimensions | `1536` | -| `EMBEDDING_BATCH_SIZE` | Max prompts per embedding API call | `100` | -| `EMBEDDING_MAX_RETRIES` | Max retries on embedding API failures | `3` | -| `STORAGE_ADAPTER` | Metadata storage backend | `memory` | -| `REDIS_URL` | Redis connection URL (e.g. `redis://localhost:6379`) | — | -| `DYNAMODB_REGION` | AWS region for DynamoDB | — | -| `DYNAMODB_TABLE` | DynamoDB table name | — | -| `DYNAMODB_ENDPOINT` | DynamoDB endpoint override (local dev) | — | -| `VECTOR_STORAGE_ADAPTER` | Vector search backend | `memory` | -| `QDRANT_URL` | Qdrant server URL | — | -| `QDRANT_COLLECTION` | Qdrant collection name | `llm-cache` | -| `QDRANT_API_KEY` | Qdrant API key (optional) | — | +| `STORAGE_ADAPTER` | Metadata storage backend (`memory`, `redis`, `dynamodb`) | `memory` | +| `VECTOR_STORAGE_ADAPTER` | Vector search backend (`memory`, `qdrant`) | `memory` | | `SIMILARITY_THRESHOLD` | Cosine similarity threshold (0.0–1.0) | `0.8` | -| `SIMILARITY_MAX_RESULTS` | Max results from semantic search | `10` | | `TTL_DEFAULT` | Default cache TTL in seconds | `3600` | -| `TTL_FACTUAL` | TTL for factual queries | `1800` | -| `TTL_CREATIVE` | TTL for creative queries | `7200` | -| `TTL_ANALYTICAL` | TTL for analytical queries | `3600` | -| `TTL_SENSITIVE` | TTL for sensitive data | `600` | -| `SEGMENTATION_ENABLED` | Enable use-case-based cache isolation | `true` | -| `DEFAULT_USE_CASE` | Default use case when none specified | `general` | -| `COST_TRACKING_ENABLED` | Enable cost savings calculation | `true` | -| `COST_CURRENCY` | 
Currency for cost reporting | `USD` | -| `METRICS_ENABLED` | Enable Prometheus metrics collection | `true` | -| `TRACING_ENABLED` | Enable distributed tracing hooks | `false` | -| `LOG_LEVEL` | Log level (`error`, `warn`, `info`, `debug`) | `info` | | `LLM_CACHE_API_KEY` | API key for server authentication | — | | `PORT` | HTTP server port | `3000` | -| `MAX_BODY_BYTES` | Max request body size (bytes) | `1048576` | - -See [`.env.example`](.env.example) for the full annotated configuration reference. - -### Similarity Threshold Tuning - -| Threshold | Behavior | Recommended For | -|---|---|---| -| `0.95+` | Near-identical matches only | Strict fact retrieval, legal text | -| `0.85–0.94` | Close paraphrases | Q&A, documentation search | -| `0.75–0.84` | Semantically related | Summarization, creative writing | -| `0.70–0.74` | Loosely related | Brainstorming, exploration | ## Operational Notes -- **DynamoDB TTL** — Enable native TTL on the `expiresAtEpoch` attribute (override via the `ttlAttribute` adapter option). Without it, expired rows accumulate indefinitely. -- **Qdrant eviction** — The adapter does not auto-evict expired points. Run `cache.invalidate({ olderThan })` periodically to clean up. -- **Server authentication** — Set `LLM_CACHE_API_KEY` before exposing the server beyond a trusted network. Without it, all `/cache/*` endpoints are unauthenticated. -- **Pricing data** — Pricing in `@llm-cache/cost-tracker` is provided as reference and may lag provider price changes. Verify against your provider before relying on it for billing. -- **Redis SCAN queries** — `findByUseCase`, `findByModelVersion`, and `invalidateByCriteria` walk the keyspace via `SCAN` (O(N)). Avoid calling them on hot paths; run from background jobs or deploy Redis Stack with RediSearch. 
- -## Development - -```bash -# Install dependencies -pnpm install - -# Build all packages -pnpm build - -# Run all tests -pnpm test - -# Run tests with coverage -pnpm test:coverage - -# Run tests for a specific package -pnpm --filter @llm-cache/core test +- **DynamoDB TTL** — Enable native TTL on the `expiresAtEpoch` attribute (override via the `ttlAttribute` adapter option). +- **Qdrant eviction** — The adapter does not auto-evict expired points. Run `cache.invalidate({ olderThan })` periodically. +- **Server authentication** — Set `LLM_CACHE_API_KEY` before exposing the server beyond a trusted network. +- **Pricing data** — Pricing in `@reaatech/llm-cache-cost-tracker` is provided as reference and may lag provider price changes. +- **Redis SCAN queries** — `findByUseCase`, `findByModelVersion`, and `invalidateByCriteria` walk the keyspace via `SCAN` (O(N)). Avoid calling on hot paths. -# Lint all packages -pnpm lint -pnpm lint:fix - -# Type-check all packages -pnpm typecheck - -# Format code -pnpm format -pnpm format:check -``` - -### Project Structure +## Project Structure ``` llm-cache/ @@ -274,23 +214,19 @@ llm-cache/ ├── examples/ # Usage examples (basic, Redis, Qdrant) ├── skills/ # AI agent development skills ├── docker-compose.yml # Local development stack (Qdrant + Redis + server) -├── tsconfig.json # Root TypeScript configuration (strict, ESNext) +├── tsconfig.json # Root TypeScript configuration ├── pnpm-workspace.yaml # pnpm workspace definition └── .github/workflows/ # CI/CD pipelines ``` -## Contributing - -Contributions are welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on commit conventions, testing requirements, and the pull request process. This project follows the [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html) code of conduct. 
- -For security vulnerabilities, please report directly via [GitHub Security Advisories](https://github.com/reaatech/llm-cache/security/advisories/new) rather than opening a public issue. See [SECURITY.md](SECURITY.md) for the full disclosure policy. - ## Documentation -- [ARCHITECTURE.md](ARCHITECTURE.md) — System design, data flow, and component interfaces -- [DEV_PLAN.md](DEV_PLAN.md) — Development roadmap and milestones -- [AGENTS.md](AGENTS.md) — AI agent development framework and skill definitions +- [`ARCHITECTURE.md`](ARCHITECTURE.md) — System design, data flow, and component interfaces +- [`DEV_PLAN.md`](DEV_PLAN.md) — Development roadmap and milestones +- [`CONTRIBUTING.md`](CONTRIBUTING.md) — Contribution workflow and release process +- [`AGENTS.md`](AGENTS.md) — AI agent development framework and skill definitions +- [`SECURITY.md`](SECURITY.md) — Vulnerability reporting and security best practices ## License -MIT © [llm-cache contributors](https://github.com/reaatech/llm-cache/graphs/contributors) +[MIT](LICENSE) diff --git a/biome.json b/biome.json new file mode 100644 index 0000000..d398bd8 --- /dev/null +++ b/biome.json @@ -0,0 +1,33 @@ +{ + "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", + "files": { + "ignore": ["dist", "node_modules", "coverage"] + }, + "organizeImports": { + "enabled": true + }, + "linter": { + "enabled": true, + "rules": { + "recommended": true, + "suspicious": { + "noExplicitAny": "error" + }, + "style": { + "noNonNullAssertion": "error" + } + } + }, + "formatter": { + "enabled": true, + "indentStyle": "space", + "indentWidth": 2, + "lineWidth": 100 + }, + "javascript": { + "formatter": { + "quoteStyle": "single", + "trailingCommas": "all" + } + } +} diff --git a/examples/basic-usage.ts b/examples/basic-usage.ts index 899db26..bacab89 100644 --- a/examples/basic-usage.ts +++ b/examples/basic-usage.ts @@ -1,5 +1,5 @@ -import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@llm-cache/core'; -import type { 
CacheConfig } from '@llm-cache/core'; +import type { CacheConfig } from '@reaatech/llm-cache'; +import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@reaatech/llm-cache'; const config: CacheConfig = { storage: { adapter: 'memory' }, @@ -63,7 +63,7 @@ async function main() { await cache.set( 'What is TypeScript?', { choices: [{ message: { content: 'TypeScript is a typed superset of JavaScript.' } }] }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); // Exact match @@ -82,7 +82,7 @@ async function main() { }); console.log( 'Semantic match:', - semantic.hit ? `${semantic.type} hit (confidence: ${semantic.confidence})` : 'miss' + semantic.hit ? `${semantic.type} hit (confidence: ${semantic.confidence})` : 'miss', ); // Miss diff --git a/examples/package.json b/examples/package.json index a6a9dcb..a606618 100644 --- a/examples/package.json +++ b/examples/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/examples", + "name": "@reaatech/llm-cache-examples", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "private": true, @@ -8,10 +8,10 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "@llm-cache/core": "workspace:*", - "@llm-cache/adapters-redis": "workspace:*", - "@llm-cache/adapters-qdrant": "workspace:*", - "@llm-cache/adapters-dynamodb": "workspace:*" + "@reaatech/llm-cache": "workspace:*", + "@reaatech/llm-cache-adapters-redis": "workspace:*", + "@reaatech/llm-cache-adapters-qdrant": "workspace:*", + "@reaatech/llm-cache-adapters-dynamodb": "workspace:*" }, "devDependencies": { "@types/node": "^20.11.0", diff --git a/examples/qdrant-example.ts b/examples/qdrant-example.ts index 10ec49f..64c27ff 100644 --- a/examples/qdrant-example.ts +++ b/examples/qdrant-example.ts @@ -1,6 +1,6 @@ -import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@llm-cache/core'; -import { QdrantAdapter } from '@llm-cache/adapters-qdrant'; -import type { CacheConfig } from '@llm-cache/core'; 
+import type { CacheConfig } from '@reaatech/llm-cache'; +import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@reaatech/llm-cache'; +import { QdrantAdapter } from '@reaatech/llm-cache-adapters-qdrant'; const config: CacheConfig = { storage: { adapter: 'memory' }, @@ -71,7 +71,7 @@ async function main() { await cache.set( 'Explain quantum computing', { answer: 'Quantum computing uses qubits...' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); const result = await cache.get('What is quantum computing?', { diff --git a/examples/redis-example.ts b/examples/redis-example.ts index a64a8ab..82dcb19 100644 --- a/examples/redis-example.ts +++ b/examples/redis-example.ts @@ -1,6 +1,6 @@ -import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@llm-cache/core'; -import { RedisAdapter } from '@llm-cache/adapters-redis'; -import type { CacheConfig } from '@llm-cache/core'; +import type { CacheConfig } from '@reaatech/llm-cache'; +import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@reaatech/llm-cache'; +import { RedisAdapter } from '@reaatech/llm-cache-adapters-redis'; const config: CacheConfig = { storage: { adapter: 'redis' }, @@ -67,7 +67,7 @@ async function main() { await cache.set( 'What is Redis?', { answer: 'An in-memory data structure store' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); const result = await cache.get('What is Redis?', { model: 'gpt-4', modelVersion: 'gpt-4-0613' }); diff --git a/package.json b/package.json index 2f1ad69..e49ed8d 100644 --- a/package.json +++ b/package.json @@ -16,40 +16,29 @@ "type": "module", "engines": { "node": ">=20.0.0", - "pnpm": ">=8.0.0" + "pnpm": ">=10.0.0" }, - "packageManager": "pnpm@8.15.0", + "packageManager": "pnpm@10.22.0", "scripts": { - "build": "pnpm --filter @llm-cache/core build && pnpm --filter @llm-cache/observability --filter @llm-cache/cost-tracker --filter 
@llm-cache/adapters-redis --filter @llm-cache/adapters-dynamodb --filter @llm-cache/adapters-qdrant build && pnpm --filter @llm-cache/server build", + "build": "pnpm -r build", "test": "pnpm -r test", "test:coverage": "pnpm -r test:coverage", - "lint": "eslint . --ext .ts", - "lint:fix": "eslint . --ext .ts --fix", - "typecheck": "pnpm -r typecheck", - "format": "prettier --write \"**/*.{ts,tsx,md,json}\"", - "format:check": "prettier --check \"**/*.{ts,tsx,md,json}\"" + "lint": "biome check .", + "lint:fix": "biome check --write .", + "format": "biome format --write .", + "typecheck": "tsc --noEmit -p tsconfig.typecheck.json", + "changeset": "changeset", + "version-packages": "changeset version", + "release": "pnpm build && changeset publish" }, "devDependencies": { + "@biomejs/biome": "^1.9.4", + "@changesets/changelog-github": "^0.6.0", + "@changesets/cli": "^2.28.1", "@types/node": "^20.11.0", - "@typescript-eslint/eslint-plugin": "^7.0.0", - "@typescript-eslint/parser": "^7.0.0", - "@vitest/coverage-v8": "^1.3.0", - "eslint": "^8.57.0", - "eslint-config-prettier": "^9.1.0", - "husky": "^9.0.0", - "lint-staged": "^15.2.0", - "prettier": "^3.2.0", - "typescript": "^5.4.0", - "vitest": "^1.3.0" - }, - "lint-staged": { - "*.{ts,tsx}": [ - "eslint --fix", - "prettier --write" - ], - "*.{md,json,yaml,yml}": [ - "prettier --write" - ] + "@vitest/coverage-v8": "3.2.4", + "typescript": "^5.8.3", + "vitest": "^3.1.1" }, "pnpm": { "overrides": { diff --git a/packages/adapters/dynamodb/LICENSE b/packages/adapters/dynamodb/LICENSE new file mode 100644 index 0000000..1390d41 --- /dev/null +++ b/packages/adapters/dynamodb/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 llm-cache contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/adapters/dynamodb/README.md b/packages/adapters/dynamodb/README.md index f6c3cb0..cc68f56 100644 --- a/packages/adapters/dynamodb/README.md +++ b/packages/adapters/dynamodb/README.md @@ -1,45 +1,147 @@ -# @llm-cache/adapters-dynamodb +# @reaatech/llm-cache-adapters-dynamodb -DynamoDB storage adapter for llm-cache exact-match metadata storage. +[![npm version](https://img.shields.io/npm/v/@reaatech/llm-cache-adapters-dynamodb.svg)](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-dynamodb) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/reaatech/llm-cache/blob/main/LICENSE) +[![CI](https://img.shields.io/github/actions/workflow/status/reaatech/llm-cache/ci.yml?branch=main&label=CI)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) -## Install +> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production. + +DynamoDB storage adapter for llm-cache exact-match metadata. Provides serverless, scalable persistence with native DynamoDB TTL, GSI-based metadata queries, and batch operations chunked to AWS limits. 
+ +## Installation ```bash -npm install @llm-cache/adapters-dynamodb +npm install @reaatech/llm-cache-adapters-dynamodb +# or +pnpm add @reaatech/llm-cache-adapters-dynamodb ``` -## Usage +## Feature Overview + +- **Native DynamoDB TTL** — writes an epoch-second attribute (`expiresAtEpoch` by default) for automatic row expiration +- **GSI-backed queries** — `gsi1` indexes `useCase`, `gsi2` indexes `modelVersion` for efficient metadata queries +- **Batch operations** — `getBatch`, `setBatch`, and `deleteBatch` chunked to DynamoDB's 25-item limit +- **Paginated invalidation** — `invalidateByCriteria` uses GSI queries when possible, falling back to `Scan` for broad criteria +- **Stats** — `getStats()` returns item count and table size from `DescribeTable` +- **Health check** — `healthCheck()` runs a lightweight `Scan` (limit 1) + +## Quick Start ```typescript -import { DynamoDBAdapter } from '@llm-cache/adapters-dynamodb'; +import { CacheEngine, OpenAIEmbedder } from "@reaatech/llm-cache"; +import { DynamoDBAdapter } from "@reaatech/llm-cache-adapters-dynamodb"; -const adapter = new DynamoDBAdapter({ - region: 'us-east-1', - tableName: 'llm-cache', +const storage = new DynamoDBAdapter({ + region: "us-east-1", + tableName: "llm-cache", }); -// Use adapter with CacheEngine const cache = new CacheEngine({ - storage: adapter, - vectorStorage: /* Qdrant or InMemoryAdapter */, - embedder, - config, + storage, + vectorStorage: /* QdrantAdapter or InMemoryAdapter */, + embedder: new OpenAIEmbedder({ + provider: "openai", + model: "text-embedding-3-small", + dimensions: 1536, + apiKey: process.env.OPENAI_API_KEY, + }), + config: { /* ... 
*/ }, }); ``` ## Table Schema -Your DynamoDB table should have: +Your DynamoDB table must have these attributes and indexes: + +| Attribute | Type | Key | Description | +|-----------|------|-----|-------------| +| `pk` | String | HASH | Exact-match key (`promptHash:generationConfigHash`) | +| `gsi1pk` | String | GSI1 HASH | `USECASE#<useCase>` for useCase queries | +| `gsi1sk` | String | GSI1 RANGE | `modelVersion#generationConfigHash` | +| `gsi2pk` | String | GSI2 HASH | `MODEL#<modelVersion>` for model queries | +| `gsi2sk` | String | GSI2 RANGE | `useCase#createdAt` | +| `expiresAtEpoch` | Number | (TTL) | Epoch seconds for native DynamoDB TTL | + +Enable TTL on the `expiresAtEpoch` attribute (or your custom `ttlAttribute` name) in the DynamoDB console. + +## API Reference + +### `DynamoDBAdapter` (class) + +Implements `StorageAdapter` from `@reaatech/llm-cache`. + +```typescript +import { DynamoDBAdapter } from "@reaatech/llm-cache-adapters-dynamodb"; + +const adapter = new DynamoDBAdapter({ + region: "us-east-1", + tableName: "llm-cache", +}); +``` + +#### `DynamoDBAdapterConfig` + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `region` | `string` | (required) | AWS region | +| `tableName` | `string` | (required) | DynamoDB table name | +| `endpoint` | `string` | — | Override endpoint (e.g., DynamoDB Local: `http://localhost:8000`) | +| `ttlAttribute` | `string` | `"expiresAtEpoch"` | Attribute name for native DynamoDB TTL | -- **PK** (partition key) — exact-match key -- **GSI1** (`gsi1pk`, `gsi1sk`) — query by `useCase` -- **GSI2** (`gsi2pk`, `gsi2sk`) — query by `modelVersion` +#### Methods + +| Method | Returns | Description | +|--------|---------|-------------| +| `get(key)` | `Promise<CacheEntry \| null>` | Retrieve and deserialize a cache entry (auto-deletes if expired) | +| `set(key, entry)` | `Promise<void>` | Store an entry with native TTL epoch attribute | +| `delete(key)` | `Promise<boolean>` | Remove a key | +| `exists(key)` | `Promise<boolean>` | Check if a key exists 
(reads then checks expiry) | +| `getBatch(keys)` | `Promise<(CacheEntry \| null)[]>` | Batch retrieve via `BatchGetCommand` | +| `setBatch(items)` | `Promise<void>` | Batch store chunked at 25 items per `BatchWriteCommand` | +| `deleteBatch(keys)` | `Promise<number>` | Batch delete chunked at 25 keys per `BatchWriteCommand` | +| `findByUseCase(useCase, limit?)` | `Promise<CacheEntry[]>` | Query GSI1 for entries by use case | +| `findByModelVersion(modelVersion, limit?)` | `Promise<CacheEntry[]>` | Query GSI2 for entries by model version | +| `invalidateByCriteria(criteria)` | `Promise<number>` | Delete matching entries — uses GSI when possible | +| `getStats()` | `Promise<StorageStats>` | Get `ItemCount` and `TableSizeBytes` from `DescribeTable` | +| `healthCheck()` | `Promise<HealthStatus>` | Run a limit-1 `Scan` and report status | + +## Usage Patterns + +### Local Development with DynamoDB Local + +```typescript +const adapter = new DynamoDBAdapter({ + region: "us-east-1", + tableName: "llm-cache", + endpoint: "http://localhost:8000", +}); +``` + +### Custom TTL Attribute + +```typescript +const adapter = new DynamoDBAdapter({ + region: "us-east-1", + tableName: "llm-cache", + ttlAttribute: "ttl", // Match your table's TTL attribute name +}); +``` + +### IAM Credentials + +The adapter uses the AWS SDK credential chain. Configure via environment variables (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`), IAM roles (EC2, ECS, Lambda), or `~/.aws/credentials`. ## Notes -- Batch writes are chunked to DynamoDB's 25-item limit. -- Semantic search requires a separate vector database (e.g., Qdrant). +- Semantic search requires a separate vector database (e.g., Qdrant). DynamoDB does not support native vector similarity search. +- Batch writes (`setBatch`, `deleteBatch`) are automatically chunked to DynamoDB's 25-item limit across multiple requests. +- `invalidateByCriteria` prefers GSI-indexed criteria (`useCase`, `modelVersion`). Broad invalidation (e.g., `olderThan` only) falls back to a full table `Scan`. 
+ +## Related Packages + +- [`@reaatech/llm-cache`](https://www.npmjs.com/package/@reaatech/llm-cache) — Core caching engine and types +- [`@reaatech/llm-cache-adapters-qdrant`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-qdrant) — Qdrant vector search adapter ## License -MIT +[MIT](https://github.com/reaatech/llm-cache/blob/main/LICENSE) diff --git a/packages/adapters/dynamodb/package.json b/packages/adapters/dynamodb/package.json index e514a63..7cf5bce 100644 --- a/packages/adapters/dynamodb/package.json +++ b/packages/adapters/dynamodb/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/adapters-dynamodb", + "name": "@reaatech/llm-cache-adapters-dynamodb", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "description": "DynamoDB storage adapter for llm-cache", @@ -7,7 +7,8 @@ "engines": { "node": ">=20.0.0" }, - "main": "./dist/index.js", + "main": "./dist/index.cjs", + "module": "./dist/index.js", "types": "./dist/index.d.ts", "license": "MIT", "repository": { @@ -15,39 +16,38 @@ "url": "https://github.com/reaatech/llm-cache.git", "directory": "packages/adapters/dynamodb" }, - "homepage": "https://github.com/reaatech/llm-cache#readme", + "homepage": "https://github.com/reaatech/llm-cache/tree/main/packages/adapters/dynamodb#readme", "bugs": { "url": "https://github.com/reaatech/llm-cache/issues" }, "keywords": ["llm-cache", "dynamodb", "aws", "cache", "adapter"], - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "publishConfig": { "access": "public" }, "exports": { ".": { + "types": "./dist/index.d.ts", "import": "./dist/index.js", - "types": "./dist/index.d.ts" + "require": "./dist/index.cjs" } }, "scripts": { - "build": "tsc --build", + "build": "tsup src/index.ts --format cjs,esm --dts --clean", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", "typecheck": "tsc --noEmit" }, "dependencies": { - "@llm-cache/core": "workspace:*", + "@reaatech/llm-cache": 
"workspace:*", "@aws-sdk/client-dynamodb": "^3.500.0", "@aws-sdk/lib-dynamodb": "^3.500.0" }, "devDependencies": { "@types/node": "^20.11.0", - "vitest": "^1.3.0", - "@vitest/coverage-v8": "^1.3.0" + "tsup": "^8.4.0", + "vitest": "^3.1.1", + "@vitest/coverage-v8": "3.2.4" } } diff --git a/packages/adapters/dynamodb/src/DynamoDBAdapter.test.ts b/packages/adapters/dynamodb/src/DynamoDBAdapter.test.ts index 17fa795..c26f058 100644 --- a/packages/adapters/dynamodb/src/DynamoDBAdapter.test.ts +++ b/packages/adapters/dynamodb/src/DynamoDBAdapter.test.ts @@ -1,6 +1,6 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; +import type { CacheEntry } from '@reaatech/llm-cache'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { DynamoDBAdapter } from './DynamoDBAdapter.js'; -import type { CacheEntry } from '@llm-cache/core'; function makeEntry(overrides?: Partial): CacheEntry { const now = new Date(); @@ -63,7 +63,7 @@ describe('DynamoDBAdapter', () => { mockSend = vi.fn().mockResolvedValue({}); adapter = new DynamoDBAdapter({ region: 'us-east-1', tableName: 'test' }); - // eslint-disable-next-line @typescript-eslint/no-explicit-any + // biome-ignore lint/suspicious/noExplicitAny: test mock injection (adapter as any).client = { send: mockSend } as any; }); @@ -76,8 +76,8 @@ describe('DynamoDBAdapter', () => { mockSend.mockResolvedValueOnce({ Item: itemFromEntry('key', entry) }); const result = await adapter.get('key'); expect(result).not.toBeNull(); - expect(result!.prompt).toBe('test'); - expect(result!.id).toBe('test-id'); + expect(result?.prompt).toBe('test'); + expect(result?.id).toBe('test-id'); }); it('should return null for missing key', async () => { @@ -109,7 +109,7 @@ describe('DynamoDBAdapter', () => { expect(command.input.Item.pk).toBe('key'); expect(command.input.Item.id).toBe(entry.id); expect(command.input.Item.expiresAtEpoch).toBe( - Math.floor(entry.metadata.expiresAt.getTime() / 1000) + 
Math.floor(entry.metadata.expiresAt.getTime() / 1000), ); }); @@ -119,14 +119,12 @@ describe('DynamoDBAdapter', () => { tableName: 'test', ttlAttribute: 'ttl', }); - // eslint-disable-next-line @typescript-eslint/no-explicit-any + // biome-ignore lint/suspicious/noExplicitAny: test mock injection (customAdapter as any).client = { send: mockSend } as any; const entry = makeEntry(); await customAdapter.set('key', entry); const command = mockSend.mock.calls[0][0]; - expect(command.input.Item.ttl).toBe( - Math.floor(entry.metadata.expiresAt.getTime() / 1000) - ); + expect(command.input.Item.ttl).toBe(Math.floor(entry.metadata.expiresAt.getTime() / 1000)); }); it('should delete a key', async () => { diff --git a/packages/adapters/dynamodb/src/DynamoDBAdapter.ts b/packages/adapters/dynamodb/src/DynamoDBAdapter.ts index 6b9fa36..58d0f3d 100644 --- a/packages/adapters/dynamodb/src/DynamoDBAdapter.ts +++ b/packages/adapters/dynamodb/src/DynamoDBAdapter.ts @@ -1,16 +1,21 @@ -import { DynamoDBClient, DescribeTableCommand } from '@aws-sdk/client-dynamodb'; +import { DescribeTableCommand, DynamoDBClient } from '@aws-sdk/client-dynamodb'; import { + BatchGetCommand, + BatchWriteCommand, + DeleteCommand, DynamoDBDocumentClient, GetCommand, PutCommand, - DeleteCommand, - BatchGetCommand, - BatchWriteCommand, QueryCommand, ScanCommand, } from '@aws-sdk/lib-dynamodb'; -import type { CacheEntry, InvalidationCriteria, StorageStats, HealthStatus } from '@llm-cache/core'; -import type { StorageAdapter } from '@llm-cache/core'; +import type { + CacheEntry, + HealthStatus, + InvalidationCriteria, + StorageAdapter, + StorageStats, +} from '@reaatech/llm-cache'; export interface DynamoDBAdapterConfig { region: string; @@ -44,7 +49,7 @@ export class DynamoDBAdapter implements StorageAdapter { new GetCommand({ TableName: this.tableName, Key: { pk: key }, - }) + }), ); if (!result.Item) return null; @@ -63,7 +68,7 @@ export class DynamoDBAdapter implements StorageAdapter { new PutCommand({ 
TableName: this.tableName, Item: this.serialize(key, entry), - }) + }), ); } @@ -72,7 +77,7 @@ export class DynamoDBAdapter implements StorageAdapter { new DeleteCommand({ TableName: this.tableName, Key: { pk: key }, - }) + }), ); return true; } @@ -92,7 +97,7 @@ export class DynamoDBAdapter implements StorageAdapter { Keys: keys.map((k) => ({ pk: k })), }, }, - }) + }), ); const items = result.Responses?.[this.tableName] ?? []; @@ -114,7 +119,7 @@ export class DynamoDBAdapter implements StorageAdapter { PutRequest: { Item: this.serialize(key, entry), }, - }) + }), ); // DynamoDB batch write supports max 25 items per request @@ -125,7 +130,7 @@ export class DynamoDBAdapter implements StorageAdapter { RequestItems: { [this.tableName]: chunk, }, - }) + }), ); } } @@ -138,7 +143,7 @@ export class DynamoDBAdapter implements StorageAdapter { DeleteRequest: { Key: { pk: k }, }, - }) + }), ); for (let i = 0; i < writeRequests.length; i += 25) { @@ -148,7 +153,7 @@ export class DynamoDBAdapter implements StorageAdapter { RequestItems: { [this.tableName]: chunk, }, - }) + }), ); } @@ -165,7 +170,7 @@ export class DynamoDBAdapter implements StorageAdapter { ':pk': `USECASE#${useCase}`, }, Limit: limit, - }) + }), ); return (result.Items ?? []) @@ -183,7 +188,7 @@ export class DynamoDBAdapter implements StorageAdapter { ':pk': `MODEL#${modelVersion}`, }, Limit: limit, - }) + }), ); return (result.Items ?? []) @@ -204,7 +209,7 @@ export class DynamoDBAdapter implements StorageAdapter { KeyConditionExpression: 'gsi1pk = :pk', ExpressionAttributeValues: { ':pk': `USECASE#${criteria.useCase}` }, ExclusiveStartKey: lastKey, - }) + }), ); for (const item of result.Items ?? []) { const entry = this.deserialize(item); @@ -228,7 +233,7 @@ export class DynamoDBAdapter implements StorageAdapter { KeyConditionExpression: 'gsi2pk = :pk', ExpressionAttributeValues: { ':pk': `MODEL#${criteria.modelVersion}` }, ExclusiveStartKey: lastKey, - }) + }), ); for (const item of result.Items ?? 
[]) { const entry = this.deserialize(item); @@ -246,7 +251,7 @@ export class DynamoDBAdapter implements StorageAdapter { let lastKey: Record | undefined; do { const result = await this.client.send( - new ScanCommand({ TableName: this.tableName, ExclusiveStartKey: lastKey }) + new ScanCommand({ TableName: this.tableName, ExclusiveStartKey: lastKey }), ); for (const item of result.Items ?? []) { const entry = this.deserialize(item); @@ -264,7 +269,7 @@ export class DynamoDBAdapter implements StorageAdapter { async getStats(): Promise { try { const result = await this.rawClient.send( - new DescribeTableCommand({ TableName: this.tableName }) + new DescribeTableCommand({ TableName: this.tableName }), ); const itemCount = result.Table?.ItemCount ?? 0; const sizeBytes = result.Table?.TableSizeBytes ?? 0; @@ -290,7 +295,7 @@ export class DynamoDBAdapter implements StorageAdapter { new ScanCommand({ TableName: this.tableName, Limit: 1, - }) + }), ); return { healthy: true }; } catch (error) { @@ -348,7 +353,7 @@ export class DynamoDBAdapter implements StorageAdapter { try { createdAt = new Date(String(metadata.createdAt)); expiresAt = new Date(String(metadata.expiresAt)); - if (isNaN(createdAt.getTime()) || isNaN(expiresAt.getTime())) { + if (Number.isNaN(createdAt.getTime()) || Number.isNaN(expiresAt.getTime())) { createdAt = new Date(); expiresAt = new Date(Date.now() - 1); } @@ -370,8 +375,12 @@ export class DynamoDBAdapter implements StorageAdapter { embeddingDimensions: Number(item.embeddingDimensions ?? 0), useCase: String(item.useCase ?? 
''), sensitive: Boolean(item.sensitive), - tokens: this.coerceTokenCost(item.tokens as Partial<{ prompt: number; completion: number; total: number }> | undefined), - cost: this.coerceTokenCost(item.cost as Partial<{ prompt: number; completion: number; total: number }> | undefined), + tokens: this.coerceTokenCost( + item.tokens as Partial<{ prompt: number; completion: number; total: number }> | undefined, + ), + cost: this.coerceTokenCost( + item.cost as Partial<{ prompt: number; completion: number; total: number }> | undefined, + ), metadata: { createdAt, ttl: Number(metadata.ttl) || 0, @@ -382,7 +391,11 @@ export class DynamoDBAdapter implements StorageAdapter { }; } - private coerceTokenCost(obj?: Partial<{ prompt: number; completion: number; total: number }>): { prompt: number; completion: number; total: number } { + private coerceTokenCost(obj?: Partial<{ prompt: number; completion: number; total: number }>): { + prompt: number; + completion: number; + total: number; + } { if (!obj || typeof obj !== 'object') return { prompt: 0, completion: 0, total: 0 }; const prompt = typeof obj.prompt === 'number' ? obj.prompt : 0; const completion = typeof obj.completion === 'number' ? 
obj.completion : 0; diff --git a/packages/adapters/dynamodb/tsconfig.json b/packages/adapters/dynamodb/tsconfig.json index aba118c..c8c92cb 100644 --- a/packages/adapters/dynamodb/tsconfig.json +++ b/packages/adapters/dynamodb/tsconfig.json @@ -2,10 +2,7 @@ "extends": "../../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src", - "composite": true + "rootDir": "./src" }, - "include": ["src/**/*"], - "exclude": ["dist", "node_modules", "**/*.test.ts"], - "references": [{ "path": "../../core" }] + "include": ["src/**/*"] } diff --git a/packages/adapters/qdrant/LICENSE b/packages/adapters/qdrant/LICENSE new file mode 100644 index 0000000..1390d41 --- /dev/null +++ b/packages/adapters/qdrant/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 llm-cache contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/packages/adapters/qdrant/README.md b/packages/adapters/qdrant/README.md index 8aa04ac..16f9f86 100644 --- a/packages/adapters/qdrant/README.md +++ b/packages/adapters/qdrant/README.md @@ -1,41 +1,180 @@ -# @llm-cache/adapters-qdrant +# @reaatech/llm-cache-adapters-qdrant -Qdrant vector database adapter for llm-cache semantic search. +[![npm version](https://img.shields.io/npm/v/@reaatech/llm-cache-adapters-qdrant.svg)](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-qdrant) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/reaatech/llm-cache/blob/main/LICENSE) +[![CI](https://img.shields.io/github/actions/workflow/status/reaatech/llm-cache/ci.yml?branch=main&label=CI)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) -## Install +> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production. + +Qdrant vector database adapter for llm-cache semantic search. Implements `VectorStorageAdapter` with HNSW approximate nearest neighbor search, metadata filtering, and deterministic UUID-based point IDs for keyspace isolation. 
+ +## Installation ```bash -npm install @llm-cache/adapters-qdrant +npm install @reaatech/llm-cache-adapters-qdrant +# or +pnpm add @reaatech/llm-cache-adapters-qdrant ``` -## Usage +## Feature Overview + +- **HNSW search** — low-latency cosine similarity search via Qdrant's approximate nearest neighbor engine +- **Auto-provisioning** — `connect()` creates the collection and five payload indexes on first run +- **Metadata filtering** — `findSimilar` filters by `useCase`, `modelVersion`, `generationConfigHash`, and `embeddingModel` +- **Deterministic point IDs** — UUID v5 from cache keys for stable point identity across processes +- **Paginated invalidation** — `invalidateByCriteria` scrolls with configurable page size, deletes in batches +- **Hybrid search** — vector similarity combined with filter conditions in a single Qdrant query + +## Quick Start ```typescript -import { QdrantAdapter } from '@llm-cache/adapters-qdrant'; +import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from "@reaatech/llm-cache"; +import { QdrantAdapter } from "@reaatech/llm-cache-adapters-qdrant"; + +const vectorStorage = new QdrantAdapter({ + url: "http://localhost:6333", + collectionName: "llm-cache", + vectorSize: 1536, +}); +await vectorStorage.connect(); + +const cache = new CacheEngine({ + storage: new InMemoryAdapter(), // or RedisAdapter / DynamoDBAdapter + vectorStorage, + embedder: new OpenAIEmbedder({ + provider: "openai", + model: "text-embedding-3-small", + dimensions: 1536, + apiKey: process.env.OPENAI_API_KEY, + }), + config: { /* ... */ }, +}); +``` + +## API Reference + +### `QdrantAdapter` (class) + +Implements `VectorStorageAdapter` from `@reaatech/llm-cache`. 
+ +```typescript +import { QdrantAdapter } from "@reaatech/llm-cache-adapters-qdrant"; const adapter = new QdrantAdapter({ - url: 'http://localhost:6333', - collectionName: 'llm-cache', + url: "http://localhost:6333", + collectionName: "llm-cache", vectorSize: 1536, - distance: 'Cosine', }); await adapter.connect(); +``` -// Use adapter with CacheEngine -const cache = new CacheEngine({ - storage: /* Redis, DynamoDB, or InMemoryAdapter */, - vectorStorage: adapter, - embedder, - config, +#### `QdrantAdapterConfig` + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `url` | `string` | (required) | Qdrant server URL | +| `collectionName` | `string` | (required) | Collection name (created on first `connect()`) | +| `vectorSize` | `number` | (required) | Embedding vector dimensions (e.g., 1536 for `text-embedding-3-small`) | +| `apiKey` | `string` | — | Qdrant API key for authentication | +| `distance` | `"Cosine" \| "Euclid" \| "Dot"` | `"Cosine"` | Distance metric for vector similarity | +| `scrollPageSize` | `number` | `256` | Page size for paginated scroll operations | + +#### Methods + +| Method | Returns | Description | +|--------|---------|-------------| +| `connect()` | `Promise` | Create collection and payload indexes if they don't exist | +| `disconnect()` | `Promise` | Reset initialized state (client stays open) | +| `get(key)` | `Promise` | Retrieve a point by deterministic UUID and deserialize | +| `set(key, entry)` | `Promise` | Upsert a point with embedding vector and payload | +| `delete(key)` | `Promise` | Delete a point by deterministic UUID | +| `exists(key)` | `Promise` | Check if a point exists | +| `getBatch(keys)` | `Promise<(CacheEntry \| null)[]>` | Batch retrieve multiple points | +| `setBatch(items)` | `Promise` | Batch upsert multiple points | +| `deleteBatch(keys)` | `Promise` | Batch delete multiple points | +| `findSimilar(embedding, threshold, filters, limit?)` | `Promise` | Semantic search with 
metadata filtering | +| `findByUseCase(useCase, limit?)` | `Promise` | Scroll entries filtered by use case | +| `findByModelVersion(modelVersion, limit?)` | `Promise` | Scroll entries filtered by model version | +| `invalidateByCriteria(criteria)` | `Promise` | Paginated scroll + batch delete (refuses empty criteria) | +| `getStats()` | `Promise` | Get `points_count` from collection info | +| `healthCheck()` | `Promise` | Call `getCollections()` and report status | + +### `VectorSearchFilters` + +Pass to `findSimilar()` to narrow semantic search results: + +| Property | Type | Description | +|----------|------|-------------| +| `useCase` | `string` | Filter to a specific use case | +| `modelVersion` | `string` | Filter to a specific model version | +| `generationConfigHash` | `string` | Filter to a specific generation config fingerprint | +| `embeddingModel` | `string` | Filter to a specific embedding model | + +## Usage Patterns + +### Semantic Search with Filters + +```typescript +const results = await adapter.findSimilar( + embedding, // query vector + 0.8, // cosine similarity threshold + { + useCase: "qa", + modelVersion: "gpt-4-0613", + }, + 10 // max results +); + +for (const { entry, similarity } of results) { + console.log(`Match (${similarity.toFixed(3)}): ${entry.prompt}`); +} +``` + +### API Key Authentication + +```typescript +const adapter = new QdrantAdapter({ + url: "https://qdrant.example.com", + collectionName: "llm-cache", + vectorSize: 1536, + apiKey: process.env.QDRANT_API_KEY, }); ``` +### Custom Distance Metric + +```typescript +const adapter = new QdrantAdapter({ + url: "http://localhost:6333", + collectionName: "llm-cache", + vectorSize: 1536, + distance: "Dot", // Cosine | Euclid | Dot +}); +``` + +### Periodic Cleanup + +The adapter does not auto-evict expired points. 
Schedule cleanup: + +```typescript +const removed = await adapter.invalidateByCriteria({ + olderThan: new Date(Date.now() - 24 * 3600_000), // older than 24 hours +}); +console.log(`Cleaned ${removed} expired points`); +``` + ## Notes -- Automatically creates the collection and payload indexes on first `connect()`. -- Supports hybrid search (vector similarity + metadata filtering). -- `findSimilar` filters by `useCase`, `modelVersion`, `generationConfigHash`, and `embeddingModel`. +- Auto-created payload indexes: `useCase`, `modelVersion`, `generationConfigHash`, `embeddingModel`, `createdAtMs`. +- `invalidateByCriteria` refuses to delete the entire collection — at least one criterion must be specified. +- Point IDs are deterministic UUID v5 from cache keys using a stable namespace UUID. The same key always maps to the same point. +- Metadata queries (`findByUseCase`, `findByModelVersion`) use `scroll` with payload decoding and expiry filtering. + +## Related Packages + +- [`@reaatech/llm-cache`](https://www.npmjs.com/package/@reaatech/llm-cache) — Core caching engine and types +- [`@reaatech/llm-cache-adapters-redis`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-redis) — Redis storage adapter (exact-match metadata) ## License -MIT +[MIT](https://github.com/reaatech/llm-cache/blob/main/LICENSE) diff --git a/packages/adapters/qdrant/package.json b/packages/adapters/qdrant/package.json index 8db8290..844db7c 100644 --- a/packages/adapters/qdrant/package.json +++ b/packages/adapters/qdrant/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/adapters-qdrant", + "name": "@reaatech/llm-cache-adapters-qdrant", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "description": "Qdrant vector database adapter for llm-cache semantic search", @@ -7,7 +7,8 @@ "engines": { "node": ">=20.0.0" }, - "main": "./dist/index.js", + "main": "./dist/index.cjs", + "module": "./dist/index.js", "types": "./dist/index.d.ts", "license": "MIT", "repository": { 
@@ -15,40 +16,39 @@ "url": "https://github.com/reaatech/llm-cache.git", "directory": "packages/adapters/qdrant" }, - "homepage": "https://github.com/reaatech/llm-cache#readme", + "homepage": "https://github.com/reaatech/llm-cache/tree/main/packages/adapters/qdrant#readme", "bugs": { "url": "https://github.com/reaatech/llm-cache/issues" }, "keywords": ["llm-cache", "qdrant", "vector", "semantic-search", "adapter"], - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "publishConfig": { "access": "public" }, "exports": { ".": { + "types": "./dist/index.d.ts", "import": "./dist/index.js", - "types": "./dist/index.d.ts" + "require": "./dist/index.cjs" } }, "scripts": { - "build": "tsc --build", + "build": "tsup src/index.ts --format cjs,esm --dts --clean", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", "typecheck": "tsc --noEmit" }, "dependencies": { - "@llm-cache/core": "workspace:*", + "@reaatech/llm-cache": "workspace:*", "@qdrant/js-client-rest": "^1.8.0", "uuid": "^9.0.0" }, "devDependencies": { "@types/node": "^20.11.0", "@types/uuid": "^9.0.0", - "vitest": "^1.3.0", - "@vitest/coverage-v8": "^1.3.0" + "tsup": "^8.4.0", + "vitest": "^3.1.1", + "@vitest/coverage-v8": "3.2.4" } } diff --git a/packages/adapters/qdrant/src/QdrantAdapter.test.ts b/packages/adapters/qdrant/src/QdrantAdapter.test.ts index c7bd6f3..10e4744 100644 --- a/packages/adapters/qdrant/src/QdrantAdapter.test.ts +++ b/packages/adapters/qdrant/src/QdrantAdapter.test.ts @@ -1,7 +1,7 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; +import type { CacheEntry } from '@reaatech/llm-cache'; import { v5 as uuidv5 } from 'uuid'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { QdrantAdapter } from './QdrantAdapter.js'; -import type { CacheEntry } from '@llm-cache/core'; const KEY_NAMESPACE = '6ba7b811-9dad-11d1-80b4-00c04fd430c8'; const pointId = (key: string) => uuidv5(key, KEY_NAMESPACE); @@ -92,7 
+92,7 @@ describe('QdrantAdapter', () => { collectionName: 'test-cache', vectorSize: 3, }); - // eslint-disable-next-line @typescript-eslint/no-explicit-any + // biome-ignore lint/suspicious/noExplicitAny: test mock injection (adapter as any).client = mockClient as any; }); @@ -128,10 +128,10 @@ describe('QdrantAdapter', () => { const result = await adapter.get('key'); expect(result).not.toBeNull(); - expect(result!.prompt).toBe('test'); + expect(result?.prompt).toBe('test'); expect(mockClient.retrieve).toHaveBeenCalledWith( 'test-cache', - expect.objectContaining({ ids: [pointId('key')] }) + expect.objectContaining({ ids: [pointId('key')] }), ); }); @@ -202,7 +202,9 @@ describe('QdrantAdapter', () => { it('should pass olderThan as a numeric range filter', async () => { mockClient.scroll.mockResolvedValueOnce({ points: [], next_page_offset: null }); await adapter.invalidateByCriteria({ olderThan: new Date(1700_000_000_000) }); - const filter = mockClient.scroll.mock.calls[0][1].filter as { must: Array<{ range?: unknown }> }; + const filter = mockClient.scroll.mock.calls[0][1].filter as { + must: Array<{ range?: unknown }>; + }; expect(filter.must.some((c) => c.range)).toBe(true); }); diff --git a/packages/adapters/qdrant/src/QdrantAdapter.ts b/packages/adapters/qdrant/src/QdrantAdapter.ts index 0ddb27a..bb2c3f5 100644 --- a/packages/adapters/qdrant/src/QdrantAdapter.ts +++ b/packages/adapters/qdrant/src/QdrantAdapter.ts @@ -1,14 +1,14 @@ import { QdrantClient } from '@qdrant/js-client-rest'; -import { v5 as uuidv5 } from 'uuid'; import type { CacheEntry, - InvalidationCriteria, - StorageStats, HealthStatus, + InvalidationCriteria, SimilarityResult, + StorageStats, VectorSearchFilters, -} from '@llm-cache/core'; -import type { VectorStorageAdapter } from '@llm-cache/core'; + VectorStorageAdapter, +} from '@reaatech/llm-cache'; +import { v5 as uuidv5 } from 'uuid'; // Stable namespace UUID so the same key always maps to the same point ID across processes. 
const KEY_NAMESPACE = '6ba7b811-9dad-11d1-80b4-00c04fd430c8'; @@ -174,18 +174,14 @@ export class QdrantAdapter implements VectorStorageAdapter { } async findByUseCase(useCase: string, limit = 100): Promise { - return this.scrollAll( - { must: [{ key: 'useCase', match: { value: useCase } }] }, - limit, - true - ); + return this.scrollAll({ must: [{ key: 'useCase', match: { value: useCase } }] }, limit, true); } async findByModelVersion(modelVersion: string, limit = 100): Promise { return this.scrollAll( { must: [{ key: 'modelVersion', match: { value: modelVersion } }] }, limit, - true + true, ); } @@ -193,7 +189,7 @@ export class QdrantAdapter implements VectorStorageAdapter { embedding: number[], threshold: number, filters: VectorSearchFilters, - limit = 10 + limit = 10, ): Promise { const mustConditions: Array> = []; @@ -227,7 +223,7 @@ export class QdrantAdapter implements VectorStorageAdapter { return result .map((hit) => ({ - entry: this.deserializeEntry(hit.payload!, (hit.vector as number[]) ?? []), + entry: this.deserializeEntry(hit.payload ?? {}, (hit.vector as number[]) ?? 
[]), similarity: hit.score, })) .filter((r) => !this.isExpired(r.entry)); @@ -319,7 +315,7 @@ export class QdrantAdapter implements VectorStorageAdapter { private async scrollAll( filter: Record, limit: number, - withVector: boolean + withVector: boolean, ): Promise { const out: CacheEntry[] = []; let offset: string | number | undefined; @@ -391,7 +387,7 @@ export class QdrantAdapter implements VectorStorageAdapter { try { createdAt = new Date(String(metadata.createdAt)); expiresAt = new Date(String(metadata.expiresAt)); - if (isNaN(createdAt.getTime()) || isNaN(expiresAt.getTime())) { + if (Number.isNaN(createdAt.getTime()) || Number.isNaN(expiresAt.getTime())) { createdAt = new Date(); expiresAt = new Date(Date.now() - 1); } @@ -413,8 +409,14 @@ export class QdrantAdapter implements VectorStorageAdapter { embeddingDimensions: Number(payload.embeddingDimensions), useCase: String(payload.useCase), sensitive: Boolean(payload.sensitive), - tokens: this.coerceTokenCost(payload.tokens as Partial<{ prompt: number; completion: number; total: number }> | undefined), - cost: this.coerceTokenCost(payload.cost as Partial<{ prompt: number; completion: number; total: number }> | undefined), + tokens: this.coerceTokenCost( + payload.tokens as + | Partial<{ prompt: number; completion: number; total: number }> + | undefined, + ), + cost: this.coerceTokenCost( + payload.cost as Partial<{ prompt: number; completion: number; total: number }> | undefined, + ), metadata: { createdAt, ttl: Number(metadata.ttl) || 0, @@ -425,7 +427,11 @@ export class QdrantAdapter implements VectorStorageAdapter { }; } - private coerceTokenCost(obj?: Partial<{ prompt: number; completion: number; total: number }>): { prompt: number; completion: number; total: number } { + private coerceTokenCost(obj?: Partial<{ prompt: number; completion: number; total: number }>): { + prompt: number; + completion: number; + total: number; + } { if (!obj || typeof obj !== 'object') return { prompt: 0, completion: 0, 
total: 0 }; const prompt = typeof obj.prompt === 'number' ? obj.prompt : 0; const completion = typeof obj.completion === 'number' ? obj.completion : 0; diff --git a/packages/adapters/qdrant/tsconfig.json b/packages/adapters/qdrant/tsconfig.json index aba118c..c8c92cb 100644 --- a/packages/adapters/qdrant/tsconfig.json +++ b/packages/adapters/qdrant/tsconfig.json @@ -2,10 +2,7 @@ "extends": "../../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src", - "composite": true + "rootDir": "./src" }, - "include": ["src/**/*"], - "exclude": ["dist", "node_modules", "**/*.test.ts"], - "references": [{ "path": "../../core" }] + "include": ["src/**/*"] } diff --git a/packages/adapters/redis/LICENSE b/packages/adapters/redis/LICENSE new file mode 100644 index 0000000..1390d41 --- /dev/null +++ b/packages/adapters/redis/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 llm-cache contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/packages/adapters/redis/README.md b/packages/adapters/redis/README.md index d1254ff..693fbda 100644 --- a/packages/adapters/redis/README.md +++ b/packages/adapters/redis/README.md @@ -1,36 +1,126 @@ -# @llm-cache/adapters-redis +# @reaatech/llm-cache-adapters-redis -Redis storage adapter for llm-cache exact-match metadata storage. +[![npm version](https://img.shields.io/npm/v/@reaatech/llm-cache-adapters-redis.svg)](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-redis) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/reaatech/llm-cache/blob/main/LICENSE) +[![CI](https://img.shields.io/github/actions/workflow/status/reaatech/llm-cache/ci.yml?branch=main&label=CI)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) -## Install +> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production. + +Redis storage adapter for llm-cache exact-match metadata. Provides sub-millisecond key-value operations with automatic TTL via `SETEX`, connection pooling with reconnection, and key-space scanning for metadata queries. 
+ +## Installation ```bash -npm install @llm-cache/adapters-redis +npm install @reaatech/llm-cache-adapters-redis +# or +pnpm add @reaatech/llm-cache-adapters-redis ``` -## Usage +## Feature Overview + +- **Automatic TTL** — every `set()` call uses `SETEX` for automatic Redis-side expiration +- **Connection pooling** — single `node-redis` client with configurable reconnect strategy +- **Batch operations** — `getBatch`, `setBatch`, and `deleteBatch` for bulk workloads +- **Metadata queries** — `findByUseCase` and `findByModelVersion` via `SCAN` with in-process filtering +- **Invalidation** — `invalidateByCriteria` supports useCase, modelVersion, generationConfigHash, embeddingModel, olderThan, and promptHash +- **Stats** — `getStats()` returns `keys` count from Redis `INFO keyspace` +- **Health check** — `healthCheck()` pings Redis and reports status + +## Quick Start ```typescript -import { RedisAdapter } from '@llm-cache/adapters-redis'; +import { CacheEngine, OpenAIEmbedder } from "@reaatech/llm-cache"; +import { RedisAdapter } from "@reaatech/llm-cache-adapters-redis"; -const adapter = new RedisAdapter({ url: 'redis://localhost:6379' }); -await adapter.connect(); +const storage = new RedisAdapter({ url: "redis://localhost:6379" }); +await storage.connect(); -// Use adapter with CacheEngine const cache = new CacheEngine({ - storage: adapter, - vectorStorage: /* Qdrant or InMemoryAdapter */, - embedder, - config, + storage, + vectorStorage: /* QdrantAdapter or InMemoryAdapter */, + embedder: new OpenAIEmbedder({ + provider: "openai", + model: "text-embedding-3-small", + dimensions: 1536, + apiKey: process.env.OPENAI_API_KEY, + }), + config: { /* ... */ }, +}); +``` + +## API Reference + +### `RedisAdapter` (class) + +Implements `StorageAdapter` from `@reaatech/llm-cache`. 
+ +```typescript +import { RedisAdapter } from "@reaatech/llm-cache-adapters-redis"; + +const adapter = new RedisAdapter({ url: "redis://localhost:6379" }); +await adapter.connect(); +``` + +#### `RedisAdapterConfig` + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `url` | `string` | (required) | Redis connection URL (supports `redis://`, `rediss://`, password in URL) | +| `keyPrefix` | `string` | `"llm-cache:"` | Prefix prepended to all Redis keys | + +#### Methods + +| Method | Returns | Description | +|--------|---------|-------------| +| `connect()` | `Promise` | Open the Redis connection (idempotent) | +| `disconnect()` | `Promise` | Close the Redis connection | +| `get(key)` | `Promise` | Retrieve and deserialize a cache entry (auto-deletes if expired) | +| `set(key, entry)` | `Promise` | Store an entry with TTL via `SETEX` (deletes instead if TTL ≤ 0) | +| `delete(key)` | `Promise` | Remove a key | +| `exists(key)` | `Promise` | Check if a key exists | +| `getBatch(keys)` | `Promise<(CacheEntry \| null)[]>` | Batch retrieve via `MGET` | +| `setBatch(items)` | `Promise` | Batch store via pipelined `MULTI`/`EXEC` | +| `deleteBatch(keys)` | `Promise` | Batch delete via `DEL` (multiple keys) | +| `findByUseCase(useCase, limit?)` | `Promise` | Scan keyspace for entries matching a use case | +| `findByModelVersion(modelVersion, limit?)` | `Promise` | Scan keyspace for entries matching a model version | +| `invalidateByCriteria(criteria)` | `Promise` | Delete all entries matching criteria (walks full keyspace) | +| `getStats()` | `Promise` | Get approximate entry count from Redis `INFO keyspace` | +| `healthCheck()` | `Promise` | Ping Redis and report `{ healthy: boolean }` | + +## Usage Patterns + +### Custom Key Prefix + +```typescript +const adapter = new RedisAdapter({ + url: "redis://localhost:6379", + keyPrefix: "myapp:cache:", }); +// Keys stored as: myapp:cache:: ``` -## Notes +### Authentication + 
+```typescript
+// Password in URL
+const adapter = new RedisAdapter({ url: "redis://:mypassword@localhost:6379" });
+
+// TLS
+const tlsAdapter = new RedisAdapter({ url: "rediss://localhost:6380" });
+```
+
+## Performance Notes
+
+- `get()` and `set()` operate at sub-millisecond latency for typical payloads.
+- `findByUseCase`, `findByModelVersion`, and `invalidateByCriteria` use `SCAN` and walk the full keyspace — O(N) in cache size. Avoid calling these on hot request paths. Run from background jobs or deploy Redis Stack with RediSearch for indexed metadata queries.
+- `setBatch` pipelines operations through a single `MULTI`/`EXEC` block for efficiency.
+- This adapter implements exact-match metadata storage only. Semantic search requires a `VectorStorageAdapter` (e.g., `QdrantAdapter`).
+
+## Related Packages

-- Redis stores exact-match metadata with TTL via `EXPIRE` (no manual eviction needed).
-- Semantic search requires a vector database (e.g., Qdrant); this adapter does not implement `findSimilar`.
-- **Performance warning**: `findByUseCase`, `findByModelVersion`, and `invalidateByCriteria` all use `SCAN` and walk the full keyspace, fetching each value to filter in process. They are O(N) in the size of the cache and should not be called on a hot request path. For production-scale metadata queries, deploy Redis Stack and add RediSearch indexes, or run these calls from a background job.
+- [`@reaatech/llm-cache`](https://www.npmjs.com/package/@reaatech/llm-cache) — Core caching engine and types +- [`@reaatech/llm-cache-adapters-qdrant`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-qdrant) — Qdrant vector search adapter (pair with this adapter for full exact + semantic caching) ## License -MIT +[MIT](https://github.com/reaatech/llm-cache/blob/main/LICENSE) diff --git a/packages/adapters/redis/package.json b/packages/adapters/redis/package.json index 337f175..43e56f7 100644 --- a/packages/adapters/redis/package.json +++ b/packages/adapters/redis/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/adapters-redis", + "name": "@reaatech/llm-cache-adapters-redis", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "description": "Redis storage adapter for llm-cache", @@ -7,7 +7,8 @@ "engines": { "node": ">=20.0.0" }, - "main": "./dist/index.js", + "main": "./dist/index.cjs", + "module": "./dist/index.js", "types": "./dist/index.d.ts", "license": "MIT", "repository": { @@ -15,38 +16,37 @@ "url": "https://github.com/reaatech/llm-cache.git", "directory": "packages/adapters/redis" }, - "homepage": "https://github.com/reaatech/llm-cache#readme", + "homepage": "https://github.com/reaatech/llm-cache/tree/main/packages/adapters/redis#readme", "bugs": { "url": "https://github.com/reaatech/llm-cache/issues" }, "keywords": ["llm-cache", "redis", "cache", "adapter"], - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "publishConfig": { "access": "public" }, "exports": { ".": { + "types": "./dist/index.d.ts", "import": "./dist/index.js", - "types": "./dist/index.d.ts" + "require": "./dist/index.cjs" } }, "scripts": { - "build": "tsc --build", + "build": "tsup src/index.ts --format cjs,esm --dts --clean", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", "typecheck": "tsc --noEmit" }, "dependencies": { - "@llm-cache/core": "workspace:*", + "@reaatech/llm-cache": 
"workspace:*", "redis": "^4.6.0" }, "devDependencies": { "@types/node": "^20.11.0", - "vitest": "^1.3.0", - "@vitest/coverage-v8": "^1.3.0" + "tsup": "^8.4.0", + "vitest": "^3.1.1", + "@vitest/coverage-v8": "3.2.4" } } diff --git a/packages/adapters/redis/src/RedisAdapter.test.ts b/packages/adapters/redis/src/RedisAdapter.test.ts index d3ec320..0411520 100644 --- a/packages/adapters/redis/src/RedisAdapter.test.ts +++ b/packages/adapters/redis/src/RedisAdapter.test.ts @@ -1,6 +1,6 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest'; +import type { CacheEntry } from '@reaatech/llm-cache'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { RedisAdapter } from './RedisAdapter.js'; -import type { CacheEntry } from '@llm-cache/core'; function makeEntry(overrides?: Partial): CacheEntry { const now = new Date(); @@ -68,7 +68,7 @@ describe('RedisAdapter', () => { }; adapter = new RedisAdapter({ url: 'redis://localhost:6379' }); - // eslint-disable-next-line @typescript-eslint/no-explicit-any + // biome-ignore lint/suspicious/noExplicitAny: test mock injection (adapter as any).client = mockClient as any; }); @@ -94,8 +94,8 @@ describe('RedisAdapter', () => { mockClient.get.mockResolvedValueOnce(JSON.stringify(entry)); const result = await adapter.get('key'); expect(result).not.toBeNull(); - expect(result!.prompt).toBe('test'); - expect(result!.metadata.createdAt instanceof Date).toBe(true); + expect(result?.prompt).toBe('test'); + expect(result?.metadata.createdAt instanceof Date).toBe(true); }); it('should return null for missing key', async () => { diff --git a/packages/adapters/redis/src/RedisAdapter.ts b/packages/adapters/redis/src/RedisAdapter.ts index c69b6d9..043186d 100644 --- a/packages/adapters/redis/src/RedisAdapter.ts +++ b/packages/adapters/redis/src/RedisAdapter.ts @@ -1,6 +1,11 @@ -import { createClient, type RedisClientType } from 'redis'; -import type { CacheEntry, InvalidationCriteria, StorageStats, HealthStatus } from 
'@llm-cache/core'; -import type { StorageAdapter } from '@llm-cache/core'; +import type { + CacheEntry, + HealthStatus, + InvalidationCriteria, + StorageAdapter, + StorageStats, +} from '@reaatech/llm-cache'; +import { type RedisClientType, createClient } from 'redis'; export interface RedisAdapterConfig { url: string; @@ -231,7 +236,7 @@ export class RedisAdapter implements StorageAdapter { try { const info = await this.client.info('keyspace'); const match = info?.match(/keys=(\d+)/); - totalEntries = match ? parseInt(match[1], 10) : 0; + totalEntries = match ? Number.parseInt(match[1], 10) : 0; } catch { // info('keyspace') may not be available on all Redis versions } diff --git a/packages/adapters/redis/tsconfig.json b/packages/adapters/redis/tsconfig.json index aba118c..c8c92cb 100644 --- a/packages/adapters/redis/tsconfig.json +++ b/packages/adapters/redis/tsconfig.json @@ -2,10 +2,7 @@ "extends": "../../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src", - "composite": true + "rootDir": "./src" }, - "include": ["src/**/*"], - "exclude": ["dist", "node_modules", "**/*.test.ts"], - "references": [{ "path": "../../core" }] + "include": ["src/**/*"] } diff --git a/packages/core/LICENSE b/packages/core/LICENSE new file mode 100644 index 0000000..1390d41 --- /dev/null +++ b/packages/core/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 llm-cache contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/core/README.md b/packages/core/README.md index 0aa6c69..a25476c 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -1,38 +1,57 @@ -# @llm-cache/core +# @reaatech/llm-cache -Core caching engine for llm-cache — semantic and exact-match caching with embedding-based similarity. +[![npm version](https://img.shields.io/npm/v/@reaatech/llm-cache.svg)](https://www.npmjs.com/package/@reaatech/llm-cache) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/reaatech/llm-cache/blob/main/LICENSE) +[![CI](https://img.shields.io/github/actions/workflow/status/reaatech/llm-cache/ci.yml?branch=main&label=CI)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) -## Install +> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production. + +Canonical caching engine for LLM calls — semantic and exact-match caching with embedding-based similarity matching, model-aware fingerprinting, use-case segmentation, and adaptive TTL. 
+ +## Installation ```bash -npm install @llm-cache/core +npm install @reaatech/llm-cache +# or +pnpm add @reaatech/llm-cache ``` -## Usage +## Feature Overview + +- **Exact-match cache** — SHA-256 hash of the full prompt for sub-millisecond cache hits +- **Semantic cache** — Embed prompts and search for similar cached entries above a configurable cosine similarity threshold +- **Generation config fingerprinting** — Model, temperature, top_p, system prompt, and tools are hashed so different configurations never collide +- **Use-case segmentation** — Isolate caches by use case to prevent cross-contamination (e.g., summarization vs. classification) +- **Adaptive TTL** — Different TTLs for factual, creative, analytical, and sensitive data +- **Cost-aware** — Optional `CostCalculatorLike` integration for tracking savings per cache hit +- **Encryption-ready** — Pluggable `EncryptionService` for encrypting prompts, responses, and embeddings at the storage layer +- **Zod-validated config** — `CacheConfigSchema` validates the full configuration object at startup + +## Quick Start ```typescript -import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from '@llm-cache/core'; +import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from "@reaatech/llm-cache"; const cache = new CacheEngine({ storage: new InMemoryAdapter(), vectorStorage: new InMemoryAdapter(), embedder: new OpenAIEmbedder({ - provider: 'openai', - model: 'text-embedding-3-small', + provider: "openai", + model: "text-embedding-3-small", dimensions: 1536, apiKey: process.env.OPENAI_API_KEY, }), config: { - storage: { adapter: 'memory' }, - vectorStorage: { adapter: 'memory' }, + storage: { adapter: "memory" }, + vectorStorage: { adapter: "memory" }, embedding: { - provider: 'openai', - model: 'text-embedding-3-small', + provider: "openai", + model: "text-embedding-3-small", dimensions: 1536, batchSize: 100, maxRetries: 3, }, - similarity: { threshold: 0.8, metric: 'cosine', maxResults: 10 }, + similarity: { 
threshold: 0.8, metric: "cosine", maxResults: 10 }, ttl: { default: 3600, factual: 1800, @@ -41,37 +60,200 @@ const cache = new CacheEngine({ sensitive: 600, byUseCase: {}, }, - segmentation: { enabled: true, defaultUseCase: 'general' }, - cost: { enabled: true, currency: 'USD' }, - observability: { metrics: true, tracing: false, logging: 'info' }, + segmentation: { enabled: true, defaultUseCase: "general" }, + cost: { enabled: true, currency: "USD" }, + observability: { metrics: true, tracing: false, logging: "info" }, }, }); +// Store a response await cache.set( - 'What is TypeScript?', - { answer: 'A typed superset of JavaScript' }, - { - model: 'gpt-4', - modelVersion: 'gpt-4-0613', - } + "What is TypeScript?", + { answer: "A typed superset of JavaScript" }, + { model: "gpt-4", modelVersion: "gpt-4-0613" }, +); + +// Exact match — < 1ms +const exact = await cache.get("What is TypeScript?", { + model: "gpt-4", + modelVersion: "gpt-4-0613", +}); +// → { hit: true, type: "exact", entry: {...} } + +// Semantic match — uses embedding similarity +const semantic = await cache.get("Tell me about TypeScript", { + model: "gpt-4", + modelVersion: "gpt-4-0613", +}); +// → { hit: true, type: "semantic", confidence: 0.92, entry: {...} } +``` + +## API Reference + +### `CacheEngine` + +The main caching orchestrator. Performs multi-stage lookup: exact match → semantic search → cache miss. 
+
+```typescript
+import { CacheEngine } from "@reaatech/llm-cache";
+
+const engine = new CacheEngine({ storage, vectorStorage, embedder, config });
+```
+
+#### `CacheEngineDependencies`
+
+| Property | Type | Required | Description |
+|----------|------|----------|-------------|
+| `storage` | `StorageAdapter` | Yes | Exact-match metadata store (e.g., `InMemoryAdapter`, `RedisAdapter`, `DynamoDBAdapter`) |
+| `vectorStorage` | `VectorStorageAdapter` | Yes | Vector search store for semantic matching (e.g., `InMemoryAdapter`, `QdrantAdapter`) |
+| `embedder` | `EmbeddingProvider` | Yes | Embedding generation (e.g., `OpenAIEmbedder`) |
+| `config` | `CacheConfig` | Yes | Full cache configuration (Zod-validated) |
+| `costCalculator` | `CostCalculatorLike` | No | Optional cost tracking integration |
+| `encryptionService` | `EncryptionService` | No | Optional encryption for prompts/responses/embeddings |
+
+#### Methods
+
+| Method | Returns | Description |
+|--------|---------|-------------|
+| `get(prompt, options?)` | `Promise<CacheResult>` | Look up a prompt: exact → semantic → miss |
+| `set(prompt, response, options?, metadata?)` | `Promise<CacheEntry>` | Store a response and its embedding |
+| `invalidate(criteria)` | `Promise<InvalidateResult>` | Delete entries matching criteria (useCase, modelVersion, olderThan, etc.)
| +| `healthCheck()` | `Promise<{ storage: HealthStatus; vectorStorage: HealthStatus }>` | Check storage and vector backend health | + +### `CacheOptions` + +| Property | Type | Description | +|----------|------|-------------| +| `useCase` | `string` | Cache segment namespace | +| `model` | `string` | Model identifier | +| `modelVersion` | `string` | Specific model version | +| `generationConfigHash` | `string` | Pre-computed fingerprint (auto-generated if omitted) | +| `temperature` | `number` | Sampling temperature (affects fingerprint) | +| `topP` | `number` | Nucleus sampling parameter (affects fingerprint) | +| `maxTokens` | `number` | Max completion tokens (affects fingerprint) | +| `systemPrompt` | `string` | System prompt (affects fingerprint) | +| `tools` | `unknown[]` | Tool definitions (affect fingerprint) | +| `responseFormat` | `"text" \| "json_object" \| "json_schema"` | Response format (affects fingerprint) | + +### `CacheResult` + +A discriminated union returned by `get()`: + +```typescript +type CacheResult = + | { hit: true; type: "exact" | "semantic"; entry: CacheEntry; confidence?: number; similarity?: number; cachedAt: Date; age: number } + | { hit: false; reason: "not_found" | "below_threshold" | "expired" | "dimension_mismatch" }; +``` + +### `CacheMetadata` + +Pass to `set()` to control TTL, sensitivity, and token tracking: + +| Property | Type | Description | +|----------|------|-------------| +| `queryType` | `"factual" \| "creative" \| "analytical"` | Determines TTL from config | +| `ttl` | `number` | Override TTL in seconds | +| `sensitive` | `boolean` | Mark entry for encryption and shorter TTL | +| `tokens` | `{ prompt: number; completion: number }` | Token usage for cost calculation | + +### `InvalidationCriteria` + +| Property | Type | Description | +|----------|------|-------------| +| `useCase` | `string` | Invalidate all entries in a use case | +| `modelVersion` | `string` | Invalidate by model version | +| `generationConfigHash` 
| `string` | Invalidate by fingerprint | +| `embeddingModel` | `string` | Invalidate by embedding model | +| `olderThan` | `Date` | Invalidate entries created before this time | +| `promptHash` | `string` | Invalidate a specific prompt hash | + +### Adapters & Embedder + +| Export | Description | +|--------|-------------| +| `InMemoryAdapter` | In-memory storage/vector adapter with LRU eviction and TTL cleanup | +| `OpenAIEmbedder` | OpenAI embedding provider with batch processing and retry | +| `SimilarityMatcher` | Cosine similarity matcher with configurable threshold | + +### Config + +| Export | Description | +|--------|-------------| +| `CacheConfig` | TypeScript interface for the full configuration tree | +| `CacheConfigSchema` | Zod schema — use `safeParse` to validate at startup | + +### Utility Functions + +| Export | Description | +|--------|-------------| +| `buildPromptHash(prompt)` | SHA-256 hex hash of a prompt string | +| `buildCacheFingerprint(options)` | SHA-256 hash of the generation configuration | +| `buildExactMatchKey(options)` | Composite key: `promptHash:generationConfigHash` | + +### Encryption + +| Export | Description | +|--------|-------------| +| `EncryptionService` | AES-256-GCM encryption for prompts, responses, and embeddings | +| `EncryptedPayload` | Type for the encrypted output (`{ ciphertext, iv, tag }`) | + +## Usage Patterns + +### Use Case Segmentation + +```typescript +// Each use case has an isolated cache namespace +await cache.set("classify: spam", { label: "spam" }, { + model: "gpt-4", + modelVersion: "gpt-4-0613", + useCase: "classification", +}); + +// Same prompt in a different use case will miss +const result = await cache.get("classify: spam", { + model: "gpt-4", + modelVersion: "gpt-4-0613", + useCase: "summarization", +}); +// → { hit: false, reason: "not_found" } +``` + +### Sensitive Data Handling + +```typescript +const entry = await cache.set( + "Patient: John Doe, SSN: 123-45-6789", + response, + { model: 
"gpt-4", modelVersion: "gpt-4-0613" }, + { sensitive: true, ttl: 600 }, ); +// Entry gets the config's `sensitive` TTL (600s default) +// Encryption is applied if encryptionService is configured +``` + +### Model Rotation + +```typescript +// After upgrading from gpt-4 to gpt-4-turbo, invalidate old entries +const removed = await cache.invalidate({ modelVersion: "gpt-4-0613" }); +console.log(`Cleared ${removed.total} old model entries`); -const result = await cache.get('What is TypeScript?', { - model: 'gpt-4', - modelVersion: 'gpt-4-0613', +// New requests with gpt-4-turbo will generate fresh cache entries +const result = await cache.get(prompt, { + model: "gpt-4", + modelVersion: "gpt-4-turbo", }); -// { hit: true, type: 'exact', entry: {...} } ``` -## Exports +## Related Packages -- `CacheEngine` — Main caching orchestrator -- `InMemoryAdapter` — In-memory storage/vector adapter -- `OpenAIEmbedder` — OpenAI embedding provider -- `SimilarityMatcher` — Cosine similarity matcher -- `CacheConfigSchema` — Zod config validation schema -- `buildPromptHash`, `buildCacheFingerprint`, `buildExactMatchKey` — Hash utilities +- [`@reaatech/llm-cache-adapters-redis`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-redis) — Redis storage adapter +- [`@reaatech/llm-cache-adapters-dynamodb`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-dynamodb) — DynamoDB storage adapter +- [`@reaatech/llm-cache-adapters-qdrant`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-qdrant) — Qdrant vector search adapter +- [`@reaatech/llm-cache-cost-tracker`](https://www.npmjs.com/package/@reaatech/llm-cache-cost-tracker) — Cost calculation and pricing data +- [`@reaatech/llm-cache-observability`](https://www.npmjs.com/package/@reaatech/llm-cache-observability) — Structured logging and Prometheus metrics +- [`@reaatech/llm-cache-server`](https://www.npmjs.com/package/@reaatech/llm-cache-server) — HTTP server wrapper ## License -MIT 
+[MIT](https://github.com/reaatech/llm-cache/blob/main/LICENSE) diff --git a/packages/core/package.json b/packages/core/package.json index 08ca96c..5a15e46 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/core", + "name": "@reaatech/llm-cache", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "description": "Core caching engine for llm-cache — semantic and exact-match caching with embedding-based similarity", @@ -7,7 +7,8 @@ "engines": { "node": ">=20.0.0" }, - "main": "./dist/index.js", + "main": "./dist/index.cjs", + "module": "./dist/index.js", "types": "./dist/index.d.ts", "license": "MIT", "repository": { @@ -15,53 +16,24 @@ "url": "https://github.com/reaatech/llm-cache.git", "directory": "packages/core" }, - "homepage": "https://github.com/reaatech/llm-cache#readme", + "homepage": "https://github.com/reaatech/llm-cache/tree/main/packages/core#readme", "bugs": { "url": "https://github.com/reaatech/llm-cache/issues" }, - "keywords": [ - "llm", - "cache", - "semantic-cache", - "embeddings", - "openai", - "vector-search" - ], - "files": [ - "dist", - "README.md" - ], + "keywords": ["llm", "cache", "semantic-cache", "embeddings", "openai", "vector-search"], + "files": ["dist", "README.md"], "publishConfig": { "access": "public" }, "exports": { ".": { + "types": "./dist/index.d.ts", "import": "./dist/index.js", - "types": "./dist/index.d.ts" - }, - "./cache": { - "import": "./dist/cache/index.js", - "types": "./dist/cache/index.d.ts" - }, - "./embedding": { - "import": "./dist/embedding/index.js", - "types": "./dist/embedding/index.d.ts" - }, - "./storage": { - "import": "./dist/storage/index.js", - "types": "./dist/storage/index.d.ts" - }, - "./config": { - "import": "./dist/config/index.js", - "types": "./dist/config/index.d.ts" - }, - "./types": { - "import": "./dist/types/index.js", - "types": "./dist/types/index.d.ts" + "require": "./dist/index.cjs" } }, "scripts": { - "build": 
"tsc --build", + "build": "tsup src/index.ts --format cjs,esm --dts --clean", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", @@ -74,7 +46,8 @@ "devDependencies": { "@types/node": "^20.11.0", "@types/uuid": "^9.0.0", - "vitest": "^1.3.0", - "@vitest/coverage-v8": "^1.3.0" + "tsup": "^8.4.0", + "vitest": "^3.1.1", + "@vitest/coverage-v8": "3.2.4" } } diff --git a/packages/core/src/cache/CacheEngine.integration.test.ts b/packages/core/src/cache/CacheEngine.integration.test.ts index f1c4897..7508567 100644 --- a/packages/core/src/cache/CacheEngine.integration.test.ts +++ b/packages/core/src/cache/CacheEngine.integration.test.ts @@ -1,6 +1,6 @@ -import { describe, it, expect, beforeEach } from 'vitest'; -import { CacheEngine, InMemoryAdapter } from '@llm-cache/core'; -import type { CacheConfig, EmbeddingProvider } from '@llm-cache/core'; +import type { CacheConfig, EmbeddingProvider } from '@reaatech/llm-cache'; +import { CacheEngine, InMemoryAdapter } from '@reaatech/llm-cache'; +import { beforeEach, describe, expect, it } from 'vitest'; class DeterministicEmbedder implements EmbeddingProvider { embed(text: string): Promise { @@ -79,7 +79,7 @@ describe('CacheEngine Integration', () => { await engine.set( 'What is the capital of France?', { answer: 'Paris' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); // Query with a semantically similar but different prompt @@ -100,7 +100,7 @@ describe('CacheEngine Integration', () => { await engine.set( 'What is TypeScript?', { answer: 'A typed superset of JavaScript' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); const result = await engine.get('What is TypeScript?', { @@ -118,7 +118,7 @@ describe('CacheEngine Integration', () => { await engine.set( 'classify: spam', { label: 'spam' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613', useCase: 'classification' } + { model: 
'gpt-4', modelVersion: 'gpt-4-0613', useCase: 'classification' }, ); // Same prompt in a different use case should miss @@ -136,7 +136,7 @@ describe('CacheEngine Integration', () => { 'expires soon', 'value', { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, - { ttl: 0 } // expires immediately + { ttl: 0 }, // expires immediately ); // Manually expire the entry by setting expiresAt in the past @@ -169,17 +169,17 @@ describe('CacheEngine Integration', () => { await engine.set( 'What is JavaScript?', { answer: 'JS' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); await engine.set( 'What is Python?', { answer: 'PY' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); await engine.set( 'What is Rust?', { answer: 'RS' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); const result = await engine.get('Tell me about JavaScript', { diff --git a/packages/core/src/cache/CacheEngine.test.ts b/packages/core/src/cache/CacheEngine.test.ts index fafaa1b..5d356b9 100644 --- a/packages/core/src/cache/CacheEngine.test.ts +++ b/packages/core/src/cache/CacheEngine.test.ts @@ -1,8 +1,8 @@ -import { describe, it, expect, beforeEach } from 'vitest'; -import { CacheEngine } from './CacheEngine.js'; -import { InMemoryAdapter } from '../storage/InMemoryAdapter.js'; -import type { EmbeddingProvider } from '../embedding/EmbeddingProvider.js'; +import { beforeEach, describe, expect, it } from 'vitest'; import type { CacheConfig } from '../config/CacheConfig.js'; +import type { EmbeddingProvider } from '../embedding/EmbeddingProvider.js'; +import { InMemoryAdapter } from '../storage/InMemoryAdapter.js'; +import { CacheEngine } from './CacheEngine.js'; class FakeEmbedder implements EmbeddingProvider { private dimension: number; @@ -15,7 +15,7 @@ class FakeEmbedder implements EmbeddingProvider { return Promise.resolve( 
Array(this.dimension) .fill(0) - .map(() => Math.random()) + .map(() => Math.random()), ); } @@ -94,7 +94,7 @@ describe('CacheEngine', () => { await engine.set( 'hello world', { content: 'hi' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); const result = await engine.get('hello world', { model: 'gpt-4', modelVersion: 'gpt-4-0613' }); @@ -112,7 +112,7 @@ describe('CacheEngine', () => { model: 'gpt-4', modelVersion: 'gpt-4-0613', temperature: 0.5, - } + }, ); // Same prompt, different temperature = different fingerprint = miss @@ -128,7 +128,7 @@ describe('CacheEngine', () => { await engine.set( 'hello world', { content: 'hi' }, - { model: 'gpt-4', modelVersion: 'gpt-4-0613' } + { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, ); const result = await engine.invalidate({ useCase: 'general' }); @@ -162,7 +162,7 @@ describe('CacheEngine', () => { if (result.hit) { expect(typeof result.entry.response).toBe('object'); expect((result.entry.response as typeof complexResponse).choices[0].message.content).toBe( - 'hi' + 'hi', ); } }); @@ -175,7 +175,7 @@ describe('CacheEngine', () => { model: 'gpt-4', modelVersion: 'gpt-4-0613', }, - { queryType: 'factual' } + { queryType: 'factual' }, ); expect(entry.metadata.ttl).toBe(1800); @@ -189,7 +189,7 @@ describe('CacheEngine', () => { model: 'gpt-4', modelVersion: 'gpt-4-0613', }, - { sensitive: true } + { sensitive: true }, ); expect(entry.metadata.ttl).toBe(600); @@ -247,7 +247,7 @@ describe('CacheEngine', () => { 'prompt', 'response', { model: 'gpt-4', modelVersion: 'gpt-4-0613' }, - { tokens: { prompt: 100, completion: 200 } } + { tokens: { prompt: 100, completion: 200 } }, ); expect(entry.tokens.total).toBe(300); expect(entry.cost.total).toBeCloseTo(0.003); diff --git a/packages/core/src/cache/CacheEngine.ts b/packages/core/src/cache/CacheEngine.ts index 0ac9a56..8df9fcf 100644 --- a/packages/core/src/cache/CacheEngine.ts +++ b/packages/core/src/cache/CacheEngine.ts @@ 
-1,18 +1,18 @@ +import { v4 as uuidv4 } from 'uuid'; +import type { CacheConfig } from '../config/CacheConfig.js'; +import type { EmbeddingProvider } from '../embedding/EmbeddingProvider.js'; +import type { StorageAdapter, VectorStorageAdapter } from '../storage/StorageAdapter.js'; import type { CacheEntry, - CacheResult, - CacheOptions, CacheMetadata, - VectorSearchFilters, + CacheOptions, + CacheResult, CostCalculatorLike, InvalidateResult, + VectorSearchFilters, } from '../types/index.js'; -import type { StorageAdapter, VectorStorageAdapter } from '../storage/StorageAdapter.js'; -import type { EmbeddingProvider } from '../embedding/EmbeddingProvider.js'; -import type { CacheConfig } from '../config/CacheConfig.js'; -import type { EncryptionService, EncryptedPayload } from '../utils/encryption.js'; -import { buildPromptHash, buildCacheFingerprint, buildExactMatchKey } from '../utils/hash.js'; -import { v4 as uuidv4 } from 'uuid'; +import type { EncryptedPayload, EncryptionService } from '../utils/encryption.js'; +import { buildCacheFingerprint, buildExactMatchKey, buildPromptHash } from '../utils/hash.js'; export interface CacheEngineDependencies { storage: StorageAdapter; @@ -98,7 +98,7 @@ export class CacheEngine { embedding, this.config.similarity.threshold, filters, - this.config.similarity.maxResults + this.config.similarity.maxResults, ); const fresh = similarEntries.filter((r) => !this.isExpired(r.entry)); @@ -128,15 +128,15 @@ export class CacheEngine { } async getBatch( - prompts: Array<{ prompt: string; options?: CacheOptions }> + prompts: Array<{ prompt: string; options?: CacheOptions }>, ): Promise> { const settled = await Promise.allSettled( - prompts.map(({ prompt, options }) => this.get(prompt, options)) + prompts.map(({ prompt, options }) => this.get(prompt, options)), ); return settled.map((s) => s.status === 'fulfilled' ? 
s.value - : ({ hit: false, reason: 'error', error: errorMessage(s.reason) } as const) + : ({ hit: false, reason: 'error', error: errorMessage(s.reason) } as const), ); } @@ -144,7 +144,7 @@ export class CacheEngine { prompt: string, response: unknown, options?: CacheOptions, - metadata?: CacheMetadata + metadata?: CacheMetadata, ): Promise { if (!prompt) { throw new Error('Prompt must be a non-empty string'); @@ -181,11 +181,7 @@ export class CacheEngine { total: (metadata?.tokens?.prompt ?? 0) + (metadata?.tokens?.completion ?? 0), }; - const cost = this.calculateCost( - options?.model ?? 'unknown', - tokens.prompt, - tokens.completion - ); + const cost = this.calculateCost(options?.model ?? 'unknown', tokens.prompt, tokens.completion); const entry: CacheEntry = { id: uuidv4(), @@ -225,17 +221,17 @@ export class CacheEngine { response: unknown; options?: CacheOptions; metadata?: CacheMetadata; - }> + }>, ): Promise> { const settled = await Promise.allSettled( items.map(({ prompt, response, options, metadata }) => - this.set(prompt, response, options, metadata) - ) + this.set(prompt, response, options, metadata), + ), ); return settled.map((s) => s.status === 'fulfilled' ? 
({ ok: true, entry: s.value } as const) - : ({ ok: false, error: errorMessage(s.reason) } as const) + : ({ ok: false, error: errorMessage(s.reason) } as const), ); } @@ -297,7 +293,7 @@ export class CacheEngine { private calculateCost( model: string, promptTokens: number, - completionTokens: number + completionTokens: number, ): { prompt: number; completion: number; total: number } { if (!this.config.cost.enabled || !this.costCalculator) { return { prompt: 0, completion: 0, total: 0 }; @@ -306,7 +302,7 @@ export class CacheEngine { model, promptTokens, completionTokens, - this.config.cost.currency + this.config.cost.currency, ); return { prompt: breakdown.inputCost, diff --git a/packages/core/src/cache/SimilarityMatcher.test.ts b/packages/core/src/cache/SimilarityMatcher.test.ts index 1e7a62b..d3383bd 100644 --- a/packages/core/src/cache/SimilarityMatcher.test.ts +++ b/packages/core/src/cache/SimilarityMatcher.test.ts @@ -1,7 +1,7 @@ -import { describe, it, expect, vi } from 'vitest'; -import { SimilarityMatcher } from './SimilarityMatcher.js'; +import { describe, expect, it, vi } from 'vitest'; import type { VectorStorageAdapter } from '../storage/StorageAdapter.js'; import type { CacheEntry } from '../types/index.js'; +import { SimilarityMatcher } from './SimilarityMatcher.js'; function makeEntry(embedding: number[], overrides?: Partial): CacheEntry { const now = new Date(); @@ -107,7 +107,7 @@ describe('SimilarityMatcher', () => { it('should throw on dimension mismatch', () => { const matcher = new SimilarityMatcher({} as VectorStorageAdapter); expect(() => matcher.calculateCosineSimilarity([1, 0], [1, 0, 0])).toThrow( - 'dimension mismatch' + 'dimension mismatch', ); }); }); diff --git a/packages/core/src/cache/SimilarityMatcher.ts b/packages/core/src/cache/SimilarityMatcher.ts index e86f65a..6ccbbfa 100644 --- a/packages/core/src/cache/SimilarityMatcher.ts +++ b/packages/core/src/cache/SimilarityMatcher.ts @@ -1,5 +1,5 @@ -import type { SimilarityResult, 
VectorSearchFilters, CacheEntry } from '../types/index.js'; import type { VectorStorageAdapter } from '../storage/StorageAdapter.js'; +import type { CacheEntry, SimilarityResult, VectorSearchFilters } from '../types/index.js'; export class SimilarityMatcher { constructor(private vectorStorage: VectorStorageAdapter) {} @@ -8,7 +8,7 @@ export class SimilarityMatcher { embedding: number[], filters: VectorSearchFilters, threshold: number, - limit = 10 + limit = 10, ): Promise { const results = await this.vectorStorage.findSimilar(embedding, threshold, filters, limit); diff --git a/packages/core/src/config/CacheConfig.test.ts b/packages/core/src/config/CacheConfig.test.ts index 54b8271..d8d2ff7 100644 --- a/packages/core/src/config/CacheConfig.test.ts +++ b/packages/core/src/config/CacheConfig.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from 'vitest'; +import { describe, expect, it } from 'vitest'; import { CacheConfigSchema } from './CacheConfig.js'; describe('CacheConfigSchema', () => { @@ -108,7 +108,7 @@ describe('CacheConfigSchema', () => { CacheConfigSchema.parse({ storage: { adapter: 'postgres' }, vectorStorage: { adapter: 'memory' }, - }) + }), ).toThrow(); }); @@ -118,7 +118,7 @@ describe('CacheConfigSchema', () => { storage: { adapter: 'memory' }, vectorStorage: { adapter: 'memory' }, similarity: { threshold: 1.5 }, - }) + }), ).toThrow(); }); @@ -128,7 +128,7 @@ describe('CacheConfigSchema', () => { storage: { adapter: 'memory' }, vectorStorage: { adapter: 'memory' }, observability: { logging: 'verbose' }, - }) + }), ).toThrow(); }); }); diff --git a/packages/core/src/config/index.ts b/packages/core/src/config/index.ts index 44264a0..169c13b 100644 --- a/packages/core/src/config/index.ts +++ b/packages/core/src/config/index.ts @@ -1 +1 @@ -export { CacheConfigSchema, type CacheConfig } from './CacheConfig.js'; +export { type CacheConfig, CacheConfigSchema } from './CacheConfig.js'; diff --git a/packages/core/src/embedding/OpenAIEmbedder.test.ts 
b/packages/core/src/embedding/OpenAIEmbedder.test.ts index 0244e7a..503ea58 100644 --- a/packages/core/src/embedding/OpenAIEmbedder.test.ts +++ b/packages/core/src/embedding/OpenAIEmbedder.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { OpenAIEmbedder } from './OpenAIEmbedder.js'; describe('OpenAIEmbedder', () => { @@ -18,7 +18,7 @@ describe('OpenAIEmbedder', () => { vi.stubGlobal('fetch', fetchMock); vi.spyOn( embedder as unknown as { sleep: (ms: number) => Promise }, - 'sleep' + 'sleep', ).mockResolvedValue(); }); @@ -141,7 +141,7 @@ describe('OpenAIEmbedder', () => { mockResponse([ [0.1, 0.2, 0.3], [0.4, 0.5, 0.6], - ]) + ]), ); const results = await embedder.embedBatch(['hello', 'world']); diff --git a/packages/core/src/embedding/OpenAIEmbedder.ts b/packages/core/src/embedding/OpenAIEmbedder.ts index f234db9..e633ac6 100644 --- a/packages/core/src/embedding/OpenAIEmbedder.ts +++ b/packages/core/src/embedding/OpenAIEmbedder.ts @@ -37,7 +37,7 @@ export class OpenAIEmbedder implements EmbeddingProvider { if (cached) { if (expectedDimensions && cached.length !== expectedDimensions) { throw new Error( - `Embedding dimension mismatch: cached=${cached.length}, expected=${expectedDimensions}` + `Embedding dimension mismatch: cached=${cached.length}, expected=${expectedDimensions}`, ); } // Refresh LRU position @@ -64,7 +64,7 @@ export class OpenAIEmbedder implements EmbeddingProvider { if (cached) { if (expectedDimensions && cached.length !== expectedDimensions) { throw new Error( - `Embedding dimension mismatch: cached=${cached.length}, expected=${expectedDimensions}` + `Embedding dimension mismatch: cached=${cached.length}, expected=${expectedDimensions}`, ); } this.cache.delete(cacheKey); @@ -91,10 +91,7 @@ export class OpenAIEmbedder implements EmbeddingProvider { return out; } - private async fetchEmbeddings( - texts: string[], - 
expectedDimensions?: number - ): Promise { + private async fetchEmbeddings(texts: string[], expectedDimensions?: number): Promise { let lastError: Error | undefined; for (let attempt = 1; attempt <= this.maxRetries; attempt++) { @@ -147,7 +144,7 @@ export class OpenAIEmbedder implements EmbeddingProvider { for (const e of embeddings) { if (e.length !== expectedDimensions) { throw new Error( - `Embedding dimension mismatch: received=${e.length}, expected=${expectedDimensions}` + `Embedding dimension mismatch: received=${e.length}, expected=${expectedDimensions}`, ); } } @@ -184,7 +181,7 @@ export class OpenAIEmbedder implements EmbeddingProvider { } private backoffMs(attempt: number): number { - return Math.pow(2, attempt) * 1000; + return 2 ** attempt * 1000; } private sleep(ms: number): Promise { diff --git a/packages/core/src/embedding/index.ts b/packages/core/src/embedding/index.ts index a68dd13..e6ecc6e 100644 --- a/packages/core/src/embedding/index.ts +++ b/packages/core/src/embedding/index.ts @@ -1,2 +1,2 @@ -export { type EmbeddingProvider, type EmbeddingProviderConfig } from './EmbeddingProvider.js'; +export type { EmbeddingProvider, EmbeddingProviderConfig } from './EmbeddingProvider.js'; export { OpenAIEmbedder } from './OpenAIEmbedder.js'; diff --git a/packages/core/src/index.test.ts b/packages/core/src/index.test.ts index 26e5fa5..edbbf90 100644 --- a/packages/core/src/index.test.ts +++ b/packages/core/src/index.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from 'vitest'; +import { describe, expect, it } from 'vitest'; import * as core from './index.js'; describe('core exports', () => { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2636056..9ee1fa1 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -1,10 +1,10 @@ -export * from './types/index.js'; -export * from './storage/StorageAdapter.js'; -export * from './storage/InMemoryAdapter.js'; +export * from './cache/CacheEngine.js'; +export * 
from './cache/SimilarityMatcher.js'; +export * from './config/CacheConfig.js'; export * from './embedding/EmbeddingProvider.js'; export * from './embedding/OpenAIEmbedder.js'; -export * from './config/CacheConfig.js'; -export * from './utils/hash.js'; +export * from './storage/InMemoryAdapter.js'; +export * from './storage/StorageAdapter.js'; +export * from './types/index.js'; export * from './utils/encryption.js'; -export * from './cache/CacheEngine.js'; -export * from './cache/SimilarityMatcher.js'; +export * from './utils/hash.js'; diff --git a/packages/core/src/storage/InMemoryAdapter.test.ts b/packages/core/src/storage/InMemoryAdapter.test.ts index 4fe2d28..36619c7 100644 --- a/packages/core/src/storage/InMemoryAdapter.test.ts +++ b/packages/core/src/storage/InMemoryAdapter.test.ts @@ -1,6 +1,6 @@ -import { describe, it, expect, beforeEach } from 'vitest'; -import { InMemoryAdapter } from './InMemoryAdapter.js'; +import { beforeEach, describe, expect, it } from 'vitest'; import type { CacheEntry } from '../types/index.js'; +import { InMemoryAdapter } from './InMemoryAdapter.js'; function makeEntry(embedding: number[], overrides?: Partial): CacheEntry { const now = new Date(); @@ -42,7 +42,7 @@ describe('InMemoryAdapter', () => { const retrieved = await adapter.get('key1'); expect(retrieved).not.toBeNull(); - expect(retrieved!.prompt).toBe('test prompt'); + expect(retrieved?.prompt).toBe('test prompt'); }); it('should return null for missing keys', async () => { @@ -107,5 +107,4 @@ describe('InMemoryAdapter', () => { const stats = await smallAdapter.getStats(); expect(stats.totalEntries).toBe(2); }); - }); diff --git a/packages/core/src/storage/InMemoryAdapter.ts b/packages/core/src/storage/InMemoryAdapter.ts index 8b83f5e..2640523 100644 --- a/packages/core/src/storage/InMemoryAdapter.ts +++ b/packages/core/src/storage/InMemoryAdapter.ts @@ -1,9 +1,9 @@ import type { CacheEntry, - InvalidationCriteria, - StorageStats, HealthStatus, + InvalidationCriteria, 
SimilarityResult, + StorageStats, VectorSearchFilters, } from '../types/index.js'; import type { VectorStorageAdapter } from './StorageAdapter.js'; @@ -52,7 +52,7 @@ export class InMemoryAdapter implements VectorStorageAdapter { if (deleted && entry) { this.stats.totalSizeBytes = Math.max( 0, - this.stats.totalSizeBytes - JSON.stringify(entry).length + this.stats.totalSizeBytes - JSON.stringify(entry).length, ); } return Promise.resolve(deleted); @@ -112,7 +112,7 @@ export class InMemoryAdapter implements VectorStorageAdapter { embedding: number[], threshold: number, filters: VectorSearchFilters, - limit = 10 + limit = 10, ): Promise { const results: SimilarityResult[] = []; @@ -157,7 +157,7 @@ export class InMemoryAdapter implements VectorStorageAdapter { if (match) { this.stats.totalSizeBytes = Math.max( 0, - this.stats.totalSizeBytes - JSON.stringify(entry).length + this.stats.totalSizeBytes - JSON.stringify(entry).length, ); this.cache.delete(key); count++; diff --git a/packages/core/src/storage/StorageAdapter.ts b/packages/core/src/storage/StorageAdapter.ts index 8f5e32f..4e601c0 100644 --- a/packages/core/src/storage/StorageAdapter.ts +++ b/packages/core/src/storage/StorageAdapter.ts @@ -1,9 +1,9 @@ import type { CacheEntry, - InvalidationCriteria, - StorageStats, HealthStatus, + InvalidationCriteria, SimilarityResult, + StorageStats, VectorSearchFilters, } from '../types/index.js'; @@ -39,6 +39,6 @@ export interface VectorStorageAdapter extends StorageAdapter { embedding: number[], threshold: number, filters: VectorSearchFilters, - limit?: number + limit?: number, ): Promise; } diff --git a/packages/core/src/storage/index.ts b/packages/core/src/storage/index.ts index a9209ce..50d5cb5 100644 --- a/packages/core/src/storage/index.ts +++ b/packages/core/src/storage/index.ts @@ -1,2 +1,2 @@ -export { type StorageAdapter, type VectorStorageAdapter } from './StorageAdapter.js'; export { InMemoryAdapter, type InMemoryAdapterOptions } from './InMemoryAdapter.js'; 
+export type { StorageAdapter, VectorStorageAdapter } from './StorageAdapter.js'; diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index b8f2bda..20de575 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -75,7 +75,7 @@ export interface CostCalculatorLike { model: string, promptTokens: number, completionTokens: number, - currency?: string + currency?: string, ): { inputCost: number; outputCost: number; diff --git a/packages/core/src/utils/encryption.test.ts b/packages/core/src/utils/encryption.test.ts index 05c4e7d..4d9c833 100644 --- a/packages/core/src/utils/encryption.test.ts +++ b/packages/core/src/utils/encryption.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from 'vitest'; +import { describe, expect, it } from 'vitest'; import { EncryptionService } from './encryption.js'; describe('EncryptionService', () => { @@ -38,7 +38,7 @@ describe('EncryptionService', () => { it('should throw on tampered ciphertext', () => { const encrypted = service.encrypt('secret'); - encrypted.ciphertext = encrypted.ciphertext.slice(0, -4) + 'dead'; + encrypted.ciphertext = `${encrypted.ciphertext.slice(0, -4)}dead`; expect(() => service.decrypt(encrypted)).toThrow(); }); }); diff --git a/packages/core/src/utils/encryption.ts b/packages/core/src/utils/encryption.ts index b1a8588..92f81ae 100644 --- a/packages/core/src/utils/encryption.ts +++ b/packages/core/src/utils/encryption.ts @@ -24,9 +24,7 @@ export class EncryptionService { this._key = keyOrPassphrase; this._salt = Buffer.from(options.salt ?? randomBytes(SALT_LENGTH)); } else { - this._salt = options.salt - ? Buffer.from(options.salt) - : randomBytes(SALT_LENGTH); + this._salt = options.salt ? 
Buffer.from(options.salt) : randomBytes(SALT_LENGTH); this._key = scryptSync(String(keyOrPassphrase), this._salt, KEY_LENGTH); } } diff --git a/packages/core/src/utils/hash.test.ts b/packages/core/src/utils/hash.test.ts index 4aaedfd..321b39b 100644 --- a/packages/core/src/utils/hash.test.ts +++ b/packages/core/src/utils/hash.test.ts @@ -1,5 +1,5 @@ -import { describe, it, expect } from 'vitest'; -import { sha256, buildPromptHash, buildCacheFingerprint, buildExactMatchKey } from './hash.js'; +import { describe, expect, it } from 'vitest'; +import { buildCacheFingerprint, buildExactMatchKey, buildPromptHash, sha256 } from './hash.js'; describe('hash utilities', () => { it('sha256 should produce a 64-character hex string', () => { diff --git a/packages/core/src/utils/hash.ts b/packages/core/src/utils/hash.ts index 8256ecd..5733dac 100644 --- a/packages/core/src/utils/hash.ts +++ b/packages/core/src/utils/hash.ts @@ -50,7 +50,7 @@ export function buildCacheFingerprint(options: { export function buildExactMatchKey( promptHash: string, useCase: string, - generationConfigHash: string + generationConfigHash: string, ): string { // Hash the useCase to neutralize delimiter collisions and any user-supplied special chars. 
const safeUseCase = sha256(useCase).slice(0, 16); diff --git a/packages/core/src/utils/index.ts b/packages/core/src/utils/index.ts index 08f5216..b4275d9 100644 --- a/packages/core/src/utils/index.ts +++ b/packages/core/src/utils/index.ts @@ -1,2 +1,2 @@ -export { sha256, buildPromptHash, buildCacheFingerprint, buildExactMatchKey } from './hash.js'; export { EncryptionService } from './encryption.js'; +export { buildCacheFingerprint, buildExactMatchKey, buildPromptHash, sha256 } from './hash.js'; diff --git a/packages/core/tsconfig.json b/packages/core/tsconfig.json index 53dc78f..90d76d7 100644 --- a/packages/core/tsconfig.json +++ b/packages/core/tsconfig.json @@ -2,9 +2,7 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src", - "composite": true + "rootDir": "./src" }, - "include": ["src/**/*"], - "exclude": ["dist", "node_modules", "**/*.test.ts"] + "include": ["src/**/*"] } diff --git a/packages/cost-tracker/LICENSE b/packages/cost-tracker/LICENSE new file mode 100644 index 0000000..1390d41 --- /dev/null +++ b/packages/cost-tracker/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 llm-cache contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/cost-tracker/README.md b/packages/cost-tracker/README.md index 0db1c17..243d0bc 100644 --- a/packages/cost-tracker/README.md +++ b/packages/cost-tracker/README.md @@ -1,33 +1,183 @@ -# @llm-cache/cost-tracker +# @reaatech/llm-cache-cost-tracker -Cost tracking and pricing calculations for llm-cache. +[![npm version](https://img.shields.io/npm/v/@reaatech/llm-cache-cost-tracker.svg)](https://www.npmjs.com/package/@reaatech/llm-cache-cost-tracker) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/reaatech/llm-cache/blob/main/LICENSE) +[![CI](https://img.shields.io/github/actions/workflow/status/reaatech/llm-cache/ci.yml?branch=main&label=CI)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) -## Install +> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production. + +Cost calculator and model pricing database for llm-cache. Computes per-request costs, tracks savings from cache hits, and ships with reference pricing for 40+ models across OpenAI, Anthropic, and Google. 
+ +## Installation ```bash -npm install @llm-cache/cost-tracker +npm install @reaatech/llm-cache-cost-tracker +# or +pnpm add @reaatech/llm-cache-cost-tracker ``` -## Usage +## Feature Overview + +- **Per-request cost calculation** — input and output cost from token counts and model pricing +- **40+ pre-configured models** — OpenAI, Anthropic, and Google pricing data included +- **Savings computation** — `calculateSavings()` returns percentage and absolute savings from cache hits +- **Extensible pricing** — register custom or updated pricing via `registerPricing()` +- **Implements `CostCalculatorLike`** — drop-in integration with `@reaatech/llm-cache`'s `CostCalculatorLike` interface + +## Quick Start ```typescript -import { CostCalculator } from '@llm-cache/cost-tracker'; +import { CostCalculator, defaultPricingDatabase } from "@reaatech/llm-cache-cost-tracker"; -const calculator = new CostCalculator([ -  {
-    modelId: 'gpt-4',
-    inputPricing: { per1KTokens: 0.03, currency: 'USD' },
-    outputPricing: { per1KTokens: 0.06, currency: 'USD' },
-  },
-]); +const calculator = new CostCalculator(defaultPricingDatabase); -const cost = calculator.calculateCost('gpt-4', 1000, 500); -// { totalCost: 0.06, currency: 'USD', ... } +const cost = calculator.calculateCost("gpt-4", 1000, 500); +// → { model: "gpt-4", inputCost: 0.03, outputCost: 0.03, totalCost: 0.06, currency: "USD" } const savings = calculator.calculateSavings(0.06, 0.0001); -// { totalSavings: 0.0599, savingsPercentage: 99.83 } +// → { originalCost: 0.06, embeddingCost: 0.0001, totalSavings: 0.0599, savingsPercentage: 99.83 } +``` + +### Integration with CacheEngine + +```typescript +import { CacheEngine, InMemoryAdapter, OpenAIEmbedder } from "@reaatech/llm-cache"; +import { CostCalculator, defaultPricingDatabase } from "@reaatech/llm-cache-cost-tracker"; + +const cache = new CacheEngine({ + storage: new InMemoryAdapter(), + vectorStorage: new InMemoryAdapter(), + embedder: /* ... 
*/ }, +  costCalculator: new CostCalculator(defaultPricingDatabase), +}); + +// cost tracking happens automatically on cache hits +const result = await cache.get("What is TypeScript?", { +  model: "gpt-4", +  modelVersion: "gpt-4-0613", +}); +``` + +## API Reference + +### `CostCalculator` (class) + +```typescript +import { CostCalculator } from "@reaatech/llm-cache-cost-tracker"; + +const calc = new CostCalculator(); // empty pricing DB +const calc = new CostCalculator(customPricing); // with initial pricing +``` + +#### Constructor + +| Parameter | Type | Description | +|-----------|------|-------------| +| `initialPricing` | `ModelPricing[]` | Optional array of model pricing records to pre-register | + +#### Methods + +| Method | Returns | Description | +|--------|---------|-------------| +| `registerPricing(pricing)` | `void` | Register or overwrite pricing for a model | +| `calculateCost(model, promptTokens, completionTokens, currency?)` | `CostBreakdown` | Compute cost from token usage | +| `calculateSavings(originalCost, embeddingCost)` | `SavingsReport` | Compute savings from a cache hit | + +### `ModelPricing` + +```typescript +interface ModelPricing { +  modelId: string; +  inputPricing: { +    per1KTokens: number; +    currency: string; +  }; +  outputPricing: { +    per1KTokens: number; +    currency: string; +  }; +} +``` + +### `CostBreakdown` + +Returned by `calculateCost()`: + +| Property | Type | Description | +|----------|------|-------------| +| `model` | `string` | Model identifier | +| `promptTokens` | `number` | Input token count | +| `completionTokens` | `number` | Output token count | +| `totalTokens` | `number` | Sum of input and output tokens | +| `inputCost` | `number` | Cost of prompt tokens | +| `outputCost` | `number` | Cost of completion tokens | +| `totalCost` | `number` | Sum of input and output costs | +| `currency` | `string` | Currency code (default `"USD"`) | + +### `SavingsReport` + +Returned by `calculateSavings()`: + +| Property | Type | Description 
| +|----------|------|-------------| +| `originalCost` | `number` | Original API cost | +| `embeddingCost` | `number` | Embedding/retrieval cost | +| `totalSavings` | `number` | `originalCost - embeddingCost` | +| `savingsPercentage` | `number` | Percentage saved (0–100) | + +### `defaultPricingDatabase` + +A pre-configured `ModelPricing[]` array covering 40+ models from OpenAI, Anthropic, and Google. Import and pass to `CostCalculator`: + +```typescript +import { CostCalculator, defaultPricingDatabase } from "@reaatech/llm-cache-cost-tracker"; + +const calc = new CostCalculator(defaultPricingDatabase); +``` + +## Usage Patterns + +### Custom Model Pricing + +```typescript +const calc = new CostCalculator(); + +calc.registerPricing({ +  modelId: "my-custom-model", +  inputPricing: { per1KTokens: 0.01, currency: "USD" }, +  outputPricing: { per1KTokens: 0.02, currency: "USD" }, +}); + +const cost = calc.calculateCost("my-custom-model", 500, 300); +``` + +### Non-USD Currency + +```typescript +const cost = calc.calculateCost("gpt-4", 1000, 500, "EUR"); +// Returns costs in configured currency from the pricing record +``` + +### Safety With Missing Models + +When a model is not found in the pricing database, `calculateCost()` returns zero costs rather than throwing: + +```typescript +const cost = calc.calculateCost("unknown-model", 1000, 500); +// → { inputCost: 0, outputCost: 0, totalCost: 0, ... } ``` +## Notes + +- Pricing data is provided as reference and may lag provider price changes. Verify against your provider before relying on it for billing. +- Token counts should be sourced from your LLM provider's response `usage` field, not estimated locally. 
+ +## Related Packages + +- [`@reaatech/llm-cache`](https://www.npmjs.com/package/@reaatech/llm-cache) — Core caching engine (accepts `CostCalculatorLike` instances) +- [`@reaatech/llm-cache-observability`](https://www.npmjs.com/package/@reaatech/llm-cache-observability) — Prometheus metrics including `cache_cost_savings_total` + ## License -MIT +[MIT](https://github.com/reaatech/llm-cache/blob/main/LICENSE) diff --git a/packages/cost-tracker/package.json b/packages/cost-tracker/package.json index 779093a..517ce28 100644 --- a/packages/cost-tracker/package.json +++ b/packages/cost-tracker/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/cost-tracker", + "name": "@reaatech/llm-cache-cost-tracker", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "description": "Cost tracking and pricing calculations for llm-cache", @@ -7,7 +7,8 @@ "engines": { "node": ">=20.0.0" }, - "main": "./dist/index.js", + "main": "./dist/index.cjs", + "module": "./dist/index.js", "types": "./dist/index.d.ts", "license": "MIT", "repository": { @@ -15,37 +16,36 @@ "url": "https://github.com/reaatech/llm-cache.git", "directory": "packages/cost-tracker" }, - "homepage": "https://github.com/reaatech/llm-cache#readme", + "homepage": "https://github.com/reaatech/llm-cache/tree/main/packages/cost-tracker#readme", "bugs": { "url": "https://github.com/reaatech/llm-cache/issues" }, "keywords": ["llm-cache", "cost", "pricing", "openai", "anthropic"], - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "publishConfig": { "access": "public" }, "exports": { ".": { + "types": "./dist/index.d.ts", "import": "./dist/index.js", - "types": "./dist/index.d.ts" + "require": "./dist/index.cjs" } }, "scripts": { - "build": "tsc --build", + "build": "tsup src/index.ts --format cjs,esm --dts --clean", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", "typecheck": "tsc --noEmit" }, "dependencies": { - "@llm-cache/core": "workspace:*" + 
"@reaatech/llm-cache": "workspace:*" }, "devDependencies": { "@types/node": "^20.11.0", - "vitest": "^1.3.0", - "@vitest/coverage-v8": "^1.3.0" + "tsup": "^8.4.0", + "vitest": "^3.1.1", + "@vitest/coverage-v8": "3.2.4" } } diff --git a/packages/cost-tracker/src/CostCalculator.test.ts b/packages/cost-tracker/src/CostCalculator.test.ts index 0c2fc24..445bb15 100644 --- a/packages/cost-tracker/src/CostCalculator.test.ts +++ b/packages/cost-tracker/src/CostCalculator.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import { CostCalculator } from './CostCalculator.js'; describe('CostCalculator', () => { diff --git a/packages/cost-tracker/src/CostCalculator.ts b/packages/cost-tracker/src/CostCalculator.ts index 95c9b54..220c6e6 100644 --- a/packages/cost-tracker/src/CostCalculator.ts +++ b/packages/cost-tracker/src/CostCalculator.ts @@ -40,7 +40,7 @@ export class CostCalculator { model: string, promptTokens: number, completionTokens: number, - currency = 'USD' + currency = 'USD', ): CostBreakdown { const safePromptTokens = Math.max(0, promptTokens); const safeCompletionTokens = Math.max(0, completionTokens); @@ -76,7 +76,7 @@ export class CostCalculator { calculateSavings( originalCost: number, - embeddingCost: number + embeddingCost: number, ): { originalCost: number; embeddingCost: number; diff --git a/packages/cost-tracker/src/index.ts b/packages/cost-tracker/src/index.ts index 766654b..f25251b 100644 --- a/packages/cost-tracker/src/index.ts +++ b/packages/cost-tracker/src/index.ts @@ -1,2 +1,2 @@ -export { CostCalculator, type CostBreakdown, type ModelPricing } from './CostCalculator.js'; +export { type CostBreakdown, CostCalculator, type ModelPricing } from './CostCalculator.js'; export { defaultPricingDatabase } from './pricing-data.js'; diff --git a/packages/cost-tracker/tsconfig.json b/packages/cost-tracker/tsconfig.json index 49a9966..90d76d7 100644 --- 
a/packages/cost-tracker/tsconfig.json +++ b/packages/cost-tracker/tsconfig.json @@ -2,10 +2,7 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src", - "composite": true + "rootDir": "./src" }, - "include": ["src/**/*"], - "exclude": ["dist", "node_modules", "**/*.test.ts"], - "references": [{ "path": "../core" }] + "include": ["src/**/*"] } diff --git a/packages/observability/LICENSE b/packages/observability/LICENSE new file mode 100644 index 0000000..1390d41 --- /dev/null +++ b/packages/observability/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 llm-cache contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/packages/observability/README.md b/packages/observability/README.md index 0bc516c..4b4e5c3 100644 --- a/packages/observability/README.md +++ b/packages/observability/README.md @@ -1,28 +1,205 @@ -# @llm-cache/observability +# @reaatech/llm-cache-observability -Observability, metrics, logging, and tracing utilities for llm-cache. +[![npm version](https://img.shields.io/npm/v/@reaatech/llm-cache-observability.svg)](https://www.npmjs.com/package/@reaatech/llm-cache-observability) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/reaatech/llm-cache/blob/main/LICENSE) +[![CI](https://img.shields.io/github/actions/workflow/status/reaatech/llm-cache/ci.yml?branch=main&label=CI)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) -## Install +> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production. + +Observability utilities for llm-cache — structured JSON logging with automatic PII redaction, Prometheus-compatible metrics collection, and optional distributed tracing hooks. + +## Installation ```bash -npm install @llm-cache/observability +npm install @reaatech/llm-cache-observability +# or +pnpm add @reaatech/llm-cache-observability ``` -## Usage +## Feature Overview + +- **Structured JSON logging** — writes NDJSON to `stdout` with automatic timestamps and correlation IDs +- **Automatic PII redaction** — 17 sensitive field names are redacted from all log output (`prompt`, `apiKey`, `token`, `secret`, etc.) 
+- **Correlation ID propagation** — `child()` creates scoped loggers carrying the request-level correlation ID +- **Prometheus metrics** — counters and histograms with `toPrometheus()` text exposition format +- **Label cardinality protection** — configurable cap on distinct counter labels (excess bucketed as `"other"`) +- **Histogram retention cap** — configurable max samples per histogram (old samples dropped) +- **Zero runtime dependencies** — no third-party logging or metrics libraries + +## Quick Start ```typescript -import { Logger, MetricsCollector } from '@llm-cache/observability'; +import { Logger, MetricsCollector } from "@reaatech/llm-cache-observability"; -const logger = new Logger({ level: 'info', correlationId: 'req-123' }); -logger.info('Cache hit', { type: 'exact' }); -logger.cacheHit('exact', 12, 0.95); +// Structured logging +const logger = new Logger({ level: "info" }); +logger.info("Cache hit", { type: "exact", latencyMs: 12 }); +logger.cacheHit("exact", 12, 0.95); +logger.cacheMiss(45); +logger.error("Request failed", new Error("timeout"), { path: "/cache/get" }); +// Metrics collection const metrics = new MetricsCollector({ enabled: true }); -metrics.recordHit('exact'); +metrics.recordHit("exact"); metrics.recordMiss(); -metrics.recordLatency('get', 42); +metrics.recordLatency("get", 42); +metrics.recordSavings(0.0599); + +// Prometheus exposition +console.log(metrics.toPrometheus()); +// → # HELP cache_hits_total_exact Total exact cache hits +// → # TYPE cache_hits_total_exact counter +// → cache_hits_total_exact 1 +// → ... 
+``` + +## API Reference + +### `Logger` (class) + +```typescript +import { Logger } from "@reaatech/llm-cache-observability"; + +const logger = new Logger({ level: "info", correlationId: "req-abc123" }); +``` + +#### `LoggerConfig` + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `level` | `"error" \| "warn" \| "info" \| "debug"` | `"info"` | Minimum log level | +| `correlationId` | `string` | auto-generated UUID | Correlation ID for request tracing | +| `context` | `Record` | — | Key-value pairs to include in every log line | + +#### Methods + +| Method | Description | +|--------|-------------| +| `error(message, error?, meta?)` | Log at ERROR level. Error stack is automatically serialized | +| `warn(message, meta?)` | Log at WARN level | +| `info(message, meta?)` | Log at INFO level | +| `debug(message, meta?)` | Log at DEBUG level | +| `child(context)` | Create a child logger inheriting the correlation ID and merging context | +| `cacheHit(type, latencyMs, confidence?)` | Structured convenience method for cache hit events | +| `cacheMiss(latencyMs)` | Structured convenience method for cache miss events | + +All methods automatically redact values for 17 sensitive field names: `password`, `apiKey`, `api_key`, `secret`, `token`, `authorization`, `prompt`, `response`, `credential`, `private_key`, `privateKey`, `accessKeyId`, `secretAccessKey`, and fields containing any of these substrings. 
+ +#### Log Output Format + +``` +{"level":"INFO","timestamp":"2026-04-30T12:00:00.000Z","correlationId":"req-abc123","message":"Cache hit","type":"exact","latencyMs":12} +{"level":"ERROR","timestamp":"...","correlationId":"...","message":"Request failed","error":"timeout","stack":"...","path":"/cache/get"} +``` + +### `MetricsCollector` (class) + +```typescript +import { MetricsCollector } from "@reaatech/llm-cache-observability"; + +const metrics = new MetricsCollector({ enabled: true, serviceName: "llm-cache" }); ``` +#### `MetricsCollectorConfig` + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `enabled` | `boolean` | `true` | Disable to turn metrics into no-ops | +| `serviceName` | `string` | — | Service name (reserved for future use) | +| `maxLabelCardinality` | `number` | `128` | Max distinct label values per metric family | +| `maxHistogramSamples` | `number` | `1024` | Max samples retained per histogram | + +#### Recording Methods + +| Method | Description | +|--------|-------------| +| `recordHit(type)` | Increment `cache_hits_total_` and `cache_requests_total` | +| `recordMiss()` | Increment `cache_misses_total` and `cache_requests_total` | +| `recordLatency(operation, durationMs)` | Append to histogram `cache_latency_ms_` | +| `recordSavings(amount)` | Increment `cache_cost_savings_total` by amount | +| `recordSemanticHitQuality(accepted, useCase)` | Increment quality counter segmented by use case | +| `recordError(operation, errorType)` | Increment error counter segmented by operation and error type | + +#### Query Methods + +| Method | Returns | Description | +|--------|---------|-------------| +| `getCounters()` | `Record` | All counter values as a plain object | +| `getHistograms()` | `Record` | All histogram sample arrays | +| `toPrometheus()` | `string` | Prometheus text exposition format for scraping | + +#### Prometheus Output + +``` +# HELP cache_hits_total_exact Total exact cache hits +# TYPE 
cache_hits_total_exact counter +cache_hits_total_exact 42 + +# HELP cache_latency_ms_get Cache operation latency in ms for get +# TYPE cache_latency_ms_get histogram +cache_latency_ms_get{quantile="0.5"} 12 +cache_latency_ms_get{quantile="0.95"} 45 +cache_latency_ms_get{quantile="0.99"} 98 +``` + +## Usage Patterns + +### Correlation ID Propagation + +```typescript +import { randomUUID } from "node:crypto"; + +async function handleRequest(req: Request) { + const correlationId = req.headers["x-correlation-id"] ?? randomUUID(); + const logger = new Logger({ correlationId }).child({ path: req.url }); + + logger.info("Request received"); + + // Nested calls carry the same correlation ID + await processTask(logger.child({ component: "task-executor" })); +} +``` + +### Metrics-Only Mode + +```typescript +// Disable metrics in tests or when scraping is handled externally +const metrics = new MetricsCollector({ enabled: false }); +metrics.recordHit("exact"); // no-op +``` + +### Label Cardinality Safety + +```typescript +const metrics = new MetricsCollector({ maxLabelCardinality: 64 }); + +// Up to 64 distinct operation names tracked per metric family +metrics.recordLatency("get", 12); +metrics.recordLatency("set", 8); +metrics.recordLatency("very-specific-op-name-65", 5); // bucketed as "other" +``` + +## Integration with the Server + +The `@reaatech/llm-cache-server` package uses both `Logger` and `MetricsCollector` for request logging, cache event tracking, and Prometheus scraping at `GET /metrics`: + +```typescript +import { Logger, MetricsCollector } from "@reaatech/llm-cache-observability"; + +const logger = new Logger({ level: "info" }); +const metrics = new MetricsCollector({ enabled: true }); + +// On cache hit: +metrics.recordHit("exact"); +logger.cacheHit("exact", latencyMs, confidence); +``` + +## Related Packages + +- [`@reaatech/llm-cache`](https://www.npmjs.com/package/@reaatech/llm-cache) — Core caching engine +- 
[`@reaatech/llm-cache-server`](https://www.npmjs.com/package/@reaatech/llm-cache-server) — HTTP server wrapper (uses both `Logger` and `MetricsCollector`) +- [`@reaatech/llm-cache-cost-tracker`](https://www.npmjs.com/package/@reaatech/llm-cache-cost-tracker) — Cost tracking (pairs with `recordSavings`) + ## License -MIT +[MIT](https://github.com/reaatech/llm-cache/blob/main/LICENSE) diff --git a/packages/observability/package.json b/packages/observability/package.json index 2afc670..fef1060 100644 --- a/packages/observability/package.json +++ b/packages/observability/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/observability", + "name": "@reaatech/llm-cache-observability", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "description": "Observability, metrics, logging, and tracing for llm-cache", @@ -7,7 +7,8 @@ "engines": { "node": ">=20.0.0" }, - "main": "./dist/index.js", + "main": "./dist/index.cjs", + "module": "./dist/index.js", "types": "./dist/index.d.ts", "license": "MIT", "repository": { @@ -15,26 +16,24 @@ "url": "https://github.com/reaatech/llm-cache.git", "directory": "packages/observability" }, - "homepage": "https://github.com/reaatech/llm-cache#readme", + "homepage": "https://github.com/reaatech/llm-cache/tree/main/packages/observability#readme", "bugs": { "url": "https://github.com/reaatech/llm-cache/issues" }, "keywords": ["llm-cache", "metrics", "logging", "prometheus", "observability"], - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "publishConfig": { "access": "public" }, "exports": { ".": { + "types": "./dist/index.d.ts", "import": "./dist/index.js", - "types": "./dist/index.d.ts" + "require": "./dist/index.cjs" } }, "scripts": { - "build": "tsc --build", + "build": "tsup src/index.ts --format cjs,esm --dts --clean", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", @@ -43,7 +42,8 @@ "dependencies": {}, "devDependencies": { "@types/node": "^20.11.0", 
- "vitest": "^1.3.0", - "@vitest/coverage-v8": "^1.3.0" + "tsup": "^8.4.0", + "vitest": "^3.1.1", + "@vitest/coverage-v8": "3.2.4" } } diff --git a/packages/observability/src/index.ts b/packages/observability/src/index.ts index e574b58..021a31f 100644 --- a/packages/observability/src/index.ts +++ b/packages/observability/src/index.ts @@ -1,2 +1,2 @@ -export { MetricsCollector, type MetricsCollectorConfig } from './metrics/MetricsCollector.js'; export { Logger, type LoggerConfig } from './logging/Logger.js'; +export { MetricsCollector, type MetricsCollectorConfig } from './metrics/MetricsCollector.js'; diff --git a/packages/observability/src/metrics/MetricsCollector.test.ts b/packages/observability/src/metrics/MetricsCollector.test.ts index b2482c8..c4d9ebe 100644 --- a/packages/observability/src/metrics/MetricsCollector.test.ts +++ b/packages/observability/src/metrics/MetricsCollector.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import { MetricsCollector } from './MetricsCollector.js'; describe('MetricsCollector', () => { diff --git a/packages/observability/src/metrics/MetricsCollector.ts b/packages/observability/src/metrics/MetricsCollector.ts index d5dfe12..b66de9e 100644 --- a/packages/observability/src/metrics/MetricsCollector.ts +++ b/packages/observability/src/metrics/MetricsCollector.ts @@ -99,7 +99,7 @@ export class MetricsCollector { lines.push(`${name}{quantile="0.95"} ${p95}`); lines.push(`${name}{quantile="0.99"} ${p99}`); } - return lines.join('\n') + '\n'; + return `${lines.join('\n')}\n`; } reset(): void { diff --git a/packages/observability/tsconfig.json b/packages/observability/tsconfig.json index 53dc78f..90d76d7 100644 --- a/packages/observability/tsconfig.json +++ b/packages/observability/tsconfig.json @@ -2,9 +2,7 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src", - "composite": true + "rootDir": 
"./src" }, - "include": ["src/**/*"], - "exclude": ["dist", "node_modules", "**/*.test.ts"] + "include": ["src/**/*"] } diff --git a/packages/server/Dockerfile b/packages/server/Dockerfile index 5539221..2c41664 100644 --- a/packages/server/Dockerfile +++ b/packages/server/Dockerfile @@ -20,7 +20,7 @@ FROM dependencies AS build COPY packages ./packages COPY tsconfig.json . -RUN pnpm build --filter=@llm-cache/server... +RUN pnpm build --filter=@reaatech/llm-cache-server... FROM node:20-alpine AS production RUN addgroup -g 1001 -S nodejs && adduser -S nodejs -u 1001 diff --git a/packages/server/LICENSE b/packages/server/LICENSE new file mode 100644 index 0000000..1390d41 --- /dev/null +++ b/packages/server/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 llm-cache contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/packages/server/README.md b/packages/server/README.md index 8ffb14a..846680e 100644 --- a/packages/server/README.md +++ b/packages/server/README.md @@ -1,47 +1,270 @@ -# @llm-cache/server +# @reaatech/llm-cache-server -HTTP service wrapper for llm-cache. +[![npm version](https://img.shields.io/npm/v/@reaatech/llm-cache-server.svg)](https://www.npmjs.com/package/@reaatech/llm-cache-server) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/reaatech/llm-cache/blob/main/LICENSE) +[![CI](https://img.shields.io/github/actions/workflow/status/reaatech/llm-cache/ci.yml?branch=main&label=CI)](https://github.com/reaatech/llm-cache/actions/workflows/ci.yml) -## Install +> **Status:** Pre-1.0 — APIs may change in minor versions. Pin to a specific version in production. + +HTTP server wrapper for llm-cache providing a REST API for cache operations, Prometheus metrics, and health endpoints. Supports multiple storage and vector adapter backends via environment variables — deploy as a sidecar or centralized caching service. 
+ +## Installation ```bash -npm install -g @llm-cache/server +npm install @reaatech/llm-cache-server # or -npx @llm-cache/server +pnpm add @reaatech/llm-cache-server ``` -## Usage +## Feature Overview + +- **REST API** — JSON endpoints for `get`, `set`, and `invalidate` cache operations +- **Pluggable storage** — switch between `memory`, `redis`, and `dynamodb` via `STORAGE_ADAPTER` +- **Pluggable vector search** — switch between `memory` and `qdrant` via `VECTOR_STORAGE_ADAPTER` +- **API key authentication** — Bearer token auth via `LLM_CACHE_API_KEY` (constant-time comparison) +- **Prometheus metrics** — `GET /metrics` returns Prometheus text exposition format +- **Health probes** — `GET /health` (liveness) and `GET /ready` (readiness with storage checks) +- **Correlation ID** — every response carries `X-Correlation-Id` for distributed tracing +- **Configurable body limit** — `MAX_BODY_BYTES` caps incoming request size + +## Quick Start ### CLI ```bash -llm-cache-server +export LLM_CACHE_API_KEY=my-secret-key +export OPENAI_API_KEY=sk-... 
+export STORAGE_ADAPTER=redis +export REDIS_URL=redis://localhost:6379 +export VECTOR_STORAGE_ADAPTER=qdrant +export QDRANT_URL=http://localhost:6333 + +npx @reaatech/llm-cache-server +# → llm-cache server listening on port 3000 ``` -Environment variables: +### Docker -- `PORT` — default `3000` -- `STORAGE_ADAPTER` — `memory`, `redis`, `dynamodb` -- `VECTOR_STORAGE_ADAPTER` — `memory`, `qdrant` -- `OPENAI_API_KEY` — required for embeddings -- `REDIS_URL`, `QDRANT_URL`, `DYNAMODB_REGION`, `DYNAMODB_TABLE` +```bash +docker compose up +``` -### Docker +### Programmatic + +```typescript +import { createApp, main } from "@reaatech/llm-cache-server"; + +// Option A: start the default server +main().catch(console.error); + +// Option B: create the app and customize +const app = await createApp(); +app.server.listen(3000, () => console.log("Listening on :3000")); + +// Graceful shutdown +process.on("SIGTERM", () => app.shutdown().then(() => process.exit(0))); +``` + +## API Reference + +### `createApp(): Promise` + +Creates a fully configured HTTP server with cache engine, storage adapters, and embedder. Configuration is loaded from environment variables via `loadConfig()`. + +```typescript +import { createApp } from "@reaatech/llm-cache-server"; + +const app = await createApp(); +``` + +#### `App` + +| Property | Type | Description | +|----------|------|-------------| +| `server` | `http.Server` | Node.js HTTP server | +| `cache` | `CacheEngine` | The configured cache engine instance | +| `shutdown` | `() => Promise` | Graceful shutdown — closes server and storage connections | + +### `main(): Promise` + +Convenience function that calls `createApp()`, starts listening on the configured port, and registers `SIGTERM`/`SIGINT` handlers for graceful shutdown. + +### `loadConfig(): ServerConfig` + +Loads and validates configuration from environment variables. Returns the full `ServerConfig` object. 
+ +```typescript +import { loadConfig } from "@reaatech/llm-cache-server"; + +const config = loadConfig(); +// → { port: 3000, storageAdapter: "redis", vectorStorageAdapter: "qdrant", ... } +``` + +### `ServerConfig` + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `port` | `number` | `3000` | HTTP server port | +| `storageAdapter` | `"memory" \| "redis" \| "dynamodb"` | `"memory"` | Exact-match storage backend | +| `vectorStorageAdapter` | `"memory" \| "qdrant"` | `"memory"` | Semantic search backend | +| `redisUrl` | `string` | — | Redis connection URL | +| `dynamodbRegion` | `string` | — | AWS region for DynamoDB | +| `dynamodbTable` | `string` | — | DynamoDB table name | +| `dynamodbEndpoint` | `string` | — | DynamoDB endpoint override | +| `qdrantUrl` | `string` | — | Qdrant server URL | +| `qdrantCollection` | `string` | — | Qdrant collection name | +| `qdrantApiKey` | `string` | — | Qdrant API key | +| `openaiApiKey` | `string` | (required) | OpenAI API key for embeddings | +| `openaiOrganization` | `string` | — | OpenAI organization ID | +| `apiKey` | `string` | — | Bearer token for server authentication | +| `maxBodyBytes` | `number` | `1048576` | Max request body size in bytes | +| `cacheConfig` | `CacheConfig` | (see env vars) | Full cache configuration object | + +### REST Endpoints + +| Method | Path | Auth | Description | +|--------|------|------|-------------| +| `GET` | `/health` | No | Liveness probe — always returns 200 | +| `GET` | `/ready` | No | Readiness probe — checks storage and vector backend health | +| `POST` | `/cache/get` | Yes | Lookup a prompt; returns `CacheResult` | +| `POST` | `/cache/set` | Yes | Store a response; returns `{ id, cached }` | +| `POST` | `/cache/invalidate` | Yes | Invalidate cache entries by criteria | +| `GET` | `/metrics` | Yes | Prometheus text or JSON metrics snapshot | +| `GET` | `/stats` | Yes | Storage and vector adapter stats | + +### `POST /cache/get` + 
+```json +{ + "prompt": "What is TypeScript?", + "options": { + "model": "gpt-4", + "modelVersion": "gpt-4-0613", + "useCase": "qa" + } +} +``` + +Response (hit): +```json +{ "hit": true, "type": "exact", "entry": { /* CacheEntry */ }, "confidence": 1.0 } +``` + +### `POST /cache/set` + +```json +{ + "prompt": "What is TypeScript?", + "response": { "choices": [{ "message": { "content": "A typed superset of JavaScript" } }] }, + "options": { "model": "gpt-4", "modelVersion": "gpt-4-0613" }, + "metadata": { "queryType": "factual", "tokens": { "prompt": 10, "completion": 20 } } +} +``` + +### `POST /cache/invalidate` + +```json +{ + "criteria": { "useCase": "qa", "modelVersion": "gpt-4-0613" } +} +``` + +Response: +```json +{ "total": 42, "storage": 42, "vectorStorage": 0 } +``` + +## Environment Variables + +All environment variables used by the server. See [`.env.example`](../../.env.example) for the complete annotated reference. + +| Variable | Required | Default | Adapters | +|----------|----------|---------|----------| +| `PORT` | No | `3000` | All | +| `LLM_CACHE_API_KEY` | No | — | All (enables auth) | +| `MAX_BODY_BYTES` | No | `1048576` | All | +| `OPENAI_API_KEY` | Yes | — | All | +| `OPENAI_ORGANIZATION` | No | — | All | +| `STORAGE_ADAPTER` | No | `memory` | `redis`, `dynamodb` | +| `REDIS_URL` | Conditional | — | Redis | +| `DYNAMODB_REGION` | Conditional | — | DynamoDB | +| `DYNAMODB_TABLE` | Conditional | — | DynamoDB | +| `DYNAMODB_ENDPOINT` | No | — | DynamoDB | +| `VECTOR_STORAGE_ADAPTER` | No | `memory` | `qdrant` | +| `QDRANT_URL` | Conditional | — | Qdrant | +| `QDRANT_COLLECTION` | No | `llm-cache` | Qdrant | +| `QDRANT_API_KEY` | No | — | Qdrant | +| `SIMILARITY_THRESHOLD` | No | `0.8` | All | +| `SIMILARITY_MAX_RESULTS` | No | `10` | All | +| `TTL_DEFAULT` | No | `3600` | All | +| `TTL_FACTUAL` | No | `1800` | All | +| `TTL_CREATIVE` | No | `7200` | All | +| `TTL_ANALYTICAL` | No | `3600` | All | +| `TTL_SENSITIVE` | No | `600` | All | +| 
`LOG_LEVEL` | No | `info` | All | +| `METRICS_ENABLED` | No | `true` | All | + +## Usage Patterns + +### Authentication + +Set `LLM_CACHE_API_KEY` to require Bearer token authentication on all `/cache/*` and `/metrics` endpoints. The comparison is constant-time to prevent timing attacks. Endpoints `/health` and `/ready` remain public. ```bash -docker-compose up +export LLM_CACHE_API_KEY=my-secret-key + +curl -X POST http://localhost:3000/cache/get \ + -H "Authorization: Bearer my-secret-key" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "What is TypeScript?"}' +``` + +### Redis + Qdrant (Production) + +```bash +export STORAGE_ADAPTER=redis +export REDIS_URL=redis://:password@redis.internal:6379 +export VECTOR_STORAGE_ADAPTER=qdrant +export QDRANT_URL=http://qdrant.internal:6333 +export QDRANT_COLLECTION=llm-cache +export OPENAI_API_KEY=sk-... + +npx @reaatech/llm-cache-server +``` + +### DynamoDB + In-Memory Vector (Testing) + +```bash +export STORAGE_ADAPTER=dynamodb +export DYNAMODB_REGION=us-east-1 +export DYNAMODB_TABLE=llm-cache +export DYNAMODB_ENDPOINT=http://localhost:8000 +export VECTOR_STORAGE_ADAPTER=memory +export OPENAI_API_KEY=sk-... + +npx @reaatech/llm-cache-server +``` + +### Docker Compose + +The project's `docker-compose.yml` starts Qdrant, Redis, and the cache server: + +```bash +docker compose up + +# Health check +curl http://localhost:3000/health +# → { "status": "ok", "timestamp": "..." 
} ``` -### Endpoints +## Related Packages -- `GET /health` — health check -- `GET /ready` — readiness probe -- `POST /cache/get` — lookup cache entry -- `POST /cache/set` — store cache entry -- `POST /cache/invalidate` — invalidate by criteria -- `GET /metrics` — metrics snapshot -- `GET /stats` — storage stats +- [`@reaatech/llm-cache`](https://www.npmjs.com/package/@reaatech/llm-cache) — Core caching engine +- [`@reaatech/llm-cache-adapters-redis`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-redis) — Redis storage adapter +- [`@reaatech/llm-cache-adapters-dynamodb`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-dynamodb) — DynamoDB storage adapter +- [`@reaatech/llm-cache-adapters-qdrant`](https://www.npmjs.com/package/@reaatech/llm-cache-adapters-qdrant) — Qdrant vector search adapter +- [`@reaatech/llm-cache-observability`](https://www.npmjs.com/package/@reaatech/llm-cache-observability) — Metrics and logging ## License -MIT +[MIT](https://github.com/reaatech/llm-cache/blob/main/LICENSE) diff --git a/packages/server/package.json b/packages/server/package.json index ea969b6..46a1aa2 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -1,5 +1,5 @@ { - "name": "@llm-cache/server", + "name": "@reaatech/llm-cache-server", "version": "0.1.0", "author": "Rick Somers (https://reaatech.com)", "description": "HTTP service wrapper for llm-cache", @@ -7,7 +7,8 @@ "engines": { "node": ">=20.0.0" }, - "main": "./dist/index.js", + "main": "./dist/index.cjs", + "module": "./dist/index.js", "types": "./dist/index.d.ts", "license": "MIT", "repository": { @@ -15,7 +16,7 @@ "url": "https://github.com/reaatech/llm-cache.git", "directory": "packages/server" }, - "homepage": "https://github.com/reaatech/llm-cache#readme", + "homepage": "https://github.com/reaatech/llm-cache/tree/main/packages/server#readme", "bugs": { "url": "https://github.com/reaatech/llm-cache/issues" }, @@ -23,21 +24,19 @@ "bin": { "llm-cache-server": 
"./dist/cli.js" }, - "files": [ - "dist", - "README.md" - ], + "files": ["dist", "README.md"], "publishConfig": { "access": "public" }, "exports": { ".": { + "types": "./dist/index.d.ts", "import": "./dist/index.js", - "types": "./dist/index.d.ts" + "require": "./dist/index.cjs" } }, "scripts": { - "build": "tsc --build", + "build": "tsup src/index.ts --format cjs,esm --dts --clean", "test": "vitest run", "test:watch": "vitest", "test:coverage": "vitest run --coverage", @@ -45,16 +44,17 @@ "start": "node dist/index.js" }, "dependencies": { - "@llm-cache/core": "workspace:*", - "@llm-cache/adapters-redis": "workspace:*", - "@llm-cache/adapters-dynamodb": "workspace:*", - "@llm-cache/adapters-qdrant": "workspace:*", - "@llm-cache/cost-tracker": "workspace:*", - "@llm-cache/observability": "workspace:*" + "@reaatech/llm-cache": "workspace:*", + "@reaatech/llm-cache-adapters-redis": "workspace:*", + "@reaatech/llm-cache-adapters-dynamodb": "workspace:*", + "@reaatech/llm-cache-adapters-qdrant": "workspace:*", + "@reaatech/llm-cache-cost-tracker": "workspace:*", + "@reaatech/llm-cache-observability": "workspace:*" }, "devDependencies": { "@types/node": "^20.11.0", - "vitest": "^1.3.0", - "@vitest/coverage-v8": "^1.3.0" + "tsup": "^8.4.0", + "vitest": "^3.1.1", + "@vitest/coverage-v8": "3.2.4" } } diff --git a/packages/server/src/app.test.ts b/packages/server/src/app.test.ts index 0615302..7d4ab46 100644 --- a/packages/server/src/app.test.ts +++ b/packages/server/src/app.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest'; +import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest'; import type { createApp } from './app.js'; const originalFetch = globalThis.fetch; @@ -16,11 +16,11 @@ describe('Server App', () => { if (url.includes('api.openai.com')) { return new Response( JSON.stringify({ data: [{ embedding: new Array(1536).fill(0.1) }] }), - { status: 200, headers: { 'Content-Type': 'application/json' } } + { 
status: 200, headers: { 'Content-Type': 'application/json' } }, ); } return originalFetch(input, init); - }) + }), ); process.env.OPENAI_API_KEY = 'test-key'; @@ -40,10 +40,14 @@ describe('Server App', () => { afterAll(async () => { await new Promise((resolve) => app.server.close(() => resolve())); vi.unstubAllGlobals(); - delete process.env.OPENAI_API_KEY; + process.env.OPENAI_API_KEY = undefined; }); - async function fetchJson(path: string, opts?: RequestInit) { + async function fetchJson( + path: string, + opts?: RequestInit, + // biome-ignore lint/suspicious/noExplicitAny: test response fixture + ): Promise<{ status: number; data: any }> { const res = await fetch(`${baseUrl}${path}`, opts); const data = await res.json(); return { status: res.status, data }; diff --git a/packages/server/src/app.ts b/packages/server/src/app.ts index e69b2f7..f183f81 100644 --- a/packages/server/src/app.ts +++ b/packages/server/src/app.ts @@ -1,17 +1,17 @@ -import { createServer, type IncomingMessage, type ServerResponse } from 'node:http'; -import { URL } from 'node:url'; import { randomUUID, timingSafeEqual } from 'node:crypto'; +import { type IncomingMessage, type ServerResponse, createServer } from 'node:http'; +import { URL } from 'node:url'; import { CacheEngine, - InMemoryAdapter, - OpenAIEmbedder, type CacheOptions, type EmbeddingProvider, -} from '@llm-cache/core'; -import { RedisAdapter } from '@llm-cache/adapters-redis'; -import { DynamoDBAdapter } from '@llm-cache/adapters-dynamodb'; -import { QdrantAdapter } from '@llm-cache/adapters-qdrant'; -import { MetricsCollector, Logger } from '@llm-cache/observability'; + InMemoryAdapter, + OpenAIEmbedder, +} from '@reaatech/llm-cache'; +import { DynamoDBAdapter } from '@reaatech/llm-cache-adapters-dynamodb'; +import { QdrantAdapter } from '@reaatech/llm-cache-adapters-qdrant'; +import { RedisAdapter } from '@reaatech/llm-cache-adapters-redis'; +import { Logger, MetricsCollector } from '@reaatech/llm-cache-observability'; 
import { loadConfig } from './config.js'; const config = loadConfig(); @@ -47,7 +47,7 @@ async function createStorageAdapter() { case 'dynamodb': { if (!config.dynamodbRegion || !config.dynamodbTable) { throw new Error( - 'DYNAMODB_REGION and DYNAMODB_TABLE are required when STORAGE_ADAPTER=dynamodb' + 'DYNAMODB_REGION and DYNAMODB_TABLE are required when STORAGE_ADAPTER=dynamodb', ); } return new DynamoDBAdapter({ @@ -56,7 +56,6 @@ async function createStorageAdapter() { endpoint: config.dynamodbEndpoint, }); } - case 'memory': default: logger.info('Using in-memory storage'); return new InMemoryAdapter(); @@ -82,7 +81,6 @@ async function createVectorStorageAdapter() { }); return adapter; } - case 'memory': default: logger.info('Using in-memory vector storage'); return new InMemoryAdapter(); @@ -105,12 +103,12 @@ function createEmbedder(): EmbeddingProvider { return { embed: () => { return Promise.reject( - new Error('OpenAI API key not configured. Set OPENAI_API_KEY environment variable.') + new Error('OpenAI API key not configured. Set OPENAI_API_KEY environment variable.'), ); }, embedBatch: () => { return Promise.reject( - new Error('OpenAI API key not configured. Set OPENAI_API_KEY environment variable.') + new Error('OpenAI API key not configured. Set OPENAI_API_KEY environment variable.'), ); }, }; @@ -303,7 +301,7 @@ export async function createApp(): Promise { const raw = body.criteria ?? {}; const olderThanDate = raw.olderThan ? 
new Date(raw.olderThan) : undefined; - if (raw.olderThan && isNaN(olderThanDate!.getTime())) { + if (raw.olderThan && Number.isNaN(olderThanDate?.getTime())) { sendJson(res, 400, { error: 'Invalid "olderThan" date' }); return; } @@ -365,10 +363,7 @@ export async function createApp(): Promise { if ('disconnect' in storage && typeof storage.disconnect === 'function') { await storage.disconnect(); } - if ( - 'disconnect' in vectorStorage && - typeof vectorStorage.disconnect === 'function' - ) { + if ('disconnect' in vectorStorage && typeof vectorStorage.disconnect === 'function') { await vectorStorage.disconnect(); } } @@ -379,7 +374,7 @@ export async function createApp(): Promise { class HttpError extends Error { constructor( public status: number, - message: string + message: string, ) { super(message); this.name = 'HttpError'; diff --git a/packages/server/src/config.ts b/packages/server/src/config.ts index 32a0aef..9550808 100644 --- a/packages/server/src/config.ts +++ b/packages/server/src/config.ts @@ -1,4 +1,4 @@ -import { CacheConfigSchema, type CacheConfig } from '@llm-cache/core'; +import { type CacheConfig, CacheConfigSchema } from '@reaatech/llm-cache'; export interface ServerConfig { port: number; @@ -20,14 +20,14 @@ export interface ServerConfig { function parseNumber(value: string | undefined, defaultValue: number): number { if (!value) return defaultValue; - const parsed = parseInt(value, 10); - return isNaN(parsed) ? defaultValue : parsed; + const parsed = Number.parseInt(value, 10); + return Number.isNaN(parsed) ? defaultValue : parsed; } function parseFloatValue(value: string | undefined, defaultValue: number): number { if (!value) return defaultValue; - const parsed = parseFloat(value); - return isNaN(parsed) ? defaultValue : parsed; + const parsed = Number.parseFloat(value); + return Number.isNaN(parsed) ? 
defaultValue : parsed; } export function loadConfig(): ServerConfig { @@ -80,7 +80,7 @@ export function loadConfig(): ServerConfig { const issues = result.error.issues .map( (issue: { path: (string | number)[]; message: string }) => - `${issue.path.join('.')}: ${issue.message}` + `${issue.path.join('.')}: ${issue.message}`, ) .join(', '); throw new Error(`Invalid configuration: ${issues}`); diff --git a/packages/server/src/index.test.ts b/packages/server/src/index.test.ts index 0050ec6..3ca8d27 100644 --- a/packages/server/src/index.test.ts +++ b/packages/server/src/index.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from 'vitest'; +import { describe, expect, it } from 'vitest'; import * as server from './index.js'; describe('Server exports', () => { diff --git a/packages/server/tsconfig.json b/packages/server/tsconfig.json index b6222a0..90d76d7 100644 --- a/packages/server/tsconfig.json +++ b/packages/server/tsconfig.json @@ -2,17 +2,7 @@ "extends": "../../tsconfig.json", "compilerOptions": { "outDir": "./dist", - "rootDir": "./src", - "composite": true + "rootDir": "./src" }, - "include": ["src/**/*"], - "exclude": ["dist", "node_modules", "**/*.test.ts"], - "references": [ - { "path": "../core" }, - { "path": "../adapters/redis" }, - { "path": "../adapters/dynamodb" }, - { "path": "../adapters/qdrant" }, - { "path": "../cost-tracker" }, - { "path": "../observability" } - ] + "include": ["src/**/*"] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 82b8786..2267166 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -48,16 +48,16 @@ importers: examples: dependencies: - '@llm-cache/adapters-dynamodb': + '@reaatech/llm-cache-adapters-dynamodb': specifier: workspace:* version: link:../packages/adapters/dynamodb - '@llm-cache/adapters-qdrant': + '@reaatech/llm-cache-adapters-qdrant': specifier: workspace:* version: link:../packages/adapters/qdrant - '@llm-cache/adapters-redis': + '@reaatech/llm-cache-adapters-redis': specifier: workspace:* version: 
link:../packages/adapters/redis - '@llm-cache/core': + '@reaatech/llm-cache': specifier: workspace:* version: link:../packages/core devDependencies: @@ -76,7 +76,7 @@ importers: '@aws-sdk/lib-dynamodb': specifier: ^3.500.0 version: 3.1035.0(@aws-sdk/client-dynamodb@3.1035.0) - '@llm-cache/core': + '@reaatech/llm-cache': specifier: workspace:* version: link:../../core devDependencies: @@ -92,7 +92,7 @@ importers: packages/adapters/qdrant: dependencies: - '@llm-cache/core': + '@reaatech/llm-cache': specifier: workspace:* version: link:../../core '@qdrant/js-client-rest': @@ -117,7 +117,7 @@ importers: packages/adapters/redis: dependencies: - '@llm-cache/core': + '@reaatech/llm-cache': specifier: workspace:* version: link:../../core redis: @@ -158,7 +158,7 @@ importers: packages/cost-tracker: dependencies: - '@llm-cache/core': + '@reaatech/llm-cache': specifier: workspace:* version: link:../core devDependencies: @@ -186,22 +186,22 @@ importers: packages/server: dependencies: - '@llm-cache/adapters-dynamodb': + '@reaatech/llm-cache-adapters-dynamodb': specifier: workspace:* version: link:../adapters/dynamodb - '@llm-cache/adapters-qdrant': + '@reaatech/llm-cache-adapters-qdrant': specifier: workspace:* version: link:../adapters/qdrant - '@llm-cache/adapters-redis': + '@reaatech/llm-cache-adapters-redis': specifier: workspace:* version: link:../adapters/redis - '@llm-cache/core': + '@reaatech/llm-cache': specifier: workspace:* version: link:../core - '@llm-cache/cost-tracker': + '@reaatech/llm-cache-cost-tracker': specifier: workspace:* version: link:../cost-tracker - '@llm-cache/observability': + '@reaatech/llm-cache-observability': specifier: workspace:* version: link:../observability devDependencies: diff --git a/skills/devops/skills.md b/skills/devops/skills.md index 3119253..0a394fc 100644 --- a/skills/devops/skills.md +++ b/skills/devops/skills.md @@ -54,7 +54,7 @@ The DevOps Agent is responsible for implementing CI/CD, deployment, and infrastr ### Distribution 
Model Note -The Docker, Kubernetes, and Helm configurations described here apply to the **`@llm-cache/server` service wrapper**. The server is **optional for end users** (who can use `@llm-cache/core` directly) but is **required to develop and maintain** as a first-class workspace package. It must be built, tested, and released in lockstep with core releases. +The Docker, Kubernetes, and Helm configurations described here apply to the **`@reaatech/llm-cache-server` service wrapper**. The server is **optional for end users** (who can use `@reaatech/llm-cache` directly) but is **required to develop and maintain** as a first-class workspace package. It must be built, tested, and released in lockstep with core releases. ### Example 1: GitHub Actions CI/CD Pipeline @@ -272,7 +272,7 @@ RUN corepack enable && corepack prepare pnpm@8.15.0 --activate # Install dependencies FROM base AS dependencies -RUN pnpm install --frozen-lockfile --filter=@llm-cache/core... +RUN pnpm install --frozen-lockfile --filter=@reaatech/llm-cache... # Build application FROM dependencies AS build @@ -282,7 +282,7 @@ COPY packages/cost-tracker ./packages/cost-tracker COPY packages/observability ./packages/observability COPY tsconfig.json . 
-RUN pnpm build --filter=@llm-cache/core +RUN pnpm build --filter=@reaatech/llm-cache # Production image FROM node:20-alpine AS production @@ -505,9 +505,9 @@ data: - Must support Node.js 18+ - Must use pnpm for package management -- Must support Kubernetes deployments (for `@llm-cache/server` service wrapper) +- Must support Kubernetes deployments (for `@reaatech/llm-cache-server` service wrapper) - Must integrate with AWS services -- Docker and k8s configurations are for the `@llm-cache/server` service wrapper (required to develop, optional for users) +- Docker and k8s configurations are for the `@reaatech/llm-cache-server` service wrapper (required to develop, optional for users) ### Performance Constraints diff --git a/tsconfig.json b/tsconfig.json index 5c0f1b8..bbd6158 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -26,5 +26,5 @@ "isolatedModules": true, "verbatimModuleSyntax": true }, - "exclude": ["node_modules", "dist", "coverage"] + "exclude": ["node_modules", "dist", "*.config.js"] } diff --git a/tsconfig.typecheck.json b/tsconfig.typecheck.json new file mode 100644 index 0000000..c580960 --- /dev/null +++ b/tsconfig.typecheck.json @@ -0,0 +1,15 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "baseUrl": ".", + "paths": { + "@reaatech/llm-cache": ["./packages/core/src/index.ts"], + "@reaatech/llm-cache-server": ["./packages/server/src/index.ts"], + "@reaatech/llm-cache-adapters-redis": ["./packages/adapters/redis/src/index.ts"], + "@reaatech/llm-cache-adapters-dynamodb": ["./packages/adapters/dynamodb/src/index.ts"], + "@reaatech/llm-cache-adapters-qdrant": ["./packages/adapters/qdrant/src/index.ts"], + "@reaatech/llm-cache-cost-tracker": ["./packages/cost-tracker/src/index.ts"], + "@reaatech/llm-cache-observability": ["./packages/observability/src/index.ts"] + } + } +}