From 8d77cc024c4f75fb6c04630e7b5b72c6bd304698 Mon Sep 17 00:00:00 2001 From: kadekillary Date: Fri, 8 May 2026 03:56:33 -0600 Subject: [PATCH] feat(parity): close high-value SDK parity gaps --- CHANGELOG.md | 9 + docs/API_REFERENCE.md | 252 ++++++++++++++++++++ docs/CACHING.md | 2 +- docs/PARITY.md | 62 +++++ docs/PROMPTS.md | 17 ++ docs/README.md | 2 + lib/langfuse.rb | 19 ++ lib/langfuse/api_client.rb | 394 ++++++++++++++++++++++++++++++- lib/langfuse/client.rb | 58 +++++ lib/langfuse/media.rb | 195 +++++++++++++++ lib/langfuse/otel_setup.rb | 3 +- lib/langfuse/sdk_headers.rb | 30 +++ spec/langfuse/api_client_spec.rb | 263 +++++++++++++++++++++ spec/langfuse/client_spec.rb | 23 ++ spec/langfuse/media_spec.rb | 91 +++++++ spec/langfuse/otel_setup_spec.rb | 11 + 16 files changed, 1428 insertions(+), 3 deletions(-) create mode 100644 docs/PARITY.md create mode 100644 lib/langfuse/media.rb create mode 100644 lib/langfuse/sdk_headers.rb create mode 100644 spec/langfuse/media_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index e5a2a4f..6cc888d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Add SDK identity headers for REST and OTLP requests. +- Add prompt deletion with prompt-name-wide cache invalidation. +- Add dependency-light media references, media upload helpers, and media reference resolution. +- Add flat read/admin APIs for sessions, observations v2, scores v2, score configs, models, metrics v2, and health. + +### Documentation +- Add the JS/Python SDK parity matrix and document the new prompt, media, and read/admin APIs. + ## [0.10.1] - 2026-05-05 ### Changed diff --git a/docs/API_REFERENCE.md b/docs/API_REFERENCE.md index b6a2107..1a55b49 100644 --- a/docs/API_REFERENCE.md +++ b/docs/API_REFERENCE.md @@ -10,6 +10,8 @@ Complete method reference for the Langfuse Ruby SDK. - [Trace ID Generation](#trace-id-generation) - [Tracing & Observability](#tracing--observability) - [Traces](#traces) +- [Media References](#media-references) +- [Read/Admin APIs](#readadmin-apis) - [Scoring](#scoring) - [Datasets](#datasets) - [Experiments](#experiments) @@ -422,6 +424,47 @@ prompt = client.update_prompt( ) ``` +### `Client#delete_prompt` + +Delete prompt versions and invalidate cached variants for the prompt name. + +**Signature:** + +```ruby +delete_prompt(name, version: nil, label: nil) # => nil +``` + +**Parameters:** + +| Parameter | Type | Required | Description | +| --------- | ------- | -------- | ----------- | +| `name` | String | Yes | Prompt name | +| `version` | Integer | No | Specific prompt version to delete | +| `label` | String | No | Delete versions matching this label | + +**Returns:** `nil` + +**Raises:** + +- `NotFoundError` if prompt/version/label is not found +- `UnauthorizedError` if credentials invalid +- `ApiError` on network/server errors + +**Example:** + +```ruby +# Delete all versions for a prompt +client.delete_prompt("support-assistant") + +# Delete only one version +client.delete_prompt("support-assistant", version: 3) + +# Delete versions carrying a label +client.delete_prompt("support-assistant", label: "staging") +``` + +After a successful delete, `delete_prompt` invalidates all cached variants for that prompt name. Edits made outside this SDK still become visible through TTL expiry, `refresh_prompt`, or explicit invalidation. + ### `Client#list_prompts` List all prompts in the project. @@ -909,6 +952,215 @@ trace = client.get_trace("trace-uuid-123") puts trace["name"] ``` +## Media References + +Media references let trace input, output, and metadata carry large media content by reference instead of embedding raw bytes directly in every payload. + +### `Langfuse::Media` + +Wrap bytes, a file, or a base64 data URI and expose the same deterministic reference-string shape used by the JS and Python SDKs. + +**Signatures:** + +```ruby +Langfuse::Media.new(content_bytes:, content_type:) +Langfuse::Media.new(file_path:, content_type:) +Langfuse::Media.new(base64_data_uri:) +``` + +**Properties:** + +| Property | Type | Description | +| -------- | ---- | ----------- | +| `content_type` | String | MIME type | +| `content_bytes` | String | Raw bytes | +| `content_length` | Integer | Byte length | +| `content_sha256_hash` | String | Base64 SHA256 digest | +| `media_id` | String | Deterministic Langfuse media ID derived from the SHA256 digest | +| `reference_string` | String | `@@@langfuseMedia:...@@@` reference token | +| `tag` | String | Alias for `reference_string` | +| `base64_data_uri` | String | Inline `data:` URI representation | + +**Example:** + +```ruby +media = Langfuse::Media.new( + content_bytes: File.binread("receipt.png"), + content_type: "image/png" +) + +media.reference_string +# => "@@@langfuseMedia:type=image/png|id=...|source=bytes@@@" +``` + +`Langfuse::LangfuseMedia` is an alias for compatibility with the upstream SDK naming. + +### `Langfuse::Media.parse_reference_string` + +Parse a reference token. + +```ruby +reference = Langfuse::Media.parse_reference_string(media.reference_string) +reference.media_id +reference.content_type +reference.source +``` + +### `Client#upload_media` + +Create a media record, upload bytes to the returned presigned URL when one is provided, patch upload status, and return the media reference string. + +**Signature:** + +```ruby +upload_media(media, trace_id:, field:, observation_id: nil, timeout: nil) # => String +``` + +**Parameters:** + +| Parameter | Type | Required | Description | +| --------- | ---- | -------- | ----------- | +| `media` | Langfuse::Media | Yes | Media wrapper | +| `trace_id` | String | Yes | Associated trace ID | +| `field` | String or Symbol | Yes | `input`, `output`, or `metadata` | +| `observation_id` | String | No | Associated observation ID | +| `timeout` | Integer | No | Upload timeout override | + +**Example:** + +```ruby +trace_id = Langfuse.create_trace_id(seed: "receipt-42") +reference = media.reference_string + +Langfuse.observe("receipt-review", { input: { image: reference } }, trace_id: trace_id) do |obs| + obs.update(output: "accepted") +end + +client.upload_media(media, trace_id: trace_id, field: :input) +``` + +### Media REST Helpers + +Flat helpers expose the underlying platform media API without introducing a nested manager: + +```ruby +client.get_media(media_id) +client.get_media_upload_url( + trace_id: trace_id, + content_type: media.content_type, + content_length: media.content_length, + sha256_hash: media.content_sha256_hash, + field: :input +) +client.patch_media( + media_id: media.media_id, + uploaded_at: Time.now.utc, + upload_http_status: 200 +) +``` + +### `Client#resolve_media_references` + +Resolve reference strings in a nested object to base64 data URIs. + +**Signature:** + +```ruby +resolve_media_references(obj:, resolve_with: :base64_data_uri, + max_depth: 10, content_fetch_timeout: 10) +``` + +**Example:** + +```ruby +payload = { input: { image: media.reference_string } } +resolved = client.resolve_media_references(obj: payload) +resolved[:input][:image] # => "data:image/png;base64,..." +``` + +Resolution is best-effort per reference: failed downloads are logged and left as reference strings so one broken media item does not destroy the whole payload. + +## Read/Admin APIs + +These are thin flat wrappers over high-value Langfuse read/admin endpoints. They return parsed response hashes or arrays and intentionally avoid nested generated-client managers. + +### Sessions + +```ruby +client.list_sessions(page: 1, limit: 20, environment: "production") +client.get_session("session-id") +``` + +`list_sessions` accepts optional API filters as Ruby snake_case keys. + +### Observations v2 + +```ruby +client.list_observations( + trace_id: "trace-id", + from_start_time: Time.utc(2026, 1, 1), + limit: 50 +) +``` + +Snake_case query keys are converted to API camelCase, and Time-like values are formatted as ISO8601. + +### Scores v2 + +```ruby +client.list_scores(trace_id: "trace-id", data_type: "NUMERIC") +client.get_score("score-id") +``` + +Creation still uses the existing score APIs documented in [Scoring](#scoring); these v2 methods are for readback. + +### Score Configs + +```ruby +config = client.create_score_config( + name: "quality", + data_type: "NUMERIC", + min_value: 0, + max_value: 1 +) + +client.list_score_configs(limit: 20) +client.get_score_config(config["id"]) +client.update_score_config(config_id: config["id"], max_value: 5) +``` + +Body keys are recursively converted from snake_case to camelCase. + +### Models + +```ruby +model = client.create_model(model_name: "gpt-4o", match_pattern: "gpt-4o") +client.list_models(limit: 20) +client.get_model(model["id"]) +client.delete_model(model["id"]) +``` + +### Metrics v2 + +```ruby +client.query_metrics( + query: { + view: "observations", + metrics: [{ measure: "count", aggregation: "count" }], + fromTimestamp: "2026-01-01T00:00:00Z", + toTimestamp: "2026-01-02T00:00:00Z" + } +) +``` + +Pass either a Ruby hash or an already-encoded JSON string. The platform expects the metrics query itself as the `query` URL parameter. + +### Health + +```ruby +client.health +``` + ## Scoring ### `Client#create_score` diff --git a/docs/CACHING.md b/docs/CACHING.md index cefbb65..d351f44 100644 --- a/docs/CACHING.md +++ b/docs/CACHING.md @@ -73,7 +73,7 @@ Cache identity is prompt name plus version or label. When neither is supplied, t Name-wide invalidation and whole-cache clear use generation counters. Old Rails.cache entries are not physically scanned or deleted; they become unreachable under the new generated storage keys and expire by TTL. -Automatic mutation invalidation only covers `create_prompt` and `update_prompt` calls made by the current SDK process. Prompt edits made in the Langfuse UI or by other SDKs become visible through TTL expiry, `refresh_prompt`, or explicit invalidation. +Automatic mutation invalidation only covers `create_prompt`, `update_prompt`, and `delete_prompt` calls made by the current SDK process. Prompt edits made in the Langfuse UI or by other SDKs become visible through TTL expiry, `refresh_prompt`, or explicit invalidation. ### Cache Events diff --git a/docs/PARITY.md b/docs/PARITY.md new file mode 100644 index 0000000..20a3718 --- /dev/null +++ b/docs/PARITY.md @@ -0,0 +1,62 @@ +# SDK Parity Matrix + +This matrix compares `langfuse-rb` against the local sibling SDKs checked into this repository: + +- Ruby: `lib/langfuse/**`, version `0.10.1` +- JS: `langfuse-js/packages/**`, package version `5.3.0` +- Python: `langfuse-python/langfuse/**`, package version `4.6.0b1` + +The goal is not raw feature-count parity. Ruby should stay framework-agnostic, dependency-light, and flat-client-first. Generated manager trees from JS/Python are evidence for API behavior, not a mandate to copy their public shape. + +## Shipped Now + +| Area | JS/Python evidence | Ruby status | +| --- | --- | --- | +| SDK identity headers | JS `LangfuseClient` and generated core client pass `X-Langfuse-Sdk-Name` / `X-Langfuse-Sdk-Version`; Python `client_wrapper.py` sets the same REST headers. Both OTLP exporters also send lower-case SDK identity headers. | `Langfuse::SdkHeaders` now centralizes REST and OTLP identity headers. REST includes `X-Langfuse-Sdk-Name`, `X-Langfuse-Sdk-Version`, and `X-Langfuse-Public-Key`; OTLP includes `x-langfuse-sdk-name`, `x-langfuse-sdk-version`, and `x-langfuse-public-key`. | +| Prompt deletion | JS `prompt.delete(name, { version, label })` calls generated DELETE `/api/public/v2/prompts/{promptName}`; Python generated `prompts.delete` exposes the same endpoint. | `client.delete_prompt(name, version: nil, label: nil)` deletes prompt versions and invalidates all cached variants for that prompt name. Ruby returns `nil` for 204 responses instead of leaking transport details. | +| Media references | JS and Python expose `LangfuseMedia`, deterministic reference strings, reference parsing, reference resolution, and media upload APIs. | `Langfuse::Media` / `Langfuse::LangfuseMedia` support bytes, file, and base64 data URI input; deterministic media IDs; reference string parsing; nested reference resolution to base64 data URIs; and `get_media`, `get_media_upload_url`, `patch_media`, `upload_media`. | +| Sessions | JS/Python generated clients expose `/api/public/sessions` list and get. | `client.list_sessions(**filters)` and `client.get_session(session_id)` are flat read APIs. | +| Observations v2 | JS/Python generated clients expose GET `/api/public/v2/observations`. | `client.list_observations(**filters)` is a thin v2 read API with Ruby snake_case query keys converted to API camelCase. | +| Scores v2 | JS/Python generated clients expose GET `/api/public/v2/scores` and GET by score ID. | `client.list_scores(**filters)` and `client.get_score(score_id)` cover v2 readback while existing score creation remains batched and flat. | +| Score configs | JS/Python generated clients expose create/list/get/update under `/api/public/score-configs`. | `client.create_score_config`, `list_score_configs`, `get_score_config`, and `update_score_config` provide thin admin access with recursive snake_case to camelCase body conversion. | +| Models | JS/Python generated clients expose create/list/get/delete under `/api/public/models`. | `client.create_model`, `list_models`, `get_model`, and `delete_model` provide thin model admin access. | +| Metrics v2 | JS/Python generated clients expose GET `/api/public/v2/metrics`. | `client.query_metrics(query:)` accepts a JSON string or a Ruby hash encoded as the API `query` parameter. | +| Health | JS/Python generated clients expose GET `/api/public/health`. | `client.health` exposes the same check. | + +## Separate Issues + +These gaps are real, but they are not the same kind of work as AAI-129. + +| Gap | Why separate | +| --- | --- | +| Full generated REST resource tree: annotation queues, comments, organizations, projects, LLM connections, blob storage integrations, SCIM, prompt-version namespace, trace delete/update, OpenTelemetry generated namespace | Shipping all of this as hand-written flat Ruby methods would either bloat the SDK or recreate generated-client machinery under another name. Each surface needs a Rails-facing use case before it belongs in the public Ruby client. | +| Experiment/eval ergonomics beyond the current runner | The useful work is run lifecycle, result comparison, and score attachment around real eval workflows. That coordinates with AAI-6 rather than landing as generic API breadth here. | +| Automatic media extraction from tracing payloads | JS/Python include task managers or media services that walk payloads and upload media in the background. Ruby now has the safe primitives; automatic span-payload rewriting needs a separate design because it changes tracing hot-path behavior. | +| Deeper v4 ingestion semantics | This branch aligns SDK identity headers and keeps existing v4-shaped observation primitives. Any additional ingestion-contract work should coordinate with AAI-67 rather than expanding this PR past observable parity. | + +## Deferred + +| Gap | Reason | +| --- | --- | +| Generated client machinery | Adds maintenance and dependency weight that conflicts with the current Ruby SDK design. Thin flat APIs are enough for the high-value Rails workflows. | +| Async media upload manager | Ruby already has explicit upload primitives. A background queue would need lifecycle, shutdown, retry, and error-reporting decisions. That is real architecture, not a parity checkbox. | +| Framework integrations copied from JS/Python | Ruby should stay framework-agnostic. Rails examples and cache support belong here; Rails as a gem dependency does not. | + +## Not Applicable To Ruby + +| JS/Python shape | Ruby decision | +| --- | --- | +| JS nested managers such as `langfuse.prompt.delete` or generated `client.prompts.delete` | Ruby keeps the flat API: `client.delete_prompt`. | +| Python decorator/context APIs copied literally | Ruby already exposes block/stateful observation APIs that match Ruby idioms better than decorator mimicry. | +| OpenAI/LangChain framework packages as SDK dependencies | Integrations can exist outside the core gem. The core SDK stays dependency-light. | +| Browser or Node-specific media objects | Ruby media input is bytes, file path, or base64 data URI. | + +## Validation Map + +| Requirement | Evidence | +| --- | --- | +| Local unit coverage | `spec/langfuse/api_client_spec.rb`, `spec/langfuse/client_spec.rb`, `spec/langfuse/media_spec.rb`, `spec/langfuse/otel_setup_spec.rb` | +| Client to ApiClient mocked HTTP coverage | WebMock specs assert REST paths, query/body mapping, cache invalidation, media upload PUT, and 204 delete semantics. | +| YARD docs for public methods | New public methods have YARD docs in `ApiClient`, delegated client docs, and consumer docs in `API_REFERENCE.md`. | +| Live platform validation | Use a local scratchpad verifier with Langfuse credentials plus Langfuse CLI discovery output in the PR evidence. | +| Caveats | This matrix is committed so the PR states what shipped, what did not ship, and why. | diff --git a/docs/PROMPTS.md b/docs/PROMPTS.md index 7e42f6e..f0f6e62 100644 --- a/docs/PROMPTS.md +++ b/docs/PROMPTS.md @@ -418,6 +418,23 @@ puts prompt.labels # => ["production"] **Note:** Only labels can be updated. Prompt content is immutable after creation—create a new version instead. +### `delete_prompt` - Delete Versions + +Delete all versions for a prompt, one explicit version, or versions carrying a label: + +```ruby +# Delete all versions +client.delete_prompt("checkout-flow") + +# Delete one version +client.delete_prompt("checkout-flow", version: 3) + +# Delete versions carrying a label +client.delete_prompt("checkout-flow", label: "staging") +``` + +`delete_prompt` returns `nil` on success and invalidates all cached variants for that prompt name in the current SDK process. + ### Promotion Workflow Example A typical promotion workflow: diff --git a/docs/README.md b/docs/README.md index 7522069..522d1c3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -21,6 +21,7 @@ This is the consumer hub. Start here unless you are already looking for a specif ### Instrument an App - **[Tracing](TRACING.md)** — Observation hierarchy, propagation, background jobs, explicit global install +- **[API Reference](API_REFERENCE.md#media-references)** — Media reference upload, parsing, and resolution - **[Rails](RAILS.md)** — Rails-specific patterns for controllers, services, jobs, and tests - **[Scoring](SCORING.md)** — Capture quality signals after a trace exists @@ -39,5 +40,6 @@ This is the consumer hub. Start here unless you are already looking for a specif ### Reference - **[API Reference](API_REFERENCE.md)** — Exact public signatures and types +- **[SDK Parity Matrix](PARITY.md)** — What matches JS/Python, what is intentionally separate, and what is out of scope - **[Configuration](CONFIGURATION.md)** — Option-by-option config reference - **[Architecture](ARCHITECTURE.md)** — Implementation and internal design reference, not required for the first run diff --git a/lib/langfuse.rb b/lib/langfuse.rb index 5e5e28f..0aed07e 100644 --- a/lib/langfuse.rb +++ b/lib/langfuse.rb @@ -40,6 +40,7 @@ class UnauthorizedError < ApiError; end end require_relative "langfuse/config" +require_relative "langfuse/sdk_headers" require_relative "langfuse/cache_constants" require_relative "langfuse/prompt_cache" require_relative "langfuse/prompt_fetch_result" @@ -59,6 +60,7 @@ class UnauthorizedError < ApiError; end require_relative "langfuse/trace_id" require_relative "langfuse/score_client" require_relative "langfuse/prompt_renderer" +require_relative "langfuse/media" require_relative "langfuse/text_prompt_client" require_relative "langfuse/chat_prompt_client" require_relative "langfuse/timestamp_parser" @@ -322,6 +324,23 @@ def flush_scores client.flush_scores if @client end + # Resolve Langfuse media reference tokens in a nested object using the global client. + # + # @param obj [Object] Object to traverse + # @param resolve_with [Symbol, String] Only :base64_data_uri is supported + # @param max_depth [Integer] Maximum nested traversal depth + # @param content_fetch_timeout [Integer] Media download timeout in seconds + # @return [Object] Copy of obj with resolvable media references replaced + # @raise [ArgumentError] when resolve_with is unsupported + def resolve_media_references(obj:, resolve_with: :base64_data_uri, max_depth: 10, content_fetch_timeout: 10) + client.resolve_media_references( + obj: obj, + resolve_with: resolve_with, + max_depth: max_depth, + content_fetch_timeout: content_fetch_timeout + ) + end + # Generate a trace ID (deterministic when seeded, random otherwise). # # Use this to correlate Langfuse traces with external identifiers. The diff --git a/lib/langfuse/api_client.rb b/lib/langfuse/api_client.rb index 551910f..4106640 100644 --- a/lib/langfuse/api_client.rb +++ b/lib/langfuse/api_client.rb @@ -4,7 +4,9 @@ require "faraday/retry" require "base64" require "json" +require "time" require "uri" +require_relative "sdk_headers" require_relative "prompt_fetch_result" require_relative "prompt_cache_coordinator" @@ -268,6 +270,88 @@ def update_prompt(name:, version:, labels:) .tap { @prompt_cache_coordinator.invalidate_after_mutation(name) } end + # Delete prompt versions. + # + # @param name [String] Prompt name + # @param version [Integer, nil] Optional version to delete + # @param label [String, nil] Optional label filter for deletion + # @return [nil] + # @raise [NotFoundError] if the prompt is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def delete_prompt(name, version: nil, label: nil) + path = "/api/public/v2/prompts/#{URI.encode_uri_component(name)}" + request(:delete, path, params: { version: version, label: label }.compact) + @prompt_cache_coordinator.invalidate_after_mutation(name) + nil + end + + # Fetch a media record and its temporary download URL. + # + # @param media_id [String] Langfuse media ID + # @return [Hash] Media record + # @raise [NotFoundError] if the media record is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def get_media(media_id) + request(:get, "/api/public/media/#{URI.encode_uri_component(media_id)}") + end + + # Get a presigned upload URL for a media record. + # + # @param trace_id [String] Associated trace ID + # @param content_type [String] MIME type + # @param content_length [Integer] Media byte length + # @param sha256_hash [String] Base64-encoded SHA256 digest + # @param field [String, Symbol] Trace/observation field: input, output, or metadata + # @param observation_id [String, nil] Associated observation ID + # @return [Hash] Upload URL response + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def get_media_upload_url(trace_id:, content_type:, content_length:, sha256_hash:, field:, observation_id: nil) + request(:post, "/api/public/media", body: { + traceId: trace_id, observationId: observation_id, contentType: content_type, + contentLength: content_length, sha256Hash: sha256_hash, field: field + }.compact) + end + + # Patch media upload status after uploading to the presigned URL. + # + # @param media_id [String] Langfuse media ID + # @param uploaded_at [Time, String] Upload completion timestamp + # @param upload_http_status [Integer] HTTP status returned by object storage + # @param upload_http_error [String, nil] Upload error message + # @param upload_time_ms [Integer, nil] Upload duration in milliseconds + # @return [Hash] Patched media record + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def patch_media(media_id:, uploaded_at:, upload_http_status:, upload_http_error: nil, upload_time_ms: nil) + request(:patch, "/api/public/media/#{URI.encode_uri_component(media_id)}", body: { + uploadedAt: format_timestamp(uploaded_at), uploadHttpStatus: upload_http_status, + uploadHttpError: upload_http_error, uploadTimeMs: upload_time_ms + }.compact) + end + + # Upload media bytes through Langfuse's presigned media flow. + # + # @param media [Media] Media wrapper + # @param trace_id [String] Associated trace ID + # @param field [String, Symbol] Trace/observation field: input, output, or metadata + # @param observation_id [String, nil] Associated observation ID + # @param timeout [Integer, nil] Upload timeout override + # @return [String] Langfuse media reference token + # @raise [ArgumentError] if media is invalid + # @raise [ApiError] if the upload fails + def upload_media(media, trace_id:, field:, observation_id: nil, timeout: nil) + validate_media_upload!(media) + upload = get_media_upload_url( + trace_id: trace_id, content_type: media.content_type, content_length: media.content_length, + sha256_hash: media.content_sha256_hash, field: field, observation_id: observation_id + ) + upload_media_to_presigned_url(media, upload, timeout: timeout) + media.reference_string + end + # Send a batch of events to the Langfuse ingestion API # # Sends events (scores, traces, observations) to the ingestion endpoint. @@ -389,6 +473,196 @@ def get_projects # rubocop:disable Naming/AccessorMethodName request(:get, "/api/public/projects") end + # Check Langfuse API health. + # + # @return [Hash] Health response + # @raise [ApiError] for API errors + def health + request(:get, "/api/public/health") + end + + # List sessions. + # + # @param filters [Hash] Optional filters: page, limit, from_timestamp, to_timestamp, environment + # @return [Array] Session records + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def list_sessions(**filters) + list_sessions_paginated(**filters)["data"] || [] + end + + # Fetch one session including traces. + # + # @param session_id [String] Session ID + # @return [Hash] Session record + # @raise [NotFoundError] if the session is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def get_session(session_id) + request(:get, "/api/public/sessions/#{URI.encode_uri_component(session_id)}") + end + + # Full paginated sessions response including "meta". + # + # @api private + # @param filters [Hash] Optional filters + # @return [Hash] Full response hash + def list_sessions_paginated(**filters) + request(:get, "/api/public/sessions", params: transform_query_options(filters)) + end + + # List observations through the v2 read API. + # + # @param filters [Hash] Optional v2 filters using snake_case keys + # @return [Array] Observation records + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def list_observations(**filters) + list_observations_paginated(**filters)["data"] || [] + end + + # Full v2 observations response including cursor metadata. + # + # @api private + # @param filters [Hash] Optional v2 filters using snake_case keys + # @return [Hash] Full response hash + def list_observations_paginated(**filters) + request(:get, "/api/public/v2/observations", params: transform_query_options(filters)) + end + + # List scores through the v2 read API. + # + # @param filters [Hash] Optional v2 filters using snake_case keys + # @return [Array] Score records + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def list_scores(**filters) + list_scores_paginated(**filters)["data"] || [] + end + + # Fetch one score through the v2 read API. + # + # @param score_id [String] Score ID + # @return [Hash] Score record + # @raise [NotFoundError] if the score is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def get_score(score_id) + request(:get, "/api/public/v2/scores/#{URI.encode_uri_component(score_id)}") + end + + # Full v2 scores response including "meta". + # + # @api private + # @param filters [Hash] Optional v2 filters using snake_case keys + # @return [Hash] Full response hash + def list_scores_paginated(**filters) + request(:get, "/api/public/v2/scores", params: transform_query_options(filters)) + end + + # Create a score config. + # + # @param attributes [Hash] Score config attributes using snake_case keys + # @return [Hash] Score config record + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def create_score_config(**attributes) + request(:post, "/api/public/score-configs", body: transform_body_options(attributes)) + end + + # List score configs. + # + # @param page [Integer, nil] Page number + # @param limit [Integer, nil] Page size + # @return [Array] Score config records + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def list_score_configs(page: nil, limit: nil) + request(:get, "/api/public/score-configs", params: { page: page, limit: limit }.compact)["data"] || [] + end + + # Fetch one score config. + # + # @param config_id [String] Score config ID + # @return [Hash] Score config record + # @raise [NotFoundError] if the score config is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def get_score_config(config_id) + request(:get, "/api/public/score-configs/#{URI.encode_uri_component(config_id)}") + end + + # Update a score config. + # + # @param config_id [String] Score config ID + # @param attributes [Hash] Score config attributes using snake_case keys + # @return [Hash] Score config record + # @raise [NotFoundError] if the score config is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def update_score_config(config_id:, **attributes) + request(:patch, "/api/public/score-configs/#{URI.encode_uri_component(config_id)}", + body: transform_body_options(attributes)) + end + + # Create a model. + # + # @param attributes [Hash] Model attributes using snake_case keys + # @return [Hash] Model record + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def create_model(**attributes) + request(:post, "/api/public/models", body: transform_body_options(attributes)) + end + + # List models. + # + # @param page [Integer, nil] Page number + # @param limit [Integer, nil] Page size + # @return [Array] Model records + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def list_models(page: nil, limit: nil) + request(:get, "/api/public/models", params: { page: page, limit: limit }.compact)["data"] || [] + end + + # Fetch one model. + # + # @param id [String] Model ID + # @return [Hash] Model record + # @raise [NotFoundError] if the model is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def get_model(id) + request(:get, "/api/public/models/#{URI.encode_uri_component(id)}") + end + + # Delete one model. + # + # @param id [String] Model ID + # @return [nil] + # @raise [NotFoundError] if the model is not found + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def delete_model(id) + request(:delete, "/api/public/models/#{URI.encode_uri_component(id)}") + nil + end + + # Query metrics through the v2 metrics API. + # + # @param query [Hash, String] Metrics query hash or JSON string + # @return [Hash] Metrics response + # @raise [ArgumentError] if query is empty + # @raise [UnauthorizedError] if authentication fails + # @raise [ApiError] for other API errors + def query_metrics(query:) + raise ArgumentError, "query is required" if query.nil? || query == "" + + encoded_query = query.is_a?(String) ? query : JSON.generate(query) + request(:get, "/api/public/v2/metrics", params: { query: encoded_query }) + end + # Shut down the API client and release resources # # Shuts down the cache backend's SWR thread pool when present. @@ -618,6 +892,122 @@ def cache_backend_name @prompt_cache_coordinator.backend_name end + def validate_media_upload!(media) + return if media.respond_to?(:valid?) && media.valid? && media.respond_to?(:reference_string) + + raise ArgumentError, "media must be a valid Langfuse::Media" + end + + def upload_media_to_presigned_url(media, upload, timeout:) + upload_url = upload["uploadUrl"] + return if upload_url.nil? || upload_url.empty? + + started = Process.clock_gettime(Process::CLOCK_MONOTONIC) + response = perform_media_put(upload_url, media, timeout) + patch_uploaded_media(media, response, started) + raise ApiError, "Media upload failed (#{response.status})" unless response.status.between?(200, 299) + rescue Faraday::Error => e + patch_failed_media(media, e, started) + raise ApiError, "Media upload failed: #{e.message}" + end + + def perform_media_put(upload_url, media, timeout) + media_upload_connection(upload_url, timeout).put do |request| + media_upload_headers(upload_url, media).each { |key, value| request.headers[key] = value } + request.body = media.content_bytes + end + end + + def patch_uploaded_media(media, response, started) + patch_media( + media_id: media.media_id, + uploaded_at: Time.now.utc, + upload_http_status: response.status, + upload_http_error: response.status.between?(200, 299) ? nil : response.body.to_s, + upload_time_ms: elapsed_ms(started) + ) + end + + def patch_failed_media(media, error, started) + patch_media( + media_id: media.media_id, + uploaded_at: Time.now.utc, + upload_http_status: 0, + upload_http_error: error.message, + upload_time_ms: elapsed_ms(started) + ) + rescue ApiError + nil + end + + def media_upload_connection(upload_url, timeout) + Faraday.new(url: upload_url) do |conn| + conn.options.timeout = timeout || @timeout + conn.adapter Faraday.default_adapter + end + end + + def media_upload_headers(upload_url, media) + headers = { "Content-Type" => media.content_type } + return headers if gcs_upload_url?(upload_url) + + headers.merge( + "x-amz-checksum-sha256" => media.content_sha256_hash, + "x-ms-blob-type" => "BlockBlob" + ) + end + + def gcs_upload_url?(upload_url) + host = URI.parse(upload_url).host.to_s + host == "storage.googleapis.com" || host.end_with?(".storage.googleapis.com") + rescue URI::InvalidURIError + false + end + + def elapsed_ms(started) + ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000).round + end + + def transform_query_options(options) + transform_options(options) { |value| format_query_value(value) } + end + + def transform_body_options(options) + transform_options(options) { |value| transform_body_value(value) } + end + + def transform_body_value(value) + case value + when Hash + transform_body_options(value) + when Array + value.map { |item| transform_body_value(item) } + else + value + end + end + + def transform_options(options) + options.each_with_object({}) do |(key, value), params| + next if value.nil? + + params[camelize_key(key)] = yield(value) + end + end + + def camelize_key(key) + parts = key.to_s.split("_") + ([parts.first] + parts[1..].map(&:capitalize)).join.to_sym + end + + def format_query_value(value) + value.respond_to?(:iso8601) ? value.iso8601 : value + end + + def format_timestamp(value) + value.respond_to?(:iso8601) ? value.iso8601 : value + end + # Issue an HTTP request, raise on Faraday errors, parse the response. # # @api private @@ -714,7 +1104,7 @@ def default_headers "Authorization" => authorization_header, "User-Agent" => user_agent, "Content-Type" => "application/json" - } + }.merge(SdkHeaders.rest(public_key: public_key)) end # Generate Basic Auth header @@ -761,6 +1151,8 @@ def handle_response(response) case response.status when 200, 201 response.body + when 204 + nil when 401 raise UnauthorizedError, "Authentication failed. Check your API keys." when 404 diff --git a/lib/langfuse/client.rb b/lib/langfuse/client.rb index 1a82d17..09ebbef 100644 --- a/lib/langfuse/client.rb +++ b/lib/langfuse/client.rb @@ -42,6 +42,26 @@ class Client # @!method prompt_cache_stats # @!method prompt_cache_key(name, version: nil, label: nil) # @!method validate_prompt_cache_backend! + # @!method delete_prompt(name, version: nil, label: nil) + # @!method get_media(media_id) + # @!method upload_media(media, trace_id:, field:, observation_id: nil, timeout: nil) + # @!method get_media_upload_url(trace_id:, content_type:, content_length:, sha256_hash:, field:, observation_id: nil) + # @!method patch_media(media_id:, uploaded_at:, upload_http_status:, upload_http_error: nil, upload_time_ms: nil) + # @!method health + # @!method list_sessions(**filters) + # @!method get_session(session_id) + # @!method list_observations(**filters) + # @!method list_scores(**filters) + # @!method get_score(score_id) + # @!method create_score_config(**attributes) + # @!method list_score_configs(page: nil, limit: nil) + # @!method get_score_config(config_id) + # @!method update_score_config(config_id:, **attributes) + # @!method create_model(**attributes) + # @!method list_models(page: nil, limit: nil) + # @!method get_model(id) + # @!method delete_model(id) + # @!method query_metrics(query:) # @!method list_traces(**options) # @!method get_trace(id) # @!method list_datasets(page: nil, limit: nil) @@ -55,6 +75,26 @@ class Client :prompt_cache_stats, :prompt_cache_key, :validate_prompt_cache_backend!, + :delete_prompt, + :get_media, + :upload_media, + :get_media_upload_url, + :patch_media, + :health, + :list_sessions, + :get_session, + :list_observations, + :list_scores, + :get_score, + :create_score_config, + :list_score_configs, + :get_score_config, + :update_score_config, + :create_model, + :list_models, + :get_model, + :delete_model, + :query_metrics, :list_traces, :get_trace, :list_datasets, @@ -302,6 +342,24 @@ def update_prompt(name:, version:, labels:) build_prompt_client(prompt_data) end + # Resolve Langfuse media reference tokens in a nested object. + # + # @param obj [Object] Object to traverse + # @param resolve_with [Symbol, String] Only :base64_data_uri is supported + # @param max_depth [Integer] Maximum nested traversal depth + # @param content_fetch_timeout [Integer] Media download timeout in seconds + # @return [Object] Copy of obj with resolvable media references replaced + # @raise [ArgumentError] when resolve_with is unsupported + def resolve_media_references(obj:, resolve_with: :base64_data_uri, max_depth: 10, content_fetch_timeout: 10) + Media.resolve_references( + obj, + client: self, + resolve_with: resolve_with, + max_depth: max_depth, + content_fetch_timeout: content_fetch_timeout + ) + end + # Lazily-fetched project ID for URL generation # # Fetches the project ID from the API on first access and caches it. diff --git a/lib/langfuse/media.rb b/lib/langfuse/media.rb new file mode 100644 index 0000000..134c1f0 --- /dev/null +++ b/lib/langfuse/media.rb @@ -0,0 +1,195 @@ +# frozen_string_literal: true + +require "base64" +require "digest" +require "faraday" +require "json" + +module Langfuse + # Parsed Langfuse media reference token. + MediaReference = Struct.new(:media_id, :source, :content_type, keyword_init: true) + + # Dependency-light wrapper for media references used in trace input/output/metadata. + class Media + REFERENCE_PATTERN = /@@@langfuseMedia:.+?@@@/ + PREFIX = "@@@langfuseMedia:" + SUFFIX = "@@@" + + # @return [Object, nil] Optional application object wrapped by this media helper + attr_reader :obj + + # @return [String] Source type: base64_data_uri, bytes, or file + attr_reader :source + + # @return [String] MIME type + attr_reader :content_type + + # @return [String] Raw media bytes + attr_reader :content_bytes + + # @param obj [Object, nil] Optional application object to carry alongside media content + # @param base64_data_uri [String, nil] Base64 data URI + # @param content_type [String, nil] MIME type when using bytes or file_path + # @param content_bytes [String, nil] Raw media bytes + # @param file_path [String, nil] File path to read as media bytes + # @return [Media] + # @raise [ArgumentError] when no valid media source is provided + def initialize(obj: nil, base64_data_uri: nil, content_type: nil, content_bytes: nil, file_path: nil) + @obj = obj + assign_content(base64_data_uri: base64_data_uri, content_type: content_type, + content_bytes: content_bytes, file_path: file_path) + raise ArgumentError, "media content and content_type are required" unless valid? + end + + # @return [Boolean] true when the media has bytes, content type, and source + def valid? + !content_bytes.nil? && !content_type.nil? && !source.nil? + end + + # @return [Integer] Media byte length + def content_length + content_bytes.bytesize + end + + # @return [String] Base64-encoded SHA256 digest + def content_sha256_hash + Base64.strict_encode64(Digest::SHA256.digest(content_bytes)) + end + + # @return [String] Deterministic Langfuse media ID derived from content hash + def media_id + content_sha256_hash.tr("+/", "-_")[0, 22] + end + + # @return [String] Langfuse media reference token + def reference_string + "#{PREFIX}type=#{content_type}|id=#{media_id}|source=#{source}#{SUFFIX}" + end + alias tag reference_string + + # @return [String] Media content as a base64 data URI + def base64_data_uri + "data:#{content_type};base64,#{Base64.strict_encode64(content_bytes)}" + end + + # @return [String] JSON representation compatible with JS/Python SDK media wrappers + def to_json(*) + base64_data_uri.to_json(*) + end + + class << self + # Parse a Langfuse media reference token. + # + # @param reference_string [String] Reference token + # @return [MediaReference] Parsed reference + # @raise [ArgumentError] when the token is malformed + def parse_reference_string(reference_string) + validate_reference_string!(reference_string) + parsed = reference_string[PREFIX.length...-SUFFIX.length].split("|").to_h { |pair| pair.split("=", 2) } + unless parsed.values_at("type", "id", "source").all? + raise ArgumentError, "Missing required fields in reference string" + end + + MediaReference.new(media_id: parsed.fetch("id"), source: parsed.fetch("source"), + content_type: parsed.fetch("type")) + rescue NoMethodError + raise ArgumentError, "Reference string is not a string" + end + + # Resolve Langfuse media reference tokens in a nested object. + # + # @param obj [Object] Object to traverse + # @param client [Client] Langfuse client used to fetch media records + # @param resolve_with [Symbol, String] Only :base64_data_uri is supported + # @param max_depth [Integer] Maximum nested traversal depth + # @param content_fetch_timeout [Integer] Media download timeout in seconds + # @return [Object] Copy of obj with resolvable references replaced + # @raise [ArgumentError] when resolve_with is unsupported + def resolve_references(obj, client:, resolve_with: :base64_data_uri, max_depth: 10, content_fetch_timeout: 10) + raise ArgumentError, "resolve_with must be :base64_data_uri" unless resolve_with.to_s == "base64_data_uri" + + logger = client.respond_to?(:config) ? client.config.logger : nil + traverse(obj, client, 0, max_depth, content_fetch_timeout, logger) + end + + private + + def validate_reference_string!(reference_string) + raise ArgumentError, "Reference string is empty" if reference_string.nil? || reference_string.empty? + unless reference_string.start_with?("#{PREFIX}type=") + raise ArgumentError, "Reference string does not start with '#{PREFIX}type='" + end + raise ArgumentError, "Reference string does not end with '#{SUFFIX}'" unless reference_string.end_with?(SUFFIX) + end + + def traverse(obj, client, depth, max_depth, timeout, logger) + return obj if depth > max_depth + return resolve_string(obj, client, timeout, logger) if obj.is_a?(String) + return obj.map { |item| traverse(item, client, depth + 1, max_depth, timeout, logger) } if obj.is_a?(Array) + + if obj.is_a?(Hash) + return obj.transform_values { |value| traverse(value, client, depth + 1, max_depth, timeout, logger) } + end + + obj + end + + def resolve_string(value, client, timeout, logger) + value.gsub(REFERENCE_PATTERN) do |reference_string| + resolve_reference_string(reference_string, client, timeout) + rescue StandardError => e + logger&.warn("Langfuse media reference resolution failed: #{e.message}") + reference_string + end + end + + def resolve_reference_string(reference_string, client, timeout) + reference = parse_reference_string(reference_string) + media = client.get_media(reference.media_id) + response = media_download_connection(media.fetch("url"), timeout).get + raise ApiError, "Media download failed (#{response.status})" unless response.status == 200 + + "data:#{media.fetch('contentType')};base64,#{Base64.strict_encode64(response.body.b)}" + end + + def media_download_connection(url, timeout) + Faraday.new(url: url) do |conn| + conn.options.timeout = timeout + conn.adapter Faraday.default_adapter + end + end + end + + private + + def assign_content(base64_data_uri:, content_type:, content_bytes:, file_path:) + if base64_data_uri + @content_bytes, @content_type = parse_base64_data_uri(base64_data_uri) + @source = "base64_data_uri" + elsif content_bytes && content_type + @content_bytes = content_bytes.b + @content_type = content_type + @source = "bytes" + elsif file_path && content_type + @content_bytes = File.binread(file_path) + @content_type = content_type + @source = "file" + end + end + + def parse_base64_data_uri(data_uri) + header, encoded = data_uri.delete_prefix("data:").split(",", 2) + raise ArgumentError, "base64_data_uri must start with data:" unless data_uri.start_with?("data:") + raise ArgumentError, "base64_data_uri must include ;base64" unless header&.split(";")&.include?("base64") + raise ArgumentError, "base64_data_uri must include content type" if header.split(";").first.to_s.empty? + + [Base64.strict_decode64(encoded), header.split(";").first] + rescue ArgumentError + raise + rescue StandardError + raise ArgumentError, "base64_data_uri is invalid" + end + end + + LangfuseMedia = Media +end diff --git a/lib/langfuse/otel_setup.rb b/lib/langfuse/otel_setup.rb index e4bfaa1..fa0233e 100644 --- a/lib/langfuse/otel_setup.rb +++ b/lib/langfuse/otel_setup.rb @@ -3,6 +3,7 @@ require "opentelemetry/sdk" require "opentelemetry/exporter/otlp" require "base64" +require_relative "sdk_headers" module Langfuse # OpenTelemetry initialization and setup for Langfuse tracing. @@ -173,7 +174,7 @@ def blank?(value) def build_headers(public_key, secret_key) credentials = "#{public_key}:#{secret_key}" encoded = Base64.strict_encode64(credentials) - { "Authorization" => "Basic #{encoded}" } + { "Authorization" => "Basic #{encoded}" }.merge(SdkHeaders.otlp(public_key: public_key)) end def build_sampler(sample_rate) diff --git a/lib/langfuse/sdk_headers.rb b/lib/langfuse/sdk_headers.rb new file mode 100644 index 0000000..e61f561 --- /dev/null +++ b/lib/langfuse/sdk_headers.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Langfuse + # Shared SDK identity headers used by REST and OTLP clients. + module SdkHeaders + SDK_NAME = "ruby" + + class << self + # @param public_key [String] Langfuse public API key + # @return [Hash] REST API SDK identity headers + def rest(public_key:) + { + "X-Langfuse-Sdk-Name" => SDK_NAME, + "X-Langfuse-Sdk-Version" => Langfuse::VERSION, + "X-Langfuse-Public-Key" => public_key + } + end + + # @param public_key [String] Langfuse public API key + # @return [Hash] OTLP exporter SDK identity headers + def otlp(public_key:) + { + "x-langfuse-sdk-name" => SDK_NAME, + "x-langfuse-sdk-version" => Langfuse::VERSION, + "x-langfuse-public-key" => public_key + } + end + end + end +end diff --git a/spec/langfuse/api_client_spec.rb b/spec/langfuse/api_client_spec.rb index 7ad7635..fbb072b 100644 --- a/spec/langfuse/api_client_spec.rb +++ b/spec/langfuse/api_client_spec.rb @@ -84,6 +84,14 @@ conn = api_client.connection expect(conn.headers["Content-Type"]).to eq("application/json") end + + it "includes Langfuse SDK identity headers" do + conn = api_client.connection + + expect(conn.headers["X-Langfuse-Sdk-Name"]).to eq("ruby") + expect(conn.headers["X-Langfuse-Sdk-Version"]).to eq(Langfuse::VERSION) + expect(conn.headers["X-Langfuse-Public-Key"]).to eq(public_key) + end end describe "#authorization_header" do @@ -2816,4 +2824,259 @@ def increment(key, amount) end end end + + describe "SDK parity APIs" do + # rubocop:disable RSpec/MultipleMemoizedHelpers + describe "#delete_prompt" do + let(:prompt_name) { "greeting" } + let(:prompt_url) { "#{base_url}/api/public/v2/prompts/#{prompt_name}" } + let(:cache) { Langfuse::PromptCache.new(ttl: 60) } + let(:cached_client) do + described_class.new(public_key: public_key, secret_key: secret_key, base_url: base_url, cache: cache) + end + let(:prompt_response) do + { + "id" => "prompt-123", + "name" => prompt_name, + "version" => 1, + "prompt" => "Hello {{name}}!", + "type" => "text", + "labels" => ["production"] + } + end + + it "deletes all prompt versions and invalidates cached variants by name" do + stub_request(:get, prompt_url) + .to_return( + { status: 200, body: prompt_response.merge("version" => 1).to_json, + headers: { "Content-Type" => "application/json" } }, + { status: 200, body: prompt_response.merge("version" => 2).to_json, + headers: { "Content-Type" => "application/json" } } + ) + stub_request(:delete, prompt_url).to_return(status: 204, body: "") + + cached_client.get_prompt(prompt_name) + result = cached_client.delete_prompt(prompt_name) + fetched = cached_client.get_prompt(prompt_name) + + expect(result).to be_nil + expect(fetched["version"]).to eq(2) + expect(a_request(:delete, prompt_url)).to have_been_made.once + expect(a_request(:get, prompt_url)).to have_been_made.twice + end + + it "passes version and label query parameters when provided" do + stub_request(:delete, prompt_url) + .with(query: { version: "2", label: "staging" }) + .to_return(status: 204, body: "") + + api_client.delete_prompt(prompt_name, version: 2, label: "staging") + + expect(a_request(:delete, prompt_url) + .with(query: { version: "2", label: "staging" })).to have_been_made.once + end + end + # rubocop:enable RSpec/MultipleMemoizedHelpers + + describe "media APIs" do + let(:media) { Langfuse::Media.new(content_bytes: "hello", content_type: "text/plain") } + + it "fetches media records" do + stub_request(:get, "#{base_url}/api/public/media/#{media.media_id}") + .to_return(status: 200, body: { mediaId: media.media_id, contentType: "text/plain" }.to_json, + headers: { "Content-Type" => "application/json" }) + + result = api_client.get_media(media.media_id) + + expect(result["mediaId"]).to eq(media.media_id) + end + + it "creates upload URLs with Ruby keyword arguments mapped to API fields" do + stub_request(:post, "#{base_url}/api/public/media") + .with(body: hash_including( + "traceId" => "trace-123", + "contentType" => "text/plain", + "contentLength" => 5, + "sha256Hash" => media.content_sha256_hash, + "field" => "input" + )) + .to_return(status: 200, body: { mediaId: media.media_id, uploadUrl: nil }.to_json, + headers: { "Content-Type" => "application/json" }) + + result = api_client.get_media_upload_url( + trace_id: "trace-123", + content_type: media.content_type, + content_length: media.content_length, + sha256_hash: media.content_sha256_hash, + field: "input" + ) + + expect(result["mediaId"]).to eq(media.media_id) + end + + it "uploads media bytes and patches upload status" do + upload_url = "https://media-upload.langfuse.test/upload/#{media.media_id}" + stub_request(:post, "#{base_url}/api/public/media") + .to_return(status: 200, body: { mediaId: media.media_id, uploadUrl: upload_url }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:put, upload_url) + .with do |request| + request.body == "hello" && + request.headers["Content-Type"] == "text/plain" && + request.headers["X-Amz-Checksum-Sha256"] == media.content_sha256_hash && + request.headers["X-Ms-Blob-Type"] == "BlockBlob" + end + .to_return(status: 200, body: "") + stub_request(:patch, "#{base_url}/api/public/media/#{media.media_id}") + .with(body: hash_including("uploadHttpStatus" => 200)) + .to_return(status: 200, body: { mediaId: media.media_id }.to_json, + headers: { "Content-Type" => "application/json" }) + + result = api_client.upload_media(media, trace_id: "trace-123", field: "input") + + expect(result).to eq(media.reference_string) + expect(a_request(:put, upload_url)).to have_been_made.once + end + + it "omits provider-specific upload headers for GCS presigned URLs" do + upload_url = "https://storage.googleapis.com/langfuse-media/#{media.media_id}" + stub_request(:post, "#{base_url}/api/public/media") + .to_return(status: 200, body: { mediaId: media.media_id, uploadUrl: upload_url }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:put, upload_url) + .with do |request| + request.headers["Content-Type"] == "text/plain" && + request.headers["X-Amz-Checksum-Sha256"].nil? && + request.headers["X-Ms-Blob-Type"].nil? + end + .to_return(status: 200, body: "") + stub_request(:patch, "#{base_url}/api/public/media/#{media.media_id}") + .to_return(status: 200, body: { mediaId: media.media_id }.to_json, + headers: { "Content-Type" => "application/json" }) + + api_client.upload_media(media, trace_id: "trace-123", field: "input") + + expect(a_request(:put, upload_url)).to have_been_made.once + end + end + + describe "read and admin APIs" do + it "checks API health" do + stub_request(:get, "#{base_url}/api/public/health") + .to_return(status: 200, body: { status: "OK" }.to_json, + headers: { "Content-Type" => "application/json" }) + + expect(api_client.health["status"]).to eq("OK") + end + + it "lists and fetches sessions" do + stub_request(:get, "#{base_url}/api/public/sessions") + .with(query: { page: "1", environment: %w[production staging] }) + .to_return(status: 200, body: { data: [{ id: "session-1" }] }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:get, "#{base_url}/api/public/sessions/session-1") + .to_return(status: 200, body: { id: "session-1" }.to_json, + headers: { "Content-Type" => "application/json" }) + + expect(api_client.list_sessions(page: 1, environment: %w[production staging])) + .to eq([{ "id" => "session-1" }]) + expect(api_client.get_session("session-1")["id"]).to eq("session-1") + end + + it "lists observations through the v2 endpoint" do + stub_request(:get, "#{base_url}/api/public/v2/observations") + .with(query: hash_including("traceId" => "trace-123", "fromStartTime" => "2026-01-01T00:00:00Z")) + .to_return(status: 200, body: { data: [{ id: "obs-1" }], meta: { nextCursor: nil } }.to_json, + headers: { "Content-Type" => "application/json" }) + + result = api_client.list_observations(trace_id: "trace-123", from_start_time: Time.utc(2026, 1, 1)) + + expect(result).to eq([{ "id" => "obs-1" }]) + end + + it "lists and fetches scores through the v2 endpoint" do + stub_request(:get, "#{base_url}/api/public/v2/scores") + .with(query: hash_including("traceId" => "trace-123", "dataType" => "NUMERIC")) + .to_return(status: 200, body: { data: [{ id: "score-1" }] }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:get, "#{base_url}/api/public/v2/scores/score-1") + .to_return(status: 200, body: { id: "score-1" }.to_json, + headers: { "Content-Type" => "application/json" }) + + expect(api_client.list_scores(trace_id: "trace-123", data_type: "NUMERIC")) + .to eq([{ "id" => "score-1" }]) + expect(api_client.get_score("score-1")["id"]).to eq("score-1") + end + + it "manages score configs" do + stub_request(:post, "#{base_url}/api/public/score-configs") + .with do |request| + body = JSON.parse(request.body) + body["dataType"] == "CATEGORICAL" && + body.dig("categories", 0, "value") == 5 && + body.dig("categories", 0, "label") == "pass" + end + .to_return(status: 200, body: { id: "config-1" }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:get, "#{base_url}/api/public/score-configs") + .to_return(status: 200, body: { data: [{ id: "config-1" }] }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:get, "#{base_url}/api/public/score-configs/config-1") + .to_return(status: 200, body: { id: "config-1" }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:patch, "#{base_url}/api/public/score-configs/config-1") + .with(body: hash_including("description" => "updated")) + .to_return(status: 200, body: { id: "config-1", description: "updated" }.to_json, + headers: { "Content-Type" => "application/json" }) + + expect( + api_client.create_score_config( + name: "quality", + data_type: "CATEGORICAL", + categories: [{ value: 5, label: "pass" }] + )["id"] + ) + .to eq("config-1") + expect(api_client.list_score_configs).to eq([{ "id" => "config-1" }]) + expect(api_client.get_score_config("config-1")["id"]).to eq("config-1") + expect(api_client.update_score_config(config_id: "config-1", description: "updated")["description"]) + .to eq("updated") + end + + it "manages models" do + stub_request(:post, "#{base_url}/api/public/models") + .with(body: hash_including("modelName" => "gpt-4o", "matchPattern" => "gpt-4o")) + .to_return(status: 200, body: { id: "model-1" }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:get, "#{base_url}/api/public/models") + .to_return(status: 200, body: { data: [{ id: "model-1" }] }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:get, "#{base_url}/api/public/models/model-1") + .to_return(status: 200, body: { id: "model-1" }.to_json, + headers: { "Content-Type" => "application/json" }) + stub_request(:delete, "#{base_url}/api/public/models/model-1") + .to_return(status: 204, body: "") + + expect(api_client.create_model(model_name: "gpt-4o", match_pattern: "gpt-4o")["id"]).to eq("model-1") + expect(api_client.list_models).to eq([{ "id" => "model-1" }]) + expect(api_client.get_model("model-1")["id"]).to eq("model-1") + expect(api_client.delete_model("model-1")).to be_nil + end + + it "queries metrics with a hash query encoded as JSON" do + metrics_query = { + view: "observations", + metrics: [{ measure: "count", aggregation: "count" }], + fromTimestamp: "2026-01-01T00:00:00Z", + toTimestamp: "2026-01-02T00:00:00Z" + } + stub_request(:get, "#{base_url}/api/public/v2/metrics") + .with(query: { query: JSON.generate(metrics_query) }) + .to_return(status: 200, body: { data: [] }.to_json, + headers: { "Content-Type" => "application/json" }) + + expect(api_client.query_metrics(query: metrics_query)).to eq({ "data" => [] }) + end + end + end end diff --git a/spec/langfuse/client_spec.rb b/spec/langfuse/client_spec.rb index 6903f0d..c10015a 100644 --- a/spec/langfuse/client_spec.rb +++ b/spec/langfuse/client_spec.rb @@ -2523,4 +2523,27 @@ def self.cache end end end + + describe "#resolve_media_references" do + let(:client) { described_class.new(valid_config) } + let(:base_url) { valid_config.base_url } + let(:reference_string) { "@@@langfuseMedia:type=text/plain|id=media-123|source=bytes@@@" } + + before do + stub_request(:get, "#{base_url}/api/public/media/media-123") + .to_return(status: 200, body: { + mediaId: "media-123", + contentType: "text/plain", + url: "https://media.langfuse.test/media-123" + }.to_json, headers: { "Content-Type" => "application/json" }) + stub_request(:get, "https://media.langfuse.test/media-123") + .to_return(status: 200, body: "hello") + end + + it "resolves references through the client flat API" do + result = client.resolve_media_references(obj: { "file" => reference_string }) + + expect(result).to eq({ "file" => "data:text/plain;base64,aGVsbG8=" }) + end + end end diff --git a/spec/langfuse/media_spec.rb b/spec/langfuse/media_spec.rb new file mode 100644 index 0000000..9d15805 --- /dev/null +++ b/spec/langfuse/media_spec.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +RSpec.describe Langfuse::Media do + let(:base_url) { "https://cloud.langfuse.com" } + let(:config) do + Langfuse::Config.new do |c| + c.public_key = "pk_test_123" + c.secret_key = "sk_test_456" + c.base_url = base_url + end + end + let(:client) { Langfuse::Client.new(config) } + + describe "#initialize" do + it "wraps raw bytes and builds deterministic media metadata" do + media = described_class.new(content_bytes: "hello", content_type: "text/plain") + + expect(media.source).to eq("bytes") + expect(media.content_length).to eq(5) + expect(media.media_id).to eq("LPJNul-wow4m6Dsqxbninh") + expect(media.reference_string).to eq( + "@@@langfuseMedia:type=text/plain|id=LPJNul-wow4m6Dsqxbninh|source=bytes@@@" + ) + expect(media.base64_data_uri).to eq("data:text/plain;base64,aGVsbG8=") + end + + it "parses base64 data URIs" do + media = described_class.new(base64_data_uri: "data:text/plain;base64,aGVsbG8=") + + expect(media.content_bytes).to eq("hello") + expect(media.content_type).to eq("text/plain") + expect(media.source).to eq("base64_data_uri") + end + + it "raises for invalid media input" do + expect { described_class.new(content_bytes: "hello") } + .to raise_error(ArgumentError, "media content and content_type are required") + end + end + + describe ".parse_reference_string" do + it "parses Langfuse media reference tokens" do + reference = described_class.parse_reference_string( + "@@@langfuseMedia:type=image/png|id=media-123|source=bytes@@@" + ) + + expect(reference.media_id).to eq("media-123") + expect(reference.content_type).to eq("image/png") + expect(reference.source).to eq("bytes") + end + + it "raises on malformed tokens" do + expect { described_class.parse_reference_string("@@@bad@@@") } + .to raise_error(ArgumentError, /does not start/) + end + end + + describe ".resolve_references" do + let(:reference_string) { "@@@langfuseMedia:type=text/plain|id=media-123|source=bytes@@@" } + let(:download_url) { "https://media.langfuse.test/media-123" } + + before do + stub_request(:get, "#{base_url}/api/public/media/media-123") + .to_return(status: 200, body: { + mediaId: "media-123", + contentType: "text/plain", + url: download_url + }.to_json, headers: { "Content-Type" => "application/json" }) + stub_request(:get, download_url) + .to_return(status: 200, body: "hello") + end + + it "replaces nested media references with base64 data URIs" do + obj = { "input" => ["before #{reference_string} after"] } + + result = described_class.resolve_references(obj, client: client) + + expect(result).to eq({ "input" => ["before data:text/plain;base64,aGVsbG8= after"] }) + expect(obj).to eq({ "input" => ["before #{reference_string} after"] }) + end + + it "leaves unresolved references intact" do + stub_request(:get, download_url).to_return(status: 500, body: "nope") + obj = { "input" => reference_string } + + result = described_class.resolve_references(obj, client: client) + + expect(result).to eq(obj) + end + end +end diff --git a/spec/langfuse/otel_setup_spec.rb b/spec/langfuse/otel_setup_spec.rb index 7f76798..603debd 100644 --- a/spec/langfuse/otel_setup_spec.rb +++ b/spec/langfuse/otel_setup_spec.rb @@ -160,6 +160,17 @@ end end + describe ".build_headers" do + it "includes auth and Langfuse SDK identity headers for OTLP export" do + headers = described_class.send(:build_headers, config.public_key, config.secret_key) + + expect(headers["Authorization"]).to start_with("Basic ") + expect(headers["x-langfuse-sdk-name"]).to eq("ruby") + expect(headers["x-langfuse-sdk-version"]).to eq(Langfuse::VERSION) + expect(headers["x-langfuse-public-key"]).to eq(config.public_key) + end + end + describe "lazy module-level setup" do it "does not initialize tracing during Langfuse.configure" do Langfuse.configure do |c|