diff --git a/src/vision/interfaces.ts b/src/vision/interfaces.ts index bf4b8a2..d5ae8fc 100644 --- a/src/vision/interfaces.ts +++ b/src/vision/interfaces.ts @@ -7,6 +7,8 @@ export type VOCRParams = { page_range?: Array; /** High fidelity word-level bounding boxes within complex documents. Default: false. */ fine_grained?: boolean; + /** Include line- and word-level bounding box coordinates. When false, the coordinates are omitted but the text and confidence are still returned. Default: true. */ + return_bounds?: boolean; }; export interface VOCRResponse extends BaseResponse { @@ -19,11 +21,11 @@ export interface VOCRResponse extends BaseResponse { text: string; lines: Array<{ text: string; - bounds: BoundingBox; + bounds?: BoundingBox; // omitted when return_bounds is false average_confidence: number; words: Array<{ text: string; - bounds: BoundingBox; + bounds?: BoundingBox; // omitted when return_bounds is false confidence: number; }>; }>; diff --git a/tests/vision.test.ts b/tests/vision.test.ts index d536757..abf860a 100644 --- a/tests/vision.test.ts +++ b/tests/vision.test.ts @@ -724,32 +724,6 @@ describe("Object Detection API", () => { }); }); - // Complex scenario tests - test("should work with comprehensive configuration", async () => { - const result = await client.vision.object_detection({ - url: TEST_URLS.image, - prompts: ["detect all objects", "find text elements"], - features: ["object", "gui"], - annotated_image: true, - return_type: "url", - }); - - expectSuccess(result); - expectType(result, "object"); - - if (result.objects !== undefined) { - expectArray(result.objects); - } - - if (result.gui_elements !== undefined) { - expectArray(result.gui_elements); - } - - if (result.annotated_image !== undefined) { - expectType(result.annotated_image, "string"); - } - }); - test("should work with file upload", async () => { const imageResponse = await fetch(TEST_URLS.image); const imageBlob = await imageResponse.blob();