From 3cd055ccbae60fed5cbc83f7700f3c27f7447c91 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Tue, 2 Jun 2026 17:18:59 -0700 Subject: [PATCH 1/2] feat(ocr): allow users to avoid bounds. --- jigsawstack/vision.py | 4 ++++ tests/test_audio.py | 32 ++++++++---------------------- tests/test_embedding.py | 8 ++++---- tests/test_file_store.py | 8 ++------ tests/test_object_detection.py | 8 ++++---- tests/test_prediction.py | 8 ++------ tests/test_translate.py | 8 ++------ tests/test_validate.py | 8 ++------ tests/test_vocr.py | 36 ++++++++++++---------------------- 9 files changed, 40 insertions(+), 80 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 2778cf3..0748a69 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -154,6 +154,10 @@ class VOCRParams(TypedDict): """ High fidelity word-level bounding boxes within complex documents. Default: false. """ + return_bounds: NotRequired[bool] + """ + Include line and word level bounding box coordinates. When false, the coordinates are omitted but the text and confidence are still returned. Default: true. + """ class Word(TypedDict): diff --git a/tests/test_audio.py b/tests/test_audio.py index 6a8861d..3063809 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -131,26 +131,20 @@ class TestAudioSync: """Test synchronous audio speech-to-text methods""" - @pytest.mark.parametrize( - "test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES] - ) + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) def test_speech_to_text(self, test_case): """Test synchronous speech-to-text with various inputs""" try: if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) # Verify response structure assert result["success"] - assert result.get("text", None) is not None and isinstance( - result["text"], str - ) + assert result.get("text", None) is not None and isinstance(result["text"], str) # Check for chunks if result.get("chunks", None): @@ -172,9 +166,7 @@ def test_speech_to_text_webhook(self, test_case): if test_case.get("blob"): # Download audio content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.audio.speech_to_text( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.audio.speech_to_text(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.audio.speech_to_text(test_case["params"]) @@ -189,9 +181,7 @@ def test_speech_to_text_webhook(self, test_case): class TestAudioAsync: """Test asynchronous audio speech-to-text methods""" - @pytest.mark.parametrize( - "test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES] - ) + @pytest.mark.parametrize("test_case", TEST_CASES, ids=[tc["name"] for tc in TEST_CASES]) @pytest.mark.asyncio async def test_speech_to_text_async(self, test_case): """Test asynchronous speech-to-text with various inputs""" @@ -208,9 +198,7 @@ async def test_speech_to_text_async(self, test_case): # Verify response structure assert result["success"] - assert result.get("text", None) is not None and isinstance( - result["text"], str - ) + assert result.get("text", None) is not None and isinstance(result["text"], str) # Check for chunks if result.get("chunks", None): @@ -220,9 +208,7 @@ async def test_speech_to_text_async(self, test_case): if result.get("speakers", None): assert isinstance(result["speakers"], list) except JigsawStackError as e: - pytest.fail( - f"Unexpected JigsawStackError in async {test_case['name']}: {e}" - ) + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") @pytest.mark.parametrize( "test_case", WEBHOOK_TEST_CASES, ids=[tc["name"] for tc in WEBHOOK_TEST_CASES] @@ -248,6 +234,4 @@ async def test_speech_to_text_webhook_async(self, test_case): except JigsawStackError as e: # Webhook URLs might fail if invalid - print( - f"Expected possible error for async webhook test {test_case['name']}: {e}" - ) + print(f"Expected possible error for async webhook test {test_case['name']}: {e}") diff --git a/tests/test_embedding.py b/tests/test_embedding.py index 52121e7..a0773a6 100644 --- a/tests/test_embedding.py +++ b/tests/test_embedding.py @@ -28,12 +28,12 @@ headers={"x-jigsaw-skip-cache": "true"}, ) -SAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." +SAMPLE_TEXT = ( + "The quick brown fox jumps over the lazy dog. This is a sample text for embedding generation." +) SAMPLE_IMAGE_URL = "https://images.unsplash.com/photo-1542931287-023b922fa89b?q=80&w=2574&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" SAMPLE_AUDIO_URL = "https://jigsawstack.com/preview/stt-example.wav" -SAMPLE_PDF_URL = ( - "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" -) +SAMPLE_PDF_URL = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" # Test cases for Embedding V2 EMBEDDING_V2_TEST_CASES = [ diff --git a/tests/test_file_store.py b/tests/test_file_store.py index b743623..16d5fb0 100644 --- a/tests/test_file_store.py +++ b/tests/test_file_store.py @@ -130,9 +130,7 @@ class TestFileStoreAsync: async def test_file_upload_async(self, test_case): """Test asynchronous file upload with various options""" try: - result = await async_jigsaw.store.upload( - test_case["file"], test_case["options"] - ) + result = await async_jigsaw.store.upload(test_case["file"], test_case["options"]) print(f"Async upload test {test_case['name']}: {result}") assert result.get("key") is not None @@ -147,9 +145,7 @@ async def test_file_upload_async(self, test_case): self.uploaded_keys.append(result["key"]) except JigsawStackError as e: - pytest.fail( - f"Unexpected JigsawStackError in async {test_case['name']}: {e}" - ) + pytest.fail(f"Unexpected JigsawStackError in async {test_case['name']}: {e}") @pytest.mark.asyncio async def test_file_get_async(self): diff --git a/tests/test_object_detection.py b/tests/test_object_detection.py index 4c846a6..a6f4844 100644 --- a/tests/test_object_detection.py +++ b/tests/test_object_detection.py @@ -30,7 +30,9 @@ headers={"x-jigsaw-skip-cache": "true"}, ) -IMAGE_URL = "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +IMAGE_URL = ( + "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg" +) TEST_CASES = [ { @@ -114,9 +116,7 @@ def test_object_detection(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = jigsaw.vision.object_detection( - blob_content, test_case.get("options", {}) - ) + result = jigsaw.vision.object_detection(blob_content, test_case.get("options", {})) else: # Use params directly result = jigsaw.vision.object_detection(test_case["params"]) diff --git a/tests/test_prediction.py b/tests/test_prediction.py index e8489da..46258e8 100644 --- a/tests/test_prediction.py +++ b/tests/test_prediction.py @@ -59,9 +59,7 @@ def generate_dates(start_date, num_days): { "name": "seasonal_pattern", "params": { - "dataset": [ - {"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21) - ], + "dataset": [{"date": dates[i], "value": 100 + (50 * (i % 7))} for i in range(21)], "steps": 7, }, }, @@ -75,9 +73,7 @@ def generate_dates(start_date, num_days): { "name": "large_dataset_prediction", "params": { - "dataset": [ - {"date": dates[i], "value": 1000 + (i * 20)} for i in range(30) - ], + "dataset": [{"date": dates[i], "value": 1000 + (i * 20)} for i in range(30)], "steps": 10, }, }, diff --git a/tests/test_translate.py b/tests/test_translate.py index abcbcc1..e7db1e6 100644 --- a/tests/test_translate.py +++ b/tests/test_translate.py @@ -94,9 +94,7 @@ def test_translate_text(self, test_case): # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) - assert len(result["translated_text"]) == len( - test_case["params"]["text"] - ) + assert len(result["translated_text"]) == len(test_case["params"]["text"]) else: assert isinstance(result["translated_text"], str) @@ -123,9 +121,7 @@ async def test_translate_text_async(self, test_case): # Check if the response structure matches the input if isinstance(test_case["params"]["text"], list): assert isinstance(result["translated_text"], list) - assert len(result["translated_text"]) == len( - test_case["params"]["text"] - ) + assert len(result["translated_text"]) == len(test_case["params"]["text"]) else: assert isinstance(result["translated_text"], str) diff --git a/tests/test_validate.py b/tests/test_validate.py index 376e6d2..1539680 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -29,9 +29,7 @@ ) # Sample URLs for NSFW testing -SAFE_IMAGE_URL = ( - "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" -) +SAFE_IMAGE_URL = "https://images.unsplash.com/photo-1506905925346-21bda4d32df4?q=80&w=2070" POTENTIALLY_NSFW_URL = "https://images.unsplash.com/photo-1512310604669-443f26c35f52?q=80&w=868&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" # Profanity Test Cases @@ -240,9 +238,7 @@ async def test_nsfw_check_blob_async(self, test_case): try: # Download blob content blob_content = requests.get(test_case["blob_url"]).content - result = await async_jigsaw.validate.nsfw( - blob_content, test_case["options"] - ) + result = await async_jigsaw.validate.nsfw(blob_content, test_case["options"]) assert result["success"] assert "nsfw" in result diff --git a/tests/test_vocr.py b/tests/test_vocr.py index d7193f2..a2395c2 100644 --- a/tests/test_vocr.py +++ b/tests/test_vocr.py @@ -157,9 +157,7 @@ def test_vocr(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize( - "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] - ) + @pytest.mark.parametrize("test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases]) def test_vocr_pdf(self, test_case): """Test synchronous VOCR with PDF inputs""" try: @@ -177,15 +175,13 @@ def test_vocr_pdf(self, test_case): assert "context" in result assert "total_pages" in result - if test_case.get("params", {}).get("page_range") or test_case.get( - "options", {} - ).get("page_range"): + if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get( + "page_range" + ): assert "page_range" in result assert isinstance(result["page_range"], list) - logger.info( - f"Test {test_case['name']}: total_pages={result.get('total_pages')}" - ) + logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") @@ -207,9 +203,7 @@ async def test_vocr_async(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = await async_jigsaw.vision.vocr( - blob_content, test_case.get("options", {}) - ) + result = await async_jigsaw.vision.vocr(blob_content, test_case.get("options", {})) else: # Use params directly result = await async_jigsaw.vision.vocr(test_case["params"]) @@ -236,9 +230,7 @@ async def test_vocr_async(self, test_case): except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") - @pytest.mark.parametrize( - "test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases] - ) + @pytest.mark.parametrize("test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases]) @pytest.mark.asyncio async def test_vocr_pdf_async(self, test_case): """Test asynchronous VOCR with PDF inputs""" @@ -246,9 +238,7 @@ async def test_vocr_pdf_async(self, test_case): if test_case.get("blob"): # Download blob content blob_content = requests.get(test_case["blob"]).content - result = await async_jigsaw.vision.vocr( - blob_content, test_case.get("options", {}) - ) + result = await async_jigsaw.vision.vocr(blob_content, test_case.get("options", {})) else: # Use params directly result = await async_jigsaw.vision.vocr(test_case["params"]) @@ -262,15 +252,13 @@ async def test_vocr_pdf_async(self, test_case): assert "total_pages" in result # PDF specific # Check if page_range is in response when requested - if test_case.get("params", {}).get("page_range") or test_case.get( - "options", {} - ).get("page_range"): + if test_case.get("params", {}).get("page_range") or test_case.get("options", {}).get( + "page_range" + ): assert "page_range" in result assert isinstance(result["page_range"], list) - logger.info( - f"Test {test_case['name']}: total_pages={result.get('total_pages')}" - ) + logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}") except JigsawStackError as e: pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") From f6f370e7563324eb7d555f9583ee3aae5cd35b46 Mon Sep 17 00:00:00 2001 From: Khurdhula-Harshavardhan Date: Tue, 2 Jun 2026 17:32:09 -0700 Subject: [PATCH 2/2] fix(types): response for bounds is now optional under ocr --- jigsawstack/vision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jigsawstack/vision.py b/jigsawstack/vision.py index 0748a69..d23b548 100644 --- a/jigsawstack/vision.py +++ b/jigsawstack/vision.py @@ -162,13 +162,13 @@ class VOCRParams(TypedDict): class Word(TypedDict): text: str - bounds: BoundingBox + bounds: NotRequired[BoundingBox] # omitted when return_bounds is false confidence: float class Line(TypedDict): text: str - bounds: BoundingBox + bounds: NotRequired[BoundingBox] # omitted when return_bounds is false average_confidence: float words: List[Word]