diff --git a/src/infrastructure/bricks/bricks_api_adapter.py b/src/infrastructure/bricks/bricks_api_adapter.py index bf76efa..53ac529 100644 --- a/src/infrastructure/bricks/bricks_api_adapter.py +++ b/src/infrastructure/bricks/bricks_api_adapter.py @@ -2,7 +2,9 @@ import json import logging import os +import random import re +import time import urllib.error import urllib.parse import urllib.request @@ -16,6 +18,8 @@ logger = logging.getLogger(__name__) _DEFAULT_TIMEOUT = 30 +_MAX_RETRIES = 3 +_RETRY_BACKOFF = 2 class BricksApiAdapter(BricksApiPort): @@ -27,39 +31,67 @@ def __init__(self, config) -> None: async def close(self) -> None: pass + @staticmethod + def _is_retryable(exc: Exception) -> bool: + if isinstance(exc, urllib.error.HTTPError): + return exc.code >= 500 + if isinstance(exc, (urllib.error.URLError, ConnectionError, TimeoutError, OSError)): + return True + return False + def _get(self, url: str, headers: dict | None = None) -> tuple[bytes, dict]: - logger.debug("GET %s", url) - req = urllib.request.Request(url, headers=headers or {}) - try: - with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: - body = resp.read() - resp_headers = dict(resp.headers) - logger.debug("GET %s -> %d bytes (status=%s)", url, len(body), resp.status) - return body, resp_headers - except urllib.error.HTTPError as e: - error_body = e.read().decode("utf-8", errors="replace") if hasattr(e, "read") else "" - logger.error("GET %s -> HTTP %d: %s", url, e.code, error_body[:500]) - raise - except Exception as e: - logger.error("GET %s -> error: %s", url, e) - raise + logger.info("GET %s", url) + for attempt in range(1, _MAX_RETRIES + 1): + try: + req = urllib.request.Request(url, headers=headers or {}) + with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: + body = resp.read() + resp_headers = dict(resp.headers) + logger.info("GET %s -> %d bytes (status=%s)", url, len(body), resp.status) + return body, resp_headers + except urllib.error.HTTPError as e: + error_body = e.read().decode("utf-8", errors="replace") if hasattr(e, "read") else "" + if self._is_retryable(e) and attempt < _MAX_RETRIES: + logger.warning("GET %s -> HTTP %d (attempt %d/%d), retrying", url, e.code, attempt, _MAX_RETRIES) + time.sleep(_RETRY_BACKOFF ** attempt + random.uniform(0, 1)) + continue + logger.exception("GET %s -> HTTP %d: %s", url, e.code, error_body[:500]) + raise + except Exception as e: + if self._is_retryable(e) and attempt < _MAX_RETRIES: + logger.warning("GET %s -> error (attempt %d/%d): %s, retrying", url, attempt, _MAX_RETRIES, e) + time.sleep(_RETRY_BACKOFF ** attempt + random.uniform(0, 1)) + continue + logger.exception("GET %s -> error: %s", url, e) + raise + raise RuntimeError(f"GET {url} failed after {_MAX_RETRIES} retries") def _post(self, url: str, payload: dict, headers: dict) -> bytes: data = json.dumps(payload).encode("utf-8") - logger.debug("POST %s (body=%d bytes)", url, len(data)) - req = urllib.request.Request(url, data=data, headers=headers, method="POST") - try: - with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: - body = resp.read() - logger.debug("POST %s -> %d bytes (status=%s)", url, len(body), resp.status) - return body - except urllib.error.HTTPError as e: - error_body = e.read().decode("utf-8", errors="replace") if hasattr(e, "read") else "" - logger.error("POST %s -> HTTP %d: %s", url, e.code, error_body[:500]) - raise - except Exception as e: - logger.error("POST %s -> error: %s", url, e) - raise + logger.info("POST %s (body=%d bytes)", url, len(data)) + for attempt in range(1, _MAX_RETRIES + 1): + try: + req = urllib.request.Request(url, data=data, headers=headers, method="POST") + with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: + body = resp.read() + logger.info("POST %s -> %d bytes (status=%s)", url, len(body), resp.status) + return body + except urllib.error.HTTPError as e: + error_body = e.read().decode("utf-8", errors="replace") if hasattr(e, "read") else "" + if self._is_retryable(e) and attempt < _MAX_RETRIES: + logger.warning("POST %s -> HTTP %d (attempt %d/%d), retrying", url, e.code, attempt, _MAX_RETRIES) + time.sleep(_RETRY_BACKOFF ** attempt + random.uniform(0, 1)) + continue + logger.exception("POST %s -> HTTP %d: %s", url, e.code, error_body[:500]) + raise + except Exception as e: + if self._is_retryable(e) and attempt < _MAX_RETRIES: + logger.warning("POST %s -> error (attempt %d/%d): %s, retrying", url, attempt, _MAX_RETRIES, e) + time.sleep(_RETRY_BACKOFF ** attempt + random.uniform(0, 1)) + continue + logger.exception("POST %s -> error: %s", url, e) + raise + raise RuntimeError(f"POST {url} failed after {_MAX_RETRIES} retries") async def list_project_documents(self, project_id: str) -> list[BricksDocumentInfo]: url = f"{self._base_url}/api/projects/{project_id}/documents/ai" @@ -77,6 +109,8 @@ async def list_project_documents(self, project_id: str) -> list[BricksDocumentIn raise FileNotFoundError( f"Bricks project not found: {project_id}" ) from e + if e.code == 429: + logger.warning("Bricks API rate limited (HTTP 429) for project %s", project_id) raise RuntimeError(f"Bricks API error (HTTP {e.code})") from e except urllib.error.URLError as e: raise ConnectionError(f"Bricks API connection failed: {e.reason}") from e @@ -116,6 +150,8 @@ async def download_document( raise FileNotFoundError( f"Document {document_id} not found (project {project_id})" ) from e + if e.code == 429: + logger.warning("Bricks API rate limited (HTTP 429) for document %s download", document_id) raise RuntimeError( f"Failed to download document (HTTP {e.code})" ) from e @@ -148,6 +184,8 @@ async def publish_section_version(self, payload: dict) -> SectionVersionResult: raise PermissionError( f"Publish authentication failed (HTTP {e.code})" ) from e + if e.code == 429: + logger.warning("Bricks API rate limited (HTTP 429) for publish: project=%s section=%s", payload.get("projectUniqueId"), payload.get("sectionKey")) raise RuntimeError(f"Publish failed (HTTP {e.code})") from e except urllib.error.URLError as e: raise ConnectionError(f"Publish connection failed: {e.reason}") from e diff --git a/tests/unit/infrastructure/test_bricks_api_adapter.py b/tests/unit/infrastructure/test_bricks_api_adapter.py index 120aeeb..7403b21 100644 --- a/tests/unit/infrastructure/test_bricks_api_adapter.py +++ b/tests/unit/infrastructure/test_bricks_api_adapter.py @@ -6,13 +6,13 @@ """ import json -from io import BytesIO -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch import pytest import urllib.error from domain.ports.bricks_api_port import BricksDocumentInfo, SectionVersionResult +from infrastructure.bricks.bricks_api_adapter import _MAX_RETRIES from infrastructure.bricks.bricks_api_adapter import ( BricksApiAdapter, _extract_filename, @@ -22,7 +22,6 @@ @pytest.fixture def bricks_config() -> MagicMock: - """Provide a mock BricksConfig.""" config = MagicMock() config.BRICKS_API_BASE_URL = "https://api.bricks.example.com" config.BRICKS_API_KEY = "test-api-key-12345" @@ -33,12 +32,10 @@ def bricks_config() -> MagicMock: @pytest.fixture def adapter(bricks_config: MagicMock) -> BricksApiAdapter: - """Provide a BricksApiAdapter with mock config.""" return BricksApiAdapter(config=bricks_config) def _mock_urlopen_response(body: bytes, headers: dict | None = None, status: int = 200) -> MagicMock: - """Create a mock object that behaves like urllib.urlopen response.""" mock_resp = MagicMock() mock_resp.read.return_value = body mock_resp.headers = headers or {} @@ -48,10 +45,8 @@ def _mock_urlopen_response(body: bytes, headers: dict | None = None, status: int class TestListProjectDocuments: - """Tests for BricksApiAdapter.list_project_documents.""" async def test_calls_api_with_bearer_token(self, adapter: BricksApiAdapter) -> None: - """Should call GET /api/projects/{id}/documents/ai with Bearer token.""" body = json.dumps({"items": []}).encode() mock_resp = _mock_urlopen_response(body) @@ -64,7 +59,6 @@ async def test_calls_api_with_bearer_token(self, adapter: BricksApiAdapter) -> N assert req.get_header("Authorization") == "Bearer test-bearer-token" async def test_returns_list_of_bricks_document_info(self, adapter: BricksApiAdapter) -> None: - """Should parse JSON response and return list of BricksDocumentInfo.""" api_response = { "items": [ { @@ -114,7 +108,6 @@ async def test_returns_list_of_bricks_document_info(self, adapter: BricksApiAdap assert result[1].is_ignored is True async def test_camel_case_to_snake_case_mapping(self, adapter: BricksApiAdapter) -> None: - """Should correctly map camelCase API fields to snake_case model fields.""" api_response = { "items": [ { @@ -147,7 +140,6 @@ async def test_camel_case_to_snake_case_mapping(self, adapter: BricksApiAdapter) assert doc.category_classified_at == "2025-12-05T08:00:00.000Z" async def test_returns_empty_list_when_no_documents(self, adapter: BricksApiAdapter) -> None: - """Should return empty list when API returns empty items.""" body = json.dumps({"items": []}).encode() mock_resp = _mock_urlopen_response(body) @@ -157,7 +149,6 @@ async def test_returns_empty_list_when_no_documents(self, adapter: BricksApiAdap assert result == [] async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> None: - """Should raise PermissionError on 401 Unauthorized.""" with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: mock_urlopen.side_effect = urllib.error.HTTPError( url="https://api.bricks.example.com/api/projects/proj-123/documents/ai", @@ -171,7 +162,6 @@ async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> No await adapter.list_project_documents(project_id="proj-123") async def test_raises_on_403_forbidden(self, adapter: BricksApiAdapter) -> None: - """Should raise PermissionError on 403 Forbidden.""" with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: mock_urlopen.side_effect = urllib.error.HTTPError( url="https://api.bricks.example.com/api/projects/proj-123/documents/ai", @@ -185,7 +175,6 @@ async def test_raises_on_403_forbidden(self, adapter: BricksApiAdapter) -> None: await adapter.list_project_documents(project_id="proj-123") async def test_raises_on_404_not_found(self, adapter: BricksApiAdapter) -> None: - """Should raise FileNotFoundError on 404 Not Found.""" with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: mock_urlopen.side_effect = urllib.error.HTTPError( url="https://api.bricks.example.com/api/projects/nonexistent/documents/ai", @@ -199,27 +188,37 @@ async def test_raises_on_404_not_found(self, adapter: BricksApiAdapter) -> None: await adapter.list_project_documents(project_id="nonexistent") async def test_raises_on_connection_error(self, adapter: BricksApiAdapter) -> None: - """Should raise ConnectionError on URLError.""" - with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: - mock_urlopen.side_effect = urllib.error.URLError(reason="Connection refused") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = [urllib.error.URLError(reason="Connection refused")] * _MAX_RETRIES with pytest.raises(ConnectionError): await adapter.list_project_documents(project_id="proj-123") async def test_raises_on_timeout(self, adapter: BricksApiAdapter) -> None: - """Should raise TimeoutError on socket timeout.""" - with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: - mock_urlopen.side_effect = TimeoutError("Request timed out") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = [TimeoutError("Request timed out")] * _MAX_RETRIES with pytest.raises(TimeoutError): await adapter.list_project_documents(project_id="proj-123") + async def test_does_not_retry_non_retryable_exception(self, adapter: BricksApiAdapter) -> None: + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = ValueError("bad value") + + with pytest.raises(ValueError): + await adapter.list_project_documents(project_id="proj-123") + assert mock_urlopen.call_count == 1 + class TestDownloadDocument: - """Tests for BricksApiAdapter.download_document.""" async def test_resolves_document_id_and_downloads_presigned_url(self, adapter: BricksApiAdapter) -> None: - """Should resolve document_id via list_project_documents and download pre-signed URL.""" presigned_url = "https://s3.example.com/projects/proj-1/doc.pdf?X-Amz-Signature=abc123" doc_list_body = json.dumps({ "items": [{ @@ -247,7 +246,6 @@ async def test_resolves_document_id_and_downloads_presigned_url(self, adapter: B assert mime_type == "application/pdf" async def test_extracts_filename_from_url_when_no_content_disposition(self, adapter: BricksApiAdapter) -> None: - """Should extract filename from URL path when no Content-Disposition header.""" presigned_url = "https://s3.example.com/projects/proj-1/Dossier_Financement.pdf?X-Amz-Signature=abc" doc_list_body = json.dumps({ "items": [{ @@ -268,7 +266,6 @@ async def test_extracts_filename_from_url_when_no_content_disposition(self, adap assert filename == "Dossier_Financement.pdf" async def test_defaults_to_document_bin_when_no_filename(self, adapter: BricksApiAdapter) -> None: - """Should return 'document.bin' when no filename in URL or header.""" doc_list_body = json.dumps({ "items": [{ "id": "doc-3", @@ -288,7 +285,6 @@ async def test_defaults_to_document_bin_when_no_filename(self, adapter: BricksAp assert filename == "document.bin" async def test_raises_when_document_id_not_found(self, adapter: BricksApiAdapter) -> None: - """Should raise FileNotFoundError when document_id not found in project.""" doc_list_body = json.dumps({"items": []}).encode() list_resp = _mock_urlopen_response(doc_list_body) @@ -297,50 +293,53 @@ async def test_raises_when_document_id_not_found(self, adapter: BricksApiAdapter await adapter.download_document(document_id="missing-id", project_id="proj-1") async def test_raises_permission_error_on_403(self, adapter: BricksApiAdapter) -> None: - """Should raise PermissionError on 403 Forbidden when downloading.""" presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() list_resp = _mock_urlopen_response(doc_list_body) - - with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: - mock_urlopen.side_effect = [list_resp, urllib.error.HTTPError( - url="https://s3.example.com/doc.pdf", - code=403, - msg="Forbidden", - hdrs={}, - fp=None, - )] + http_err_403 = urllib.error.HTTPError( + url="https://s3.example.com/doc.pdf", + code=403, + msg="Forbidden", + hdrs={}, + fp=None, + ) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = [list_resp, http_err_403] with pytest.raises(PermissionError): await adapter.download_document(document_id="doc-1", project_id="proj-1") async def test_raises_connection_error(self, adapter: BricksApiAdapter) -> None: - """Should raise ConnectionError on URLError.""" presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() list_resp = _mock_urlopen_response(doc_list_body) - with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: - mock_urlopen.side_effect = [list_resp, urllib.error.URLError(reason="Connection refused")] + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = [list_resp] + [urllib.error.URLError(reason="Connection refused")] * _MAX_RETRIES with pytest.raises(ConnectionError): await adapter.download_document(document_id="doc-1", project_id="proj-1") async def test_raises_timeout(self, adapter: BricksApiAdapter) -> None: - """Should raise TimeoutError on download timeout.""" presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() list_resp = _mock_urlopen_response(doc_list_body) - with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: - mock_urlopen.side_effect = [list_resp, TimeoutError("Download timed out")] + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = [list_resp] + [TimeoutError("Download timed out")] * _MAX_RETRIES with pytest.raises(TimeoutError): await adapter.download_document(document_id="doc-1", project_id="proj-1") class TestExtractFilename: - """Tests for _extract_filename helper.""" def test_extracts_from_content_disposition_quoted(self) -> None: assert _extract_filename('attachment; filename="report.pdf"') == "report.pdf" @@ -362,7 +361,6 @@ def test_url_without_extension_falls_back(self) -> None: class TestNormalizeExtension: - """Tests for _normalize_extension helper.""" def test_normal_dot_pdf(self) -> None: assert _normalize_extension("report.pdf") == "report.pdf" @@ -384,10 +382,8 @@ def test_normal_dot_docx(self) -> None: class TestPublishSectionVersion: - """Tests for BricksApiAdapter.publish_section_version.""" async def test_calls_post_with_x_api_key_header(self, adapter: BricksApiAdapter) -> None: - """Should call POST /api/section-versions with X-API-Key header.""" api_response = {"id": "sv-1", "sectionKey": "intro"} body = json.dumps(api_response).encode() mock_resp = _mock_urlopen_response(body) @@ -411,7 +407,6 @@ async def test_calls_post_with_x_api_key_header(self, adapter: BricksApiAdapter) assert "api/section-versions" in req.full_url async def test_returns_section_version_result(self, adapter: BricksApiAdapter) -> None: - """Should return SectionVersionResult on successful publish.""" api_response = {"id": "sv-uuid", "sectionKey": "summary"} body = json.dumps(api_response).encode() mock_resp = _mock_urlopen_response(body) @@ -425,7 +420,6 @@ async def test_returns_section_version_result(self, adapter: BricksApiAdapter) - assert result.success is True async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> None: - """Should raise PermissionError on 401 when publishing.""" with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: mock_urlopen.side_effect = urllib.error.HTTPError( url="https://api.bricks.example.com/api/section-versions", @@ -439,17 +433,32 @@ async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> No await adapter.publish_section_version(payload={"section_key": "intro"}) async def test_raises_on_connection_error(self, adapter: BricksApiAdapter) -> None: - """Should raise ConnectionError on URLError when publishing.""" - with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: - mock_urlopen.side_effect = urllib.error.URLError(reason="Connection refused") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = [urllib.error.URLError(reason="Connection refused")] * _MAX_RETRIES with pytest.raises(ConnectionError): await adapter.publish_section_version(payload={"section_key": "intro"}) async def test_raises_on_timeout(self, adapter: BricksApiAdapter) -> None: - """Should raise TimeoutError on publish timeout.""" - with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: - mock_urlopen.side_effect = TimeoutError("Publish timed out") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen, \ + patch("infrastructure.bricks.bricks_api_adapter.time.sleep"), \ + patch("infrastructure.bricks.bricks_api_adapter.random.uniform", return_value=0.5): + mock_urlopen.side_effect = [TimeoutError("Publish timed out")] * _MAX_RETRIES with pytest.raises(TimeoutError): + await adapter.publish_section_version(payload={"section_key": "intro"}) + + async def test_logs_warning_on_429_rate_limit(self, adapter: BricksApiAdapter) -> None: + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/section-versions", + code=429, + msg="Too Many Requests", + hdrs={}, + fp=None, + ) + + with pytest.raises(RuntimeError, match="HTTP 429"): await adapter.publish_section_version(payload={"section_key": "intro"}) \ No newline at end of file