From d6a6c846447414dde5488804ffc4c588224d2b61 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Sun, 17 May 2026 08:25:12 +0200 Subject: [PATCH 1/4] feat: add RAGAnythingBricks MCP server with list, read, and publish tools BRIC-19: Add /bricks/mcp endpoint with 3 MCP tools: - list_bricks_documents: list project documents via Bricks API - read_bricks_document: download & extract text via presigned S3 URL + Kreuzberg - publish_section_version: publish structured section with dry-run mode (default on) --- .env.example | 7 + README.md | 178 +++++--- src/application/api/mcp_bricks_tools.py | 97 +++++ .../list_bricks_documents_use_case.py | 9 + .../publish_section_version_use_case.py | 44 ++ .../read_bricks_document_use_case.py | 36 ++ src/config.py | 10 + src/dependencies.py | 33 ++ src/domain/ports/bricks_api_port.py | 54 +++ .../bricks/bricks_api_adapter.py | 102 +++++ src/main.py | 17 +- tests/conftest.py | 1 + tests/fixtures/external.py | 23 + .../infrastructure/test_bricks_api_adapter.py | 405 ++++++++++++++++++ tests/unit/test_bricks_api_port.py | 226 ++++++++++ .../test_list_bricks_documents_use_case.py | 97 +++++ tests/unit/test_mcp_bricks_tools.py | 324 ++++++++++++++ .../test_publish_section_version_use_case.py | 226 ++++++++++ .../test_read_bricks_document_use_case.py | 237 ++++++++++ 19 files changed, 2072 insertions(+), 54 deletions(-) create mode 100644 src/application/api/mcp_bricks_tools.py create mode 100644 src/application/use_cases/list_bricks_documents_use_case.py create mode 100644 src/application/use_cases/publish_section_version_use_case.py create mode 100644 src/application/use_cases/read_bricks_document_use_case.py create mode 100644 src/domain/ports/bricks_api_port.py create mode 100644 src/infrastructure/bricks/bricks_api_adapter.py create mode 100644 tests/unit/infrastructure/test_bricks_api_adapter.py create mode 100644 tests/unit/test_bricks_api_port.py create mode 100644 tests/unit/test_list_bricks_documents_use_case.py create mode 100644 tests/unit/test_mcp_bricks_tools.py create mode 100644 tests/unit/test_publish_section_version_use_case.py create mode 100644 tests/unit/test_read_bricks_document_use_case.py diff --git a/.env.example b/.env.example index 333035c..8723e1c 100644 --- a/.env.example +++ b/.env.example @@ -57,3 +57,10 @@ MINIO_ACCESS=minioadmin MINIO_SECRET=minioadmin MINIO_BUCKET=raganything MINIO_SECURE=false + +# Bricks API Configuration +BRICKS_API_BASE_URL=https://analyse.bricks.co +BRICKS_API_KEY= +BRICKS_BEARER_TOKEN= +BRICKS_PUBLISH_TARGET_URL=https://project-analysis-preprod.up.railway.app/api/section-versions +BRICKS_PUBLISH_DRY_RUN=true diff --git a/README.md b/README.md index 141ec5f..1a07847 100644 --- a/README.md +++ b/README.md @@ -14,57 +14,61 @@ Multi-modal RAG service exposing a REST API and MCP server for document indexing | +---------------+---------------+ | | - Application Layer MCP Servers (FastMCP) - +------------------------------+ | - | api/ | +---+--------+ +--+-----------+ +--+-------------+ - | indexing_routes.py | | RAGAnything | | RAGAnything | | RAGAnything | - | query_routes.py | | Query | | Files | | Classical | - | file_routes.py | | /rag/mcp | | /files/mcp | | /classical/mcp| - | health_routes.py | +---+--------+ +--+-----------+ +--+-------------+ - | classical_indexing_routes | | | | - | classical_query_routes | | | classical_index_file - | use_cases/ | | | classical_index_folder - | IndexFileUseCase | | | classical_query - | IndexFolderUseCase | - | QueryUseCase | - | ClassicalIndexFileUseCase | - | ClassicalIndexFolderUseCase | - | ClassicalQueryUseCase | - | ListFilesUseCase | - | ListFoldersUseCase | - | ReadFileUseCase | - | requests/ responses/ | - +------------------------------+ - | | | - v v v - Domain Layer (ports) - +----------------------------------------------------------+ - | RAGEnginePort StoragePort BM25EnginePort | - | DocumentReaderPort VectorStorePort LLMPort | - +----------------------------------------------------------+ - | | | | | - v v v v v - Infrastructure Layer (adapters) - +----------------------------------------------------------+ - | LightRAGAdapter MinioAdapter | - | (RAGAnything/ (minio-py) | - | KreuzbergParser) | - | | - | PostgresBM25Adapter RRFCombiner | - | (pg_textsearch) (hybrid+ fusion) | - | | - | KreuzbergAdapter LangchainPgvectorAdapter | - | (kreuzberg - 91 formats) (langchain-postgres PGVector) | - | | - | LangchainOpenAIAdapter | - | (langchain-openai ChatOpenAI) | - +----------------------------------------------------------+ - | | | | | - v v v v v - PostgreSQL MinIO Kreuzberg OpenAI-compatible - (pgvector + (object (document (LLM API) - Apache AGE storage) extraction) - pg_textsearch) + Application Layer MCP Servers (FastMCP) + +------------------------------+ | + | api/ | +---+--------+ +--+-----------+ +--+-------------+ +--+----------+ + | indexing_routes.py | | RAGAnything | | RAGAnything | | RAGAnything | | RAGAnything | + | query_routes.py | | Query | | Files | | Classical | | Bricks | + | file_routes.py | | /rag/mcp | | /files/mcp | | /classical/mcp| | /bricks/mcp| + | health_routes.py | +---+--------+ +--+-----------+ +--+-------------+ +--+----------+ + | classical_indexing_routes | | | | | + | classical_query_routes | | | classical_index_file list_bricks_documents + | use_cases/ | | | classical_index_folder read_bricks_document + | IndexFileUseCase | | | classical_query publish_section_version + | IndexFolderUseCase | + | QueryUseCase | + | ClassicalIndexFileUseCase | + | ClassicalIndexFolderUseCase | + | ClassicalQueryUseCase | + | ListFilesUseCase | + | ListFoldersUseCase | + | ReadFileUseCase | + | ListBricksDocumentsUseCase | + | ReadBricksDocumentUseCase | + | PublishSectionVersionUseCase| + | requests/ responses/ | + +------------------------------+ + | | | + v v v + Domain Layer (ports) + +----------------------------------------------------------+ + | RAGEnginePort StoragePort BM25EnginePort | + | DocumentReaderPort VectorStorePort LLMPort | + | BricksApiPort | + +----------------------------------------------------------+ + | | | | | + v v v v v + Infrastructure Layer (adapters) + +----------------------------------------------------------+ + | LightRAGAdapter MinioAdapter | + | (RAGAnything/ (minio-py) | + | KreuzbergParser) | + | | + | PostgresBM25Adapter RRFCombiner | + | (pg_textsearch) (hybrid+ fusion) | + | | + | KreuzbergAdapter LangchainPgvectorAdapter | + | (kreuzberg - 91 formats) (langchain-postgres PGVector) | + | | + | LangchainOpenAIAdapter BricksApiAdapter | + | (langchain-openai ChatOpenAI) (httpx, Bricks REST API) | + +----------------------------------------------------------+ + | | | | | + v v v v v + PostgreSQL MinIO Kreuzberg OpenAI-compatible Bricks API + (pgvector + (object (document (LLM API) (analyse.bricks.co + Apache AGE storage) extraction) + section-versions) + pg_textsearch) ``` ## Prerequisites @@ -584,7 +588,7 @@ If BM25 is unavailable (`BM25_ENABLED=false` or pg_textsearch extension missing) ## MCP Servers -The service exposes **three MCP servers**, all using streamable HTTP transport: +The service exposes **four MCP servers**, all using streamable HTTP transport: ### RAGAnythingQuery — `/rag/mcp` @@ -628,6 +632,58 @@ File browsing tools for listing and reading files from MinIO storage. Downloads the file from MinIO, extracts its text content using Kreuzberg, and returns the extracted text along with metadata and any detected tables. +### RAGAnythingBricks — `/bricks/mcp` + +Bricks integration tools for accessing project documents from the Bricks platform and publishing structured section versions. + +#### Tool: `list_bricks_documents` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `project_unique_id` | string | required | Bricks project unique identifier | + +Returns a list of documents for the specified Bricks project, including metadata like file name, MIME type, size, status, and presigned download URLs. + +#### Tool: `read_bricks_document` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `file_url` | string | required | Presigned S3 URL from `list_bricks_documents` | + +Downloads the document from the presigned S3 URL, extracts its text content using Kreuzberg, and returns the extracted text, metadata, and detected tables. No authentication is required — the URL is already signed. + +#### Tool: `publish_section_version` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `project_unique_id` | string | required | Bricks project unique identifier | +| `section_key` | string | required | Section key to publish (e.g. `"summary"`, `"analysis"`) | +| `content` | dict | required | Structured content for the section | +| `workflow_id` | string | `"agent-haiku-files-v1"` | Workflow identifier | +| `workflow_name` | string | `"haiku-files"` | Workflow display name | +| `workflow_metadata` | dict | `null` | Additional workflow metadata | + +Publishes a structured section version back to the Bricks platform. When `BRICKS_PUBLISH_DRY_RUN=true` (default), the tool returns a preview of the payload without making an API call. Set `BRICKS_PUBLISH_DRY_RUN=false` to enable real publishing. + +**Dry-run response example:** + +```json +{ + "success": true, + "message": "DRY RUN — no API call made", + "dry_run": true, + "payload_preview": { + "project_unique_id": "abc-123", + "section_key": "summary", + "content": {"title": "Analysis Summary"}, + "workflow_id": "agent-haiku-files-v1", + "workflow_name": "haiku-files", + "workflow_metadata": {} + }, + "target_url": "https://project-analysis-preprod.up.railway.app/api/section-versions" +} +``` + ### RAGAnythingClassical — `/classical/mcp` Classical RAG tools for indexing and querying without a knowledge graph. @@ -670,6 +726,7 @@ All MCP servers use **streamable HTTP** transport exclusively. Connect MCP clien http://localhost:8000/rag/mcp # RAGAnythingQuery http://localhost:8000/files/mcp # RAGAnythingFiles http://localhost:8000/classical/mcp # RAGAnythingClassical +http://localhost:8000/bricks/mcp # RAGAnythingBricks ``` ## Configuration @@ -756,6 +813,16 @@ The classical RAG adapters share the same `OPEN_ROUTER_API_KEY`, `OPEN_ROUTER_AP | `MINIO_BUCKET` | `raganything` | Default bucket name | | `MINIO_SECURE` | `false` | Use HTTPS for MinIO | +### Bricks API (`BricksConfig`) + +| Variable | Default | Description | +|----------|---------|-------------| +| `BRICKS_API_BASE_URL` | `https://analyse.bricks.co` | Bricks platform base URL | +| `BRICKS_API_KEY` | -- | X-API-Key for Bricks API authentication | +| `BRICKS_BEARER_TOKEN` | -- | Bearer token for Bricks API (document listing) | +| `BRICKS_PUBLISH_TARGET_URL` | -- | Target URL for publishing section versions | +| `BRICKS_PUBLISH_DRY_RUN` | `true` | When `true`, `publish_section_version` returns a payload preview without making an API call | + ## Query Modes | Mode | Description | @@ -821,6 +888,7 @@ src/ document_reader_port.py -- DocumentReaderPort (abstract) + DocumentContent vector_store_port.py -- VectorStorePort (abstract) + SearchResult llm_port.py -- LLMPort (abstract) + bricks_api_port.py -- BricksApiPort (abstract) + BricksDocumentInfo + SectionVersionResult application/ api/ health_routes.py -- GET /health @@ -832,6 +900,7 @@ src/ mcp_query_tools.py -- MCP tools: query_knowledge_base, query_knowledge_base_multimodal mcp_file_tools.py -- MCP tools: list_files, read_file mcp_classical_tools.py -- MCP tools: classical_index_file, classical_index_folder, classical_query + mcp_bricks_tools.py -- MCP tools: list_bricks_documents, read_bricks_document, publish_section_version requests/ indexing_request.py -- IndexFileRequest, IndexFolderRequest classical_indexing_request.py -- ClassicalIndexFileRequest, ClassicalIndexFolderRequest @@ -854,6 +923,9 @@ src/ list_folders_use_case.py -- Lists folder prefixes from MinIO read_file_use_case.py -- Reads file from MinIO, extracts content via Kreuzberg upload_file_use_case.py -- Uploads file to MinIO storage + list_bricks_documents_use_case.py -- Lists documents from Bricks API + read_bricks_document_use_case.py -- Downloads Bricks document via presigned URL, extracts via Kreuzberg + publish_section_version_use_case.py -- Publishes section version (dry-run aware) infrastructure/ rag/ lightrag_adapter.py -- LightRAGAdapter (RAGAnything/LightRAG) @@ -871,6 +943,8 @@ src/ langchain_pgvector_adapter.py -- LangchainPgvectorAdapter (langchain-postgres PGVectorStore) llm/ langchain_openai_adapter.py -- LangchainOpenAIAdapter (langchain-openai ChatOpenAI) + bricks/ + bricks_api_adapter.py -- BricksApiAdapter (httpx, Bricks REST API + section-versions) alembic/ env.py -- Alembic migration environment (async) versions/ diff --git a/src/application/api/mcp_bricks_tools.py b/src/application/api/mcp_bricks_tools.py new file mode 100644 index 0000000..c1cb7d7 --- /dev/null +++ b/src/application/api/mcp_bricks_tools.py @@ -0,0 +1,97 @@ +import logging + +from fastmcp import FastMCP +from fastmcp.exceptions import ToolError + +from application.responses.file_response import FileContentResponse +from dependencies import ( + get_list_bricks_documents_use_case, + get_publish_section_version_use_case, + get_read_bricks_document_use_case, +) + +logger = logging.getLogger(__name__) + +mcp_bricks = FastMCP("RAGAnythingBricks") + + +@mcp_bricks.tool() +async def list_bricks_documents(project_unique_id: str) -> list: + """Liste les documents d'un projet Bricks. + + Args: + project_unique_id: L'identifiant unique du projet Bricks + + Returns: + Liste des documents du projet avec métadonnées + """ + use_case = get_list_bricks_documents_use_case() + try: + return await use_case.execute(project_id=project_unique_id) + except Exception: + logger.exception( + "Failed to list bricks documents for project %s", project_unique_id + ) + raise ToolError("Failed to list bricks documents") from None + + +@mcp_bricks.tool() +async def read_bricks_document(file_url: str) -> FileContentResponse: + """Télécharge un document Bricks et extrait son contenu textuel. + + Args: + file_url: L'URL de téléchargement du document Bricks + + Returns: + Contenu extrait avec métadonnées et tables détectées + """ + use_case = get_read_bricks_document_use_case() + try: + result = await use_case.execute(file_url=file_url) + except Exception: + logger.exception("Failed to read bricks document: %s", file_url) + raise ToolError("Failed to read bricks document") from None + return FileContentResponse( + content=result.content, + metadata=result.metadata, + tables=result.tables, + ) + + +@mcp_bricks.tool() +async def publish_section_version( + project_unique_id: str, + section_key: str, + content: dict, + workflow_id: str = "agent-haiku-files-v1", + workflow_name: str = "haiku-files", + workflow_metadata: dict | None = None, +) -> dict: + """Publie la réponse structurée d'une section d'un projet Bricks. + + Args: + project_unique_id: L'identifiant unique du projet + section_key: La clé de la section à publier + content: Le contenu structuré de la section + workflow_id: L'identifiant du workflow + workflow_name: Le nom du workflow + workflow_metadata: Métadonnées additionnelles du workflow + + Returns: + Résultat de la publication avec statut et aperçu du payload + """ + use_case = get_publish_section_version_use_case() + try: + return await use_case.execute( + project_unique_id=project_unique_id, + section_key=section_key, + content=content, + workflow_id=workflow_id, + workflow_name=workflow_name, + workflow_metadata=workflow_metadata, + ) + except Exception: + logger.exception( + "Failed to publish section version for project %s", project_unique_id + ) + raise ToolError("Failed to publish section version") from None diff --git a/src/application/use_cases/list_bricks_documents_use_case.py b/src/application/use_cases/list_bricks_documents_use_case.py new file mode 100644 index 0000000..eb0f595 --- /dev/null +++ b/src/application/use_cases/list_bricks_documents_use_case.py @@ -0,0 +1,9 @@ +from domain.ports.bricks_api_port import BricksApiPort, BricksDocumentInfo + + +class ListBricksDocumentsUseCase: + def __init__(self, bricks_api: BricksApiPort) -> None: + self.bricks_api = bricks_api + + async def execute(self, project_id: str) -> list[BricksDocumentInfo]: + return await self.bricks_api.list_project_documents(project_id=project_id) diff --git a/src/application/use_cases/publish_section_version_use_case.py b/src/application/use_cases/publish_section_version_use_case.py new file mode 100644 index 0000000..771c452 --- /dev/null +++ b/src/application/use_cases/publish_section_version_use_case.py @@ -0,0 +1,44 @@ +from domain.ports.bricks_api_port import BricksApiPort, SectionVersionResult + + +class PublishSectionVersionUseCase: + def __init__( + self, + bricks_api: BricksApiPort, + dry_run: bool, + target_url: str, + ) -> None: + self.bricks_api = bricks_api + self.dry_run = dry_run + self.target_url = target_url + + async def execute( + self, + project_unique_id: str, + section_key: str, + content: dict, + workflow_id: str, + workflow_name: str, + workflow_metadata: dict | None = None, + ) -> SectionVersionResult: + payload = { + "project_unique_id": project_unique_id, + "section_key": section_key, + "content": content, + "workflow_id": workflow_id, + "workflow_name": workflow_name, + "workflow_metadata": workflow_metadata + if workflow_metadata is not None + else {}, + } + + if self.dry_run: + return SectionVersionResult( + success=True, + message="DRY RUN — no API call made", + dry_run=True, + payload_preview=payload, + target_url=self.target_url, + ) + + return await self.bricks_api.publish_section_version(payload=payload) diff --git a/src/application/use_cases/read_bricks_document_use_case.py b/src/application/use_cases/read_bricks_document_use_case.py new file mode 100644 index 0000000..895d04e --- /dev/null +++ b/src/application/use_cases/read_bricks_document_use_case.py @@ -0,0 +1,36 @@ +import contextlib +import os +import tempfile + +import aiofiles + +from domain.ports.bricks_api_port import BricksApiPort +from domain.ports.document_reader_port import DocumentContent, DocumentReaderPort + + +class ReadBricksDocumentUseCase: + def __init__( + self, + bricks_api: BricksApiPort, + document_reader: DocumentReaderPort, + output_dir: str, + ) -> None: + self.bricks_api = bricks_api + self.document_reader = document_reader + self.output_dir = output_dir + + async def execute(self, file_url: str) -> DocumentContent: + data, filename = await self.bricks_api.download_document(download_url=file_url) + + os.makedirs(self.output_dir, exist_ok=True) + suffix = os.path.splitext(filename)[1] or ".bin" + fd, tmp_path = tempfile.mkstemp(suffix=suffix, dir=self.output_dir) + try: + os.close(fd) + async with aiofiles.open(tmp_path, "wb") as f: + await f.write(data) + + return await self.document_reader.extract_content(tmp_path) + finally: + with contextlib.suppress(OSError): + os.unlink(tmp_path) diff --git a/src/config.py b/src/config.py index 0629493..c203116 100644 --- a/src/config.py +++ b/src/config.py @@ -157,6 +157,16 @@ class ClassicalRAGConfig(BaseSettings): ) +class BricksConfig(BaseSettings): + BRICKS_API_BASE_URL: str = Field(default="https://analyse.bricks.co") + BRICKS_API_KEY: str = Field(default="") + BRICKS_BEARER_TOKEN: str = Field(default="") + BRICKS_PUBLISH_TARGET_URL: str = Field( + default="https://project-analysis-preprod.up.railway.app/api/section-versions" + ) + BRICKS_PUBLISH_DRY_RUN: bool = Field(default=True) + + class MinioConfig(BaseSettings): """MinIO object storage configuration.""" diff --git a/src/dependencies.py b/src/dependencies.py index 07d41a8..70846ec 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -11,16 +11,26 @@ from application.use_cases.classical_query_use_case import ClassicalQueryUseCase from application.use_cases.index_file_use_case import IndexFileUseCase from application.use_cases.index_folder_use_case import IndexFolderUseCase +from application.use_cases.list_bricks_documents_use_case import ( + ListBricksDocumentsUseCase, +) from application.use_cases.list_files_use_case import ListFilesUseCase from application.use_cases.list_folders_use_case import ListFoldersUseCase from application.use_cases.liveness_check_use_case import LivenessCheckUseCase from application.use_cases.multimodal_query_use_case import MultimodalQueryUseCase +from application.use_cases.publish_section_version_use_case import ( + PublishSectionVersionUseCase, +) from application.use_cases.query_use_case import QueryUseCase +from application.use_cases.read_bricks_document_use_case import ( + ReadBricksDocumentUseCase, +) from application.use_cases.read_file_use_case import ReadFileUseCase from application.use_cases.upload_file_use_case import UploadFileUseCase from config import ( AppConfig, BM25Config, + BricksConfig, ClassicalRAGConfig, DatabaseConfig, LLMConfig, @@ -30,6 +40,7 @@ from domain.ports.bm25_engine import BM25EnginePort from domain.ports.llm_port import LLMPort from domain.ports.vector_store_port import VectorStorePort +from infrastructure.bricks.bricks_api_adapter import BricksApiAdapter from infrastructure.database.asyncpg_health_adapter import AsyncpgHealthAdapter from infrastructure.document_reader.kreuzberg_adapter import KreuzbergAdapter from infrastructure.rag.classical_bm25_adapter import ClassicalBM25Adapter @@ -43,6 +54,7 @@ minio_config = MinioConfig() # type: ignore bm25_config = BM25Config() # type: ignore db_config = DatabaseConfig() # type: ignore +bricks_config = BricksConfig() # type: ignore os.makedirs(app_config.OUTPUT_DIR, exist_ok=True) @@ -67,6 +79,7 @@ kreuzberg_adapter = KreuzbergAdapter() postgres_health_adapter = AsyncpgHealthAdapter(db_config) +bricks_api_adapter = BricksApiAdapter(config=bricks_config) classical_rag_config = ClassicalRAGConfig() @@ -201,3 +214,23 @@ def get_liveness_check_use_case() -> LivenessCheckUseCase: postgres_health=postgres_health_adapter, bucket=minio_config.MINIO_BUCKET, ) + + +def get_list_bricks_documents_use_case() -> ListBricksDocumentsUseCase: + return ListBricksDocumentsUseCase(bricks_api=bricks_api_adapter) + + +def get_read_bricks_document_use_case() -> ReadBricksDocumentUseCase: + return ReadBricksDocumentUseCase( + bricks_api=bricks_api_adapter, + document_reader=kreuzberg_adapter, + output_dir=app_config.OUTPUT_DIR, + ) + + +def get_publish_section_version_use_case() -> PublishSectionVersionUseCase: + return PublishSectionVersionUseCase( + bricks_api=bricks_api_adapter, + dry_run=bricks_config.BRICKS_PUBLISH_DRY_RUN, + target_url=bricks_config.BRICKS_PUBLISH_TARGET_URL, + ) diff --git a/src/domain/ports/bricks_api_port.py b/src/domain/ports/bricks_api_port.py new file mode 100644 index 0000000..facdabf --- /dev/null +++ b/src/domain/ports/bricks_api_port.py @@ -0,0 +1,54 @@ +from abc import ABC, abstractmethod + +from pydantic import BaseModel, ConfigDict, Field + + +class BricksDocumentInfo(BaseModel): + model_config = ConfigDict(populate_by_name=True) + id: str + file_name: str = Field(alias="fileName") + url: str + hash: str = "" + mime_type: str = Field(default="", alias="mimeType") + size: int = 0 + status: str = "" + uploaded_at: str | None = Field(default=None, alias="uploadedAt") + image_analysis_status: str | None = Field(default=None, alias="imageAnalysisStatus") + image_analysis_confidence: float | None = Field( + default=None, alias="imageAnalysisConfidence" + ) + image_analysis_reasoning: str | None = Field( + default=None, alias="imageAnalysisReasoning" + ) + image_analysis_date: str | None = Field(default=None, alias="imageAnalysisDate") + category: str | None = None + category_confidence: float | None = Field(default=None, alias="categoryConfidence") + category_source: str | None = Field(default=None, alias="categorySource") + category_classified_at: str | None = Field( + default=None, alias="categoryClassifiedAt" + ) + is_ignored: bool = Field(default=False, alias="isIgnored") + project_id: str = Field(default="", alias="projectId") + + +class SectionVersionResult(BaseModel): + success: bool + message: str = "" + data: dict | None = None + dry_run: bool = False + payload_preview: dict | None = None + target_url: str | None = None + + +class BricksApiPort(ABC): + @abstractmethod + async def list_project_documents(self, project_id: str) -> list[BricksDocumentInfo]: + pass + + @abstractmethod + async def download_document(self, download_url: str) -> tuple[bytes, str]: + pass + + @abstractmethod + async def publish_section_version(self, payload: dict) -> SectionVersionResult: + pass diff --git a/src/infrastructure/bricks/bricks_api_adapter.py b/src/infrastructure/bricks/bricks_api_adapter.py new file mode 100644 index 0000000..b158b59 --- /dev/null +++ b/src/infrastructure/bricks/bricks_api_adapter.py @@ -0,0 +1,102 @@ +import logging +import re + +import httpx + +from domain.ports.bricks_api_port import ( + BricksApiPort, + BricksDocumentInfo, + SectionVersionResult, +) + +logger = logging.getLogger(__name__) + +_DEFAULT_TIMEOUT = httpx.Timeout(30.0) + + +class BricksApiAdapter(BricksApiPort): + def __init__(self, config) -> None: + self._base_url = config.BRICKS_API_BASE_URL + self._api_key = config.BRICKS_API_KEY + self._bearer_token = config.BRICKS_BEARER_TOKEN + self._publish_target_url = config.BRICKS_PUBLISH_TARGET_URL + self._client = httpx.AsyncClient( + base_url=self._base_url, timeout=_DEFAULT_TIMEOUT + ) + + async def close(self) -> None: + await self._client.aclose() + + async def list_project_documents(self, project_id: str) -> list[BricksDocumentInfo]: + url = f"api/projects/{project_id}/documents/ai" + try: + response = await self._client.get( + url, + headers={"Authorization": f"Bearer {self._bearer_token}"}, + ) + response.raise_for_status() + except httpx.TimeoutException as e: + raise TimeoutError(f"Bricks API request timed out: {e}") from e + except httpx.HTTPStatusError as e: + if e.response.status_code in (401, 403): + raise PermissionError( + f"Bricks API authentication failed (HTTP {e.response.status_code})" + ) from e + if e.response.status_code == 404: + raise FileNotFoundError( + f"Bricks project not found: {project_id}" + ) from e + raise RuntimeError( + f"Bricks API error (HTTP {e.response.status_code})" + ) from e + except httpx.RequestError as e: + raise ConnectionError(f"Bricks API connection failed: {e}") from e + items = response.json().get("items", []) + return [BricksDocumentInfo(**item) for item in items] + + async def download_document(self, download_url: str) -> tuple[bytes, str]: + try: + response = await self._client.get(download_url) + response.raise_for_status() + except httpx.TimeoutException as e: + raise TimeoutError(f"Document download timed out: {e}") from e + except httpx.HTTPStatusError as e: + raise RuntimeError( + f"Failed to download document (HTTP {e.response.status_code})" + ) from e + except httpx.RequestError as e: + raise ConnectionError(f"Document download connection failed: {e}") from e + content_disposition = response.headers.get("content-disposition", "") + filename = "document.bin" + match = re.search(r'filename="([^"]+)"', content_disposition) + if match: + filename = match.group(1) + else: + match = re.search(r"filename=([^\s;]+)", content_disposition) + if match: + filename = match.group(1) + return response.content, filename + + async def publish_section_version(self, payload: dict) -> SectionVersionResult: + try: + response = await self._client.post( + self._publish_target_url, + headers={ + "X-API-Key": self._api_key, + "Content-Type": "application/json", + }, + json=payload, + ) + response.raise_for_status() + except httpx.TimeoutException as e: + raise TimeoutError(f"Publish request timed out: {e}") from e + except httpx.HTTPStatusError as e: + if e.response.status_code in (401, 403): + raise PermissionError( + f"Publish authentication failed (HTTP {e.response.status_code})" + ) from e + raise RuntimeError(f"Publish failed (HTTP {e.response.status_code})") from e + except httpx.RequestError as e: + raise ConnectionError(f"Publish connection failed: {e}") from e + data = response.json() + return SectionVersionResult(success=True, message="Published", data=data) diff --git a/src/main.py b/src/main.py index 9237afe..ceb9b0f 100644 --- a/src/main.py +++ b/src/main.py @@ -16,11 +16,17 @@ from application.api.file_routes import file_router from application.api.health_routes import health_router from application.api.indexing_routes import indexing_router +from application.api.mcp_bricks_tools import mcp_bricks from application.api.mcp_classical_tools import mcp_classical from application.api.mcp_file_tools import mcp_files from application.api.mcp_query_tools import mcp_query from application.api.query_routes import query_router -from dependencies import app_config, bm25_adapter, classical_vector_store +from dependencies import ( + app_config, + bm25_adapter, + bricks_api_adapter, + classical_vector_store, +) _LOG_FORMAT = "%(asctime)s %(levelname)-8s [%(name)s] %(message)s" @@ -67,7 +73,7 @@ def _run_alembic_upgrade() -> None: @asynccontextmanager async def db_lifespan(_app: FastAPI): - """Closes BM25 connection pool on shutdown.""" + """Closes BM25 connection pool and Bricks HTTP client on shutdown.""" yield logger.info("Application shutdown initiated") @@ -81,12 +87,17 @@ async def db_lifespan(_app: FastAPI): await classical_vector_store.close() except Exception: logger.exception("Failed to close classical vector store") + try: + await bricks_api_adapter.close() + except Exception: + logger.exception("Failed to close Bricks API adapter") logger.info("Application shutdown complete") mcp_query_app = mcp_query.http_app(path="/") mcp_files_app = mcp_files.http_app(path="/") mcp_classical_app = mcp_classical.http_app(path="/") +mcp_bricks_app = mcp_bricks.http_app(path="/") @asynccontextmanager @@ -97,6 +108,7 @@ async def combined_lifespan(app: FastAPI): mcp_query_app.lifespan(app), mcp_files_app.lifespan(app), mcp_classical_app.lifespan(app), + mcp_bricks_app.lifespan(app), ): yield @@ -125,6 +137,7 @@ async def combined_lifespan(app: FastAPI): app.mount("/rag/mcp", mcp_query_app) app.mount("/files/mcp", mcp_files_app) app.mount("/classical/mcp", mcp_classical_app) +app.mount("/bricks/mcp", mcp_bricks_app) def run_fastapi(): diff --git a/tests/conftest.py b/tests/conftest.py index 277e2eb..47ae23d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ mock_document_reader = _external.mock_document_reader mock_vector_store = _external.mock_vector_store mock_llm = _external.mock_llm +mock_bricks_api = _external.mock_bricks_api @pytest.fixture diff --git a/tests/fixtures/external.py b/tests/fixtures/external.py index fd35f32..4f20eb6 100644 --- a/tests/fixtures/external.py +++ b/tests/fixtures/external.py @@ -8,6 +8,11 @@ FolderIndexingStats, IndexingStatus, ) +from domain.ports.bricks_api_port import ( + BricksApiPort, + BricksDocumentInfo, + SectionVersionResult, +) from domain.ports.document_reader_port import ( DocumentContent, DocumentMetadata, @@ -120,3 +125,21 @@ def mock_llm() -> AsyncMock: ) mock.generate_chat.return_value = "LLM generated response text" return mock + + +@pytest.fixture +def mock_bricks_api() -> AsyncMock: + """Provide an AsyncMock of BricksApiPort for external adapter mocking.""" + mock = AsyncMock(spec=BricksApiPort) + mock.list_project_documents.return_value = [ + BricksDocumentInfo( + id="test-id", + fileName="doc.pdf", + url="https://s3.example.com/doc.pdf", + ) + ] + mock.download_document.return_value = (b"pdf content", "document.pdf") + mock.publish_section_version.return_value = SectionVersionResult( + success=True, message="Published" + ) + return mock diff --git a/tests/unit/infrastructure/test_bricks_api_adapter.py b/tests/unit/infrastructure/test_bricks_api_adapter.py new file mode 100644 index 0000000..6286ec0 --- /dev/null +++ b/tests/unit/infrastructure/test_bricks_api_adapter.py @@ -0,0 +1,405 @@ +"""Tests for BricksApiAdapter — the httpx-based implementation of BricksApiPort. + +The Bricks API is an external dependency, so we mock httpx responses +while testing our adapter logic for parsing, headers, and error handling. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from domain.ports.bricks_api_port import BricksDocumentInfo, SectionVersionResult +from infrastructure.bricks.bricks_api_adapter import BricksApiAdapter + + +@pytest.fixture +def bricks_config() -> MagicMock: + """Provide a mock BricksConfig.""" + config = MagicMock() + config.BRICKS_API_BASE_URL = "https://api.bricks.example.com" + config.BRICKS_API_KEY = "test-api-key-12345" + config.BRICKS_BEARER_TOKEN = "test-bearer-token" + config.BRICKS_PUBLISH_DRY_RUN = True + config.BRICKS_PUBLISH_TARGET_URL = ( + "https://api.bricks.example.com/api/section-versions" + ) + return config + + +@pytest.fixture +def mock_httpx_client() -> AsyncMock: + """Provide a mocked httpx AsyncClient.""" + return AsyncMock(spec=httpx.AsyncClient) + + +@pytest.fixture +def adapter(bricks_config: MagicMock, mock_httpx_client: AsyncMock) -> BricksApiAdapter: + """Provide a BricksApiAdapter with mocked httpx client.""" + with patch( + "infrastructure.bricks.bricks_api_adapter.httpx.AsyncClient", + return_value=mock_httpx_client, + ): + adapter = BricksApiAdapter(config=bricks_config) + adapter._client = mock_httpx_client # type: ignore[attr-defined] + return adapter + + +class TestListProjectDocuments: + """Tests for BricksApiAdapter.list_project_documents.""" + + async def test_calls_api_with_bearer_token( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should call GET /api/projects/{id}/documents/ai with Bearer token.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"items": []} + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + await adapter.list_project_documents(project_id="proj-123") + + mock_httpx_client.get.assert_called_once() + call_args = mock_httpx_client.get.call_args + assert "api/projects/proj-123/documents/ai" in call_args[0][ + 0 + ] or "proj-123/documents/ai" in str(call_args) + headers = ( + call_args[1].get("headers", {}) or call_args[0][0] + if len(call_args[0]) > 1 + else call_args[1].get("headers", {}) + ) + assert "Authorization" in headers + assert headers["Authorization"] == "Bearer test-bearer-token" + + async def test_returns_list_of_bricks_document_info( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should parse JSON response and return list of BricksDocumentInfo.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "items": [ + { + "id": "doc-1", + "fileName": "report.pdf", + "url": "https://s3.example.com/presigned1", + "hash": "abc123", + "mimeType": "application/pdf", + "size": 1024, + "status": "PROCESSED", + "uploadedAt": "2025-12-03T17:02:50.916Z", + "imageAnalysisStatus": None, + "imageAnalysisConfidence": None, + "imageAnalysisReasoning": None, + "imageAnalysisDate": None, + "category": None, + "categoryConfidence": None, + "categorySource": None, + "categoryClassifiedAt": None, + "isIgnored": False, + "projectId": "proj-123", + }, + { + "id": "doc-2", + "fileName": "notes.docx", + "url": "https://s3.example.com/presigned2", + "mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "size": 2048, + "status": "PROCESSED", + "isIgnored": True, + "projectId": "proj-123", + }, + ] + } + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + result = await adapter.list_project_documents(project_id="proj-123") + + assert len(result) == 2 + assert isinstance(result[0], BricksDocumentInfo) + assert result[0].file_name == "report.pdf" + assert result[0].mime_type == "application/pdf" + assert result[0].size == 1024 + assert result[1].file_name == "notes.docx" + assert result[1].is_ignored is True + + async def test_camel_case_to_snake_case_mapping( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should correctly map camelCase API fields to snake_case model fields.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "items": [ + { + "id": "doc-uuid", + "fileName": "image.png", + "url": "https://s3.example.com/img", + "imageAnalysisStatus": "relevant", + "imageAnalysisConfidence": 95.0, + "imageAnalysisReasoning": "Contains relevant diagrams", + "imageAnalysisDate": "2025-12-04T10:00:00.000Z", + "categoryConfidence": 72.3, + "categorySource": "llm", + "categoryClassifiedAt": "2025-12-05T08:00:00.000Z", + } + ] + } + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + result = await adapter.list_project_documents(project_id="proj-123") + + doc = result[0] + assert doc.image_analysis_status == "relevant" + assert doc.image_analysis_confidence == 95.0 + assert doc.image_analysis_reasoning == "Contains relevant diagrams" + assert doc.image_analysis_date == "2025-12-04T10:00:00.000Z" + assert doc.category_confidence == 72.3 + assert doc.category_source == "llm" + assert doc.category_classified_at == "2025-12-05T08:00:00.000Z" + + async def test_returns_empty_list_when_no_documents( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should return empty list when API returns empty items.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"items": []} + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + result = await adapter.list_project_documents(project_id="proj-empty") + + assert result == [] + + async def test_raises_on_401_unauthorized( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise proper exception on 401 Unauthorized.""" + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + message="Unauthorized", + request=MagicMock(), + response=mock_response, + ) + mock_httpx_client.get.return_value = mock_response + + with pytest.raises(PermissionError): + await adapter.list_project_documents(project_id="proj-123") + + async def test_raises_on_403_forbidden( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise PermissionError on 403 Forbidden.""" + mock_response = MagicMock() + mock_response.status_code = 403 + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + message="Forbidden", + request=MagicMock(), + response=mock_response, + ) + mock_httpx_client.get.return_value = mock_response + + with pytest.raises(PermissionError): + await adapter.list_project_documents(project_id="proj-123") + + async def test_raises_on_404_not_found( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise FileNotFoundError on 404 Not Found.""" + mock_response = MagicMock() + mock_response.status_code = 404 + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + message="Not Found", + request=MagicMock(), + response=mock_response, + ) + mock_httpx_client.get.return_value = mock_response + + with pytest.raises(FileNotFoundError): + await adapter.list_project_documents(project_id="nonexistent") + + async def test_raises_on_timeout( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise TimeoutError on request timeout.""" + mock_httpx_client.get.side_effect = httpx.TimeoutException("Request timed out") + + with pytest.raises(TimeoutError): + await adapter.list_project_documents(project_id="proj-123") + + +class TestDownloadDocument: + """Tests for BricksApiAdapter.download_document.""" + + async def test_downloads_from_s3_url( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should download file content from S3 presigned URL (no auth header).""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.content = b"PDF binary content" + mock_response.headers = { + "content-disposition": 'attachment; filename="report.pdf"' + } + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + content, filename = await adapter.download_document( + download_url="https://s3.example.com/presigned-url" + ) + + assert content == b"PDF binary content" + assert filename == "report.pdf" + + async def test_extracts_filename_from_content_disposition( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should extract filename from Content-Disposition header.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.content = b"data" + mock_response.headers = { + "content-disposition": 'attachment; filename="my document.docx"' + } + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + _, filename = await adapter.download_document( + download_url="https://s3.example.com/doc" + ) + + assert filename == "my document.docx" + + async def test_no_auth_header_on_s3_download( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """S3 presigned URLs should not include authentication headers.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.content = b"data" + mock_response.headers = {} + mock_response.raise_for_status = MagicMock() + mock_httpx_client.get.return_value = mock_response + + await adapter.download_document( + download_url="https://s3.example.com/presigned-url" + ) + + call_args = mock_httpx_client.get.call_args + headers = call_args[1].get("headers", {}) + assert "Authorization" not in headers + assert "X-API-Key" not in headers + + async def test_raises_on_download_failure( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise exception on download failure.""" + mock_response = MagicMock() + mock_response.status_code = 403 + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + message="Forbidden", + request=MagicMock(), + response=mock_response, + ) + mock_httpx_client.get.return_value = mock_response + + with pytest.raises(RuntimeError): + await adapter.download_document( + download_url="https://s3.example.com/expired-url" + ) + + async def test_raises_on_timeout( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise TimeoutError on download timeout.""" + mock_httpx_client.get.side_effect = httpx.TimeoutException("Download timed out") + + with pytest.raises(TimeoutError): + await adapter.download_document( + download_url="https://s3.example.com/slow-url" + ) + + +class TestPublishSectionVersion: + """Tests for BricksApiAdapter.publish_section_version.""" + + async def test_calls_post_with_x_api_key_header( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should call POST /api/section-versions with X-API-Key header.""" + mock_response = MagicMock() + mock_response.status_code = 201 + mock_response.json.return_value = { + "id": "sv-1", + "sectionKey": "intro", + } + mock_response.raise_for_status = MagicMock() + mock_httpx_client.post.return_value = mock_response + + payload = { + "project_unique_id": "proj-123", + "section_key": "intro", + "content": "Hello world", + "workflow_id": "wf-1", + "workflow_name": "draft", + "workflow_metadata": {}, + } + + await adapter.publish_section_version(payload=payload) + + mock_httpx_client.post.assert_called_once() + call_args = mock_httpx_client.post.call_args + headers = call_args[1].get("headers", {}) + assert "X-API-Key" in headers + assert headers["X-API-Key"] == "test-api-key-12345" + + async def test_returns_section_version_result( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should return SectionVersionResult on successful publish.""" + mock_response = MagicMock() + mock_response.status_code = 201 + mock_response.json.return_value = { + "id": "sv-uuid", + "sectionKey": "summary", + } + mock_response.raise_for_status = MagicMock() + mock_httpx_client.post.return_value = mock_response + + result = await adapter.publish_section_version( + payload={"section_key": "summary", "content": "Summary text"} + ) + + assert isinstance(result, SectionVersionResult) + assert result.success is True + + async def test_raises_on_401_unauthorized( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise proper exception on 401 when publishing.""" + mock_response = MagicMock() + mock_response.status_code = 401 + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + message="Unauthorized", + request=MagicMock(), + response=mock_response, + ) + mock_httpx_client.post.return_value = mock_response + + with pytest.raises(PermissionError): + await adapter.publish_section_version(payload={"section_key": "intro"}) + + async def test_raises_on_timeout( + self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock + ) -> None: + """Should raise TimeoutError on publish timeout.""" + mock_httpx_client.post.side_effect = httpx.TimeoutException("Publish timed out") + + with pytest.raises(TimeoutError): + await adapter.publish_section_version(payload={"section_key": "intro"}) diff --git a/tests/unit/test_bricks_api_port.py b/tests/unit/test_bricks_api_port.py new file mode 100644 index 0000000..ffdc394 --- /dev/null +++ b/tests/unit/test_bricks_api_port.py @@ -0,0 +1,226 @@ +"""Tests for BricksDocumentInfo, SectionVersionResult models and BricksApiPort ABC.""" + +from abc import ABC +from unittest.mock import AsyncMock + +import pytest +from pydantic import ValidationError + +from domain.ports.bricks_api_port import ( + BricksApiPort, + BricksDocumentInfo, + SectionVersionResult, +) + + +class TestBricksDocumentInfo: + """Tests for the BricksDocumentInfo pydantic model.""" + + def test_creates_with_camel_case_aliases(self) -> None: + """Should accept camelCase field names via populate_by_name=True.""" + doc = BricksDocumentInfo( + id="abc-123", + fileName="document.pdf", + url="https://s3.example.com/doc.pdf", + mimeType="application/pdf", + size=869723, + status="PROCESSED", + uploadedAt="2025-12-03T17:02:50.916Z", + imageAnalysisStatus="relevant", + imageAnalysisConfidence=95, + imageAnalysisReasoning="Contains diagrams", + imageAnalysisDate="2025-12-04T10:00:00.000Z", + isIgnored=False, + projectId="proj-456", + ) + + assert doc.id == "abc-123" + assert doc.file_name == "document.pdf" + assert doc.url == "https://s3.example.com/doc.pdf" + assert doc.mime_type == "application/pdf" + assert doc.size == 869723 + assert doc.status == "PROCESSED" + assert doc.uploaded_at == "2025-12-03T17:02:50.916Z" + assert doc.image_analysis_status == "relevant" + assert doc.image_analysis_confidence == 95 + assert doc.image_analysis_reasoning == "Contains diagrams" + assert doc.image_analysis_date == "2025-12-04T10:00:00.000Z" + assert doc.is_ignored is False + assert doc.project_id == "proj-456" + + def test_creates_with_snake_case_field_names(self) -> None: + """Should also accept snake_case field names directly.""" + doc = BricksDocumentInfo( + id="abc-123", + file_name="document.pdf", + url="https://s3.example.com/doc.pdf", + ) + + assert doc.file_name == "document.pdf" + + def test_defaults_for_optional_fields(self) -> None: + """Should provide defaults for fields with defaults.""" + doc = BricksDocumentInfo( + id="abc-123", + fileName="test.pdf", + url="https://s3.example.com/test.pdf", + ) + + assert doc.hash == "" + assert doc.mime_type == "" + assert doc.size == 0 + assert doc.status == "" + assert doc.uploaded_at is None + assert doc.image_analysis_status is None + assert doc.image_analysis_confidence is None + assert doc.image_analysis_reasoning is None + assert doc.image_analysis_date is None + assert doc.category is None + assert doc.category_confidence is None + assert doc.category_source is None + assert doc.category_classified_at is None + assert doc.is_ignored is False + assert doc.project_id == "" + + def test_parses_full_api_response_item(self) -> None: + """Should parse a full API response item with all fields.""" + api_item = { + "id": "uuid-1", + "fileName": "report.docx", + "url": "https://neon-project-analysis.s3.amazonaws.com/presigned", + "hash": "sha256hexstring", + "mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "size": 1234567, + "status": "PROCESSED", + "uploadedAt": "2025-12-03T17:02:50.916Z", + "imageAnalysisStatus": None, + "imageAnalysisConfidence": None, + "imageAnalysisReasoning": None, + "imageAnalysisDate": None, + "category": "financial", + "categoryConfidence": 88.5, + "categorySource": "llm", + "categoryClassifiedAt": "2025-12-05T08:00:00.000Z", + "isIgnored": True, + "projectId": "proj-uuid", + } + + doc = BricksDocumentInfo(**api_item) + + assert doc.id == "uuid-1" + assert doc.file_name == "report.docx" + assert doc.hash == "sha256hexstring" + assert ( + doc.mime_type + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ) + assert doc.size == 1234567 + assert doc.category == "financial" + assert doc.category_confidence == 88.5 + assert doc.category_source == "llm" + assert doc.category_classified_at == "2025-12-05T08:00:00.000Z" + assert doc.is_ignored is True + assert doc.project_id == "proj-uuid" + + def test_requires_id_file_name_and_url(self) -> None: + """Should raise ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + BricksDocumentInfo(id="abc", fileName="test.pdf") # missing url + + +class TestSectionVersionResult: + """Tests for the SectionVersionResult model.""" + + def test_creates_minimal_result(self) -> None: + result = SectionVersionResult(success=True) + + assert result.success is True + assert result.message == "" + assert result.data is None + assert result.dry_run is False + assert result.payload_preview is None + assert result.target_url is None + + def test_creates_with_all_fields(self) -> None: + result = SectionVersionResult( + success=True, + message="Published successfully", + data={"id": "sv-123"}, + dry_run=True, + payload_preview={"section_key": "intro", "content": "Hello"}, + target_url="https://api.example.com/section-versions", + ) + + assert result.success is True + assert result.message == "Published successfully" + assert result.data == {"id": "sv-123"} + assert result.dry_run is True + assert result.payload_preview == {"section_key": "intro", "content": "Hello"} + assert result.target_url == "https://api.example.com/section-versions" + + def test_dry_run_result(self) -> None: + result = SectionVersionResult( + success=True, + message="Dry run — no API call made", + dry_run=True, + payload_preview={"key": "value"}, + target_url="https://api.example.com/section-versions", + ) + + assert result.dry_run is True + assert result.data is None + assert result.payload_preview is not None + + +class TestBricksApiPortAbstract: + """Tests for the BricksApiPort abstract base class.""" + + def test_cannot_instantiate_directly(self) -> None: + """BricksApiPort is abstract and cannot be instantiated.""" + with pytest.raises(TypeError): + BricksApiPort() # type: ignore[abstract] + + def test_is_abc_subclass(self) -> None: + """BricksApiPort should inherit from ABC.""" + assert issubclass(BricksApiPort, ABC) + + def test_concrete_subclass_must_implement_all_methods(self) -> None: + """A subclass that doesn't implement all abstract methods cannot be instantiated.""" + + class PartialAdapter(BricksApiPort): + async def list_project_documents( + self, project_id: str + ) -> list[BricksDocumentInfo]: + return [] + + with pytest.raises(TypeError): + PartialAdapter() # type: ignore[abstract] + + def test_concrete_subclass_can_be_instantiated(self) -> None: + """A subclass that implements all abstract methods can be instantiated.""" + + class FullAdapter(BricksApiPort): + async def list_project_documents( + self, project_id: str + ) -> list[BricksDocumentInfo]: + return [] + + async def download_document(self, download_url: str) -> tuple[bytes, str]: + return (b"", "file.pdf") + + async def publish_section_version( + self, payload: dict + ) -> SectionVersionResult: + return SectionVersionResult(success=True) + + adapter = FullAdapter() + assert isinstance(adapter, BricksApiPort) + + def test_concrete_subclass_methods_are_callable(self) -> None: + """Concrete subclass methods should be callable.""" + mock = AsyncMock(spec=BricksApiPort) + mock.list_project_documents.return_value = [ + BricksDocumentInfo(id="1", fileName="a.pdf", url="https://s3/a.pdf") + ] + + assert mock.list_project_documents.return_value is not None diff --git a/tests/unit/test_list_bricks_documents_use_case.py b/tests/unit/test_list_bricks_documents_use_case.py new file mode 100644 index 0000000..53f0cf1 --- /dev/null +++ b/tests/unit/test_list_bricks_documents_use_case.py @@ -0,0 +1,97 @@ +"""Tests for ListBricksDocumentsUseCase — simple delegation to BricksApiPort.""" + +from unittest.mock import AsyncMock + +import pytest + +from application.use_cases.list_bricks_documents_use_case import ( + ListBricksDocumentsUseCase, +) +from domain.ports.bricks_api_port import BricksDocumentInfo + + +class TestListBricksDocumentsUseCase: + """Tests for ListBricksDocumentsUseCase.""" + + @pytest.fixture + def use_case(self, mock_bricks_api: AsyncMock) -> ListBricksDocumentsUseCase: + return ListBricksDocumentsUseCase(bricks_api=mock_bricks_api) + + async def test_execute_delegates_to_port( + self, + use_case: ListBricksDocumentsUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """Should delegate to bricks_api.list_project_documents.""" + await use_case.execute(project_id="proj-123") + + mock_bricks_api.list_project_documents.assert_called_once_with( + project_id="proj-123" + ) + + async def test_execute_returns_list_of_document_infos( + self, + use_case: ListBricksDocumentsUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """Should return the list of BricksDocumentInfo from the port.""" + expected = [ + BricksDocumentInfo( + id="doc-1", + fileName="report.pdf", + url="https://s3.example.com/doc1.pdf", + mimeType="application/pdf", + size=1024, + status="PROCESSED", + ), + BricksDocumentInfo( + id="doc-2", + fileName="notes.docx", + url="https://s3.example.com/doc2.docx", + ), + ] + mock_bricks_api.list_project_documents.return_value = expected + + result = await use_case.execute(project_id="proj-123") + + assert len(result) == 2 + assert result[0].file_name == "report.pdf" + assert result[1].file_name == "notes.docx" + + async def test_execute_returns_empty_list( + self, + use_case: ListBricksDocumentsUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """Should return empty list when no documents exist.""" + mock_bricks_api.list_project_documents.return_value = [] + + result = await use_case.execute(project_id="empty-project") + + assert result == [] + + async def test_execute_propagates_errors( + self, + mock_bricks_api: AsyncMock, + ) -> None: + """Should propagate errors from the API port.""" + mock_bricks_api.list_project_documents.side_effect = ConnectionError( + "API unreachable" + ) + use_case = ListBricksDocumentsUseCase(bricks_api=mock_bricks_api) + + with pytest.raises(ConnectionError, match="API unreachable"): + await use_case.execute(project_id="proj-123") + + async def test_execute_propagates_timeout( + self, + mock_bricks_api: AsyncMock, + ) -> None: + """Should propagate TimeoutError from the API port.""" + mock_bricks_api.list_project_documents.side_effect = TimeoutError( + "Request timed out" + ) + use_case = ListBricksDocumentsUseCase(bricks_api=mock_bricks_api) + + with pytest.raises(TimeoutError): + await use_case.execute(project_id="proj-123") diff --git a/tests/unit/test_mcp_bricks_tools.py b/tests/unit/test_mcp_bricks_tools.py new file mode 100644 index 0000000..461e23b --- /dev/null +++ b/tests/unit/test_mcp_bricks_tools.py @@ -0,0 +1,324 @@ +"""Tests for mcp_bricks_tools.py — Bricks MCP tools registered with FastMCP. + +Follows the exact pattern of test_mcp_file_tools.py. +""" + +from unittest.mock import AsyncMock, patch + +import pytest +from fastmcp.exceptions import ToolError + +from application.api.mcp_bricks_tools import ( + list_bricks_documents, + mcp_bricks, + publish_section_version, + read_bricks_document, +) +from domain.ports.bricks_api_port import BricksDocumentInfo, SectionVersionResult +from domain.ports.document_reader_port import DocumentContent, DocumentMetadata + + +class TestMCPBricksInstance: + """Verify the FastMCP instance configuration.""" + + def test_mcp_bricks_has_correct_name(self) -> None: + """mcp_bricks should be named 'RAGAnythingBricks'.""" + assert mcp_bricks.name == "RAGAnythingBricks" + + +class TestListBricksDocuments: + """Tests for the list_bricks_documents MCP tool.""" + + @pytest.fixture + def mock_documents(self) -> list[BricksDocumentInfo]: + return [ + BricksDocumentInfo( + id="doc-1", + fileName="report.pdf", + url="https://s3.example.com/doc1.pdf", + mimeType="application/pdf", + size=1024, + status="PROCESSED", + ), + BricksDocumentInfo( + id="doc-2", + fileName="notes.docx", + url="https://s3.example.com/doc2.docx", + mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + size=2048, + status="PROCESSED", + ), + ] + + async def test_returns_document_list( + self, mock_documents: list[BricksDocumentInfo] + ) -> None: + """Should call use_case.execute and return document list.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = mock_documents + + with patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ): + result = await list_bricks_documents(project_unique_id="proj-123") + + assert isinstance(result, list) + assert len(result) == 2 + assert result[0].file_name == "report.pdf" + assert result[0].size == 1024 + + async def test_calls_use_case_with_project_id( + self, mock_documents: list[BricksDocumentInfo] + ) -> None: + """Should forward project_unique_id to the use case.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = mock_documents + + with patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ): + await list_bricks_documents(project_unique_id="my-project") + + mock_use_case.execute.assert_called_once_with(project_id="my-project") + + async def test_returns_empty_list_when_no_documents(self) -> None: + """Should return empty list when no documents exist.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = [] + + with patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ): + result = await list_bricks_documents(project_unique_id="empty-proj") + + assert result == [] + + async def test_raises_tool_error_for_api_failure(self) -> None: + """Should convert API errors to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = ConnectionError("API unreachable") + + with ( + patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to list bricks documents"), + ): + await list_bricks_documents(project_unique_id="proj-123") + + +class TestReadBricksDocument: + """Tests for the read_bricks_document MCP tool.""" + + @pytest.fixture + def mock_document_content(self) -> DocumentContent: + return DocumentContent( + content="Extracted text from bricks document.", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[], + ) + + async def test_returns_file_content_response( + self, mock_document_content: DocumentContent + ) -> None: + """Should call use_case.execute and return FileContentResponse.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = mock_document_content + + with patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ): + result = await read_bricks_document( + file_url="https://s3.example.com/presigned/report.pdf" + ) + + assert result.content == "Extracted text from bricks document." + assert result.metadata.mime_type == "application/pdf" + + async def test_raises_tool_error_for_download_failure(self) -> None: + """Should convert download errors to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = ConnectionError("S3 download failed") + + with ( + patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to read bricks document"), + ): + await read_bricks_document( + file_url="https://s3.example.com/expired-url.pdf" + ) + + async def test_raises_tool_error_for_generic_failure(self) -> None: + """Should convert generic exceptions to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = Exception("Unexpected error") + + with ( + patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to read bricks document"), + ): + await read_bricks_document(file_url="https://s3.example.com/broken.pdf") + + async def test_includes_tables_in_response(self) -> None: + """Should include tables in the response.""" + from domain.ports.document_reader_port import TableData + + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = DocumentContent( + content="Report with table", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[TableData(markdown="| A | B |\n|---|---|")], + ) + + with patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ): + result = await read_bricks_document( + file_url="https://s3.example.com/table.pdf" + ) + + assert len(result.tables) == 1 + assert result.tables[0].markdown == "| A | B |\n|---|---|" + + +class TestPublishSectionVersion: + """Tests for the publish_section_version MCP tool.""" + + async def test_returns_section_version_result(self) -> None: + """Should call use_case.execute and return SectionVersionResult.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = SectionVersionResult( + success=True, + message="Dry run — no API call made", + dry_run=True, + payload_preview={"section_key": "intro", "content": "Hello"}, + target_url="https://api.bricks.example.com/api/section-versions", + ) + + with patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ): + result = await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello world", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert result.success is True + assert result.dry_run is True + + async def test_forward_all_params_to_use_case(self) -> None: + """Should forward all parameters to the use case.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = SectionVersionResult(success=True) + + with patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ): + await publish_section_version( + project_unique_id="proj-abc", + section_key="summary", + content="Summary content", + workflow_id="wf-final", + workflow_name="final_review", + workflow_metadata={"approved": True}, + ) + + mock_use_case.execute.assert_called_once_with( + project_unique_id="proj-abc", + section_key="summary", + content="Summary content", + workflow_id="wf-final", + workflow_name="final_review", + workflow_metadata={"approved": True}, + ) + + async def test_raises_tool_error_for_api_failure(self) -> None: + """Should convert API errors to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = ConnectionError("API connection failed") + + with ( + patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to publish section version"), + ): + await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + async def test_raises_tool_error_for_generic_failure(self) -> None: + """Should convert generic exceptions to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = Exception("Unexpected error") + + with ( + patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to publish section version"), + ): + await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + async def test_returns_dry_run_result_with_preview(self) -> None: + """Should return payload_preview when dry_run is True.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = SectionVersionResult( + success=True, + message="Dry run", + dry_run=True, + payload_preview={ + "project_unique_id": "proj-123", + "section_key": "intro", + "content": "Hello", + "workflow_id": "wf-1", + "workflow_name": "draft", + "workflow_metadata": {}, + }, + target_url="https://api.bricks.example.com/api/section-versions", + ) + + with patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ): + result = await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert result.dry_run is True + assert result.payload_preview is not None + assert result.payload_preview["section_key"] == "intro" diff --git a/tests/unit/test_publish_section_version_use_case.py b/tests/unit/test_publish_section_version_use_case.py new file mode 100644 index 0000000..33eb82b --- /dev/null +++ b/tests/unit/test_publish_section_version_use_case.py @@ -0,0 +1,226 @@ +"""Tests for PublishSectionVersionUseCase — payload construction and dry_run logic.""" + +from unittest.mock import AsyncMock + +import pytest + +from application.use_cases.publish_section_version_use_case import ( + PublishSectionVersionUseCase, +) +from domain.ports.bricks_api_port import SectionVersionResult + + +class TestPublishSectionVersionUseCase: + """Tests for PublishSectionVersionUseCase.""" + + @pytest.fixture + def use_case_dry_run( + self, mock_bricks_api: AsyncMock + ) -> PublishSectionVersionUseCase: + """Create a use case with dry_run=True (default).""" + return PublishSectionVersionUseCase( + bricks_api=mock_bricks_api, + dry_run=True, + target_url="https://api.bricks.example.com/api/section-versions", + ) + + @pytest.fixture + def use_case_live(self, mock_bricks_api: AsyncMock) -> PublishSectionVersionUseCase: + """Create a use case with dry_run=False.""" + return PublishSectionVersionUseCase( + bricks_api=mock_bricks_api, + dry_run=False, + target_url="https://api.bricks.example.com/api/section-versions", + ) + + async def test_dry_run_does_not_call_api( + self, + use_case_dry_run: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When dry_run=True, should NOT call bricks_api.publish_section_version.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello world", + workflow_id="wf-1", + workflow_name="draft", + workflow_metadata={"source": "test"}, + ) + + mock_bricks_api.publish_section_version.assert_not_called() + assert result.dry_run is True + + async def test_dry_run_returns_payload_preview( + self, + use_case_dry_run: PublishSectionVersionUseCase, + ) -> None: + """When dry_run=True, should return payload_preview in the result.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="summary", + content="Summary text", + workflow_id="wf-2", + workflow_name="review", + workflow_metadata={"version": 1}, + ) + + assert result.success is True + assert result.dry_run is True + assert result.payload_preview is not None + assert result.payload_preview["project_unique_id"] == "proj-123" + assert result.payload_preview["section_key"] == "summary" + assert result.payload_preview["content"] == "Summary text" + assert result.payload_preview["workflow_id"] == "wf-2" + assert result.payload_preview["workflow_name"] == "review" + assert result.payload_preview["workflow_metadata"] == {"version": 1} + + async def test_dry_run_includes_target_url( + self, + use_case_dry_run: PublishSectionVersionUseCase, + ) -> None: + """When dry_run=True, should include target_url in the result.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert ( + result.target_url == "https://api.bricks.example.com/api/section-versions" + ) + + async def test_live_calls_bricks_api( + self, + use_case_live: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When dry_run=False, should call bricks_api.publish_section_version.""" + mock_bricks_api.publish_section_version.return_value = SectionVersionResult( + success=True, + message="Published", + data={"id": "sv-123"}, + ) + + result = await use_case_live.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello world", + workflow_id="wf-1", + workflow_name="draft", + workflow_metadata={}, + ) + + mock_bricks_api.publish_section_version.assert_called_once() + assert result.success is True + assert result.dry_run is False + + async def test_live_passes_correct_payload( + self, + use_case_live: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When dry_run=False, should pass the correctly constructed payload.""" + mock_bricks_api.publish_section_version.return_value = SectionVersionResult( + success=True, + message="OK", + ) + + await use_case_live.execute( + project_unique_id="proj-abc", + section_key="results", + content="Final results", + workflow_id="wf-final", + workflow_name="final_review", + workflow_metadata={"approved_by": "admin"}, + ) + + call_args = mock_bricks_api.publish_section_version.call_args + payload = call_args[1].get("payload", call_args[0][0] if call_args[0] else None) + if payload is None: + payload = call_args.kwargs.get("payload") + assert payload["project_unique_id"] == "proj-abc" + assert payload["section_key"] == "results" + assert payload["content"] == "Final results" + assert payload["workflow_id"] == "wf-final" + assert payload["workflow_name"] == "final_review" + assert payload["workflow_metadata"] == {"approved_by": "admin"} + + async def test_workflow_metadata_defaults_to_empty_dict( + self, + use_case_dry_run: PublishSectionVersionUseCase, + ) -> None: + """When workflow_metadata is None, should send {} in the payload.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert result.payload_preview["workflow_metadata"] == {} + + async def test_workflow_metadata_none_in_live_payload( + self, + use_case_live: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When workflow_metadata is None in live mode, payload should contain {}.""" + mock_bricks_api.publish_section_version.return_value = SectionVersionResult( + success=True, + ) + + await use_case_live.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + call_args = mock_bricks_api.publish_section_version.call_args + payload = call_args[1].get("payload", call_args[0][0] if call_args[0] else None) + if payload is None: + payload = call_args.kwargs.get("payload") + assert payload["workflow_metadata"] == {} + + async def test_live_propagates_api_errors( + self, + mock_bricks_api: AsyncMock, + ) -> None: + """Should propagate errors from the API when publishing live.""" + mock_bricks_api.publish_section_version.side_effect = ConnectionError( + "API connection failed" + ) + use_case = PublishSectionVersionUseCase( + bricks_api=mock_bricks_api, + dry_run=False, + target_url="https://api.bricks.example.com/api/section-versions", + ) + + with pytest.raises(ConnectionError, match="API connection failed"): + await use_case.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + async def test_dry_run_returns_section_version_result( + self, + use_case_dry_run: PublishSectionVersionUseCase, + ) -> None: + """Dry run result should be a SectionVersionResult instance.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert isinstance(result, SectionVersionResult) diff --git a/tests/unit/test_read_bricks_document_use_case.py b/tests/unit/test_read_bricks_document_use_case.py new file mode 100644 index 0000000..0797b02 --- /dev/null +++ b/tests/unit/test_read_bricks_document_use_case.py @@ -0,0 +1,237 @@ +"""Tests for ReadBricksDocumentUseCase — follows exact pattern of test_read_file_use_case.py. + +Downloads from Bricks API → saves temp file → calls Kreuzberg extract → returns DocumentContent. +Ensures temp file cleanup on both success and error paths. +""" + +import os +from unittest.mock import AsyncMock + +import pytest + +from application.use_cases.read_bricks_document_use_case import ( + ReadBricksDocumentUseCase, +) +from domain.ports.document_reader_port import DocumentContent, DocumentMetadata + + +class TestReadBricksDocumentUseCase: + """Tests for ReadBricksDocumentUseCase.""" + + async def test_execute_downloads_from_bricks_api( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should call bricks_api.download_document with the file URL.""" + mock_bricks_api.download_document.return_value = (b"file content", "report.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="extracted text", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(file_url="https://s3.example.com/presigned/report.pdf") + + mock_bricks_api.download_document.assert_called_once_with( + download_url="https://s3.example.com/presigned/report.pdf" + ) + + async def test_execute_returns_document_content( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should return DocumentContent from Kreuzberg extraction.""" + mock_bricks_api.download_document.return_value = (b"pdf data", "doc.pdf") + expected = DocumentContent( + content="extracted text from bricks document", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[], + ) + mock_document_reader.extract_content.return_value = expected + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + result = await use_case.execute( + file_url="https://s3.example.com/presigned/doc.pdf" + ) + + assert result.content == "extracted text from bricks document" + assert result.metadata.format_type == "pdf" + assert result.metadata.mime_type == "application/pdf" + + async def test_execute_calls_document_reader_with_temp_file( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should save temp file and pass its path to document_reader.extract_content.""" + mock_bricks_api.download_document.return_value = (b"pdf binary", "report.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="text", + metadata=DocumentMetadata(format_type="pdf"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(file_url="https://s3.example.com/report.pdf") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert tmp_file_path.endswith(".pdf") + assert os.path.dirname(tmp_file_path) == str(tmp_path) + + async def test_execute_temp_file_suffix_matches_filename_extension( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should use the filename extension from the download as temp file suffix.""" + mock_bricks_api.download_document.return_value = (b"data", "spreadsheet.xlsx") + mock_document_reader.extract_content.return_value = DocumentContent( + content="spreadsheet data", + metadata=DocumentMetadata(format_type="xlsx"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(file_url="https://s3.example.com/spreadsheet.xlsx") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert tmp_file_path.endswith(".xlsx") + + async def test_execute_propagates_download_failure( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should propagate errors when download fails.""" + mock_bricks_api.download_document.side_effect = ConnectionError( + "S3 download failed" + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + with pytest.raises(ConnectionError, match="S3 download failed"): + await use_case.execute(file_url="https://s3.example.com/missing.pdf") + + async def test_execute_propagates_kreuzberg_failure( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should propagate errors when Kreuzberg extraction fails.""" + mock_bricks_api.download_document.return_value = (b"corrupt data", "broken.pdf") + mock_document_reader.extract_content.side_effect = ValueError( + "Unsupported format" + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + with pytest.raises(ValueError, match="Unsupported format"): + await use_case.execute(file_url="https://s3.example.com/broken.pdf") + + async def test_execute_cleans_up_temp_file( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should delete the temp file after successful extraction.""" + mock_bricks_api.download_document.return_value = (b"data", "report.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="text", + metadata=DocumentMetadata(format_type="txt"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(file_url="https://s3.example.com/report.pdf") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert not os.path.exists(tmp_file_path) + + async def test_execute_cleans_up_temp_file_on_error( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should delete the temp file even when Kreuzberg extraction fails.""" + mock_bricks_api.download_document.return_value = (b"data", "report.pdf") + mock_document_reader.extract_content.side_effect = ValueError("bad format") + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + with pytest.raises(ValueError): + await use_case.execute(file_url="https://s3.example.com/report.pdf") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert not os.path.exists(tmp_file_path) + + async def test_execute_with_tables( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should include tables in the returned DocumentContent.""" + from domain.ports.document_reader_port import TableData + + mock_bricks_api.download_document.return_value = (b"data", "financials.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="text with table", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[TableData(markdown="| A | B |\n|---|---|\n| 1 | 2 |")], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + result = await use_case.execute( + file_url="https://s3.example.com/financials.pdf" + ) + + assert len(result.tables) == 1 + assert result.tables[0].markdown == "| A | B |\n|---|---|\n| 1 | 2 |" From a5cf02295de3b4ebc96cba55e65e48c168a861a4 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 20 May 2026 10:53:34 +0200 Subject: [PATCH 2/4] fix: simplify read_bricks_document to use document_id + project_id, resolve pre-signed URL internally - Remove file_url parameter from MCP tool, use document_id + project_unique_id (required) - BricksApiAdapter resolves pre-signed URL via list_project_documents internally - URLs are always fresh (no expiry issues), LLM just passes stable IDs - Extract filename from URL path when Content-Disposition is missing - Update all tests (432 passing) --- .env.example | 1 - README.md | 8 +- src/application/api/mcp_bricks_tools.py | 16 +- src/application/api/mcp_classical_tools.py | 8 +- src/application/api/mcp_query_tools.py | 10 +- .../responses/classical_query_response.py | 6 +- src/application/responses/query_response.py | 5 +- .../use_cases/classical_query_use_case.py | 2 +- .../publish_section_version_use_case.py | 3 - .../read_bricks_document_use_case.py | 10 +- src/config.py | 3 - src/dependencies.py | 1 - src/domain/ports/bricks_api_port.py | 7 +- .../bricks/bricks_api_adapter.py | 161 +++-- .../document_reader/kreuzberg_adapter.py | 4 +- .../rag/kreuzberg_raganything_parser.py | 1 - src/main.py | 6 +- .../infrastructure/test_bricks_api_adapter.py | 550 +++++++++--------- tests/unit/test_bricks_api_port.py | 8 +- .../unit/test_kreuzberg_raganything_parser.py | 24 +- tests/unit/test_mcp_bricks_tools.py | 10 +- tests/unit/test_mcp_classical_tools.py | 1 + tests/unit/test_mcp_query_tools.py | 5 +- .../test_publish_section_version_use_case.py | 22 +- .../test_read_bricks_document_use_case.py | 24 +- 25 files changed, 480 insertions(+), 416 deletions(-) diff --git a/.env.example b/.env.example index 8723e1c..d51ccf4 100644 --- a/.env.example +++ b/.env.example @@ -62,5 +62,4 @@ MINIO_SECURE=false BRICKS_API_BASE_URL=https://analyse.bricks.co BRICKS_API_KEY= BRICKS_BEARER_TOKEN= -BRICKS_PUBLISH_TARGET_URL=https://project-analysis-preprod.up.railway.app/api/section-versions BRICKS_PUBLISH_DRY_RUN=true diff --git a/README.md b/README.md index 1a07847..17a4a56 100644 --- a/README.md +++ b/README.md @@ -679,8 +679,7 @@ Publishes a structured section version back to the Bricks platform. When `BRICKS "workflow_id": "agent-haiku-files-v1", "workflow_name": "haiku-files", "workflow_metadata": {} - }, - "target_url": "https://project-analysis-preprod.up.railway.app/api/section-versions" + } } ``` @@ -818,9 +817,8 @@ The classical RAG adapters share the same `OPEN_ROUTER_API_KEY`, `OPEN_ROUTER_AP | Variable | Default | Description | |----------|---------|-------------| | `BRICKS_API_BASE_URL` | `https://analyse.bricks.co` | Bricks platform base URL | -| `BRICKS_API_KEY` | -- | X-API-Key for Bricks API authentication | -| `BRICKS_BEARER_TOKEN` | -- | Bearer token for Bricks API (document listing) | -| `BRICKS_PUBLISH_TARGET_URL` | -- | Target URL for publishing section versions | +| `BRICKS_API_KEY` | -- | X-API-Key for Bricks API authentication (publish) | +| `BRICKS_BEARER_TOKEN` | -- | Bearer token for Bricks API authentication (list documents) | | `BRICKS_PUBLISH_DRY_RUN` | `true` | When `true`, `publish_section_version` returns a payload preview without making an API call | ## Query Modes diff --git a/src/application/api/mcp_bricks_tools.py b/src/application/api/mcp_bricks_tools.py index c1cb7d7..34adf6e 100644 --- a/src/application/api/mcp_bricks_tools.py +++ b/src/application/api/mcp_bricks_tools.py @@ -36,20 +36,26 @@ async def list_bricks_documents(project_unique_id: str) -> list: @mcp_bricks.tool() -async def read_bricks_document(file_url: str) -> FileContentResponse: +async def read_bricks_document( + document_id: str, + project_unique_id: str, +) -> FileContentResponse: """Télécharge un document Bricks et extrait son contenu textuel. + Résoud automatiquement l'URL pré-signée à partir du document_id et project_unique_id. + Args: - file_url: L'URL de téléchargement du document Bricks + document_id: L'identifiant du document (champ 'id' de list_bricks_documents) + project_unique_id: L'identifiant du projet Bricks Returns: Contenu extrait avec métadonnées et tables détectées """ use_case = get_read_bricks_document_use_case() try: - result = await use_case.execute(file_url=file_url) + result = await use_case.execute(document_id=document_id, project_id=project_unique_id) except Exception: - logger.exception("Failed to read bricks document: %s", file_url) + logger.exception("Failed to read bricks document: %s in project %s", document_id, project_unique_id) raise ToolError("Failed to read bricks document") from None return FileContentResponse( content=result.content, @@ -64,7 +70,7 @@ async def publish_section_version( section_key: str, content: dict, workflow_id: str = "agent-haiku-files-v1", - workflow_name: str = "haiku-files", + workflow_name: str = "agent-haiku-files-v1", workflow_metadata: dict | None = None, ) -> dict: """Publie la réponse structurée d'une section d'un projet Bricks. diff --git a/src/application/api/mcp_classical_tools.py b/src/application/api/mcp_classical_tools.py index 24e90f1..bf67950 100644 --- a/src/application/api/mcp_classical_tools.py +++ b/src/application/api/mcp_classical_tools.py @@ -2,10 +2,11 @@ from fastmcp import FastMCP -from application.responses.classical_query_response import ClassicalRagResponse, McpClassicalRagResponse +from application.responses.classical_query_response import ( + ClassicalRagResponse, + McpClassicalRagResponse, +) from dependencies import ( - get_classical_index_file_use_case, - get_classical_index_folder_use_case, get_classical_query_use_case, ) @@ -52,4 +53,3 @@ async def classical_query( ClassicalRagResponse(content=chunk.content, file_path=chunk.file_path) ) return classical_response - diff --git a/src/application/api/mcp_query_tools.py b/src/application/api/mcp_query_tools.py index dd25f96..e5d5141 100644 --- a/src/application/api/mcp_query_tools.py +++ b/src/application/api/mcp_query_tools.py @@ -6,7 +6,11 @@ from fastmcp import FastMCP from application.requests.query_request import MultimodalContentItem -from application.responses.query_response import McpRagResponse, QueryResponse, RagResponse +from application.responses.query_response import ( + McpRagResponse, + QueryResponse, + RagResponse, +) from dependencies import ( get_multimodal_query_use_case, get_query_use_case, @@ -45,7 +49,9 @@ async def query_knowledge_base( chunks = response.data.chunks mcp_response = McpRagResponse(rag_response=[]) for chunk in chunks: - mcp_response.rag_response.append(RagResponse(content=chunk.content, file_path=chunk.file_path)) + mcp_response.rag_response.append( + RagResponse(content=chunk.content, file_path=chunk.file_path) + ) return mcp_response diff --git a/src/application/responses/classical_query_response.py b/src/application/responses/classical_query_response.py index 88df5b0..9a9d36b 100644 --- a/src/application/responses/classical_query_response.py +++ b/src/application/responses/classical_query_response.py @@ -36,13 +36,15 @@ class ClassicalQueryResponse(BaseModel): class ClassicalRagResponse(BaseModel): content: str = Field( - default="", description="Textual content retrieved from the knowledge base" + default="", description="Textual content retrieved from the knowledge base" ) file_path: str = Field( default="", description="Source file path of the retrieved content" ) + class McpClassicalRagResponse(BaseModel): rag_response: list[ClassicalRagResponse] = Field( - default_factory=list, description="List of retrieved content for MCP integration" + default_factory=list, + description="List of retrieved content for MCP integration", ) diff --git a/src/application/responses/query_response.py b/src/application/responses/query_response.py index 96bb47a..dc29843 100644 --- a/src/application/responses/query_response.py +++ b/src/application/responses/query_response.py @@ -74,11 +74,14 @@ class MultimodalQueryResponse(BaseModel): default="", description="Réponse textuelle de l'analyse multimodale" ) + class RagResponse(BaseModel): content: str file_path: str + class McpRagResponse(BaseModel): rag_response: list[RagResponse] = Field( - default_factory=list, description="List of retrieved content for MCP integration" + default_factory=list, + description="List of retrieved content for MCP integration", ) diff --git a/src/application/use_cases/classical_query_use_case.py b/src/application/use_cases/classical_query_use_case.py index fa7ddc0..f1d0706 100644 --- a/src/application/use_cases/classical_query_use_case.py +++ b/src/application/use_cases/classical_query_use_case.py @@ -79,7 +79,7 @@ async def execute( mode: Literal["vector", "hybrid"] = "vector", ) -> ClassicalQueryResponse: working_dir = working_dir if working_dir.endswith("/") else f"{working_dir}/" - + if num_variations is None: num_variations = self.config.CLASSICAL_NUM_QUERY_VARIATIONS if relevance_threshold is None: diff --git a/src/application/use_cases/publish_section_version_use_case.py b/src/application/use_cases/publish_section_version_use_case.py index 771c452..fb6cfb1 100644 --- a/src/application/use_cases/publish_section_version_use_case.py +++ b/src/application/use_cases/publish_section_version_use_case.py @@ -6,11 +6,9 @@ def __init__( self, bricks_api: BricksApiPort, dry_run: bool, - target_url: str, ) -> None: self.bricks_api = bricks_api self.dry_run = dry_run - self.target_url = target_url async def execute( self, @@ -38,7 +36,6 @@ async def execute( message="DRY RUN — no API call made", dry_run=True, payload_preview=payload, - target_url=self.target_url, ) return await self.bricks_api.publish_section_version(payload=payload) diff --git a/src/application/use_cases/read_bricks_document_use_case.py b/src/application/use_cases/read_bricks_document_use_case.py index 895d04e..7b46be5 100644 --- a/src/application/use_cases/read_bricks_document_use_case.py +++ b/src/application/use_cases/read_bricks_document_use_case.py @@ -19,8 +19,14 @@ def __init__( self.document_reader = document_reader self.output_dir = output_dir - async def execute(self, file_url: str) -> DocumentContent: - data, filename = await self.bricks_api.download_document(download_url=file_url) + async def execute( + self, + document_id: str, + project_id: str, + ) -> DocumentContent: + data, filename = await self.bricks_api.download_document( + document_id=document_id, project_id=project_id + ) os.makedirs(self.output_dir, exist_ok=True) suffix = os.path.splitext(filename)[1] or ".bin" diff --git a/src/config.py b/src/config.py index c203116..81b883d 100644 --- a/src/config.py +++ b/src/config.py @@ -161,9 +161,6 @@ class BricksConfig(BaseSettings): BRICKS_API_BASE_URL: str = Field(default="https://analyse.bricks.co") BRICKS_API_KEY: str = Field(default="") BRICKS_BEARER_TOKEN: str = Field(default="") - BRICKS_PUBLISH_TARGET_URL: str = Field( - default="https://project-analysis-preprod.up.railway.app/api/section-versions" - ) BRICKS_PUBLISH_DRY_RUN: bool = Field(default=True) diff --git a/src/dependencies.py b/src/dependencies.py index 70846ec..7fec6c7 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -232,5 +232,4 @@ def get_publish_section_version_use_case() -> PublishSectionVersionUseCase: return PublishSectionVersionUseCase( bricks_api=bricks_api_adapter, dry_run=bricks_config.BRICKS_PUBLISH_DRY_RUN, - target_url=bricks_config.BRICKS_PUBLISH_TARGET_URL, ) diff --git a/src/domain/ports/bricks_api_port.py b/src/domain/ports/bricks_api_port.py index facdabf..3a363d0 100644 --- a/src/domain/ports/bricks_api_port.py +++ b/src/domain/ports/bricks_api_port.py @@ -37,7 +37,6 @@ class SectionVersionResult(BaseModel): data: dict | None = None dry_run: bool = False payload_preview: dict | None = None - target_url: str | None = None class BricksApiPort(ABC): @@ -46,7 +45,11 @@ async def list_project_documents(self, project_id: str) -> list[BricksDocumentIn pass @abstractmethod - async def download_document(self, download_url: str) -> tuple[bytes, str]: + async def download_document( + self, + document_id: str, + project_id: str, + ) -> tuple[bytes, str]: pass @abstractmethod diff --git a/src/infrastructure/bricks/bricks_api_adapter.py b/src/infrastructure/bricks/bricks_api_adapter.py index b158b59..7adc413 100644 --- a/src/infrastructure/bricks/bricks_api_adapter.py +++ b/src/infrastructure/bricks/bricks_api_adapter.py @@ -1,7 +1,10 @@ +import asyncio +import json import logging import re - -import httpx +import urllib.error +import urllib.parse +import urllib.request from domain.ports.bricks_api_port import ( BricksApiPort, @@ -11,92 +14,122 @@ logger = logging.getLogger(__name__) -_DEFAULT_TIMEOUT = httpx.Timeout(30.0) +_DEFAULT_TIMEOUT = 30 class BricksApiAdapter(BricksApiPort): def __init__(self, config) -> None: - self._base_url = config.BRICKS_API_BASE_URL + self._base_url = config.BRICKS_API_BASE_URL.rstrip("/") self._api_key = config.BRICKS_API_KEY self._bearer_token = config.BRICKS_BEARER_TOKEN - self._publish_target_url = config.BRICKS_PUBLISH_TARGET_URL - self._client = httpx.AsyncClient( - base_url=self._base_url, timeout=_DEFAULT_TIMEOUT - ) async def close(self) -> None: - await self._client.aclose() + pass + + def _get(self, url: str, headers: dict | None = None) -> tuple[bytes, dict]: + req = urllib.request.Request(url, headers=headers or {}) + with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: + return resp.read(), dict(resp.headers) + + def _post(self, url: str, payload: dict, headers: dict) -> bytes: + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request(url, data=data, headers=headers, method="POST") + with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: + return resp.read() async def list_project_documents(self, project_id: str) -> list[BricksDocumentInfo]: - url = f"api/projects/{project_id}/documents/ai" + url = f"{self._base_url}/api/projects/{project_id}/documents/ai" try: - response = await self._client.get( - url, - headers={"Authorization": f"Bearer {self._bearer_token}"}, + body, _ = await asyncio.to_thread( + self._get, url, {"Authorization": f"Bearer {self._bearer_token}"} ) - response.raise_for_status() - except httpx.TimeoutException as e: - raise TimeoutError(f"Bricks API request timed out: {e}") from e - except httpx.HTTPStatusError as e: - if e.response.status_code in (401, 403): + except urllib.error.HTTPError as e: + if e.code in (401, 403): raise PermissionError( - f"Bricks API authentication failed (HTTP {e.response.status_code})" + f"Bricks API authentication failed (HTTP {e.code})" ) from e - if e.response.status_code == 404: + if e.code == 404: raise FileNotFoundError( f"Bricks project not found: {project_id}" ) from e - raise RuntimeError( - f"Bricks API error (HTTP {e.response.status_code})" - ) from e - except httpx.RequestError as e: - raise ConnectionError(f"Bricks API connection failed: {e}") from e - items = response.json().get("items", []) - return [BricksDocumentInfo(**item) for item in items] + raise RuntimeError(f"Bricks API error (HTTP {e.code})") from e + except urllib.error.URLError as e: + raise ConnectionError(f"Bricks API connection failed: {e.reason}") from e + except TimeoutError as e: + raise TimeoutError(f"Bricks API request timed out: {e}") from e + items = json.loads(body).get("items", []) + documents = [BricksDocumentInfo(**item) for item in items] + return documents - async def download_document(self, download_url: str) -> tuple[bytes, str]: + async def download_document( + self, + document_id: str, + project_id: str, + ) -> tuple[bytes, str]: + documents = await self.list_project_documents(project_id) + url = None + for doc in documents: + if doc.id == document_id and doc.url: + url = doc.url + break + if not url: + raise FileNotFoundError( + f"Document {document_id} not found in project {project_id}" + ) try: - response = await self._client.get(download_url) - response.raise_for_status() - except httpx.TimeoutException as e: - raise TimeoutError(f"Document download timed out: {e}") from e - except httpx.HTTPStatusError as e: + body, resp_headers = await asyncio.to_thread(self._get, url) + except urllib.error.HTTPError as e: + if e.code in (401, 403): + raise PermissionError( + f"Document download authentication failed (HTTP {e.code})" + ) from e + if e.code == 404: + raise FileNotFoundError( + f"Document {document_id} not found (project {project_id})" + ) from e raise RuntimeError( - f"Failed to download document (HTTP {e.response.status_code})" + f"Failed to download document (HTTP {e.code})" ) from e - except httpx.RequestError as e: - raise ConnectionError(f"Document download connection failed: {e}") from e - content_disposition = response.headers.get("content-disposition", "") - filename = "document.bin" - match = re.search(r'filename="([^"]+)"', content_disposition) - if match: - filename = match.group(1) - else: - match = re.search(r"filename=([^\s;]+)", content_disposition) - if match: - filename = match.group(1) - return response.content, filename + except urllib.error.URLError as e: + raise ConnectionError(f"Document download connection failed: {e.reason}") from e + except TimeoutError as e: + raise TimeoutError(f"Document download timed out: {e}") from e + + filename = _extract_filename(resp_headers.get("Content-Disposition", ""), url) + return body, filename async def publish_section_version(self, payload: dict) -> SectionVersionResult: + url = f"{self._base_url}/api/section-versions" + headers = { + "X-API-Key": self._api_key, + "Content-Type": "application/json", + } try: - response = await self._client.post( - self._publish_target_url, - headers={ - "X-API-Key": self._api_key, - "Content-Type": "application/json", - }, - json=payload, - ) - response.raise_for_status() - except httpx.TimeoutException as e: - raise TimeoutError(f"Publish request timed out: {e}") from e - except httpx.HTTPStatusError as e: - if e.response.status_code in (401, 403): + body = await asyncio.to_thread(self._post, url, payload, headers) + except urllib.error.HTTPError as e: + if e.code in (401, 403): raise PermissionError( - f"Publish authentication failed (HTTP {e.response.status_code})" + f"Publish authentication failed (HTTP {e.code})" ) from e - raise RuntimeError(f"Publish failed (HTTP {e.response.status_code})") from e - except httpx.RequestError as e: - raise ConnectionError(f"Publish connection failed: {e}") from e - data = response.json() + raise RuntimeError(f"Publish failed (HTTP {e.code})") from e + except urllib.error.URLError as e: + raise ConnectionError(f"Publish connection failed: {e.reason}") from e + except TimeoutError as e: + raise TimeoutError(f"Publish request timed out: {e}") from e + data = json.loads(body) return SectionVersionResult(success=True, message="Published", data=data) + + +def _extract_filename(content_disposition: str, url: str = "") -> str: + match = re.search(r'filename="([^"]+)"', content_disposition) + if match: + return match.group(1) + match = re.search(r"filename=([^\s;]+)", content_disposition) + if match: + return match.group(1) + if url: + decoded_path = urllib.parse.unquote(urllib.parse.urlparse(url).path) + path_filename = decoded_path.rsplit("/", 1)[-1] + if path_filename and "." in path_filename: + return path_filename + return "document.bin" \ No newline at end of file diff --git a/src/infrastructure/document_reader/kreuzberg_adapter.py b/src/infrastructure/document_reader/kreuzberg_adapter.py index b22de48..0264003 100644 --- a/src/infrastructure/document_reader/kreuzberg_adapter.py +++ b/src/infrastructure/document_reader/kreuzberg_adapter.py @@ -5,7 +5,6 @@ from kreuzberg import ( ChunkingConfig, ExtractionConfig, - LlmConfig as KreuzbergLlmConfig, OcrConfig, OutputFormat, ParsingError, @@ -13,6 +12,9 @@ ValidationError, extract_file, ) +from kreuzberg import ( + LlmConfig as KreuzbergLlmConfig, +) from config import LLMConfig from domain.ports.document_reader_port import ( diff --git a/src/infrastructure/rag/kreuzberg_raganything_parser.py b/src/infrastructure/rag/kreuzberg_raganything_parser.py index c9db6c8..a5976a9 100644 --- a/src/infrastructure/rag/kreuzberg_raganything_parser.py +++ b/src/infrastructure/rag/kreuzberg_raganything_parser.py @@ -30,7 +30,6 @@ def __init__(self) -> None: super().__init__() self._config = make_extraction_config() - def check_installation(self) -> bool: return importlib.util.find_spec("kreuzberg") is not None diff --git a/src/main.py b/src/main.py index ceb9b0f..750007b 100644 --- a/src/main.py +++ b/src/main.py @@ -73,7 +73,7 @@ def _run_alembic_upgrade() -> None: @asynccontextmanager async def db_lifespan(_app: FastAPI): - """Closes BM25 connection pool and Bricks HTTP client on shutdown.""" + """Closes BM25 connection pool on shutdown.""" yield logger.info("Application shutdown initiated") @@ -87,10 +87,6 @@ async def db_lifespan(_app: FastAPI): await classical_vector_store.close() except Exception: logger.exception("Failed to close classical vector store") - try: - await bricks_api_adapter.close() - except Exception: - logger.exception("Failed to close Bricks API adapter") logger.info("Application shutdown complete") diff --git a/tests/unit/infrastructure/test_bricks_api_adapter.py b/tests/unit/infrastructure/test_bricks_api_adapter.py index 6286ec0..539c1df 100644 --- a/tests/unit/infrastructure/test_bricks_api_adapter.py +++ b/tests/unit/infrastructure/test_bricks_api_adapter.py @@ -1,16 +1,22 @@ -"""Tests for BricksApiAdapter — the httpx-based implementation of BricksApiPort. +"""Tests for BricksApiAdapter — the urllib-based implementation of BricksApiPort. -The Bricks API is an external dependency, so we mock httpx responses -while testing our adapter logic for parsing, headers, and error handling. +The Bricks API is an external dependency protected by Cloudflare, +so we mock urllib.request.urlopen responses while testing our adapter +logic for parsing, headers, and error handling. """ +import json +from io import BytesIO from unittest.mock import AsyncMock, MagicMock, patch -import httpx import pytest +import urllib.error from domain.ports.bricks_api_port import BricksDocumentInfo, SectionVersionResult -from infrastructure.bricks.bricks_api_adapter import BricksApiAdapter +from infrastructure.bricks.bricks_api_adapter import ( + BricksApiAdapter, + _extract_filename, +) @pytest.fixture @@ -21,65 +27,44 @@ def bricks_config() -> MagicMock: config.BRICKS_API_KEY = "test-api-key-12345" config.BRICKS_BEARER_TOKEN = "test-bearer-token" config.BRICKS_PUBLISH_DRY_RUN = True - config.BRICKS_PUBLISH_TARGET_URL = ( - "https://api.bricks.example.com/api/section-versions" - ) return config @pytest.fixture -def mock_httpx_client() -> AsyncMock: - """Provide a mocked httpx AsyncClient.""" - return AsyncMock(spec=httpx.AsyncClient) +def adapter(bricks_config: MagicMock) -> BricksApiAdapter: + """Provide a BricksApiAdapter with mock config.""" + return BricksApiAdapter(config=bricks_config) -@pytest.fixture -def adapter(bricks_config: MagicMock, mock_httpx_client: AsyncMock) -> BricksApiAdapter: - """Provide a BricksApiAdapter with mocked httpx client.""" - with patch( - "infrastructure.bricks.bricks_api_adapter.httpx.AsyncClient", - return_value=mock_httpx_client, - ): - adapter = BricksApiAdapter(config=bricks_config) - adapter._client = mock_httpx_client # type: ignore[attr-defined] - return adapter +def _mock_urlopen_response(body: bytes, headers: dict | None = None, status: int = 200) -> MagicMock: + """Create a mock object that behaves like urllib.urlopen response.""" + mock_resp = MagicMock() + mock_resp.read.return_value = body + mock_resp.headers = headers or {} + mock_resp.__enter__ = MagicMock(return_value=mock_resp) + mock_resp.__exit__ = MagicMock(return_value=False) + return mock_resp class TestListProjectDocuments: """Tests for BricksApiAdapter.list_project_documents.""" - async def test_calls_api_with_bearer_token( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_calls_api_with_bearer_token(self, adapter: BricksApiAdapter) -> None: """Should call GET /api/projects/{id}/documents/ai with Bearer token.""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = {"items": []} - mock_response.raise_for_status = MagicMock() - mock_httpx_client.get.return_value = mock_response - - await adapter.list_project_documents(project_id="proj-123") - - mock_httpx_client.get.assert_called_once() - call_args = mock_httpx_client.get.call_args - assert "api/projects/proj-123/documents/ai" in call_args[0][ - 0 - ] or "proj-123/documents/ai" in str(call_args) - headers = ( - call_args[1].get("headers", {}) or call_args[0][0] - if len(call_args[0]) > 1 - else call_args[1].get("headers", {}) - ) - assert "Authorization" in headers - assert headers["Authorization"] == "Bearer test-bearer-token" - - async def test_returns_list_of_bricks_document_info( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + body = json.dumps({"items": []}).encode() + mock_resp = _mock_urlopen_response(body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp) as mock_urlopen: + await adapter.list_project_documents(project_id="proj-123") + + mock_urlopen.assert_called_once() + req = mock_urlopen.call_args[0][0] + assert "api/projects/proj-123/documents/ai" in req.full_url + assert req.get_header("Authorization") == "Bearer test-bearer-token" + + async def test_returns_list_of_bricks_document_info(self, adapter: BricksApiAdapter) -> None: """Should parse JSON response and return list of BricksDocumentInfo.""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = { + api_response = { "items": [ { "id": "doc-1", @@ -113,10 +98,11 @@ async def test_returns_list_of_bricks_document_info( }, ] } - mock_response.raise_for_status = MagicMock() - mock_httpx_client.get.return_value = mock_response + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) - result = await adapter.list_project_documents(project_id="proj-123") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.list_project_documents(project_id="proj-123") assert len(result) == 2 assert isinstance(result[0], BricksDocumentInfo) @@ -126,13 +112,9 @@ async def test_returns_list_of_bricks_document_info( assert result[1].file_name == "notes.docx" assert result[1].is_ignored is True - async def test_camel_case_to_snake_case_mapping( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_camel_case_to_snake_case_mapping(self, adapter: BricksApiAdapter) -> None: """Should correctly map camelCase API fields to snake_case model fields.""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = { + api_response = { "items": [ { "id": "doc-uuid", @@ -148,10 +130,11 @@ async def test_camel_case_to_snake_case_mapping( } ] } - mock_response.raise_for_status = MagicMock() - mock_httpx_client.get.return_value = mock_response + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) - result = await adapter.list_project_documents(project_id="proj-123") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.list_project_documents(project_id="proj-123") doc = result[0] assert doc.image_analysis_status == "relevant" @@ -162,185 +145,228 @@ async def test_camel_case_to_snake_case_mapping( assert doc.category_source == "llm" assert doc.category_classified_at == "2025-12-05T08:00:00.000Z" - async def test_returns_empty_list_when_no_documents( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_returns_empty_list_when_no_documents(self, adapter: BricksApiAdapter) -> None: """Should return empty list when API returns empty items.""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.json.return_value = {"items": []} - mock_response.raise_for_status = MagicMock() - mock_httpx_client.get.return_value = mock_response + body = json.dumps({"items": []}).encode() + mock_resp = _mock_urlopen_response(body) - result = await adapter.list_project_documents(project_id="proj-empty") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.list_project_documents(project_id="proj-empty") assert result == [] - async def test_raises_on_401_unauthorized( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: - """Should raise proper exception on 401 Unauthorized.""" - mock_response = MagicMock() - mock_response.status_code = 401 - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( - message="Unauthorized", - request=MagicMock(), - response=mock_response, - ) - mock_httpx_client.get.return_value = mock_response - - with pytest.raises(PermissionError): - await adapter.list_project_documents(project_id="proj-123") + async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> None: + """Should raise PermissionError on 401 Unauthorized.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/projects/proj-123/documents/ai", + code=401, + msg="Unauthorized", + hdrs={}, + fp=None, + ) + + with pytest.raises(PermissionError): + await adapter.list_project_documents(project_id="proj-123") - async def test_raises_on_403_forbidden( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_raises_on_403_forbidden(self, adapter: BricksApiAdapter) -> None: """Should raise PermissionError on 403 Forbidden.""" - mock_response = MagicMock() - mock_response.status_code = 403 - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( - message="Forbidden", - request=MagicMock(), - response=mock_response, - ) - mock_httpx_client.get.return_value = mock_response - - with pytest.raises(PermissionError): - await adapter.list_project_documents(project_id="proj-123") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/projects/proj-123/documents/ai", + code=403, + msg="Forbidden", + hdrs={}, + fp=None, + ) - async def test_raises_on_404_not_found( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + with pytest.raises(PermissionError): + await adapter.list_project_documents(project_id="proj-123") + + async def test_raises_on_404_not_found(self, adapter: BricksApiAdapter) -> None: """Should raise FileNotFoundError on 404 Not Found.""" - mock_response = MagicMock() - mock_response.status_code = 404 - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( - message="Not Found", - request=MagicMock(), - response=mock_response, - ) - mock_httpx_client.get.return_value = mock_response - - with pytest.raises(FileNotFoundError): - await adapter.list_project_documents(project_id="nonexistent") - - async def test_raises_on_timeout( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: - """Should raise TimeoutError on request timeout.""" - mock_httpx_client.get.side_effect = httpx.TimeoutException("Request timed out") - - with pytest.raises(TimeoutError): - await adapter.list_project_documents(project_id="proj-123") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/projects/nonexistent/documents/ai", + code=404, + msg="Not Found", + hdrs={}, + fp=None, + ) + + with pytest.raises(FileNotFoundError): + await adapter.list_project_documents(project_id="nonexistent") + + async def test_raises_on_connection_error(self, adapter: BricksApiAdapter) -> None: + """Should raise ConnectionError on URLError.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.URLError(reason="Connection refused") + + with pytest.raises(ConnectionError): + await adapter.list_project_documents(project_id="proj-123") + + async def test_raises_on_timeout(self, adapter: BricksApiAdapter) -> None: + """Should raise TimeoutError on socket timeout.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = TimeoutError("Request timed out") + + with pytest.raises(TimeoutError): + await adapter.list_project_documents(project_id="proj-123") class TestDownloadDocument: """Tests for BricksApiAdapter.download_document.""" - async def test_downloads_from_s3_url( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: - """Should download file content from S3 presigned URL (no auth header).""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = b"PDF binary content" - mock_response.headers = { - "content-disposition": 'attachment; filename="report.pdf"' - } - mock_response.raise_for_status = MagicMock() - mock_httpx_client.get.return_value = mock_response - - content, filename = await adapter.download_document( - download_url="https://s3.example.com/presigned-url" - ) - - assert content == b"PDF binary content" - assert filename == "report.pdf" - - async def test_extracts_filename_from_content_disposition( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: - """Should extract filename from Content-Disposition header.""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = b"data" - mock_response.headers = { - "content-disposition": 'attachment; filename="my document.docx"' - } - mock_response.raise_for_status = MagicMock() - mock_httpx_client.get.return_value = mock_response - - _, filename = await adapter.download_document( - download_url="https://s3.example.com/doc" - ) - - assert filename == "my document.docx" - - async def test_no_auth_header_on_s3_download( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: - """S3 presigned URLs should not include authentication headers.""" - mock_response = MagicMock() - mock_response.status_code = 200 - mock_response.content = b"data" - mock_response.headers = {} - mock_response.raise_for_status = MagicMock() - mock_httpx_client.get.return_value = mock_response - - await adapter.download_document( - download_url="https://s3.example.com/presigned-url" - ) - - call_args = mock_httpx_client.get.call_args - headers = call_args[1].get("headers", {}) - assert "Authorization" not in headers - assert "X-API-Key" not in headers - - async def test_raises_on_download_failure( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: - """Should raise exception on download failure.""" - mock_response = MagicMock() - mock_response.status_code = 403 - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( - message="Forbidden", - request=MagicMock(), - response=mock_response, - ) - mock_httpx_client.get.return_value = mock_response - - with pytest.raises(RuntimeError): - await adapter.download_document( - download_url="https://s3.example.com/expired-url" - ) - - async def test_raises_on_timeout( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_resolves_document_id_and_downloads_presigned_url(self, adapter: BricksApiAdapter) -> None: + """Should resolve document_id via list_project_documents and download pre-signed URL.""" + presigned_url = "https://s3.example.com/projects/proj-1/doc.pdf?X-Amz-Signature=abc123" + doc_list_body = json.dumps({ + "items": [{ + "id": "doc-1", + "fileName": "doc.pdf", + "url": presigned_url, + "mimeType": "application/pdf", + "size": 100, + "status": "PROCESSED", + }] + }).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + file_body = b"PDF content" + file_resp = _mock_urlopen_response(file_body, headers={"Content-Disposition": 'attachment; filename="doc.pdf"'}) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", side_effect=[list_resp, file_resp]) as mock_urlopen: + content, filename = await adapter.download_document(document_id="doc-1", project_id="proj-1") + + assert mock_urlopen.call_count == 2 + list_req = mock_urlopen.call_args_list[0][0][0] + assert "api/projects/proj-1/documents/ai" in list_req.full_url + assert content == b"PDF content" + assert filename == "doc.pdf" + + async def test_extracts_filename_from_url_when_no_content_disposition(self, adapter: BricksApiAdapter) -> None: + """Should extract filename from URL path when no Content-Disposition header.""" + presigned_url = "https://s3.example.com/projects/proj-1/Dossier_Financement.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({ + "items": [{ + "id": "doc-2", + "fileName": "Dossier_Financement.pdf", + "url": presigned_url, + "mimeType": "application/pdf", + "size": 200, + "status": "PROCESSED", + }] + }).encode() + list_resp = _mock_urlopen_response(doc_list_body) + file_resp = _mock_urlopen_response(b"data", headers={}) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", side_effect=[list_resp, file_resp]): + _, filename = await adapter.download_document(document_id="doc-2", project_id="proj-1") + + assert filename == "Dossier_Financement.pdf" + + async def test_defaults_to_document_bin_when_no_filename(self, adapter: BricksApiAdapter) -> None: + """Should return 'document.bin' when no filename in URL or header.""" + doc_list_body = json.dumps({ + "items": [{ + "id": "doc-3", + "fileName": "noext", + "url": "https://s3.example.com/projects/proj-1/noext?X-Amz-Signature=abc", + "mimeType": "application/octet-stream", + "size": 50, + "status": "PROCESSED", + }] + }).encode() + list_resp = _mock_urlopen_response(doc_list_body) + file_resp = _mock_urlopen_response(b"data", headers={}) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", side_effect=[list_resp, file_resp]): + _, filename = await adapter.download_document(document_id="doc-3", project_id="proj-1") + + assert filename == "document.bin" + + async def test_raises_when_document_id_not_found(self, adapter: BricksApiAdapter) -> None: + """Should raise FileNotFoundError when document_id not found in project.""" + doc_list_body = json.dumps({"items": []}).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=list_resp): + with pytest.raises(FileNotFoundError, match="Document missing-id not found"): + await adapter.download_document(document_id="missing-id", project_id="proj-1") + + async def test_raises_permission_error_on_403(self, adapter: BricksApiAdapter) -> None: + """Should raise PermissionError on 403 Forbidden when downloading.""" + presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = [list_resp, urllib.error.HTTPError( + url="https://s3.example.com/doc.pdf", + code=403, + msg="Forbidden", + hdrs={}, + fp=None, + )] + + with pytest.raises(PermissionError): + await adapter.download_document(document_id="doc-1", project_id="proj-1") + + async def test_raises_connection_error(self, adapter: BricksApiAdapter) -> None: + """Should raise ConnectionError on URLError.""" + presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = [list_resp, urllib.error.URLError(reason="Connection refused")] + + with pytest.raises(ConnectionError): + await adapter.download_document(document_id="doc-1", project_id="proj-1") + + async def test_raises_timeout(self, adapter: BricksApiAdapter) -> None: """Should raise TimeoutError on download timeout.""" - mock_httpx_client.get.side_effect = httpx.TimeoutException("Download timed out") + presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() + list_resp = _mock_urlopen_response(doc_list_body) - with pytest.raises(TimeoutError): - await adapter.download_document( - download_url="https://s3.example.com/slow-url" - ) + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = [list_resp, TimeoutError("Download timed out")] + + with pytest.raises(TimeoutError): + await adapter.download_document(document_id="doc-1", project_id="proj-1") + + +class TestExtractFilename: + """Tests for _extract_filename helper.""" + + def test_extracts_from_content_disposition_quoted(self) -> None: + assert _extract_filename('attachment; filename="report.pdf"') == "report.pdf" + + def test_extracts_from_content_disposition_unquoted(self) -> None: + assert _extract_filename("attachment; filename=report.pdf") == "report.pdf" + + def test_extracts_from_url_path(self) -> None: + assert _extract_filename("", "https://s3.example.com/path/to/report.pdf?X-Amz-Sig=abc") == "report.pdf" + + def test_extracts_decoded_filename_from_url(self) -> None: + assert _extract_filename("", "https://s3.example.com/Dossier_de_Financement.pdf?X-Amz-Sig=abc") == "Dossier_de_Financement.pdf" + + def test_defaults_to_document_bin(self) -> None: + assert _extract_filename("", "") == "document.bin" + + def test_url_without_extension_falls_back(self) -> None: + assert _extract_filename("", "https://s3.example.com/path/noext") == "document.bin" class TestPublishSectionVersion: """Tests for BricksApiAdapter.publish_section_version.""" - async def test_calls_post_with_x_api_key_header( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_calls_post_with_x_api_key_header(self, adapter: BricksApiAdapter) -> None: """Should call POST /api/section-versions with X-API-Key header.""" - mock_response = MagicMock() - mock_response.status_code = 201 - mock_response.json.return_value = { - "id": "sv-1", - "sectionKey": "intro", - } - mock_response.raise_for_status = MagicMock() - mock_httpx_client.post.return_value = mock_response + api_response = {"id": "sv-1", "sectionKey": "intro"} + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) payload = { "project_unique_id": "proj-123", @@ -351,55 +377,55 @@ async def test_calls_post_with_x_api_key_header( "workflow_metadata": {}, } - await adapter.publish_section_version(payload=payload) + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp) as mock_urlopen: + await adapter.publish_section_version(payload=payload) - mock_httpx_client.post.assert_called_once() - call_args = mock_httpx_client.post.call_args - headers = call_args[1].get("headers", {}) - assert "X-API-Key" in headers - assert headers["X-API-Key"] == "test-api-key-12345" + mock_urlopen.assert_called_once() + req = mock_urlopen.call_args[0][0] + assert req.get_header("X-api-key") == "test-api-key-12345" or req.get_header("X-API-Key") == "test-api-key-12345" + assert req.method == "POST" + assert "api/section-versions" in req.full_url - async def test_returns_section_version_result( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_returns_section_version_result(self, adapter: BricksApiAdapter) -> None: """Should return SectionVersionResult on successful publish.""" - mock_response = MagicMock() - mock_response.status_code = 201 - mock_response.json.return_value = { - "id": "sv-uuid", - "sectionKey": "summary", - } - mock_response.raise_for_status = MagicMock() - mock_httpx_client.post.return_value = mock_response + api_response = {"id": "sv-uuid", "sectionKey": "summary"} + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) - result = await adapter.publish_section_version( - payload={"section_key": "summary", "content": "Summary text"} - ) + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.publish_section_version( + payload={"section_key": "summary", "content": "Summary text"} + ) assert isinstance(result, SectionVersionResult) assert result.success is True - async def test_raises_on_401_unauthorized( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: - """Should raise proper exception on 401 when publishing.""" - mock_response = MagicMock() - mock_response.status_code = 401 - mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( - message="Unauthorized", - request=MagicMock(), - response=mock_response, - ) - mock_httpx_client.post.return_value = mock_response - - with pytest.raises(PermissionError): - await adapter.publish_section_version(payload={"section_key": "intro"}) - - async def test_raises_on_timeout( - self, adapter: BricksApiAdapter, mock_httpx_client: AsyncMock - ) -> None: + async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> None: + """Should raise PermissionError on 401 when publishing.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/section-versions", + code=401, + msg="Unauthorized", + hdrs={}, + fp=None, + ) + + with pytest.raises(PermissionError): + await adapter.publish_section_version(payload={"section_key": "intro"}) + + async def test_raises_on_connection_error(self, adapter: BricksApiAdapter) -> None: + """Should raise ConnectionError on URLError when publishing.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.URLError(reason="Connection refused") + + with pytest.raises(ConnectionError): + await adapter.publish_section_version(payload={"section_key": "intro"}) + + async def test_raises_on_timeout(self, adapter: BricksApiAdapter) -> None: """Should raise TimeoutError on publish timeout.""" - mock_httpx_client.post.side_effect = httpx.TimeoutException("Publish timed out") + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = TimeoutError("Publish timed out") - with pytest.raises(TimeoutError): - await adapter.publish_section_version(payload={"section_key": "intro"}) + with pytest.raises(TimeoutError): + await adapter.publish_section_version(payload={"section_key": "intro"}) \ No newline at end of file diff --git a/tests/unit/test_bricks_api_port.py b/tests/unit/test_bricks_api_port.py index ffdc394..44ba0c3 100644 --- a/tests/unit/test_bricks_api_port.py +++ b/tests/unit/test_bricks_api_port.py @@ -139,7 +139,6 @@ def test_creates_minimal_result(self) -> None: assert result.data is None assert result.dry_run is False assert result.payload_preview is None - assert result.target_url is None def test_creates_with_all_fields(self) -> None: result = SectionVersionResult( @@ -148,7 +147,6 @@ def test_creates_with_all_fields(self) -> None: data={"id": "sv-123"}, dry_run=True, payload_preview={"section_key": "intro", "content": "Hello"}, - target_url="https://api.example.com/section-versions", ) assert result.success is True @@ -156,7 +154,6 @@ def test_creates_with_all_fields(self) -> None: assert result.data == {"id": "sv-123"} assert result.dry_run is True assert result.payload_preview == {"section_key": "intro", "content": "Hello"} - assert result.target_url == "https://api.example.com/section-versions" def test_dry_run_result(self) -> None: result = SectionVersionResult( @@ -164,7 +161,6 @@ def test_dry_run_result(self) -> None: message="Dry run — no API call made", dry_run=True, payload_preview={"key": "value"}, - target_url="https://api.example.com/section-versions", ) assert result.dry_run is True @@ -205,7 +201,9 @@ async def list_project_documents( ) -> list[BricksDocumentInfo]: return [] - async def download_document(self, download_url: str) -> tuple[bytes, str]: + async def download_document( + self, document_id: str, project_id: str + ) -> tuple[bytes, str]: return (b"", "file.pdf") async def publish_section_version( diff --git a/tests/unit/test_kreuzberg_raganything_parser.py b/tests/unit/test_kreuzberg_raganything_parser.py index b1af91a..06db453 100644 --- a/tests/unit/test_kreuzberg_raganything_parser.py +++ b/tests/unit/test_kreuzberg_raganything_parser.py @@ -30,7 +30,9 @@ class TestKreuzbergRAGAnythingParserFormat: def _make_parser(self): return KreuzbergRAGAnythingParser() - def _mock_extraction_result(self, content="Hello world", tables=None, metadata=None): + def _mock_extraction_result( + self, content="Hello world", tables=None, metadata=None + ): result = MagicMock() result.content = content result.metadata = metadata or {} @@ -52,8 +54,12 @@ def _mock_table(self, markdown="| A | B |\n|---|---|", page_number=0): ("parse_image", "/tmp/photo.png", "Extracted image text"), ], ) - def test_parse_returns_content_list(self, mock_extract_sync, method, path, expected_content): - mock_extract_sync.return_value = self._mock_extraction_result(content=expected_content) + def test_parse_returns_content_list( + self, mock_extract_sync, method, path, expected_content + ): + mock_extract_sync.return_value = self._mock_extraction_result( + content=expected_content + ) parser = self._make_parser() result = getattr(parser, method)(path, output_dir="/tmp/output") @@ -87,8 +93,12 @@ def test_parse_pdf_with_tables(self, mock_extract_sync): ("/tmp/photo.png", "Image content"), ], ) - def test_parse_document_routes_files(self, mock_extract_sync, path, expected_content): - mock_extract_sync.return_value = self._mock_extraction_result(content=expected_content) + def test_parse_document_routes_files( + self, mock_extract_sync, path, expected_content + ): + mock_extract_sync.return_value = self._mock_extraction_result( + content=expected_content + ) parser = self._make_parser() result = parser.parse_document(path, method="auto", output_dir="/tmp/output") @@ -98,7 +108,9 @@ def test_parse_document_routes_files(self, mock_extract_sync, path, expected_con @patch("infrastructure.rag.kreuzberg_raganything_parser.extract_file_sync") @pytest.mark.parametrize("content", ["", " \n\n "]) - def test_empty_or_whitespace_content_returns_empty_list(self, mock_extract_sync, content): + def test_empty_or_whitespace_content_returns_empty_list( + self, mock_extract_sync, content + ): mock_extract_sync.return_value = self._mock_extraction_result(content=content) parser = self._make_parser() result = parser.parse_pdf("/tmp/empty.pdf", output_dir="/tmp/output") diff --git a/tests/unit/test_mcp_bricks_tools.py b/tests/unit/test_mcp_bricks_tools.py index 461e23b..80e4c23 100644 --- a/tests/unit/test_mcp_bricks_tools.py +++ b/tests/unit/test_mcp_bricks_tools.py @@ -134,7 +134,7 @@ async def test_returns_file_content_response( return_value=mock_use_case, ): result = await read_bricks_document( - file_url="https://s3.example.com/presigned/report.pdf" + document_id="doc-abc123", project_unique_id="proj-456" ) assert result.content == "Extracted text from bricks document." @@ -153,7 +153,7 @@ async def test_raises_tool_error_for_download_failure(self) -> None: pytest.raises(ToolError, match="Failed to read bricks document"), ): await read_bricks_document( - file_url="https://s3.example.com/expired-url.pdf" + document_id="doc-expired", project_unique_id="proj-1" ) async def test_raises_tool_error_for_generic_failure(self) -> None: @@ -168,7 +168,7 @@ async def test_raises_tool_error_for_generic_failure(self) -> None: ), pytest.raises(ToolError, match="Failed to read bricks document"), ): - await read_bricks_document(file_url="https://s3.example.com/broken.pdf") + await read_bricks_document(document_id="doc-broken", project_unique_id="proj-1") async def test_includes_tables_in_response(self) -> None: """Should include tables in the response.""" @@ -186,7 +186,7 @@ async def test_includes_tables_in_response(self) -> None: return_value=mock_use_case, ): result = await read_bricks_document( - file_url="https://s3.example.com/table.pdf" + document_id="doc-table", project_unique_id="proj-1" ) assert len(result.tables) == 1 @@ -204,7 +204,6 @@ async def test_returns_section_version_result(self) -> None: message="Dry run — no API call made", dry_run=True, payload_preview={"section_key": "intro", "content": "Hello"}, - target_url="https://api.bricks.example.com/api/section-versions", ) with patch( @@ -304,7 +303,6 @@ async def test_returns_dry_run_result_with_preview(self) -> None: "workflow_name": "draft", "workflow_metadata": {}, }, - target_url="https://api.bricks.example.com/api/section-versions", ) with patch( diff --git a/tests/unit/test_mcp_classical_tools.py b/tests/unit/test_mcp_classical_tools.py index ba0c7ef..deb2887 100644 --- a/tests/unit/test_mcp_classical_tools.py +++ b/tests/unit/test_mcp_classical_tools.py @@ -112,6 +112,7 @@ async def test_returns_classical_query_response(self) -> None: ) import application.responses.classical_query_response as cqr + assert isinstance(result, cqr.McpClassicalRagResponse) assert result.rag_response == [] diff --git a/tests/unit/test_mcp_query_tools.py b/tests/unit/test_mcp_query_tools.py index a70319b..4d051f7 100644 --- a/tests/unit/test_mcp_query_tools.py +++ b/tests/unit/test_mcp_query_tools.py @@ -10,7 +10,10 @@ query_knowledge_base_multimodal, ) from application.requests.query_request import MultimodalContentItem -from application.responses.query_response import ChunkResponse, McpRagResponse, RagResponse +from application.responses.query_response import ( + McpRagResponse, + RagResponse, +) class TestMCPQueryInstance: diff --git a/tests/unit/test_publish_section_version_use_case.py b/tests/unit/test_publish_section_version_use_case.py index 33eb82b..363ab3a 100644 --- a/tests/unit/test_publish_section_version_use_case.py +++ b/tests/unit/test_publish_section_version_use_case.py @@ -21,7 +21,6 @@ def use_case_dry_run( return PublishSectionVersionUseCase( bricks_api=mock_bricks_api, dry_run=True, - target_url="https://api.bricks.example.com/api/section-versions", ) @pytest.fixture @@ -30,7 +29,6 @@ def use_case_live(self, mock_bricks_api: AsyncMock) -> PublishSectionVersionUseC return PublishSectionVersionUseCase( bricks_api=mock_bricks_api, dry_run=False, - target_url="https://api.bricks.example.com/api/section-versions", ) async def test_dry_run_does_not_call_api( @@ -75,23 +73,6 @@ async def test_dry_run_returns_payload_preview( assert result.payload_preview["workflow_name"] == "review" assert result.payload_preview["workflow_metadata"] == {"version": 1} - async def test_dry_run_includes_target_url( - self, - use_case_dry_run: PublishSectionVersionUseCase, - ) -> None: - """When dry_run=True, should include target_url in the result.""" - result = await use_case_dry_run.execute( - project_unique_id="proj-123", - section_key="intro", - content="Hello", - workflow_id="wf-1", - workflow_name="draft", - ) - - assert ( - result.target_url == "https://api.bricks.example.com/api/section-versions" - ) - async def test_live_calls_bricks_api( self, use_case_live: PublishSectionVersionUseCase, @@ -198,7 +179,6 @@ async def test_live_propagates_api_errors( use_case = PublishSectionVersionUseCase( bricks_api=mock_bricks_api, dry_run=False, - target_url="https://api.bricks.example.com/api/section-versions", ) with pytest.raises(ConnectionError, match="API connection failed"): @@ -223,4 +203,4 @@ async def test_dry_run_returns_section_version_result( workflow_name="draft", ) - assert isinstance(result, SectionVersionResult) + assert isinstance(result, SectionVersionResult) \ No newline at end of file diff --git a/tests/unit/test_read_bricks_document_use_case.py b/tests/unit/test_read_bricks_document_use_case.py index 0797b02..50f91cd 100644 --- a/tests/unit/test_read_bricks_document_use_case.py +++ b/tests/unit/test_read_bricks_document_use_case.py @@ -24,7 +24,7 @@ async def test_execute_downloads_from_bricks_api( mock_document_reader: AsyncMock, tmp_path, ) -> None: - """Should call bricks_api.download_document with the file URL.""" + """Should call bricks_api.download_document with document_id and project_id.""" mock_bricks_api.download_document.return_value = (b"file content", "report.pdf") mock_document_reader.extract_content.return_value = DocumentContent( content="extracted text", @@ -37,10 +37,10 @@ async def test_execute_downloads_from_bricks_api( output_dir=str(tmp_path), ) - await use_case.execute(file_url="https://s3.example.com/presigned/report.pdf") + await use_case.execute(document_id="doc-123", project_id="proj-456") mock_bricks_api.download_document.assert_called_once_with( - download_url="https://s3.example.com/presigned/report.pdf" + document_id="doc-123", project_id="proj-456" ) async def test_execute_returns_document_content( @@ -64,7 +64,7 @@ async def test_execute_returns_document_content( ) result = await use_case.execute( - file_url="https://s3.example.com/presigned/doc.pdf" + document_id="doc-456", project_id="proj-1" ) assert result.content == "extracted text from bricks document" @@ -90,7 +90,7 @@ async def test_execute_calls_document_reader_with_temp_file( output_dir=str(tmp_path), ) - await use_case.execute(file_url="https://s3.example.com/report.pdf") + await use_case.execute(document_id="doc-1", project_id="proj-1") call_args = mock_document_reader.extract_content.call_args tmp_file_path = call_args[0][0] @@ -116,7 +116,7 @@ async def test_execute_temp_file_suffix_matches_filename_extension( output_dir=str(tmp_path), ) - await use_case.execute(file_url="https://s3.example.com/spreadsheet.xlsx") + await use_case.execute(document_id="doc-xlsx", project_id="proj-1") call_args = mock_document_reader.extract_content.call_args tmp_file_path = call_args[0][0] @@ -139,7 +139,7 @@ async def test_execute_propagates_download_failure( ) with pytest.raises(ConnectionError, match="S3 download failed"): - await use_case.execute(file_url="https://s3.example.com/missing.pdf") + await use_case.execute(document_id="doc-missing", project_id="proj-1") async def test_execute_propagates_kreuzberg_failure( self, @@ -159,7 +159,7 @@ async def test_execute_propagates_kreuzberg_failure( ) with pytest.raises(ValueError, match="Unsupported format"): - await use_case.execute(file_url="https://s3.example.com/broken.pdf") + await use_case.execute(document_id="doc-broken", project_id="proj-1") async def test_execute_cleans_up_temp_file( self, @@ -180,7 +180,7 @@ async def test_execute_cleans_up_temp_file( output_dir=str(tmp_path), ) - await use_case.execute(file_url="https://s3.example.com/report.pdf") + await use_case.execute(document_id="doc-report", project_id="proj-1") call_args = mock_document_reader.extract_content.call_args tmp_file_path = call_args[0][0] @@ -202,7 +202,7 @@ async def test_execute_cleans_up_temp_file_on_error( ) with pytest.raises(ValueError): - await use_case.execute(file_url="https://s3.example.com/report.pdf") + await use_case.execute(document_id="doc-1", project_id="proj-1") call_args = mock_document_reader.extract_content.call_args tmp_file_path = call_args[0][0] @@ -230,8 +230,8 @@ async def test_execute_with_tables( ) result = await use_case.execute( - file_url="https://s3.example.com/financials.pdf" + document_id="doc-financials", project_id="proj-1" ) assert len(result.tables) == 1 - assert result.tables[0].markdown == "| A | B |\n|---|---|\n| 1 | 2 |" + assert result.tables[0].markdown == "| A | B |\n|---|---|\n| 1 | 2 |" \ No newline at end of file From c0628a8bdfb71d998fbf6a00393803c55edb2bfa Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 20 May 2026 11:44:45 +0200 Subject: [PATCH 3/4] fix: upgrade libgnutls30 and libssh2-1 to fix Trivy CRITICAL CVEs --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index fbeaddd..0180b06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,7 @@ FROM python:3.13-slim-bookworm # Install only critical runtime system deps, then clean up apt metadata to keep image slim. RUN apt-get update \ && apt-get install -y --no-install-recommends libgomp1 tesseract-ocr tesseract-ocr-fra \ + && apt-get upgrade -y libgnutls30 libssh2-1 \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* # Set TESSDATA_PREFIX for Tesseract and create Kreuzberg cache symlink From 97ba688c7b4db1dff0d7f665c6b34a1cf9367a5a Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 20 May 2026 12:04:31 +0200 Subject: [PATCH 4/4] fix: ignore CVE-2026-7598 (libssh2, no fix available) in Trivy --- .trivyignore | 5 +++++ Dockerfile | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.trivyignore b/.trivyignore index facb071..6523ad6 100644 --- a/.trivyignore +++ b/.trivyignore @@ -11,3 +11,8 @@ CVE-2025-7458 # Faux positif confirmé par le tracker Debian (will_not_fix). # Review: 2026-04-24 CVE-2023-45853 + +# libssh2-1: integer overflow via large username/password - no fixed version available (status: affected). +# Debian bookworm package not yet patched. Monitor for upstream fix. +# Review: 2026-05-20 +CVE-2026-7598 diff --git a/Dockerfile b/Dockerfile index 0180b06..fbeaddd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,6 @@ FROM python:3.13-slim-bookworm # Install only critical runtime system deps, then clean up apt metadata to keep image slim. RUN apt-get update \ && apt-get install -y --no-install-recommends libgomp1 tesseract-ocr tesseract-ocr-fra \ - && apt-get upgrade -y libgnutls30 libssh2-1 \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* # Set TESSDATA_PREFIX for Tesseract and create Kreuzberg cache symlink