diff --git a/.env.example b/.env.example index 333035c..d51ccf4 100644 --- a/.env.example +++ b/.env.example @@ -57,3 +57,9 @@ MINIO_ACCESS=minioadmin MINIO_SECRET=minioadmin MINIO_BUCKET=raganything MINIO_SECURE=false + +# Bricks API Configuration +BRICKS_API_BASE_URL=https://analyse.bricks.co +BRICKS_API_KEY= +BRICKS_BEARER_TOKEN= +BRICKS_PUBLISH_DRY_RUN=true diff --git a/Dockerfile b/Dockerfile index fbeaddd..0180b06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,6 +17,7 @@ FROM python:3.13-slim-bookworm # Install only critical runtime system deps, then clean up apt metadata to keep image slim. RUN apt-get update \ && apt-get install -y --no-install-recommends libgomp1 tesseract-ocr tesseract-ocr-fra \ + && apt-get upgrade -y libgnutls30 libssh2-1 \ && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* # Set TESSDATA_PREFIX for Tesseract and create Kreuzberg cache symlink diff --git a/README.md b/README.md index 141ec5f..17a4a56 100644 --- a/README.md +++ b/README.md @@ -14,57 +14,61 @@ Multi-modal RAG service exposing a REST API and MCP server for document indexing | +---------------+---------------+ | | - Application Layer MCP Servers (FastMCP) - +------------------------------+ | - | api/ | +---+--------+ +--+-----------+ +--+-------------+ - | indexing_routes.py | | RAGAnything | | RAGAnything | | RAGAnything | - | query_routes.py | | Query | | Files | | Classical | - | file_routes.py | | /rag/mcp | | /files/mcp | | /classical/mcp| - | health_routes.py | +---+--------+ +--+-----------+ +--+-------------+ - | classical_indexing_routes | | | | - | classical_query_routes | | | classical_index_file - | use_cases/ | | | classical_index_folder - | IndexFileUseCase | | | classical_query - | IndexFolderUseCase | - | QueryUseCase | - | ClassicalIndexFileUseCase | - | ClassicalIndexFolderUseCase | - | ClassicalQueryUseCase | - | ListFilesUseCase | - | ListFoldersUseCase | - | ReadFileUseCase | - | requests/ responses/ | - +------------------------------+ - | | | - v v v - Domain Layer (ports) - +----------------------------------------------------------+ - | RAGEnginePort StoragePort BM25EnginePort | - | DocumentReaderPort VectorStorePort LLMPort | - +----------------------------------------------------------+ - | | | | | - v v v v v - Infrastructure Layer (adapters) - +----------------------------------------------------------+ - | LightRAGAdapter MinioAdapter | - | (RAGAnything/ (minio-py) | - | KreuzbergParser) | - | | - | PostgresBM25Adapter RRFCombiner | - | (pg_textsearch) (hybrid+ fusion) | - | | - | KreuzbergAdapter LangchainPgvectorAdapter | - | (kreuzberg - 91 formats) (langchain-postgres PGVector) | - | | - | LangchainOpenAIAdapter | - | (langchain-openai ChatOpenAI) | - +----------------------------------------------------------+ - | | | | | - v v v v v - PostgreSQL MinIO Kreuzberg OpenAI-compatible - (pgvector + (object (document (LLM API) - Apache AGE storage) extraction) - pg_textsearch) + Application Layer MCP Servers (FastMCP) + +------------------------------+ | + | api/ | +---+--------+ +--+-----------+ +--+-------------+ +--+----------+ + | indexing_routes.py | | RAGAnything | | RAGAnything | | RAGAnything | | RAGAnything | + | query_routes.py | | Query | | Files | | Classical | | Bricks | + | file_routes.py | | /rag/mcp | | /files/mcp | | /classical/mcp| | /bricks/mcp| + | health_routes.py | +---+--------+ +--+-----------+ +--+-------------+ +--+----------+ + | classical_indexing_routes | | | | | + | classical_query_routes | | | classical_index_file list_bricks_documents + | use_cases/ | | | classical_index_folder read_bricks_document + | IndexFileUseCase | | | classical_query publish_section_version + | IndexFolderUseCase | + | QueryUseCase | + | ClassicalIndexFileUseCase | + | ClassicalIndexFolderUseCase | + | ClassicalQueryUseCase | + | ListFilesUseCase | + | ListFoldersUseCase | + | ReadFileUseCase | + | ListBricksDocumentsUseCase | + | ReadBricksDocumentUseCase | + | PublishSectionVersionUseCase| + | requests/ responses/ | + +------------------------------+ + | | | + v v v + Domain Layer (ports) + +----------------------------------------------------------+ + | RAGEnginePort StoragePort BM25EnginePort | + | DocumentReaderPort VectorStorePort LLMPort | + | BricksApiPort | + +----------------------------------------------------------+ + | | | | | + v v v v v + Infrastructure Layer (adapters) + +----------------------------------------------------------+ + | LightRAGAdapter MinioAdapter | + | (RAGAnything/ (minio-py) | + | KreuzbergParser) | + | | + | PostgresBM25Adapter RRFCombiner | + | (pg_textsearch) (hybrid+ fusion) | + | | + | KreuzbergAdapter LangchainPgvectorAdapter | + | (kreuzberg - 91 formats) (langchain-postgres PGVector) | + | | + | LangchainOpenAIAdapter BricksApiAdapter | + | (langchain-openai ChatOpenAI) (httpx, Bricks REST API) | + +----------------------------------------------------------+ + | | | | | + v v v v v + PostgreSQL MinIO Kreuzberg OpenAI-compatible Bricks API + (pgvector + (object (document (LLM API) (analyse.bricks.co + Apache AGE storage) extraction) + section-versions) + pg_textsearch) ``` ## Prerequisites @@ -584,7 +588,7 @@ If BM25 is unavailable (`BM25_ENABLED=false` or pg_textsearch extension missing) ## MCP Servers -The service exposes **three MCP servers**, all using streamable HTTP transport: +The service exposes **four MCP servers**, all using streamable HTTP transport: ### RAGAnythingQuery — `/rag/mcp` @@ -628,6 +632,57 @@ File browsing tools for listing and reading files from MinIO storage. Downloads the file from MinIO, extracts its text content using Kreuzberg, and returns the extracted text along with metadata and any detected tables. +### RAGAnythingBricks — `/bricks/mcp` + +Bricks integration tools for accessing project documents from the Bricks platform and publishing structured section versions. + +#### Tool: `list_bricks_documents` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `project_unique_id` | string | required | Bricks project unique identifier | + +Returns a list of documents for the specified Bricks project, including metadata like file name, MIME type, size, status, and presigned download URLs. + +#### Tool: `read_bricks_document` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `file_url` | string | required | Presigned S3 URL from `list_bricks_documents` | + +Downloads the document from the presigned S3 URL, extracts its text content using Kreuzberg, and returns the extracted text, metadata, and detected tables. No authentication is required — the URL is already signed. + +#### Tool: `publish_section_version` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `project_unique_id` | string | required | Bricks project unique identifier | +| `section_key` | string | required | Section key to publish (e.g. `"summary"`, `"analysis"`) | +| `content` | dict | required | Structured content for the section | +| `workflow_id` | string | `"agent-haiku-files-v1"` | Workflow identifier | +| `workflow_name` | string | `"haiku-files"` | Workflow display name | +| `workflow_metadata` | dict | `null` | Additional workflow metadata | + +Publishes a structured section version back to the Bricks platform. When `BRICKS_PUBLISH_DRY_RUN=true` (default), the tool returns a preview of the payload without making an API call. Set `BRICKS_PUBLISH_DRY_RUN=false` to enable real publishing. + +**Dry-run response example:** + +```json +{ + "success": true, + "message": "DRY RUN — no API call made", + "dry_run": true, + "payload_preview": { + "project_unique_id": "abc-123", + "section_key": "summary", + "content": {"title": "Analysis Summary"}, + "workflow_id": "agent-haiku-files-v1", + "workflow_name": "haiku-files", + "workflow_metadata": {} + } +} +``` + ### RAGAnythingClassical — `/classical/mcp` Classical RAG tools for indexing and querying without a knowledge graph. @@ -670,6 +725,7 @@ All MCP servers use **streamable HTTP** transport exclusively. Connect MCP clien http://localhost:8000/rag/mcp # RAGAnythingQuery http://localhost:8000/files/mcp # RAGAnythingFiles http://localhost:8000/classical/mcp # RAGAnythingClassical +http://localhost:8000/bricks/mcp # RAGAnythingBricks ``` ## Configuration @@ -756,6 +812,15 @@ The classical RAG adapters share the same `OPEN_ROUTER_API_KEY`, `OPEN_ROUTER_AP | `MINIO_BUCKET` | `raganything` | Default bucket name | | `MINIO_SECURE` | `false` | Use HTTPS for MinIO | +### Bricks API (`BricksConfig`) + +| Variable | Default | Description | +|----------|---------|-------------| +| `BRICKS_API_BASE_URL` | `https://analyse.bricks.co` | Bricks platform base URL | +| `BRICKS_API_KEY` | -- | X-API-Key for Bricks API authentication (publish) | +| `BRICKS_BEARER_TOKEN` | -- | Bearer token for Bricks API authentication (list documents) | +| `BRICKS_PUBLISH_DRY_RUN` | `true` | When `true`, `publish_section_version` returns a payload preview without making an API call | + ## Query Modes | Mode | Description | @@ -821,6 +886,7 @@ src/ document_reader_port.py -- DocumentReaderPort (abstract) + DocumentContent vector_store_port.py -- VectorStorePort (abstract) + SearchResult llm_port.py -- LLMPort (abstract) + bricks_api_port.py -- BricksApiPort (abstract) + BricksDocumentInfo + SectionVersionResult application/ api/ health_routes.py -- GET /health @@ -832,6 +898,7 @@ src/ mcp_query_tools.py -- MCP tools: query_knowledge_base, query_knowledge_base_multimodal mcp_file_tools.py -- MCP tools: list_files, read_file mcp_classical_tools.py -- MCP tools: classical_index_file, classical_index_folder, classical_query + mcp_bricks_tools.py -- MCP tools: list_bricks_documents, read_bricks_document, publish_section_version requests/ indexing_request.py -- IndexFileRequest, IndexFolderRequest classical_indexing_request.py -- ClassicalIndexFileRequest, ClassicalIndexFolderRequest @@ -854,6 +921,9 @@ src/ list_folders_use_case.py -- Lists folder prefixes from MinIO read_file_use_case.py -- Reads file from MinIO, extracts content via Kreuzberg upload_file_use_case.py -- Uploads file to MinIO storage + list_bricks_documents_use_case.py -- Lists documents from Bricks API + read_bricks_document_use_case.py -- Downloads Bricks document via presigned URL, extracts via Kreuzberg + publish_section_version_use_case.py -- Publishes section version (dry-run aware) infrastructure/ rag/ lightrag_adapter.py -- LightRAGAdapter (RAGAnything/LightRAG) @@ -871,6 +941,8 @@ src/ langchain_pgvector_adapter.py -- LangchainPgvectorAdapter (langchain-postgres PGVectorStore) llm/ langchain_openai_adapter.py -- LangchainOpenAIAdapter (langchain-openai ChatOpenAI) + bricks/ + bricks_api_adapter.py -- BricksApiAdapter (httpx, Bricks REST API + section-versions) alembic/ env.py -- Alembic migration environment (async) versions/ diff --git a/src/application/api/mcp_bricks_tools.py b/src/application/api/mcp_bricks_tools.py new file mode 100644 index 0000000..34adf6e --- /dev/null +++ b/src/application/api/mcp_bricks_tools.py @@ -0,0 +1,103 @@ +import logging + +from fastmcp import FastMCP +from fastmcp.exceptions import ToolError + +from application.responses.file_response import FileContentResponse +from dependencies import ( + get_list_bricks_documents_use_case, + get_publish_section_version_use_case, + get_read_bricks_document_use_case, +) + +logger = logging.getLogger(__name__) + +mcp_bricks = FastMCP("RAGAnythingBricks") + + +@mcp_bricks.tool() +async def list_bricks_documents(project_unique_id: str) -> list: + """Liste les documents d'un projet Bricks. + + Args: + project_unique_id: L'identifiant unique du projet Bricks + + Returns: + Liste des documents du projet avec métadonnées + """ + use_case = get_list_bricks_documents_use_case() + try: + return await use_case.execute(project_id=project_unique_id) + except Exception: + logger.exception( + "Failed to list bricks documents for project %s", project_unique_id + ) + raise ToolError("Failed to list bricks documents") from None + + +@mcp_bricks.tool() +async def read_bricks_document( + document_id: str, + project_unique_id: str, +) -> FileContentResponse: + """Télécharge un document Bricks et extrait son contenu textuel. + + Résoud automatiquement l'URL pré-signée à partir du document_id et project_unique_id. + + Args: + document_id: L'identifiant du document (champ 'id' de list_bricks_documents) + project_unique_id: L'identifiant du projet Bricks + + Returns: + Contenu extrait avec métadonnées et tables détectées + """ + use_case = get_read_bricks_document_use_case() + try: + result = await use_case.execute(document_id=document_id, project_id=project_unique_id) + except Exception: + logger.exception("Failed to read bricks document: %s in project %s", document_id, project_unique_id) + raise ToolError("Failed to read bricks document") from None + return FileContentResponse( + content=result.content, + metadata=result.metadata, + tables=result.tables, + ) + + +@mcp_bricks.tool() +async def publish_section_version( + project_unique_id: str, + section_key: str, + content: dict, + workflow_id: str = "agent-haiku-files-v1", + workflow_name: str = "agent-haiku-files-v1", + workflow_metadata: dict | None = None, +) -> dict: + """Publie la réponse structurée d'une section d'un projet Bricks. + + Args: + project_unique_id: L'identifiant unique du projet + section_key: La clé de la section à publier + content: Le contenu structuré de la section + workflow_id: L'identifiant du workflow + workflow_name: Le nom du workflow + workflow_metadata: Métadonnées additionnelles du workflow + + Returns: + Résultat de la publication avec statut et aperçu du payload + """ + use_case = get_publish_section_version_use_case() + try: + return await use_case.execute( + project_unique_id=project_unique_id, + section_key=section_key, + content=content, + workflow_id=workflow_id, + workflow_name=workflow_name, + workflow_metadata=workflow_metadata, + ) + except Exception: + logger.exception( + "Failed to publish section version for project %s", project_unique_id + ) + raise ToolError("Failed to publish section version") from None diff --git a/src/application/api/mcp_classical_tools.py b/src/application/api/mcp_classical_tools.py index 24e90f1..bf67950 100644 --- a/src/application/api/mcp_classical_tools.py +++ b/src/application/api/mcp_classical_tools.py @@ -2,10 +2,11 @@ from fastmcp import FastMCP -from application.responses.classical_query_response import ClassicalRagResponse, McpClassicalRagResponse +from application.responses.classical_query_response import ( + ClassicalRagResponse, + McpClassicalRagResponse, +) from dependencies import ( - get_classical_index_file_use_case, - get_classical_index_folder_use_case, get_classical_query_use_case, ) @@ -52,4 +53,3 @@ async def classical_query( ClassicalRagResponse(content=chunk.content, file_path=chunk.file_path) ) return classical_response - diff --git a/src/application/api/mcp_query_tools.py b/src/application/api/mcp_query_tools.py index dd25f96..e5d5141 100644 --- a/src/application/api/mcp_query_tools.py +++ b/src/application/api/mcp_query_tools.py @@ -6,7 +6,11 @@ from fastmcp import FastMCP from application.requests.query_request import MultimodalContentItem -from application.responses.query_response import McpRagResponse, QueryResponse, RagResponse +from application.responses.query_response import ( + McpRagResponse, + QueryResponse, + RagResponse, +) from dependencies import ( get_multimodal_query_use_case, get_query_use_case, @@ -45,7 +49,9 @@ async def query_knowledge_base( chunks = response.data.chunks mcp_response = McpRagResponse(rag_response=[]) for chunk in chunks: - mcp_response.rag_response.append(RagResponse(content=chunk.content, file_path=chunk.file_path)) + mcp_response.rag_response.append( + RagResponse(content=chunk.content, file_path=chunk.file_path) + ) return mcp_response diff --git a/src/application/responses/classical_query_response.py b/src/application/responses/classical_query_response.py index 88df5b0..9a9d36b 100644 --- a/src/application/responses/classical_query_response.py +++ b/src/application/responses/classical_query_response.py @@ -36,13 +36,15 @@ class ClassicalQueryResponse(BaseModel): class ClassicalRagResponse(BaseModel): content: str = Field( - default="", description="Textual content retrieved from the knowledge base" + default="", description="Textual content retrieved from the knowledge base" ) file_path: str = Field( default="", description="Source file path of the retrieved content" ) + class McpClassicalRagResponse(BaseModel): rag_response: list[ClassicalRagResponse] = Field( - default_factory=list, description="List of retrieved content for MCP integration" + default_factory=list, + description="List of retrieved content for MCP integration", ) diff --git a/src/application/responses/query_response.py b/src/application/responses/query_response.py index 96bb47a..dc29843 100644 --- a/src/application/responses/query_response.py +++ b/src/application/responses/query_response.py @@ -74,11 +74,14 @@ class MultimodalQueryResponse(BaseModel): default="", description="Réponse textuelle de l'analyse multimodale" ) + class RagResponse(BaseModel): content: str file_path: str + class McpRagResponse(BaseModel): rag_response: list[RagResponse] = Field( - default_factory=list, description="List of retrieved content for MCP integration" + default_factory=list, + description="List of retrieved content for MCP integration", ) diff --git a/src/application/use_cases/classical_query_use_case.py b/src/application/use_cases/classical_query_use_case.py index fa7ddc0..f1d0706 100644 --- a/src/application/use_cases/classical_query_use_case.py +++ b/src/application/use_cases/classical_query_use_case.py @@ -79,7 +79,7 @@ async def execute( mode: Literal["vector", "hybrid"] = "vector", ) -> ClassicalQueryResponse: working_dir = working_dir if working_dir.endswith("/") else f"{working_dir}/" - + if num_variations is None: num_variations = self.config.CLASSICAL_NUM_QUERY_VARIATIONS if relevance_threshold is None: diff --git a/src/application/use_cases/list_bricks_documents_use_case.py b/src/application/use_cases/list_bricks_documents_use_case.py new file mode 100644 index 0000000..eb0f595 --- /dev/null +++ b/src/application/use_cases/list_bricks_documents_use_case.py @@ -0,0 +1,9 @@ +from domain.ports.bricks_api_port import BricksApiPort, BricksDocumentInfo + + +class ListBricksDocumentsUseCase: + def __init__(self, bricks_api: BricksApiPort) -> None: + self.bricks_api = bricks_api + + async def execute(self, project_id: str) -> list[BricksDocumentInfo]: + return await self.bricks_api.list_project_documents(project_id=project_id) diff --git a/src/application/use_cases/publish_section_version_use_case.py b/src/application/use_cases/publish_section_version_use_case.py new file mode 100644 index 0000000..fb6cfb1 --- /dev/null +++ b/src/application/use_cases/publish_section_version_use_case.py @@ -0,0 +1,41 @@ +from domain.ports.bricks_api_port import BricksApiPort, SectionVersionResult + + +class PublishSectionVersionUseCase: + def __init__( + self, + bricks_api: BricksApiPort, + dry_run: bool, + ) -> None: + self.bricks_api = bricks_api + self.dry_run = dry_run + + async def execute( + self, + project_unique_id: str, + section_key: str, + content: dict, + workflow_id: str, + workflow_name: str, + workflow_metadata: dict | None = None, + ) -> SectionVersionResult: + payload = { + "project_unique_id": project_unique_id, + "section_key": section_key, + "content": content, + "workflow_id": workflow_id, + "workflow_name": workflow_name, + "workflow_metadata": workflow_metadata + if workflow_metadata is not None + else {}, + } + + if self.dry_run: + return SectionVersionResult( + success=True, + message="DRY RUN — no API call made", + dry_run=True, + payload_preview=payload, + ) + + return await self.bricks_api.publish_section_version(payload=payload) diff --git a/src/application/use_cases/read_bricks_document_use_case.py b/src/application/use_cases/read_bricks_document_use_case.py new file mode 100644 index 0000000..7b46be5 --- /dev/null +++ b/src/application/use_cases/read_bricks_document_use_case.py @@ -0,0 +1,42 @@ +import contextlib +import os +import tempfile + +import aiofiles + +from domain.ports.bricks_api_port import BricksApiPort +from domain.ports.document_reader_port import DocumentContent, DocumentReaderPort + + +class ReadBricksDocumentUseCase: + def __init__( + self, + bricks_api: BricksApiPort, + document_reader: DocumentReaderPort, + output_dir: str, + ) -> None: + self.bricks_api = bricks_api + self.document_reader = document_reader + self.output_dir = output_dir + + async def execute( + self, + document_id: str, + project_id: str, + ) -> DocumentContent: + data, filename = await self.bricks_api.download_document( + document_id=document_id, project_id=project_id + ) + + os.makedirs(self.output_dir, exist_ok=True) + suffix = os.path.splitext(filename)[1] or ".bin" + fd, tmp_path = tempfile.mkstemp(suffix=suffix, dir=self.output_dir) + try: + os.close(fd) + async with aiofiles.open(tmp_path, "wb") as f: + await f.write(data) + + return await self.document_reader.extract_content(tmp_path) + finally: + with contextlib.suppress(OSError): + os.unlink(tmp_path) diff --git a/src/config.py b/src/config.py index 0629493..81b883d 100644 --- a/src/config.py +++ b/src/config.py @@ -157,6 +157,13 @@ class ClassicalRAGConfig(BaseSettings): ) +class BricksConfig(BaseSettings): + BRICKS_API_BASE_URL: str = Field(default="https://analyse.bricks.co") + BRICKS_API_KEY: str = Field(default="") + BRICKS_BEARER_TOKEN: str = Field(default="") + BRICKS_PUBLISH_DRY_RUN: bool = Field(default=True) + + class MinioConfig(BaseSettings): """MinIO object storage configuration.""" diff --git a/src/dependencies.py b/src/dependencies.py index 07d41a8..7fec6c7 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -11,16 +11,26 @@ from application.use_cases.classical_query_use_case import ClassicalQueryUseCase from application.use_cases.index_file_use_case import IndexFileUseCase from application.use_cases.index_folder_use_case import IndexFolderUseCase +from application.use_cases.list_bricks_documents_use_case import ( + ListBricksDocumentsUseCase, +) from application.use_cases.list_files_use_case import ListFilesUseCase from application.use_cases.list_folders_use_case import ListFoldersUseCase from application.use_cases.liveness_check_use_case import LivenessCheckUseCase from application.use_cases.multimodal_query_use_case import MultimodalQueryUseCase +from application.use_cases.publish_section_version_use_case import ( + PublishSectionVersionUseCase, +) from application.use_cases.query_use_case import QueryUseCase +from application.use_cases.read_bricks_document_use_case import ( + ReadBricksDocumentUseCase, +) from application.use_cases.read_file_use_case import ReadFileUseCase from application.use_cases.upload_file_use_case import UploadFileUseCase from config import ( AppConfig, BM25Config, + BricksConfig, ClassicalRAGConfig, DatabaseConfig, LLMConfig, @@ -30,6 +40,7 @@ from domain.ports.bm25_engine import BM25EnginePort from domain.ports.llm_port import LLMPort from domain.ports.vector_store_port import VectorStorePort +from infrastructure.bricks.bricks_api_adapter import BricksApiAdapter from infrastructure.database.asyncpg_health_adapter import AsyncpgHealthAdapter from infrastructure.document_reader.kreuzberg_adapter import KreuzbergAdapter from infrastructure.rag.classical_bm25_adapter import ClassicalBM25Adapter @@ -43,6 +54,7 @@ minio_config = MinioConfig() # type: ignore bm25_config = BM25Config() # type: ignore db_config = DatabaseConfig() # type: ignore +bricks_config = BricksConfig() # type: ignore os.makedirs(app_config.OUTPUT_DIR, exist_ok=True) @@ -67,6 +79,7 @@ kreuzberg_adapter = KreuzbergAdapter() postgres_health_adapter = AsyncpgHealthAdapter(db_config) +bricks_api_adapter = BricksApiAdapter(config=bricks_config) classical_rag_config = ClassicalRAGConfig() @@ -201,3 +214,22 @@ def get_liveness_check_use_case() -> LivenessCheckUseCase: postgres_health=postgres_health_adapter, bucket=minio_config.MINIO_BUCKET, ) + + +def get_list_bricks_documents_use_case() -> ListBricksDocumentsUseCase: + return ListBricksDocumentsUseCase(bricks_api=bricks_api_adapter) + + +def get_read_bricks_document_use_case() -> ReadBricksDocumentUseCase: + return ReadBricksDocumentUseCase( + bricks_api=bricks_api_adapter, + document_reader=kreuzberg_adapter, + output_dir=app_config.OUTPUT_DIR, + ) + + +def get_publish_section_version_use_case() -> PublishSectionVersionUseCase: + return PublishSectionVersionUseCase( + bricks_api=bricks_api_adapter, + dry_run=bricks_config.BRICKS_PUBLISH_DRY_RUN, + ) diff --git a/src/domain/ports/bricks_api_port.py b/src/domain/ports/bricks_api_port.py new file mode 100644 index 0000000..3a363d0 --- /dev/null +++ b/src/domain/ports/bricks_api_port.py @@ -0,0 +1,57 @@ +from abc import ABC, abstractmethod + +from pydantic import BaseModel, ConfigDict, Field + + +class BricksDocumentInfo(BaseModel): + model_config = ConfigDict(populate_by_name=True) + id: str + file_name: str = Field(alias="fileName") + url: str + hash: str = "" + mime_type: str = Field(default="", alias="mimeType") + size: int = 0 + status: str = "" + uploaded_at: str | None = Field(default=None, alias="uploadedAt") + image_analysis_status: str | None = Field(default=None, alias="imageAnalysisStatus") + image_analysis_confidence: float | None = Field( + default=None, alias="imageAnalysisConfidence" + ) + image_analysis_reasoning: str | None = Field( + default=None, alias="imageAnalysisReasoning" + ) + image_analysis_date: str | None = Field(default=None, alias="imageAnalysisDate") + category: str | None = None + category_confidence: float | None = Field(default=None, alias="categoryConfidence") + category_source: str | None = Field(default=None, alias="categorySource") + category_classified_at: str | None = Field( + default=None, alias="categoryClassifiedAt" + ) + is_ignored: bool = Field(default=False, alias="isIgnored") + project_id: str = Field(default="", alias="projectId") + + +class SectionVersionResult(BaseModel): + success: bool + message: str = "" + data: dict | None = None + dry_run: bool = False + payload_preview: dict | None = None + + +class BricksApiPort(ABC): + @abstractmethod + async def list_project_documents(self, project_id: str) -> list[BricksDocumentInfo]: + pass + + @abstractmethod + async def download_document( + self, + document_id: str, + project_id: str, + ) -> tuple[bytes, str]: + pass + + @abstractmethod + async def publish_section_version(self, payload: dict) -> SectionVersionResult: + pass diff --git a/src/infrastructure/bricks/bricks_api_adapter.py b/src/infrastructure/bricks/bricks_api_adapter.py new file mode 100644 index 0000000..7adc413 --- /dev/null +++ b/src/infrastructure/bricks/bricks_api_adapter.py @@ -0,0 +1,135 @@ +import asyncio +import json +import logging +import re +import urllib.error +import urllib.parse +import urllib.request + +from domain.ports.bricks_api_port import ( + BricksApiPort, + BricksDocumentInfo, + SectionVersionResult, +) + +logger = logging.getLogger(__name__) + +_DEFAULT_TIMEOUT = 30 + + +class BricksApiAdapter(BricksApiPort): + def __init__(self, config) -> None: + self._base_url = config.BRICKS_API_BASE_URL.rstrip("/") + self._api_key = config.BRICKS_API_KEY + self._bearer_token = config.BRICKS_BEARER_TOKEN + + async def close(self) -> None: + pass + + def _get(self, url: str, headers: dict | None = None) -> tuple[bytes, dict]: + req = urllib.request.Request(url, headers=headers or {}) + with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: + return resp.read(), dict(resp.headers) + + def _post(self, url: str, payload: dict, headers: dict) -> bytes: + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request(url, data=data, headers=headers, method="POST") + with urllib.request.urlopen(req, timeout=_DEFAULT_TIMEOUT) as resp: + return resp.read() + + async def list_project_documents(self, project_id: str) -> list[BricksDocumentInfo]: + url = f"{self._base_url}/api/projects/{project_id}/documents/ai" + try: + body, _ = await asyncio.to_thread( + self._get, url, {"Authorization": f"Bearer {self._bearer_token}"} + ) + except urllib.error.HTTPError as e: + if e.code in (401, 403): + raise PermissionError( + f"Bricks API authentication failed (HTTP {e.code})" + ) from e + if e.code == 404: + raise FileNotFoundError( + f"Bricks project not found: {project_id}" + ) from e + raise RuntimeError(f"Bricks API error (HTTP {e.code})") from e + except urllib.error.URLError as e: + raise ConnectionError(f"Bricks API connection failed: {e.reason}") from e + except TimeoutError as e: + raise TimeoutError(f"Bricks API request timed out: {e}") from e + items = json.loads(body).get("items", []) + documents = [BricksDocumentInfo(**item) for item in items] + return documents + + async def download_document( + self, + document_id: str, + project_id: str, + ) -> tuple[bytes, str]: + documents = await self.list_project_documents(project_id) + url = None + for doc in documents: + if doc.id == document_id and doc.url: + url = doc.url + break + if not url: + raise FileNotFoundError( + f"Document {document_id} not found in project {project_id}" + ) + try: + body, resp_headers = await asyncio.to_thread(self._get, url) + except urllib.error.HTTPError as e: + if e.code in (401, 403): + raise PermissionError( + f"Document download authentication failed (HTTP {e.code})" + ) from e + if e.code == 404: + raise FileNotFoundError( + f"Document {document_id} not found (project {project_id})" + ) from e + raise RuntimeError( + f"Failed to download document (HTTP {e.code})" + ) from e + except urllib.error.URLError as e: + raise ConnectionError(f"Document download connection failed: {e.reason}") from e + except TimeoutError as e: + raise TimeoutError(f"Document download timed out: {e}") from e + + filename = _extract_filename(resp_headers.get("Content-Disposition", ""), url) + return body, filename + + async def publish_section_version(self, payload: dict) -> SectionVersionResult: + url = f"{self._base_url}/api/section-versions" + headers = { + "X-API-Key": self._api_key, + "Content-Type": "application/json", + } + try: + body = await asyncio.to_thread(self._post, url, payload, headers) + except urllib.error.HTTPError as e: + if e.code in (401, 403): + raise PermissionError( + f"Publish authentication failed (HTTP {e.code})" + ) from e + raise RuntimeError(f"Publish failed (HTTP {e.code})") from e + except urllib.error.URLError as e: + raise ConnectionError(f"Publish connection failed: {e.reason}") from e + except TimeoutError as e: + raise TimeoutError(f"Publish request timed out: {e}") from e + data = json.loads(body) + return SectionVersionResult(success=True, message="Published", data=data) + + +def _extract_filename(content_disposition: str, url: str = "") -> str: + match = re.search(r'filename="([^"]+)"', content_disposition) + if match: + return match.group(1) + match = re.search(r"filename=([^\s;]+)", content_disposition) + if match: + return match.group(1) + if url: + decoded_path = urllib.parse.unquote(urllib.parse.urlparse(url).path) + path_filename = decoded_path.rsplit("/", 1)[-1] + if path_filename and "." in path_filename: + return path_filename + return "document.bin" \ No newline at end of file diff --git a/src/infrastructure/document_reader/kreuzberg_adapter.py b/src/infrastructure/document_reader/kreuzberg_adapter.py index b22de48..0264003 100644 --- a/src/infrastructure/document_reader/kreuzberg_adapter.py +++ b/src/infrastructure/document_reader/kreuzberg_adapter.py @@ -5,7 +5,6 @@ from kreuzberg import ( ChunkingConfig, ExtractionConfig, - LlmConfig as KreuzbergLlmConfig, OcrConfig, OutputFormat, ParsingError, @@ -13,6 +12,9 @@ ValidationError, extract_file, ) +from kreuzberg import ( + LlmConfig as KreuzbergLlmConfig, +) from config import LLMConfig from domain.ports.document_reader_port import ( diff --git a/src/infrastructure/rag/kreuzberg_raganything_parser.py b/src/infrastructure/rag/kreuzberg_raganything_parser.py index c9db6c8..a5976a9 100644 --- a/src/infrastructure/rag/kreuzberg_raganything_parser.py +++ b/src/infrastructure/rag/kreuzberg_raganything_parser.py @@ -30,7 +30,6 @@ def __init__(self) -> None: super().__init__() self._config = make_extraction_config() - def check_installation(self) -> bool: return importlib.util.find_spec("kreuzberg") is not None diff --git a/src/main.py b/src/main.py index 9237afe..750007b 100644 --- a/src/main.py +++ b/src/main.py @@ -16,11 +16,17 @@ from application.api.file_routes import file_router from application.api.health_routes import health_router from application.api.indexing_routes import indexing_router +from application.api.mcp_bricks_tools import mcp_bricks from application.api.mcp_classical_tools import mcp_classical from application.api.mcp_file_tools import mcp_files from application.api.mcp_query_tools import mcp_query from application.api.query_routes import query_router -from dependencies import app_config, bm25_adapter, classical_vector_store +from dependencies import ( + app_config, + bm25_adapter, + bricks_api_adapter, + classical_vector_store, +) _LOG_FORMAT = "%(asctime)s %(levelname)-8s [%(name)s] %(message)s" @@ -87,6 +93,7 @@ async def db_lifespan(_app: FastAPI): mcp_query_app = mcp_query.http_app(path="/") mcp_files_app = mcp_files.http_app(path="/") mcp_classical_app = mcp_classical.http_app(path="/") +mcp_bricks_app = mcp_bricks.http_app(path="/") @asynccontextmanager @@ -97,6 +104,7 @@ async def combined_lifespan(app: FastAPI): mcp_query_app.lifespan(app), mcp_files_app.lifespan(app), mcp_classical_app.lifespan(app), + mcp_bricks_app.lifespan(app), ): yield @@ -125,6 +133,7 @@ async def combined_lifespan(app: FastAPI): app.mount("/rag/mcp", mcp_query_app) app.mount("/files/mcp", mcp_files_app) app.mount("/classical/mcp", mcp_classical_app) +app.mount("/bricks/mcp", mcp_bricks_app) def run_fastapi(): diff --git a/tests/conftest.py b/tests/conftest.py index 277e2eb..47ae23d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ mock_document_reader = _external.mock_document_reader mock_vector_store = _external.mock_vector_store mock_llm = _external.mock_llm +mock_bricks_api = _external.mock_bricks_api @pytest.fixture diff --git a/tests/fixtures/external.py b/tests/fixtures/external.py index fd35f32..4f20eb6 100644 --- a/tests/fixtures/external.py +++ b/tests/fixtures/external.py @@ -8,6 +8,11 @@ FolderIndexingStats, IndexingStatus, ) +from domain.ports.bricks_api_port import ( + BricksApiPort, + BricksDocumentInfo, + SectionVersionResult, +) from domain.ports.document_reader_port import ( DocumentContent, DocumentMetadata, @@ -120,3 +125,21 @@ def mock_llm() -> AsyncMock: ) mock.generate_chat.return_value = "LLM generated response text" return mock + + +@pytest.fixture +def mock_bricks_api() -> AsyncMock: + """Provide an AsyncMock of BricksApiPort for external adapter mocking.""" + mock = AsyncMock(spec=BricksApiPort) + mock.list_project_documents.return_value = [ + BricksDocumentInfo( + id="test-id", + fileName="doc.pdf", + url="https://s3.example.com/doc.pdf", + ) + ] + mock.download_document.return_value = (b"pdf content", "document.pdf") + mock.publish_section_version.return_value = SectionVersionResult( + success=True, message="Published" + ) + return mock diff --git a/tests/unit/infrastructure/test_bricks_api_adapter.py b/tests/unit/infrastructure/test_bricks_api_adapter.py new file mode 100644 index 0000000..539c1df --- /dev/null +++ b/tests/unit/infrastructure/test_bricks_api_adapter.py @@ -0,0 +1,431 @@ +"""Tests for BricksApiAdapter — the urllib-based implementation of BricksApiPort. + +The Bricks API is an external dependency protected by Cloudflare, +so we mock urllib.request.urlopen responses while testing our adapter +logic for parsing, headers, and error handling. +""" + +import json +from io import BytesIO +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import urllib.error + +from domain.ports.bricks_api_port import BricksDocumentInfo, SectionVersionResult +from infrastructure.bricks.bricks_api_adapter import ( + BricksApiAdapter, + _extract_filename, +) + + +@pytest.fixture +def bricks_config() -> MagicMock: + """Provide a mock BricksConfig.""" + config = MagicMock() + config.BRICKS_API_BASE_URL = "https://api.bricks.example.com" + config.BRICKS_API_KEY = "test-api-key-12345" + config.BRICKS_BEARER_TOKEN = "test-bearer-token" + config.BRICKS_PUBLISH_DRY_RUN = True + return config + + +@pytest.fixture +def adapter(bricks_config: MagicMock) -> BricksApiAdapter: + """Provide a BricksApiAdapter with mock config.""" + return BricksApiAdapter(config=bricks_config) + + +def _mock_urlopen_response(body: bytes, headers: dict | None = None, status: int = 200) -> MagicMock: + """Create a mock object that behaves like urllib.urlopen response.""" + mock_resp = MagicMock() + mock_resp.read.return_value = body + mock_resp.headers = headers or {} + mock_resp.__enter__ = MagicMock(return_value=mock_resp) + mock_resp.__exit__ = MagicMock(return_value=False) + return mock_resp + + +class TestListProjectDocuments: + """Tests for BricksApiAdapter.list_project_documents.""" + + async def test_calls_api_with_bearer_token(self, adapter: BricksApiAdapter) -> None: + """Should call GET /api/projects/{id}/documents/ai with Bearer token.""" + body = json.dumps({"items": []}).encode() + mock_resp = _mock_urlopen_response(body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp) as mock_urlopen: + await adapter.list_project_documents(project_id="proj-123") + + mock_urlopen.assert_called_once() + req = mock_urlopen.call_args[0][0] + assert "api/projects/proj-123/documents/ai" in req.full_url + assert req.get_header("Authorization") == "Bearer test-bearer-token" + + async def test_returns_list_of_bricks_document_info(self, adapter: BricksApiAdapter) -> None: + """Should parse JSON response and return list of BricksDocumentInfo.""" + api_response = { + "items": [ + { + "id": "doc-1", + "fileName": "report.pdf", + "url": "https://s3.example.com/presigned1", + "hash": "abc123", + "mimeType": "application/pdf", + "size": 1024, + "status": "PROCESSED", + "uploadedAt": "2025-12-03T17:02:50.916Z", + "imageAnalysisStatus": None, + "imageAnalysisConfidence": None, + "imageAnalysisReasoning": None, + "imageAnalysisDate": None, + "category": None, + "categoryConfidence": None, + "categorySource": None, + "categoryClassifiedAt": None, + "isIgnored": False, + "projectId": "proj-123", + }, + { + "id": "doc-2", + "fileName": "notes.docx", + "url": "https://s3.example.com/presigned2", + "mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "size": 2048, + "status": "PROCESSED", + "isIgnored": True, + "projectId": "proj-123", + }, + ] + } + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.list_project_documents(project_id="proj-123") + + assert len(result) == 2 + assert isinstance(result[0], BricksDocumentInfo) + assert result[0].file_name == "report.pdf" + assert result[0].mime_type == "application/pdf" + assert result[0].size == 1024 + assert result[1].file_name == "notes.docx" + assert result[1].is_ignored is True + + async def test_camel_case_to_snake_case_mapping(self, adapter: BricksApiAdapter) -> None: + """Should correctly map camelCase API fields to snake_case model fields.""" + api_response = { + "items": [ + { + "id": "doc-uuid", + "fileName": "image.png", + "url": "https://s3.example.com/img", + "imageAnalysisStatus": "relevant", + "imageAnalysisConfidence": 95.0, + "imageAnalysisReasoning": "Contains relevant diagrams", + "imageAnalysisDate": "2025-12-04T10:00:00.000Z", + "categoryConfidence": 72.3, + "categorySource": "llm", + "categoryClassifiedAt": "2025-12-05T08:00:00.000Z", + } + ] + } + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.list_project_documents(project_id="proj-123") + + doc = result[0] + assert doc.image_analysis_status == "relevant" + assert doc.image_analysis_confidence == 95.0 + assert doc.image_analysis_reasoning == "Contains relevant diagrams" + assert doc.image_analysis_date == "2025-12-04T10:00:00.000Z" + assert doc.category_confidence == 72.3 + assert doc.category_source == "llm" + assert doc.category_classified_at == "2025-12-05T08:00:00.000Z" + + async def test_returns_empty_list_when_no_documents(self, adapter: BricksApiAdapter) -> None: + """Should return empty list when API returns empty items.""" + body = json.dumps({"items": []}).encode() + mock_resp = _mock_urlopen_response(body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.list_project_documents(project_id="proj-empty") + + assert result == [] + + async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> None: + """Should raise PermissionError on 401 Unauthorized.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/projects/proj-123/documents/ai", + code=401, + msg="Unauthorized", + hdrs={}, + fp=None, + ) + + with pytest.raises(PermissionError): + await adapter.list_project_documents(project_id="proj-123") + + async def test_raises_on_403_forbidden(self, adapter: BricksApiAdapter) -> None: + """Should raise PermissionError on 403 Forbidden.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/projects/proj-123/documents/ai", + code=403, + msg="Forbidden", + hdrs={}, + fp=None, + ) + + with pytest.raises(PermissionError): + await adapter.list_project_documents(project_id="proj-123") + + async def test_raises_on_404_not_found(self, adapter: BricksApiAdapter) -> None: + """Should raise FileNotFoundError on 404 Not Found.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/projects/nonexistent/documents/ai", + code=404, + msg="Not Found", + hdrs={}, + fp=None, + ) + + with pytest.raises(FileNotFoundError): + await adapter.list_project_documents(project_id="nonexistent") + + async def test_raises_on_connection_error(self, adapter: BricksApiAdapter) -> None: + """Should raise ConnectionError on URLError.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.URLError(reason="Connection refused") + + with pytest.raises(ConnectionError): + await adapter.list_project_documents(project_id="proj-123") + + async def test_raises_on_timeout(self, adapter: BricksApiAdapter) -> None: + """Should raise TimeoutError on socket timeout.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = TimeoutError("Request timed out") + + with pytest.raises(TimeoutError): + await adapter.list_project_documents(project_id="proj-123") + + +class TestDownloadDocument: + """Tests for BricksApiAdapter.download_document.""" + + async def test_resolves_document_id_and_downloads_presigned_url(self, adapter: BricksApiAdapter) -> None: + """Should resolve document_id via list_project_documents and download pre-signed URL.""" + presigned_url = "https://s3.example.com/projects/proj-1/doc.pdf?X-Amz-Signature=abc123" + doc_list_body = json.dumps({ + "items": [{ + "id": "doc-1", + "fileName": "doc.pdf", + "url": presigned_url, + "mimeType": "application/pdf", + "size": 100, + "status": "PROCESSED", + }] + }).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + file_body = b"PDF content" + file_resp = _mock_urlopen_response(file_body, headers={"Content-Disposition": 'attachment; filename="doc.pdf"'}) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", side_effect=[list_resp, file_resp]) as mock_urlopen: + content, filename = await adapter.download_document(document_id="doc-1", project_id="proj-1") + + assert mock_urlopen.call_count == 2 + list_req = mock_urlopen.call_args_list[0][0][0] + assert "api/projects/proj-1/documents/ai" in list_req.full_url + assert content == b"PDF content" + assert filename == "doc.pdf" + + async def test_extracts_filename_from_url_when_no_content_disposition(self, adapter: BricksApiAdapter) -> None: + """Should extract filename from URL path when no Content-Disposition header.""" + presigned_url = "https://s3.example.com/projects/proj-1/Dossier_Financement.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({ + "items": [{ + "id": "doc-2", + "fileName": "Dossier_Financement.pdf", + "url": presigned_url, + "mimeType": "application/pdf", + "size": 200, + "status": "PROCESSED", + }] + }).encode() + list_resp = _mock_urlopen_response(doc_list_body) + file_resp = _mock_urlopen_response(b"data", headers={}) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", side_effect=[list_resp, file_resp]): + _, filename = await adapter.download_document(document_id="doc-2", project_id="proj-1") + + assert filename == "Dossier_Financement.pdf" + + async def test_defaults_to_document_bin_when_no_filename(self, adapter: BricksApiAdapter) -> None: + """Should return 'document.bin' when no filename in URL or header.""" + doc_list_body = json.dumps({ + "items": [{ + "id": "doc-3", + "fileName": "noext", + "url": "https://s3.example.com/projects/proj-1/noext?X-Amz-Signature=abc", + "mimeType": "application/octet-stream", + "size": 50, + "status": "PROCESSED", + }] + }).encode() + list_resp = _mock_urlopen_response(doc_list_body) + file_resp = _mock_urlopen_response(b"data", headers={}) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", side_effect=[list_resp, file_resp]): + _, filename = await adapter.download_document(document_id="doc-3", project_id="proj-1") + + assert filename == "document.bin" + + async def test_raises_when_document_id_not_found(self, adapter: BricksApiAdapter) -> None: + """Should raise FileNotFoundError when document_id not found in project.""" + doc_list_body = json.dumps({"items": []}).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=list_resp): + with pytest.raises(FileNotFoundError, match="Document missing-id not found"): + await adapter.download_document(document_id="missing-id", project_id="proj-1") + + async def test_raises_permission_error_on_403(self, adapter: BricksApiAdapter) -> None: + """Should raise PermissionError on 403 Forbidden when downloading.""" + presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = [list_resp, urllib.error.HTTPError( + url="https://s3.example.com/doc.pdf", + code=403, + msg="Forbidden", + hdrs={}, + fp=None, + )] + + with pytest.raises(PermissionError): + await adapter.download_document(document_id="doc-1", project_id="proj-1") + + async def test_raises_connection_error(self, adapter: BricksApiAdapter) -> None: + """Should raise ConnectionError on URLError.""" + presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = [list_resp, urllib.error.URLError(reason="Connection refused")] + + with pytest.raises(ConnectionError): + await adapter.download_document(document_id="doc-1", project_id="proj-1") + + async def test_raises_timeout(self, adapter: BricksApiAdapter) -> None: + """Should raise TimeoutError on download timeout.""" + presigned_url = "https://s3.example.com/doc.pdf?X-Amz-Signature=abc" + doc_list_body = json.dumps({"items": [{"id": "doc-1", "fileName": "doc.pdf", "url": presigned_url, "mimeType": "application/pdf", "size": 100, "status": "PROCESSED"}]}).encode() + list_resp = _mock_urlopen_response(doc_list_body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = [list_resp, TimeoutError("Download timed out")] + + with pytest.raises(TimeoutError): + await adapter.download_document(document_id="doc-1", project_id="proj-1") + + +class TestExtractFilename: + """Tests for _extract_filename helper.""" + + def test_extracts_from_content_disposition_quoted(self) -> None: + assert _extract_filename('attachment; filename="report.pdf"') == "report.pdf" + + def test_extracts_from_content_disposition_unquoted(self) -> None: + assert _extract_filename("attachment; filename=report.pdf") == "report.pdf" + + def test_extracts_from_url_path(self) -> None: + assert _extract_filename("", "https://s3.example.com/path/to/report.pdf?X-Amz-Sig=abc") == "report.pdf" + + def test_extracts_decoded_filename_from_url(self) -> None: + assert _extract_filename("", "https://s3.example.com/Dossier_de_Financement.pdf?X-Amz-Sig=abc") == "Dossier_de_Financement.pdf" + + def test_defaults_to_document_bin(self) -> None: + assert _extract_filename("", "") == "document.bin" + + def test_url_without_extension_falls_back(self) -> None: + assert _extract_filename("", "https://s3.example.com/path/noext") == "document.bin" + + +class TestPublishSectionVersion: + """Tests for BricksApiAdapter.publish_section_version.""" + + async def test_calls_post_with_x_api_key_header(self, adapter: BricksApiAdapter) -> None: + """Should call POST /api/section-versions with X-API-Key header.""" + api_response = {"id": "sv-1", "sectionKey": "intro"} + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) + + payload = { + "project_unique_id": "proj-123", + "section_key": "intro", + "content": "Hello world", + "workflow_id": "wf-1", + "workflow_name": "draft", + "workflow_metadata": {}, + } + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp) as mock_urlopen: + await adapter.publish_section_version(payload=payload) + + mock_urlopen.assert_called_once() + req = mock_urlopen.call_args[0][0] + assert req.get_header("X-api-key") == "test-api-key-12345" or req.get_header("X-API-Key") == "test-api-key-12345" + assert req.method == "POST" + assert "api/section-versions" in req.full_url + + async def test_returns_section_version_result(self, adapter: BricksApiAdapter) -> None: + """Should return SectionVersionResult on successful publish.""" + api_response = {"id": "sv-uuid", "sectionKey": "summary"} + body = json.dumps(api_response).encode() + mock_resp = _mock_urlopen_response(body) + + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen", return_value=mock_resp): + result = await adapter.publish_section_version( + payload={"section_key": "summary", "content": "Summary text"} + ) + + assert isinstance(result, SectionVersionResult) + assert result.success is True + + async def test_raises_on_401_unauthorized(self, adapter: BricksApiAdapter) -> None: + """Should raise PermissionError on 401 when publishing.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.HTTPError( + url="https://api.bricks.example.com/api/section-versions", + code=401, + msg="Unauthorized", + hdrs={}, + fp=None, + ) + + with pytest.raises(PermissionError): + await adapter.publish_section_version(payload={"section_key": "intro"}) + + async def test_raises_on_connection_error(self, adapter: BricksApiAdapter) -> None: + """Should raise ConnectionError on URLError when publishing.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = urllib.error.URLError(reason="Connection refused") + + with pytest.raises(ConnectionError): + await adapter.publish_section_version(payload={"section_key": "intro"}) + + async def test_raises_on_timeout(self, adapter: BricksApiAdapter) -> None: + """Should raise TimeoutError on publish timeout.""" + with patch("infrastructure.bricks.bricks_api_adapter.urllib.request.urlopen") as mock_urlopen: + mock_urlopen.side_effect = TimeoutError("Publish timed out") + + with pytest.raises(TimeoutError): + await adapter.publish_section_version(payload={"section_key": "intro"}) \ No newline at end of file diff --git a/tests/unit/test_bricks_api_port.py b/tests/unit/test_bricks_api_port.py new file mode 100644 index 0000000..44ba0c3 --- /dev/null +++ b/tests/unit/test_bricks_api_port.py @@ -0,0 +1,224 @@ +"""Tests for BricksDocumentInfo, SectionVersionResult models and BricksApiPort ABC.""" + +from abc import ABC +from unittest.mock import AsyncMock + +import pytest +from pydantic import ValidationError + +from domain.ports.bricks_api_port import ( + BricksApiPort, + BricksDocumentInfo, + SectionVersionResult, +) + + +class TestBricksDocumentInfo: + """Tests for the BricksDocumentInfo pydantic model.""" + + def test_creates_with_camel_case_aliases(self) -> None: + """Should accept camelCase field names via populate_by_name=True.""" + doc = BricksDocumentInfo( + id="abc-123", + fileName="document.pdf", + url="https://s3.example.com/doc.pdf", + mimeType="application/pdf", + size=869723, + status="PROCESSED", + uploadedAt="2025-12-03T17:02:50.916Z", + imageAnalysisStatus="relevant", + imageAnalysisConfidence=95, + imageAnalysisReasoning="Contains diagrams", + imageAnalysisDate="2025-12-04T10:00:00.000Z", + isIgnored=False, + projectId="proj-456", + ) + + assert doc.id == "abc-123" + assert doc.file_name == "document.pdf" + assert doc.url == "https://s3.example.com/doc.pdf" + assert doc.mime_type == "application/pdf" + assert doc.size == 869723 + assert doc.status == "PROCESSED" + assert doc.uploaded_at == "2025-12-03T17:02:50.916Z" + assert doc.image_analysis_status == "relevant" + assert doc.image_analysis_confidence == 95 + assert doc.image_analysis_reasoning == "Contains diagrams" + assert doc.image_analysis_date == "2025-12-04T10:00:00.000Z" + assert doc.is_ignored is False + assert doc.project_id == "proj-456" + + def test_creates_with_snake_case_field_names(self) -> None: + """Should also accept snake_case field names directly.""" + doc = BricksDocumentInfo( + id="abc-123", + file_name="document.pdf", + url="https://s3.example.com/doc.pdf", + ) + + assert doc.file_name == "document.pdf" + + def test_defaults_for_optional_fields(self) -> None: + """Should provide defaults for fields with defaults.""" + doc = BricksDocumentInfo( + id="abc-123", + fileName="test.pdf", + url="https://s3.example.com/test.pdf", + ) + + assert doc.hash == "" + assert doc.mime_type == "" + assert doc.size == 0 + assert doc.status == "" + assert doc.uploaded_at is None + assert doc.image_analysis_status is None + assert doc.image_analysis_confidence is None + assert doc.image_analysis_reasoning is None + assert doc.image_analysis_date is None + assert doc.category is None + assert doc.category_confidence is None + assert doc.category_source is None + assert doc.category_classified_at is None + assert doc.is_ignored is False + assert doc.project_id == "" + + def test_parses_full_api_response_item(self) -> None: + """Should parse a full API response item with all fields.""" + api_item = { + "id": "uuid-1", + "fileName": "report.docx", + "url": "https://neon-project-analysis.s3.amazonaws.com/presigned", + "hash": "sha256hexstring", + "mimeType": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "size": 1234567, + "status": "PROCESSED", + "uploadedAt": "2025-12-03T17:02:50.916Z", + "imageAnalysisStatus": None, + "imageAnalysisConfidence": None, + "imageAnalysisReasoning": None, + "imageAnalysisDate": None, + "category": "financial", + "categoryConfidence": 88.5, + "categorySource": "llm", + "categoryClassifiedAt": "2025-12-05T08:00:00.000Z", + "isIgnored": True, + "projectId": "proj-uuid", + } + + doc = BricksDocumentInfo(**api_item) + + assert doc.id == "uuid-1" + assert doc.file_name == "report.docx" + assert doc.hash == "sha256hexstring" + assert ( + doc.mime_type + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ) + assert doc.size == 1234567 + assert doc.category == "financial" + assert doc.category_confidence == 88.5 + assert doc.category_source == "llm" + assert doc.category_classified_at == "2025-12-05T08:00:00.000Z" + assert doc.is_ignored is True + assert doc.project_id == "proj-uuid" + + def test_requires_id_file_name_and_url(self) -> None: + """Should raise ValidationError when required fields are missing.""" + with pytest.raises(ValidationError): + BricksDocumentInfo(id="abc", fileName="test.pdf") # missing url + + +class TestSectionVersionResult: + """Tests for the SectionVersionResult model.""" + + def test_creates_minimal_result(self) -> None: + result = SectionVersionResult(success=True) + + assert result.success is True + assert result.message == "" + assert result.data is None + assert result.dry_run is False + assert result.payload_preview is None + + def test_creates_with_all_fields(self) -> None: + result = SectionVersionResult( + success=True, + message="Published successfully", + data={"id": "sv-123"}, + dry_run=True, + payload_preview={"section_key": "intro", "content": "Hello"}, + ) + + assert result.success is True + assert result.message == "Published successfully" + assert result.data == {"id": "sv-123"} + assert result.dry_run is True + assert result.payload_preview == {"section_key": "intro", "content": "Hello"} + + def test_dry_run_result(self) -> None: + result = SectionVersionResult( + success=True, + message="Dry run — no API call made", + dry_run=True, + payload_preview={"key": "value"}, + ) + + assert result.dry_run is True + assert result.data is None + assert result.payload_preview is not None + + +class TestBricksApiPortAbstract: + """Tests for the BricksApiPort abstract base class.""" + + def test_cannot_instantiate_directly(self) -> None: + """BricksApiPort is abstract and cannot be instantiated.""" + with pytest.raises(TypeError): + BricksApiPort() # type: ignore[abstract] + + def test_is_abc_subclass(self) -> None: + """BricksApiPort should inherit from ABC.""" + assert issubclass(BricksApiPort, ABC) + + def test_concrete_subclass_must_implement_all_methods(self) -> None: + """A subclass that doesn't implement all abstract methods cannot be instantiated.""" + + class PartialAdapter(BricksApiPort): + async def list_project_documents( + self, project_id: str + ) -> list[BricksDocumentInfo]: + return [] + + with pytest.raises(TypeError): + PartialAdapter() # type: ignore[abstract] + + def test_concrete_subclass_can_be_instantiated(self) -> None: + """A subclass that implements all abstract methods can be instantiated.""" + + class FullAdapter(BricksApiPort): + async def list_project_documents( + self, project_id: str + ) -> list[BricksDocumentInfo]: + return [] + + async def download_document( + self, document_id: str, project_id: str + ) -> tuple[bytes, str]: + return (b"", "file.pdf") + + async def publish_section_version( + self, payload: dict + ) -> SectionVersionResult: + return SectionVersionResult(success=True) + + adapter = FullAdapter() + assert isinstance(adapter, BricksApiPort) + + def test_concrete_subclass_methods_are_callable(self) -> None: + """Concrete subclass methods should be callable.""" + mock = AsyncMock(spec=BricksApiPort) + mock.list_project_documents.return_value = [ + BricksDocumentInfo(id="1", fileName="a.pdf", url="https://s3/a.pdf") + ] + + assert mock.list_project_documents.return_value is not None diff --git a/tests/unit/test_kreuzberg_raganything_parser.py b/tests/unit/test_kreuzberg_raganything_parser.py index b1af91a..06db453 100644 --- a/tests/unit/test_kreuzberg_raganything_parser.py +++ b/tests/unit/test_kreuzberg_raganything_parser.py @@ -30,7 +30,9 @@ class TestKreuzbergRAGAnythingParserFormat: def _make_parser(self): return KreuzbergRAGAnythingParser() - def _mock_extraction_result(self, content="Hello world", tables=None, metadata=None): + def _mock_extraction_result( + self, content="Hello world", tables=None, metadata=None + ): result = MagicMock() result.content = content result.metadata = metadata or {} @@ -52,8 +54,12 @@ def _mock_table(self, markdown="| A | B |\n|---|---|", page_number=0): ("parse_image", "/tmp/photo.png", "Extracted image text"), ], ) - def test_parse_returns_content_list(self, mock_extract_sync, method, path, expected_content): - mock_extract_sync.return_value = self._mock_extraction_result(content=expected_content) + def test_parse_returns_content_list( + self, mock_extract_sync, method, path, expected_content + ): + mock_extract_sync.return_value = self._mock_extraction_result( + content=expected_content + ) parser = self._make_parser() result = getattr(parser, method)(path, output_dir="/tmp/output") @@ -87,8 +93,12 @@ def test_parse_pdf_with_tables(self, mock_extract_sync): ("/tmp/photo.png", "Image content"), ], ) - def test_parse_document_routes_files(self, mock_extract_sync, path, expected_content): - mock_extract_sync.return_value = self._mock_extraction_result(content=expected_content) + def test_parse_document_routes_files( + self, mock_extract_sync, path, expected_content + ): + mock_extract_sync.return_value = self._mock_extraction_result( + content=expected_content + ) parser = self._make_parser() result = parser.parse_document(path, method="auto", output_dir="/tmp/output") @@ -98,7 +108,9 @@ def test_parse_document_routes_files(self, mock_extract_sync, path, expected_con @patch("infrastructure.rag.kreuzberg_raganything_parser.extract_file_sync") @pytest.mark.parametrize("content", ["", " \n\n "]) - def test_empty_or_whitespace_content_returns_empty_list(self, mock_extract_sync, content): + def test_empty_or_whitespace_content_returns_empty_list( + self, mock_extract_sync, content + ): mock_extract_sync.return_value = self._mock_extraction_result(content=content) parser = self._make_parser() result = parser.parse_pdf("/tmp/empty.pdf", output_dir="/tmp/output") diff --git a/tests/unit/test_list_bricks_documents_use_case.py b/tests/unit/test_list_bricks_documents_use_case.py new file mode 100644 index 0000000..53f0cf1 --- /dev/null +++ b/tests/unit/test_list_bricks_documents_use_case.py @@ -0,0 +1,97 @@ +"""Tests for ListBricksDocumentsUseCase — simple delegation to BricksApiPort.""" + +from unittest.mock import AsyncMock + +import pytest + +from application.use_cases.list_bricks_documents_use_case import ( + ListBricksDocumentsUseCase, +) +from domain.ports.bricks_api_port import BricksDocumentInfo + + +class TestListBricksDocumentsUseCase: + """Tests for ListBricksDocumentsUseCase.""" + + @pytest.fixture + def use_case(self, mock_bricks_api: AsyncMock) -> ListBricksDocumentsUseCase: + return ListBricksDocumentsUseCase(bricks_api=mock_bricks_api) + + async def test_execute_delegates_to_port( + self, + use_case: ListBricksDocumentsUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """Should delegate to bricks_api.list_project_documents.""" + await use_case.execute(project_id="proj-123") + + mock_bricks_api.list_project_documents.assert_called_once_with( + project_id="proj-123" + ) + + async def test_execute_returns_list_of_document_infos( + self, + use_case: ListBricksDocumentsUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """Should return the list of BricksDocumentInfo from the port.""" + expected = [ + BricksDocumentInfo( + id="doc-1", + fileName="report.pdf", + url="https://s3.example.com/doc1.pdf", + mimeType="application/pdf", + size=1024, + status="PROCESSED", + ), + BricksDocumentInfo( + id="doc-2", + fileName="notes.docx", + url="https://s3.example.com/doc2.docx", + ), + ] + mock_bricks_api.list_project_documents.return_value = expected + + result = await use_case.execute(project_id="proj-123") + + assert len(result) == 2 + assert result[0].file_name == "report.pdf" + assert result[1].file_name == "notes.docx" + + async def test_execute_returns_empty_list( + self, + use_case: ListBricksDocumentsUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """Should return empty list when no documents exist.""" + mock_bricks_api.list_project_documents.return_value = [] + + result = await use_case.execute(project_id="empty-project") + + assert result == [] + + async def test_execute_propagates_errors( + self, + mock_bricks_api: AsyncMock, + ) -> None: + """Should propagate errors from the API port.""" + mock_bricks_api.list_project_documents.side_effect = ConnectionError( + "API unreachable" + ) + use_case = ListBricksDocumentsUseCase(bricks_api=mock_bricks_api) + + with pytest.raises(ConnectionError, match="API unreachable"): + await use_case.execute(project_id="proj-123") + + async def test_execute_propagates_timeout( + self, + mock_bricks_api: AsyncMock, + ) -> None: + """Should propagate TimeoutError from the API port.""" + mock_bricks_api.list_project_documents.side_effect = TimeoutError( + "Request timed out" + ) + use_case = ListBricksDocumentsUseCase(bricks_api=mock_bricks_api) + + with pytest.raises(TimeoutError): + await use_case.execute(project_id="proj-123") diff --git a/tests/unit/test_mcp_bricks_tools.py b/tests/unit/test_mcp_bricks_tools.py new file mode 100644 index 0000000..80e4c23 --- /dev/null +++ b/tests/unit/test_mcp_bricks_tools.py @@ -0,0 +1,322 @@ +"""Tests for mcp_bricks_tools.py — Bricks MCP tools registered with FastMCP. + +Follows the exact pattern of test_mcp_file_tools.py. +""" + +from unittest.mock import AsyncMock, patch + +import pytest +from fastmcp.exceptions import ToolError + +from application.api.mcp_bricks_tools import ( + list_bricks_documents, + mcp_bricks, + publish_section_version, + read_bricks_document, +) +from domain.ports.bricks_api_port import BricksDocumentInfo, SectionVersionResult +from domain.ports.document_reader_port import DocumentContent, DocumentMetadata + + +class TestMCPBricksInstance: + """Verify the FastMCP instance configuration.""" + + def test_mcp_bricks_has_correct_name(self) -> None: + """mcp_bricks should be named 'RAGAnythingBricks'.""" + assert mcp_bricks.name == "RAGAnythingBricks" + + +class TestListBricksDocuments: + """Tests for the list_bricks_documents MCP tool.""" + + @pytest.fixture + def mock_documents(self) -> list[BricksDocumentInfo]: + return [ + BricksDocumentInfo( + id="doc-1", + fileName="report.pdf", + url="https://s3.example.com/doc1.pdf", + mimeType="application/pdf", + size=1024, + status="PROCESSED", + ), + BricksDocumentInfo( + id="doc-2", + fileName="notes.docx", + url="https://s3.example.com/doc2.docx", + mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document", + size=2048, + status="PROCESSED", + ), + ] + + async def test_returns_document_list( + self, mock_documents: list[BricksDocumentInfo] + ) -> None: + """Should call use_case.execute and return document list.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = mock_documents + + with patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ): + result = await list_bricks_documents(project_unique_id="proj-123") + + assert isinstance(result, list) + assert len(result) == 2 + assert result[0].file_name == "report.pdf" + assert result[0].size == 1024 + + async def test_calls_use_case_with_project_id( + self, mock_documents: list[BricksDocumentInfo] + ) -> None: + """Should forward project_unique_id to the use case.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = mock_documents + + with patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ): + await list_bricks_documents(project_unique_id="my-project") + + mock_use_case.execute.assert_called_once_with(project_id="my-project") + + async def test_returns_empty_list_when_no_documents(self) -> None: + """Should return empty list when no documents exist.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = [] + + with patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ): + result = await list_bricks_documents(project_unique_id="empty-proj") + + assert result == [] + + async def test_raises_tool_error_for_api_failure(self) -> None: + """Should convert API errors to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = ConnectionError("API unreachable") + + with ( + patch( + "application.api.mcp_bricks_tools.get_list_bricks_documents_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to list bricks documents"), + ): + await list_bricks_documents(project_unique_id="proj-123") + + +class TestReadBricksDocument: + """Tests for the read_bricks_document MCP tool.""" + + @pytest.fixture + def mock_document_content(self) -> DocumentContent: + return DocumentContent( + content="Extracted text from bricks document.", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[], + ) + + async def test_returns_file_content_response( + self, mock_document_content: DocumentContent + ) -> None: + """Should call use_case.execute and return FileContentResponse.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = mock_document_content + + with patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ): + result = await read_bricks_document( + document_id="doc-abc123", project_unique_id="proj-456" + ) + + assert result.content == "Extracted text from bricks document." + assert result.metadata.mime_type == "application/pdf" + + async def test_raises_tool_error_for_download_failure(self) -> None: + """Should convert download errors to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = ConnectionError("S3 download failed") + + with ( + patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to read bricks document"), + ): + await read_bricks_document( + document_id="doc-expired", project_unique_id="proj-1" + ) + + async def test_raises_tool_error_for_generic_failure(self) -> None: + """Should convert generic exceptions to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = Exception("Unexpected error") + + with ( + patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to read bricks document"), + ): + await read_bricks_document(document_id="doc-broken", project_unique_id="proj-1") + + async def test_includes_tables_in_response(self) -> None: + """Should include tables in the response.""" + from domain.ports.document_reader_port import TableData + + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = DocumentContent( + content="Report with table", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[TableData(markdown="| A | B |\n|---|---|")], + ) + + with patch( + "application.api.mcp_bricks_tools.get_read_bricks_document_use_case", + return_value=mock_use_case, + ): + result = await read_bricks_document( + document_id="doc-table", project_unique_id="proj-1" + ) + + assert len(result.tables) == 1 + assert result.tables[0].markdown == "| A | B |\n|---|---|" + + +class TestPublishSectionVersion: + """Tests for the publish_section_version MCP tool.""" + + async def test_returns_section_version_result(self) -> None: + """Should call use_case.execute and return SectionVersionResult.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = SectionVersionResult( + success=True, + message="Dry run — no API call made", + dry_run=True, + payload_preview={"section_key": "intro", "content": "Hello"}, + ) + + with patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ): + result = await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello world", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert result.success is True + assert result.dry_run is True + + async def test_forward_all_params_to_use_case(self) -> None: + """Should forward all parameters to the use case.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = SectionVersionResult(success=True) + + with patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ): + await publish_section_version( + project_unique_id="proj-abc", + section_key="summary", + content="Summary content", + workflow_id="wf-final", + workflow_name="final_review", + workflow_metadata={"approved": True}, + ) + + mock_use_case.execute.assert_called_once_with( + project_unique_id="proj-abc", + section_key="summary", + content="Summary content", + workflow_id="wf-final", + workflow_name="final_review", + workflow_metadata={"approved": True}, + ) + + async def test_raises_tool_error_for_api_failure(self) -> None: + """Should convert API errors to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = ConnectionError("API connection failed") + + with ( + patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to publish section version"), + ): + await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + async def test_raises_tool_error_for_generic_failure(self) -> None: + """Should convert generic exceptions to ToolError.""" + mock_use_case = AsyncMock() + mock_use_case.execute.side_effect = Exception("Unexpected error") + + with ( + patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ), + pytest.raises(ToolError, match="Failed to publish section version"), + ): + await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + async def test_returns_dry_run_result_with_preview(self) -> None: + """Should return payload_preview when dry_run is True.""" + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = SectionVersionResult( + success=True, + message="Dry run", + dry_run=True, + payload_preview={ + "project_unique_id": "proj-123", + "section_key": "intro", + "content": "Hello", + "workflow_id": "wf-1", + "workflow_name": "draft", + "workflow_metadata": {}, + }, + ) + + with patch( + "application.api.mcp_bricks_tools.get_publish_section_version_use_case", + return_value=mock_use_case, + ): + result = await publish_section_version( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert result.dry_run is True + assert result.payload_preview is not None + assert result.payload_preview["section_key"] == "intro" diff --git a/tests/unit/test_mcp_classical_tools.py b/tests/unit/test_mcp_classical_tools.py index ba0c7ef..deb2887 100644 --- a/tests/unit/test_mcp_classical_tools.py +++ b/tests/unit/test_mcp_classical_tools.py @@ -112,6 +112,7 @@ async def test_returns_classical_query_response(self) -> None: ) import application.responses.classical_query_response as cqr + assert isinstance(result, cqr.McpClassicalRagResponse) assert result.rag_response == [] diff --git a/tests/unit/test_mcp_query_tools.py b/tests/unit/test_mcp_query_tools.py index a70319b..4d051f7 100644 --- a/tests/unit/test_mcp_query_tools.py +++ b/tests/unit/test_mcp_query_tools.py @@ -10,7 +10,10 @@ query_knowledge_base_multimodal, ) from application.requests.query_request import MultimodalContentItem -from application.responses.query_response import ChunkResponse, McpRagResponse, RagResponse +from application.responses.query_response import ( + McpRagResponse, + RagResponse, +) class TestMCPQueryInstance: diff --git a/tests/unit/test_publish_section_version_use_case.py b/tests/unit/test_publish_section_version_use_case.py new file mode 100644 index 0000000..363ab3a --- /dev/null +++ b/tests/unit/test_publish_section_version_use_case.py @@ -0,0 +1,206 @@ +"""Tests for PublishSectionVersionUseCase — payload construction and dry_run logic.""" + +from unittest.mock import AsyncMock + +import pytest + +from application.use_cases.publish_section_version_use_case import ( + PublishSectionVersionUseCase, +) +from domain.ports.bricks_api_port import SectionVersionResult + + +class TestPublishSectionVersionUseCase: + """Tests for PublishSectionVersionUseCase.""" + + @pytest.fixture + def use_case_dry_run( + self, mock_bricks_api: AsyncMock + ) -> PublishSectionVersionUseCase: + """Create a use case with dry_run=True (default).""" + return PublishSectionVersionUseCase( + bricks_api=mock_bricks_api, + dry_run=True, + ) + + @pytest.fixture + def use_case_live(self, mock_bricks_api: AsyncMock) -> PublishSectionVersionUseCase: + """Create a use case with dry_run=False.""" + return PublishSectionVersionUseCase( + bricks_api=mock_bricks_api, + dry_run=False, + ) + + async def test_dry_run_does_not_call_api( + self, + use_case_dry_run: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When dry_run=True, should NOT call bricks_api.publish_section_version.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello world", + workflow_id="wf-1", + workflow_name="draft", + workflow_metadata={"source": "test"}, + ) + + mock_bricks_api.publish_section_version.assert_not_called() + assert result.dry_run is True + + async def test_dry_run_returns_payload_preview( + self, + use_case_dry_run: PublishSectionVersionUseCase, + ) -> None: + """When dry_run=True, should return payload_preview in the result.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="summary", + content="Summary text", + workflow_id="wf-2", + workflow_name="review", + workflow_metadata={"version": 1}, + ) + + assert result.success is True + assert result.dry_run is True + assert result.payload_preview is not None + assert result.payload_preview["project_unique_id"] == "proj-123" + assert result.payload_preview["section_key"] == "summary" + assert result.payload_preview["content"] == "Summary text" + assert result.payload_preview["workflow_id"] == "wf-2" + assert result.payload_preview["workflow_name"] == "review" + assert result.payload_preview["workflow_metadata"] == {"version": 1} + + async def test_live_calls_bricks_api( + self, + use_case_live: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When dry_run=False, should call bricks_api.publish_section_version.""" + mock_bricks_api.publish_section_version.return_value = SectionVersionResult( + success=True, + message="Published", + data={"id": "sv-123"}, + ) + + result = await use_case_live.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello world", + workflow_id="wf-1", + workflow_name="draft", + workflow_metadata={}, + ) + + mock_bricks_api.publish_section_version.assert_called_once() + assert result.success is True + assert result.dry_run is False + + async def test_live_passes_correct_payload( + self, + use_case_live: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When dry_run=False, should pass the correctly constructed payload.""" + mock_bricks_api.publish_section_version.return_value = SectionVersionResult( + success=True, + message="OK", + ) + + await use_case_live.execute( + project_unique_id="proj-abc", + section_key="results", + content="Final results", + workflow_id="wf-final", + workflow_name="final_review", + workflow_metadata={"approved_by": "admin"}, + ) + + call_args = mock_bricks_api.publish_section_version.call_args + payload = call_args[1].get("payload", call_args[0][0] if call_args[0] else None) + if payload is None: + payload = call_args.kwargs.get("payload") + assert payload["project_unique_id"] == "proj-abc" + assert payload["section_key"] == "results" + assert payload["content"] == "Final results" + assert payload["workflow_id"] == "wf-final" + assert payload["workflow_name"] == "final_review" + assert payload["workflow_metadata"] == {"approved_by": "admin"} + + async def test_workflow_metadata_defaults_to_empty_dict( + self, + use_case_dry_run: PublishSectionVersionUseCase, + ) -> None: + """When workflow_metadata is None, should send {} in the payload.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert result.payload_preview["workflow_metadata"] == {} + + async def test_workflow_metadata_none_in_live_payload( + self, + use_case_live: PublishSectionVersionUseCase, + mock_bricks_api: AsyncMock, + ) -> None: + """When workflow_metadata is None in live mode, payload should contain {}.""" + mock_bricks_api.publish_section_version.return_value = SectionVersionResult( + success=True, + ) + + await use_case_live.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + call_args = mock_bricks_api.publish_section_version.call_args + payload = call_args[1].get("payload", call_args[0][0] if call_args[0] else None) + if payload is None: + payload = call_args.kwargs.get("payload") + assert payload["workflow_metadata"] == {} + + async def test_live_propagates_api_errors( + self, + mock_bricks_api: AsyncMock, + ) -> None: + """Should propagate errors from the API when publishing live.""" + mock_bricks_api.publish_section_version.side_effect = ConnectionError( + "API connection failed" + ) + use_case = PublishSectionVersionUseCase( + bricks_api=mock_bricks_api, + dry_run=False, + ) + + with pytest.raises(ConnectionError, match="API connection failed"): + await use_case.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + async def test_dry_run_returns_section_version_result( + self, + use_case_dry_run: PublishSectionVersionUseCase, + ) -> None: + """Dry run result should be a SectionVersionResult instance.""" + result = await use_case_dry_run.execute( + project_unique_id="proj-123", + section_key="intro", + content="Hello", + workflow_id="wf-1", + workflow_name="draft", + ) + + assert isinstance(result, SectionVersionResult) \ No newline at end of file diff --git a/tests/unit/test_read_bricks_document_use_case.py b/tests/unit/test_read_bricks_document_use_case.py new file mode 100644 index 0000000..50f91cd --- /dev/null +++ b/tests/unit/test_read_bricks_document_use_case.py @@ -0,0 +1,237 @@ +"""Tests for ReadBricksDocumentUseCase — follows exact pattern of test_read_file_use_case.py. + +Downloads from Bricks API → saves temp file → calls Kreuzberg extract → returns DocumentContent. +Ensures temp file cleanup on both success and error paths. +""" + +import os +from unittest.mock import AsyncMock + +import pytest + +from application.use_cases.read_bricks_document_use_case import ( + ReadBricksDocumentUseCase, +) +from domain.ports.document_reader_port import DocumentContent, DocumentMetadata + + +class TestReadBricksDocumentUseCase: + """Tests for ReadBricksDocumentUseCase.""" + + async def test_execute_downloads_from_bricks_api( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should call bricks_api.download_document with document_id and project_id.""" + mock_bricks_api.download_document.return_value = (b"file content", "report.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="extracted text", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(document_id="doc-123", project_id="proj-456") + + mock_bricks_api.download_document.assert_called_once_with( + document_id="doc-123", project_id="proj-456" + ) + + async def test_execute_returns_document_content( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should return DocumentContent from Kreuzberg extraction.""" + mock_bricks_api.download_document.return_value = (b"pdf data", "doc.pdf") + expected = DocumentContent( + content="extracted text from bricks document", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[], + ) + mock_document_reader.extract_content.return_value = expected + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + result = await use_case.execute( + document_id="doc-456", project_id="proj-1" + ) + + assert result.content == "extracted text from bricks document" + assert result.metadata.format_type == "pdf" + assert result.metadata.mime_type == "application/pdf" + + async def test_execute_calls_document_reader_with_temp_file( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should save temp file and pass its path to document_reader.extract_content.""" + mock_bricks_api.download_document.return_value = (b"pdf binary", "report.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="text", + metadata=DocumentMetadata(format_type="pdf"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(document_id="doc-1", project_id="proj-1") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert tmp_file_path.endswith(".pdf") + assert os.path.dirname(tmp_file_path) == str(tmp_path) + + async def test_execute_temp_file_suffix_matches_filename_extension( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should use the filename extension from the download as temp file suffix.""" + mock_bricks_api.download_document.return_value = (b"data", "spreadsheet.xlsx") + mock_document_reader.extract_content.return_value = DocumentContent( + content="spreadsheet data", + metadata=DocumentMetadata(format_type="xlsx"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(document_id="doc-xlsx", project_id="proj-1") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert tmp_file_path.endswith(".xlsx") + + async def test_execute_propagates_download_failure( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should propagate errors when download fails.""" + mock_bricks_api.download_document.side_effect = ConnectionError( + "S3 download failed" + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + with pytest.raises(ConnectionError, match="S3 download failed"): + await use_case.execute(document_id="doc-missing", project_id="proj-1") + + async def test_execute_propagates_kreuzberg_failure( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should propagate errors when Kreuzberg extraction fails.""" + mock_bricks_api.download_document.return_value = (b"corrupt data", "broken.pdf") + mock_document_reader.extract_content.side_effect = ValueError( + "Unsupported format" + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + with pytest.raises(ValueError, match="Unsupported format"): + await use_case.execute(document_id="doc-broken", project_id="proj-1") + + async def test_execute_cleans_up_temp_file( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should delete the temp file after successful extraction.""" + mock_bricks_api.download_document.return_value = (b"data", "report.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="text", + metadata=DocumentMetadata(format_type="txt"), + tables=[], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + await use_case.execute(document_id="doc-report", project_id="proj-1") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert not os.path.exists(tmp_file_path) + + async def test_execute_cleans_up_temp_file_on_error( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should delete the temp file even when Kreuzberg extraction fails.""" + mock_bricks_api.download_document.return_value = (b"data", "report.pdf") + mock_document_reader.extract_content.side_effect = ValueError("bad format") + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + with pytest.raises(ValueError): + await use_case.execute(document_id="doc-1", project_id="proj-1") + + call_args = mock_document_reader.extract_content.call_args + tmp_file_path = call_args[0][0] + assert not os.path.exists(tmp_file_path) + + async def test_execute_with_tables( + self, + mock_bricks_api: AsyncMock, + mock_document_reader: AsyncMock, + tmp_path, + ) -> None: + """Should include tables in the returned DocumentContent.""" + from domain.ports.document_reader_port import TableData + + mock_bricks_api.download_document.return_value = (b"data", "financials.pdf") + mock_document_reader.extract_content.return_value = DocumentContent( + content="text with table", + metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"), + tables=[TableData(markdown="| A | B |\n|---|---|\n| 1 | 2 |")], + ) + use_case = ReadBricksDocumentUseCase( + bricks_api=mock_bricks_api, + document_reader=mock_document_reader, + output_dir=str(tmp_path), + ) + + result = await use_case.execute( + document_id="doc-financials", project_id="proj-1" + ) + + assert len(result.tables) == 1 + assert result.tables[0].markdown == "| A | B |\n|---|---|\n| 1 | 2 |" \ No newline at end of file