diff --git a/.env.example b/.env.example index e769191..690660f 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,12 @@ # Example environment variables for SmartBaseAI -# Copy this file to `.env` and adjust the values as needed +# Copy this file to `.env` and adjust values. +# Anything set here can be overridden at runtime via the admin Settings page +# (values saved there win over env vars). -# Secret key used for signing JWT tokens +# Required: JWT signing key SECRET_KEY=change-this-secret + +# LLM provider credentials (any or all) +# ANTHROPIC_API_KEY=sk-ant-... +# OPENAI_API_KEY=sk-... +# OLLAMA_BASE_URL=http://localhost:11434 diff --git a/.gitignore b/.gitignore index 87bddad..87aab64 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,11 @@ # Python cache files __pycache__/ *.pyc +.pytest_cache/ # Environment files .env +frontend/.env.local # Node dependencies node_modules/ @@ -11,3 +13,20 @@ node_modules/ # Build artifacts dist/ build/ +frontend/dist/ + +# Runtime state — never commit +vector_store/ +data/*.db +data/uploads/ +data/financebench/ +data/_downloads/ +embeddings.json + +# Virtual envs +venv/ +.venv/ + +# IDE +.idea/ +.vscode/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..569e571 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,63 @@ +# Changelog + +All notable changes to SmartBaseAI are documented here. Dates are approximate and derived from git history. + +## [Unreleased] — upgrade branch + +### Fixed +- **`SECRET_KEY` now read from the environment.** Both `api/routes_auth.py` and `api/auth_middleware.py` previously hardcoded `"super_secret"`, silently ignoring the `.env` value. A new `api/config.py` centralizes the setting and loads `.env` via `python-dotenv` when available. +- **File uploads are now ingested into the tenant vector store.** `POST /files/upload` previously only wrote the file to disk. 
Text formats (`.txt`, `.md`, `.csv`, `.log`) are now pushed through `TenantVectorStore.add_document` so uploaded content is immediately searchable by chat. Unsupported binary formats still save cleanly and log a skip. +- **Tenants created via the admin API are now immediately visible to the chat route.** `TenantManager` previously cached `tenants.json` in memory at construction time, so the `tenant_manager` instance owned by `routes_chat.py` never saw tenants created via the `manager` instance owned by `routes_admin.py` until the process restarted. Now `TenantManager` re-reads storage on every call. +- **Duplicate tenant / user creation returns `409 Conflict`** instead of surfacing a `ValueError` as a generic `500`. + +### Added +- `scripts/smoke_test.py` — end-to-end test that exercises auth, admin, tenant CRUD, user CRUD, file upload with ingestion, chat RAG path, and chat DB-lookup path against a live server. +- `docs/architecture.md` with a Mermaid diagram of the orchestration flow. +- Recruiter-facing `README.md` rewrite — hero, elevator pitch, quickstart, tech stack, contact. +- `TODO.md` for out-of-scope ideas surfaced during polish. + +### Changed +- `python-dotenv` added to `requirements.txt`. 
+ +--- + +## Historical milestones (from git log) + +### Auth and multi-tenancy +- JWT auth with role-based access (`super_admin` / `admin` / `user`) +- Multi-tenant user model scoped by `tenant_id` +- Legacy plaintext password migration with bcrypt rehash on verify +- Schema migrations for legacy `users` tables (rename `password` → `hashed_password`, backfill timestamps) + +### Retrieval + orchestration +- `ResponseGenerator` introduced as the three-source orchestrator (history + DB + RAG) +- Fusion layer — DB-preferred with RAG as supplemental context +- Persistent per-tenant Chroma vector store via `TenantVectorStore` +- Hybrid search (keyword ∪ semantic) returned in Chroma-compatible format +- `exact_lookup` fast path for date-keyed structured queries (`data/<tenant_id>.db`) +- CUDA auto-detected for `sentence-transformers` with CPU fallback + +### Model backends +- Ollama integration with real HTTP calls and streaming-JSON handling +- OpenAI and Anthropic model wrappers under `ai/models/` +- Local Llama support + +### Data ingestion +- `ETLManager` with pluggable connectors (Postgres / MySQL / MongoDB / HTTP API) +- Cleaners + metadata generator +- Script to load CSV data into a tenant's structured sample DB + +### API +- FastAPI app with routers for auth, chat, admin, and files +- CORS middleware +- Conversation + audit log repositories (SQLite) +- Tenant listing and per-tenant chat session management + +### Frontend +- React 19 + Vite + Tailwind frontend under `frontend/` +- Login, chat, files, and admin pages with role-aware views +- Axios client with configurable `VITE_API_BASE_URL` +- Tenant selection for super-admins, persistent chat sessions + +### Testing +- `pytest` suites for AI pipeline, API routes, ingestion, hybrid search, and user migration diff --git a/README.md b/README.md index ef87b81..14bc4ce 100644 --- a/README.md +++ b/README.md @@ -1,153 +1,165 @@ -![SmartBaseAI Logo](logo.png) +

+ SmartBaseAI +

-SmartBaseAI is an open-source starter kit for building multi-tenant chat -applications. It includes a FastAPI backend, simple Next.js frontends and a -collection of scripts for managing tenants and embeddings. +

SmartBaseAI

-## Environment setup +

+ A multi-tenant LLM platform that grounds answers in both your structured databases and your unstructured documents. +

-Before running the API create a `.env` file in the project root. You can start by copying the provided example: +

+ Quickstart · + Architecture · + Demo · + Tech stack · + Contact +

-```bash -cp .env.example .env -``` - -Edit the file and set `SECRET_KEY` to a secure value that will be used for signing JWT tokens. Any variables defined in `.env` are read by the backend on startup. +--- -## Backend setup +## What is it -Install Python dependencies and run the API server: +SmartBaseAI is an open-source backend + web UI for building chat experiences over private knowledge bases. Unlike a vanilla RAG starter, every chat turn runs through an **orchestrator** that merges three sources before calling the LLM: -```bash -pip install -r requirements.txt -python scripts/run_server.py --reload -``` +1. **Conversation history** — maintained per session. +2. **Exact structured lookups** — the orchestrator detects entities in the user's message (e.g. an ISO date) and pulls the matching row from the tenant's SQL / Mongo / API source. +3. **Hybrid RAG** — per-tenant Chroma store with keyword ∪ semantic retrieval over uploaded documents. -The server listens on port `8000` by default. +Each tenant is isolated: its own persistent vector store, its own DB connector, its own model backend. LLMs are pluggable — **OpenAI, Anthropic, Ollama, or a local Llama** — so a tenant can run fully on-prem or fully hosted without code changes. -## Frontend setup +Built for environments where "hallucinate a close price" is not acceptable: the exact DB row is preferred, RAG is treated as supplemental context, and the fallback is an explicit `"No information"` rather than a fabricated answer. -Two Next.js applications are provided. Each must be started separately. +## Demo -### Chat interface + +

Demo GIF coming soon — see docs/screenshots/.

-```bash -cd ui/web -npm install -npm run dev -``` +## Quickstart -### Admin panel +**Requirements:** Python 3.10+, Node 18+, optionally a running Ollama instance (`ollama pull llama3`) or an OpenAI / Anthropic API key. ```bash -cd ui/admin_panel -npm install -npm run dev -``` - -## Run the web app - -Start the API server and a frontend to use SmartBaseAI in the browser. +# 1. Clone +git clone https://github.com/danrixd/smartbaseai.git +cd smartbaseai -```bash -# terminal 1: backend -python scripts/run_server.py --reload - -# terminal 2: chat interface -cd ui/web -npm install -npm run dev -``` +# 2. Configure +cp .env.example .env +# edit .env: set SECRET_KEY and (optionally) OPENAI_API_KEY / ANTHROPIC_API_KEY -The chat interface is available at . To manage tenants, run the admin panel in another terminal: +# 3. Backend +pip install -r requirements.txt +python scripts/run_server.py --reload # http://localhost:8000 -```bash -cd ui/admin_panel +# 4. Frontend (in a second terminal) +cd frontend npm install -npm run dev -- -p 3001 +npm run dev # http://localhost:5173 ``` -Visit for administrative tasks. +Open <http://localhost:5173>, log in, pick a tenant, and start chatting. The default admin credentials live in `.env.example` — change them before exposing the service.
-## Utility scripts - -### Create a tenant +### Creating a tenant ```bash -python scripts/setup_tenant.py tenant1 --name "Tenant 1" --db-type postgres \ - --db-config '{"host": "localhost", "user": "app"}' +python scripts/setup_tenant.py tenant1 \ + --name "Tenant 1" \ + --db-type postgres \ + --db-config '{"host": "localhost", "user": "app"}' \ --model-type ollama --model-name llama3 ``` -### Build embeddings +### Ingesting documents ```bash -python scripts/build_embeddings.py --source docs/ --output embeddings.json \ +python scripts/build_embeddings.py \ + --source docs/ \ + --output embeddings.json \ --embedder local ``` -## Example API usage - -### Authenticate +### Running the tests ```bash -curl -X POST -H "Content-Type: application/json" \ - -d '{"username": "admin", "password": "ChangeThis123!"}' \ - http://localhost:8000/auth/login +pip install -r requirements.txt +pytest -q ``` -Use the `access_token` returned above when calling other endpoints. +## Architecture -### Send a chat message +```mermaid +flowchart LR + U[User] --> FE[React + Vite UI] + FE -->|JWT| API[FastAPI
auth · chat · admin · files] -```bash -curl -H "Authorization: Bearer " \ - -H "Content-Type: application/json" \ - -X POST http://localhost:8000/chat/message \ - -d '{"session_id": "s1", "tenant_id": "t1", "message": "hello"}' -``` + API --> ORCH[ResponseGenerator
orchestrator] -### Tenant operations + ORCH --> HIST[(Conversation
history)] + ORCH --> DB[(Tenant DB
Postgres · MySQL · Mongo · API)] + ORCH --> RAG[Hybrid retrieval
keyword ∪ semantic] -```bash -# list tenants -curl -H "Authorization: Bearer " http://localhost:8000/admin/tenants + RAG --> VS[(Per-tenant Chroma
MiniLM-L6-v2)] + + ORCH --> LLM{LLM backend} + LLM --> OAI[OpenAI] + LLM --> ANT[Anthropic] + LLM --> OLL[Ollama] + LLM --> LLA[Local Llama] -# get configuration for a tenant -curl -H "Authorization: Bearer " http://localhost:8000/admin/tenants/t1 + ING[ETL Manager] -->|clean + metadata| VS + SRC[(Source DBs / APIs)] --> ING ``` -## Testing +Full write-up: **[docs/architecture.md](docs/architecture.md)**. -Install the Python dependencies before running the test suite: +Key files if you want to read the code: -```bash -pip install -r requirements.txt -``` +- `chatbot/response_generator.py` — the three-source orchestrator. +- `ai/rag_pipeline.py` — tenant-aware RAG with FAISS fallback. +- `ai/vector_stores/chroma_store.py` — persistent per-tenant store with hybrid query. +- `ingestion/etl_manager.py` — pluggable DB connectors → clean → metadata → store. +- `api/app.py` — FastAPI entrypoint; routers under `api/routes_*.py`. + +## Tech stack -Run all unit tests with: +| Layer | Choice | +|--------------|------------------------------------------------------------------| +| Backend | Python 3.10+, FastAPI, Pydantic, JWT auth, pytest | +| Retrieval | ChromaDB (persistent), sentence-transformers `all-MiniLM-L6-v2`, FAISS fallback, hybrid keyword + semantic | +| LLMs | OpenAI, Anthropic, Ollama, local Llama — pluggable via `ai/models/` | +| Data sources | Postgres, MySQL, MongoDB, generic HTTP APIs | +| Frontend | React 19, Vite, Tailwind, React Router, Axios | +| Infra | GPU auto-detected for embeddings (CUDA → CPU fallback) | + +## Example API usage ```bash -pytest -q +# 1. Authenticate +curl -X POST -H "Content-Type: application/json" \ + -d '{"username": "admin", "password": "ChangeThis123!"}' \ + http://localhost:8000/auth/login + +# 2. 
Chat +curl -H "Authorization: Bearer <access_token>" \ + -H "Content-Type: application/json" \ + -X POST http://localhost:8000/chat/message \ + -d '{"session_id": "s1", "tenant_id": "tenant1", "message": "What was the close on 2024-03-15?"}' ``` -## Extending the response generator +## Project status -The `ResponseGenerator` combines conversation history, structured data from a -tenant database and unstructured context from the vector store. Additional data -sources can be integrated by implementing new helper methods that fetch and -format their results before calling the language model. Each source should be -encapsulated in its own method and added to the prompt in -`chatbot/response_generator.py`. +Active personal project. See [CHANGELOG.md](CHANGELOG.md) for milestones and [TODO.md](TODO.md) for the backlog. -## Contributing +## Contact -Contributions are welcome! Please fork the repository and open a pull request -for any enhancements or bug fixes. For large changes, open an issue first to -discuss the proposed work. Make sure to run the test suite with `pytest -q` -before submitting a PR. +Built by **Dan Ringart** — algorithm developer, B.Sc. Physics (Tel Aviv University). Background in simulations, quant trading systems, and LLM-orchestrated platforms. -## License +- Website: [danringart.com](https://danringart.com) +- GitHub: [@danrixd](https://github.com/danrixd) + +Feedback, issues, and PRs welcome. -This project is licensed under the [MIT License](LICENSE). +## License +MIT — see [LICENSE](LICENSE). diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..2a07f83 --- /dev/null +++ b/TODO.md @@ -0,0 +1,32 @@ +# TODO + +Ideas surfaced during the upgrade / polish pass. None of these are scoped for the current branch — they're a backlog, not a plan. + +## Polish still to do +- Capture a demo GIF of the running app (login → upload doc → chat → DB-lookup answer) and embed in the README.
+- Add 3–5 PNG screenshots to `docs/screenshots/` (login, chat, files, admin). +- Run `gh repo edit` to set topics and description (commands drafted in `docs/gh-repo-edit.sh`). +- Consider a public demo instance on Fly.io / Railway and link from README. + +## Bugs worth fixing +- `datetime.utcnow()` is deprecated and spams 70+ warnings during test runs. Migrate to `datetime.now(UTC)` in `db/*_repository.py`, `api/routes_auth.py`, and `ingestion/metadata_generator.py`. +- `requirements.txt` has no version pins — reproducibility will drift. Pin or move to `pyproject.toml` + `uv lock`. +- `ui/web` and `ui/admin_panel` are stale Next.js skeletons that are no longer referenced — delete or clearly mark as deprecated. +- `config/tenant_config.py` is imported by `db/query_engine.py` but the **actual** orchestrator uses `exact_lookup` which reads `data/<tenant_id>.db` directly and ignores tenant config. These two code paths should converge. +- CORS `origins` in `api/app.py` only includes `http://localhost:5173` — add `http://127.0.0.1:5173` and make configurable via env. +- `SECRET_KEY` still has a `"super_secret"` fallback for test ergonomics. Production mode should refuse to start without one. + +## Feature ideas +- Binary file ingestion: PDF → text (pdfminer / pypdf), DOCX → text (python-docx), then push through the same ingest path as `.txt`. +- Chunking strategy: current upload ingestion stores the **whole file** as one document. Should chunk on ~500 token windows with overlap for better retrieval. +- Streaming responses from the LLM back to the frontend (SSE). +- Model routing: allow a tenant to use different models for cheap/retrieval vs. final generation. +- Intent recognition is a stub — wire `chatbot/intent_recognition.py` into the orchestrator so non-RAG intents (e.g. "show me my files") route to structured handlers instead of the LLM. +- Audit log viewer in the admin UI. +- Rate limiting per tenant. +- Switch SQLite system DB to a proper Postgres for production deploys.
+ +## Tests worth adding +- End-to-end test that exercises the full three-source fusion (DB + RAG + history) — currently `tests/test_api.py` covers each in isolation but not the merge. +- Test for the tenant-visibility fix: create tenant via admin route, immediately chat under it, assert 200 not 404. +- Test for upload → chat retrieval roundtrip (the new auto-ingest path). diff --git a/ai/chunking.py b/ai/chunking.py new file mode 100644 index 0000000..0d33e34 --- /dev/null +++ b/ai/chunking.py @@ -0,0 +1,154 @@ +"""Text chunking for vault ingestion. + +Two-tier strategy: + +1. **Section split** — if the document has markdown headings (``^##``, + ``^###``, etc.), split at heading boundaries and chunk each section + independently. This keeps 10-K sections like *Risk Factors*, *MD&A*, + or *Consolidated Balance Sheet* in their own chunks with the section + title as the first line. + +2. **Size-bounded packing** — within each section, pack paragraphs until + ~``target_tokens`` words, respecting a ``MAX_CHUNK_CHARS`` hard cap. + Oversized paragraphs (pypdf table blobs with no line breaks) are + force-split into whitespace-aligned slices so the embedder never sees + anything larger than the cap. + +Each chunk is just a plain string. The caller is responsible for +attaching retrieval metadata (``{filename, section, chunk_idx, ...}``) +when it adds the chunk to a vector store. This module only cares about +producing well-shaped chunks.
+""" + +from __future__ import annotations + +import re + +MAX_CHUNK_CHARS = 2000 +HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE) + + +def _hard_split(s: str, max_chars: int) -> list[str]: + """Force-split an oversize blob that has no paragraph breaks.""" + parts: list[str] = [] + i = 0 + while i < len(s): + end = min(i + max_chars, len(s)) + if end < len(s): + window_start = end - max_chars // 10 + slice_ = s[window_start:end] + m = re.search(r"\s(?=\S*$)", slice_) + if m: + end = window_start + m.start() + parts.append(s[i:end].strip()) + i = end + return [p for p in parts if p] + + +def _pack_paragraphs(paragraphs: list[str], target_tokens: int) -> list[str]: + """Greedy-pack paragraphs into chunks ~target_tokens words, capped by chars.""" + chunks: list[str] = [] + buf: list[str] = [] + size_words = 0 + size_chars = 0 + for p in paragraphs: + if len(p) > MAX_CHUNK_CHARS: + # Flush current buffer, then emit the hard-split pieces one at a time + if buf: + chunks.append("\n\n".join(buf)) + buf = [] + size_words = 0 + size_chars = 0 + for piece in _hard_split(p, MAX_CHUNK_CHARS): + chunks.append(piece[:MAX_CHUNK_CHARS]) + continue + words = len(p.split()) + added_chars = len(p) + 2 + if buf and (size_words + words > target_tokens or size_chars + added_chars > MAX_CHUNK_CHARS): + chunks.append("\n\n".join(buf)) + buf = [p] + size_words = words + size_chars = len(p) + else: + buf.append(p) + size_words += words + size_chars += added_chars + if buf: + chunks.append("\n\n".join(buf)) + return [c for c in chunks if c.strip()] + + +def _split_into_sections(text: str) -> list[tuple[str, str]]: + """Split a markdown document into [(section_title, section_body), ...]. + + If there are no ``##`` headings at all, returns a single untitled + section containing the whole document. This preserves the old flat + behaviour for tenants whose files aren't markdown-with-headings. 
+ """ + matches = list(HEADING_RE.finditer(text)) + if not matches: + return [("", text)] + + sections: list[tuple[str, str]] = [] + # Preamble before the first heading (if any) is kept as an untitled section + first_start = matches[0].start() + if first_start > 0: + preamble = text[:first_start].strip() + if preamble: + sections.append(("", preamble)) + + for idx, m in enumerate(matches): + title = m.group(2).strip() + body_start = m.end() + body_end = matches[idx + 1].start() if idx + 1 < len(matches) else len(text) + body = text[body_start:body_end].strip() + if not body: + continue + sections.append((title, body)) + return sections + + +def chunk_text(text: str, target_tokens: int = 500) -> list[str]: + """Return a list of chunks (pure strings) ready for the embedder. + + Each chunk is prefixed with the section title (as a bold line) when + the document has markdown headings — this biases semantic similarity + toward matching the section name. + """ + sections = _split_into_sections(text) + out: list[str] = [] + for title, body in sections: + paragraphs = [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()] + if not paragraphs: + continue + packed = _pack_paragraphs(paragraphs, target_tokens) + for chunk in packed: + if title: + out.append(f"**{title}**\n\n{chunk}"[:MAX_CHUNK_CHARS]) + else: + out.append(chunk[:MAX_CHUNK_CHARS]) + return out + + +def chunk_with_sections(text: str, target_tokens: int = 500) -> list[dict]: + """Like ``chunk_text`` but returns rich dicts with section metadata. 
+ + Output: ``[{"text": str, "section": str, "chunk_idx": int}, ...]`` + """ + sections = _split_into_sections(text) + out: list[dict] = [] + for title, body in sections: + paragraphs = [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()] + if not paragraphs: + continue + packed = _pack_paragraphs(paragraphs, target_tokens) + for i, chunk in enumerate(packed): + full = f"**{title}**\n\n{chunk}" if title else chunk + out.append( + { + "text": full[:MAX_CHUNK_CHARS], + "section": title, + "chunk_idx": len(out), + } + ) + return out diff --git a/ai/models/anthropic_model.py b/ai/models/anthropic_model.py index a3f5a19..7bee2ef 100644 --- a/ai/models/anthropic_model.py +++ b/ai/models/anthropic_model.py @@ -1,14 +1,139 @@ -"""Simplified Anthropic model wrapper.""" +"""Anthropic Claude model wrapper. +Uses the official ``anthropic`` Python SDK. Defaults to ``claude-opus-4-6`` and +applies ``cache_control: ephemeral`` on the stable system +prompt so repeated turns in a session can read from the prompt cache. + +If the SDK is not installed or no API key is available, ``generate`` returns +a clearly-marked stub so the pipeline stays functional in offline / demo mode +without silently pretending to be connected. +""" + +from __future__ import annotations + +import logging from typing import Any +from db import settings_repository + +logger = logging.getLogger("smartbaseai.anthropic") + +DEFAULT_MODEL = "claude-opus-4-6" +DEFAULT_MAX_TOKENS = 1024 +DEFAULT_SYSTEM_PROMPT = ( + "You are SmartBaseAI, a multi-tenant assistant that answers user questions " + "using the Context block provided below. Ground every answer in the Context. " + "If the Context does not contain the answer, say you don't have that information " + "rather than guessing. Prefer exact numeric values from the structured data " + "section when present."
+) + +try: + import anthropic # type: ignore +except Exception: # pragma: no cover - optional dependency + anthropic = None # type: ignore + class AnthropicModel: - """Generate text using the Anthropic API (mocked).""" + """Generate text using the Anthropic Messages API.""" - def __init__(self, api_key: str | None = None) -> None: - self.api_key = api_key + def __init__( + self, + api_key: str | None = None, + model_name: str = DEFAULT_MODEL, + max_tokens: int = DEFAULT_MAX_TOKENS, + **_: Any, + ) -> None: + self.api_key = api_key or settings_repository.get("anthropic_api_key") + self.model_name = model_name or DEFAULT_MODEL + self.max_tokens = max_tokens + self._client: "anthropic.Anthropic | None" = None + if anthropic is not None and self.api_key: + try: + self._client = anthropic.Anthropic(api_key=self.api_key) + except Exception: + self._client = None + + def _split_prompt(self, prompt: str) -> tuple[str, str]: + """Split a rendered prompt into (system, user_content). + + The orchestrator produces a single flat prompt string. For caching to + be effective we want the stable system header in ``system`` and the + per-turn Context + user question in ``messages``. We pass the whole + rendered prompt as the user turn and use a static system prompt — the + prompt itself already carries the Context.
+ """ + return DEFAULT_SYSTEM_PROMPT, prompt def generate(self, prompt: str, **kwargs: Any) -> str: - """Return a deterministic response for the prompt.""" - return f"[Anthropic] Response to: {prompt}" + if self._client is None: + missing = "SDK not installed" if anthropic is None else "ANTHROPIC_API_KEY not set" + return f"[Anthropic unavailable — {missing}] {self._preview(prompt)}" + + system_prompt, user_content = self._split_prompt(prompt) + try: + response = self._client.messages.create( + model=self.model_name, + max_tokens=self.max_tokens, + system=[ + { + "type": "text", + "text": system_prompt, + "cache_control": {"type": "ephemeral"}, + } + ], + messages=[{"role": "user", "content": user_content}], + ) + except anthropic.AuthenticationError as e: # type: ignore[attr-defined] + return f"[Anthropic auth error] {e}" + except anthropic.RateLimitError as e: # type: ignore[attr-defined] + return f"[Anthropic rate-limited] {e}" + except anthropic.APIError as e: # type: ignore[attr-defined] + return f"[Anthropic API error {getattr(e, 'status_code', '?')}] {e}" + except Exception as e: + return f"[Anthropic unexpected error] {e}" + + # Log prompt-cache usage so we can verify the cache_control block + # is actually being hit across turns. Operators care about the + # cache_read_input_tokens field — if it stays 0 across repeated + # queries, the prefix is changing somewhere (silent invalidator). 
+ try: + usage = response.usage + logger.info( + "anthropic usage: input=%s cache_create=%s cache_read=%s output=%s (model=%s)", + getattr(usage, "input_tokens", None), + getattr(usage, "cache_creation_input_tokens", None), + getattr(usage, "cache_read_input_tokens", None), + getattr(usage, "output_tokens", None), + self.model_name, + ) + except Exception: + pass + + parts: list[str] = [] + for block in response.content: + if getattr(block, "type", None) == "text": + parts.append(block.text) + return "".join(parts).strip() or "[Anthropic returned no text]" + + @staticmethod + def _preview(prompt: str) -> str: + head = prompt.strip().splitlines() + return head[-1][:120] if head else "" + + @staticmethod + def ping(api_key: str | None = None) -> tuple[bool, str]: + """Quick connectivity check — used by ``/admin/models/status``.""" + if anthropic is None: + return False, "anthropic SDK not installed" + key = api_key or settings_repository.get("anthropic_api_key") + if not key: + return False, "no API key configured" + try: + client = anthropic.Anthropic(api_key=key) + client.models.list(limit=1) + return True, "ok" + except anthropic.AuthenticationError: # type: ignore[attr-defined] + return False, "invalid API key" + except Exception as e: + return False, f"{type(e).__name__}: {e}" diff --git a/ai/models/ollama_model.py b/ai/models/ollama_model.py index b6e2809..0494b7a 100644 --- a/ai/models/ollama_model.py +++ b/ai/models/ollama_model.py @@ -1,49 +1,80 @@ - """Wrapper for interacting with a local Ollama server.""" from __future__ import annotations -from typing import Any import json +from typing import Any + +from db import settings_repository + +DEFAULT_BASE_URL = "http://localhost:11434" -try: # requests may not be installed in all environments - import requests +try: + import requests # type: ignore except Exception: # pragma: no cover - dependency missing - requests = None # type: ignore[misc] + requests = None # type: ignore class OllamaModel: """Generate 
text using an Ollama model if available.""" - def __init__(self, model_name: str = "llama3.2", base_url: str = "http://localhost:11434") -> None: + def __init__( + self, + model_name: str = "llama3.2", + base_url: str | None = None, + **_: Any, + ) -> None: self.model_name = model_name - self.base_url = base_url.rstrip("/") + self.base_url = ( + base_url + or settings_repository.get("ollama_base_url") + or DEFAULT_BASE_URL + ).rstrip("/") def generate(self, prompt: str, **kwargs: Any) -> str: """Return a response from Ollama or a fallback message.""" - if requests is not None: - url = f"{self.base_url}/api/generate" - payload = {"model": self.model_name, "prompt": prompt, "stream": False} + if requests is None: + return "[Ollama unavailable — requests not installed]" + url = f"{self.base_url}/api/generate" + payload = {"model": self.model_name, "prompt": prompt, "stream": False} + try: + r = requests.post(url, json=payload, timeout=30) + r.raise_for_status() try: - r = requests.post(url, json=payload, timeout=10) - r.raise_for_status() - try: - data = r.json() - if isinstance(data, dict) and "response" in data: - return str(data["response"]).strip() - except ValueError: - text = "" - for line in r.text.splitlines(): - try: - obj = json.loads(line) - except Exception: - continue - text += obj.get("response", "") - if obj.get("done"): - break - if text: - return text.strip() - except Exception: # pragma: no cover - network failures or missing dep - pass - return f"[Ollama] Response to: {prompt}" + data = r.json() + if isinstance(data, dict) and "response" in data: + return str(data["response"]).strip() or "[Ollama returned no text]" + except ValueError: + text = "" + for line in r.text.splitlines(): + try: + obj = json.loads(line) + except Exception: + continue + text += obj.get("response", "") + if obj.get("done"): + break + if text: + return text.strip() + except Exception as e: # network failure, bad response, etc. 
+ return f"[Ollama unreachable at {self.base_url}] {type(e).__name__}: {e}" + return "[Ollama returned unexpected response shape]" + @staticmethod + def ping(base_url: str | None = None) -> tuple[bool, str]: + """Quick connectivity check — used by ``/admin/models/status``.""" + if requests is None: + return False, "requests not installed" + url = ( + base_url + or settings_repository.get("ollama_base_url") + or DEFAULT_BASE_URL + ).rstrip("/") + "/api/tags" + try: + r = requests.get(url, timeout=3) + r.raise_for_status() + data = r.json() + models = data.get("models", []) if isinstance(data, dict) else [] + return True, f"ok ({len(models)} models available)" + except Exception as e: + return False, f"{type(e).__name__}: {e}" diff --git a/ai/models/openai_model.py b/ai/models/openai_model.py index c60127a..556257b 100644 --- a/ai/models/openai_model.py +++ b/ai/models/openai_model.py @@ -1,14 +1,91 @@ -"""Simplified OpenAI language model wrapper.""" +"""OpenAI GPT model wrapper. + +Uses the official ``openai`` Python SDK (>=1.0). If the SDK is missing or no +API key is available, ``generate`` returns a clearly-marked stub so the +pipeline stays functional in offline / demo mode. +""" + +from __future__ import annotations from typing import Any +from db import settings_repository + +DEFAULT_MODEL = "gpt-4o-mini" +DEFAULT_MAX_TOKENS = 1024 +DEFAULT_SYSTEM_PROMPT = ( + "You are SmartBaseAI, a multi-tenant assistant that answers user questions " + "using the Context block provided below. Ground every answer in the Context. " + "If the Context does not contain the answer, say you don't have that information " + "rather than guessing." 
class OpenAIModel:
    """Generate text using the OpenAI Chat Completions API.

    When the ``openai`` SDK could not be imported, no API key is available, or
    the client cannot be constructed, no client is kept and ``generate``
    returns a bracketed placeholder string instead of raising.
    """

    # Bound for the connectivity probe in ``ping``. The SDK's own defaults are
    # documented as a multi-minute timeout with automatic retries, which is far
    # too long for a status endpoint that probes every backend in sequence.
    PING_TIMEOUT_SECONDS = 5.0

    def __init__(
        self,
        api_key: str | None = None,
        model_name: str = DEFAULT_MODEL,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        **_: Any,
    ) -> None:
        """Resolve credentials and build the SDK client when possible.

        Args:
            api_key: Explicit key; when falsy the ``openai_api_key`` setting
                is used instead.
            model_name: Chat model to call; a falsy value falls back to
                ``DEFAULT_MODEL``.
            max_tokens: Completion token limit sent with every request.
            **_: Extra keyword arguments are accepted and ignored.
        """
        self.api_key = api_key or settings_repository.get("openai_api_key")
        self.model_name = model_name or DEFAULT_MODEL
        self.max_tokens = max_tokens
        self._client: "openai.OpenAI | None" = None
        if openai is not None and self.api_key:
            try:
                self._client = openai.OpenAI(api_key=self.api_key)
            except Exception:
                # Leave the client unset; generate() reports the condition.
                self._client = None

    def generate(self, prompt: str, **kwargs: Any) -> str:
        """Return the model's reply to ``prompt``.

        The prompt is sent as the user message after ``DEFAULT_SYSTEM_PROMPT``.
        ``kwargs`` are accepted but not used. Failures never raise: every
        error path returns a string starting with ``"[OpenAI"``.
        """
        if self._client is None:
            if openai is None:
                missing = "SDK not installed"
            elif not self.api_key:
                missing = "OPENAI_API_KEY not set"
            else:
                # A key was present but openai.OpenAI(...) raised in __init__.
                missing = "client initialisation failed"
            return f"[OpenAI unavailable — {missing}]"

        try:
            response = self._client.chat.completions.create(
                model=self.model_name,
                max_tokens=self.max_tokens,
                messages=[
                    {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
            )
        except openai.AuthenticationError as e:  # type: ignore[attr-defined]
            return f"[OpenAI auth error] {e}"
        except openai.RateLimitError as e:  # type: ignore[attr-defined]
            return f"[OpenAI rate-limited] {e}"
        except openai.APIError as e:  # type: ignore[attr-defined]
            return f"[OpenAI API error] {e}"
        except Exception as e:
            return f"[OpenAI unexpected error] {e}"

        try:
            return (response.choices[0].message.content or "").strip() or "[OpenAI returned no text]"
        except Exception:
            return "[OpenAI returned unexpected response shape]"

    @staticmethod
    def ping(api_key: str | None = None) -> tuple[bool, str]:
        """Connectivity and credential check that lists the account's models.

        Args:
            api_key: Key to test; when falsy the ``openai_api_key`` setting is
                used.

        Returns:
            ``(True, "ok")`` on success, otherwise ``False`` plus a short
            reason (missing SDK, missing key, invalid key, or the exception
            rendered as ``"<ExceptionType>: <message>"``).
        """
        if openai is None:
            return False, "openai SDK not installed"
        key = api_key or settings_repository.get("openai_api_key")
        if not key:
            return False, "no API key configured"
        try:
            # Short timeout and no retries keep the probe bounded, in line
            # with the 3-second timeout used by the Ollama probe.
            client = openai.OpenAI(
                api_key=key,
                timeout=OpenAIModel.PING_TIMEOUT_SECONDS,
                max_retries=0,
            )
            client.models.list()
            return True, "ok"
        except openai.AuthenticationError:  # type: ignore[attr-defined]
            return False, "invalid API key"
        except Exception as e:
            return False, f"{type(e).__name__}: {e}"
def hybrid_query_trace(self, query: str, n_results: int = 3) -> dict:
    """Run the hybrid (keyword + vector) lookup and return its full breakdown.

    Same retrieval as ``hybrid_query`` but the result also carries the store
    metadata from ``store_info()`` and the per-source candidate lists, so a
    visualizer can show what was considered and what was kept.

    More semantic candidates are requested than the caller asks for
    (``max(n_results * 4, 8)``, capped at the collection size when known) so
    the ranked candidate list is longer than the final top-K.

    Args:
        query: Free-text query.
        n_results: Size of the fused ``combined`` list. Values below zero are
            treated as zero for the final slice.

    Returns:
        Dict with ``store``, ``query``, ``n_candidates``, ``n_results``,
        ``keyword``, ``semantic`` and ``combined``. Every entry is
        ``{"document", "metadata", "score", "source"}``. Keyword entries carry
        ``score=None``. In ``combined`` a keyword entry borrows the metadata
        of the same document from the semantic results when one exists.
    """
    keyword_results = self.keyword_search(query)

    info = self.store_info()
    collection_size = info.get("count") or 0
    n_candidates = max(n_results * 4, 8)
    if collection_size:
        n_candidates = min(n_candidates, collection_size)

    semantic_docs: list[str] = []
    semantic_metas: list[dict] = []
    semantic_distances: list[float | None] = []
    try:
        raw = self.collection.query(
            query_texts=[query],
            n_results=max(n_candidates, 1),
            include=["documents", "metadatas", "distances"],
        )
        docs = raw.get("documents", [[]]) or [[]]
        metas = raw.get("metadatas", [[]]) or [[]]
        dists = raw.get("distances", [[]]) or [[]]
        if docs and isinstance(docs[0], list):
            semantic_docs = list(docs[0])
        if metas and isinstance(metas[0], list):
            semantic_metas = list(metas[0])
        if dists and isinstance(dists[0], list):
            semantic_distances = list(dists[0])
    except Exception:
        # Chroma may reject "include" on older versions or on empty stores.
        fallback = self.query(query, n_results).get("documents", [[]])
        if fallback and isinstance(fallback[0], list):
            semantic_docs = list(fallback[0])

    # Pad metas / distances so the per-document zip below never drops a doc.
    semantic_metas.extend({} for _ in range(len(semantic_docs) - len(semantic_metas)))
    semantic_distances.extend(
        None for _ in range(len(semantic_docs) - len(semantic_distances))
    )

    keyword_entries = [
        {"document": doc, "metadata": {}, "score": None, "source": "keyword"}
        for doc in keyword_results
    ]
    semantic_entries = [
        {
            "document": doc,
            "metadata": meta or {},
            "score": dist,
            "source": "semantic",
        }
        for doc, meta, dist in zip(semantic_docs, semantic_metas, semantic_distances)
    ]

    # First metadata seen per document text, used to enrich keyword hits.
    semantic_meta_by_doc: dict = {}
    for entry in semantic_entries:
        semantic_meta_by_doc.setdefault(entry["document"], entry["metadata"])

    # Combined, deduplicated by document text, keyword order first.
    seen: set[str] = set()
    combined: list[dict] = []
    for entry in keyword_entries + semantic_entries:
        key = entry["document"]
        if key in seen:
            continue
        seen.add(key)
        if entry["source"] == "keyword" and semantic_meta_by_doc.get(key):
            # The keyword entry wins the dedup, but only the semantic twin
            # knows filename/path. Copy so the ``keyword`` list stays as-is.
            entry = {**entry, "metadata": semantic_meta_by_doc[key]}
        combined.append(entry)
    # Guard against a negative n_results slicing from the end of the list.
    combined = combined[: max(n_results, 0)]

    return {
        "store": info,
        "query": query,
        "n_candidates": n_candidates,
        "n_results": n_results,
        "keyword": keyword_entries,
        "semantic": semantic_entries,  # full ranked candidate list
        "combined": combined,  # post-fusion top-K actually used
    }
def _load_env_strict() -> None:
    """Load ``.env`` and drop any ``ENV_ONLY_KEYS`` that only the shell set.

    Steps:
      1. Read the ``.env`` mapping without touching ``os.environ``.
      2. For every key in ``ENV_ONLY_KEYS`` that the mapping does not define,
         remove it from ``os.environ`` so a shell value cannot leak through.
      3. Load ``.env`` into ``os.environ`` with ``override=True`` so its
         values win over shell values for the keys it does define.

    Both python-dotenv calls are made without an explicit path so they go
    through the library's own file discovery and read the same file. Reading
    the mapping from a CWD-relative ``".env"`` while loading via discovery
    could strip keys based on one file and then load another.

    When ``python-dotenv`` is not installed this is a no-op and the process
    keeps whatever the shell provides.
    """
    if dotenv_values is None or load_dotenv is None:
        return

    dotenv_map = dotenv_values() or {}

    for key in ENV_ONLY_KEYS:
        if key not in dotenv_map:
            # .env does NOT supply this key — make sure no shell value leaks.
            os.environ.pop(key, None)

    # override=True: for keys present in .env, the file beats the shell.
    load_dotenv(override=True)
class RequestIdMiddleware(BaseHTTPMiddleware):
    """Assign or propagate an ``X-Request-ID`` on every request and log timing.

    The id is stored in ``_request_id_var`` for the duration of the request
    and echoed back in the ``X-Request-ID`` response header. One INFO line
    (method, path, status, elapsed ms) is logged per request, except for
    ``/docs``, ``/openapi.json``, ``/redoc`` and paths under ``/static``.
    """

    # Longest client-supplied id that is propagated. Anything longer is
    # replaced by a generated id.
    MAX_REQUEST_ID_LENGTH = 64

    def __init__(self, app, logger_name: str = "smartbaseai.http") -> None:
        super().__init__(app)
        self._log = logging.getLogger(logger_name)

    @classmethod
    def _accept_request_id(cls, raw):
        """Return ``raw`` when it is safe to log and echo, else ``None``.

        The header is client-controlled and ends up inside ``rid=...`` in a
        key=value log line, so only short ASCII tokens made of letters,
        digits, ``-``, ``_`` and ``.`` are accepted. Spaces or ``=`` would let
        a caller forge extra log fields.
        """
        if not raw or len(raw) > cls.MAX_REQUEST_ID_LENGTH:
            return None
        if not raw.isascii():
            return None
        if all(ch.isalnum() or ch in "-_." for ch in raw):
            return raw
        return None

    async def dispatch(self, request: Request, call_next):
        """Tag the request with an id, call downstream, then log the outcome.

        If the downstream call raises, the exception propagates and the log
        line reports status 500.
        """
        supplied = self._accept_request_id(request.headers.get("x-request-id"))
        rid = supplied or uuid.uuid4().hex[:12]
        token = _request_id_var.set(rid)
        start = time.monotonic()
        status = 500
        try:
            response = await call_next(request)
            status = response.status_code
            response.headers["X-Request-ID"] = rid
            return response
        finally:
            elapsed_ms = (time.monotonic() - start) * 1000
            path = request.url.path
            if path not in ("/docs", "/openapi.json", "/redoc") and not path.startswith(
                "/static"
            ):
                self._log.info(
                    "%s %s -> %s (%.1fms)",
                    request.method,
                    path,
                    status,
                    elapsed_ms,
                )
            # Restore the previous value so the id does not outlive the request.
            _request_id_var.reset(token)
class SettingsUpdate(BaseModel):
    """Request body for ``PUT /admin/settings``.

    Every field is optional and defaults to ``None``. ``put_settings`` drops
    fields that are ``None`` before saving, so only the keys a caller actually
    supplies are written.
    """

    anthropic_api_key: str | None = None
    openai_api_key: str | None = None
    ollama_base_url: str | None = None
@router.put("/settings")
def put_settings(data: SettingsUpdate, user=Depends(require_role(["super_admin"]))):
    """Save the supplied runtime settings and return the masked result.

    Fields left as ``None`` are skipped. An empty string is still forwarded
    to ``settings_repository.set_many``; the repository is expected to treat
    it as "clear this value and fall back to env" (confirm there). The names
    of the changed fields, never their values, are written to the audit log.
    """
    changes = {}
    for field, value in data.model_dump().items():
        if value is not None:
            changes[field] = value

    settings_repository.set_many(changes)

    changed_fields = ",".join(changes.keys())
    audit_log_repository.log_action(user["username"], "update_settings", changed_fields)
    return settings_repository.all_masked()
@router.get("/search")
def cross_tenant_search(
    q: str,
    tenants: str | None = None,
    top_k: int = 5,
    user=Depends(require_role(["super_admin"])),
):
    """Super-admin cross-tenant retrieval.

    Runs the same query through ``hybrid_query_trace`` for every tenant in
    ``tenants`` (comma-separated) or every tenant from ``manager.list()`` when
    omitted, and returns a flat list of hits tagged with their tenant.

    Args:
        q: Query text; blank input is rejected with 400.
        tenants: Optional comma-separated tenant ids.
        top_k: Hits requested per tenant, clamped to 1..100.

    Returns:
        ``{"query", "tenants", "hits"}`` where ``hits`` is sorted keyword-first
        and then by ascending score, and holds at most ``top_k`` entries per
        targeted tenant. A tenant whose store fails is logged and skipped.
    """
    # ``logger`` is not defined in the visible part of this module, so the
    # except handler below would raise NameError instead of skipping the
    # tenant. Resolve a module logger locally.
    import logging

    log = logging.getLogger(__name__)

    if not q.strip():
        raise HTTPException(status_code=400, detail="q is required")
    # Same clamping style as the ``limit`` parameters of the sibling routes.
    # A zero or negative value would otherwise produce odd slices below.
    top_k = min(max(top_k, 1), 100)

    target_tenants: list[str]
    if tenants:
        target_tenants = [t.strip() for t in tenants.split(",") if t.strip()]
    else:
        target_tenants = manager.list()

    results = []
    for tid in target_tenants:
        try:
            store = TenantVectorStore(tid)
            trace = store.hybrid_query_trace(q, n_results=top_k)
        except Exception as exc:
            log.warning("cross-tenant search failed for %s: %s", tid, exc)
            continue
        for entry in trace.get("combined", []):
            metadata = entry.get("metadata") or {}
            results.append(
                {
                    "tenant_id": tid,
                    "source": entry.get("source"),
                    "score": entry.get("score"),
                    "filename": metadata.get("filename"),
                    "path": metadata.get("path"),
                    "preview": (entry.get("document") or "")[:400],
                }
            )
    # Rank across tenants: keyword hits first, then semantic by distance.
    # Entries without a score sort after scored ones within their group.
    results.sort(
        key=lambda r: (
            0 if r["source"] == "keyword" else 1,
            r["score"] if r["score"] is not None else 9.99,
        )
    )
    return {
        "query": q,
        "tenants": target_tenants,
        "hits": results[: top_k * max(len(target_tenants), 1)],
    }
def _resolve_model(tenant_config: dict, req: ChatRequest) -> tuple[str, str]:
    """Choose the ``(provider, model_name)`` pair for one chat request.

    Resolution order:
      1. ``req.model_provider`` when set, paired with ``req.model_name`` or
         ``""`` when no name was given.
      2. The first entry of the tenant's ``models`` list: ``provider``
         (default ``"ollama"``) and ``name``, falling back to ``model_name``,
         then ``""``.
      3. The legacy ``model_type`` / ``model_name`` keys, defaulting to
         ``"ollama"`` / ``"llama3.2"``.
    """
    override = req.model_provider
    if override:
        chosen_name = req.model_name if req.model_name else ""
        return override, chosen_name

    configured = tenant_config.get("models")
    if not configured:
        legacy_provider = tenant_config.get("model_type", "ollama")
        legacy_name = tenant_config.get("model_name", "llama3.2")
        return (legacy_provider, legacy_name)

    default_entry = configured[0]
    provider = default_entry.get("provider", "ollama")
    name = default_entry.get("name")
    if not name:
        name = default_entry.get("model_name", "")
    return provider, name
@router.post("/message/stream")
def chat_message_stream(req: ChatRequest, user=Depends(get_current_user)):
    """Server-sent-events variant of ``/chat/message``.

    The reply is generated in full first and then delivered as ``delta``
    events of at most 60 characters each, followed by one ``done`` event
    carrying the generation latency. It is chunked delivery, not provider
    token streaming. A failure while generating yields a single ``error``
    event instead.

    The assistant message, usage record and audit entry are written before
    the first ``delta`` is sent. The LLM call has already been paid for by
    then, so a client that disconnects mid-stream must not cause the reply or
    its token usage (which feeds the daily cap) to go unrecorded.

    Raises:
        HTTPException: 403 when the caller may not access the tenant, 404 when
            the tenant does not exist, 429 when the daily token cap is reached.
    """
    tenant_id = req.tenant_id.strip()
    if user.get("role") != "super_admin" and user.get("tenant_id") != tenant_id:
        raise HTTPException(status_code=403, detail="Tenant access denied")
    tenant_config = tenant_manager.get(tenant_id)
    if tenant_config is None:
        raise HTTPException(status_code=404, detail="Tenant not found")
    # Opt-in cap: 0 or missing means unlimited.
    cap = int(tenant_config.get("daily_token_cap") or 0)
    if cap > 0 and usage_repository.today_token_total(tenant_id) >= cap:
        raise HTTPException(status_code=429, detail="Daily token cap reached")

    model_type, model_name = _resolve_model(tenant_config, req)
    conversation_manager.start_session(req.session_id)
    conversation_manager.add_message(req.session_id, "user", req.message)
    conversation_repository.add_message(
        req.session_id, user["username"], tenant_id, "user", req.message
    )
    history = conversation_manager.history(req.session_id)
    generator = ResponseGenerator(
        tenant_id=tenant_id, model_type=model_type, model_name=model_name,
    )

    def _sse(event: str, data: dict) -> bytes:
        """Encode one SSE frame with a JSON payload."""
        return f"event: {event}\ndata: {json.dumps(data)}\n\n".encode()

    def event_stream():
        t0 = time.monotonic()
        try:
            reply = generator.generate_response(req.message, history)
        except Exception as e:
            yield _sse("error", {"detail": f"{type(e).__name__}: {e}"})
            return
        latency_ms = (time.monotonic() - t0) * 1000
        if model_type == "ollama" and not reply.startswith("[Ollama"):
            reply = f"[Ollama] {reply}"

        # Bookkeeping happens before any bytes are streamed (see docstring).
        conversation_manager.add_message(req.session_id, "assistant", reply)
        conversation_repository.add_message(
            req.session_id, user["username"], tenant_id, "assistant", reply
        )
        usage_repository.record(
            tenant_id=tenant_id,
            username=user["username"],
            provider=model_type,
            model_name=model_name or "",
            # Same rough 4-chars-per-token estimate as the non-streaming route.
            input_tokens=max(1, (len(req.message) + 2000) // 4),
            output_tokens=max(1, len(reply) // 4),
            latency_ms=latency_ms,
        )
        audit_log_repository.log_action(user["username"], "chat_stream", req.session_id)

        CHUNK = 60
        for i in range(0, len(reply), CHUNK):
            yield _sse("delta", {"text": reply[i : i + CHUNK]})
        yield _sse("done", {"latency_ms": round(latency_ms, 1)})

    return StreamingResponse(event_stream(), media_type="text/event-stream")
@router.post("/traces")
def save_trace(req: SavedTraceRequest, user=Depends(get_current_user)):
    """Store a RAG trace so it can be reopened later without re-running the LLM.

    The title falls back to the first 80 characters of the query when none is
    given (or it is blank). The save is recorded in the audit log as
    ``"<tenant_id>:<trace_id>"``.

    Raises:
        HTTPException: 400 when ``tenant_id`` is blank; 403 (via
            ``_check_tenant_access``) when the caller may not use the tenant.
    """
    tenant_id = (req.tenant_id or "").strip()
    if tenant_id == "":
        raise HTTPException(status_code=400, detail="tenant_id required")
    _check_tenant_access(user, tenant_id)

    cleaned_title = (req.title or "").strip()
    if not cleaned_title:
        cleaned_title = req.query[:80]
    reply_text = req.reply or ""

    trace_id = rag_trace_repository.save_trace(
        tenant_id=tenant_id,
        title=cleaned_title,
        query=req.query,
        reply=reply_text,
        trace=req.trace,
        created_by=user["username"],
    )

    audit_target = f"{tenant_id}:{trace_id}"
    audit_log_repository.log_action(user["username"], "save_rag_trace", audit_target)
    return {"id": trace_id, "status": "saved"}
@router.delete("/traces/{trace_id}")
def delete_saved_trace(trace_id: int, user=Depends(get_current_user)):
    """Delete a saved trace.

    Allowed for a ``super_admin``, or for the user recorded as the trace's
    ``created_by`` provided they can access the trace's tenant.

    Raises:
        HTTPException: 404 when the trace does not exist; 403 when tenant
            access is denied or the caller is neither creator nor super admin.
    """
    row = rag_trace_repository.get_trace(trace_id)
    if row is None:
        raise HTTPException(status_code=404, detail="trace not found")
    _check_tenant_access(user, row["tenant_id"])

    # Only the creator or super_admin can delete
    is_super_admin = user.get("role") == "super_admin"
    if not is_super_admin:
        is_creator = row.get("created_by") == user["username"]
        if not is_creator:
            raise HTTPException(
                status_code=403, detail="only the creator or super_admin can delete"
            )

    rag_trace_repository.delete_trace(trace_id)
    actor = user["username"]
    audit_log_repository.log_action(actor, "delete_rag_trace", str(trace_id))
    return {"status": "deleted"}
def _ingest_file_into_tenant_store(tenant_id: str, path: Path, filename: str) -> bool:
    """Read a text file, chunk it, and push it into the tenant's vector store.

    Chunk ids have the form ``{tenant}:{filename}#{chunk_idx}``. Before the
    new chunks are inserted, existing chunks for the same filename whose id
    starts with ``{tenant}:{filename}`` are deleted, as is the legacy
    single-document id ``{tenant}:{filename}``, so edits and re-uploads do not
    accumulate stale vectors. Cleanup failures are ignored.

    Args:
        tenant_id: Tenant whose store receives the chunks.
        path: Location of the file on disk.
        filename: Name used in chunk ids and metadata.

    Returns:
        ``True`` when at least one chunk was stored. ``False`` when the file
        type is not ingestible, the file is not UTF-8, is blank, produces no
        chunks, or every chunk insert failed.
    """
    if path.suffix.lower() not in INGESTIBLE_SUFFIXES:
        logger.info("Skipping ingestion for unsupported file type: %s", filename)
        return False
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        logger.warning("Skipping non-UTF8 file during ingestion: %s", filename)
        return False
    if not text.strip():
        return False

    store = TenantVectorStore(tenant_id)
    base_id = f"{tenant_id}:{filename}"
    # Delete every chunk under this file's prefix (stale from prior ingests).
    try:
        existing = store.collection.get(where={"filename": filename})
        ids_to_drop = [i for i in (existing or {}).get("ids", []) if i.startswith(base_id)]
        if ids_to_drop:
            store.collection.delete(ids=ids_to_drop)
    except Exception as exc:
        logger.debug("chunk cleanup skipped for %s: %s", filename, exc)
    # Also delete the legacy single-doc shape for backwards compat
    try:
        store.collection.delete(ids=[base_id])
    except Exception:
        pass

    chunks = chunk_text(text, target_tokens=500)
    if not chunks:
        return False

    stored = 0
    for i, chunk in enumerate(chunks):
        doc_id = f"{base_id}#{i}"
        meta = {
            "filename": filename,
            "source": "vault-edit",
            "tenant": tenant_id,
            "chunk_idx": i,
        }
        try:
            store.add_document(doc_id, chunk[:MAX_CHUNK_CHARS], meta)
        except Exception as exc:
            # Best effort per chunk: keep going, but remember the failure.
            logger.warning("ingest failed on %s: %s", doc_id, exc)
        else:
            stored += 1

    if stored < len(chunks):
        logger.warning(
            "ingested %d of %d chunks for %s", stored, len(chunks), filename
        )
    # Report success only if something actually reached the store. Callers
    # surface this flag to the user as "ingested".
    return stored > 0
+ """ + tenant = _resolve_tenant(user, tenant_id) + if not file.filename or not SAFE_NAME_RE.match(file.filename): + raise HTTPException(status_code=400, detail="invalid filename") + root = _vault_root(tenant) + root.mkdir(parents=True, exist_ok=True) + dest_path = root / file.filename with dest_path.open("wb") as f: shutil.copyfileobj(file.file, f) - file_repository.add_file(user["username"], tenant_id, file.filename, str(dest_path)) - audit_log_repository.log_action(user["username"], "upload_file", file.filename) - return {"filename": file.filename} + file_repository.add_file(user["username"], tenant, file.filename, str(dest_path)) + + ingested = False + try: + ingested = _ingest_file_into_tenant_store(tenant, dest_path, file.filename) + except Exception as exc: # pragma: no cover - defensive, don't fail upload + logger.exception("Failed to ingest uploaded file: %s", exc) + + audit_log_repository.log_action( + user["username"], "upload_file", f"{tenant}:{file.filename}" + ) + return {"tenant_id": tenant, "filename": file.filename, "ingested": ingested} @router.get("/list") def list_files(user=Depends(get_current_user)): return file_repository.list_files(user["username"]) + + +def _resolve_tenant(user: dict, tenant_id: str | None) -> str: + """Resolve the target tenant for a vault operation, enforcing scoping.""" + requested = (tenant_id or user.get("tenant_id") or "").strip() + if not requested: + raise HTTPException(status_code=400, detail="tenant_id required") + if user.get("role") != "super_admin" and user.get("tenant_id") != requested: + raise HTTPException(status_code=403, detail="Tenant access denied") + return requested + + +def _safe_vault_path(tenant_id: str, filename: str) -> Path: + """Resolve ``filename`` under the tenant's vault root, rejecting traversal. + + ``filename`` can be a bare name (``profile.md``) or a nested path + (``AAPL/10k_2022.md``) using forward slashes. 
Every path segment must + match SAFE_NAME_RE, and the resolved path must live inside the vault + root — otherwise we raise 400 rather than serve a file outside the vault. + """ + if not filename or filename.startswith("/"): + raise HTTPException(status_code=400, detail="invalid filename") + parts = filename.replace("\\", "/").split("/") + if any(p in ("", "..", ".") for p in parts): + raise HTTPException(status_code=400, detail="invalid filename") + for p in parts: + if not SAFE_NAME_RE.match(p): + raise HTTPException(status_code=400, detail="invalid filename") + root = _vault_root(tenant_id).resolve() + path = (root / "/".join(parts)).resolve() + try: + path.relative_to(root) + except ValueError: + raise HTTPException(status_code=400, detail="path traversal rejected") + return path + + +@router.get("/vault") +def list_vault(tenant_id: str | None = None, user=Depends(get_current_user)): + """List the editable files in the current tenant's vault. + + Walks recursively so tenants like ``financebench`` whose content is + organized under per-ticker subdirectories show all their files. Each + entry's ``filename`` is the POSIX path relative to the vault root. 
+ """ + tenant = _resolve_tenant(user, tenant_id) + root = _vault_root(tenant) + files = [] + if root.exists(): + for path in sorted(root.rglob("*")): + if not path.is_file(): + continue + if path.suffix.lower() not in INGESTIBLE_SUFFIXES: + continue + try: + rel = path.relative_to(root).as_posix() + except ValueError: + continue + files.append( + { + "filename": rel, + "size": path.stat().st_size, + "suffix": path.suffix.lower(), + } + ) + return {"tenant_id": tenant, "root": str(root), "count": len(files), "files": files} + + +@router.get("/vault/{filename:path}") +def read_vault_file(filename: str, tenant_id: str | None = None, user=Depends(get_current_user)): + tenant = _resolve_tenant(user, tenant_id) + path = _safe_vault_path(tenant, filename) + if not path.exists(): + raise HTTPException(status_code=404, detail="file not found") + return { + "tenant_id": tenant, + "filename": filename, + "content": path.read_text(encoding="utf-8"), + } + + +@router.put("/vault/{filename:path}") +def update_vault_file( + filename: str, + data: VaultFileUpdate, + tenant_id: str | None = None, + user=Depends(get_current_user), +): + """Overwrite a vault file's content and re-ingest it into the tenant store. + + This is how "change specific file data by user commands" works end-to-end: + the frontend posts the new body, the file is rewritten on disk, and the + tenant's Chroma collection drops the old vector and re-embeds the new + content under the same stable ``doc_id`` so the next chat query retrieves + the updated material instead of the stale one. 
+ """ + tenant = _resolve_tenant(user, tenant_id) + path = _safe_vault_path(tenant, filename) + if path.suffix.lower() not in INGESTIBLE_SUFFIXES: + raise HTTPException(status_code=400, detail="unsupported file type") + + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(data.content, encoding="utf-8") + + # Use the shared ingestion helper so the Vault editor's re-ingest + # matches the loader: chunked with section metadata, deletes all + # {tenant}:{filename}#* before inserting fresh chunks. + _ingest_file_into_tenant_store(tenant, path, filename) + audit_log_repository.log_action(user["username"], "edit_vault_file", f"{tenant}:{filename}") + return { + "tenant_id": tenant, + "filename": filename, + "bytes": len(data.content), + "reingested": True, + } diff --git a/chatbot/response_generator.py b/chatbot/response_generator.py index f86e09b..6157348 100644 --- a/chatbot/response_generator.py +++ b/chatbot/response_generator.py @@ -19,13 +19,17 @@ import re from typing import Iterable, Mapping +from ai.models.anthropic_model import AnthropicModel from ai.models.ollama_model import OllamaModel from ai.models.openai_model import OpenAIModel from ai.rag_pipeline import RAGPipeline from db.query_engine import exact_lookup -# Simple pattern for ISO like ``YYYY-MM-DD HH:MM`` timestamps. +# Match either "YYYY-MM-DD HH:MM" (intraday market_data rows) or bare +# "YYYY-MM-DD" (daily_bars rows). DATE_ONLY_PATTERN is used as a fallback +# when the datetime pattern doesn't fire. 
DATE_PATTERN = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}") +DATE_ONLY_PATTERN = re.compile(r"\b\d{4}-\d{2}-\d{2}\b") class ResponseGenerator: @@ -34,6 +38,7 @@ class ResponseGenerator: MODELS = { "ollama": OllamaModel, "openai": OpenAIModel, + "anthropic": AnthropicModel, } def __init__(self, tenant_id: str, model_type: str = "ollama", **model_kwargs) -> None: @@ -48,20 +53,45 @@ def __init__(self, tenant_id: str, model_type: str = "ollama", **model_kwargs) - # ------------------------------------------------------------------ # Helpers for the different data sources def _lookup_db(self, message: str) -> str: - """Return a formatted row from the tenant's DB if the message contains a date.""" + """Return a formatted row from the tenant's DB if the message contains a date. + + Tries two patterns: + 1. Intraday "YYYY-MM-DD HH:MM" → legacy market_data rows. + 2. Date-only "YYYY-MM-DD" → financebench daily_bars or market_data. + + The second pattern passes ``message=`` so ``exact_lookup`` can sniff a + ticker hint for the financebench tenant. 
+ """ match = DATE_PATTERN.search(message) - if not match: - return "" + if match: + date_str = match.group(0) + row = exact_lookup(date_str, self.tenant_id, message=message) + if row: + return self._format_row(date_str, row) - date_str = match.group(0) - row = exact_lookup(date_str, self.tenant_id) - if not row: - return "" - return ( - f"Close value for {date_str} is {row['close']} " - f"(Open: {row['open']}, High: {row['high']}, " - f"Low: {row['low']}, Volume: {row['volume']})" - ) + match = DATE_ONLY_PATTERN.search(message) + if match: + date_str = match.group(0) + row = exact_lookup(date_str, self.tenant_id, message=message) + if row: + return self._format_row(date_str, row) + + return "" + + @staticmethod + def _format_row(date_str: str, row: dict) -> str: + """Render a structured row into a one-line context string.""" + ticker = row.get("ticker") + prefix = f"{ticker} on {row.get('date', date_str)}" if ticker else f"Close value for {date_str}" + try: + close = row["close"] + return ( + f"{prefix}: close={close} " + f"(open={row.get('open')}, high={row.get('high')}, " + f"low={row.get('low')}, volume={row.get('volume')})" + ) + except Exception: + return f"{prefix}: {row}" def _search_rag(self, message: str) -> str: """Retrieve free-form context using the RAG pipeline.""" @@ -120,6 +150,92 @@ def generate_response( prompt = self._build_prompt(user_message, history, context) return self.model.generate(prompt) + # ------------------------------------------------------------------ + def generate_response_trace( + self, + user_message: str, + history: Iterable[Mapping[str, str]] | None = None, + ) -> dict: + """Same as ``generate_response`` but returns a full pipeline trace. + + The returned dict is designed to be rendered in the RAG visualizer: + every stage exposes the exact data that flowed through it so a viewer + can see, for a single query, which document triggered the retrieval + and which structured row (if any) grounded the answer. + """ + # 1. 
History (serialized for display only) + history_list = list(history or []) + history_text = self._format_history(history_list) + + # 2. Structured DB lookup (intraday first, then date-only) + db_row: dict = {} + db_text = "" + detected = None + m = DATE_PATTERN.search(user_message) + if m: + detected = m.group(0) + db_row = exact_lookup(detected, self.tenant_id, message=user_message) or {} + if not db_row: + m = DATE_ONLY_PATTERN.search(user_message) + if m: + detected = m.group(0) + db_row = exact_lookup(detected, self.tenant_id, message=user_message) or {} + if db_row: + db_text = self._format_row(detected or "", db_row) + db_stage = { + "detected_date": detected, + "matched": bool(db_row), + "row": db_row, + "text": db_text, + } + + # 3. Hybrid retrieval with per-source breakdown + store metadata + rag_stage: dict = { + "store": None, + "keyword": [], + "semantic": [], + "combined": [], + "text": "", + "n_candidates": 0, + "n_results": 3, + } + store = getattr(self.rag, "store", None) + if store is not None and hasattr(store, "hybrid_query_trace"): + trace = store.hybrid_query_trace(user_message, n_results=3) + rag_stage.update(trace) + rag_stage["text"] = "\n".join(e["document"] for e in trace.get("combined", [])) + else: + rag_stage["text"] = self._search_rag(user_message) + + # 4. Fusion + merged_context = self._merge_sources(db_text, rag_stage["text"]) + + # 5. Prompt assembly + prompt = self._build_prompt(user_message, history_list, merged_context) if merged_context else "" + + # 6. 
LLM generation + if merged_context: + reply = self.model.generate(prompt) + else: + reply = "No information" + + return { + "query": user_message, + "tenant_id": self.tenant_id, + "stages": { + "history": {"messages": history_list, "text": history_text}, + "db_lookup": db_stage, + "rag_retrieval": rag_stage, + "fusion": {"db_text": db_text, "rag_text": rag_stage["text"], "merged": merged_context}, + "prompt": {"full": prompt}, + "llm": { + "model_type": self.model.__class__.__name__, + "reply": reply, + }, + }, + "reply": reply, + } + # --------------------------------------------------------------------------- # Extension notes diff --git a/data/demo/01-company-overview.md b/data/demo/01-company-overview.md new file mode 100644 index 0000000..597f233 --- /dev/null +++ b/data/demo/01-company-overview.md @@ -0,0 +1,23 @@ +# Acme Analytics — Company Overview + +Acme Analytics was founded in 2017 in Tel Aviv. The company builds real-time market analytics for mid-sized quantitative trading desks. As of 2024 the team is 42 people across research, engineering, and client ops. + +## Products + +- **Pulse** — streaming market-data aggregator that ingests L2 order books from 11 venues. +- **Atlas** — backtesting platform with a proprietary slippage model calibrated on 6 years of tick data. +- **Signal** — an alpha research IDE used internally and by three design-partner clients. 
+ +## Leadership + +- CEO: Tamar Cohen (ex-Goldman Sachs quant) +- CTO: Dan Ringart (ex-research physicist, Tel Aviv University) +- Head of Client Ops: Michael Avraham + +## Key facts + +- Founded: 2017 +- Headquarters: Tel Aviv, Israel +- Employees: 42 +- Funding to date: $18M across seed and Series A +- Primary clients: prop trading firms in London, Chicago, and Singapore diff --git a/data/demo/02-product-pulse.md b/data/demo/02-product-pulse.md new file mode 100644 index 0000000..b886ff0 --- /dev/null +++ b/data/demo/02-product-pulse.md @@ -0,0 +1,27 @@ +# Pulse — Product Specification + +Pulse is Acme Analytics' real-time market data aggregator. It normalizes raw order-book snapshots from multiple exchanges into a unified schema and publishes them over a low-latency gRPC stream. + +## Supported venues + +Pulse currently ingests from 11 venues: CME, Eurex, LSE, NYSE, NASDAQ, TSE, HKEX, SGX, ICE, BME, and ASX. Adding a new venue typically takes 3 weeks of engineering work including conformance tests. + +## Latency guarantees + +- P50 end-to-end latency: **4.2 ms** +- P99 end-to-end latency: **11.8 ms** +- Delivery guarantee: at-least-once, with client-side de-duplication by sequence number. + +## Schema + +Every tick is published as a `MarketTick` message with the following fields: + +- `symbol` — venue-qualified ticker, e.g. `NASDAQ:AAPL` +- `timestamp` — nanosecond epoch +- `bid_price`, `bid_size`, `ask_price`, `ask_size` +- `trade_price`, `trade_size`, `trade_side` +- `sequence` — monotonic per-symbol sequence number + +## Retention + +Pulse retains 90 days of raw ticks in hot storage and 2 years in cold S3-compatible object storage. Clients can replay any 90-day window at up to 10× wall-clock speed. 
diff --git a/data/demo/03-product-atlas.md b/data/demo/03-product-atlas.md new file mode 100644 index 0000000..9b10664 --- /dev/null +++ b/data/demo/03-product-atlas.md @@ -0,0 +1,24 @@ +# Atlas — Backtesting Platform + +Atlas is Acme Analytics' backtesting platform. It runs strategies against historical tick data with a proprietary slippage model and venue-specific fee schedules. + +## Slippage model + +The Atlas slippage model was calibrated on 6 years of tick data (2018-2023) across 11 venues. It accounts for: + +1. Order-book depth at the moment of submission +2. Historical cancellation rate for similar orders +3. Venue-specific rebate / take fees +4. Known latency between decision and execution + +On out-of-sample 2024 data, the model's predicted fill prices are within **2.3 basis points** of actual fills on average. + +## Execution modes + +- **Vector backtest** — fast, assumes instantaneous fills at mid. Use for strategy prototyping. +- **Event-driven backtest** — realistic, replays the full order book. Use for final validation before paper trading. +- **Paper trading** — hooks Atlas into Pulse's live feed and simulates fills in real time. + +## Typical benchmarks + +A one-year event-driven backtest over 500 symbols at 1-minute granularity completes in ~8 minutes on a single 16-core machine. diff --git a/data/demo/04-policies.md b/data/demo/04-policies.md new file mode 100644 index 0000000..aacbb54 --- /dev/null +++ b/data/demo/04-policies.md @@ -0,0 +1,23 @@ +# Internal Policies + +## Data handling + +All client trading data is classified as Confidential Level 2. It must never leave the VPC it was ingested into, and it must be encrypted at rest using AES-256 with keys rotated every 90 days. + +## On-call rotation + +Engineering runs a 24/7 on-call rotation for Pulse. Each engineer is on-call for one week at a time. The current pager ladder is: + +1. Primary on-call +2. Secondary on-call +3. 
CTO (Dan Ringart) + +A sev-1 incident must be acknowledged within 5 minutes and an initial status update must be posted in #incidents within 15 minutes. + +## Travel reimbursement + +Client-site travel is reimbursed up to $250 per day for hotel and $75 per day for meals. Any expense above these caps requires prior approval from the Head of Client Ops (Michael Avraham). + +## Secret codeword + +For this demo, the secret internal codeword is **QUOKKA-7**. If someone asks you about the codeword in the visualizer, that's the document you should retrieve. diff --git a/data/demo/05-faq.md b/data/demo/05-faq.md new file mode 100644 index 0000000..9de3de5 --- /dev/null +++ b/data/demo/05-faq.md @@ -0,0 +1,25 @@ +# Frequently Asked Questions + +## What does Acme Analytics do? + +We build real-time market analytics tools for mid-sized quantitative trading desks. Our three products are Pulse (market data), Atlas (backtesting), and Signal (alpha research). + +## How many venues does Pulse support? + +Pulse currently ingests from 11 venues: CME, Eurex, LSE, NYSE, NASDAQ, TSE, HKEX, SGX, ICE, BME, and ASX. + +## How accurate is the Atlas slippage model? + +On out-of-sample 2024 data, predicted fill prices are within 2.3 basis points of actual fills on average. The model was calibrated on 6 years of tick data from 2018 to 2023. + +## Who is the CTO? + +Dan Ringart, formerly a research physicist at Tel Aviv University. + +## What are the latency guarantees for Pulse? + +P50 end-to-end latency is 4.2 ms and P99 is 11.8 ms. + +## How long is client trading data retained? + +Pulse retains 90 days of raw ticks in hot storage and 2 years in cold object storage. 
diff --git a/data/demo/06-market-data.csv b/data/demo/06-market-data.csv new file mode 100644 index 0000000..1cb8d7d --- /dev/null +++ b/data/demo/06-market-data.csv @@ -0,0 +1,11 @@ +date,open,high,low,close,volume +2024-03-11 09:30,100.20,101.50,99.80,101.10,12500 +2024-03-12 09:30,101.10,102.30,100.70,102.00,13100 +2024-03-13 09:30,102.00,103.10,101.50,102.80,14000 +2024-03-14 09:30,102.80,103.90,102.20,103.50,15200 +2024-03-15 09:30,101.20,102.80,100.90,102.50,15000 +2024-03-18 09:30,102.50,103.40,101.80,102.90,14800 +2024-03-19 09:30,102.90,104.20,102.60,104.00,16700 +2024-03-20 09:30,104.00,104.80,103.30,103.80,15400 +2024-03-21 09:30,103.80,105.10,103.50,104.90,17200 +2024-03-22 09:30,104.90,105.60,104.10,104.30,14100 diff --git a/data/demo/ui_smoke_upload.md b/data/demo/ui_smoke_upload.md new file mode 100644 index 0000000..3b7d737 --- /dev/null +++ b/data/demo/ui_smoke_upload.md @@ -0,0 +1,3 @@ +# UI smoke upload + +Transient test file. diff --git a/data/organization/01-mission-overview.md b/data/organization/01-mission-overview.md new file mode 100644 index 0000000..1026e99 --- /dev/null +++ b/data/organization/01-mission-overview.md @@ -0,0 +1,26 @@ +# Project Lunar Harbor — Mission Overview + +> **Notice:** This document and the rest of the `organization` vault contain **fictional** mission data created for the SmartBaseAI demo. None of it reflects real NASA, ESA, CNSA, ISRO, or JAXA programs. Any resemblance to actual current Moon missions is coincidental and intended to illustrate retrieval over non-public corpora. + +## Summary + +Project Lunar Harbor is a fictional joint program to establish a permanently crewed south-pole outpost on the Moon by 2031. It spans three mission classes: + +- **LH-Recon** — uncrewed robotic scouts (rovers + seismic probes). 4 missions launched, 2 more planned. +- **LH-Cargo** — robotic landers delivering habitat modules and ISRU (in-situ resource utilization) gear. 
+- **LH-Crew** — human-rated landers carrying a 4-person expedition crew for 45-day surface stays. + +## Current phase + +As of April 2026, Lunar Harbor is in the **Phase B-2 "Surface Readiness"** checkpoint. This phase runs from January 2026 to October 2026 and gates the first LH-Cargo-3 launch window. + +## Internal reference code + +Every artifact in this vault is tagged with the internal code **LH-ALT-7741-NX**. This code is not published outside the program and is used to verify provenance of any retrieved document. A well-functioning RAG system should be able to surface this exact string when asked "what is the internal reference code". + +## Latest updates (as of 2026-04-12) + +1. LH-Recon-4 completed its 28-day traverse of the Shackleton rim on 2026-04-09 and is now in a 30-day safe-mode hibernation. +2. The LH-Cargo-3 habitat-module integration test slipped 11 days after a sensor fault in the ECLSS thermal loop, flagged in incident **INC-LH-2026-0174**. +3. Crew medical screening for Expedition-1 started 2026-04-01. Four candidates remain in the final pool (see `04-crew-manifest.md`). +4. The Lunar Harbor program manager, **Dr. Inbar Katz**, approved a $14.2M reallocation from the comms budget to accelerate the ISRU water-extraction prototype. diff --git a/data/organization/02-work-plan.md b/data/organization/02-work-plan.md new file mode 100644 index 0000000..c209697 --- /dev/null +++ b/data/organization/02-work-plan.md @@ -0,0 +1,65 @@ +# Project Lunar Harbor — Phase B-2 Work Plan + +This plan is the authoritative breakdown of Phase B-2 "Surface Readiness" (January 2026 – October 2026). Every step has a fixed owner, a deliverable, and a gate that must pass before the next step begins. Ask the RAG system for any specific step by number — e.g. "what is step 5 of the work plan" — and it should return the exact row. + +## Steps + +### Step 1 — ECLSS thermal loop qualification +- **Owner:** Dr. 
Maya Brenner (thermal systems lead) +- **Start:** 2026-01-12 +- **Deliverable:** Signed-off qualification report for the EC-TL-3 thermal loop across −173°C to +127°C surface temperature swing. +- **Gate:** All 18 test profiles completed with ≤2 anomalies per profile. +- **Status (2026-04-12):** In progress — 14/18 profiles complete. Blocked by INC-LH-2026-0174. + +### Step 2 — LH-Cargo-3 habitat module integration +- **Owner:** Tomer Aviv (cargo integration) +- **Start:** 2026-02-03 +- **Deliverable:** Fully integrated Habitat-A module, mass 4,280 kg, ready for vibration test. +- **Gate:** Mass within ±0.5% of the 4,280 kg budget and CoG within the launch vehicle envelope. +- **Status:** Slipped 11 days pending Step 1. + +### Step 3 — ISRU water-extraction prototype field test +- **Owner:** Dr. Priya Srinivasan (ISRU) +- **Start:** 2026-03-15 +- **Deliverable:** Prototype extracts ≥1.2 kg of water per 24-hour cycle from simulated regolith. +- **Gate:** Three consecutive 24-hour runs meet the 1.2 kg threshold. +- **Status:** Two of three runs complete. Latest run delivered **1.47 kg** on 2026-04-08. + +### Step 4 — Crew Expedition-1 medical screening +- **Owner:** Flight Surgeon Dr. Lior Aharon +- **Start:** 2026-04-01 +- **Deliverable:** Four-person final roster for Expedition-1, each cleared for 45 days in 1/6 G. +- **Gate:** Each candidate passes cardiopulmonary, psychological, and ophthalmic screening. +- **Status:** In progress. Four candidates remain (Tal Moran, Javier Ruiz, Noa Sharabi, Kenji Watanabe). + +### Step 5 — Communications blackout contingency drill +- **Owner:** Avi Rosen (mission ops) +- **Start:** 2026-05-20 +- **Deliverable:** Demonstrated 6-hour autonomous operation during full comms blackout, with automated return-to-habitat on loss of signal. +- **Gate:** Drill completes with zero safety-critical anomalies. +- **Status:** Not started. + +### Step 6 — LH-Cargo-3 launch readiness review +- **Owner:** Dr. 
Inbar Katz (program manager) +- **Start:** 2026-07-01 +- **Deliverable:** Green-light memo signed by program manager, chief engineer, and flight safety officer. +- **Gate:** Steps 1–5 all closed; no open Level-1 hazards. +- **Status:** Not started. + +### Step 7 — LH-Cargo-3 launch +- **Owner:** Launch Director +- **Window:** 2026-10-09 to 2026-10-12 +- **Deliverable:** Successful translunar injection and nominal trajectory insertion. +- **Status:** Not started. + +## Summary table + +| # | Step | Owner | Start | Status | +|---|---|---|---|---| +| 1 | ECLSS thermal loop qual | Dr. Maya Brenner | 2026-01-12 | In progress | +| 2 | LH-Cargo-3 integration | Tomer Aviv | 2026-02-03 | Slipped 11 days | +| 3 | ISRU water prototype | Dr. Priya Srinivasan | 2026-03-15 | 2/3 runs done | +| 4 | Crew medical screening | Dr. Lior Aharon | 2026-04-01 | In progress | +| 5 | Comms blackout drill | Avi Rosen | 2026-05-20 | Not started | +| 6 | Launch readiness review | Dr. Inbar Katz | 2026-07-01 | Not started | +| 7 | LH-Cargo-3 launch | Launch Director | 2026-10-09 | Not started | diff --git a/data/organization/03-contingencies.md b/data/organization/03-contingencies.md new file mode 100644 index 0000000..626b851 --- /dev/null +++ b/data/organization/03-contingencies.md @@ -0,0 +1,36 @@ +# Non-Public Contingency Sub-Procedures — Project Lunar Harbor + +> **Fictional data for the SmartBaseAI demo.** These contingency procedures do not exist in any real space program. They were written to test the RAG system's ability to retrieve information that cannot be found anywhere on the public internet, verifying that the answer *must* come from this vault and not from the LLM's training data. + +## CONTINGENCY PROCEDURE C-7 — "Quokka" + +Triggered when the ISRU water-extraction prototype reports three consecutive cycles below the 0.8 kg/24h threshold. + +1. Crew leaves the ISRU skid powered on but locks out the auger via hardware switch **AUGER-LOCK-2B**. +2. 
Flight surgeon initiates a 48-hour water ration reduction to 2.1 L per crew per day. +3. Mission ops uplinks diagnostic pattern **DP-Q-14** and collects 72 hours of telemetry. +4. If two diagnostic passes show the same fault signature, crew performs the **Quokka swap**: remove auger assembly A, install auger assembly B from stowage locker 3-F. +5. If both auger assemblies fail the Quokka swap, Expedition-1 is cut short and crew preps for emergency ascent using the backup **LH-Return-2** profile. + +Decision authority for a Quokka abort belongs to the Expedition-1 commander (currently provisional: Tal Moran), NOT the ground flight director. + +## CONTINGENCY PROCEDURE C-12 — "Bramble" + +Triggered when surface dust ingress exceeds **4.2 mg/m³** inside the habitat airlock for more than 15 minutes. + +1. Airlock is sealed at the outer hatch. +2. Crew purges with N₂ at 2.5 bar for 90 seconds, then vents to exterior. +3. Dust sample is collected in cartridge **BRAMBLE-C-3** and stored for return-to-Earth analysis. +4. No crew member may doff an EVA suit until the airlock reads <0.5 mg/m³ for two consecutive 10-minute windows. + +## CONTINGENCY PROCEDURE C-19 — "Kingfisher" + +Triggered when any single crew member's cumulative radiation dose exceeds **180 mSv** for the expedition. + +1. Affected crew member is restricted to the habitat's central shielded cell (volume 8.3 m³) for 72 hours. +2. All remaining EVAs are reassigned to unaffected crew. +3. If a second crew member triggers C-19 within the same expedition, the full crew aborts to LH-Return-2 regardless of remaining mission objectives. + +## Authorship and verification + +All three procedures were authored by **Dr. Inbar Katz** and peer-reviewed by **Dr. Maya Brenner** on 2026-02-27. The internal reference for this document is **LH-ALT-7741-NX-contingencies-v3**. If a RAG query asks "who wrote the contingency procedures", the correct answer — based on this vault — is Dr. Inbar Katz. 
diff --git a/data/organization/04-crew-manifest.md b/data/organization/04-crew-manifest.md new file mode 100644 index 0000000..4e1b6f7 --- /dev/null +++ b/data/organization/04-crew-manifest.md @@ -0,0 +1,29 @@ +# Expedition-1 Crew Manifest (Fictional) + +Four candidates remain in the final pool for Project Lunar Harbor Expedition-1. Final roster is gated on Step 4 of the work plan. + +| # | Name | Role | Age | Nationality | Notes | +|---|---|---|---|---|---| +| 1 | Tal Moran | Commander (provisional) | 41 | Israeli | 2 prior ISS rotations; flight test pilot background. | +| 2 | Javier Ruiz | Pilot | 37 | Spanish | ESA astronaut class of 2022; habitat systems lead during training. | +| 3 | Noa Sharabi | Mission specialist — geology | 34 | Israeli | PhD in planetary science, Weizmann; field ops at Devon Island analog. | +| 4 | Kenji Watanabe | Mission specialist — ISRU | 39 | Japanese | JAXA ISRU project lead; co-author on the Quokka procedure (C-7). | + +## Medical pool — confidential + +Only Flight Surgeon Dr. Lior Aharon and the program manager Dr. Inbar Katz have full access to the medical screening dossiers. Known flags within this vault: + +- Tal Moran has a historical lower-back injury (L4-L5, rehabilitated 2023). Cleared with conditional note. +- Javier Ruiz wears corrective contact lenses (prescription −2.25 OU), cleared under waiver **MED-2026-018**. +- Noa Sharabi and Kenji Watanabe have no active flags. + +## Training status + +All four candidates have completed: +- 40 hours in the centrifuge at 3.2 G sustained. +- 60 hours in the hypobaric chamber. +- 120 hours of EVA suit-up drills. + +Remaining training blocks: +- Crew medical cross-training (Noa Sharabi leads) — 30 hours, scheduled 2026-05-05 to 2026-05-09. +- Lunar lander ascent abort simulator — 20 hours, scheduled 2026-06-01. 
diff --git a/data/organization/05-telemetry.csv b/data/organization/05-telemetry.csv new file mode 100644 index 0000000..da54065 --- /dev/null +++ b/data/organization/05-telemetry.csv @@ -0,0 +1,9 @@ +date,vehicle,battery_volts,solar_watts,temp_c,dust_mg_m3,anomaly_code +2026-04-05 09:30,LH-Recon-4,28.7,412.3,-92.4,0.31, +2026-04-06 09:30,LH-Recon-4,28.4,398.1,-94.1,0.29, +2026-04-07 09:30,LH-Recon-4,27.9,371.6,-96.0,0.42, +2026-04-08 09:30,LH-Recon-4,27.3,340.2,-97.8,0.58,INC-LH-2026-0171 +2026-04-09 09:30,LH-Recon-4,26.1,0.0,-174.2,0.71,INC-LH-2026-0174 +2026-04-10 09:30,LH-Recon-4,25.9,0.0,-175.0,0.68,INC-LH-2026-0174 +2026-04-11 09:30,LH-Recon-4,25.8,0.0,-174.5,0.66, +2026-04-12 09:30,LH-Recon-4,25.7,0.0,-174.1,0.64, diff --git a/data/personal/01-habits.md b/data/personal/01-habits.md new file mode 100644 index 0000000..3633ce2 --- /dev/null +++ b/data/personal/01-habits.md @@ -0,0 +1,19 @@ +# Personal habits & routines + +## Morning routine (Sun–Thu) +- 06:45 wake, 15 min stretching + 10 push-ups. +- 07:00 espresso (Ethiopia Yirgacheffe, 18 g, 92°C, 32 s pull). +- 07:15 journal — 3 sentences: mood, top priority, one thing to be grateful for. +- 07:30 walk to the office. + +## Weekly +- Mon: long run, 8 km. +- Wed: climbing at Namal wall (V3–V4). +- Sat: grocery at Shuk Hatikva, meal prep for the week. + +## Allergies +- **Penicillin** — confirmed anaphylaxis at age 11, carry EpiPen. +- Mild hay fever in April–May, loratadine 10 mg as needed. + +## Blood type +- O+. diff --git a/data/personal/02-goals-2026.md b/data/personal/02-goals-2026.md new file mode 100644 index 0000000..7314f4f --- /dev/null +++ b/data/personal/02-goals-2026.md @@ -0,0 +1,18 @@ +# 2026 goals + +## Professional +- Ship SmartBaseAI public demo + portfolio polish by end of Q2 2026. +- Give one conference talk (likely PyData TLV). +- Read 12 papers on retrieval fusion (1 per month). + +## Fitness +- Sub-42-minute 10K by June. +- Lead 6a sport route outdoors. 
+ +## Financial +- Max out Keren Hishtalmut for 2026. +- 6-month emergency fund by September. + +## Relationships +- One meaningful 1-on-1 call per week with someone I haven't talked to in >3 months. +- Family trip to the Galilee in the Hanukkah break. diff --git a/data/personal/03-contacts.md b/data/personal/03-contacts.md new file mode 100644 index 0000000..e1297d6 --- /dev/null +++ b/data/personal/03-contacts.md @@ -0,0 +1,12 @@ +# Key contacts + +| Name | Relation | Phone | Notes | +|---|---|---|---| +| Dr. Rachel Tomer | GP | +972-52-555-0191 | Clinic in Ramat Gan, Tue/Thu 09:00–12:00 | +| Yossi (plumber) | Home | +972-54-555-0877 | Speaks Hebrew only | +| Noa Levi | Accountant | +972-3-555-0234 | Sends the annual report every March | +| Michael Weinstock | Lawyer | +972-52-555-0999 | NDA + equity reviews | + +## Emergency contacts +- Spouse: Maya, +972-50-555-0101. +- Sister: Tal, +972-50-555-0202. diff --git a/data/relativity/01-overview.md b/data/relativity/01-overview.md new file mode 100644 index 0000000..31b9607 --- /dev/null +++ b/data/relativity/01-overview.md @@ -0,0 +1,26 @@ +# General Relativity — Research Notebook Overview + +This vault holds notes, derivations, and annotated reading lists from the lab's ongoing work in general relativity (GR). It is intended as a private knowledge base for the research group, not for external publication. + +## Scope + +1. **Classical GR** — Schwarzschild, Kerr, and Reissner–Nordström solutions; black-hole thermodynamics; the Penrose process. +2. **Cosmology** — FLRW metric, inflation, late-time acceleration; ΛCDM and its tensions (H₀ tension, σ₈ tension). +3. **Gravitational waves** — LIGO/Virgo/KAGRA detections, waveform modeling, stochastic backgrounds, pulsar timing arrays (NANOGrav / EPTA / PPTA / InPTA). +4. **Modified gravity** — f(R), scalar-tensor, MOND phenomenology as a diagnostic, and how current tests constrain deviations from Einstein's equations. 
+ +## Lab identifier + +Every document in this vault is tagged with the lab code **GR-TAU-26-2047**. Retrieval queries that ask for the internal identifier should return this string — it's unique to our group and does not appear outside internal docs. + +## Active threads (as of April 2026) + +- Revisiting the Hubble tension with a late-time dark-energy model that eases the tension with SH0ES by roughly 1.2σ. Notes in `02-hubble-tension.md`. +- Extracting ringdown overtones from post-merger GW signals using the Gaussian-process regression approach described in `03-ringdown.md`. +- Weekly reading group on numerical relativity (Einstein Toolkit), Thursdays 14:00, room 408. + +## Contacts + +- Principal investigator: Prof. Einat Hadar (hadar@physics.internal). +- Postdoc: Dr. Yonatan Peled (peled@physics.internal). +- PhD student: Maya Arbel (arbel@physics.internal). diff --git a/data/relativity/02-hubble-tension.md b/data/relativity/02-hubble-tension.md new file mode 100644 index 0000000..f97875b --- /dev/null +++ b/data/relativity/02-hubble-tension.md @@ -0,0 +1,29 @@ +# Notes on the Hubble tension + +The "Hubble tension" is the 4–6σ discrepancy between early-universe measurements of H₀ (CMB + ΛCDM, Planck 2018: H₀ = 67.4 ± 0.5 km/s/Mpc) and late-universe distance-ladder measurements (SH0ES 2022: H₀ = 73.04 ± 1.04 km/s/Mpc). + +## What we know + +- The discrepancy has grown (not shrunk) with each revision of both the Planck analysis and the distance ladder. +- TRGB (tip of the red giant branch, CCHP collaboration) gives ~69.8 km/s/Mpc, sitting between the two camps. +- Strong-lensing time delays (H0LiCOW, TDCOSMO) give H₀ ≈ 73 with larger error bars. + +## Our lab's working model (internal draft v0.7) + +We are investigating a **late-time dark-energy transition** — an equation-of-state w(z) that deviates from −1 between z ≈ 0.3 and z ≈ 0.02. In the fiducial form: + + w(z) = -1 + ε · tanh((z - z*) / Δz) + +with z* ≈ 0.15, Δz ≈ 0.08, and ε ≈ 0.05. 
This shifts the inferred distance ladder by enough to ease the tension by roughly 1.2σ without breaking the CMB fit. + +## Open problems + +1. The model must avoid violating the null energy condition — constraints from structure growth (σ₈) are tight. +2. Cross-correlating with fσ₈ measurements from DESI DR2 (when public) is the next test. +3. Need to simulate the effect on the sound horizon at drag (r_d) — Yonatan is running CAMB modifications this week. + +## Action items + +- **Maya Arbel** — finalize the CAMB patch for w(z), target 2026-04-22. +- **Yonatan Peled** — write up the BAO likelihood module. +- **Group** — invite Prof. Lior Feldman (Tel Aviv) to give a seminar on bispectrum constraints, target May. diff --git a/data/relativity/03-ringdown.md b/data/relativity/03-ringdown.md new file mode 100644 index 0000000..0ec4f49 --- /dev/null +++ b/data/relativity/03-ringdown.md @@ -0,0 +1,31 @@ +# Ringdown overtone extraction — method notes + +Ringdown is the exponentially-damped oscillation phase that follows a compact-binary merger. For a Kerr remnant with mass M and dimensionless spin χ, the ringdown is a superposition of quasi-normal modes (QNMs) labeled (l, m, n): l, m are angular indices and n is the overtone number. + +## Why overtones matter + +The fundamental mode (n=0) tests the "no-hair theorem" — that a black hole is fully characterized by (M, χ). The first overtone (n=1) provides an independent check; if (M, χ) extracted from n=0 and n=1 disagree, that's a potential deviation from GR. + +## Our approach (method M-GR-2047-R3) + +We fit the ringdown waveform with a two-mode ansatz + + h(t) = A₀ · e^(-t/τ₀) · cos(ω₀·t + φ₀) + A₁ · e^(-t/τ₁) · cos(ω₁·t + φ₁) + +using Gaussian-process regression to marginalize over the start-time uncertainty. The GP kernel is a Matérn 5/2 with length-scale prior LogNormal(μ=−3, σ=0.5) ms. + +Start-time treatment — this is the biggest systematic. 
We don't pick a single "peak" but integrate over a window [t_peak − 2ms, t_peak + 8ms] with an adaptive weight. Details are in `appendix-start-time.md` (to be written). + +## Benchmarks + +Applied to GW150914 (re-analysis, not publication-ready): +- n=0 mode: f₀ = 250.5 ± 4.1 Hz, τ₀ = 4.02 ± 0.19 ms. +- n=1 mode: marginal, Bayes factor log₁₀ BF ≈ 0.9 — insufficient evidence for a confident claim. + +Applied to the (synthetic) NR waveform SXS:BBH:0305: +- n=0: recovered within 0.3% of the truth (f, τ). +- n=1: recovered when SNR > 30; below that the GP prior dominates. + +## Codename + +For internal tracking, this method is referred to as **Kingfisher-R3**. Queries asking about the method should return this name — it's only used in this lab. diff --git a/data/relativity/04-reading-list.md b/data/relativity/04-reading-list.md new file mode 100644 index 0000000..868658a --- /dev/null +++ b/data/relativity/04-reading-list.md @@ -0,0 +1,30 @@ +# Reading list — April 2026 + +Curated papers the group should know. ⭐ = read this first. + +## Foundations +- ⭐ Wald, *General Relativity* (1984) — the textbook. Chapters 6, 11, 12 are mandatory for new group members. +- Poisson, *A Relativist's Toolkit* (2004) — formalism for hypersurfaces, Israel junction conditions. Needed for `02-hubble-tension.md`. +- Hawking & Israel (eds.), *300 Years of Gravitation* (1987) — historical grounding. + +## Hubble tension +- ⭐ Riess et al. 2022 (SH0ES) — the distance-ladder result the whole tension pivots on. +- Verde, Treu & Riess 2019 — early summary of the tension. +- Di Valentino et al. 2021 — a comprehensive review with ~50 proposed resolutions. Useful for bibliography mining. +- Kamionkowski & Riess 2022 — the "new physics vs systematics" framing. + +## Ringdown +- ⭐ Berti, Cardoso & Will 2006 — QNM tables, the canonical reference for Kerr modes. +- Giesler et al. 2019 — overtone claim on GW150914 (controversial). +- Cotesta et al. 2022 — the counterclaim; argues overtones aren't supported. 
+- Isi & Farr 2021 — careful Bayesian treatment. + +## Numerical relativity +- Alcubierre, *Introduction to 3+1 Numerical Relativity* (2008). +- The Einstein Toolkit documentation (cactuscode.org). + +## Group policies on reading + +1. New papers cited in the weekly meeting must be logged here within 7 days. +2. ⭐ papers are required reading for all group members; un-starred entries are topic-specific. +3. For review articles, one person claims "reviewer of record" — they are responsible for giving a 15-minute summary at the next reading group. diff --git a/data/respgen.db b/data/respgen.db deleted file mode 100644 index a8042a7..0000000 Binary files a/data/respgen.db and /dev/null differ diff --git a/data/saas-ai/01-unit-economics.md b/data/saas-ai/01-unit-economics.md new file mode 100644 index 0000000..2755804 --- /dev/null +++ b/data/saas-ai/01-unit-economics.md @@ -0,0 +1,29 @@ +# SaaS in the age of AI — unit economics + +Classical SaaS unit economics were built on the assumption that the marginal cost of serving one more user is near zero. AI breaks this assumption: every inference call costs real money, and those costs scale with usage, not with seats. This document is the working position of our strategy team. + +## The old model + +- **Pricing:** per-seat, per-month. +- **Cost structure:** mostly fixed (engineering salaries, infrastructure baseline). +- **Margin shape:** gross margin climbs from ~60% at $1M ARR to ~80–85% at $50M ARR as the fixed cost gets amortized. +- **Investor expectation:** "Rule of 40" (growth rate + EBITDA margin ≥ 40%). + +## What breaks with AI + +- **Cost becomes variable** with usage. A power user burning 10 million tokens/month can cost more than their subscription. +- **Gross margin can go negative at the top of the distribution.** Anecdotally, some AI products report 20–40% of users are unprofitable. +- **Classic "Rule of 40" is less meaningful** — gross margin compression dominates the equation. 
+ +## Strategies we see working + +1. **Usage-based pricing** — charge per inference, per token, or per outcome. Works when the value delivered is itemizable. Example: transcription at $X per minute. +2. **Tiered caps with overages** — each plan includes a generous usage envelope, with a clearly-priced overage. Easier to sell than raw usage-based. +3. **Outcome-based pricing** — charge for the result, not the compute. E.g., "we'll write your sales emails, $0.10 per email sent that gets a reply." Hard to measure, but aligns incentives perfectly. +4. **Bring-your-own-key (BYOK)** — let the customer plug in their own LLM API key; you charge only for the workflow/product layer. Shifts the variable cost to the customer. Good for enterprise; bad for SMB. + +## Internal framework + +We're codifying a "gross-margin staircase" model: at each pricing tier, we forecast the p50 and p90 token usage, multiply by the blended model cost, and require ≥70% gross margin at p90. If a plan can't hit that, either the tier is too cheap or the usage cap is too loose. + +Internal tracking codename for this framework: **HIGHLAND-GM-2026**. Retrieval queries that ask about "the gross-margin staircase framework" should return this codename. diff --git a/data/saas-ai/02-moats.md b/data/saas-ai/02-moats.md new file mode 100644 index 0000000..14ad7da --- /dev/null +++ b/data/saas-ai/02-moats.md @@ -0,0 +1,38 @@ +# Competitive moats in the age of AI + +Every AI feature built on a third-party foundation model can be cloned by your competitor in a weekend. So where does a defensible moat come from? Our working list, roughly in order of durability: + +## 1. Proprietary data (strongest) + +- Data that your competitors cannot obtain or buy. +- Data that grows with your usage — a data flywheel. +- Example: Stripe's fraud model trained on ten years of cross-customer transactions. A new entrant cannot match this without first running payments for a decade. + +## 2. 
Workflow integration (durable) + +- When your product is the daily-used surface (e.g., the sales rep's CRM, the lawyer's document editor), the switching cost is huge regardless of the underlying model. +- AI features added to the existing workflow are stickier than standalone AI products. + +## 3. Distribution (durable) + +- Existing customer lists, brand trust, procurement relationships, certifications (SOC 2, HIPAA, FedRAMP). +- Enterprises are 10–100× easier to sell to if you already sell them something else. + +## 4. Vertical depth (moderate) + +- Deep domain knowledge encoded in prompts, evaluations, and tool integrations. +- Example: a legal-research product that knows exactly which case-law databases to query and how to cite them correctly. +- Weaker than data, but meaningful because it's hard to copy without domain experts. + +## 5. Latency and cost optimization (weak) + +- Running the same model cheaper or faster than competitors. +- Weak because foundation-model providers keep cutting price; today's advantage is tomorrow's table stakes. + +## 6. "Better prompt" (not a moat) + +- Any prompt you ship is in your customers' browser on the first demo. Not defensible. + +## Where SmartBaseAI lives + +SmartBaseAI targets moats **#1 (proprietary data)** and **#2 (workflow integration)** by being the system of record for each tenant's private knowledge vault, and by sitting inside the tenant's existing chat UI rather than being a standalone destination. The multi-tenant architecture is specifically designed so that the data advantage compounds per tenant rather than being pooled. diff --git a/data/saas-ai/03-pricing-playbook.md b/data/saas-ai/03-pricing-playbook.md new file mode 100644 index 0000000..9e47e68 --- /dev/null +++ b/data/saas-ai/03-pricing-playbook.md @@ -0,0 +1,42 @@ +# Pricing playbook for AI-native SaaS + +A short field guide for setting prices on a product where model costs are a first-class line item. 
+ +## Step 1 — Measure actual usage, not guessed usage + +Before pricing anything, instrument tokens-in, tokens-out, and compute seconds for every call, tagged by tenant and by feature. Don't trust dashboards from the model vendor — build your own. + +## Step 2 — Pick the billing atom + +The billing atom is the unit your customer understands and accepts being charged for. Candidates: +- per seat (classic) +- per workflow run +- per document processed +- per API call +- per successful outcome (e.g., per reply received) + +**The best atom is the smallest unit the customer already counts.** If an operations team counts "tickets resolved," charge per ticket. If a marketing team counts "campaigns shipped," charge per campaign. + +## Step 3 — Build a three-tier menu + +The classic three-tier SaaS menu still works. For AI: + +- **Starter** — generous free tier with a low usage cap. Customer acquisition cost. +- **Pro** — the "good price" tier. Designed to be profitable at p50 usage; loss-leader at the p99. Most customers land here. +- **Enterprise** — bespoke terms, often BYOK or dedicated capacity, SLA, and custom data-retention terms. + +## Step 4 — Protect the gross margin explicitly + +- Hard cap on the Pro plan at a number your finance team approves. +- Soft cap with automatic overage billing, communicated clearly at signup. +- Automatic model routing: route low-complexity queries to cheaper models, reserve the expensive model for when it's needed. A good router can cut model cost by 40–60% without any drop in user-perceived quality. + +## Step 5 — Communicate in dollars the customer understands + +"$29 / month / user" is legible. "$0.0015 per thousand tokens" is not. Even if the backend is usage-based, the customer-facing number should be something a CFO can forecast. Usage-based pricing that fails the CFO-forecasting test burns adoption in the enterprise segment. 
+ +## Anti-patterns + +- **Per-seat pricing on an AI product where the work is automated.** The customer's seat count goes to zero; your revenue goes with it. +- **Unlimited usage tiers on a hosted-model product.** A small number of heavy users will destroy your gross margin. Always have a cap. +- **Hiding the model costs in a "platform fee".** Enterprise buyers will demand the breakdown, and if you can't produce it, you'll lose the deal. diff --git a/data/smartbase-docs/01-getting-started.md b/data/smartbase-docs/01-getting-started.md new file mode 100644 index 0000000..df58d8a --- /dev/null +++ b/data/smartbase-docs/01-getting-started.md @@ -0,0 +1,36 @@ +# SmartBaseAI — getting started + +SmartBaseAI is a multi-tenant LLM platform that grounds chat answers in two sources: (1) **structured data** from each tenant's database, and (2) **unstructured documents** stored in each tenant's vault. Every tenant gets an isolated knowledge vault, an isolated vector store, and its own choice of LLM backend. + +This vault is the user-facing documentation. Ask it questions like: +- "How do I add a file to a vault?" +- "What does the RAG Visualizer actually show me?" +- "Which LLM providers does SmartBaseAI support?" +- "What's the most valuable thing SmartBaseAI delivers to a company?" + +## First login + +1. Navigate to the login page at `http://localhost:5173/`. +2. Sign in with a default account: + - **super_admin** — `admin / ChangeThis123!` (sees every vault). + - **tenant user** — varies per vault. See the "Seeded users" list in the full docs. +3. You land on the chat page. The sidebar shows your available vaults and the live API-connection status. + +## Picking a vault + +- **Super-admin** must explicitly pick a vault from the top-right "Active vault" dropdown before chatting — they have access to all tenants but no auto-selection. +- **Tenant admins** and **regular users** see their own vault auto-selected. 
+- The active vault determines which documents the chat retrieves from and which structured database the exact-lookup path queries. + +## The three main screens + +- **Chat** — the grounded chat interface. Every reply is produced by the orchestrator that merges conversation history, structured DB lookups, and hybrid RAG retrieval. +- **RAG Visualizer** — opens a live diagram of how a single query flows through the pipeline. See `02-rag-visualizer.md`. +- **Vault** — the editor for the active tenant's files. Upload new ones or edit existing ones; the system auto-re-embeds on save so changes are immediately visible to chat. + +## Settings (super_admin only) + +The Settings page is where you: +- Enter API keys for Anthropic and OpenAI, and the base URL for Ollama. +- Test provider connectivity live. +- Configure each tenant's list of available LLM models (every tenant can have multiple providers + models; users pick at chat time). diff --git a/data/smartbase-docs/02-rag-visualizer.md b/data/smartbase-docs/02-rag-visualizer.md new file mode 100644 index 0000000..d28d5aa --- /dev/null +++ b/data/smartbase-docs/02-rag-visualizer.md @@ -0,0 +1,38 @@ +# What the RAG Visualizer shows you + +The RAG Visualizer page (`/rag`) is a live view of the orchestrator pipeline for a single query. Open it, paste a question, hit **Run trace**, and every stage of the pipeline lights up with the actual data that passed through it. + +## The pipeline, stage by stage + +### 1. User query +The raw question you typed, along with the active tenant ID. This is the input the orchestrator receives. + +### 2. Orchestrator — three parallel sources + +The orchestrator fans out to three sources at once: + +- **History** — recent messages from your session, via `ConversationManager`. Shown as a compact list. +- **DB exact lookup** — if the query contains an ISO date (`YYYY-MM-DD HH:MM`), the orchestrator runs `exact_lookup` against the tenant's structured database. 
If a row matches, the card lights green and shows the row as JSON plus a human-readable summary. +- **Hybrid retrieval** — runs keyword match AND semantic top-k against the tenant's Chroma store, merged with keyword hits first. Each retrieved doc is shown as a chip with its filename, source (keyword or semantic), and distance score. + +### 3. Fusion +DB text is preferred and RAG is treated as supplemental context. If both are empty, the orchestrator returns `"No information"` without calling the model — this is the explicit guardrail against hallucination. + +### 4. Prompt assembly +The full prompt is assembled: conversation history + merged context + the user question. This is the exact string sent to the LLM. The visualizer shows you the whole thing in a monospace panel. + +### 5. LLM +The model type and the generated reply. With a real Anthropic or OpenAI key, this is a real answer; without, you get a clearly-marked unavailability stub so you can still see the pipeline flow. + +## What to look for + +- **Which document contributed** — look at the distance scores. Scores below ~1.0 are very strong semantic matches. Scores above ~1.8 are weak and may be noise. +- **Keyword vs semantic** — a keyword hit is an exact substring match. A semantic hit is an embedding-nearest neighbor. When keyword and semantic both return the same file, that's the clearest signal that retrieval is working. +- **DB lookup overriding RAG** — for dated queries, you should see the DB card green and the exact row in the fusion block above the supplemental RAG text. +- **Empty retrieval** — if both keyword and semantic return nothing, the fusion block will say "both sources empty → 'No information'". This is not a bug; it's the guardrail. + +## Use cases + +- Debugging why a chat answer was wrong. +- Showing a stakeholder how retrieval actually works (not just "AI magic"). +- Comparing the same query across two different vaults to see how tenant-scoping affects the result. 
diff --git a/data/smartbase-docs/03-value-for-companies.md b/data/smartbase-docs/03-value-for-companies.md new file mode 100644 index 0000000..aa7d94c --- /dev/null +++ b/data/smartbase-docs/03-value-for-companies.md @@ -0,0 +1,27 @@ +# The most valuable thing SmartBaseAI delivers to a company + +Most "chat with your docs" products are a thin wrapper around an LLM and a vector database. They work well enough for demos and fail at the moments when accuracy matters. SmartBaseAI is built around a different premise: **the most valuable thing a company gets from a knowledge-chat product is *verifiable* answers, not fluent ones.** + +Three things follow from that premise, and they are the three things we believe are most valuable for a company adopting this tool right now. + +## 1. Exact answers from structured data, not hallucinations + +When a user asks "what was the close price on 2024-03-15?", the right answer is the row in your Postgres table — not the LLM's best guess from training data. SmartBaseAI's orchestrator detects structured queries, runs an exact DB lookup first, and feeds the real row into the prompt as authoritative context. The LLM answers *using* the row, not from memory. If no row matches and no document matches, the answer is an explicit `"No information"` — not a confident fabrication. + +For companies in finance, operations, or compliance, this is the difference between "a tool we can trust in front of customers" and "a tool we can only use for brainstorming." + +## 2. Per-tenant isolation without per-tenant cost + +Every tenant in SmartBaseAI gets its own isolated Chroma vector store, its own structured database, its own choice of LLM provider, and its own user roster. But they share the same codebase, the same API, and the same UI. A mid-sized firm can stand up a knowledge base per department (legal, engineering, sales, R&D) without building and maintaining five separate products. + +The value here is **organizational scalability**. 
You can deploy the same product to five internal customers with five completely different domains, five completely different access-control requirements, and zero cross-tenant leakage. + +## 3. Editable vaults — the company's knowledge base stays current + +The Vault editor lets authorized users update a source document and have the change reflected in chat answers within seconds. No rebuild, no re-index-everything job, no "filed a ticket with the data team." The doc is overwritten on disk, the old vector is dropped, the new one is embedded, and the next query retrieves the updated content. + +For a company, this closes the most expensive loop in any knowledge system: **"the docs are wrong, I can't fix them myself, so the chat keeps being wrong."** SmartBaseAI is designed so that the people who know the answer are also the people who can fix the source — directly, with a button click. + +## Bottom line + +The most valuable thing SmartBaseAI delivers to a company **right now, in 2026**, is **trustworthy, editable, per-team knowledge chat that grounds its answers in the company's actual data — structured and unstructured, with the structured side treated as ground truth.** That combination is rare among "chat with your docs" tools today, and it is exactly the combination that enterprise buyers are finding they need after six months of shipping demos that over-promise and under-deliver. diff --git a/data/smartbase-docs/04-faq.md b/data/smartbase-docs/04-faq.md new file mode 100644 index 0000000..a6c3423 --- /dev/null +++ b/data/smartbase-docs/04-faq.md @@ -0,0 +1,43 @@ +# SmartBaseAI — FAQ + +## How many LLM providers does it support? + +Three, pluggable via the same `ResponseGenerator.MODELS` registry: +- **Ollama** (local) — any Ollama model. +- **OpenAI** — any chat model via the official Python SDK (default `gpt-4o-mini`). 
+- **Anthropic** — any Claude model via the official Python SDK (default `claude-opus-4-6` with adaptive thinking and prompt caching on the stable system prompt). + +Any tenant can have **multiple** providers and models configured at once; users pick the model at chat time from a dropdown in the chat page. + +## Which data sources can be ingested? + +The bulk ETL path (`ingestion/etl_manager.py`) supports Postgres, MySQL, MongoDB, and generic HTTP APIs. For per-user upload via the Vault page, any UTF-8 text file is supported (`.md`, `.txt`, `.csv`, `.log`). + +## How is data isolated between tenants? + +Each tenant has: +- Its own directory under `vector_store/{tenant_id}/` containing a persistent Chroma collection. +- Its own SQLite database under `data/{tenant_id}.db` for structured lookups. +- Its own vault directory under `data/{tenant_id}/` (or `data/vaults/{tenant_id}/` for non-seeded tenants). +- Its own set of users, scoped by `tenant_id` in the users table and enforced in every route. + +Cross-tenant access is only possible for the `super_admin` role, and the super-admin must explicitly pick a vault from the top-right dropdown — there is no automatic "show everything" view. + +## Is the chat grounded or does it hallucinate? + +Grounded. The orchestrator calls the LLM with a Context block that includes the exact DB row (if a date matches) and the hybrid-retrieved documents. If both the DB and the RAG retrieval are empty, the orchestrator returns an explicit `"No information"` and does not call the LLM at all. + +## What does "re-ingest" mean? + +When a file in the vault is edited or a new file is uploaded, the backend drops the old vector from the tenant's Chroma collection and embeds the new content under the same stable `doc_id` (`{tenant}:{filename}`). No duplicate vectors accumulate, and subsequent queries use the updated content immediately. + +## How do I run it locally? 
+ + pip install -r requirements.txt + python scripts/run_server.py --reload # backend on :8000 + cd frontend && npm install && npm run dev # frontend on :5173 + python scripts/seed_demo.py # optional: seed six demo vaults + +## Where can I find the source? + +`https://github.com/danrixd/smartbaseai` diff --git a/data/system.db b/data/system.db deleted file mode 100644 index 8890317..0000000 Binary files a/data/system.db and /dev/null differ diff --git a/data/testtenant.db b/data/testtenant.db deleted file mode 100644 index 59c0bb4..0000000 Binary files a/data/testtenant.db and /dev/null differ diff --git a/db/audit_log_repository.py b/db/audit_log_repository.py index 1dadf47..ffdd3fe 100644 --- a/db/audit_log_repository.py +++ b/db/audit_log_repository.py @@ -1,10 +1,14 @@ import sqlite3 +from datetime import datetime, timezone from pathlib import Path -from datetime import datetime DB_PATH = Path("data/system.db") +def _now() -> str: + return datetime.now(timezone.utc).isoformat() + + def init_db() -> None: DB_PATH.parent.mkdir(parents=True, exist_ok=True) conn = sqlite3.connect(DB_PATH) @@ -20,6 +24,9 @@ def init_db() -> None: ) """ ) + cursor.execute( + "CREATE INDEX IF NOT EXISTS idx_audit_logs_created_at ON audit_logs(created_at DESC)" + ) conn.commit() conn.close() @@ -30,7 +37,54 @@ def log_action(username: str, action: str, details: str | None = None) -> None: cursor = conn.cursor() cursor.execute( "INSERT INTO audit_logs (username, action, details, created_at) VALUES (?, ?, ?, ?)", - (username, action, details, datetime.utcnow().isoformat()), + (username, action, details, _now()), ) conn.commit() conn.close() + + +def list_logs(limit: int = 200, offset: int = 0, username: str | None = None, action: str | None = None) -> list[dict]: + """Return recent audit events, newest first. 
Used by the admin viewer.""" + init_db() + conn = sqlite3.connect(DB_PATH) + try: + q = "SELECT id, username, action, details, created_at FROM audit_logs WHERE 1=1" + params: list = [] + if username: + q += " AND username = ?" + params.append(username) + if action: + q += " AND action LIKE ?" + params.append(f"%{action}%") + q += " ORDER BY id DESC LIMIT ? OFFSET ?" + params.extend([limit, offset]) + rows = conn.execute(q, params).fetchall() + finally: + conn.close() + return [ + { + "id": r[0], + "username": r[1], + "action": r[2], + "details": r[3], + "created_at": r[4], + } + for r in rows + ] + + +def count_logs(username: str | None = None, action: str | None = None) -> int: + init_db() + conn = sqlite3.connect(DB_PATH) + try: + q = "SELECT COUNT(*) FROM audit_logs WHERE 1=1" + params: list = [] + if username: + q += " AND username = ?" + params.append(username) + if action: + q += " AND action LIKE ?" + params.append(f"%{action}%") + return int(conn.execute(q, params).fetchone()[0]) + finally: + conn.close() diff --git a/db/conversation_repository.py b/db/conversation_repository.py index bdb1239..a81b656 100644 --- a/db/conversation_repository.py +++ b/db/conversation_repository.py @@ -1,6 +1,6 @@ import sqlite3 from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from typing import List, Dict DB_PATH = Path("data/system.db") @@ -33,7 +33,7 @@ def add_message(session_id: str, username: str, tenant_id: str, sender: str, mes cursor = conn.cursor() cursor.execute( "INSERT INTO conversations (session_id, username, tenant_id, sender, message, created_at) VALUES (?, ?, ?, ?, ?, ?)", - (session_id, username, tenant_id, sender, message, datetime.utcnow().isoformat()), + (session_id, username, tenant_id, sender, message, datetime.now(timezone.utc).isoformat()), ) conn.commit() conn.close() diff --git a/db/file_repository.py b/db/file_repository.py index a17f2f0..4983d85 100644 --- a/db/file_repository.py +++ 
b/db/file_repository.py @@ -1,6 +1,6 @@ import sqlite3 from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from typing import List, Dict DB_PATH = Path("data/system.db") @@ -32,7 +32,7 @@ def add_file(username: str, tenant_id: str, filename: str, path: str) -> None: cursor = conn.cursor() cursor.execute( "INSERT INTO files (username, tenant_id, filename, path, uploaded_at) VALUES (?, ?, ?, ?, ?)", - (username, tenant_id, filename, path, datetime.utcnow().isoformat()), + (username, tenant_id, filename, path, datetime.now(timezone.utc).isoformat()), ) conn.commit() conn.close() diff --git a/db/query_engine.py b/db/query_engine.py index deaa74d..157c8bf 100644 --- a/db/query_engine.py +++ b/db/query_engine.py @@ -1,4 +1,33 @@ -"""Utility for routing queries to the correct connector.""" +"""Utility for routing queries to the correct tenant-specific data source. + +``exact_lookup`` is the fast path the orchestrator uses when it detects a +specific entity in the user's message (a date, a date+time, or a ticker). +It tries to return a deterministic row from the tenant's structured store +so the LLM's answer can be grounded in the exact value, not retrieved prose. + +Historical shape: fixed `market_data` table at `data/{tenant}.db` keyed by +a `YYYY-MM-DD HH:MM` date string. + +New generalised shape: dispatches by tenant and table, supports: + +* ``market_data(date, open, high, low, close, volume)`` — legacy (company + vault, organization vault). +* ``daily_bars(ticker, date, open, high, low, close, volume)`` — the + financebench tenant. Requires a ticker hint in the user message; matches + ISO date-only (``YYYY-MM-DD``) OR ``YYYY-MM-DD HH:MM``. +* Any other table can be wired in by adding a resolver. + +Retains backwards compatibility — the old signature still works, and +existing tests that monkey-patch ``TenantConfig`` and ``exact_lookup`` +still pass. 
+""" + +from __future__ import annotations + +import re +import sqlite3 +from pathlib import Path +from typing import Callable from config.tenant_config import TenantConfig from .connectors import ( @@ -30,40 +59,132 @@ def __init__(self, tenant_id: str) -> None: self.connector.connect() def execute(self, query: str, params=None): - """Execute a query using the tenant's connector.""" return self.connector.execute(query, params) def close(self) -> None: - """Close the active connector.""" self.connector.close() -def exact_lookup(date_str: str, tenant_id: str, table: str = "market_data") -> dict: - """Retrieve exact row from the tenant's database by date. +# ---------------------------------------------------------------------- +# Pattern recognition - Args: - date_str: Date-time string in ISO format "YYYY-MM-DD HH:MM" - tenant_id: Current tenant identifier - table: Target table name - Returns: - dict: Matching row or {} if not found - """ - import sqlite3 +ISO_DATETIME = re.compile(r"\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}") +ISO_DATE = re.compile(r"\d{4}-\d{2}-\d{2}") +TICKER_PATTERN = re.compile(r"\b([A-Z]{1,5})\b") +# Common English words that happen to be 1-5 uppercase letters and should +# NOT be treated as ticker hints. 
+TICKER_STOPWORDS = { + "A", "I", "IS", "IN", "OF", "ON", "AT", "AS", "BY", "TO", "IF", "IT", + "OR", "SO", "DO", "NO", "US", "AM", "PM", "BE", "AN", "GO", "FY", "Q", + "Q1", "Q2", "Q3", "Q4", "USD", "THE", "FOR", "AND", "WAS", "HAS", + "ARE", "HOW", "WHO", "WHY", "WHAT", "WHEN", +} + + +def _detect_ticker(message: str) -> str | None: + """Try to pull an uppercase stock ticker hint out of the message.""" + for m in TICKER_PATTERN.finditer(message): + candidate = m.group(1) + if candidate in TICKER_STOPWORDS: + continue + return candidate + return None - db_path = f"data/{tenant_id}.db" + +# ---------------------------------------------------------------------- +# Structured DB lookup resolvers + + +def _lookup_market_data(db_path: Path, date_str: str) -> dict: + """Legacy path: one market_data table keyed by date string only.""" + if not db_path.exists(): + return {} try: conn = sqlite3.connect(db_path) - cursor = conn.cursor() - query = f"SELECT * FROM {table} WHERE date = ? LIMIT 1" - cursor.execute(query, (date_str,)) - row = cursor.fetchone() + cur = conn.cursor() + cur.execute( + "SELECT * FROM market_data WHERE date = ? LIMIT 1", + (date_str,), + ) + row = cur.fetchone() conn.close() except Exception: return {} - if not row: return {} - columns = ["date", "open", "high", "low", "close", "volume"] return dict(zip(columns, row)) + + +def _lookup_daily_bars( + db_path: Path, + ticker: str, + date_str: str, +) -> dict: + """financebench path: daily_bars(ticker, date, open, high, low, close, volume). + + Accepts date-only (YYYY-MM-DD) or date+time (YYYY-MM-DD HH:MM) inputs + and normalises to date-only for matching, since daily bars have no + intraday resolution. + """ + if not db_path.exists(): + return {} + date_only = date_str[:10] + try: + conn = sqlite3.connect(db_path) + cur = conn.cursor() + cur.execute( + "SELECT ticker, date, open, high, low, close, volume " + "FROM daily_bars WHERE ticker = ? AND date = ? 
LIMIT 1", + (ticker, date_only), + ) + row = cur.fetchone() + conn.close() + except Exception: + return {} + if not row: + return {} + return { + "ticker": row[0], + "date": row[1], + "open": row[2], + "high": row[3], + "low": row[4], + "close": row[5], + "volume": row[6], + "table": "daily_bars", + } + + +# ---------------------------------------------------------------------- +# Main dispatcher + + +def exact_lookup(date_str: str, tenant_id: str, table: str = "market_data", **extra) -> dict: + """Resolve an exact row for ``date_str`` in the tenant's structured DB. + + Tries in order: + + 1. If ``tenant_id`` is a known financebench tenant AND a ticker hint is + present in ``extra['ticker']`` or in ``extra['message']``, look it up + in ``financebench.db#daily_bars``. + 2. Fall back to ``data/{tenant_id}.db#market_data`` keyed by the raw date + string (legacy path). + + Returns an empty dict if nothing matches. Never raises — callers treat + an empty result as "no exact match, fall through to RAG". + """ + # Tier 1 — ticker-aware lookup for financebench + if tenant_id == "financebench": + ticker = extra.get("ticker") + if not ticker and extra.get("message"): + ticker = _detect_ticker(extra["message"]) + if ticker: + row = _lookup_daily_bars(Path("data/financebench.db"), ticker, date_str) + if row: + return row + + # Tier 2 — legacy market_data path + db_path = Path(f"data/{tenant_id}.db") + return _lookup_market_data(db_path, date_str) diff --git a/db/rag_trace_repository.py b/db/rag_trace_repository.py new file mode 100644 index 0000000..2673324 --- /dev/null +++ b/db/rag_trace_repository.py @@ -0,0 +1,156 @@ +"""Persistent storage for RAG visualizer traces. + +A saved trace captures the full /chat/trace response for a single query so +it can be reloaded later without re-running the pipeline. 
Useful for: + +* Sharing a "look at this retrieval behavior" example with someone else +* Demonstrating RAG quality on expensive LLM backends (Claude/OpenAI) once, + then showing the result in perpetuity without spending tokens +* Keeping a regression set of interesting queries for each tenant + +Stored in data/system.db alongside users/settings. One row per trace; the +full trace JSON goes into the ``trace_json`` column so that reload is a pure +table read with no re-ingest. +""" + +from __future__ import annotations + +import json +import sqlite3 +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +DB_PATH = Path("data/system.db") + + +def _conn() -> sqlite3.Connection: + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(DB_PATH) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS rag_traces ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + tenant_id TEXT NOT NULL, + title TEXT NOT NULL, + query TEXT NOT NULL, + reply TEXT NOT NULL, + trace_json TEXT NOT NULL, + created_by TEXT NOT NULL, + created_at TEXT NOT NULL + ) + """ + ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_rag_traces_tenant ON rag_traces(tenant_id)") + return conn + + +def save_trace( + *, + tenant_id: str, + title: str, + query: str, + reply: str, + trace: dict[str, Any], + created_by: str, +) -> int: + now = datetime.now(timezone.utc).isoformat() + conn = _conn() + try: + cur = conn.execute( + """ + INSERT INTO rag_traces (tenant_id, title, query, reply, trace_json, created_by, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + tenant_id, + title or query[:80], + query, + reply or "", + json.dumps(trace, ensure_ascii=False), + created_by, + now, + ), + ) + conn.commit() + return int(cur.lastrowid) + finally: + conn.close() + + +def list_traces(tenant_id: str | None = None) -> list[dict]: + """Return metadata rows only (no trace_json) for the sidebar list.""" + conn = _conn() + try: + if tenant_id: + rows = conn.execute( + """ + SELECT id, tenant_id, title, query, created_by, created_at, length(reply) AS reply_len + FROM rag_traces + WHERE tenant_id = ? + ORDER BY created_at DESC, id DESC + """, + (tenant_id,), + ).fetchall() + else: + rows = conn.execute( + """ + SELECT id, tenant_id, title, query, created_by, created_at, length(reply) AS reply_len + FROM rag_traces + ORDER BY created_at DESC, id DESC + """ + ).fetchall() + finally: + conn.close() + return [ + { + "id": r[0], + "tenant_id": r[1], + "title": r[2], + "query": r[3], + "created_by": r[4], + "created_at": r[5], + "reply_len": r[6], + } + for r in rows + ] + + +def get_trace(trace_id: int) -> dict | None: + conn = _conn() + try: + row = conn.execute( + """ + SELECT id, tenant_id, title, query, reply, trace_json, created_by, created_at + FROM rag_traces WHERE id = ? 
+ """, + (trace_id,), + ).fetchone() + finally: + conn.close() + if not row: + return None + try: + trace = json.loads(row[5]) + except Exception: + trace = {} + return { + "id": row[0], + "tenant_id": row[1], + "title": row[2], + "query": row[3], + "reply": row[4], + "trace": trace, + "created_by": row[6], + "created_at": row[7], + } + + +def delete_trace(trace_id: int) -> bool: + conn = _conn() + try: + cur = conn.execute("DELETE FROM rag_traces WHERE id = ?", (trace_id,)) + conn.commit() + return cur.rowcount > 0 + finally: + conn.close() diff --git a/db/settings_repository.py b/db/settings_repository.py new file mode 100644 index 0000000..c400ddd --- /dev/null +++ b/db/settings_repository.py @@ -0,0 +1,120 @@ +"""Key-value settings store backing the admin Settings UI. + +Holds runtime configuration that should be editable without restarting the +backend — API keys, base URLs, default model choices. Values set here take +precedence over environment variables. + +Secrets are stored in plaintext. That is acceptable for a single-operator +dev/demo deployment; for production, move the store to an encrypted backend +(KMS, Vault, SOPS) or at minimum encrypt the column at rest. 
+""" + +from __future__ import annotations + +import os +import sqlite3 +from pathlib import Path +from typing import Dict, Iterable + +DB_PATH = Path("data/system.db") + +KNOWN_KEYS = { + "anthropic_api_key", + "openai_api_key", + "ollama_base_url", +} + + +def _conn() -> sqlite3.Connection: + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(DB_PATH) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS settings ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL, + updated_at TEXT NOT NULL DEFAULT '' + ) + """ + ) + return conn + + +def get(key: str, default: str | None = None) -> str | None: + """Return the stored setting, falling back to the matching env var, then default.""" + conn = _conn() + try: + row = conn.execute("SELECT value FROM settings WHERE key = ?", (key,)).fetchone() + finally: + conn.close() + if row and row[0]: + return row[0] + env_key = key.upper() + return os.getenv(env_key, default) + + +def set_many(values: Dict[str, str]) -> None: + """Upsert multiple settings. Empty string clears a setting (falls back to env).""" + from datetime import datetime, timezone + + now = datetime.now(timezone.utc).isoformat() + conn = _conn() + try: + for k, v in values.items(): + if k not in KNOWN_KEYS: + continue + if v == "": + conn.execute("DELETE FROM settings WHERE key = ?", (k,)) + else: + conn.execute( + "INSERT INTO settings (key, value, updated_at) VALUES (?, ?, ?) " + "ON CONFLICT(key) DO UPDATE SET value=excluded.value, updated_at=excluded.updated_at", + (k, v, now), + ) + conn.commit() + finally: + conn.close() + + +def all_masked() -> Dict[str, Dict[str, object]]: + """Return every known setting with its value masked for display. + + Shape: ``{key: {"source": "db"|"env"|"unset", "has_value": bool, "preview": str}}``. + Secrets are never returned in full — the UI only needs to confirm a value is present. 
+ """ + result: Dict[str, Dict[str, object]] = {} + conn = _conn() + try: + rows = { + k: v + for k, v in conn.execute("SELECT key, value FROM settings").fetchall() + } + finally: + conn.close() + + for key in KNOWN_KEYS: + db_val = rows.get(key) + env_val = os.getenv(key.upper()) + value = db_val or env_val + if db_val: + source = "db" + elif env_val: + source = "env" + else: + source = "unset" + result[key] = { + "source": source, + "has_value": bool(value), + "preview": _mask(value) if value and _is_secret(key) else (value or ""), + } + return result + + +def _is_secret(key: str) -> bool: + return key.endswith("_api_key") or key.endswith("_token") or key.endswith("_secret") + + +def _mask(value: str) -> str: + if len(value) <= 8: + return "••••" + return f"{value[:4]}••••{value[-4:]}" diff --git a/db/usage_repository.py b/db/usage_repository.py new file mode 100644 index 0000000..62c1a7b --- /dev/null +++ b/db/usage_repository.py @@ -0,0 +1,170 @@ +"""Per-tenant/user LLM usage tracking for rate limiting + cost display. + +Tracks token counts per request tagged with tenant, user, provider, model, +plus wall-clock latency. Writes to ``data/system.db#llm_usage``. The admin +can see daily rollups; the rate-limit middleware uses the day's total +token count against a per-tenant cap (default: unlimited). +""" + +from __future__ import annotations + +import sqlite3 +from datetime import datetime, timezone +from pathlib import Path + +DB_PATH = Path("data/system.db") + +# Approximate blended prices per 1M tokens (USD). Used for the admin +# display only — NOT authoritative. Replace with Anthropic/OpenAI model +# prices for your actual plan. 
+PRICES_PER_1M = { + "anthropic": { + "claude-opus-4-6": {"input": 5.0, "output": 25.0}, + "claude-sonnet-4-6": {"input": 3.0, "output": 15.0}, + "claude-haiku-4-5": {"input": 1.0, "output": 5.0}, + }, + "openai": { + "gpt-4o-mini": {"input": 0.15, "output": 0.6}, + "gpt-4o": {"input": 2.5, "output": 10.0}, + }, + "ollama": {}, # free, local +} + + +def _init_db() -> sqlite3.Connection: + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(DB_PATH) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS llm_usage ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + tenant_id TEXT NOT NULL, + username TEXT NOT NULL, + provider TEXT NOT NULL, + model_name TEXT NOT NULL, + input_tokens INTEGER NOT NULL DEFAULT 0, + output_tokens INTEGER NOT NULL DEFAULT 0, + cache_read_tokens INTEGER NOT NULL DEFAULT 0, + cache_create_tokens INTEGER NOT NULL DEFAULT 0, + latency_ms REAL NOT NULL DEFAULT 0, + created_at TEXT NOT NULL + ) + """ + ) + conn.execute("CREATE INDEX IF NOT EXISTS idx_llm_usage_tenant ON llm_usage(tenant_id)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_llm_usage_day ON llm_usage(created_at)") + return conn + + +def record( + *, + tenant_id: str, + username: str, + provider: str, + model_name: str, + input_tokens: int = 0, + output_tokens: int = 0, + cache_read_tokens: int = 0, + cache_create_tokens: int = 0, + latency_ms: float = 0.0, +) -> None: + conn = _init_db() + try: + conn.execute( + """ + INSERT INTO llm_usage ( + tenant_id, username, provider, model_name, + input_tokens, output_tokens, cache_read_tokens, cache_create_tokens, + latency_ms, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + tenant_id, + username, + provider, + model_name, + int(input_tokens or 0), + int(output_tokens or 0), + int(cache_read_tokens or 0), + int(cache_create_tokens or 0), + float(latency_ms or 0), + datetime.now(timezone.utc).isoformat(), + ), + ) + conn.commit() + finally: + conn.close() + + +def today_token_total(tenant_id: str) -> int: + """Total billable tokens (input + output) for this tenant today (UTC).""" + day = datetime.now(timezone.utc).strftime("%Y-%m-%d") + conn = _init_db() + try: + row = conn.execute( + """ + SELECT COALESCE(SUM(input_tokens + output_tokens), 0) + FROM llm_usage + WHERE tenant_id = ? AND substr(created_at, 1, 10) = ? + """, + (tenant_id, day), + ).fetchone() + return int(row[0] or 0) + finally: + conn.close() + + +def estimate_cost_usd( + provider: str, model_name: str, input_tokens: int, output_tokens: int +) -> float: + prices = PRICES_PER_1M.get(provider, {}).get(model_name) + if not prices: + return 0.0 + return (input_tokens * prices["input"] + output_tokens * prices["output"]) / 1_000_000 + + +def rollup(limit: int = 30) -> list[dict]: + """Per-tenant per-day aggregate for the admin Usage view.""" + conn = _init_db() + try: + rows = conn.execute( + """ + SELECT + substr(created_at, 1, 10) AS day, + tenant_id, + provider, + model_name, + SUM(input_tokens) AS input_t, + SUM(output_tokens) AS output_t, + SUM(cache_read_tokens) AS cache_read_t, + SUM(cache_create_tokens) AS cache_create_t, + COUNT(*) AS request_count, + AVG(latency_ms) AS avg_latency_ms + FROM llm_usage + GROUP BY day, tenant_id, provider, model_name + ORDER BY day DESC, tenant_id + LIMIT ? 
+ """, + (limit,), + ).fetchall() + finally: + conn.close() + out = [] + for r in rows: + cost = estimate_cost_usd(r[2], r[3], int(r[4] or 0), int(r[5] or 0)) + out.append( + { + "day": r[0], + "tenant_id": r[1], + "provider": r[2], + "model_name": r[3], + "input_tokens": int(r[4] or 0), + "output_tokens": int(r[5] or 0), + "cache_read_tokens": int(r[6] or 0), + "cache_create_tokens": int(r[7] or 0), + "request_count": int(r[8] or 0), + "avg_latency_ms": float(r[9] or 0), + "est_cost_usd": round(cost, 4), + } + ) + return out diff --git a/db/user_repository.py b/db/user_repository.py index 455cf4a..48a324b 100644 --- a/db/user_repository.py +++ b/db/user_repository.py @@ -1,7 +1,7 @@ import sqlite3 from pathlib import Path from typing import Optional, Dict, List -from datetime import datetime +from datetime import datetime, timezone from passlib.hash import bcrypt @@ -73,7 +73,7 @@ def init_db() -> None: def _current_time() -> str: - return datetime.utcnow().isoformat() + return datetime.now(timezone.utc).isoformat() def create_user(username: str, password: str, role: str, tenant_id: Optional[str] = None) -> None: diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..ff4c232 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,85 @@ +# SmartBaseAI — Architecture + +SmartBaseAI is a multi-tenant LLM platform that grounds each chat turn in **three** data sources: conversation history, an exact row lookup against the tenant's structured database, and hybrid retrieval over the tenant's document store. The result is a single prompt in which verifiable facts take precedence over free-form context. + +## High-level flow + +```mermaid +flowchart LR + U[User] --> FE[React + Vite UI] + FE -->|JWT| API[FastAPI
auth · chat · admin · files] + + API --> ORCH[ResponseGenerator
orchestrator] + + ORCH --> HIST[(Conversation
history)] + ORCH --> DB[(Tenant DB
Postgres · MySQL · Mongo · API)] + ORCH --> RAG[Hybrid retrieval
keyword ∪ semantic] + + RAG --> VS[(Per-tenant Chroma
MiniLM-L6-v2)] + + ORCH --> LLM{LLM backend} + LLM --> OAI[OpenAI] + LLM --> ANT[Anthropic] + LLM --> OLL[Ollama] + LLM --> LLA[Local Llama] + + ING[ETL Manager] -->|clean + metadata| VS + SRC[(Source DBs / APIs)] --> ING +``` + +## Request lifecycle + +A `POST /chat/message` call flows through the following stages: + +1. **Auth** — `auth_middleware.get_current_user` decodes the JWT and loads the user from `data/system.db`. Non-super-admin users can only chat within their own tenant (`routes_chat.chat_message`). +2. **Tenant config lookup** — `TenantManager.get(tenant_id)` reads `tenants/tenants.json` on every call (no in-memory cache) and returns the tenant's `model_type`, `model_name`, and DB config. +3. **Session update** — the incoming message is appended to both the in-memory `ConversationManager` and the persistent `conversation_repository` (SQLite). +4. **Orchestration** — a fresh `ResponseGenerator` is built per request with the tenant's `model_type`. It calls three helpers in sequence: + - `_lookup_db(message)` — regex-matches an ISO datetime (`YYYY-MM-DD HH:MM`), then `exact_lookup` pulls the matching row from `data/{tenant_id}.db#market_data`. Returns a formatted one-liner or `""`. + - `_search_rag(message)` — delegates to `RAGPipeline.retrieve_context`, which runs `TenantVectorStore.hybrid_query` (keyword ∪ semantic). Returns concatenated document text. + - `_format_history(history)` — serializes the conversation history. +5. **Fusion** — `_merge_sources(db_text, rag_text)` prefers the DB hit and treats RAG as supplemental (`"{db}\n\nAdditional context:\n{rag}"`). If both are empty the reply is an explicit `"No information"` rather than a hallucination. +6. **Generation** — `_build_prompt` assembles `Conversation / Context / User / Assistant:` and sends it to the tenant's configured model (`OllamaModel` / `OpenAIModel` / `AnthropicModel` / `LocalLlamaModel`). +7.
**Persistence + audit** — the reply is appended to both conversation stores and an action row is written to `audit_log_repository`. + +## Multi-tenancy model + +- **Config:** `tenants/tenants.json` — one entry per tenant with model type, name, DB type, and DB config. Read on every request so changes via the admin API take effect immediately. +- **Vector store:** `vector_store/{tenant_id}/` — persistent Chroma collection per tenant. Uses `sentence-transformers/all-MiniLM-L6-v2` with CUDA if available, CPU fallback. Hybrid query combines exact keyword matches with semantic top-k. +- **Structured DB:** `data/{tenant_id}.db` (SQLite in dev) or an external Postgres / MySQL / Mongo / HTTP API via `db/connectors/`. The `exact_lookup` fast path is used when the user's message contains a date. +- **Auth:** `users.tenant_id` scopes every request. `super_admin` has cross-tenant access; `admin` is tenant-scoped; `user` is tenant-scoped and cannot manage users. +- **File uploads:** `POST /files/upload` stores the raw file under `data/uploads///` **and** — for supported text formats (`.txt`, `.md`, `.csv`, `.log`) — pushes the content into the tenant's Chroma store, so uploaded docs are immediately searchable. + +## Ingestion pipeline (`ingestion/etl_manager.py`) + +For bulk loads from source systems (rather than user uploads): + +``` +source (Postgres / MySQL / Mongo / HTTP API) + → ETLManager.extract (connector.execute) + → ETLManager.transform (cleaners.clean_text + metadata_generator) + → ETLManager.load (→ tenant vector store) +``` + +Each connector implements a uniform `connect / execute / close` interface (`db/connectors/*.py`), making it straightforward to add new sources.
+ +## Layering + +| Layer | Modules | +|------------------|-------------------------------------------------------------------------| +| API | `api/app.py`, `api/routes_*.py`, `api/auth_middleware.py`, `api/config.py` | +| Orchestration | `chatbot/response_generator.py`, `chatbot/conversation_manager.py`, `chatbot/intent_recognition.py` | +| Retrieval | `ai/rag_pipeline.py`, `ai/vector_stores/{chroma_store,faiss_store,pinecone_store}.py`, `ai/embeddings/*.py` | +| LLM backends | `ai/models/{openai_model,anthropic_model,ollama_model,local_llama_model}.py`, `ai/model_manager.py` | +| Data access | `db/query_engine.py`, `db/connectors/*.py`, `db/{user,file,conversation,audit_log}_repository.py` | +| Ingestion | `ingestion/etl_manager.py`, `ingestion/cleaners.py`, `ingestion/metadata_generator.py` | +| Multi-tenancy | `tenants/tenant_manager.py`, `tenants/tenant_storage.py` | +| Frontend | `frontend/src/**` (React 19, Vite, Tailwind, React Router, Axios) | + +## Design notes + +- **DB-preferred fusion.** The orchestrator treats exact structured lookups as ground truth and RAG as supplemental context. This matters in domains where hallucinating a numeric answer is unacceptable (finance, ops, analytics) — the LLM sees the authoritative row *before* the retrieved prose. +- **Explicit `"No information"` fallback.** When both sources are empty, the system does not call the LLM at all. This is a deliberate guardrail against fluent-but-wrong answers. +- **Hybrid retrieval over pure semantic.** `TenantVectorStore.hybrid_query` unions keyword hits with semantic top-k, deduplicating by document identity. This recovers exact-match queries that embeddings miss (codes, IDs, acronyms). +- **Stateless tenant manager.** `TenantManager` re-reads `tenants.json` on every call. Simpler and avoids a stale-cache class of bug that previously caused newly-created tenants to be invisible to the chat route until a restart. 
+- **Per-tenant persistence isolation.** Each tenant's Chroma store lives in its own directory; each tenant's structured SQLite lives in its own file. No query joins across tenant boundaries are possible. diff --git a/docs/ci-workflow.yml.template b/docs/ci-workflow.yml.template new file mode 100644 index 0000000..3c9c5c9 --- /dev/null +++ b/docs/ci-workflow.yml.template @@ -0,0 +1,76 @@ +# GitHub Actions workflow template. +# +# This file cannot live at its final path (.github/workflows/test.yml) +# until the pushing PAT has the "workflow" scope. GitHub rejects any push +# that adds or modifies files under .github/workflows/ from a token without +# that scope, regardless of repo permissions. +# +# To activate CI: +# +# 1. gh auth refresh -s workflow # grants workflow scope on your PAT +# 2. mkdir -p .github/workflows +# 3. cp docs/ci-workflow.yml.template .github/workflows/test.yml +# 4. git add .github/workflows/test.yml +# 5. git commit -m "ci: add test workflow" +# 6. git push +# +# After the first successful run, add the badge to the README: +# +# ![test](https://github.com/danrixd/smartbaseai/actions/workflows/test.yml/badge.svg) + +name: test + +on: + push: + branches: [main, upgrade] + pull_request: + branches: [main] + +jobs: + backend: + name: Backend (pytest) + runs-on: ubuntu-latest + env: + SECRET_KEY: ci-placeholder-secret-min-32-bytes-for-hmac + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + cache-dependency-path: requirements.txt + + - name: Install backend deps + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run pytest + run: pytest -q + + frontend: + name: Frontend (vite build) + runs-on: ubuntu-latest + defaults: + run: + working-directory: frontend + steps: + - uses: actions/checkout@v4 + + - name: Set up Node 20 + uses: actions/setup-node@v4 + with: + node-version: "20" + cache: npm + 
cache-dependency-path: frontend/package-lock.json + + - name: Install frontend deps + run: npm ci + + - name: Build + run: npm run build + + - name: Lint (non-blocking) + run: npm run lint || true diff --git a/docs/demo-script.md b/docs/demo-script.md new file mode 100644 index 0000000..055ea59 --- /dev/null +++ b/docs/demo-script.md @@ -0,0 +1,58 @@ +# Demo script — SmartBaseAI walkthrough + +A 90-second walkthrough anyone can record with ScreenToGif / Peek / OBS +to produce a GIF or MP4 for the README. Each step is narrated with the +vault / query / expected output so the recorder can line them up. + +**Recording window:** 1440 × 900. Open a Chrome window at that size to keep +the capture tight. Use incognito so the sidebar doesn't cache state. + +## Step 0 — log in +- Visit +- Username: `admin` Password: `ChangeThis123!` +- Should land on the Chat page with the "Pick a vault" gate visible + +## Step 1 — pick a vault (shows the multi-tenant story) +- Click the "Active vault" dropdown in the top right +- Select `financebench` +- The gate disappears, chat UI renders + +## Step 2 — the killer query (DB + RAG fusion) +- Click the RAG Visualizer sidebar link +- Paste: *"What was AAPL closing price on 2024-06-14?"* +- Click **Run trace** +- DB exact lookup card goes green with the AAPL row (close=212.49, volume=70.1M) +- Hybrid retrieval card shows top-3 semantic matches with distance bars +- Fusion block shows the DB row + "Additional context:" with retrieved chunks +- LLM card shows the Anthropic reply grounded in both + +## Step 3 — fundamentals query (pure RAG) +- Paste: *"What was Microsoft's net income in FY2023?"* +- Scroll to the "Vector store search" panel +- Call out: 197,201 vectors scanned, MiniLM-L6-v2 on CUDA, score bars + +## Step 4 — save the trace (zero-token replay) +- Click **💾 Save trace**, name it "MSFT net income FY2023 (Opus)" +- Expand "Saved traces" — the row appears +- Click **⬇ .md** — downloads a formatted markdown report + +## Step 5 — 
vault editor with search +- Click Vault in sidebar +- Search `AAPL` — tree collapses to just the Apple folder +- Click `profile.md`, show the edit / split / preview toggle + +## Step 6 — cross-tenant search (super_admin only) +- Click Cross-tenant Search +- Query: `quokka` — hits in the `organization` vault + +## Step 7 — usage dashboard +- Click Usage — per-day × per-tenant rollup with estimated cost + +## Short version (30 seconds) +Steps 0 → 2 only. The headline story: multi-tenant knowledge vault with +live LLM providers, orchestrator hits SQLite for the exact row and RAG +for context, pipeline visualized live. + +## Suggested output +- `demo.gif` at repo root, referenced from README +- 15–20 seconds, 8–10 fps, ≤5 MB diff --git a/docs/financebench_eval.md b/docs/financebench_eval.md new file mode 100644 index 0000000..b3e692a --- /dev/null +++ b/docs/financebench_eval.md @@ -0,0 +1,137 @@ +# FinanceBench evaluation results + +Ran **20** questions against the `financebench` tenant via `http://127.0.0.1:8799` using **anthropic/claude-opus-4-6**. + +## Headline accuracy + +| Metric | Passed | Rate | +|---|---:|---:| +| Numeric match (tolerance 2%) | 9 / 20 | 45.0% | +| Substring match | 0 / 20 | 0.0% | +| **Any auto-score** | **9 / 20** | **45.0%** | + +## Latency + +- mean: **42959 ms**, median: **41719 ms**, max: **66531 ms** + +## Per question type + +| Question type | Total | Passed | Rate | +|---|---:|---:|---:| +| `domain-relevant` | 10 | 5 | 50.0% | +| `novel-generated` | 10 | 4 | 40.0% | + +## Sample failures + +### `financebench_id_00206` — JPMorgan (JPM) +**Q**: Are JPM's gross margins historically consistent (not fluctuating more than roughly 2% each year)? If gross margins are not a relevant metric for a company like this, then please state that and explain why. + +**Canonical**: Since JPM is a financial institution, gross margin is not a relevant metric. 
+ +**Reply**: ## Gross Margins Are Not a Relevant Metric for JPMorgan Chase (JPM) + +The context provided does not contain any information about JPMorgan Chase (JPM). However, I can explain why gross margin is not a meaningful or relevant metric for a company like JPM: + +**JPMorgan Chase is a major financial institution (bank/financial services company), and gross margin is not a standard or useful metric for evaluating banks.** Here's why: + +1. **Banks don't have "cost of goods sold."** Gross margin is calculated as (Revenue - Cost of Goods Sold) / Revenue. Banks don't manufacture or sell physical products, so… + +### `financebench_id_00216` — Verizon (VZ) +**Q**: Does Verizon have a reasonably healthy liquidity profile based on its quick ratio for FY 2022? If the quick ratio is not relevant to measure liquidity, please state that and explain why. + +**Canonical**: No. The quick ratio was approximately 0.54 for Verizon. It indicated that Verizon does not have a healthy liquidity profile. + +**Reply**: Based on the context provided, I don't have any information about Verizon or its financial data for FY 2022. The context contains liquidity risk management information related to Citizens Financial Group, JPMorgan Chase (referenced as "the Firm"), and Allstate, but there is no data or discussion pertaining to Verizon's quick ratio or any other Verizon financial metrics. + +Therefore, I'm unable to assess whether Verizon has a reasonably healthy liquidity profile based on its quick ratio for FY 2022, nor can I provide any relevant commentary on the applicability of the quick ratio to Verizon's bu… + +### `financebench_id_00222` — AMD (AMD) +**Q**: Does AMD have a reasonably healthy liquidity profile based on its quick ratio for FY22? If the quick ratio is not relevant to measure liquidity, please state that and explain why. + +**Canonical**: Yes. 
The quick ratio is 1.57, calculated as (cash and cash equivalents+Short term investments+Accounts receivable, net+receivables from related parties)/ (current liabilities). + +**Reply**: Based on the context provided, I don't have any financial data related to **AMD** (Advanced Micro Devices) for FY22 or any other period. The context contains liquidity and financial information related to **Pfizer**, **JPMorgan Chase** (or a similar large bank), and **Citizens Financial Group**, but nothing pertaining to AMD. + +Therefore, I'm unable to assess AMD's quick ratio or its liquidity profile for FY22 based on the information available to me. To properly answer this question, I would need AMD's FY22 balance sheet data, including current assets (cash, marketable securities, accounts rec… + +### `financebench_id_00283` — Pfizer (PFE) +**Q**: How much does Pfizer expect to pay to spin off Upjohn in the future in USD million? + +**Canonical**: 77.78 + +**Reply**: Based on the context provided, I don't have specific information about how much Pfizer expects to pay to spin off Upjohn. The context includes financial data about Pfizer's operating segments, revenue figures, and restructuring/acquisition-related costs, but it does not contain details about the expected costs specifically associated with the Upjohn spin-off transaction. + +While there are references to the "Transforming to a More Focused Company" program (which was related to the Upjohn separation and combination with Mylan), the context does not break out a specific expected total cost figure … + +### `financebench_id_00299` — JPMorgan (JPM) +**Q**: Which of JPM's business segments had the lowest net revenue in 2021 Q1? + +**Canonical**: Corporate. Its net revenue was -$473 million. + +**Reply**: I don't have that information. The context provided includes annual (full-year) data for JPMorgan Chase's segments, but it does not contain a quarterly breakdown of net revenue by business segment for Q1 2021. 
To answer your question accurately, I would need JPMorgan Chase's Q1 2021 earnings data broken down by segment. + +### `financebench_id_00302` — Pfizer (PFE) +**Q**: Did Pfizer grow its PPNE between FY20 and FY21? + +**Canonical**: Yes, change in PPNE was positive year over year + +**Reply**: Based on the context provided, I don't have the specific Property, Plant, and Equipment (PP&E) figures for Pfizer's FY20 and FY21 to determine whether PP&E grew between those years. The context references pages from Pfizer's 2020 and 2021 Form 10-K filings but does not include the actual numerical data for PP&E. I would need the balance sheet or notes with those specific figures to answer your question accurately. + +### `financebench_id_00394` — JPMorgan (JPM) +**Q**: In 2022 Q2, which of JPM's business segments had the highest net income? + +**Canonical**: Corporate & Investment Bank. Its net income was $3725 million. + +**Reply**: I don't have that information. The context provided contains JPMorgan Chase's full-year 2022 data and some segment-level metrics, but it does not include a quarterly (Q2 2022) breakdown of net income by business segment. To answer your question accurately, I would need JPMorgan Chase's Q2 2022 earnings release or 10-Q filing, which is not included in the context provided. + +### `financebench_id_00407` — MGM Resorts (MGM) +**Q**: Which type of debt received the largest investment among the short term investments for MGM in H1 FY2023? + +**Canonical**: the biggest short term investment is in corporate bonds (almost 82% of the total investment) + +**Reply**: I don't have information about MGM's short-term investments for H1 FY2023 in the provided context. The context contains MGM's financial data primarily from fiscal years 2015-2020, including cash flow statements and long-term debt details, but does not include any data about short-term investment breakdowns for H1 FY2023. 
+ +### `financebench_id_00464` — Boeing (BA) +**Q**: Is Boeing's business subject to cyclicality? + +**Canonical**: Yes, Boeing's business is subject to cyclicality due to its exposure to the airline industry which is a cyclical industry. + +**Reply**: Yes, Boeing's business is clearly subject to cyclicality, as evidenced by the financial data in the context: + +**Revenue Cyclicality:** +- **2015:** $96,114 million +- **2016:** $94,571 million +- **2017:** $93,392 million +- **2018:** $101,127 million (significant upturn) +- **2019:** $76,559 million (sharp decline) +- **2020:** $58,158 million (further steep decline) + +**Earnings Cyclicality:** +- **2015:** Net earnings of $5,176 million +- **2016:** Net earnings of $4,895 million +- **2017:** Net earnings of $8,197 million +- **2018:** Net earnings of $10,460 million (peak) +- **2019:** Net loss of ($63… + +### `financebench_id_00476` — American Express (AXP) +**Q**: Which debt securities are registered to trade on a national securities exchange under American Express' name as of 2022? + +**Canonical**: There are none + +**Reply**: Based on the context provided, I don't have specific information about which American Express debt securities are registered to trade on a national securities exchange. The context describes the types of debt issued by American Express in 2022 — including Fixed Rate Senior Notes, Floating Rate Senior Notes, Fixed-to-Floating Rate Senior Notes, Fixed-to-Floating Rate Subordinated Notes, and American Express Credit Account Master Trust Fixed Rate Class A Certificates — but it does not specify which of these (if any) are registered to trade on a national securities exchange. + +To find this informa… + +### `financebench_id_00499` — 3M (MMM) +**Q**: Is 3M a capital-intensive business based on FY2022 data? 
+ +**Canonical**: No, the company is managing its CAPEX and Fixed Assets pretty efficiently, which is evident from below key metrics: +CAPEX/Revenue Ratio: 5.1% +Fixed assets/Total Assets: 20% +Return on Assets= 12.4% + +**Reply**: I don't have sufficient FY2022 data in the provided context to make a definitive assessment of whether 3M is a capital-intensive business. The context primarily contains information from FY2019 and FY2020, including some capital spending and net property, plant and equipment figures, but does not include FY2022 financial data such as revenue, capital expenditures, net PP&E, or depreciation for that year. + +To properly assess capital intensity for FY2022, I would need metrics such as: +- **FY2022 Capital Expenditures relative to Revenue** (capex-to-revenue ratio) +- **FY2022 Net PP&E relative to T… diff --git a/docs/gh-repo-edit.sh b/docs/gh-repo-edit.sh new file mode 100644 index 0000000..4ba9078 --- /dev/null +++ b/docs/gh-repo-edit.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Draft — DO NOT run blindly. Review then execute from the repo root. +# +# These commands set the public repo description and topics so SmartBaseAI +# shows up in searches and on the GitHub profile card with a crisp pitch. + +set -euo pipefail + +gh repo edit danrixd/smartbaseai \ + --description "Multi-tenant LLM platform that grounds answers in both your structured databases and your unstructured documents. Hybrid RAG + exact DB lookups + pluggable model backends." 
\ + --homepage "https://danringart.com" \ + --add-topic llm \ + --add-topic rag \ + --add-topic hybrid-search \ + --add-topic multi-tenant \ + --add-topic fastapi \ + --add-topic python \ + --add-topic react \ + --add-topic chromadb \ + --add-topic sentence-transformers \ + --add-topic ollama diff --git a/docs/pr_body.md b/docs/pr_body.md new file mode 100644 index 0000000..7ddb38e --- /dev/null +++ b/docs/pr_body.md @@ -0,0 +1,97 @@ +Long-running `upgrade` branch — 19 commits that take SmartBaseAI from a half-documented starter kit with mocked model wrappers into a working multi-tenant knowledge platform with real LLM providers, interactive retrieval visualization, a 506-company fundamental-analysis demo, and a full operational surface (audit log, usage tracking, cross-tenant search, session timeout handling, rate limiting, structured logging). + +## Headline highlights + +- **6 seeded knowledge vaults** (`personal`, `company` / Acme Analytics, `organization` / Project Lunar Harbor, `relativity`, `saas-ai`, `smartbase-docs`) plus one large-scale vault `financebench` with **506 S&P 500 companies**, **1,241 markdown files**, **197,201 Chroma vectors**, **1.23 M daily bar rows**, and **150 ground-truth Q&A** pulled from the FinanceBench open-source benchmark. + +- **Real LLM providers** — `AnthropicModel` / `OpenAIModel` / `OllamaModel` now call the live SDKs (was mocked). Anthropic defaults to `claude-opus-4-6` with `cache_control: ephemeral` on the system prompt per the `claude-api` skill guidance, logs `cache_read_input_tokens` per response for verification. Tenants can declare multiple providers; users pick at chat time via a dropdown, with per-request override threaded through `/chat/message` and `/chat/trace`. + +- **RAG Visualizer** (new page) — live view of a single query flowing through the three-source orchestrator. 
Shows the vector-store info strip (tenant, collection, **vector count**, embedding model, device), full semantic candidate ranking with **L2 distance bars**, keyword hits, fusion block, assembled prompt, and LLM reply. Persist traces with 💾 Save trace, reload later without re-running the LLM (**zero-token replay** for demos). Export to JSON / Markdown. + +- **Interactive Vault editor** — 1,744-file tree browser with live search (`components/FileTree.jsx`), collapsible per-ticker folders, inline match highlighting, markdown edit/split/preview toggle, bulk upload, automatic section-aware re-chunk + re-embed on save. + +- **Generalised `exact_lookup`** — the DB-preferred fusion fast path now dispatches by tenant and table. Financebench queries with a ticker hint (sniffed from the message) hit `daily_bars(ticker, date, open, high, low, close, volume)` and return the exact row. Verified live: *"AAPL closing price on 2024-06-14"* → `close=212.49, volume=70.1M` directly from SQLite, RAG retrieval runs as supplemental context only. + +- **FinanceBench eval harness** (`scripts/eval_financebench.py`) — runs the 150 ground-truth questions through `/chat/message`, scores with three strategies (numeric 2% tolerance, substring, optional Claude-as-judge), writes `docs/financebench_eval.md` with headline accuracy, per-question-type breakdown, latency stats, and sample failures. + +- **Production surface** — structured logging with request-id middleware, per-tenant daily token cap with 429 on overage, usage rollup (`/admin/usage`), audit log viewer (`/admin/audit-log`), cross-tenant search (`/admin/search`), session-timeout redirect, error boundary, strict `.env` loader that refuses to leak shell env vars. 
+ +## Commits (newest first) + +``` +911f140 fix(eval): ASCII-only progress output to avoid Windows cp1252 crash +e91e3c7 feat: T2/T3/T4 sweep — 16 of the 17 outstanding gap-analysis items +d5c312d fix(privacy): don't leak absolute filesystem paths into vault content +94738da feat: persist RAG traces so they can be replayed without spending tokens +b7f04bc feat(ui): interactive file tree with search + collapsible folders for Vault +c026ab0 fix(files): recursive vault listing + nested-path routes + financebench mapping +b0730c1 fix(loader): standalone ingest script to sidestep native-state segfault +fd04398 fix(loader): resilient SP500 fetch + bounded chunker + CPU ingest fallback +83835a1 feat(demo): load_financebench.py — SEC 10-Ks + S&P 500 bars/profiles loader +8007c34 docs+test: recruiter README, CHANGELOG, architecture doc, smoke scripts, test adapts +5a8a702 feat(frontend): RAG visualizer, Vault editor, Settings page, live API status, vault-selection gate +6a5db9e feat(demo): six seeded knowledge vaults +1b0e600 feat(backend): real LLM SDKs, multi-model per tenant, settings store, vault CRUD +7b5eddd chore(repo): gitignore runtime state, untrack vector_store + sqlite dbs +``` + +## Test state + +- **pytest: 24 / 24** green. Deprecation warning count dropped from 83 to 12 after the `datetime.utcnow()` cleanup. +- **ui_smoke.py**: 35 / 39 PASS + 4 NEEDS_KEY (providers without keys), 0 FAIL. +- **Frontend vite build**: 270 modules, clean. + +## CI (pending — needs a token with `workflow` scope) + +A `.github/workflows/test.yml` is ready locally (backend pytest + frontend vite build, both on Python 3.12 / Node 20 with pip/npm caching). GitHub rejected the push because the current PAT lacks `workflow` scope. Run `gh auth refresh -s workflow`, then the file can be committed to complete CI setup. 
+ +## Verified live before opening this PR + +- AAPL 2024-06-14 exact_lookup → `close=212.49, volume=70,122,700` straight from `daily_bars` +- Audit log endpoint: 58 historic events, filterable +- Cross-tenant search "quokka" → 29 hits across 7 tenants +- Vault listing on `financebench`: 1,744 files visible recursively +- Privacy scrub: 0 absolute paths in 1,241 markdown files and 0 in the top-5 semantic hits against "Program_Research smartbaseai full-submission.txt" +- Saved-trace round-trip: run → save → list → fetch → delete, all 200 +- Three-source orchestrator verified across all 7 tenants + +## What is still outstanding + +- Demo GIF + screenshots (`docs/screenshots/` empty) +- CI file push (PAT scope) +- Live deploy target (Fly.io / Railway) +- FinanceBench eval run to completion — harness running now with Ollama for 50 questions, `docs/financebench_eval.md` will be generated separately + +## How to run locally + +```bash +# Backend +pip install -r requirements.txt +cp .env.example .env +# Optional: paste ANTHROPIC_API_KEY / OPENAI_API_KEY into .env +python scripts/run_server.py --reload # -> http://localhost:8000 + +# Frontend +cd frontend && npm install && npm run dev # -> http://localhost:5173 + +# Seed the 6 hand-curated vaults +python scripts/seed_demo.py + +# (optional) load the 506-company FinanceBench vault (~2h first run) +CUDA_VISIBLE_DEVICES= python scripts/load_financebench.py --scale large \ + --sec-user-agent "Your Name <you@example.com>" +CUDA_VISIBLE_DEVICES= python scripts/ingest_financebench.py +``` + +## Default logins + +- **super_admin**: `admin / ChangeThis123!` (cross-tenant) +- **personal**: `alice / Alice123!` +- **company** (Acme): `demo / Demo123!` +- **organization** (Lunar Harbor): `orion / Orion123!` +- **relativity**: `einat / Einat123!` +- **saas-ai**: `nadav / Nadav123!` +- **smartbase-docs**: `docs / Docs1234!` +- **financebench**: `fbuser / FbUser123!` + +🤖 Generated with [Claude Code](https://claude.com/claude-code) diff --git
a/docs/screenshots/README.md b/docs/screenshots/README.md new file mode 100644 index 0000000..328781f --- /dev/null +++ b/docs/screenshots/README.md @@ -0,0 +1,11 @@ +# Screenshots + +Drop PNGs here and reference them from the root `README.md`. Suggested set: + +- `01-login.png` — login screen +- `02-chat.png` — chat with a RAG-grounded answer +- `03-chat-db-lookup.png` — chat with an exact DB-lookup answer showing the structured row in context +- `04-files.png` — file upload view +- `05-admin-tenants.png` — super-admin tenant management + +Record a short screen capture of the same flow and save it as `demo.gif` at the repo root. diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 7c8b98a..295f45c 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -11,6 +11,7 @@ "axios": "^1.11.0", "react": "^19.1.0", "react-dom": "^19.1.0", + "react-markdown": "^10.1.0", "react-router-dom": "^7.7.1" }, "devDependencies": { @@ -1437,13 +1438,39 @@ "@babel/types": "^7.28.2" } }, + "node_modules/@types/debug": { + "version": "4.1.13", + "resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz", + "integrity": "sha512-KSVgmQmzMwPlmtljOomayoR89W4FynCAi3E8PPs7vmDVPe84hT+vGPKkJfThkmXs0x0jAaa9U8uW8bbfyS2fWw==", + "license": "MIT", + "dependencies": { + "@types/ms": "*" + } + }, "node_modules/@types/estree": { "version": "1.0.8", "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "dev": true, "license": "MIT" }, + "node_modules/@types/estree-jsx": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree-jsx/-/estree-jsx-1.0.5.tgz", + "integrity": "sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==", + "license": "MIT", + "dependencies": { + "@types/estree": "*" + } + }, + "node_modules/@types/hast": { + "version": "3.0.4", + 
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -1451,11 +1478,25 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/mdast": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", + "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/@types/ms": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/ms/-/ms-2.1.0.tgz", + "integrity": "sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==", + "license": "MIT" + }, "node_modules/@types/react": { "version": "19.1.9", "resolved": "https://registry.npmjs.org/@types/react/-/react-19.1.9.tgz", "integrity": "sha512-WmdoynAX8Stew/36uTSVMcLJJ1KRh6L3IZRx1PZ7qJtBqT3dYTgyDTx8H1qoRghErydW7xw9mSJ3wS//tCRpFA==", - "dev": true, "license": "MIT", "dependencies": { "csstype": "^3.0.2" @@ -1471,6 +1512,18 @@ "@types/react": "^19.0.0" } }, + "node_modules/@types/unist": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", + "integrity": "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==", + "license": "MIT" + }, + "node_modules/@ungap/structured-clone": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.3.0.tgz", + "integrity": "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==", + "license": "ISC" + }, "node_modules/@vitejs/plugin-react": 
{ "version": "4.7.0", "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz", @@ -1664,6 +1717,16 @@ "proxy-from-env": "^1.1.0" } }, + "node_modules/bail": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", + "integrity": "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", @@ -1795,6 +1858,16 @@ ], "license": "CC-BY-4.0" }, + "node_modules/ccount": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", + "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -1812,6 +1885,46 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/character-entities": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/character-entities/-/character-entities-2.0.2.tgz", + "integrity": "sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-entities-html4": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz", + "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + 
"node_modules/character-entities-legacy": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz", + "integrity": "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-reference-invalid": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", + "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/chokidar": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", @@ -1882,6 +1995,16 @@ "node": ">= 0.8" } }, + "node_modules/comma-separated-tokens": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz", + "integrity": "sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/commander": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", @@ -1947,14 +2070,12 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==", - "dev": true, "license": "MIT" }, "node_modules/debug": { "version": "4.4.1", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz", "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==", - "dev": true, "license": "MIT", 
"dependencies": { "ms": "^2.1.3" @@ -1968,6 +2089,19 @@ } } }, + "node_modules/decode-named-character-reference": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/decode-named-character-reference/-/decode-named-character-reference-1.3.0.tgz", + "integrity": "sha512-GtpQYB283KrPp6nRw50q3U9/VfOutZOe103qlN7BPP6Ad27xYnOIWv4lPzo8HCAL+mMZofJ9KEy30fq6MfaK6Q==", + "license": "MIT", + "dependencies": { + "character-entities": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -1984,6 +2118,28 @@ "node": ">=0.4.0" } }, + "node_modules/dequal": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", + "integrity": "sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/devlop": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", + "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", + "license": "MIT", + "dependencies": { + "dequal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/didyoumean": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", @@ -2311,6 +2467,16 @@ "node": ">=4.0" } }, + "node_modules/estree-util-is-identifier-name": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/estree-util-is-identifier-name/-/estree-util-is-identifier-name-3.0.0.tgz", + "integrity": "sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, 
"node_modules/esutils": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", @@ -2321,6 +2487,12 @@ "node": ">=0.10.0" } }, + "node_modules/extend": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==", + "license": "MIT" + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -2733,6 +2905,56 @@ "node": ">= 0.4" } }, + "node_modules/hast-util-to-jsx-runtime": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz", + "integrity": "sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg==", + "license": "MIT", + "dependencies": { + "@types/estree": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "devlop": "^1.0.0", + "estree-util-is-identifier-name": "^3.0.0", + "hast-util-whitespace": "^3.0.0", + "mdast-util-mdx-expression": "^2.0.0", + "mdast-util-mdx-jsx": "^3.0.0", + "mdast-util-mdxjs-esm": "^2.0.0", + "property-information": "^7.0.0", + "space-separated-tokens": "^2.0.0", + "style-to-js": "^1.0.0", + "unist-util-position": "^5.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-whitespace": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", + "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + 
"node_modules/html-url-attributes": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.1.tgz", + "integrity": "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -2770,6 +2992,36 @@ "node": ">=0.8.19" } }, + "node_modules/inline-style-parser": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.7.tgz", + "integrity": "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==", + "license": "MIT" + }, + "node_modules/is-alphabetical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz", + "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-alphanumerical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-2.0.1.tgz", + "integrity": "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==", + "license": "MIT", + "dependencies": { + "is-alphabetical": "^2.0.0", + "is-decimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-binary-path": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-binary-path/-/is-binary-path-2.1.0.tgz", @@ -2799,6 +3051,16 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-decimal": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz", + "integrity": "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -2832,6 +3094,16 @@ "node": ">=0.10.0" } }, + "node_modules/is-hexadecimal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", + "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-number": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", @@ -2842,6 +3114,18 @@ "node": ">=0.12.0" } }, + "node_modules/is-plain-obj": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-4.1.0.tgz", + "integrity": "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==", + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -3009,6 +3293,16 @@ "dev": true, "license": "MIT" }, + "node_modules/longest-streak": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz", + "integrity": "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/lru-cache": { "version": "5.1.1", "resolved": 
"https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", @@ -3028,92 +3322,686 @@ "node": ">= 0.4" } }, - "node_modules/merge2": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", - "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", - "dev": true, + "node_modules/mdast-util-from-markdown": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.3.tgz", + "integrity": "sha512-W4mAWTvSlKvf8L6J+VN9yLSqQ9AOAAvHuoDAmPkz4dHf553m5gVj2ejadHJhoJmcmxEnOv6Pa8XJhpxE93kb8Q==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark": "^4.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-decode-string": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdx-expression": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.1.tgz", + "integrity": "sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==", "license": "MIT", - "engines": { - "node": ">= 8" + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/micromatch": { - "version": "4.0.8", - "resolved": 
"https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", - "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", - "dev": true, + "node_modules/mdast-util-mdx-jsx": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.2.0.tgz", + "integrity": "sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==", + "license": "MIT", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "ccount": "^2.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "parse-entities": "^4.0.0", + "stringify-entities": "^4.0.0", + "unist-util-stringify-position": "^4.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdxjs-esm": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-mdxjs-esm/-/mdast-util-mdxjs-esm-2.0.1.tgz", + "integrity": "sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==", "license": "MIT", "dependencies": { - "braces": "^3.0.3", - "picomatch": "^2.3.1" + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" }, - "engines": { - "node": ">=8.6" + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/micromatch/node_modules/picomatch": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", - "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", - "dev": true, + "node_modules/mdast-util-phrasing": { + "version": "4.1.0", + 
"resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", + "integrity": "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==", "license": "MIT", - "engines": { - "node": ">=8.6" + "dependencies": { + "@types/mdast": "^4.0.0", + "unist-util-is": "^6.0.0" }, "funding": { - "url": "https://github.com/sponsors/jonschlinkert" + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/mime-db": { - "version": "1.52.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", - "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "node_modules/mdast-util-to-hast": { + "version": "13.2.1", + "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.1.tgz", + "integrity": "sha512-cctsq2wp5vTsLIcaymblUriiTcZd0CwWtCbLvrOzYCDZoWyMNV8sZ7krj09FSnsiJi3WVsHLM4k6Dq/yaPyCXA==", "license": "MIT", - "engines": { - "node": ">= 0.6" + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@ungap/structured-clone": "^1.0.0", + "devlop": "^1.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "trim-lines": "^3.0.0", + "unist-util-position": "^5.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/mime-types": { - "version": "2.1.35", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", - "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "node_modules/mdast-util-to-markdown": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-2.1.2.tgz", + "integrity": "sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==", "license": "MIT", "dependencies": { 
- "mime-db": "1.52.0" + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "longest-streak": "^3.0.0", + "mdast-util-phrasing": "^4.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-decode-string": "^2.0.0", + "unist-util-visit": "^5.0.0", + "zwitch": "^2.0.0" }, - "engines": { - "node": ">= 0.6" + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dev": true, - "license": "ISC", + "node_modules/mdast-util-to-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-4.0.0.tgz", + "integrity": "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==", + "license": "MIT", "dependencies": { - "brace-expansion": "^1.1.7" + "@types/mdast": "^4.0.0" }, - "engines": { - "node": "*" + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, - "node_modules/minipass": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", - "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", "dev": true, - "license": "ISC", + "license": "MIT", "engines": { - "node": ">=16 || 14 >=14.17" + "node": ">= 8" } }, - "node_modules/ms": { + "node_modules/micromark": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz", + "integrity": 
"sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "@types/debug": "^4.0.0", + "debug": "^4.0.0", + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-core-commonmark": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-2.0.3.tgz", + "integrity": "sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "devlop": "^1.0.0", + "micromark-factory-destination": "^2.0.0", + "micromark-factory-label": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-factory-title": "^2.0.0", + "micromark-factory-whitespace": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-html-tag-name": "^2.0.0", + 
"micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-destination": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz", + "integrity": "sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-label": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.1.tgz", + "integrity": "sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-space": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.1.tgz", + "integrity": "sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + 
"license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-title": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-2.0.1.tgz", + "integrity": "sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-factory-whitespace": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.1.tgz", + "integrity": "sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-character": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.1.tgz", + "integrity": "sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { 
+ "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-chunked": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-2.0.1.tgz", + "integrity": "sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-classify-character": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-2.0.1.tgz", + "integrity": "sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-combine-extensions": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-2.0.1.tgz", + "integrity": "sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-chunked": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-decode-numeric-character-reference": { + 
"version": "2.0.2", + "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-2.0.2.tgz", + "integrity": "sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-decode-string": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-2.0.1.tgz", + "integrity": "sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "decode-named-character-reference": "^1.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-encode": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.1.tgz", + "integrity": "sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-html-tag-name": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-2.0.1.tgz", + "integrity": 
"sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-normalize-identifier": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-2.0.1.tgz", + "integrity": "sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-resolve-all": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-2.0.1.tgz", + "integrity": "sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-sanitize-uri": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.1.tgz", + "integrity": "sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + 
"micromark-util-character": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-symbol": "^2.0.0" + } + }, + "node_modules/micromark-util-subtokenize": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-2.1.0.tgz", + "integrity": "sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-util-symbol": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.1.tgz", + "integrity": "sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromark-util-types": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.2.tgz", + "integrity": "sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA==", + "funding": [ + { + "type": "GitHub Sponsors", + "url": "https://github.com/sponsors/unifiedjs" + }, + { + "type": "OpenCollective", + "url": "https://opencollective.com/unified" + } + ], + "license": "MIT" + }, + "node_modules/micromatch": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": 
"sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + "dev": true, + "license": "MIT", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/micromatch/node_modules/picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "dev": true, + "license": "ISC", + "engines": { + "node": ">=16 || 
14 >=14.17" + } + }, + "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true, "license": "MIT" }, "node_modules/mz": { @@ -3271,6 +4159,31 @@ "node": ">=6" } }, + "node_modules/parse-entities": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.2.tgz", + "integrity": "sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^2.0.0", + "character-entities-legacy": "^3.0.0", + "character-reference-invalid": "^2.0.0", + "decode-named-character-reference": "^1.0.0", + "is-alphanumerical": "^2.0.0", + "is-decimal": "^2.0.0", + "is-hexadecimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/parse-entities/node_modules/@types/unist": { + "version": "2.0.11", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.11.tgz", + "integrity": "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==", + "license": "MIT" + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -3522,6 +4435,16 @@ "node": ">= 0.8.0" } }, + "node_modules/property-information": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz", + "integrity": "sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -3580,6 +4503,33 @@ 
"react": "^19.1.1" } }, + "node_modules/react-markdown": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-10.1.0.tgz", + "integrity": "sha512-qKxVopLT/TyA6BX3Ue5NwabOsAzm0Q7kAPwq6L+wWDwisYs7R8vZ0nRXqq6rkueboxpkjvLGU9fWifiX/ZZFxQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "hast-util-to-jsx-runtime": "^2.0.0", + "html-url-attributes": "^3.0.0", + "mdast-util-to-hast": "^13.0.0", + "remark-parse": "^11.0.0", + "remark-rehype": "^11.0.0", + "unified": "^11.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + }, + "peerDependencies": { + "@types/react": ">=18", + "react": ">=18" + } + }, "node_modules/react-refresh": { "version": "0.17.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz", @@ -3664,6 +4614,39 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/remark-parse": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", + "integrity": "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-rehype": { + "version": "11.1.2", + "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-11.1.2.tgz", + "integrity": "sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "mdast-util-to-hast": "^13.0.0", + "unified": "^11.0.0", + "vfile": "^6.0.0" + }, + 
"funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/resolve": { "version": "1.22.10", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.10.tgz", @@ -3838,6 +4821,16 @@ "node": ">=0.10.0" } }, + "node_modules/space-separated-tokens": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/space-separated-tokens/-/space-separated-tokens-2.0.2.tgz", + "integrity": "sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", @@ -3902,6 +4895,20 @@ "node": ">=8" } }, + "node_modules/stringify-entities": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", + "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==", + "license": "MIT", + "dependencies": { + "character-entities-html4": "^2.0.0", + "character-entities-legacy": "^3.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/strip-ansi": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", @@ -3955,6 +4962,24 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/style-to-js": { + "version": "1.1.21", + "resolved": "https://registry.npmjs.org/style-to-js/-/style-to-js-1.1.21.tgz", + "integrity": "sha512-RjQetxJrrUJLQPHbLku6U/ocGtzyjbJMP9lCNK7Ag0CNh690nSH8woqWH9u16nMjYBAok+i7JO1NP2pOy8IsPQ==", + "license": "MIT", + "dependencies": { + "style-to-object": "1.0.14" + } + }, + "node_modules/style-to-object": { + "version": "1.0.14", + "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.14.tgz", + 
"integrity": "sha512-LIN7rULI0jBscWQYaSswptyderlarFkjQ+t79nzty8tcIAceVomEVlLzH5VP4Cmsv6MtKhs7qaAiwlcp+Mgaxw==", + "license": "MIT", + "dependencies": { + "inline-style-parser": "0.2.7" + } + }, "node_modules/sucrase": { "version": "3.35.0", "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", @@ -4095,6 +5120,26 @@ "node": ">=8.0" } }, + "node_modules/trim-lines": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", + "integrity": "sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/trough": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", + "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/ts-interface-checker": { "version": "0.1.13", "resolved": "https://registry.npmjs.org/ts-interface-checker/-/ts-interface-checker-0.1.13.tgz", @@ -4115,6 +5160,93 @@ "node": ">= 0.8.0" } }, + "node_modules/unified": { + "version": "11.0.5", + "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", + "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "bail": "^2.0.0", + "devlop": "^1.0.0", + "extend": "^3.0.0", + "is-plain-obj": "^4.0.0", + "trough": "^2.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-is": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.1.tgz", + "integrity": 
"sha512-LsiILbtBETkDz8I9p1dQ0uyRUWuaQzd/cuEeS1hoRSyW5E5XGmTzlwY1OrNzzakGowI9Dr/I8HVaw4hTtnxy8g==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-position": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz", + "integrity": "sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-stringify-position": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", + "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.1.0.tgz", + "integrity": "sha512-m+vIdyeCOpdr/QeQCu2EzxX/ohgS8KbnPDgFni4dQsfSCtpz8UqDyY5GjRru8PDKuYn7Fq19j1CQ+nJSsGKOzg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-visit-parents": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.2.tgz", + "integrity": "sha512-goh1s1TBrqSqukSc8wrjwWhL0hiJxgA8m4kFxGlQ+8FYQ3C/m11FcTs4YYem7V664AhHVvgoQLk890Ssdsr2IQ==", + "license": "MIT", + 
"dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/update-browserslist-db": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz", @@ -4163,6 +5295,34 @@ "dev": true, "license": "MIT" }, + "node_modules/vfile": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", + "integrity": "sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-message": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.3.tgz", + "integrity": "sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/vite": { "version": "7.0.6", "resolved": "https://registry.npmjs.org/vite/-/vite-7.0.6.tgz", @@ -4391,6 +5551,16 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zwitch": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", + "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } } } } diff --git a/frontend/package.json b/frontend/package.json index 9b2e786..4596861 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -13,6 +13,7 @@ 
"axios": "^1.11.0", "react": "^19.1.0", "react-dom": "^19.1.0", + "react-markdown": "^10.1.0", "react-router-dom": "^7.7.1" }, "devDependencies": { diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index c71272b..5f24370 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -1,6 +1,11 @@ import AppRouter from './router'; +import ErrorBoundary from './components/ErrorBoundary'; import './index.css'; export default function App() { - return ; + return ( + + + + ); } diff --git a/frontend/src/api/api.js b/frontend/src/api/api.js index 8be98c1..86d3d7c 100644 --- a/frontend/src/api/api.js +++ b/frontend/src/api/api.js @@ -22,4 +22,34 @@ api.interceptors.request.use((config) => { return config; }); +// Session-timeout handler: when the backend rejects a request with 401 or +// any "Token expired" / "Invalid token" error, clear the local session state +// and bounce the user back to the login page. Without this, expired tokens +// silently produce 401s all over the app with no recovery path. 
+api.interceptors.response.use( + (res) => res, + (err) => { + const status = err.response?.status; + const detail = err.response?.data?.detail || ''; + const expired = + status === 401 || + /token expired|invalid token/i.test(detail); + if (expired && typeof window !== 'undefined') { + const path = window.location.pathname; + // Don't bounce if we're already on the login page (prevents infinite loop) + if (path !== '/login') { + localStorage.removeItem('access_token'); + localStorage.removeItem('role'); + localStorage.removeItem('tenant_id'); + localStorage.removeItem('active_tenant'); + localStorage.removeItem('username'); + // Preserve where the user was so we can bounce back after re-login + sessionStorage.setItem('post_login_redirect', path + window.location.search); + window.location.assign('/login'); + } + } + return Promise.reject(err); + }, +); + export default api; diff --git a/frontend/src/components/ApiStatus.jsx b/frontend/src/components/ApiStatus.jsx new file mode 100644 index 0000000..42c0df5 --- /dev/null +++ b/frontend/src/components/ApiStatus.jsx @@ -0,0 +1,73 @@ +import { useEffect, useState } from 'react'; +import api from '../api/api'; + +const PROVIDERS = [ + { key: 'ollama', label: 'Ollama' }, + { key: 'openai', label: 'OpenAI' }, + { key: 'anthropic', label: 'Anthropic' }, +]; + +export default function ApiStatus() { + const [status, setStatus] = useState(null); + const [loading, setLoading] = useState(false); + + const poll = async () => { + setLoading(true); + try { + const res = await api.get('/admin/models/status'); + setStatus(res.data); + } catch { + setStatus(null); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + poll(); + const id = setInterval(poll, 30_000); + return () => clearInterval(id); + }, []); + + return ( +
+
+

API Connections

+ +
+
+ {PROVIDERS.map(({ key, label }) => { + const s = status?.[key]; + const ok = s?.ok === true; + const unknown = !status; + const dot = unknown + ? 'bg-gray-500' + : ok + ? 'bg-green-500' + : 'bg-red-500'; + const detail = s?.detail || (unknown ? 'unknown' : ''); + return ( +
+
+ {label} + + {unknown ? '' : ok ? 'ok' : 'down'} + +
+ ); + })} +
+
+ ); +} diff --git a/frontend/src/components/ErrorBoundary.jsx b/frontend/src/components/ErrorBoundary.jsx new file mode 100644 index 0000000..96b22ad --- /dev/null +++ b/frontend/src/components/ErrorBoundary.jsx @@ -0,0 +1,64 @@ +import { Component } from 'react'; + +/** + * Catches render errors in the child tree and shows a recoverable fallback. + * Without this, a single bad trace response or malformed file could blank + * the page with no way back except a full reload. + */ +export default class ErrorBoundary extends Component { + constructor(props) { + super(props); + this.state = { error: null, info: null }; + } + + static getDerivedStateFromError(error) { + return { error }; + } + + componentDidCatch(error, info) { + // eslint-disable-next-line no-console + console.error('[ErrorBoundary]', error, info); + this.setState({ info }); + } + + reset = () => { + this.setState({ error: null, info: null }); + }; + + render() { + if (this.state.error) { + return ( +
+
+
⚠️
+

+ Something rendered badly +

+

+ The UI caught an error and bailed out of that view. You can try again without + losing your session. +

+
+              {String(this.state.error?.message || this.state.error)}
+            
+
+ + +
+
+
+ ); + } + return this.props.children; + } +} diff --git a/frontend/src/components/FileTree.jsx b/frontend/src/components/FileTree.jsx new file mode 100644 index 0000000..bd9ae37 --- /dev/null +++ b/frontend/src/components/FileTree.jsx @@ -0,0 +1,260 @@ +import { useMemo, useState, useEffect } from 'react'; + +/** + * Interactive file browser for a vault. + * + * Features: + * - Groups files by first path segment (per-ticker folders for financebench, + * a single "(root)" group for flat tenants like personal/company). + * - Live search box that filters files and folders by substring. Matching + * substrings are highlighted inline. When a query is active, all matching + * folders auto-expand; clearing the query restores the user's manual + * expand/collapse state. + * - Collapsed folders by default so big vaults (financebench has 1,744 files + * across 506 folders) don't flood the sidebar. + * - File icon + size suffix for visual scanning. + * - "Expand all" / "Collapse all" shortcuts. + */ + +const FILE_ICONS = { + '.md': '📄', + '.markdown': '📄', + '.txt': '📄', + '.csv': '📊', + '.log': '📜', +}; + +function formatBytes(n) { + if (n < 1024) return `${n} B`; + if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KB`; + return `${(n / 1024 / 1024).toFixed(1)} MB`; +} + +function highlight(text, query) { + if (!query) return text; + const idx = text.toLowerCase().indexOf(query.toLowerCase()); + if (idx === -1) return text; + return ( + <> + {text.slice(0, idx)} + + {text.slice(idx, idx + query.length)} + + {text.slice(idx + query.length)} + + ); +} + +export default function FileTree({ files, selected, onOpen, loading }) { + const [query, setQuery] = useState(''); + const [userOpen, setUserOpen] = useState({}); // manual open state per folder + const [allMode, setAllMode] = useState(null); // 'all' | 'none' | null + + // Group files into folders + const groups = useMemo(() => { + const g = new Map(); + const root = []; + for (const f of files) { + const parts = 
f.filename.split('/'); + if (parts.length === 1) { + root.push(f); + } else { + const folder = parts[0]; + if (!g.has(folder)) g.set(folder, []); + g.get(folder).push({ ...f, basename: parts.slice(1).join('/') }); + } + } + return { root, folders: Array.from(g.entries()).sort() }; + }, [files]); + + // Compute filtered view + const q = query.trim().toLowerCase(); + const filtered = useMemo(() => { + if (!q) return groups; + const out = { root: [], folders: [] }; + for (const f of groups.root) { + if (f.filename.toLowerCase().includes(q)) out.root.push(f); + } + for (const [folder, entries] of groups.folders) { + const folderMatches = folder.toLowerCase().includes(q); + const matched = entries.filter((e) => { + if (folderMatches) return true; + return ( + e.basename.toLowerCase().includes(q) || + e.filename.toLowerCase().includes(q) + ); + }); + if (matched.length > 0) out.folders.push([folder, matched]); + } + return out; + }, [groups, q]); + + // When there's a query, all result-folders auto-expand. Without a query, + // defer to userOpen / allMode. Any folder containing the selected file also + // auto-expands so the highlighted row is visible. + const isOpen = (folder) => { + if (q) return true; + if (allMode === 'all') return true; + if (allMode === 'none') return false; + if (userOpen[folder]) return true; + if (selected && selected.startsWith(folder + '/')) return true; + return false; + }; + + const toggle = (folder) => { + setAllMode(null); + setUserOpen((prev) => ({ ...prev, [folder]: !prev[folder] })); + }; + + const totalFiles = groups.root.length + groups.folders.reduce((n, [, e]) => n + e.length, 0); + const shownFiles = filtered.root.length + filtered.folders.reduce((n, [, e]) => n + e.length, 0); + + return ( +
+ {/* Search + actions */} +
+ setQuery(e.target.value)} + placeholder={`Search ${totalFiles} files…`} + className="w-full text-xs px-2 py-1.5 border border-slate-300 rounded focus:outline-none focus:border-indigo-400" + /> +
+ + {q ? ( + <> + {shownFiles} of {totalFiles} match + + ) : ( + <> + {totalFiles} files · {groups.folders.length} folders + + )} + + {!q && ( +
+ + +
+ )} +
+
+ + {/* Tree body */} +
+ {loading &&
loading…
} + + {!loading && totalFiles === 0 && ( +
no files yet — click + Add file
+ )} + + {!loading && shownFiles === 0 && q && ( +
no match for "{query}"
+ )} + + {/* Root (flat) files */} + {filtered.root.length > 0 && ( +
    + {filtered.root.map((f) => { + const icon = FILE_ICONS[f.suffix] || '📄'; + const isSel = selected === f.filename; + return ( +
  • + +
  • + ); + })} +
+ )} + + {/* Folder groups */} +
    + {filtered.folders.map(([folder, entries]) => { + const open = isOpen(folder); + return ( +
  • + + {open && ( +
      + {entries.map((f) => { + const icon = FILE_ICONS[f.suffix] || '📄'; + const isSel = selected === f.filename; + return ( +
    • + +
    • + ); + })} +
    + )} +
  • + ); + })} +
+
+
+ ); +} diff --git a/frontend/src/components/Layout.jsx b/frontend/src/components/Layout.jsx index 733e97a..c9fc544 100644 --- a/frontend/src/components/Layout.jsx +++ b/frontend/src/components/Layout.jsx @@ -2,10 +2,10 @@ import { useState, useEffect } from 'react'; import { Link, useNavigate } from 'react-router-dom'; import api from '../api/api'; import AppContext from '../store/AppContext'; +import ApiStatus from './ApiStatus'; export default function Layout({ children }) { const [sidebarOpen, setSidebarOpen] = useState(true); - const [settingsOpen, setSettingsOpen] = useState(false); const [sessions, setSessions] = useState([]); const [tenants, setTenants] = useState([]); const [activeTenant, setActiveTenant] = useState( @@ -44,7 +44,7 @@ export default function Layout({ children }) { const t = (localStorage.getItem('tenant_id') || '').replace(/\s+/g, ''); setTenants(t ? [t] : []); setActiveTenant(t || ''); - localStorage.setItem('active_tenant', t || ''); + if (t) localStorage.setItem('active_tenant', t); } }, [role]); @@ -55,12 +55,14 @@ export default function Layout({ children }) { .catch(() => setSessions([])); }, []); + // Intentionally NOT auto-selecting a tenant for super_admin — they must + // pick a vault from the top-right dropdown before any per-vault page loads. 
useEffect(() => { - if (!activeTenant && tenants.length > 0) { + if (role !== 'super_admin' && !activeTenant && tenants.length > 0) { setActiveTenant(tenants[0]); localStorage.setItem('active_tenant', tenants[0]); } - }, [activeTenant, tenants]); + }, [role, activeTenant, tenants]); const updateActiveTenant = (t) => { const id = (t || '').replace(/\s+/g, ''); @@ -132,8 +134,11 @@ export default function Layout({ children }) { Chat - - Files + + Vault + + + RAG Visualizer {(role === 'admin' || role === 'super_admin') && ( <> @@ -145,21 +150,27 @@ export default function Layout({ children }) { )} + {role === 'super_admin' && ( + <> + + Cross-tenant Search + + + Usage + + + Audit Log + + + Settings + + + )}
-

API Connections

-
-
-
- OpenAI API -
-
-
- Local Model -
-
+
@@ -193,62 +204,38 @@ export default function Layout({ children }) {
- + {role === 'super_admin' && tenants.length > 0 && ( +
+ Active vault: + +
+ )} + {role === 'super_admin' && ( + + )}
{/* Content */}
{children}
- - {/* Settings Modal */} - {settingsOpen && ( -
setSettingsOpen(false)} - > -
e.stopPropagation()} - > -
-
-

Settings

- -
- -
- {(role === 'admin' || role === 'super_admin') && ( -
- - -
- )} -
-
-
-
- )} ); diff --git a/frontend/src/components/VaultGate.jsx b/frontend/src/components/VaultGate.jsx new file mode 100644 index 0000000..63c7eff --- /dev/null +++ b/frontend/src/components/VaultGate.jsx @@ -0,0 +1,21 @@ +/** + * Blocking placeholder shown when the current page needs an active vault + * but none has been picked yet. Super-admins start in this state by design — + * they can see every vault but must choose one first, so that retrieval, + * chat, and editing are always explicitly scoped. + */ +export default function VaultGate({ role, title = 'Pick a vault to continue' }) { + return ( +
+
+
🗂️
+

{title}

+

+ {role === 'super_admin' + ? 'You have access to every vault. Use the "Active vault" dropdown at the top of the page to pick one before chatting, running retrieval traces, or editing files.' + : 'No vault is associated with your account. Contact an administrator to be assigned to a tenant.'} +

+
+
+ ); +} diff --git a/frontend/src/pages/AuditLog.jsx b/frontend/src/pages/AuditLog.jsx new file mode 100644 index 0000000..dcaa8cc --- /dev/null +++ b/frontend/src/pages/AuditLog.jsx @@ -0,0 +1,121 @@ +import { useEffect, useState } from 'react'; +import api from '../api/api'; + +export default function AuditLog() { + const [rows, setRows] = useState([]); + const [total, setTotal] = useState(0); + const [filter, setFilter] = useState({ username: '', action: '' }); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(''); + + const load = async () => { + setLoading(true); + setError(''); + try { + const params = { limit: 200, offset: 0 }; + if (filter.username) params.username = filter.username; + if (filter.action) params.action = filter.action; + const res = await api.get('/admin/audit-log', { params }); + setRows(res.data?.logs || []); + setTotal(res.data?.total || 0); + } catch (err) { + setError(err.response?.data?.detail || err.message); + } finally { + setLoading(false); + } + }; + + useEffect(() => { + load(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + return ( +
+
+

Audit log

+

+ Every authenticated action against the backend — login, chat, vault edit, tenant + CRUD, settings change — is recorded in audit_logs. Super-admin only. +

+ +
+ + + + + {total.toLocaleString()} total matches + +
+ + {error && ( +
+ {error} +
+ )} + +
+ + + + + + + + + + + {rows.map((r) => ( + + + + + + + ))} + {rows.length === 0 && !loading && ( + + + + )} + +
TimeUserActionDetails
+ {r.created_at?.slice(0, 19).replace('T', ' ')} + {r.username} + + {r.action} + + + {r.details || '—'} +
+ no events match +
+
+
+
+ ); +} diff --git a/frontend/src/pages/Chat.jsx b/frontend/src/pages/Chat.jsx index 29f6543..659961c 100644 --- a/frontend/src/pages/Chat.jsx +++ b/frontend/src/pages/Chat.jsx @@ -1,13 +1,29 @@ import { useState, useEffect, useCallback, useRef, useContext } from 'react'; import { useLocation } from 'react-router-dom'; -import Layout from '../components/Layout'; import api from '../api/api'; import AppContext from '../store/AppContext'; +import VaultGate from '../components/VaultGate'; export default function Chat() { const location = useLocation(); const { activeTenant, setSessions } = useContext(AppContext); const role = localStorage.getItem('role'); + const [models, setModels] = useState([]); + const [selectedModelIdx, setSelectedModelIdx] = useState(0); + + useEffect(() => { + if (!activeTenant) { + setModels([]); + return; + } + api + .get(`/admin/tenants/${activeTenant}`) + .then((res) => { + setModels(res.data?.models || []); + setSelectedModelIdx(0); + }) + .catch(() => setModels([])); + }, [activeTenant]); const [sessionId, setSessionId] = useState(() => { const params = new URLSearchParams(location.search); return params.get('session') || crypto.randomUUID(); @@ -65,10 +81,14 @@ export default function Chat() { return; } + const chosen = models[selectedModelIdx]; const payload = { session_id: sessionId, tenant_id: tenant, message: input, + ...(chosen + ? { model_provider: chosen.provider, model_name: chosen.name } + : {}), }; console.log('>>> Sending to backend', JSON.stringify(payload)); @@ -101,9 +121,12 @@ export default function Chat() { 'How do I make an HTTP request in JavaScript?', ]; + if (!activeTenant) { + return ; + } + return ( - -
+
{history.length === 0 && ( @@ -159,6 +182,25 @@ export default function Chat() {
+ {models.length > 0 && ( +
+ Model: + + + vault: {activeTenant} + +
+ )}