From 50b6d99c8ecae74b471f562bbb4cc9cafbbd2767 Mon Sep 17 00:00:00 2001 From: level09 Date: Fri, 19 Jun 2026 12:40:45 +0200 Subject: [PATCH] docs: add Transcription, Media Import, Account Security, and Setup Wizard guides --- docs/.vitepress/config.ts | 4 +++ docs/deployment/setup-wizard.md | 38 +++++++++++++++++++++++ docs/guide/account-security.md | 55 +++++++++++++++++++++++++++++++++ docs/guide/media-import.md | 46 +++++++++++++++++++++++++++ docs/guide/transcription.md | 45 +++++++++++++++++++++++++++ 5 files changed, 188 insertions(+) create mode 100644 docs/deployment/setup-wizard.md create mode 100644 docs/guide/account-security.md create mode 100644 docs/guide/media-import.md create mode 100644 docs/guide/transcription.md diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index b18f6b7f7..3b479042b 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -51,12 +51,15 @@ export default withMermaid( { text: "Search", link: "/guide/search" }, { text: "Permissions", link: "/guide/permissions" }, { text: "Access Control", link: "/guide/access-control" }, + { text: "Account Security", link: "/guide/account-security" }, { text: "Revision History", link: "/guide/revision-history" }, { text: "Activity Monitor", link: "/guide/activity" }, { text: "Data Export", link: "/guide/data-export" }, { text: "Video Deduplication", link: "/guide/deduplication" }, { text: "OCR & Text Extraction", link: "/guide/ocr" }, + { text: "Transcription", link: "/guide/transcription" }, { text: "Data Import", link: "/guide/data-import" }, + { text: "Media Import", link: "/guide/media-import" }, { text: "Bulk Operations", link: "/guide/bulk-operations" }, { text: "Media Management", link: "/guide/media" }, { text: "Notifications", link: "/guide/notifications" }, @@ -70,6 +73,7 @@ export default withMermaid( text: "Deployment", items: [ { text: "Installation", link: "/deployment/installation" }, + { text: "Setup Wizard", link: "/deployment/setup-wizard" }, { 
text: "Configuration", link: "/deployment/configuration" }, { text: "Upgrading", link: "/deployment/upgrading" }, { text: "Database Migrations", link: "/deployment/migrations" }, diff --git a/docs/deployment/setup-wizard.md b/docs/deployment/setup-wizard.md new file mode 100644 index 000000000..2903c3661 --- /dev/null +++ b/docs/deployment/setup-wizard.md @@ -0,0 +1,38 @@ +# Setup Wizard + +The first time you open a fresh Bayanat installation, it redirects to a setup wizard that walks you through initial configuration in the browser. The wizard runs once: after you complete it, Bayanat shows the normal login page and the wizard is no longer shown. + +## Before the Wizard + +The wizard configures the application, but the database must exist first. After installing Bayanat (see [Installation](/deployment/installation)), create the database schema, then open the site in a browser to start the wizard. + +```bash +flask create-db --create-exts +flask db stamp head +``` + +Until setup is complete, any page you visit redirects to the wizard. + +## What the Wizard Covers + +The wizard collects the essentials to get a working instance. The main steps are: + +- **First admin user** — create the initial administrator account (username and password). This is the account you will log in with. +- **Language** — the default interface language for the system. Users can change their own language later. +- **Default data** — optionally import Bayanat's built-in reference lists (violation types, event types, countries, and similar). You can skip this and add data later. +- **Location** — set the default map center and administrative division names for your context. +- **Storage** — choose where uploaded media is stored: the local filesystem or S3-compatible object storage. +- **Security** — password policy and whether two-factor authentication is required (see [Account Security](/guide/account-security)). 
+- **Access control** — whether new items are visible to everyone by default or restricted (see [Access Control](/guide/access-control)). +- **Tools** — enable optional features such as media import, spreadsheet import, and data export. +- **Retention** — how long activity logs, exports, and sessions are kept. + +Most steps come with sensible defaults, so you can move through quickly and refine settings later. + +## After Setup + +Everything chosen in the wizard is saved to the application configuration and can be changed afterwards under **System Administration**. The wizard does not need to be run again. + +::: tip Command-line alternative +The wizard is the recommended path for new installations, but the same setup can be done from the command line (`flask install` to create the first admin, `flask import-data` to load default data). This is handy for scripted or headless deployments. +::: diff --git a/docs/guide/account-security.md b/docs/guide/account-security.md new file mode 100644 index 000000000..78d8a4d02 --- /dev/null +++ b/docs/guide/account-security.md @@ -0,0 +1,55 @@ +# Account Security + +Bayanat protects accounts with passwords, optional two-factor authentication, passkeys and hardware security keys, and optional single sign-on. Each user manages their own security from the **Account Security** page (available from the user menu); administrators set the policies that apply to everyone. + +## Two-Factor Authentication (2FA) + +Two-factor authentication adds a one-time code from an authenticator app on top of your password. + +To enrol, open **Account Security → Authentication Methods**, start 2FA setup, and scan the displayed QR code with an authenticator app (such as Google Authenticator or Authy). Enter the 6-digit code to confirm. After that, each login asks for a current code from the app. + +**Recovery codes** are generated when you enrol. 
Save them somewhere safe; each code can be used once to sign in if you lose access to your authenticator app. + +::: tip Administrators can require 2FA +An administrator can enforce 2FA for everyone via the "Enforce 2FA User Enrollment" setting. When enabled, users are prompted to enrol before they can continue using Bayanat. +::: + +## Passkeys and Security Keys + +Bayanat supports WebAuthn passkeys and hardware security keys (such as a YubiKey, or a device's built-in fingerprint or face unlock) as a second factor. They are used after your password, not as a passwordless replacement. + +Register one under **Account Security → Authentication Methods**: give the device a name, then follow your browser's prompt to confirm with the key, fingerprint, or face scan. You can register several devices and remove them individually. + +## Single Sign-On with Google + +If enabled, the login page shows a **Sign in with Google** button. Signing in this way matches your Google account to an existing Bayanat user by email address. + +Accounts are not created automatically: an administrator must create the Bayanat user first; otherwise, sign-in is refused. Administrators can also restrict sign-in to a specific email domain. Single sign-on is configured by an administrator; see [Configuration](/deployment/configuration). + +## Passwords + +Administrators set the password policy: + +- A **minimum length** (10 characters by default). +- A **strength requirement**, checked as you type, that rejects weak or easily guessed passwords. + +Change your password anytime under **Account Security → Change Password**. If your account was created for single sign-on and has no password yet, setting one here adds it. + +## Login Protection + +To slow down automated guessing, administrators can enable a CAPTCHA challenge that appears on the login form after repeated failed attempts. 
+ +## Sessions + +- **One active session** (default): logging in on a new device or browser signs you out everywhere else, so only one session is active at a time. +- **Re-authentication for sensitive actions**: after a period of inactivity, Bayanat asks you to confirm your password again before changing security settings. +- Old session records are cleaned up automatically after an administrator-defined retention period. + +## What You Can Manage Yourself + +From **Account Security**, every user can: + +- Change their password +- Enrol or remove two-factor authentication +- Register or remove passkeys and security keys +- Generate and view recovery codes diff --git a/docs/guide/media-import.md b/docs/guide/media-import.md new file mode 100644 index 000000000..891be8467 --- /dev/null +++ b/docs/guide/media-import.md @@ -0,0 +1,46 @@ +# Media Import + +Bayanat's Media Import tool creates Bulletins in bulk from media files (images, video, audio, and documents). Each file becomes a new Bulletin with the media attached. This is distinct from [Data Import](/guide/data-import), which loads structured records from spreadsheets. + +Media Import is an administrator tool, available under **Data Import → Media** when enabled. + +## Import Sources + +There are two ways to bring files in: + +- **Upload** — select files from your computer and upload them through the browser. +- **Server path** — import files already present in a designated folder on the server. This is useful for large batches that are impractical to upload through a browser. It must be enabled and restricted by an administrator (see below). + +## Workflow + +1. Open **Media Import** and choose a source (upload files, or scan a server path) +2. Set shared metadata for the batch: sources, labels, and access roles +3. 
Choose processing options: + - **Parse** text from PDFs and documents + - **OCR** images and scanned PDFs (requires [OCR](/guide/ocr) enabled) + - **Transcribe** audio and video (requires [Transcription](/guide/transcription) enabled) +4. Process the batch; files are handled asynchronously in the background +5. One Bulletin is created per file, each with its media and any extracted text. You receive a notification when the batch completes. + +Imported Bulletins are marked as machine-created and tagged with a batch ID so you can find and review them together. + +## Supported File Types + +Common image, video, audio, PDF, and document formats are supported. The exact allowed extensions are configurable by an administrator under **System Administration**. + +## Deduplication + +Files are tracked so the same file is not imported twice, which makes it safe to re-run a batch that was interrupted. + +## Server Path Import (Administrators) + +Importing from a server path is disabled by default. To use it, an administrator must both enable it and restrict it to a specific folder: + +- Turn on **Media Import from a local path** in **System Administration**. +- Set the allowed folder on the server via the `ETL_ALLOWED_PATH` environment variable. + +::: warning Restrict the import folder +Server path import reads files from within `ETL_ALLOWED_PATH`. Point it at a dedicated staging folder that holds only material intended for import, not a broad system directory. If `ETL_ALLOWED_PATH` is not set, server path import stays disabled even when the toggle is on. +::: + +See [Configuration](/deployment/configuration) for setup details. 
diff --git a/docs/guide/transcription.md b/docs/guide/transcription.md new file mode 100644 index 000000000..1600960ce --- /dev/null +++ b/docs/guide/transcription.md @@ -0,0 +1,45 @@ +# Transcription + +Bayanat can transcribe audio and video media to searchable text using [Whisper](https://github.com/openai/whisper), running locally on your own server. It is the audio/video counterpart to [OCR](/guide/ocr): transcripts are stored alongside the media and become searchable. + +## How It Works + +Transcription runs as part of [Media Import](/guide/media-import). When you import audio or video files, you can opt to transcribe them, and a background worker generates the transcript automatically. + +1. Start a Media Import and add audio or video files +2. Enable **Transcribe audio/video files** in the import options +3. Optionally choose a language, or leave it blank for automatic detection +4. Process the import; transcription runs asynchronously in the background +5. The transcript is attached to each media item and marked as auto-generated + +## Models + +Whisper offers several models, selected under **System Administration → Whisper Model**. Larger models are more accurate but slower and need more memory; smaller models are faster and lighter. + +| Model | Relative size | Notes | +|-------|---------------|-------| +| `tiny`, `base` | Smallest | Fast, modest accuracy. `base` is the default. | +| `small`, `medium` | Mid | Better accuracy, slower. | +| `large` | Largest | Best accuracy, slowest, highest memory use. | + +English-only variants (`.en`, e.g. `base.en`) are available and can be more accurate for English-only material. + +::: tip First run downloads the model +The selected model is downloaded and cached on the server the first time it is used. Later transcriptions reuse the cached copy. A GPU speeds transcription up considerably but is not required; Whisper also runs on CPU. +::: + +## Languages + +Whisper auto-detects the spoken language by default. 
You can also set a specific language for an import when you know it in advance, which can improve accuracy. + +## Reviewing and Correcting Transcripts + +Auto-generated transcripts are a starting point, not a final record. Reviewers (Admin or Data Analyst roles) can correct the text directly on the media item. Edits are saved with a history of changes, so the original transcript is not lost, and the corrected text becomes the searchable version. + +## Searching Transcripts + +Transcribed text is indexed and searchable the same way as OCR text, so a phrase spoken in a video can be found through normal search. + +## Enabling Transcription + +Transcription is off by default. An administrator enables it under **System Administration** (the "Allow Transcription of Media Files" setting) and selects a Whisper model. The transcription engine ships as an optional component; see [Configuration](/deployment/configuration) for installation details.