Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,26 @@ EMBEDDING_DIMENSION=2622
# EMBEDDING_DIMENSION=512
# SIMILARITY_THRESHOLD=0.4

# ---------------------------------------------------------------------------
# DeepFace model integrity (PR #102 + 2026-05-12 bake-in)
# ---------------------------------------------------------------------------
# DEEPFACE_FACENET512_SHA256 must be set in production or the bio container
# fails fast at boot under `DEEPFACE_SHA256_REQUIRED=true`. The value below
# is the SHA256 of the upstream
# https://github.com/serengil/deepface_models/releases/download/v1.0/facenet512_weights.h5
# captured 2026-05-12 from the running container AND cross-verified against
# the GitHub release artifact. The Dockerfile model-fetcher stage pins this
# same hash via a build ARG and `sha256sum -c` so the image-baked copy and
# this runtime pin always agree.
DEEPFACE_FACENET512_SHA256=3f76b5117a9ca574d536af8199e6720089eb4ad3dc7e93534496d88265de864f
# Companion SHAs for the other three weights baked into the image. These
# are NOT runtime-enforced today (DeepFace 0.0.98 has no integrity hook for
# centerface/MiniFASNet) — they are documented here so a future audit can
# spot-check the image without re-reading the Dockerfile.
# centerface.onnx = 77e394b51108381b4c4f7b4baf1c64ca9f4aba73e5e803b2636419578913b5fe
# 2.7_80x80_MiniFASNetV2.pth = a5eb02e1843f19b5386b953cc4c9f011c3f985d0ee2bb9819eea9a142099bec0
# 4_0_0_80x80_MiniFASNetV1SE.pth = 84ee1d37d96894d5e82de5a57df044ef80a58be2b218b5ed7cdfd875ec2f5990

# FACE_MODEL=ArcFace
# EMBEDDING_DIMENSION=512
# SIMILARITY_THRESHOLD=0.68
Expand Down
112 changes: 107 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,67 @@
# Dockerfile for FIVUCSAS Biometric Processor (FastAPI)

# ============================================================================
# Stage 1: model-fetcher
# ----------------------------------------------------------------------------
# Bake-in stage for DeepFace / Facenet model weights so the runtime container
# does NOT need to download them on first request. Solves the 4th recurrence
# of `feedback_readonly_rootfs_cache_dirs` (2026-05-12): prod uses
# read_only:true rootfs, the cache named volume is owned by root:root, and
# DeepFace runs as uid 100 — first-inference downloads silently fail and
# the anti-spoof verdict collapses to a false-positive. By placing the
# weights inside the image layer (read-only by design, which is fine because
# DeepFace reads but never writes them) we get reproducible deploys + a
# verifiable supply chain (SHA256-checked at build time).
#
# Captured SHA256s (2026-05-12, from the running biometric-api container;
# cross-verified against upstream repos):
# facenet512_weights.h5 3f76b5117a9ca574d536af8199e6720089eb4ad3dc7e93534496d88265de864f
# centerface.onnx 77e394b51108381b4c4f7b4baf1c64ca9f4aba73e5e803b2636419578913b5fe
# 2.7_80x80_MiniFASNetV2.pth a5eb02e1843f19b5386b953cc4c9f011c3f985d0ee2bb9819eea9a142099bec0
# 4_0_0_80x80_MiniFASNetV1SE.pth 84ee1d37d96894d5e82de5a57df044ef80a58be2b218b5ed7cdfd875ec2f5990
# ============================================================================
FROM python:3.12-slim AS model-fetcher

# Expected SHA256 of each weight file. The build fails if a download does not
# match, so a changed or tampered upstream artifact can never be baked in.
# Override with `--build-arg` only when a weight is deliberately rotated.
ARG FACENET512_SHA256=3f76b5117a9ca574d536af8199e6720089eb4ad3dc7e93534496d88265de864f
ARG CENTERFACE_SHA256=77e394b51108381b4c4f7b4baf1c64ca9f4aba73e5e803b2636419578913b5fe
ARG MINIFASNET_V2_SHA256=a5eb02e1843f19b5386b953cc4c9f011c3f985d0ee2bb9819eea9a142099bec0
ARG MINIFASNET_V1SE_SHA256=84ee1d37d96894d5e82de5a57df044ef80a58be2b218b5ed7cdfd875ec2f5990

RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# DeepFace looks for weights under $DEEPFACE_HOME/.deepface/weights/. Build
# that layout here so the runtime stage can copy /models/.deepface verbatim.
# WORKDIR creates the directory, so the RUN below needs no mkdir/cd.
WORKDIR /models/.deepface/weights

# fetch <sha256> <filename> <url>: download one file and verify it.
#   - `curl -f` turns HTTP errors into a non-zero exit; `--retry` absorbs
#     transient network failures during the build.
#   - The checksum line uses the canonical "<hash>  <file>" (two-space) format.
#   - Two of the URLs track a mutable `master` branch; the checksum is what
#     pins the content, so upstream drift fails the build instead of silently
#     changing the image.
RUN set -eux; \
    fetch() { \
        curl -fsSL --retry 3 --retry-delay 2 -o "$2" "$3"; \
        echo "$1  $2" | sha256sum -c -; \
    }; \
    fetch "${FACENET512_SHA256}" facenet512_weights.h5 \
        "https://github.com/serengil/deepface_models/releases/download/v1.0/facenet512_weights.h5"; \
    fetch "${CENTERFACE_SHA256}" centerface.onnx \
        "https://github.com/Star-Clouds/CenterFace/raw/master/models/onnx/centerface.onnx"; \
    fetch "${MINIFASNET_V2_SHA256}" 2.7_80x80_MiniFASNetV2.pth \
        "https://github.com/minivision-ai/Silent-Face-Anti-Spoofing/raw/master/resources/anti_spoof_models/2.7_80x80_MiniFASNetV2.pth"; \
    fetch "${MINIFASNET_V1SE_SHA256}" 4_0_0_80x80_MiniFASNetV1SE.pth \
        "https://github.com/minivision-ai/Silent-Face-Anti-Spoofing/raw/master/resources/anti_spoof_models/4_0_0_80x80_MiniFASNetV1SE.pth"; \
    chmod 0644 *.h5 *.onnx *.pth

# ============================================================================
# Stage 2: runtime
# ============================================================================
FROM python:3.12-slim

# Set environment variables
Expand All @@ -11,7 +74,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \

WORKDIR /app

# Install system dependencies + Tesseract OCR with Turkish language pack
# Install system dependencies + Tesseract OCR with Turkish language pack.
# `gosu` is used by the entrypoint shim to drop privileges from root → uid 100
# after the (root-only) chown of any externally-mounted cache volume.
RUN apt-get update && apt-get install -y --no-install-recommends \
libglib2.0-0 \
libsm6 \
Expand All @@ -26,6 +91,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
tesseract-ocr \
tesseract-ocr-tur \
gosu \
&& rm -rf /var/lib/apt/lists/*

# Copy requirements
Expand Down Expand Up @@ -61,19 +127,55 @@ RUN python -c "import cv2; print('OpenCV version:', cv2.__version__)" && \
# Copy application code
COPY . .

# Create non-root user for security and ensure uploads dir is writable
RUN addgroup --system app && adduser --system --ingroup app app \
# Create non-root user for security and ensure uploads dir is writable.
# Pin UID/GID 100/101 explicitly so host-side chown on a named volume
# (e.g. /var/lib/docker/volumes/biometric-processor_biometric_models/_data)
# matches the in-container `app` user across rebuilds. The default
# `--system` numbering on debian-slim is dynamic and previously drifted
# silently — see feedback_readonly_rootfs_cache_dirs.
RUN addgroup --system --gid 101 app \
&& adduser --system --ingroup app --uid 100 app \
&& mkdir -p /app/uploads \
&& chown -R app:app /app

# ----------------------------------------------------------------------------
# Bake the four model files into the image under /opt/baked-models, mirroring
# the .deepface/weights layout DeepFace expects beneath $DEEPFACE_HOME.
# With read_only:true rootfs in prod, the image-baked content is read-only by
# design — fine because DeepFace only reads these files. The entrypoint shim
# below also seeds an empty mounted cache volume from /opt/baked-models so a
# fresh `docker volume rm` no longer requires the operator to remember to
# re-download MiniFASNet by hand (the bug pattern that triggered this PR).
# ----------------------------------------------------------------------------
COPY --from=model-fetcher --chown=100:101 /models/.deepface /opt/baked-models/.deepface

# Entrypoint shim (runs as root, drops to uid 100 via gosu):
# 1. Chowns any externally-mounted /tmp/.deepface volume to 100:101 so a
# root-owned named volume does not silently break DeepFace cache writes
# under uid 100 (the recurring bug pattern — 4th sighting 2026-05-12).
# 2. Seeds missing weight files from /opt/baked-models so a wiped named
# volume immediately repopulates with the four critical model files,
# removing operator memory as a load-bearing dependency.
# Both operations are idempotent and best-effort (|| true) — they never block
# container startup. After running them the shim execs the original CMD as
# `app` via gosu.
COPY deploy/entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod 0755 /usr/local/bin/entrypoint.sh

# Expose port
EXPOSE ${PORT:-8001}

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD curl -f http://localhost:${PORT:-8001}/api/v1/health || exit 1

USER app
# NOTE: ENTRYPOINT starts as root so it can chown the mounted cache volume,
# then execs the CMD under uid 100 (`app`) via gosu. No `USER` directive
# here on purpose — the entrypoint owns privilege drop. Anyone bypassing
# the entrypoint (e.g. `docker run --entrypoint /bin/sh`) must drop
# privileges themselves; this is acceptable because debug bypasses are
# operator-initiated.
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

# Start the application (uses PORT from environment, defaults to 8001)
# Start the application (uses PORT from environment, defaults to 8001).
# The entrypoint forwards $@ to `gosu app` so this CMD runs as uid 100.
# `sh -c` is needed only to expand ${PORT:-8001}; `exec` then replaces the
# shell with uvicorn so it receives SIGTERM from `docker stop` directly
# instead of sitting behind a wrapper shell.
CMD ["sh", "-c", "exec python -m uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8001}"]
64 changes: 64 additions & 0 deletions deploy/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/sh
# ============================================================================
# biometric-processor entrypoint shim
# ----------------------------------------------------------------------------
# Starts as root, prepares the DeepFace weights cache, then execs the CMD as
# the `app` user (uid 100 / gid 101) via `gosu`.
#
# The cache directory is $DEEPFACE_HOME (default /tmp/.deepface); weights live
# in its `.deepface/weights` subdirectory. Steps, in order:
#
#   1. Seed: copy every weight file that exists under /opt/baked-models but is
#      missing from the cache. Existing files are never overwritten, so an
#      operator's deliberate model rotation on the volume survives. After
#      `docker volume rm biometric-processor_biometric_models` the volume
#      comes back empty and root-owned; this step repopulates it from the
#      image layer so nobody has to `docker cp` weights by hand.
#
#   2. Own: recursively chown/chmod the cache to 100:101. This runs AFTER
#      seeding so the directories created in step 1 are covered as well;
#      otherwise a fresh volume ends up with root-owned `.deepface/weights`
#      directories that uid 100 cannot write into.
#
#   3. Fallback: if the cache directory does not exist and cannot be created
#      (e.g. read-only rootfs with nothing mounted there), DEEPFACE_HOME is
#      pointed at /opt/baked-models so the baked weights are read directly.
#
# Every cache operation is best-effort (`|| true`) and never blocks boot.
# ============================================================================
set -eu

# uid:gid of the `app` user created in the Dockerfile.
APP_OWNER="100:101"

DEEPFACE_CACHE_DIR="${DEEPFACE_HOME:-/tmp/.deepface}"
BAKED_MODELS_ROOT="/opt/baked-models"
BAKED_MODELS_DIR="${BAKED_MODELS_ROOT}/.deepface"

if [ -d "${DEEPFACE_CACHE_DIR}" ] || mkdir -p "${DEEPFACE_CACHE_DIR}" 2>/dev/null; then
    # 1. Seed missing weight files from the baked image layer.
    if [ -d "${BAKED_MODELS_DIR}/weights" ]; then
        weights_dir="${DEEPFACE_CACHE_DIR}/.deepface/weights"
        mkdir -p "${weights_dir}" 2>/dev/null || true
        for src in "${BAKED_MODELS_DIR}"/weights/*; do
            [ -f "${src}" ] || continue
            dst="${weights_dir}/$(basename "${src}")"
            if [ ! -f "${dst}" ]; then
                # Copy to a temporary name and rename into place, so an
                # interrupted copy never leaves a truncated file that the
                # "already present" check above would then skip forever.
                if cp "${src}" "${dst}.partial" 2>/dev/null \
                    && mv -f "${dst}.partial" "${dst}" 2>/dev/null; then
                    chmod 0644 "${dst}" 2>/dev/null || true
                else
                    rm -f "${dst}.partial" 2>/dev/null || true
                fi
            fi
        done
    fi

    # 2. Hand the whole cache tree (including anything created above) to the
    #    app user. Fails silently when not running as root.
    chown -R "${APP_OWNER}" "${DEEPFACE_CACHE_DIR}" 2>/dev/null || true
    chmod -R u+rwX,go+rX "${DEEPFACE_CACHE_DIR}" 2>/dev/null || true
elif [ -d "${BAKED_MODELS_DIR}/weights" ]; then
    # 3. No usable cache location: read the weights straight from the image.
    echo "entrypoint: ${DEEPFACE_CACHE_DIR} is unavailable; using baked models under ${BAKED_MODELS_ROOT}" >&2
    DEEPFACE_HOME="${BAKED_MODELS_ROOT}"
    export DEEPFACE_HOME
fi

# Drop privileges and exec the CMD. If we're already non-root (e.g. a
# `docker run --user` override), skip gosu and exec directly so we don't fail.
if [ "$(id -u)" = "0" ]; then
    exec gosu app "$@"
else
    exec "$@"
fi
16 changes: 16 additions & 0 deletions docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,22 @@ services:
DEEPFACE_HOME: /tmp/.deepface
volumes:
- biometric_uploads:/app/uploads
# /tmp/.deepface — DeepFace weights cache. As of 2026-05-12 the four
# critical model files (Facenet512, centerface, MiniFASNetV2,
# MiniFASNetV1SE) are BAKED INTO THE IMAGE under /opt/baked-models/
# and the entrypoint shim self-heals this volume from that source on
# boot (chown to 100:101 + cp missing weights). Two consequences:
# 1) The weights no longer exist ONLY on this volume, but note that the
# baked copy lives under /opt/baked-models, not /tmp/.deepface. Before
# removing this mount, confirm DeepFace resolves its weights from
# /opt/baked-models: with read_only:true and nothing mounted here,
# /tmp/.deepface may not be creatable at all. We keep the mount for
# now so DeepFace can still cache additional models it may want to
# download in the future (e.g. new MiniFASNet variants in a DeepFace
# upgrade).
# 2) `docker volume rm biometric-processor_biometric_models` is now
# a no-op for correctness: the entrypoint shim seeds it again
# from the baked image layer on the next boot. Operator memory
# ("remember to docker cp the .pth files") is no longer
# load-bearing. See feedback_readonly_rootfs_cache_dirs.
- biometric_models:/tmp/.deepface
- biometric_uniface:/app/uniface-cache
# No Traefik labels by design (CLAUDE.md: "Internal only — No public
Expand Down
Loading