prosdevlab · prosdev · Jan 15, 2026 · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/.gitignore b/.gitignore
@@ -60,6 +60,9 @@ pip-wheel-metadata/
 *.log
 logs/
 
+# Local data (SQLite databases, etc.)
+data/
+
 # GCP credentials (just in case)
 *.json
 !pyproject.json

diff --git a/README.md b/README.md
@@ -329,6 +329,90 @@ class MyCustomStore(EventStore):
     def health_check(self) -> bool: ...
 ```
 
+## Monitoring & Metrics
+
+`eventkit` exposes Prometheus metrics on a dedicated port (default: 9090) for production observability.
+
+### Metrics Server
+
+The metrics server runs independently from the main API server, isolating monitoring traffic from production requests:
+
+```python
+settings = Settings(
+    eventkit_metrics_enabled=True,  # Default
+    eventkit_metrics_port=9090,      # Default
+)
+```
+
+Access metrics:
+```bash
+curl http://localhost:9090/metrics  # Prometheus format
+curl http://localhost:9090/health    # Health check
+```
+
+### Available Metrics
+
+**API Layer:**
+- `eventkit_api_requests_total` - Total HTTP requests (labels: `endpoint`, `method`, `status`)
+- `eventkit_api_request_duration_seconds` - Request latency histogram (labels: `endpoint`, `method`)
+
+**Event Processing:**
+- `eventkit_events_received_total` - Events received at ingestion
+- `eventkit_events_processed_total` - Events successfully processed (label: `event_type`)
+- `eventkit_events_failed_total` - Events that failed validation (label: `reason`)
+
+**Storage:**
+- `eventkit_storage_bytes_written_total` - Bytes written to storage
+- `eventkit_storage_files_written_total` - Files written to storage (label: `storage_type`)
+
+**Queue & Ring Buffer:**
+- `eventkit_queue_enqueued_total` - Events enqueued (label: `queue_mode`)
+- `eventkit_queue_dequeued_total` - Events dequeued (label: `queue_mode`)
+- `eventkit_queue_processed_total` - Events processed by workers (labels: `queue_mode`, `result`)
+- `eventkit_queue_depth` - Current queue depth gauge (labels: `queue_mode`, `partition`)
+- `eventkit_ringbuffer_written_total` - Events written to ring buffer
+- `eventkit_ringbuffer_published_total` - Events published from ring buffer (label: `result`)
+- `eventkit_ringbuffer_marked_published_total` - Events marked as published
+- `eventkit_ringbuffer_size` - Total ring buffer size gauge
+- `eventkit_ringbuffer_unpublished` - Unpublished events gauge
+
+**Warehouse Loader:**
+- `eventkit_warehouse_loader_files_processed_total` - Files loaded to warehouse
+- `eventkit_warehouse_loader_errors_total` - Loader errors
+
+**System:**
+- `eventkit_info` - Version and platform info
+- `eventkit_uptime_seconds` - Process uptime
+- `eventkit_component_health` - Component health status (labels: `component`, `status`)
+
+### Grafana Dashboard
+
+Example queries for building dashboards:
+
+```promql
+# API request rate
+rate(eventkit_api_requests_total[5m])
+
+# Event processing throughput
+rate(eventkit_events_processed_total[5m])
+
+# Storage write rate (bytes/sec)
+rate(eventkit_storage_bytes_written_total[5m])
+
+# Queue depth (monitor backlog)
+eventkit_queue_depth
+
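+# Error rate (fraction of received events that failed; sum() drops the `reason` label so both sides match)
+sum(rate(eventkit_events_failed_total[5m])) / sum(rate(eventkit_events_received_total[5m]))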
+```
+
+### Design Principles
+
+- **Counter-focused** - Prefer counters over gauges for aggregation
+- **Low cardinality labels** - No unbounded values (user IDs, etc.)
+- **Separate server** - Metrics on dedicated port isolates monitoring traffic
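+- **Standard naming** - Follow Prometheus conventions: `{namespace}_{subsystem}_{name}_{unit}`, with a `_total` suffix on counters (e.g. `eventkit_api_request_duration_seconds`, `eventkit_api_requests_total`)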
+
 ## Development
 
 See [LOCAL_DEV.md](LOCAL_DEV.md) for detailed local development instructions.

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,21 +1,7 @@
 # Docker Compose for local development and testing
-# Runs Firestore, Pub/Sub, and GCS emulators for integration tests
+# Runs Pub/Sub and GCS emulators for integration tests
 
 services:
-  firestore-emulator:
-    image: gcr.io/google.com/cloudsdktool/google-cloud-cli:emulators
-    command: gcloud emulators firestore start --host-port=0.0.0.0:8080
-    ports:
-      - "8080:8080"
-    environment:
-      - FIRESTORE_PROJECT_ID=test-project
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8080"]
-      interval: 5s
-      timeout: 5s
-      retries: 10
-      start_period: 10s
-
   pubsub-emulator:
     image: gcr.io/google.com/cloudsdktool/google-cloud-cli:emulators
     command: gcloud beta emulators pubsub start --host-port=0.0.0.0:8085

diff --git a/pyproject.toml b/pyproject.toml
@@ -37,6 +37,7 @@ dependencies = [
     "structlog>=23.2.0",
     "tenacity>=8.2.0",
     "python-dateutil>=2.9.0.post0",
+    "prometheus-client>=0.20.0",
 ]
 
 [project.optional-dependencies]
-Original file line number
+Diff line change
@@ Expand Up / @@ -60,6 +60,9 @@ pip-wheel-metadata/ @@
     *.log
     logs/
+    # Local data (SQLite databases, etc.)
+    data/
     # GCP credentials (just in case)
     *.json
     !pyproject.json
@@ Expand Down @@