From af598f4bf8e6cdb51f25cfa93ee412922d84b2c7 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 07:11:54 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20SQLite=20storage?= =?UTF-8?q?=20and=20avoid=20expensive=20asdict()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move persistent PRAGMA journal_mode=WAL to _init_schema to reduce connection overhead. - Maintain PRAGMA synchronous=NORMAL in _connect as it is connection-local. - Replace recursive dataclasses.asdict() with direct attribute access in high-frequency database methods (record_event, store_record, upsert_session, record_episode) to eliminate measurable serialization overhead. - Behavior change: record_episode and store_record now assign generated values (episode_id/record_id, created_at, updated_at, supersedes_id) directly onto the passed-in dataclass instance instead of onto an asdict() copy, so the caller's object is mutated. - Clean up temporary benchmark scripts. Co-authored-by: jhweb <12529192+jhweb@users.noreply.github.com> --- braindrain/observer.py | 23 ++++++----- braindrain/session.py | 65 +++++++++++++++--------------- braindrain/wiki_brain.py | 87 ++++++++++++++++++++-------------------- 3 files changed, 89 insertions(+), 86 deletions(-) diff --git a/braindrain/observer.py b/braindrain/observer.py index 6530b02..f1dcab7 100644 --- a/braindrain/observer.py +++ b/braindrain/observer.py @@ -33,13 +33,14 @@ def __init__(self, db_path: str | Path, *, max_events: int = 10_000) -> None: def _connect(self) -> sqlite3.Connection: conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row - # Enable WAL mode for better write performance - conn.execute("PRAGMA journal_mode=WAL") + # PRAGMA synchronous is connection-local and must be set on every connection. conn.execute("PRAGMA synchronous=NORMAL") return conn def _init_schema(self) -> None: with self._connect() as conn: + # PRAGMA journal_mode=WAL is persistent and only needs to be set once. 
+ conn.execute("PRAGMA journal_mode=WAL") conn.execute( """ CREATE TABLE IF NOT EXISTS brain_events ( @@ -75,7 +76,7 @@ def _init_schema(self) -> None: ) def record_event(self, event: BrainEvent) -> dict[str, Any]: - payload = asdict(event) + # Avoid asdict() to skip expensive recursive overhead with self._connect() as conn: cursor = conn.execute( """ @@ -91,14 +92,14 @@ def record_event(self, event: BrainEvent) -> dict[str, Any]: ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) """, ( - payload["timestamp"], - payload["session_id"], - payload["event_type"], - payload["tool_name"], - json.dumps(payload["files_touched"]), - payload["token_cost"], - payload["duration_ms"], - json.dumps(payload["metadata"]), + event.timestamp, + event.session_id, + event.event_type, + event.tool_name, + json.dumps(event.files_touched), + event.token_cost, + event.duration_ms, + json.dumps(event.metadata), ), ) pruned = self._prune_oldest(conn) diff --git a/braindrain/session.py b/braindrain/session.py index 35949a7..797afd4 100644 --- a/braindrain/session.py +++ b/braindrain/session.py @@ -54,13 +54,14 @@ def __init__(self, db_path: str | Path, *, inactivity_timeout_minutes: int = 30) def _connect(self) -> sqlite3.Connection: conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row - # Enable WAL mode for better write performance - conn.execute("PRAGMA journal_mode=WAL") + # PRAGMA synchronous is connection-local and must be set on every connection. conn.execute("PRAGMA synchronous=NORMAL") return conn def _init_schema(self) -> None: with self._connect() as conn: + # PRAGMA journal_mode=WAL is persistent and only needs to be set once. 
+ conn.execute("PRAGMA journal_mode=WAL") conn.execute( """ CREATE TABLE IF NOT EXISTS session_summaries ( @@ -144,7 +145,7 @@ def touch_session( return existing def upsert_session(self, summary: SessionSummary) -> None: - payload = asdict(summary) + # Avoid expensive asdict() in hot path with self._connect() as conn: conn.execute( """ @@ -172,16 +173,16 @@ def upsert_session(self, summary: SessionSummary) -> None: updated_at = excluded.updated_at """, ( - payload["session_id"], - payload["start_time"], - payload["end_time"], - payload["events_count"], - json.dumps(payload["tools_used"]), - json.dumps(payload["files_modified"]), - json.dumps(payload["key_decisions"]), - json.dumps(payload["errors"]), - payload["token_total"], - payload["updated_at"], + summary.session_id, + summary.start_time, + summary.end_time, + summary.events_count, + json.dumps(summary.tools_used), + json.dumps(summary.files_modified), + json.dumps(summary.key_decisions), + json.dumps(summary.errors), + summary.token_total, + summary.updated_at, ), ) @@ -237,11 +238,11 @@ def should_dream(self, *, quiet_minutes: int | None = None, now: float | None = return (current - latest) >= quiet * 60 def record_episode(self, episode: EpisodeRecord) -> dict[str, Any]: - payload = asdict(episode) - if not payload["episode_id"]: - payload["episode_id"] = str(uuid.uuid4()) - if not payload["created_at"]: - payload["created_at"] = time.time() + # Avoid expensive asdict() in hot path + if not episode.episode_id: + episode.episode_id = str(uuid.uuid4()) + if not episode.created_at: + episode.created_at = time.time() with self._connect() as conn: conn.execute( """ @@ -262,22 +263,22 @@ def record_episode(self, episode: EpisodeRecord) -> dict[str, Any]: ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( - payload["episode_id"], - payload["session_id"], - payload["problem"], - payload["context"], - payload["action"], - payload["outcome"], - json.dumps(payload["evidence_refs"]), - payload["local_critique"], - payload["global_reflection"], - payload["confidence"], - json.dumps(payload["tags"]), - payload["created_at"], - payload["promoted_lesson_id"], + episode.episode_id, + episode.session_id, + episode.problem, + episode.context, + episode.action, + episode.outcome, + json.dumps(episode.evidence_refs), + episode.local_critique, + episode.global_reflection, + episode.confidence, + json.dumps(episode.tags), + episode.created_at, + episode.promoted_lesson_id, ), ) - return {"episode_id": payload["episode_id"]} + return {"episode_id": episode.episode_id} def mark_episode_promoted(self, episode_id: str, lesson_id: str) -> None: with self._connect() as conn: diff --git a/braindrain/wiki_brain.py b/braindrain/wiki_brain.py index c2f4b8a..3739916 100644 --- a/braindrain/wiki_brain.py +++ b/braindrain/wiki_brain.py @@ -65,13 +65,14 @@ def __init__( def _connect(self) -> sqlite3.Connection: conn = sqlite3.connect(self.db_path) conn.row_factory = sqlite3.Row - # Enable WAL mode for better write performance - conn.execute("PRAGMA journal_mode=WAL") + # PRAGMA synchronous is connection-local and must be set on every connection. conn.execute("PRAGMA synchronous=NORMAL") return conn def _init_schema(self) -> None: with self._connect() as conn: + # PRAGMA journal_mode=WAL is persistent and only needs to be set once. 
+ conn.execute("PRAGMA journal_mode=WAL") conn.execute( """ CREATE TABLE IF NOT EXISTS brain_records ( @@ -138,22 +139,22 @@ def _init_schema(self) -> None: self._fts_available = False def store_record(self, record: BrainRecord) -> dict[str, Any]: - payload = asdict(record) + # Avoid expensive asdict() in hot path now = time.time() - if not payload["record_id"]: - payload["record_id"] = str(uuid.uuid4()) - if not payload["created_at"]: - payload["created_at"] = now - payload["updated_at"] = now + if not record.record_id: + record.record_id = str(uuid.uuid4()) + if not record.created_at: + record.created_at = now + record.updated_at = now contradiction = self.detect_contradiction( - content=payload["content"], - title=payload["title"], - record_class=payload["record_class"], - exclude_record_id=payload["record_id"], + content=record.content, + title=record.title, + record_class=record.record_class, + exclude_record_id=record.record_id, ) if contradiction: - payload["supersedes_id"] = contradiction["record_id"] + record.supersedes_id = contradiction["record_id"] with self._connect() as conn: conn.execute( @@ -179,33 +180,33 @@ def store_record(self, record: BrainRecord) -> dict[str, Any]: ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( - payload["record_id"], - payload["record_class"], - payload["title"], - payload["content"], - payload["source"], - payload["category"], - payload["status"], - payload["importance"], - payload["confidence"], - json.dumps(payload["tags"]), - json.dumps(payload["evidence_refs"]), - json.dumps(payload["metadata"]), - payload["supersedes_id"], - payload["created_at"], - payload["updated_at"], - payload["last_accessed"], - payload["access_count"], + record.record_id, + record.record_class, + record.title, + record.content, + record.source, + record.category, + record.status, + record.importance, + record.confidence, + json.dumps(record.tags), + json.dumps(record.evidence_refs), + json.dumps(record.metadata), + record.supersedes_id, + record.created_at, + record.updated_at, + record.last_accessed, + record.access_count, ), ) - if payload["supersedes_id"]: + if record.supersedes_id: conn.execute( """ UPDATE brain_records SET status = 'superseded', updated_at = ? WHERE record_id = ? """, - (now, payload["supersedes_id"]), + (now, record.supersedes_id), ) if self._fts_available: conn.execute( @@ -226,21 +227,21 @@ def store_record(self, record: BrainRecord) -> dict[str, Any]: ) """, ( - payload["record_id"], - payload["record_id"], - payload["title"], - payload["content"], - " ".join(payload["tags"]), - payload["record_class"], - payload["category"], - payload["status"], + record.record_id, + record.record_id, + record.title, + record.content, + " ".join(record.tags), + record.record_class, + record.category, + record.status, ), ) return { - "record_id": payload["record_id"], - "status": payload["status"], - "supersedes_id": payload["supersedes_id"], + "record_id": record.record_id, + "status": record.status, + "supersedes_id": record.supersedes_id, } def store_fact(