diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 47826dd..8bbb02c 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -24,6 +24,14 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + + - name: Build blog + run: npm ci && npm run build:blog + - name: Setup Pages uses: actions/configure-pages@v5 diff --git a/package-lock.json b/package-lock.json index 89fb090..910a399 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,13 +1,13 @@ { - "name": "brain", + "name": "@vraspar/brain", "version": "0.1.0-alpha.5", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "brain", + "name": "@vraspar/brain", "version": "0.1.0-alpha.5", - "license": "ISC", + "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "^1.27.1", "better-sqlite3": "^12.8.0", @@ -17,13 +17,20 @@ "gray-matter": "^4.0.3", "simple-git": "^3.33.0" }, + "bin": { + "brain": "dist/index.js" + }, "devDependencies": { "@types/better-sqlite3": "^7.6.13", "@types/node": "^25.5.0", "eslint": "^10.1.0", + "marked": "^17.0.5", "tsx": "^4.21.0", "typescript": "^5.9.3", "vitest": "^4.1.0" + }, + "engines": { + "node": ">=20.0.0" } }, "node_modules/@colors/colors": { @@ -2979,6 +2986,19 @@ "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/marked": { + "version": "17.0.5", + "resolved": "https://registry.npmjs.org/marked/-/marked-17.0.5.tgz", + "integrity": "sha512-6hLvc0/JEbRjRgzI6wnT2P1XuM1/RrrDEX0kPt0N7jGm1133g6X7DlxFasUIx+72aKAr904GTxhSLDrd5DIlZg==", + "dev": true, + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 20" + } + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", diff --git a/package.json b/package.json index a44ffe8..6515276 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "test": "vitest 
run",
     "test:watch": "vitest",
     "lint": "eslint src/ test/",
+    "build:blog": "node scripts/build-blog.js",
     "prepublishOnly": "npm run build"
   },
   "keywords": [
@@ -56,6 +57,7 @@
     "@types/better-sqlite3": "^7.6.13",
     "@types/node": "^25.5.0",
     "eslint": "^10.1.0",
+    "marked": "^17.0.5",
     "tsx": "^4.21.0",
     "typescript": "^5.9.3",
     "vitest": "^4.1.0"
diff --git a/scripts/build-blog.js b/scripts/build-blog.js
new file mode 100644
index 0000000..25b6d3f
--- /dev/null
+++ b/scripts/build-blog.js
@@ -0,0 +1,199 @@
+#!/usr/bin/env node
+
+/**
+ * Blog build script for brain.vraspar.com
+ *
+ * Converts markdown posts in website/blog-src/posts/ to HTML in website/blog/.
+ * Uses gray-matter for frontmatter + marked for markdown-to-HTML.
+ *
+ * Usage: node scripts/build-blog.js
+ */
+
+import { readFileSync, writeFileSync, readdirSync, mkdirSync, existsSync } from 'node:fs';
+import { join, basename, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import matter from 'gray-matter';
+import { marked } from 'marked';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const ROOT = join(__dirname, '..');
+const POSTS_DIR = join(ROOT, 'website', 'blog-src', 'posts');
+const OUTPUT_DIR = join(ROOT, 'website', 'blog');
+const SITE_URL = 'https://brain.vraspar.com';
+
+/**
+ * Read every `.md` file in POSTS_DIR and turn it into a post record.
+ *
+ * Each record carries the frontmatter fields plus `slug` (file name without
+ * `.md`) and `html` (the markdown body rendered by marked). Records are
+ * sorted by `date`, newest first.
+ *
+ * @returns {object[]} Post records; empty when POSTS_DIR does not exist.
+ */
+function loadPosts() {
+  if (!existsSync(POSTS_DIR)) return [];
+  return readdirSync(POSTS_DIR)
+    .filter(f => f.endsWith('.md'))
+    .map(file => {
+      const raw = readFileSync(join(POSTS_DIR, file), 'utf8');
+      const { data, content } = matter(raw);
+      const slug = basename(file, '.md');
+      const html = marked.parse(content);
+      // Frontmatter may still override the file-derived slug, but `html` is
+      // spread last so a stray `html:` key cannot replace the rendered body.
+      return { slug, ...data, html };
+    })
+    .sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime());
+}
+
+/**
+ * Format a post date in long en-US form, e.g. "March 28, 2026".
+ *
+ * @param {Date|string|number} date - Anything the Date constructor accepts.
+ * @returns {string} The formatted date.
+ */
+function formatDate(date) {
+  return new Date(date).toLocaleDateString('en-US', {
+    year: 'numeric', month: 'long', day: 'numeric',
+    // Date-only values (e.g. `2026-03-28`) are parsed as UTC midnight; format
+    // in UTC so builds on machines west of UTC do not print the previous day.
+    timeZone: 'UTC',
+  });
+}
+
+/**
+ * Escape a value for interpolation into HTML text or attribute values.
+ *
+ * @param {*} str - Value to escape; null/undefined become an empty string.
+ * @returns {string} The value with &, <, >, " and ' replaced by entities.
+ */
+function esc(str) {
+  return String(str ?? '')
+    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;');
+}
+
+const
HEAD = ` + + + `; + +function renderPost(post) { + return ` + + + + + ${esc(post.title)} \u2014 Brain CLI Blog + + + + + + +${HEAD} + + + + + +
+
+
+

${esc(post.title)}

+ +
+
+ ${post.html} +
+
+
+ ← All posts + GitHub → +
+
+ + +`; +} + +function renderIndex(posts) { + const list = posts.map(p => ` +
+ +

${esc(p.title)}

+ + ${p.summary ? `

${esc(p.summary)}

` : ''} +
+
`).join('\n'); + + return ` + + + + + Blog \u2014 Brain CLI + + + + + +${HEAD} + + + + + +
+
+

Blog

+

Notes on building Brain CLI.

+
+
+${list} +
+
+ + +`; +} + +const posts = loadPosts(); +if (posts.length === 0) { + console.log('No blog posts found in website/blog-src/posts/'); + process.exit(0); +} + +for (const post of posts) { + const postDir = join(OUTPUT_DIR, post.slug); + mkdirSync(postDir, { recursive: true }); + writeFileSync(join(postDir, 'index.html'), renderPost(post)); + console.log(` Built: blog/${post.slug}/`); +} + +writeFileSync(join(OUTPUT_DIR, 'index.html'), renderIndex(posts)); +console.log(` Built: blog/index.html (${posts.length} post${posts.length === 1 ? '' : 's'})`); diff --git a/website/blog-src/posts/building-brain.md b/website/blog-src/posts/building-brain.md new file mode 100644 index 0000000..8ec3ffe --- /dev/null +++ b/website/blog-src/posts/building-brain.md @@ -0,0 +1,57 @@ +--- +title: "Building Brain: A CLI for Team Knowledge Sharing" +date: 2026-03-28 +author: Vivek Parikh +summary: "How I built a CLI tool that stores team knowledge in git, searches it with FTS5, and exposes it to AI agents through MCP." +--- + +## The problem + +I use AI agents for most of my development work. They produce a lot of markdown: guides, runbooks, patterns, context files. Over months, this accumulates into a personal knowledge base that's genuinely useful. + +The problem is sharing it. I tried Obsidian, which works well for personal use but doesn't solve team knowledge sharing. There's no good way for a teammate's agent to access what my agent has already figured out. The pattern I kept seeing: I'd ask a teammate a question, they'd ask their agent, the agent would answer from scratch. That knowledge existed somewhere, but nobody could find it. + +Wikis don't solve this either. They require manual curation, they rot without maintenance, and AI agents can't interact with them programmatically. I wanted something that fits how developers already work: command line, git, markdown. + +## Architecture + +Brain is a CLI tool that stores knowledge as markdown files in a git repository. 
Three design decisions define the architecture: + +**Git as storage.** Entries are markdown files with YAML frontmatter, committed to a shared repo. No server to run, no database to manage, no accounts to create. Version history and access control come from git. A team joins by cloning the repo. + +**SQLite FTS5 for search.** Each machine maintains a local search index using SQLite's FTS5 virtual table with BM25 ranking. The index is a disposable cache, rebuilt from git on every sync. This gives sub-millisecond full-text search with prefix matching and contextual snippets, without requiring any external service. + +**MCP as the agent interface.** Brain exposes 10 tools and 2 resources via the Model Context Protocol over stdio. An AI agent connected to Brain can search team knowledge, read entries, publish findings, and check what's new. The agent doesn't need the CLI; it talks MCP directly. This is the key differentiator: the agent is a first-class user, not an afterthought. + +The rest follows from these three decisions. Read receipts are JSON files in the repo (so they sync with git). Freshness scoring uses a multiplicative formula over recency and read frequency. Pruning moves stale entries to `_archive/` (reversible). Everything runs locally, everything syncs through git. + +## The tagging problem + +Brain's first auto-tagger was a 56-term hardcoded dictionary. It matched words like "docker" and "kubernetes" in entry content and used them as tags. This works for the obvious cases but misses everything else. A guide about "payment service deployment patterns" gets tagged `docker` but not `payments`, `deployment-pipeline`, or `microservices`. The dictionary doesn't know your domain. + +The relationship system had the same issue: four heuristic signals (shared tags, title overlap, same author, content cross-references) that miss connections between entries with different vocabulary. 
Two entries about Redis timeouts and connection pooling aren't linked because they happen to use different words. + +We're replacing this with a two-algorithm approach, both zero-dependency: + +**RAKE (Rapid Automatic Keyword Extraction)** extracts multi-word keyphrases per document. Instead of matching "docker" from a dictionary, it extracts "multi-stage docker builds" as a meaningful phrase. About 60 lines of TypeScript, no corpus needed. + +**TF-IDF with zone weighting** scores terms by how distinctive they are within the corpus. A term that appears in one entry but rarely across the brain scores high. A term that appears everywhere (like "the" or even "guide") scores low. Markdown structure matters: title tokens get 3x weight, headings get 2x, code blocks 1.5x. The corpus index lives in SQLite and improves as the brain grows. + +For relationships, TF-IDF cosine similarity replaces the heuristic linker. Two entries with high overlap in distinctive terms are related, regardless of whether they share tags or title words. This catches the Redis timeout / connection pooling case: both score high on `redis`, `connection`, `timeout`, `pool` relative to the rest of the corpus. + +## Obsidian compatibility + +Every brain works as an Obsidian vault. The directory structure (`guides/`, `skills/`) maps to folders. Entries are standard markdown with YAML frontmatter. Open `~/.brain/repo` in Obsidian and you get a visual graph of your team's knowledge for free. + +This matters because it meets people where they are. Some team members prefer a visual editor. Some want a graph view. Brain doesn't force a choice between CLI and GUI; the same data works in both. + +## What's next + +The intelligent tagging system is the next major feature. 
After that: + +- Better auto-linking via TF-IDF cosine similarity and entity extraction (CLI commands, file paths, URLs as link signals) +- Louvain clustering for auto-discovered topic groups +- Multi-brain support (multiple knowledge bases per machine) +- Auto-archive for entries that stay stale for 30+ days + +Brain is open source and in alpha. If you're interested, the repo is at [github.com/vraspar/brain](https://github.com/vraspar/brain) and the project site is at [brain.vraspar.com](https://brain.vraspar.com). diff --git a/website/blog/blog.css b/website/blog/blog.css new file mode 100644 index 0000000..6fefa56 --- /dev/null +++ b/website/blog/blog.css @@ -0,0 +1,243 @@ +/* Blog styles — extends the main site theme */ + +.blog-nav { + padding: 1.5rem 0; + border-bottom: 1px solid var(--border); +} + +.blog-nav .container { + display: flex; + align-items: center; + gap: 2rem; +} + +.blog-nav-brand { + font-family: 'JetBrains Mono', monospace; + font-size: 1.3rem; + font-weight: 700; + color: var(--accent) !important; + text-decoration: none !important; + letter-spacing: -0.03em; +} + +.blog-nav a { + color: var(--text-secondary); + font-size: 0.9rem; +} + +.blog-nav a:hover { + color: var(--accent); +} + +.blog-main { + max-width: 720px; + margin: 0 auto; + padding: 0 1.5rem; +} + +.blog-post { + padding: 4rem 0 3rem; +} + +.blog-post-header { + margin-bottom: 3rem; +} + +.blog-post-header h1 { + font-size: 2.2rem; + font-weight: 700; + line-height: 1.2; + margin-bottom: 1rem; + text-align: left; +} + +.blog-post-meta { + display: flex; + align-items: center; + gap: 1rem; + font-size: 0.9rem; + color: var(--text-muted); + font-family: 'JetBrains Mono', monospace; +} + +.blog-post-author { + color: var(--text-secondary); +} + +.blog-post-content { + line-height: 1.8; + color: var(--text-body); +} + +.blog-post-content h2 { + font-size: 1.4rem; + margin-top: 3rem; + margin-bottom: 1rem; + text-align: left; +} + +.blog-post-content h3 { + font-size: 1.15rem; + 
margin-top: 2rem; + margin-bottom: 0.75rem; +} + +.blog-post-content p { + margin-bottom: 1.25rem; + max-width: none; + margin-left: 0; + margin-right: 0; +} + +.blog-post-content a { + color: var(--accent); + text-decoration: underline; + text-decoration-color: var(--accent-dim); + text-underline-offset: 2px; +} + +.blog-post-content a:hover { + text-decoration-color: var(--accent); +} + +.blog-post-content strong { + color: var(--text-primary); + font-weight: 600; +} + +.blog-post-content code { + font-family: 'JetBrains Mono', monospace; + font-size: 0.88em; + background: var(--bg-elevated); + padding: 0.15em 0.4em; + border-radius: 3px; + color: var(--accent-bright); +} + +.blog-post-content pre { + background: var(--term-bg); + border: 1px solid var(--term-border); + border-radius: 6px; + padding: 1.25rem 1.5rem; + overflow-x: auto; + margin: 1.5rem 0; + line-height: 1.5; +} + +.blog-post-content pre code { + background: none; + padding: 0; + border-radius: 0; + font-size: 0.85rem; + color: var(--text-primary); +} + +.blog-post-content ul, +.blog-post-content ol { + margin: 1rem 0 1.25rem 1.5rem; +} + +.blog-post-content li { + margin-bottom: 0.4rem; +} + +.blog-post-content blockquote { + border-left: 3px solid var(--accent-dim); + padding-left: 1.25rem; + margin: 1.5rem 0; + color: var(--text-secondary); + font-style: italic; +} + +.blog-post-content hr { + border: none; + border-top: 1px solid var(--border); + margin: 2.5rem 0; +} + +.blog-post-footer { + display: flex; + justify-content: space-between; + padding: 2rem 0 4rem; + border-top: 1px solid var(--border); + font-size: 0.9rem; +} + +.blog-post-footer a { + color: var(--text-secondary); +} + +.blog-post-footer a:hover { + color: var(--accent); +} + +.blog-index-header { + padding: 4rem 0 2rem; +} + +.blog-index-header h1 { + font-size: 2.5rem; + margin-bottom: 0.5rem; + text-align: left; +} + +.blog-index-header p { + color: var(--text-secondary); + font-size: 1.05rem; + max-width: none; + 
margin-left: 0; + margin-right: 0; +} + +.blog-index-list { + padding-bottom: 4rem; +} + +.blog-index-post { + border-bottom: 1px solid var(--border); + padding: 1.75rem 0; +} + +.blog-index-post:first-child { + border-top: 1px solid var(--border); +} + +.blog-index-post a { + text-decoration: none; + display: block; +} + +.blog-index-post a:hover h2 { + color: var(--accent); +} + +.blog-index-post h2 { + font-size: 1.3rem; + font-weight: 600; + margin-bottom: 0.4rem; + transition: color 200ms ease; + text-align: left; +} + +.blog-index-post time { + font-family: 'JetBrains Mono', monospace; + font-size: 0.8rem; + color: var(--text-muted); +} + +.blog-index-post p { + color: var(--text-secondary); + font-size: 0.93rem; + margin-top: 0.5rem; + line-height: 1.6; + max-width: none; + margin-left: 0; + margin-right: 0; +} + +@media (max-width: 768px) { + .blog-post-header h1 { font-size: 1.7rem; } + .blog-index-header h1 { font-size: 2rem; } + .blog-post { padding: 2.5rem 0 2rem; } + .blog-main { padding: 0 1.25rem; } +} diff --git a/website/blog/building-brain/index.html b/website/blog/building-brain/index.html new file mode 100644 index 0000000..46da8c3 --- /dev/null +++ b/website/blog/building-brain/index.html @@ -0,0 +1,80 @@ + + + + + + Building Brain: A CLI for Team Knowledge Sharing — Brain CLI Blog + + + + + + + + + + + + + + + +
+
+
+

Building Brain: A CLI for Team Knowledge Sharing

+ +
+
+

The problem

+

I use AI agents for most of my development work. They produce a lot of markdown: guides, runbooks, patterns, context files. Over months, this accumulates into a personal knowledge base that's genuinely useful.

+

The problem is sharing it. I tried Obsidian, which works well for personal use but doesn't solve team knowledge sharing. There's no good way for a teammate's agent to access what my agent has already figured out. The pattern I kept seeing: I'd ask a teammate a question, they'd ask their agent, the agent would answer from scratch. That knowledge existed somewhere, but nobody could find it.

+

Wikis don't solve this either. They require manual curation, they rot without maintenance, and AI agents can't interact with them programmatically. I wanted something that fits how developers already work: command line, git, markdown.

+

Architecture

+

Brain is a CLI tool that stores knowledge as markdown files in a git repository. Three design decisions define the architecture:

+

Git as storage. Entries are markdown files with YAML frontmatter, committed to a shared repo. No server to run, no database to manage, no accounts to create. Version history and access control come from git. A team joins by cloning the repo.

+

SQLite FTS5 for search. Each machine maintains a local search index using SQLite's FTS5 virtual table with BM25 ranking. The index is a disposable cache, rebuilt from git on every sync. This gives sub-millisecond full-text search with prefix matching and contextual snippets, without requiring any external service.

+

MCP as the agent interface. Brain exposes 10 tools and 2 resources via the Model Context Protocol over stdio. An AI agent connected to Brain can search team knowledge, read entries, publish findings, and check what's new. The agent doesn't need the CLI; it talks MCP directly. This is the key differentiator: the agent is a first-class user, not an afterthought.

+

The rest follows from these three decisions. Read receipts are JSON files in the repo (so they sync with git). Freshness scoring uses a multiplicative formula over recency and read frequency. Pruning moves stale entries to _archive/ (reversible). Everything runs locally, everything syncs through git.

+

The tagging problem

+

Brain's first auto-tagger was a 56-term hardcoded dictionary. It matched words like "docker" and "kubernetes" in entry content and used them as tags. This works for the obvious cases but misses everything else. A guide about "payment service deployment patterns" gets tagged docker but not payments, deployment-pipeline, or microservices. The dictionary doesn't know your domain.

+

The relationship system had the same issue: four heuristic signals (shared tags, title overlap, same author, content cross-references) that miss connections between entries with different vocabulary. Two entries about Redis timeouts and connection pooling aren't linked because they happen to use different words.

+

We're replacing this with a two-algorithm approach, both zero-dependency:

+

RAKE (Rapid Automatic Keyword Extraction) extracts multi-word keyphrases per document. Instead of matching "docker" from a dictionary, it extracts "multi-stage docker builds" as a meaningful phrase. About 60 lines of TypeScript, no corpus needed.

+

TF-IDF with zone weighting scores terms by how distinctive they are within the corpus. A term that appears in one entry but rarely across the brain scores high. A term that appears everywhere (like "the" or even "guide") scores low. Markdown structure matters: title tokens get 3x weight, headings get 2x, code blocks 1.5x. The corpus index lives in SQLite and improves as the brain grows.

+

For relationships, TF-IDF cosine similarity replaces the heuristic linker. Two entries with high overlap in distinctive terms are related, regardless of whether they share tags or title words. This catches the Redis timeout / connection pooling case: both score high on redis, connection, timeout, pool relative to the rest of the corpus.

+

Obsidian compatibility

+

Every brain works as an Obsidian vault. The directory structure (guides/, skills/) maps to folders. Entries are standard markdown with YAML frontmatter. Open ~/.brain/repo in Obsidian and you get a visual graph of your team's knowledge for free.

+

This matters because it meets people where they are. Some team members prefer a visual editor. Some want a graph view. Brain doesn't force a choice between CLI and GUI; the same data works in both.

+

What's next

+

The intelligent tagging system is the next major feature. After that:

+
    +
  • Better auto-linking via TF-IDF cosine similarity and entity extraction (CLI commands, file paths, URLs as link signals)
  • +
  • Louvain clustering for auto-discovered topic groups
  • +
  • Multi-brain support (multiple knowledge bases per machine)
  • +
  • Auto-archive for entries that stay stale for 30+ days
  • +
+

Brain is open source and in alpha. If you're interested, the repo is at github.com/vraspar/brain and the project site is at brain.vraspar.com.

+ +
+
+
+ ← All posts + GitHub → +
+
+ + + \ No newline at end of file diff --git a/website/blog/index.html b/website/blog/index.html new file mode 100644 index 0000000..e0741f0 --- /dev/null +++ b/website/blog/index.html @@ -0,0 +1,48 @@ + + + + + + Blog — Brain CLI + + + + + + + + + + + + + + +
+
+

Blog

+

Notes on building Brain CLI.

+
+
+ + +
+
+ + + \ No newline at end of file