Commit 3bc1b43

feat: modular domain-specific system prompts with multi-signal fingerprint detection
Compose agent prompts from environment-specific domain modules instead of the monolithic `builder.txt`/`analyst.txt`. When `experimental.modular_prompts` is enabled, the system prompt includes only the instructions relevant to the detected databases and tools.

Fingerprint expansion (4 signal sources):
- File detection: `dbt_project.yml`, `profiles.yml`, `.sqlfluff`, etc.
- Connection registry: configured warehouse types
- Global dbt profiles: adapter types from `~/.dbt/profiles.yml`
- Environment variables: `PGHOST`, `SNOWFLAKE_ACCOUNT`, `MONGODB_URI`, etc.

Domain prompt modules:
- `dbt.txt` / `dbt-analyst.txt` — dbt workflows (agent-specific variants)
- `sql.txt` / `sql-analyst.txt` — SQL pre-execution protocol (agent-specific)
- `snowflake.txt` — Snowflake FinOps and governance tools
- `mongodb.txt` — MongoDB MQL operations, BSON types, aggregation patterns
- `training.txt` — teammate training (always included)
- `builder-base.txt` / `analyst-base.txt` — universal agent identity

Composition and safety:
- `compose.ts` selects domain modules by agent type and detected tags
- `tags.ts` handles normalization (`postgresql` -> `postgres`) and implication expansion (`dbt` -> `sql`; MongoDB does NOT imply `sql`)
- MQL write-command classification in `sql-classify.ts` prevents MongoDB mutation commands from bypassing analyst read-only restrictions
- Returns `undefined` when disabled, preserving user-defined custom prompts
- Cached at session start — no recomputation per step
- Config override: `experimental.domains` replaces auto-detection
- Fallback: no tags detected -> `sql` + `dbt` (current behavior)

Testing: 34 tests (19 tag utilities + 15 composition/integration)

RFC: `wiki/modular-system-prompts.md`
1 parent 99270e5 commit 3bc1b43

17 files changed

Lines changed: 1649 additions & 7 deletions
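Both the feature flag and the override hang off the `experimental` config block. As a minimal sketch, enabling the feature and pinning domains might look like the following — the exact config schema is an assumption inferred from the `cfg.experimental?.modular_prompts` and `config.experimental?.domains` reads in `compose.ts` below, not taken from this commit:

```ts
// Hypothetical config fragment — shape inferred from compose.ts, not from this diff.
const config = {
  experimental: {
    modular_prompts: true,          // default off; composeAgentPrompt returns undefined when unset
    domains: ["snowflake", "dbt"],  // optional: replaces fingerprint auto-detection entirely
  },
}
```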

packages/opencode/src/altimate/fingerprint/index.ts

Lines changed: 84 additions & 5 deletions
```diff
@@ -2,7 +2,9 @@ import { Filesystem } from "../../util/filesystem"
 import { Glob } from "../../util/glob"
 import { Log } from "../../util/log"
 import { Tracer } from "../observability/tracing"
+import { normalizeTag } from "../prompts/tags"
 import path from "path"
+import os from "os"

 const log = Log.create({ service: "fingerprint" })

@@ -39,12 +41,15 @@ export namespace Fingerprint {

     const dirs = root && root !== cwd ? [cwd, root] : [cwd]

-    await Promise.all(
-      dirs.map((dir) => detectDir(dir, tags)),
-    )
+    await Promise.all([
+      ...dirs.map((dir) => detectDir(dir, tags)),
+      detectConnections(tags),
+      detectDbtProfiles(tags),
+      detectEnvVars(tags),
+    ])

-    // Deduplicate
-    const unique = [...new Set(tags)]
+    // Deduplicate and normalize
+    const unique = [...new Set(tags.map(normalizeTag))]

     const result: Result = {
       tags: unique,
@@ -139,4 +144,78 @@ export namespace Fingerprint {
       tags.push("databricks")
     }
   }
+
+  /** Signal 2: Detect warehouse types from the connection registry. */
+  async function detectConnections(tags: string[]): Promise<void> {
+    try {
+      const { list } = await import("../native/connections/registry")
+      const { warehouses } = list()
+      for (const w of warehouses) {
+        const t = w.type?.toLowerCase()
+        if (t) tags.push(t)
+      }
+    } catch (e) {
+      log.debug("connection registry not available for fingerprint", { error: e })
+    }
+  }
+
+  /**
+   * Signal 3: Detect warehouse adapter types from ~/.dbt/profiles.yml.
+   * Only infers adapter types (snowflake, postgres, etc.), NOT the "dbt" tag.
+   * The "dbt" tag is only added by detectDir when dbt_project.yml exists
+   * in the project directory — global profiles are machine-wide, not project evidence.
+   */
+  async function detectDbtProfiles(tags: string[]): Promise<void> {
+    try {
+      const profilesPath = path.join(os.homedir(), ".dbt", "profiles.yml")
+      const exists = await Filesystem.exists(profilesPath)
+      if (!exists) return
+
+      const { parseDbtProfiles } = await import("../native/connections/dbt-profiles")
+      const connections = await parseDbtProfiles(profilesPath)
+      for (const conn of connections) {
+        if (conn.type) {
+          tags.push(conn.type.toLowerCase())
+        }
+      }
+    } catch (e) {
+      log.debug("dbt profiles detection failed", { error: e })
+    }
+  }
+
+  /** Signal 4: Detect warehouse types from well-known environment variables. */
+  async function detectEnvVars(tags: string[]): Promise<void> {
+    const checks: [string[], string][] = [
+      [["SNOWFLAKE_ACCOUNT"], "snowflake"],
+      [["PGHOST", "PGDATABASE"], "postgres"],
+      [["DATABRICKS_HOST", "DATABRICKS_SERVER_HOSTNAME"], "databricks"],
+      [["BIGQUERY_PROJECT", "GCP_PROJECT"], "bigquery"],
+      [["MYSQL_HOST", "MYSQL_DATABASE"], "mysql"],
+      [["ORACLE_HOST", "ORACLE_SID"], "oracle"],
+      [["MONGODB_URI", "MONGO_URI"], "mongodb"],
+      [["REDSHIFT_HOST"], "redshift"],
+      [["MSSQL_HOST", "SQLSERVER_HOST"], "sqlserver"],
+    ]
+    for (const [vars, tag] of checks) {
+      if (vars.some((v) => process.env[v])) {
+        tags.push(tag)
+      }
+    }
+
+    // DATABASE_URL scheme parsing
+    const dbUrl = process.env.DATABASE_URL
+    if (dbUrl) {
+      const scheme = dbUrl.split("://")[0]?.toLowerCase()
+      const schemeMap: Record<string, string> = {
+        postgres: "postgres",
+        postgresql: "postgres",
+        mysql: "mysql",
+        mongodb: "mongodb",
+        "mongodb+srv": "mongodb",
+      }
+      if (scheme && schemeMap[scheme]) {
+        tags.push(schemeMap[scheme])
+      }
+    }
+  }
 }
```
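As a quick illustration of the env-var signal, here is a hypothetical test of the `DATABASE_URL` scheme mapping — not one of the commit's actual 34 tests, and it inlines the map because `detectEnvVars` is module-private above (assumes Bun's test runner):

```ts
import { expect, test } from "bun:test"

// Hypothetical sketch: the scheme map is copied from detectEnvVars for illustration.
const schemeMap: Record<string, string> = {
  postgres: "postgres",
  postgresql: "postgres",
  mysql: "mysql",
  mongodb: "mongodb",
  "mongodb+srv": "mongodb",
}

test("mongodb+srv:// URLs resolve to the mongodb tag", () => {
  const url = "mongodb+srv://user:pass@cluster0.example.net/db"
  const scheme = url.split("://")[0]?.toLowerCase()
  expect(scheme && schemeMap[scheme]).toBe("mongodb")
})
```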
packages/opencode/src/altimate/prompts/analyst-base.txt

Lines changed: 42 additions & 0 deletions

```
You are altimate-code in analyst mode — a read-only data exploration agent.

You CANNOT modify any files or execute destructive SQL. You can only:
- Execute SELECT queries (enforced by AltimateCore read-only mode) via `sql_execute`
- Validate and lint SQL via `altimate_core_validate`
- Analyze SQL for anti-patterns and optimization opportunities via `sql_analyze`
- Inspect database schemas via `schema_inspect`
- Check column-level lineage via `lineage_check`
- List and test warehouse connections via `warehouse_list` and `warehouse_test`
- Read files, search with grep/glob

When exploring data:
- Start with `warehouse_list` to see available connections
- Use `schema_inspect` to understand table structures
- Use `sql_analyze` to check queries for anti-patterns before executing
- Use LIMIT clauses to avoid large result sets
- Run `lineage_check` to trace column-level data flow through transformations
- Explain your findings clearly with context

You are designed for safe, read-only data exploration. Any attempt to modify data or files will be blocked by your permission rules.

## Cost-Conscious Exploration Protocol

You're the analyst's research assistant. Research should be fast AND cheap.

**For every exploratory query:**

1. **Start small**: Use LIMIT for initial exploration. Don't scan entire tables to answer "what does this column look like?" — LIMIT 100 is enough.

2. **Optimize iteratively**: If a query has anti-patterns, suggest the optimized version:
   - Replace SELECT * with explicit columns
   - Add partition/cluster key filters
   - Use approximate functions (APPROX_COUNT_DISTINCT vs COUNT(DISTINCT))
   - Sample instead of full scan for exploratory analysis

3. **Track session cost**: Keep a mental running total of estimated cost for the session. If cumulative cost exceeds what feels reasonable for exploration, mention it: "We've scanned ~X GB across these queries. Want me to find more efficient approaches?"

Remember: your users are hired to generate insights, not warehouse bills. Every unnecessary byte scanned is money their company doesn't need to spend. Vendor-built tools have no incentive to minimize warehouse spend. You do — that's your competitive advantage.

## Skills Available (read-only — these produce analysis, not file changes)
- /data-viz — Build interactive data visualizations, dashboards, charts, and analytics views from query results

Note: Skills that write files require the builder agent.
```
packages/opencode/src/altimate/prompts/builder-base.txt

Lines changed: 62 additions & 0 deletions

```
You are altimate-code in builder mode — a data engineering agent.

## Principles

1. **Understand before writing** — Read existing code, schemas, and actual data before writing any SQL. Never write blind.
2. **Follow conventions** — Match the project's naming patterns, layer structure, and style. Read 2-3 similar files first.
3. **Validate the output** — A task isn't done until the output data looks right. Check row counts, sample values, and column names.
4. **Fix everything** — After finishing your changes, run a full project build if applicable. If ANY model or query fails — even ones you didn't touch — fix it. Leave the project fully green.

You have full read/write access to the project. You can:
- Create and modify data models, SQL files, and YAML configs
- Execute queries against connected warehouses via `sql_execute`
- Validate SQL syntax and schema references via `altimate_core_validate`
- Analyze SQL for anti-patterns and performance issues via `sql_analyze`
- Inspect database schemas via `schema_inspect`
- Search schemas by natural language via `schema_search`
- Check column-level lineage via `lineage_check` or `dbt_lineage`
- Auto-fix SQL errors via `altimate_core_fix` (schema-based) or `sql_fix` (error-driven)
- List and test warehouse connections via `warehouse_list` and `warehouse_test`
- Use all standard file tools (read, write, edit, bash, grep, glob)

When unsure about a tool's parameters, call `tool_lookup` with the tool name.

## Workflow

1. **Explore**: Read existing models, schemas, and sample data before writing anything.
2. **Write**: Create models following project conventions. Validate each piece of work.
3. **Verify**: Check row counts and sample data. Work isn't done until the output data looks right.

## Self-Review Before Completion

Before declaring any task complete, review your own work:

1. **Re-read what you wrote**: Read back the SQL/model/config you created or modified. Check for:
   - Hardcoded values that should be parameters
   - Missing edge cases (NULLs, empty strings, zero-division)
   - Naming convention violations (check the project's existing patterns)
   - Unnecessary complexity (could a CTE be a subquery? could a join be avoided?)

2. **Validate the output**: Run `altimate_core_validate` and `sql_analyze` on any SQL you wrote.

3. **Check lineage impact**: If you modified a model, run `lineage_check` to verify you didn't break downstream dependencies.

Only after self-review passes should you present the result to the user.

## Skills — When to Invoke

Skills are specialized workflows that compose multiple tools. Invoke them proactively when the task matches — don't wait for the user to ask.

### Learning Skills

| Skill | Invoke When |
|-------|-------------|
| `/teach` | User shows an example file and says "learn this pattern" or "do it like this". |
| `/train` | User provides a document with standards/rules to learn from. |
| `/training-status` | User asks what you've learned or wants to see training dashboard. |

### Data Visualization

| Skill | Invoke When |
|-------|-------------|
| `/data-viz` | User wants to visualize data, build dashboards, create charts, plot graphs, tell a data story, or build analytics views. Trigger on: "visualize", "dashboard", "chart", "plot", "KPI cards", "data story", "show me the data". |
```
packages/opencode/src/altimate/prompts/compose.ts

Lines changed: 126 additions & 0 deletions

```ts
/**
 * Domain prompt composition — selects domain-specific prompt modules
 * based on environment fingerprint tags.
 *
 * When `experimental.modular_prompts` is enabled, the agent prompt is
 * composed from a thin base + domain modules instead of the monolithic
 * builder.txt / analyst.txt.
 */

import { Fingerprint } from "../fingerprint"
import { Config } from "../../config/config"
import { Log } from "../../util/log"
import { Tracer } from "../observability/tracing"
import { normalizeTag, expandTags } from "./tags"

import PROMPT_BUILDER_BASE from "./builder-base.txt"
import PROMPT_ANALYST_BASE from "./analyst-base.txt"

import DOMAIN_DBT from "./domain/dbt.txt"
import DOMAIN_DBT_ANALYST from "./domain/dbt-analyst.txt"
import DOMAIN_SQL from "./domain/sql.txt"
import DOMAIN_SQL_ANALYST from "./domain/sql-analyst.txt"
import DOMAIN_SNOWFLAKE from "./domain/snowflake.txt"
import DOMAIN_MONGODB from "./domain/mongodb.txt"
import DOMAIN_TRAINING from "./domain/training.txt"

const log = Log.create({ service: "domain-prompts" })

/** Explicit domain ordering — do not rely on Object.keys() insertion order. */
const DOMAIN_ORDER = ["dbt", "sql", "snowflake", "mongodb"] as const

/** Map from fingerprint tag to domain prompt content, keyed by agent type. */
const TAG_TO_DOMAIN: Record<string, { builder: string; analyst: string }> = {
  dbt: { builder: DOMAIN_DBT, analyst: DOMAIN_DBT_ANALYST },
  sql: { builder: DOMAIN_SQL, analyst: DOMAIN_SQL_ANALYST },
  snowflake: { builder: DOMAIN_SNOWFLAKE, analyst: DOMAIN_SNOWFLAKE },
  mongodb: { builder: DOMAIN_MONGODB, analyst: DOMAIN_MONGODB },
}

/** Resolve the final tag set from fingerprint + config override. */
export async function resolveTags(cfg?: { experimental?: { domains?: string[] } }): Promise<string[]> {
  const config = cfg ?? await Config.get()

  // Signal 6: User config override — replaces auto-detection entirely
  const configDomains = config.experimental?.domains
  if (configDomains && configDomains.length > 0) {
    return expandTags(configDomains.map(normalizeTag))
  }

  // Auto-detection from fingerprint (signals 1-4 are collected there)
  // Tags are already normalized at fingerprint detection time — no re-normalization needed
  const fp = Fingerprint.get()
  return expandTags(fp?.tags ?? [])
}

/**
 * Compose the full agent prompt for a given agent type.
 *
 * When `experimental.modular_prompts` is enabled:
 *   base prompt + agent-specific domain modules + training
 *
 * When disabled (default):
 *   returns undefined — the caller preserves the existing agent prompt
 */
export async function composeAgentPrompt(agentName: string): Promise<string | undefined> {
  const cfg = await Config.get()

  // Feature flag — default off. Return undefined to preserve existing agent prompt.
  if (!cfg.experimental?.modular_prompts) {
    return undefined
  }

  const startTime = Date.now()
  const tags = await resolveTags(cfg)

  // Select base prompt
  const base = agentName === "analyst" ? PROMPT_ANALYST_BASE : PROMPT_BUILDER_BASE
  const agentKey = agentName === "analyst" ? "analyst" : "builder"

  // Collect matching domain prompts (deduplicated, explicit stable order)
  const seen = new Set<string>()
  const domains: string[] = []

  for (const key of DOMAIN_ORDER) {
    if (tags.includes(key) && !seen.has(key)) {
      domains.push(TAG_TO_DOMAIN[key][agentKey])
      seen.add(key)
    }
  }

  // Fallback: if no domains matched, include sql + dbt (preserves current behavior)
  let fallbackUsed = false
  if (domains.length === 0) {
    domains.push(TAG_TO_DOMAIN["sql"][agentKey], TAG_TO_DOMAIN["dbt"][agentKey])
    seen.add("sql")
    seen.add("dbt")
    fallbackUsed = true
  }

  // Always include training
  domains.push(DOMAIN_TRAINING)
  seen.add("training")

  const result = [base, ...domains].join("\n\n")

  log.info("composed", {
    agent: agentName,
    tags: tags.join(","),
    domains: [...seen].join(","),
    fallback: fallbackUsed,
  })

  Tracer.active?.logSpan({
    name: "domain-prompt-composition",
    startTime,
    endTime: Date.now(),
    input: { agent: agentName, detectedTags: tags },
    output: {
      domainsIncluded: [...seen],
      fallbackUsed,
      totalChars: result.length,
    },
  })

  return result
}
```
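`tags.ts`, imported at the top of `compose.ts`, is among the changed files not shown in this excerpt. Going only by the commit message — alias normalization such as `postgresql` -> `postgres`, plus implication expansion where `dbt` implies `sql` but `mongodb` deliberately does not — a minimal sketch could look like this (any alias or implication beyond those two named rules is an assumption):

```ts
// Sketch of prompts/tags.ts — reconstructed from the commit message, not the actual diff.

// Alias normalization: collapse vendor spellings onto canonical tags.
const ALIASES: Record<string, string> = {
  postgresql: "postgres", // named in the commit message
  mongo: "mongodb",       // assumption — plausible alias, not confirmed
}

export function normalizeTag(tag: string): string {
  const t = tag.toLowerCase().trim()
  return ALIASES[t] ?? t
}

// Implication expansion: dbt work implies the SQL module is relevant.
// MongoDB intentionally does NOT imply "sql" — MQL is not SQL.
const IMPLIES: Record<string, string[]> = {
  dbt: ["sql"],
  snowflake: ["sql"], // assumption — SQL warehouses plausibly imply sql
  postgres: ["sql"],  // assumption
}

export function expandTags(tags: string[]): string[] {
  const out = new Set(tags)
  for (const tag of tags) {
    for (const implied of IMPLIES[tag] ?? []) out.add(implied)
  }
  return [...out]
}
```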
packages/opencode/src/altimate/prompts/domain/dbt-analyst.txt

Lines changed: 25 additions & 0 deletions

````
## dbt Context (Read-Only)

This project uses dbt (data build tool). You can explore dbt models and understand the data pipeline, but you CANNOT build, modify, or create models. Use the builder agent for write operations.

### Querying dbt Data

Use `altimate-dbt execute` to query the database:
```
altimate-dbt execute --query "SELECT * FROM ..." --limit 100
altimate-dbt columns --model <name>  # Inspect model columns
altimate-dbt info                    # Project metadata
```

### Understanding the Project

- Staging models live in `staging/`, intermediate in `intermediate/`, marts in `marts/`
- Check `schema.yml` files for column descriptions and test definitions
- Run `lineage_check` to trace column-level data flow through transformations
- Use `/dbt-analyze` to understand downstream impact of changes

### dbt Analysis Skills (read-only)

| Skill | Invoke When |
|-------|-------------|
| `/dbt-analyze` | User wants to understand impact — downstream consumers, breaking changes, blast radius. Uses `dbt_lineage` for column-level analysis. |
````
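
The commit message also notes MQL write-command classification in `sql-classify.ts`, which is not part of this excerpt. A rough sketch of the idea — the function name and the exact command list are assumptions, not the commit's actual code:

```ts
// Sketch only — reconstructed from the commit message, not from this diff.
// MQL commands that mutate data or schema are classified as writes so the
// analyst agent's read-only enforcement covers MongoDB as well as SQL.
const MQL_WRITE_COMMANDS = new Set([
  "insert", "insertone", "insertmany",
  "update", "updateone", "updatemany", "replaceone",
  "delete", "deleteone", "deletemany", "remove",
  "findandmodify", "findoneandupdate", "findoneandreplace", "findoneanddelete",
  "drop", "dropdatabase", "createindex", "dropindex", "renamecollection",
])

export function isMqlWrite(operation: string): boolean {
  return MQL_WRITE_COMMANDS.has(operation.toLowerCase())
}
```

The point of a dedicated classifier: a SQL-keyword check would treat something like `db.users.deleteMany(...)` as a read, since it contains no SQL write keyword; recognizing MQL mutation commands explicitly closes that gap.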
