Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions scripts/audit-bd-memories.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
#!/usr/bin/env bash
# audit-bd-memories.sh — surface duplicate / stale-surface bd memories.
#
# 156+ bd memories as of 2026-05-20. Without curation, recall quality
# degrades (same lesson stored 3 ways; old lessons referencing retired
# surfaces like Ollama / shepherd-cron). This script does NOT delete
# anything; it produces a markdown report at
# `.agents/audits/bd-memories-<YYYY-MM-DD>.md` with three sections:
#
# NEAR-DUPLICATES memory pairs with content jaccard >= threshold
# RETIRED-SURFACE memories whose body mentions terms in
# the retired-surfaces list
# SUMMARY total / candidates-for-review counts
#
# Operator reviews and selectively runs `bd forget <key>`.
#
# Flags:
# --threshold <0..1> Jaccard similarity floor for near-duplicates
# (default: 0.65)
# --out <path> Output markdown path
# (default: .agents/audits/bd-memories-<date>.md)
# --stdout Emit markdown to stdout (skip file write)
# --retired <csv> Override retired-surface keywords list
# --no-retired Skip retired-surface section
# --no-dups Skip near-duplicate section
# --json Machine-readable summary (skips markdown)
#
# Exit codes:
# 0 — audit completed (whether candidates were found or not)
# 2 — usage error
# 3 — bd unavailable or returned no memories

set -euo pipefail

THRESHOLD="0.65"
OUT_PATH=""
TO_STDOUT=0
JSON=0
INCLUDE_DUPS=1
INCLUDE_RETIRED=1
RETIRED_DEFAULT="ollama,shepherd-cron,openclaw,gemma,morai-codex,d:\\\\dream,dreamworker"
RETIRED_LIST="$RETIRED_DEFAULT"

usage() {
sed -n '2,/^$/p' "$0" | sed 's/^# \{0,1\}//'
exit "${1:-0}"
}

while [ $# -gt 0 ]; do
case "$1" in
--threshold) shift; THRESHOLD="${1:-0.65}" ;;
--out) shift; OUT_PATH="${1:-}" ;;
--stdout) TO_STDOUT=1 ;;
--retired) shift; RETIRED_LIST="${1:-}" ;;
--no-retired) INCLUDE_RETIRED=0 ;;
--no-dups) INCLUDE_DUPS=0 ;;
--json) JSON=1 ;;
-h|--help) usage 0 ;;
*) echo "audit-bd-memories: unknown arg: $1" >&2; usage 2 ;;
esac
shift || true
done

if ! command -v bd >/dev/null 2>&1; then
echo "audit-bd-memories: bd CLI not available" >&2
exit 3
fi

DATE_STR="$(date -u +%Y-%m-%d)"
if [ -z "$OUT_PATH" ]; then
OUT_PATH=".agents/audits/bd-memories-$DATE_STR.md"
fi

# Step 1: parse `bd memories` into a TSV of "key\tcontent". The format is:
# Memories (N): ← header line
# ← blank
# <key> ← 2-space indent
# <content snippet>... ← 4-space indent (may be truncated)
# ← blank between memories
TMP_TSV="$(mktemp)"
# The tokens dir is `$TMP_TSV.tokens`, so use -rf to clean the whole sibling set.
trap 'rm -rf "$TMP_TSV" "$TMP_TSV".*' EXIT

bd memories 2>/dev/null | awk '
/^Memories \(/ { next }
/^ [^ ]/ {
if (key) { print key "\t" content }
sub(/^ /, ""); key=$0; content=""; next
}
/^ / {
sub(/^ /, "")
content = (content == "" ? $0 : content " " $0)
next
}
/^$/ { next }
END { if (key) { print key "\t" content } }
' > "$TMP_TSV"

count=$(wc -l < "$TMP_TSV" | tr -d ' ')
if [ "$count" -eq 0 ]; then
echo "audit-bd-memories: no memories found" >&2
exit 3
fi

# Step 2: near-duplicate detection via Jaccard on word-token sets.
# We compute one token-set file per memory under $TMP_TSV.tokens/<n>,
# then walk pairs.
mkdir -p "$TMP_TSV.tokens"
i=0
keys_file="$TMP_TSV.keys"
: > "$keys_file"
while IFS=$'\t' read -r key content; do
i=$((i + 1))
printf '%s\n' "$key" >> "$keys_file"
printf '%s\n' "$content" | tr 'A-Z' 'a-z' | tr -c 'a-z0-9' '\n' \
| awk 'length($0) >= 3' | sort -u > "$TMP_TSV.tokens/$i"
done < "$TMP_TSV"

# Helper: jaccard A B → prints decimal 0..1 (0 when both empty)
jaccard() {
local a="$1" b="$2" union inter
inter="$(comm -12 "$a" "$b" 2>/dev/null | wc -l | tr -d ' ')"
union="$(cat "$a" "$b" | sort -u | wc -l | tr -d ' ')"
if [ "$union" -eq 0 ]; then
echo "0"
else
awk -v i="$inter" -v u="$union" 'BEGIN { printf "%.3f", i/u }'
fi
}

# Collect (key_a, key_b, score) for pairs above threshold.
DUPS_FILE="$TMP_TSV.dups"
: > "$DUPS_FILE"
if [ "$INCLUDE_DUPS" -eq 1 ] && [ "$count" -gt 1 ]; then
for ((a=1; a<count; a++)); do
key_a="$(sed -n "${a}p" "$keys_file")"
for ((b=a+1; b<=count; b++)); do
score="$(jaccard "$TMP_TSV.tokens/$a" "$TMP_TSV.tokens/$b")"
# awk for compare so we can compare decimals robustly
if awk -v s="$score" -v t="$THRESHOLD" 'BEGIN { exit !(s+0 >= t+0) }'; then
key_b="$(sed -n "${b}p" "$keys_file")"
printf '%s\t%s\t%s\n' "$score" "$key_a" "$key_b" >> "$DUPS_FILE"
fi
done
done
# Sort highest-score first.
sort -r -o "$DUPS_FILE" "$DUPS_FILE"
fi

dup_count="$(wc -l < "$DUPS_FILE" | tr -d ' ')"

# Step 3: retired-surface scan.
RETIRED_FILE="$TMP_TSV.retired"
: > "$RETIRED_FILE"
if [ "$INCLUDE_RETIRED" -eq 1 ] && [ -n "$RETIRED_LIST" ]; then
# Convert csv to alternation regex.
pattern="$(printf '%s' "$RETIRED_LIST" | tr ',' '|')"
while IFS=$'\t' read -r key content; do
if printf '%s' "$content" | grep -iqE "$pattern"; then
hit="$(printf '%s' "$content" | grep -ioE "$pattern" | head -1)"
printf '%s\t%s\n' "$key" "$hit" >> "$RETIRED_FILE"
fi
done < "$TMP_TSV"
fi
retired_count="$(wc -l < "$RETIRED_FILE" | tr -d ' ')"

# Step 4: emit output.
if [ "$JSON" -eq 1 ]; then
printf '{"total":%d,"near_duplicates":%d,"retired_candidates":%d,"threshold":%s}\n' \
"$count" "$dup_count" "$retired_count" "$THRESHOLD"
exit 0
fi

emit_markdown() {
printf '# bd memories audit — %s\n\n' "$DATE_STR"
printf '*Inspected %d memories. Jaccard threshold: %s.*\n\n' "$count" "$THRESHOLD"
printf '## Summary\n\n'
printf -- '- Total memories: **%d**\n' "$count"
printf -- '- Near-duplicate pairs (>= %s jaccard): **%d**\n' "$THRESHOLD" "$dup_count"
printf -- '- Retired-surface candidates: **%d**\n' "$retired_count"

if [ "$INCLUDE_DUPS" -eq 1 ]; then
printf '\n## Near-duplicates\n\n'
if [ "$dup_count" -eq 0 ]; then
printf '*(none)*\n'
else
printf '| Score | Key A | Key B |\n'
printf '|---|---|---|\n'
awk -F'\t' '{ printf "| %s | `%s` | `%s` |\n", $1, $2, $3 }' "$DUPS_FILE"
fi
fi

if [ "$INCLUDE_RETIRED" -eq 1 ]; then
printf '\n## Retired-surface candidates\n\n'
if [ "$retired_count" -eq 0 ]; then
printf '*(none)*\n'
else
printf '*Pattern: %s*\n\n' "$RETIRED_LIST"
printf '| Key | Matched term |\n'
printf '|---|---|\n'
awk -F'\t' '{ printf "| `%s` | %s |\n", $1, $2 }' "$RETIRED_FILE"
fi
fi
printf '\n---\n*Generated by `scripts/audit-bd-memories.sh`. Operator reviews and selectively runs `bd forget <key>`.*\n'
}

if [ "$TO_STDOUT" -eq 1 ]; then
emit_markdown
else
mkdir -p "$(dirname "$OUT_PATH")"
emit_markdown > "$OUT_PATH"
echo "audit-bd-memories: wrote $OUT_PATH"
echo "audit-bd-memories: $count memories scanned, $dup_count near-dup pair(s), $retired_count retired-surface match(es)"
fi
181 changes: 181 additions & 0 deletions tests/scripts/audit-bd-memories.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/env bats
# Regression tests for scripts/audit-bd-memories.sh (soc-lgq4).
#
# The script shells out to `bd memories`. We stub that binary via PATH
# so tests get deterministic input without hitting the real dolt store.

setup() {
REPO_ROOT="$(git rev-parse --show-toplevel)"
SCRIPT="$REPO_ROOT/scripts/audit-bd-memories.sh"
TMP="$(mktemp -d)"
ORIG_DIR="$PWD"
ORIG_PATH="$PATH"
mkdir -p "$TMP/bin"
}

teardown() {
cd "$ORIG_DIR" 2>/dev/null || true
export PATH="$ORIG_PATH"
rm -rf "$TMP"
}

# stub_bd <memories-output-file> — write a bd shim that emits the file.
stub_bd() {
local out_file="$1"
cat >"$TMP/bin/bd" <<EOF
#!/usr/bin/env bash
if [ "\$1" = "memories" ]; then
cat "$out_file"
exit 0
fi
exit 0
EOF
chmod +x "$TMP/bin/bd"
export PATH="$TMP/bin:$ORIG_PATH"
}

# Common synthetic memory corpus.
write_corpus_basic() {
cat >"$TMP/mems.txt" <<'EOF'
Memories (4):

alpha-one
The quick brown fox jumps over the lazy dog repeatedly.

alpha-two
The quick brown fox jumps over the lazy dog repeatedly.

beta-distinct
Completely unrelated content about systemd timers and journald.

retired-mention
Old lesson about ollama gemma morai-codex pipelines that no longer apply.
EOF
}

run_audit() {
cd "$TMP"
run "$SCRIPT" "$@"
}

@test "exits 3 when no bd memories present" {
cat >"$TMP/mems.txt" <<'EOF'
Memories (0):

EOF
stub_bd "$TMP/mems.txt"
run_audit --json
[ "$status" -eq 3 ]
}

@test "--json reports counts on a 4-memory corpus" {
write_corpus_basic
stub_bd "$TMP/mems.txt"
run_audit --json
[ "$status" -eq 0 ]
echo "$output" | jq -e '.total == 4' >/dev/null
# alpha-one and alpha-two are byte-identical → 1.0 jaccard, definitely above threshold.
echo "$output" | jq -e '.near_duplicates >= 1' >/dev/null
# retired-mention contains "ollama" → matches default pattern.
echo "$output" | jq -e '.retired_candidates >= 1' >/dev/null
}

@test "default markdown output lands under .agents/audits/" {
write_corpus_basic
stub_bd "$TMP/mems.txt"
# We need .agents/ to be writable; the script creates the audit dir.
run_audit
[ "$status" -eq 0 ]
files=$(ls "$TMP/.agents/audits/bd-memories-"*.md 2>/dev/null | wc -l | tr -d ' ')
[ "$files" -eq 1 ]
}

@test "--stdout emits markdown instead of writing a file" {
write_corpus_basic
stub_bd "$TMP/mems.txt"
run_audit --stdout
[ "$status" -eq 0 ]
[[ "$output" == *"# bd memories audit"* ]]
[[ "$output" == *"## Near-duplicates"* ]]
[[ "$output" == *"## Retired-surface candidates"* ]]
# No file should have been written.
! ls "$TMP/.agents/audits/bd-memories-"*.md 2>/dev/null
}

@test "near-duplicates table includes the duplicate keys" {
write_corpus_basic
stub_bd "$TMP/mems.txt"
run_audit --stdout
[ "$status" -eq 0 ]
[[ "$output" == *"alpha-one"* ]]
[[ "$output" == *"alpha-two"* ]]
}

@test "--threshold 0.99 raises the bar; identical pairs still pass, near misses don't" {
cat >"$TMP/mems.txt" <<'EOF'
Memories (2):

a-mostly-same
apple banana cherry date elderberry fig grape

b-mostly-same
apple banana cherry date elderberry fig pear
EOF
stub_bd "$TMP/mems.txt"
run_audit --threshold 0.99 --json
[ "$status" -eq 0 ]
# 6 of 8 unique words shared = 0.75 jaccard → below 0.99.
echo "$output" | jq -e '.near_duplicates == 0' >/dev/null
}

@test "--no-dups suppresses near-duplicate scanning entirely" {
write_corpus_basic
stub_bd "$TMP/mems.txt"
run_audit --stdout --no-dups
[ "$status" -eq 0 ]
[[ "$output" != *"## Near-duplicates"* ]]
}

@test "--no-retired suppresses retired-surface section" {
write_corpus_basic
stub_bd "$TMP/mems.txt"
run_audit --stdout --no-retired
[ "$status" -eq 0 ]
[[ "$output" != *"## Retired-surface candidates"* ]]
}

@test "--retired <csv> overrides default retired-keyword list" {
cat >"$TMP/mems.txt" <<'EOF'
Memories (2):

a-clean
nothing notable about this one

b-special
this memory mentions cobalt-strike very loudly
EOF
stub_bd "$TMP/mems.txt"
run_audit --stdout --retired "cobalt-strike"
[ "$status" -eq 0 ]
[[ "$output" == *"b-special"* ]]
[[ "$output" == *"cobalt-strike"* ]]
}

@test "unknown flag exits 2 with usage error" {
stub_bd "$TMP/mems.txt"
run_audit --weasel
[ "$status" -eq 2 ]
[[ "$output" == *"unknown"* ]]
}

@test "missing bd binary exits 3" {
# Don't stub bd; ensure it's not on the test PATH while keeping coreutils.
mkdir -p "$TMP/coreutils-only"
for cmd in bash sh sed awk grep tr sort comm cat mkdir mv rm cp ls wc dirname basename head tail printf jq mktemp; do
full="$(command -v "$cmd" 2>/dev/null || true)"
[ -n "$full" ] && ln -sf "$full" "$TMP/coreutils-only/$cmd"
done
export PATH="$TMP/coreutils-only"
run_audit --json
[ "$status" -eq 3 ]
}
Loading