#!/usr/bin/env bash
# check-harness-diff.sh — show, file by file, how our vendored harness differs
# from upstream `harness/main`.
#
# Sibling of `check-upstream.sh` (which reports how many commits we are
# behind). This script lists the individual files that differ, so drift on
# files we should not be touching is visible at a glance.
#
# Usage: script/check-harness-diff.sh
# Informational: the size of the diff is not meant to affect the exit status.
# Setup problems (e.g. no `harness` remote) exit non-zero.
#
# Output:
#   1. Known divergences  (differing files recorded in UPSTREAM.md §3)
#   2. Unexpected drift   (every other differing file, build noise removed)
#   3. Line stats         (lines added/removed on our side vs upstream HEAD)

set -euo pipefail

REPO_ROOT="$(git rev-parse --show-toplevel)"
VENDORED="$REPO_ROOT/packages/bcode-browser/harness"

# Scratch directory for the upstream snapshot; the trap removes it on exit.
TMP="$(mktemp -d)"
trap 'rm -rf "$TMP"' EXIT

# Upstream must be configured as a remote named exactly `harness`.
git remote | grep -qx "harness" || {
  cat >&2 <<'EOF'
error: 'harness' remote not configured. Add it:
 git remote add harness https://github.com/browser-use/browser-harness.git
EOF
  exit 1
}

git fetch --quiet harness main

UPSTREAM_HEAD="$(git rev-parse harness/main)"
UPSTREAM_SHORT="$(git rev-parse --short "$UPSTREAM_HEAD")"

# Unpack the upstream tree into the scratch directory by streaming an archive
# through tar. This leaves the index and worktree alone, whereas
# `git --work-tree=X checkout` would mutate the active index.
git archive --format=tar "$UPSTREAM_HEAD" | tar -C "$TMP" -xf -

# Known-divergence filter:
#   - .gitignore (we add .venv/ — see UPSTREAM.md §3 divergences table)
# Files in the divergences table go in EXPECTED; everything else is drift.
EXPECTED_REGEX='/(\.gitignore)( |$)'

# Build artifacts the vendored side might generate during smoke tests.
# These are gitignored on our side, but `diff -rq` doesn't read .gitignore,
# so they are listed explicitly. diff matches each pattern against file and
# directory base names.
NOISE_EXCLUDES=(
  --exclude='uv.lock'
  --exclude='.venv'
  --exclude='__pycache__'
  --exclude='*.egg-info'
  --exclude='*.pyc'
  --exclude='.cache'
)

# Let diff skip the noise itself rather than grepping its output. A substring
# filter over whole output lines also matches the absolute root paths, so a
# checkout or TMPDIR located under e.g. ~/.cache would hide every difference
# and produce a false "No differences" report.
#
# `diff -rq` emits two line shapes:
#   "Files A and B differ"
#   "Only in <dir>: <name>"
# stderr is merged in so that diff's own errors show up in the report.
# `|| true`: diff exits 1 when the trees differ, which is not a failure here.
DIFF_OUT="$(diff -rq "${NOISE_EXCLUDES[@]}" "$VENDORED/" "$TMP/" 2>&1 || true)"

# Report header, followed by a blank line.
printf '%s\n\n' "=== vendored vs harness/main ($UPSTREAM_SHORT) ==="

# No surviving diff lines: the trees match, so report that and stop.
if (( ${#DIFF_OUT} == 0 )); then
  printf '%s\n\n' " No differences. Vendored harness matches upstream HEAD exactly."
  exit 0
fi

# Split the diff lines into documented divergences and everything else.
# `|| true` absorbs grep's exit status 1 when it selects no lines.
EXPECTED="$(grep -E "$EXPECTED_REGEX" <<<"$DIFF_OUT" || true)"
UNEXPECTED="$(grep -Ev "$EXPECTED_REGEX" <<<"$DIFF_OUT" || true)"

if [[ -n "$EXPECTED" ]]; then
  printf '%s\n' "Known divergences (UPSTREAM.md §3):"
  sed 's|^| |' <<<"$EXPECTED"
  echo
fi

if [[ -n "$UNEXPECTED" ]]; then
  printf '%s\n' "Unexpected drift:"
  sed 's|^| |' <<<"$UNEXPECTED"
  # Explain how to interpret the drift list.
  cat <<'EOF'

Each line is one of:
 - upstream commit we haven't synced yet (run script/check-upstream.sh to see commits behind), or
 - a Yellow-zone modification we forgot to record in UPSTREAM.md §3.

EOF
fi

# Aggregate line stats vs upstream, excluding build-artifact noise.
# Known-divergence files (e.g. .gitignore) are NOT excluded here, so their
# lines are included in the totals.

#######################################
# Count added/removed body lines in a unified diff.
# Arguments: none
# Inputs:    unified diff text (e.g. from `diff -ruN`) on stdin
# Outputs:   one line on stdout: " +<added> / -<removed> lines (...)"
# Returns:   the exit status of awk
#######################################
count_patch_lines() {
  awk '
    # Length part of a hunk range such as "-12,5" or "+7"; omitted means 1.
    function range_len(range,   parts) {
      return (split(range, parts, ",") > 1) ? parts[2] + 0 : 1
    }

    # Inside a hunk: classify by the first character and tick down the
    # old-side/new-side line budgets announced by the hunk header. Counting
    # against the budgets (instead of pattern-matching "--- "/"+++ ") keeps
    # body lines whose content starts with "-- " or "++ " from being mistaken
    # for file headers.
    old_left > 0 || new_left > 0 {
      marker = substr($0, 1, 1)
      if (marker == "+")       { added++;   new_left-- }
      else if (marker == "-")  { removed++; old_left-- }
      else if (marker != "\\") { old_left--; new_left-- }  # context line
      next
    }

    # Hunk header: "@@ -start[,len] +start[,len] @@".
    /^@@ / { old_left = range_len($2); new_left = range_len($3) }

    # Everything else outside a hunk (file headers, "diff ..." lines,
    # "\ No newline at end of file") is ignored.

    END { printf " +%d / -%d lines (excluding headers/hunk markers)\n", added+0, removed+0 }
  '
}

echo "Line stats vs upstream (added on our side, removed on our side):"
{
  # diff exits 1 whenever the trees differ. Under `set -o pipefail` that would
  # fail the whole pipeline and abort the script, so the status is handled
  # here. stderr stays suppressed because the earlier `diff -rq` listing
  # already merges diff's error messages into the report.
  diff -ruN \
    --exclude='.venv' --exclude='__pycache__' --exclude='*.egg-info' \
    --exclude='*.pyc' --exclude='uv.lock' --exclude='.cache' \
    "$TMP" "$VENDORED" 2>/dev/null || {
    diff_status=$?
    # 1 only means "differences found"; anything higher is real trouble.
    if (( diff_status > 1 )); then
      echo "warning: diff exited with status $diff_status; line stats may be incomplete" >&2
    fi
  }
} | count_patch_lines

echo
echo "Source of truth: UPSTREAM.md §3. Update the divergences table when adding/removing intentional differences."