diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index c612152..c569dfb 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -145,6 +145,17 @@
         "./scientific-problem-selection"
       ]
     },
+    {
+      "name": "igv-reports",
+      "source": "./",
+      "description": "Build self-contained, offline HTML genomic-region reports with igv-reports (create_report). Cohort-aware driver + post-render structural and content verifiers. Includes ONT 5mC/5hmC methylation viewer presets.",
+      "category": "life-sciences",
+      "tags": ["bioinformatics", "genomics", "visualization", "variant-validation", "structural-variants", "ont", "nanopore", "methylation", "igv", "html-report"],
+      "strict": false,
+      "skills": [
+        "./igv-reports"
+      ]
+    },
     {
       "name": "tooluniverse",
       "source": "./tooluniverse",
diff --git a/README.md b/README.md
index 20015ed..7bb07d1 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ This marketplace provides MCP (Model Context Protocol) servers and skills for li
 /plugin install nextflow-development@life-sciences
 /plugin install scvi-tools@life-sciences
 /plugin install scientific-problem-selection@life-sciences
+/plugin install igv-reports@life-sciences
 ```
 
 For servers requiring authentication (all except PubMed), configure credentials after installation:
@@ -146,6 +147,19 @@ Systematic framework for scientific problem selection and strategic research dec
 - Navigate decision trees in active projects
 - Strategic research planning and problem choice
 
+#### igv-reports
+**Plugin ID**: `igv-reports@life-sciences`
+
+Build self-contained, offline HTML genomic-region reports with [igv-reports](https://github.com/igvteam/igv-reports) (`create_report`). Cohort-aware driver + post-render structural and content verifiers on top of the upstream Python package. Includes ONT 5mC/5hmC methylation viewer presets.
+
+**Use cases:**
+- Generate per-sample HTML viewers for SV breakpoints, viral integrations, variants, fusion junctions, ChIP peaks, or ROIs
+- Build cohort-wide report bundles (one HTML per sample + index)
+- Per-read ONT 5mC/5hmC methylation views at promoters / gene bodies / DMRs
+- Auto-verify rendered HTML structure and (opt-in) read-count anchors so cohort builds gate on correctness, not just exit code
+
+**Requirements**: `pip install -U 'igv-reports>=1.16.0'` (upstream engine)
+
 ## Detailed Installation
 
 ### 1. Add the marketplace (one time)
@@ -172,6 +186,7 @@ Systematic framework for scientific problem selection and strategic research dec
 /plugin install nextflow-development@life-sciences
 /plugin install scvi-tools@life-sciences
 /plugin install scientific-problem-selection@life-sciences
+/plugin install igv-reports@life-sciences
 ```
 
 ### 3. Configure credentials (if needed)
diff --git a/igv-reports/LICENSE.txt b/igv-reports/LICENSE.txt
new file mode 100644
index 0000000..d2a37d3
--- /dev/null
+++ b/igv-reports/LICENSE.txt
@@ -0,0 +1,201 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction,
+and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by
+the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all
+other entities that control, are controlled by, or are under common
+control with that entity. For the purposes of this definition,
+"control" means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or
+otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity
+exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation
+source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical
+transformation or translation of a Source form, including but
+not limited to compiled object code, generated documentation,
+and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or
+Object form, made available under the License, as indicated by a
+copyright notice that is included in or attached to the work
+(an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object
+form, that is based on (or derived from) the Work and for which the
+editorial revisions, annotations, elaborations, or other modifications
+represent, as a whole, an original work of authorship. For the purposes
+of this License, Derivative Works shall not include works that remain
+separable from, or merely link (or bind by name) to the interfaces of,
+the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including
+the original version of the Work and any modifications or additions
+to that Work or Derivative Works thereof, that is intentionally
+submitted to Licensor for inclusion in the Work by the copyright owner
+or by an individual or Legal Entity authorized to submit on behalf of
+the copyright owner. For the purposes of this definition, "submitted"
+means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems,
+and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise
+designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity
+on behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the
+Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+(except as stated in this section) patent license to make, have made,
+use, offer to sell, sell, import, and otherwise transfer the Work,
+where such license applies only to those patent claims licensable
+by such Contributor that are necessarily infringed by their
+Contribution(s) alone or by combination of their Contribution(s)
+with the Work to which such Contribution(s) was submitted. If You
+institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work
+or a Contribution incorporated within the Work constitutes direct
+or contributory patent infringement, then any patent licenses
+granted to You under this License for that Work shall terminate
+as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+Work or Derivative Works thereof in any medium, with or without
+modifications, and in Source or Object form, provided that You
+meet the following conditions:
+
+(a) You must give any other recipients of the Work or
+Derivative Works a copy of this License; and
+
+(b) You must cause any modified files to carry prominent notices
+stating that You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works
+that You distribute, all copyright, patent, trademark, and
+attribution notices from the Source form of the Work,
+excluding those notices that do not pertain to any part of
+the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its
+distribution, then any Derivative Works that You distribute must
+include a readable copy of the attribution notices contained
+within such NOTICE file, excluding those notices that do not
+pertain to any part of the Derivative Works, in at least one
+of the following places: within a NOTICE text file distributed
+as part of the Derivative Works; within the Source form or
+documentation, if provided along with the Derivative Works; or,
+within a display generated by the Derivative Works, if and
+wherever such third-party notices normally appear. The contents
+of the NOTICE file are for informational purposes only and
+do not modify the License. You may add Your own attribution
+notices within Derivative Works that You distribute, alongside
+or as an addendum to the NOTICE text from the Work, provided
+that such additional attribution notices cannot be construed
+as modifying the License.
+
+You may add Your own copyright statement to Your modifications and
+may provide additional or different license terms and conditions
+for use, reproduction, or distribution of Your modifications, or
+for any such Derivative Works as a whole, provided Your use,
+reproduction, and distribution of the Work otherwise complies with
+the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+any Contribution intentionally submitted for inclusion in the Work
+by You to the Licensor shall be under the terms and conditions of
+this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify
+the terms of any separate license agreement you may have executed
+with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+names, trademarks, service marks, or product names of the Licensor,
+except as required for reasonable and customary use in describing the
+origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+agreed to in writing, Licensor provides the Work (and each
+Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied, including, without limitation, any warranties or conditions
+of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any
+risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+whether in tort (including negligence), contract, or otherwise,
+unless required by applicable law (such as deliberate and grossly
+negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special,
+incidental, or consequential damages of any character arising as a
+result of this License or out of the use or inability to use the
+Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all
+other commercial damages or losses), even if such Contributor
+has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+the Work or Derivative Works thereof, You may choose to offer,
+and charge a fee for, acceptance of support, warranty, indemnity,
+or other liability obligations and/or rights consistent with this
+License. However, in accepting such obligations, You may act only
+on Your own behalf and on Your sole responsibility, not on behalf
+of any other Contributor, and only if You agree to indemnify,
+defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason
+of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+To apply the Apache License to your work, attach the following
+boilerplate notice, with the fields enclosed by brackets "[]"
+replaced with your own identifying information. (Don't include
+the brackets!) The text should be enclosed in the appropriate
+comment syntax for the file format. We also recommend that a
+file or class name and description of purpose be included on the
+same "printed page" as the copyright notice for easier
+identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/igv-reports/SKILL.md b/igv-reports/SKILL.md
new file mode 100644
index 0000000..7d69985
--- /dev/null
+++ b/igv-reports/SKILL.md
@@ -0,0 +1,751 @@
+---
+name: igv-reports
+description: Use when the user wants an HTML, clickable, browseable, offline, or emailable viewer of genomic data — phrases like "HTML IGV report", "offline IGV", "self-contained HTML", "clickable viewer", "create_report", "igv-reports", "email this viewer", or any browseable HTML of reads at variants, fusion breakpoints, SV junctions, viral integrations, ChIP peaks, ROIs, or ONT 5mC/5hmC methylation views at promoters/gene bodies/DMRs. Trigger even when the user doesn't say "igv-reports" — giveaway is HTML/clickable/offline plus genomic regions. Also fire on /igv-reports. DO NOT use for static PNG/PDF/SVG IGV screenshots — use the igv-screenshots skill instead.
+---
+
+# igv-reports
+
+This skill builds **self-contained HTML genomic-region reports** with
+[igv-reports](https://github.com/igvteam/igv-reports) (`create_report`).
+Each report is a single browseable HTML containing the igv.js viewer plus
+embedded data slices for every region. No server, no internet, no IGV
+install needed at view time.
+
+The skill has three entry points:
+- **build** — one-shot: sites BED + BAM(s) ± VCF → HTML.
+- **cohort** — multi-sample driver from a samplesheet → per-sample HTMLs + index.
+- **prep-track** — utility: convert plain-gzip GFF/GTF/BED.gz into a
+  bgzip + tabix-indexed track that igv-reports can load.
+
+## What this skill is (and is not)
+
+This skill is a **driver layer** on top of the upstream `igv-reports`
+Python package by the IGV team
+([github.com/igvteam/igv-reports](https://github.com/igvteam/igv-reports)).
+The name collision is unavoidable — this skill and the upstream package are both called `igv-reports`.
+
+| Component | Source | Role |
+|---|---|---|
+| `create_report` CLI | upstream PyPI package `igv-reports` | does the actual HTML rendering |
+| `scripts/build_igvreports.py` | **this skill** | wraps `create_report` with default-track resolution, cohort/samplesheet mode, SIF auto-detect |
+| `scripts/verify_{report,cohort,anchors}.py` | **this skill** | post-render structural + content audits (not in upstream) |
+| `scripts/prep_track.sh` | **this skill** | bgzip+tabix utility for annotation tracks |
+
+## Install
+
+```bash
+# 1. Install the UPSTREAM igv-reports package (provides `create_report`):
+pip install -U 'igv-reports>=1.16.0'
+
+# 2. The skill's wrapper scripts ship inside this plugin. Once the plugin is
+#    installed, the scripts live alongside SKILL.md.
+```
+
+If you only need raw `create_report` (no cohort mode, no verifiers, no
+auto-tracks), skip this skill entirely and use upstream directly —
+see [igvteam/igv-reports](https://github.com/igvteam/igv-reports) docs.
+
+## Quickstart
+
+```bash
+python scripts/build_igvreports.py \
+    --genome hg38 \
+    --sites sites.hg38.bed \
+    --bam tumor.bam normal.bam \
+    --fasta /path/to/hg38.fa \
+    --no-default-tracks \
+    --extra-track /path/to/cpg_islands.bed.gz \
+    --extra-track /path/to/gencode.v47.annotation.gff3.gz \
+    --output report.hg38.html
+```
+
+If you run many reports across the same genome build, set up a databases YAML
+once (schema in `references/databases_config_paths.md`) and point
+`$IGV_REPORTS_DB_CONFIG` at it — then `--fasta` and `--no-default-tracks`
+become optional.
+
+## Environment overrides
+
+All optional. Set per-shell or in a project `.env`:
+
+| Var | Effect |
+|---|---|
+| `IGV_REPORTS_DB_CONFIG` | Path to a databases YAML resolving `--genome` to FASTA + default tracks (see `references/databases_config_paths.md`) |
+| `IGV_REPORTS_SIF` | Path to an `igv-reports` apptainer SIF (offline / HPC use). Galaxy depot: `https://depot.galaxyproject.org/singularity/igv-reports:1.16.0--pyh7cba7a3_0` |
+| `SAMTOOLS_SIF_DEFAULT` | Path to a `samtools` SIF (verifier only) |
+| `IGV_REPORTS_BIND` | Colon-separated bind paths for singularity. Unset = no binds. |
+
+Driver flags `--fasta` and `--no-default-tracks` let you skip the databases
+YAML entirely without setting any env var. `--no-apptainer` forces the driver
+to use the `create_report` found on PATH, even on a SLURM node. The hermetic
+`tests/unit/` suite runs anywhere with `pytest` + Python ≥ 3.10.
+
+## When to use which entry point
+
+| User request | Entry point |
+|---|---|
+| "Make an HTML for these 5 SV breakpoints in tumor.bam" | **build** |
+| "Give me one HTML per patient for the cohort integration calls" | **cohort** |
+| "create_report fails with 'not BGZF' on this gencode" | **prep-track** |
+
+## Defaults (locked in)
+
+- Tracks loaded by default (unless `--no-default-tracks` is passed), top-to-bottom in the viewer:
+  1. CpG islands (BED, plain or bgzipped)
+  2. Gencode full annotation (GFF3.gz, **transcripts + exons + CDS + UTRs**, NOT a gene-level-only file)
+  3. RepeatMasker (BED.gz, bgzipped + tabix-indexed)
+  Plus the user's BAM(s), VCF, and any extra tracks they pass.
+- `--flanking 300` bp on either side of each site (good for SV breakpoints
+  and point variants alike). Override per call if needed.
+- `--standalone` so the HTML is offline-viewable.
+- Output filename includes the genome tag — e.g. `cohort.hg38.html` —
+  so downstream genome-tag enforcement hooks pass.
+- Reference FASTA is resolved either explicitly via `--fasta` or via the
+  YAML pointed to by `$IGV_REPORTS_DB_CONFIG`. Supported genome IDs out of
+  the box: `hg38`, `mm10`, `mm39`, `t2t_CHM13v2_plusY`, `GRCh37` (extend
+  `GENOME_ALIASES` in `scripts/build_igvreports.py` if needed).
+- Per-genome default track availability when using the YAML is recorded in
+  `references/databases_config_paths.md` — read it before assembling tracks
+  so the skill doesn't try to load a track that doesn't exist for the
+  selected genome.
+
+## Sites BED format (critical)
+
+igv-reports' BED parser reads fields **by position** and trips on a header
+row (`ValueError: invalid literal for int() with base 10: 'start'`). Always
+emit a **plain headerless 4-column BED**:
+
+```
+chr    start    end    name
+chr2   25227855 25342590 DNMT3A_full_gene
+```
+
+Tab-separated. The `name` becomes the row label in the report's variant
+table — make it specific enough to identify the site after deduping.
+
+By default `create_report` shows only the chr/start/end position columns
+in the clickable table. To surface the `name` (or any extra columns from
+a 5+ column BED), pass `--info-columns <colname>` to the driver:
+
+```bash
+python scripts/build_igvreports.py ... --info-columns name
+python scripts/build_igvreports.py ... --info-columns gene_name,score
+```
+
+Column names are matched by header (so a `#chrom\tstart\tend\tname\tscore`
+header works). For positional BED without a header, the convention is the
+4th column = `name`, 5th = `score`, 6th = `strand`.
+
+The project's `enforce-genome-tag.sh` hook requires a genome tag in the BED
+filename: use `sites.hg38.bed`, not `sites.bed`.
+
+### `--type` for BED-style sites
+
+When the sites input is a BED (not a VCF), pass `--type mutation` to
+`create_report` (or the driver). This gives the right viewer behavior at
+each row — one locus per row, no split-screen, table on top. Without it,
+some BED layouts trigger create_report's split-screen junction view by
+heuristic. Use `--type variant` for VCF sites, or omit for create_report's
+auto-detection (only safe with a VCF).
+
+```bash
+python scripts/build_igvreports.py ... --type mutation --info-columns name
+```
+
+## Pitfalls (the skill should encode and/or detect these)
+
+| Symptom | Root cause | Fix |
+|---|---|---|
+| `ValueError: invalid literal for int()` on first row | Header row in sites BED | Strip header — plain BED |
+| `UnicodeDecodeError: byte 0x8b` reading a track | igv-reports reading bgzip as text | Filename must end `.gff3.gz` / `.bed.gz` AND be true bgzip (check with `file <name>` for "extra field") |
+| `tabix: not BGZF` | Track was plain-gzipped, not bgzipped | Run **prep-track** entry point |
+| `tabix: out of order` while indexing | GFF/GTF/BED records not pos-sorted within chr | **prep-track** does `sort -k1,1 -k4,4n` before bgzip |
+| Annotation track empty in viewer | Tabix returns no rows in displayed window — often correct biology (e.g., CGI-distal site) | Confirm with `tabix file region` |
+| Genome ID lookup fails with `--genome hg38` | igv.js bundled IDs require internet at view + render time | Use `--fasta /path/to/local.fa` instead (always works offline) |
+
+Full pitfalls + create_report flag reference in `references/best_practices.md`.
+
+## How to run — quick recipe
+
+Ensure `create_report` is on PATH (`pip install -U 'igv-reports>=1.16.0'`).
+If you use a conda env, activate it first.
+
+Then call the bundled driver script (paths relative to the installed plugin):
+
+```bash
+python scripts/build_igvreports.py \
+    --sites results/run/inputs/sites.hg38.bed \
+    --bam tumor.bam normal.bam \
+    --vcf calls.vcf \
+    --genome hg38 \
+    --fasta /path/to/hg38.fa \
+    --no-default-tracks \
+    --output results/run/reports/cohort.hg38.html
+```
+
+The driver:
+- Resolves the genome's CpG / gencode / rmsk paths from `$IGV_REPORTS_DB_CONFIG`
+  if set, skipping any that aren't configured for the chosen genome.
+- Validates the sites BED is headerless and that all rows have `start < end`.
+- Calls `create_report` with `--flanking 300 --standalone`.
+- Writes a logs/ entry capturing the full command, the flanking value, the
+  per-region embedded data sizes, and the resolved track list — useful for
+  reproducibility and audit-trail expectations.
+
+For multi-sample cohorts, use `--samplesheet samplesheet.tsv` instead of
+`--bam/--vcf`. Samplesheet format: `sample, bam_tumor, bam_normal, vcf, sites_bed`.
+The driver emits one HTML per sample plus a top-level `index.html` that lists
+all samples with links. Pass `--jobs N` to build the per-sample HTMLs in
+parallel via `ThreadPoolExecutor` (each `create_report` call is I/O-bound on
+BAM slicing, so threading scales well; `--jobs 6` on a 6-patient cohort takes
+roughly 1/6 of the sequential wall-clock time). Default is `--jobs 1`. Layout matches
+the ATLL viral-integration reference implementation:
+
+```
+results/<run>/
+├── inputs/<sample>/sites.<genome>.bed
+├── reports/<sample>.<genome>.html
+├── reports/index.html
+└── logs/run_<timestamp>.log
+```
+
+## prep-track — fixing a non-bgzip track
+
+If a GFF3/GTF/BED.gz is plain-gzip rather than bgzip, igv-reports fails
+silently or with an obscure error. `prep_track.sh` fixes this in one of two modes:
+
+**In-place** (with `.bak.original_gzip` backup) — replaces the original:
+
+```bash
+bash scripts/prep_track.sh /path/to/track.gff3.gz
+```
+
+**Sibling file** (non-destructive — original untouched) — write the
+bgzipped+indexed track to a new path. Use this when other pipelines point
+at the original `.gff3.gz` and you can't risk a brief window where the
+file is replaced:
+
+```bash
+bash scripts/prep_track.sh /path/to/track.gff3.gz \
+    --out /path/to/track.bgz.gff3.gz
+```
+
+The script (both modes):
+1. Backs up the original to `<name>.bak.original_gzip` (in-place mode only).
+2. `gunzip -c`s the file.
+3. Sorts by `chr` then numeric `pos` (`sort -k1,1 -k4,4n`).
+   (Gencode delivers records interleaved by feature type at the same locus —
+   tabix requires pos-sorted.)
+4. `bgzip`s to target.
+5. `tabix -p <gff|gtf|bed>`s.
+6. Verifies a sample tabix query returns rows.
+
+Requires `bgzip` and `tabix` from htslib on PATH.
+
+**Diagnostic** — `file <name>` for distinguishing the two formats:
+- Plain gzip: `gzip compressed data, from Unix, original size <N>`
+- bgzip:      `Blocked GNU Zip Format (BGZF; gzipped file with extra field)`
+
+The `extra field` keyword is the bgzip giveaway.
+
+## When generating an answer.md / run.sh for the user
+
+The driver script (`build_igvreports.py`) deliberately abstracts the
+underlying `create_report` flags — it sets `--standalone`, `--fasta`, the
+`--flanking 300` default, and the YAML-resolved annotation tracks
+internally so the user doesn't have to remember them. That abstraction is
+good for ergonomics but bad for auditability: a reviewer reading the
+`answer.md` later can't see what flags are actually being invoked without
+opening the driver source.
+
+To keep both: when you produce a runnable command for the user, **also
+include a code block titled "Equivalent direct create_report invocation"
+that shows the fully-expanded command** with all flags and resolved track
+paths inline. The user should see the wrapper command they're going to
+run AND the underlying command it expands to. Example:
+
+````
+## Run
+
+```bash
+python build_igvreports.py --genome mm10 --sites peaks.mm10.bed \
+    --bam ./data/ip.bam ./data/input.bam \
+    --output reports/peaks_qc.mm10.html
+```
+
+### Equivalent direct create_report invocation
+
+```bash
+create_report peaks.mm10.bed \
+    --fasta /path/to/mm10.fa \
+    --flanking 300 --standalone \
+    --tracks ./data/ip.bam ./data/input.bam \
+        /path/to/mm10_CpGIslands.bed \
+        /path/to/gencode.vM25.annotation.gtf.gz \
+        /path/to/rmsk_all_repeats_mm10.bed.gz \
+    --title "ChIP-seq peak QC (mm10) — IP vs Input" \
+    --output reports/peaks_qc.mm10.html
+```
+````
+
+This costs you ~10 lines and gives the reviewer a full audit trail. For
+cohort runs, show the expanded form for ONE representative sample only —
+the others differ only in BAM/VCF paths.
+
+## Post-render verification
+
+`scripts/verify_report.py` parses a built HTML and confirms it actually
+contains what its inputs declared. Six checks: `html_exists`,
+`html_min_size`, `region_count` (tableJson rows == sites BED rows),
+`region_coords` (each BED row finds a matching `(chrom, start+1, end[, name])`
+in tableJson — BED is 0-based, the HTML stores 1-based start), `region_sessions`
+(sessionDictionary has one entry per row), and `tracks_present` (every
+`name` from `--track-config` or every basename from positional `--tracks`
+appears in the decoded igv.js session's `tracks[].name` list).
+
+```bash
+python scripts/verify_report.py \
+    --html         results/<run>/reports/sample.hg38.html \
+    --sites        results/<run>/inputs/sites.hg38.bed \
+    --track-config results/<run>/inputs/tracks.json \
+    --min-size-mb  1.0 \
+    --out          results/<run>/reports/sample.verify.tsv \
+    --fail-on-fail
+```
+
+Output is a TSV with columns `check / status / observed / expected / details`
+(also printed to stdout). With `--fail-on-fail`, exits nonzero if any check
+is FAIL — wire this into Snakemake / CI so the pipeline gates on render
+quality, not just on `create_report`'s exit code.
+
+NOTE: `--standalone` replaces every track URL with an inlined `data:` URL
+after slicing, so URL paths are unrecoverable from the embedded session.
+The check matches on track NAMES (which `--standalone` preserves) — for
+`--track-config` JSON pass meaningful names; positional `--tracks` mode
+uses basenames.
+
+### Cohort-level verification (`verify_cohort.py`)
+
+The per-sample verifier above confirms each HTML is internally consistent
+but cannot tell whether sample-1's HTML accidentally embeds sample-2's BAM
+(e.g., samplesheet typo, copy-paste, tumor/normal slot swap). For cohort
+runs, `scripts/verify_cohort.py` adds five cross-sample checks:
+
+| Check | What it asserts |
+|---|---|
+| `cohort_html_coverage` (global) | Each samplesheet row has exactly one HTML; flags missing + extras |
+| `sample_tracks_match` (per-sample) | Each HTML's session contains every BAM/VCF basename declared in THAT row |
+| `no_cross_sample_contamination` (per-sample) | Each HTML contains no basename that belongs to a DIFFERENT row's track columns (default tracks from `databases_config.yaml` are allow-listed) |
+| `sample_id_embedded` (per-sample) | The `sample` column value appears in the HTML's `<title>` or filename |
+| `index_consistency` (global) | `index.html` links exactly the samplesheet sample set; each target exists and is non-empty |
+
+**Auto-invoked by default** at the end of `build_igvreports.py --samplesheet`
+cohort runs. Disable with `--no-verify`; gate the pipeline with
+`--fail-on-fail`. Standalone invocation:
+
+```bash
+python scripts/verify_cohort.py \
+    --samplesheet samplesheet.tsv \
+    --reports-dir results/<run>/reports/ \
+    --genome hg38 \
+    --out results/<run>/reports/cohort_verify.tsv \
+    --summary results/<run>/reports/cohort_verify.summary.md \
+    --fail-on-fail
+```
+
+The TSV adds a `sample` column on top of the per-sample verify schema, with
+`"*"` for cohort-global rows. The markdown rollup (`--summary`) groups
+PASS/FAIL counts by check + lists every failure inline.
+
+Worked regression: `tests/integration/cohort_verify/scenarios.sh` builds a
+3-sample cohort and asserts each of four corruption scenarios (missing
+HTML, sample swap, index drift, truncated HTML) triggers the expected
+check FAILs.
+
+### Content verification (`verify_anchors.py`) — opt-in, slow
+
+`verify_cohort.py` proves the HTML *says* the right thing. It can NOT
+confirm the embedded BAM *slice* contains the data it claims to. Two
+failure modes slip past structural checks:
+
+1. **Sample swap with matching basename** — the cohort loop wired the wrong
+   BAM into `sample_1`'s build, but the swapped BAM's `Path.stem` happens
+   to match what `sample_1`'s row declared (or two files in different dirs
+   share a basename). Track name passes; slice content is wrong.
+2. **Silent empty slice** — region rendered, but the slice has 0 reads
+   (failed `samtools index`, source BAM corruption, coords outside coverage).
+
+`scripts/verify_anchors.py` closes the gap by re-running `samtools view -c`
+against both the source BAM (at generate time) and the embedded slice (at
+verify time), then comparing counts. Two-mode workflow:
+
+```bash
+# 1. After the cohort renders cleanly, freeze the read counts as a regression fixture.
+python scripts/verify_anchors.py generate \
+    --samplesheet samplesheet.tsv \
+    --sites sites.hg38.bed \
+    --out anchors.hg38.tsv
+
+# 2. Re-verify any time after — works against a fresh build of the same inputs,
+#    or to audit an existing HTML for unexpected content drift.
+python scripts/verify_anchors.py verify-cohort \
+    --samplesheet samplesheet.tsv \
+    --reports-dir results/<run>/reports/ \
+    --genome hg38 \
+    --anchors anchors.hg38.tsv \
+    --out results/<run>/reports/cohort_verify_anchors.tsv \
+    --fail-on-fail
+```
+
+Or chained into the build driver:
+
+```bash
+# Freeze anchors at build time:
+python scripts/build_igvreports.py --samplesheet ... --anchors-mode generate \
+    --anchors anchors.hg38.tsv
+
+# Verify a later build against frozen anchors:
+python scripts/build_igvreports.py --samplesheet ... --anchors-mode verify \
+    --anchors anchors.hg38.tsv --fail-on-fail
+```
+
+Anchors TSV schema (`#`-prefixed header per lab BED convention):
+
+```
+#sample	track_name	track_type	chrom	start	end	expected	tolerance	min	max	notes
+```
+
+`track_type` is one of:
+- `bam` — `expected` is the count from `samtools view -c -F 1536` against
+  the source BAM at generate time; at verify time the same count is taken
+  from the embedded BAM slice and compared. Default when the column is
+  absent (backwards compat — pre-2026-05-19 anchor files keep working).
+- `bedgraph` — `expected` is the number of data rows in the source
+  bedGraph overlapping the region (CpG count for methylation data,
+  peak count for ChIP coverage). Verify-time count comes from the
+  wig/bedGraph slice embedded by igv-reports in the HTML — gzip-decoded
+  in-memory, no samtools needed.
+
+bedGraph tracks come from the samplesheet's `extra_tracks` column.
+Anchors for them are generated automatically alongside BAM anchors when
+you run `verify_anchors.py generate` against a samplesheet that includes
+bedGraph entries (e.g. `*.5mC.bedgraph`, `*.5hmC.bg`, plain or `.gz`).
+
+`tolerance` is a ratio (default 5%). `min`/`max` are absolute bounds that
+override tolerance when set — useful for known-positive sites like
+"this integration must have ≥20 reads" or "this promoter must have ≥10 CpGs".
+
+samtools is resolved in this order: `--samtools-sif PATH` → `$SAMTOOLS_SIF`
+→ `$SAMTOOLS_SIF_DEFAULT` → PATH `samtools`. On HPC, prefer a SIF to avoid
+the NFS conda cold-start tax. bedGraph anchors don't require samtools.
+
+**Why this matters for methylation viewers**: the silent-failure mode for
+methylation reports is "region rendered, slice has 0 CpGs" — an empty
+bedGraph slice because the source had no calls in that window, or
+because the slice extraction silently dropped them. Pure structural
+verification confirms the bedGraph track is in the HTML but can't tell
+whether it's empty. The bedgraph-anchor mode closes this gap.
+
+**Why opt-in and not default:** the verify step shells out to samtools per
+(sample × region) and indexes each slice — ~1 s/anchor. For a 6-sample
+cohort × 50 regions that's ~5 min on top of the structural verify (which
+runs in seconds). Reach for this when sample swap or content regression
+is a real concern; the structural verifier is sufficient for routine builds.
+
+Worked regression: `tests/integration/anchor_verify/scenarios.sh` builds a
+2-sample cohort and asserts each of four content scenarios (tolerance
+violation, min-bound violation, corrupted slice, missing anchor) triggers
+the expected PASS / FAIL / SKIP outcome.
+
+## Output and workflow logging
+
+Every run logs to `logs/run_<YYYYMMDD_HHMMSS>.log` next to the reports dir.
+The log captures:
+- Resolved track paths (per genome, after databases_config.yaml lookup).
+- The exact `create_report` command.
+- The flanking value used (default **300 bp** — this is the value that's
+  baked into all the embedded data slices, so audit trails depend on it).
+- Per-region embedded data sizes (extracted post-render so the user can
+  see which regions inflated the HTML).
+- Total HTML size.
+
+This satisfies CLAUDE.md §"Logging and Audit Trail" — every run is
+reproducible from the log alone.
+
+## Track choice nuances
+
+For gencode on hg38, the default points at
+`gencode.v47.annotation.gff3.gz` (full annotation, bgzip + tabix). This
+gives transcript models with exons / CDS / UTRs. The gene-level-only
+companion (`gencode.v47.genes.annotation.sorted.gff3.gz`) renders only
+solid gene boxes and is fine for high-zoom views, but the full annotation
+is the right default for read-level inspection at integration / fusion /
+SV junctions.
+
+For mouse genomes, `databases_config.yaml` ships `.gtf.gz` paths instead.
+GTFs work in igv-reports if bgzip + tabix-indexed; **prep-track** converts
+plain-gzip GTFs the same way it does GFF3s.
+
+For T2T-CHM13, only the FASTA + GTF + CGI are indexed in our DB; rmsk is
+absent and is auto-skipped by the driver. The variant table will load
+without rmsk; flag this in the run log.
+
+## Common-case examples
+
+The `examples/` directory has runnable templates:
+
+- `single_sample.sh` — one BAM + one VCF + a sites BED → one HTML.
+- `cohort_samplesheet.sh` — TSV-driven multi-sample run.
+- `prep_track_demo.sh` — convert a plain-gzip gencode to bgzip+tabix.
+- `methylation_ont/` — ONT 5mC/5hmC viewer (BAM with `colorBy: basemod2`
+  + per-sample bedGraph at fixed y-axis 0..100). End-to-end worked
+  example with pre-sliced data; recipe.md explains the slots.
+
+These are reference implementations; copy and edit them for new runs
+rather than starting from scratch.
+
+## Tests
+
+Three-layer suite under `tests/`, orchestrated by `tests/run_all.sh`:
+
+| Layer | What it covers | Runtime | Needs |
+|---|---|---|---|
+| **unit** (`tests/unit/`) | parser layer of `verify_report.py` + `verify_anchors.py` — TSV loading, status decision, session-entry locator, balanced-brace JSON extractor, decode round-trip — all with synthetic inputs | ~1 s | pytest |
+| **smoke** (`tests/smoke/`) | `samtools_count` / `samtools_index` / full slice-decode-and-count round-trip against the committed `tests/fixtures/tiny_colo829.hg38.bam` (457 KB, sliced from public ONT COLO829 release) | ~3 s | pytest + samtools (SIF or PATH) |
+| **integration** (`tests/integration/`) | end-to-end: build a 2-/3-sample cohort, structural verify, anchor verify, run 4 corruption scenarios per verifier | ~7 min cold, ~30 s cached | full cohort BAMs (lab default OR `IGV_REPORTS_TEST_BAM_{1,2,3}` env override). SKIPs with exit 77 if neither is available |
+
+```bash
+bash tests/run_all.sh                  # all three layers
+bash tests/run_all.sh --unit-only      # ~1 s — fastest feedback loop
+bash tests/run_all.sh --no-integration # ~12 s — works on any machine
+bash tests/run_all.sh --integration-only
+```
+
+The fixture provenance + regeneration recipe live in
+[tests/fixtures/README.md](tests/fixtures/README.md). Anchor counts the
+smoke layer expects (chr2=5, chr7=9) are the contract — any fixture
+regeneration that changes them must also update the smoke test constants.
+
+## ONT methylation viewers (specialized path)
+
+For per-read 5mC/5hmC visualization the positional `--tracks` API does
+not work — you need named tracks with `colorBy: "basemod2"` on the BAMs
+and `min: 0, max: 100` on the bedGraph tracks (cross-sample y-axis lock,
+see `rules/igv.md`). Use the `--track-config <json>` passthrough:
+
+```bash
+# 1. Write a YAML spec listing samples (see tracks_spec.example.yaml).
+# 2. Generate tracks.json with the right defaults baked in:
+python scripts/generate_tracks_json.py \
+    --spec tracks_spec.yaml --run-dir results/<run>/ \
+    --out results/<run>/tracks.json
+
+# 3. Build the report:
+python scripts/build_igvreports.py \
+    --sites results/<run>/sites.hg38.bed \
+    --track-config results/<run>/tracks.json \
+    --genome hg38 --flanking 0 \
+    --type mutation --info-columns name \
+    --output results/<run>/methylation_report.hg38.html
+```
+
+### Annotation shortcuts in the YAML
+
+The default `--tracks` path (SV/variant viewers) auto-resolves CpG islands,
+gencode, and RepeatMasker from a databases YAML when you pass
+`--genome hg38`. On the `--track-config` (methylation) path you used to
+have to hand-paste those paths into the YAML. As of the methylation-polish
+round, you can use a `default:` shortcut for the same resolution:
+
+```yaml
+genome: hg38
+
+annotation:
+  # SHORTCUT — resolved from the databases YAML for the genome above.
+  # Gets an Okabe-Ito color + sensible displayMode you can override per entry.
+  - default: gencode
+  - default: cgi
+  - default: repmasker
+  - default: epdnew_coding         # hg38 only
+  - default: epdnew_noncoding      # hg38 only
+
+  # Mix with EXPLICIT entries when needed (e.g. a pre-sliced custom track):
+  - name: "My custom peak set"
+    url: peaks/promoter_slices.bed
+    format: bed
+```
+
+Valid `default:` keys: `cgi`, `gencode`, `repmasker`, `epdnew_coding`,
+`epdnew_noncoding`. Mixing both forms is supported; order is preserved.
+Override the canned `name`/`color`/`displayMode` per entry by adding the
+field alongside `default:`. The shortcut needs a top-level `genome:` in
+the spec, plus a databases YAML on `--db-config PATH` or
+`$IGV_REPORTS_DB_CONFIG` (see `references/databases_config_paths.md` for
+the schema).
+
+Key methylation-specific defaults:
+- `--flanking 0` (sites BED already encodes the window — promoter/gene span).
+- `--info-columns name` (surface the BED `name` column in the variant table).
+- `--type mutation` (one-locus view per row; not split-screen).
+- bedGraph not bigwig — igv-reports cannot slice `.bw` directly.
+
+When `--track-config` is set the driver bypasses the auto-resolved
+default annotation tracks (CGI / gencode / rmsk) and the `--bam` /
+`--vcf` / `--extra-track` flags — the JSON is the source of truth.
+Build annotation slices into the JSON instead.
+
+**`--apptainer` is auto-detected**: the driver flips to the apptainer SIF
+pointed to by `$IGV_REPORTS_SIF` (igv-reports 1.16.0, ~83 MB, pulled from
+the Galaxy depot) when `SLURM_JOB_ID` is in the environment — i.e. running
+on a compute node where the NFS conda cold-start tax matters. On the login
+node or when `$IGV_REPORTS_SIF` is unset, the driver uses PATH
+`create_report`. Override with `--apptainer` / `--no-apptainer`; the
+decision lands in the run log.
+
+Full recipe and rationale: `references/methylation_ont.md`. Worked
+example with real data: `examples/methylation_ont/`.
+
+## Exporting HTML and PNG side-by-side (`--also-png`)
+
+The HTML report is the deep-dive view; sometimes you also need static
+PNGs you can email, drop in a Slack channel, or paste into slides. The
+driver's `--also-png` flag invokes the sister `igver` tool against the
+**same sites BED and same track list** that drove `create_report`, so
+both artifacts cover identical regions with matching content.
+
+```bash
+python scripts/build_igvreports.py \
+    --samplesheet samplesheet.tsv \
+    --genome hg38 \
+    --output-dir results/run/reports/ \
+    --jobs 6 \
+    --also-png \
+    --png-dpi 600 --png-display-mode collapse
+```
+
+Output layout per sample:
+
+```
+results/run/reports/
+├── <sample>.hg38.html              # interactive
+├── png_<sample>.hg38/
+│   ├── igver_regions.bed           # flanked BED with UIDs (igver -r)
+│   ├── igver_input.txt             # track paths, one per line (igver -i)
+│   ├── manifest.tsv                # bridge: BED row ↔ PNG ↔ HTML row
+│   └── png/
+│       ├── chr1-100-500.alpha.png  # one PNG per region
+│       └── chr2-0-700.beta.png
+└── index.html
+```
+
+### How consistency is guaranteed — five levers
+
+1. **Single sites BED with `--flanking` baked in.** The driver writes
+   `igver_regions.bed` with `start − flanking` and `end + flanking`
+   already applied (clamped to 0 on the low side); igver sees the same
+   coordinates create_report's igv.js viewer slices to.
+2. **Single resolved track list.** On the default (positional) path the
+   exact `[BAMs, VCF, extras, defaults]` list passed to `create_report` is
+   also written to `igver_input.txt`. On the `--track-config` path the
+   local-path `url:` entries from the JSON are extracted (http(s) URLs are
+   skipped — igver can't consume them).
+3. **Matched display mode.** Default is `--png-display-mode collapse` to
+   line up with the HTML's `BAM_DEFAULTS displayMode: COLLAPSED`. Override
+   to `expand` for per-read SV inspection on both artifacts.
+4. **UID-based filenames.** The BED's `name` column (auto-assigned
+   `region_<idx>` when missing) becomes both the HTML table label (via
+   `--info-columns name`) and the PNG filename suffix
+   (`<chr-start-end>.<uid>.png`). A user finds the same region in either
+   artifact by the same string.
+5. **`manifest.tsv` audit trail.** Per-sample TSV with columns:
+   `bed_row_idx, uid, chrom, start_orig, end_orig, start_flanked,
+   end_flanked, region, png_path, html_path, html_table_row`. One row
+   per region in BED order. `verify_cohort.py` reads this to run three
+   PNG-side checks (count matches, exist + non-empty, html-row contiguity).
+
+### Resolution of the `igver` invocation
+
+Order, first match wins:
+1. `--igver-cmd '...'` (split on whitespace — supports `apptainer exec ... igver`).
+2. `$IGVER_CMD` env var (same shape).
+3. `igver` on PATH.
+
+If none resolve, the build exits before invoking create_report so you
+don't pay the HTML cost before finding out PNGs are unavailable. Install
+with `pip install igver` or pull a pre-built SIF and point `--igver-cmd`
+at it.
+
+### Methylation caveat (bigwig vs bedGraph)
+
+The HTML methylation path uses **bedGraph** tracks (igv.js consumes
+those directly); igver's per-read methylation view uses **BAMs** with
+`--color-by BASE_MODIFICATION`, and igver's cross-sample comparison view
+uses **bigwig** tracks. Content can be made identical only if both
+formats trace back to the same `modkit pileup` output (`modkit bedmethyl
+tobigwig` of the same bedGraph). The driver's `--also-png` passes the
+JSON's `url:` entries through verbatim, so if your YAML lists bedGraphs
+they'll go to igver as-is — igver will render them but the result may
+look different from the HTML's color-coded per-read view. For
+publication-quality methylation PNGs, supply a parallel `tracks.json`
+that lists bigwigs and run `igver` separately.
+
+For SV/variant viewers this caveat doesn't apply — both render the
+identical BAMs and the result is content-equivalent.
+
+### Cross-artifact verification
+
+The driver runs an **inline existence check** right after igver returns:
+walks each expected PNG path (`<chr>-<start>-<end>.<uid>.<ext>` derived
+from the manifest) and fails the build with an actionable message if
+any are missing or zero-byte. This catches igver's documented
+silent-exit-0 failure mode (egg-link install without the IGV Java
+binary) — `proc.returncode != 0` alone misses it.
+
+In addition, `verify_cohort.py` then runs three checks per sample:
+
+| Check | Catches |
+|---|---|
+| `png_count_matches_bed` | partial igver run (SIGKILL mid-batch), stale manifest from a previous build, filename collisions |
+| `pngs_exist_and_nonempty` | empty IGV screenshots (< 10 KB threshold; useful screenshots are typically ≥ 50 KB) |
+| `png_html_row_alignment` | manifest rows referencing a different HTML, html_table_row not contiguous 1..N |
+
+`--png-min-size-kb 5.0` lowers the threshold if you have legitimate
+no-data regions where igver produces a near-empty PNG.
+
+## See also
+
+- `references/best_practices.md` — full create_report flag reference,
+  format gotchas, performance notes. Read this if a run fails in a way
+  not listed in the Pitfalls table above.
+- `references/databases_config_paths.md` — per-genome track availability
+  matrix and exact YAML keys. Read this when adding a new genome or
+  diagnosing a missing-track warning.
+- `references/methylation_ont.md` — ONT 5mC/5hmC cheat-sheet (colorBy,
+  min:0/max:100, flanking=0, bedGraph vs bigwig, EPDnew lookup).
+- `scripts/build_igvreports.py` — the driver. Reads `--samplesheet` or
+  `--bam/--vcf` direct-args, resolves tracks, validates the sites BED,
+  writes the HTMLs and the run log. Supports `--track-config <json>`
+  passthrough for fully-styled track sets.
+- `scripts/generate_tracks_json.py` — YAML spec → tracks.json with
+  ONT-methylation defaults baked in (colorBy=basemod2, min:0/max:100,
+  group-paired Okabe-Ito colors).
+- `scripts/verify_report.py` — post-render structural verifier; parses
+  the HTML's embedded tableJson + sessionDictionary, confirms region
+  count / coordinates / track names match the inputs. Emits a verify.tsv
+  and gates on `--fail-on-fail`.
+- `scripts/verify_cohort.py` — cohort-level verifier; layered on top of
+  verify_report's per-sample checks, adds cross-sample contamination
+  scanning + index.html / sample-id consistency. Auto-invoked at the end
+  of `build_igvreports.py --samplesheet`; standalone-runnable too.
+- `scripts/verify_anchors.py` — content verifier; samtools-counts the
+  embedded BAM slices and compares to anchors frozen from the source BAMs
+  at build time. Catches sample swaps that share basenames and silent
+  empty slices. Opt-in via `--anchors-mode generate|verify` on the build
+  driver; slow (~1 s/anchor). See SKILL.md content-verification section.
+- `scripts/prep_track.sh` — gunzip → sort → bgzip → tabix utility.
+- `igv-screenshots` skill — the **static PNG/PDF/SVG** counterpart based
+  on igver. Use it instead of this one when the deliverable is a
+  publication-quality figure rather than a clickable viewer.
+- Upstream development: https://github.com/sahuno/igv-reports-skill
+  — file issues there for skill-level bugs; file issues at
+  https://github.com/igvteam/igv-reports for `create_report` rendering bugs.
diff --git a/igv-reports/examples/portable/README.md b/igv-reports/examples/portable/README.md
new file mode 100644
index 0000000..ef14a21
--- /dev/null
+++ b/igv-reports/examples/portable/README.md
@@ -0,0 +1,20 @@
+# examples/portable
+
+Reference invocations using only paths and tools you control (no lab
+`databases_config.yaml`). Each script accepts environment-variable
+overrides for input paths, with `${HOME}/data/...` defaults you can edit
+in-place or override at call time:
+
+```bash
+FASTA=/path/to/hg38.fa TUMOR_BAM=/path/to/tumor.bam \
+    bash examples/portable/single_sample.sh
+```
+
+| Script | What it does |
+|---|---|
+| `single_sample.sh` | Builds one HTML for a tumor/normal pair at two SNV sites |
+| `cohort_samplesheet.sh` | Builds per-sample HTMLs + index.html from a 2-row samplesheet |
+
+For more advanced examples (cohort orchestration, ONT methylation viewer
+presets), see the upstream development repo at
+https://github.com/sahuno/igv-reports-skill.
diff --git a/igv-reports/examples/portable/cohort_samplesheet.sh b/igv-reports/examples/portable/cohort_samplesheet.sh
new file mode 100644
index 0000000..5530f05
--- /dev/null
+++ b/igv-reports/examples/portable/cohort_samplesheet.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# examples/portable/cohort_samplesheet.sh — generic cohort build.
+#
+# Builds one HTML per row of a TSV samplesheet, plus an index.html linking
+# them all. Demonstrates the samplesheet format and the most common flags.
+
+set -euo pipefail  # exit on errors, unset variables, and failures anywhere in a pipeline
+
+REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"  # absolute path two directories above this script
+WORKDIR="${WORKDIR:-${PWD}/igv_reports_cohort_demo}"  # honors a caller-supplied WORKDIR, else a demo dir under the current directory
+mkdir -p "$WORKDIR" && cd "$WORKDIR"  # relative paths below (cohort.tsv, sites.hg38.bed, reports) resolve here
+
+# --- inputs (edit these) ---
+FASTA="${FASTA:-${HOME}/data/hg38/hg38.fa}"  # environment value wins; otherwise this default is used
+GENCODE_GFF="${GENCODE_GFF:-${HOME}/data/hg38/gencode.v47.annotation.gff3.gz}"  # handed to the driver via --extra-track below
+
+# --- samplesheet (one row per sample) ---
+# Required columns: sample, sites_bed
+# Optional columns: bam_tumor, bam_normal, vcf, extra_tracks (comma-separated)
+cat > cohort.tsv <<EOF  # unquoted delimiter: HOME is expanded in the rows; the vcf field is left empty
+sample	bam_tumor	bam_normal	vcf	sites_bed
+p001	${HOME}/data/p001/tumor.bam	${HOME}/data/p001/normal.bam		sites.hg38.bed
+p002	${HOME}/data/p002/tumor.bam	${HOME}/data/p002/normal.bam		sites.hg38.bed
+EOF
+
+# --- shared sites for both patients ---
+cat > sites.hg38.bed <<'EOF'  # quoted delimiter: body is written verbatim, no shell expansion
+#chrom	start	end	name
+chr2	25246499	25246500	DNMT3A_R882
+chr7	148884000	148884001	EZH2_Y646
+EOF
+
+python "${REPO_ROOT}/scripts/build_igvreports.py" \
+    --genome hg38 \
+    --samplesheet cohort.tsv \
+    --fasta "${FASTA}" \
+    --no-default-tracks \
+    --extra-track "${GENCODE_GFF}" \
+    --output-dir reports \
+    --no-apptainer  # use create_report from PATH rather than an apptainer SIF
+
+echo "Done. Open ${WORKDIR}/reports/index.html in a browser."
+echo "Cohort verifier ran automatically; see reports/cohort_verify.summary.md."  # path is relative to WORKDIR
diff --git a/igv-reports/examples/portable/single_sample.sh b/igv-reports/examples/portable/single_sample.sh
new file mode 100644
index 0000000..7296b48
--- /dev/null
+++ b/igv-reports/examples/portable/single_sample.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# examples/portable/single_sample.sh — generic single-sample build.
+#
+# Builds one HTML for a tumor/normal pair at a handful of SNV sites.
+# Assumes:
+#   - `pip install igv-reports` has put `create_report` on PATH.
+#   - You have your own hg38 FASTA (with .fai sibling) and BAMs.
+#   - You have your own gencode + CpG-islands track files (or skip them
+#     with --no-default-tracks alone).
+#
+# Set these to match your environment before running.
+
+set -euo pipefail  # exit on errors, unset variables, and failures anywhere in a pipeline
+
+REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"  # absolute path two directories above this script
+WORKDIR="${WORKDIR:-${PWD}/igv_reports_demo}"  # honors a caller-supplied WORKDIR, else a demo dir under the current directory
+mkdir -p "$WORKDIR" && cd "$WORKDIR"  # relative paths below (sites.hg38.bed, report.hg38.html) resolve here
+
+# --- inputs (edit these) ---
+FASTA="${FASTA:-${HOME}/data/hg38/hg38.fa}"          # must have ${FASTA}.fai
+TUMOR_BAM="${TUMOR_BAM:-${HOME}/data/tumor.bam}"  # first argument to --bam below
+NORMAL_BAM="${NORMAL_BAM:-${HOME}/data/normal.bam}"  # second argument to --bam below
+GENCODE_GFF="${GENCODE_GFF:-${HOME}/data/hg38/gencode.v47.annotation.gff3.gz}"  # bgzip+tabix
+CPG_ISLANDS="${CPG_ISLANDS:-${HOME}/data/hg38/hg38_CpGIslands.bed}"  # handed to the driver via --extra-track below
+
+# --- sites BED (4 cols: chrom, start, end, name) ---
+cat > sites.hg38.bed <<'EOF'  # quoted delimiter: body is written verbatim, no shell expansion
+#chrom	start	end	name
+chr2	25246499	25246500	DNMT3A_R882
+chr7	148884000	148884001	EZH2_Y646
+EOF
+
+python "${REPO_ROOT}/scripts/build_igvreports.py" \
+    --genome hg38 \
+    --sites sites.hg38.bed \
+    --bam "${TUMOR_BAM}" "${NORMAL_BAM}" \
+    --fasta "${FASTA}" \
+    --no-default-tracks \
+    --extra-track "${GENCODE_GFF}" \
+    --extra-track "${CPG_ISLANDS}" \
+    --info-columns name \
+    --output report.hg38.html \
+    --no-apptainer  # use create_report from PATH rather than an apptainer SIF
+
+echo "Done. Open ${WORKDIR}/report.hg38.html in a browser."  # report.hg38.html was written inside WORKDIR
diff --git a/igv-reports/references/best_practices.md b/igv-reports/references/best_practices.md
new file mode 100644
index 0000000..4fe5c64
--- /dev/null
+++ b/igv-reports/references/best_practices.md
@@ -0,0 +1,158 @@
+# igv-reports best practices
+
+Authoritative companion to the skill. Read this when something fails in a
+way the SKILL.md pitfalls table doesn't cover, or when introducing a new
+input format / track type.
+
+## Sites/regions input
+
+Supported by `create_report`:
+- **VCF** — variant table is built from CHROM/POS/ID/REF/ALT plus any
+  `--info-columns` you surface from INFO and `--sample-columns` from FORMAT.
+  Use `--idlink 'https://url/$$'` (single quotes — the shell expands `$$` inside double quotes) to make ID a clickable link.
+- **BED** — fields parsed by position: `chr / start / end [/ name]`.
+  A **non-comment header row** (e.g., `chrom start end name`) crashes
+  `create_report` with `ValueError: invalid literal for int()` because
+  the parser tries to `int()` the string `start`. A `#`-prefixed comment
+  header (e.g., `#chrom\tstart\tend\tname`) IS accepted — `create_report`
+  skips lines starting with `#`. This matches the lab's "BED-like outputs
+  must have a `#`-prefixed header" convention in CLAUDE.md.
+- **MAF** — Mutation Annotation Format (TCGA standard).
+- **BEDPE** — paired-end / fusion / SV format. With `--type fusion` each
+  row is rendered as a multi-locus split-screen view.
+- **Generic TSV** — any tab-delimited file. Requires `--sequence`,
+  `--begin`, `--end` to name the chrom/start/end columns. Add
+  `--zero_based` if 0-based.
+
+**File-extension dispatch**: igv-reports picks the parser by extension,
+not content. `.bed` → BED parser (which IGNORES `--sequence/--begin/--end`).
+If you want a TSV-with-header parsed by name, the extension must NOT be
+`.bed`/`.vcf`/`.gff3`/`.maf` — use `.tsv` or `.txt`.
+
+The project's `enforce-genome-tag.sh` hook requires a genome tag in the
+filename: `sites.hg38.bed`, not `sites.bed`.
+
+## Tracks
+
+Supported track formats: BAM, CRAM, VCF, BED, GFF3, GTF, WIG, BEDGRAPH.
+
+**Indexing**:
+- BAM/CRAM/VCF MUST be indexed (`.bai`/`.crai`/`.tbi` sidecar).
+- Large `.bed.gz` / `.gff3.gz` / `.gtf.gz` SHOULD be tabix-indexed
+  (`.tbi` sidecar) and **must be true bgzip** — not plain gzip.
+- Check format with `file <name>` — true bgzip says
+  `gzip compressed data, extra field, original size 0`. Plain gzip
+  has no "extra field". igv-reports trips on plain-gzip .gff3.gz with
+  cryptic `UnicodeDecodeError: byte 0x8b at position 1` — that 0x8b is
+  the gzip magic byte the parser is reading as text.
+
+**Sortedness**: gencode and many other GFF/GTF distributions interleave
+records by feature type at the same locus (gene → transcript → exon → CDS →
+exon → CDS → ...) rather than strictly position-sorted within each
+chromosome. tabix requires pos-sorted within chr. Fix:
+`sort -k1,1 -k4,4n` on the body, then bgzip + tabix. The `prep-track`
+script in this skill does the full pipeline with backup.
+
+**Track render order**: the order you pass to `--tracks` is the order
+they appear in the IGV.js viewer (top-to-bottom). Convention:
+1. BAM/CRAM (the data you want to evaluate)
+2. VCF (the calls being inspected)
+3. Annotation tracks (genes, regulatory, repeats, CGI)
+
+The skill defaults always render annotation tracks LAST so they sit at
+the bottom and don't push the read evidence off-screen.
+
+## Reference
+
+One of `--fasta`, `--twobit`, or `--genome` is required.
+
+- `--fasta /path/to/local.fa` (with `.fai`) — fully offline, supports
+  custom or combined references (e.g., host + viral).
+- `--genome hg38` — uses igv.js bundled IDs, but **requires internet at
+  view AND render time** because igv.js fetches the bundled genome.
+  Avoid for HPC/offline.
+- `--twobit` — alternative reference in 2bit format.
+
+For combined viral+host references, the single FASTA must include all
+contigs, and any per-contig tracks must align (e.g., HTLV1_features.bed
+must use the same contig name as in the FASTA).
+
+## Window sizing
+
+`--flanking N` (igv-reports default 1000, this skill default **300**)
+adds N bp on either side of each site.
+
+| Use case | Recommended flanking |
+|---|---|
+| Point variants (SNV/indel) | 50–200 bp |
+| SV / integration breakpoints | 300–1000 bp (this skill: 300) |
+| Whole-gene context | gene length + 5–10 kb |
+
+`--maxlen N` (default 10,000) — variants exceeding this length switch to
+split-screen multilocus view automatically. Useful for SVs > 10 kb.
+
+`--window N` — initial visible window inside the embedded igv.js viewer
+(if not supplied, igv.js defaults to 41 bp, which is too narrow for
+read-level inspection). Set to ~`2 × flanking` so the user lands on the
+full embedded slice.
+
+## Output
+
+- `--standalone` embeds all igv.js JS in the HTML → fully offline,
+  4–11 MB per patient typical for cohort runs.
+- `--no-embed` keeps external URLs → smaller HTML but online required.
+  Avoid for HPC/sharing-by-email.
+
+Per-region BAM data is ALWAYS sliced and embedded by default; only the
+flanking-sized portion of large BAMs ships in the HTML — so the HTML stays
+manageable even when input BAMs are 100+ GB.
+
+## Variant table customization
+
+For VCF input:
+- `--info-columns SVTYPE SVLEN ALIGNED_POS DR DV VAF` surfaces those
+  INFO fields as table columns.
+- `--info-columns-prefixes ANN_ HTLV1_` includes any INFO field starting
+  with the listed prefixes.
+- `--sample-columns DP AD GT` (with optional `--samples NAME`) surfaces
+  per-sample FORMAT fields.
+- `--idlink 'https://example.com/$$'` makes the VCF ID column clickable
+  with `$$` replaced by the ID value (single-quote it: the shell expands `$$` to its PID inside double quotes).
+
+Order of operations: include `--info-columns` for the call-quality fields
+your reviewer needs to see at a glance; the rest is one click into the
+variant detail.
+
+## Performance / size control
+
+- `--subsample 0.0-1.0` — keep a fraction of BAM alignments per region.
+  Use for very deep BAMs (>100×) where the rendered viewer would be
+  read-cluttered.
+- `--exclude-flags 1536` (default) — excludes duplicates and QC-fail
+  reads. Set to 0 to keep everything.
+- Render time scales roughly linearly with `n_regions × n_tracks`. The
+  ATLL cohort run (6 patients × 1–3 integrations + HTLV1 + EBV regions,
+  6 tracks) took ~2 min/patient with the gene-level GFF and ~3 min/patient
+  with the full annotation.
+
+## Pitfalls observed in production
+
+| Symptom | Root cause | Fix |
+|---|---|---|
+| `ValueError: invalid literal for int() with base 10: 'start'` | Non-comment header row in BED sites file | Prefix the header with `#` (skipped by create_report and matches lab convention); or strip it entirely |
+| `UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b` | igv-reports is reading compressed bytes as text (file is plain gzip, not bgzip, despite the `.gz` ext) | Convert with prep-track; verify with `file <name>` |
+| `tabix: not BGZF` | Plain gzip masquerading as `.gz` | `gunzip → bgzip → tabix` |
+| `tabix: out of order` | GFF/GTF/BED records not pos-sorted within chr | `sort -k1,1 -k4,4n` first |
+| Empty annotation track in viewer | Tabix lookup returns nothing in window; often correct biology (e.g., CGI-distal site) | Verify with `tabix file region` before treating it as a bug |
+| Title shows weird characters | Unicode em-dash (`—`) in `--title` got mangled by shell escaping | Use plain ASCII `-` |
+| HTML loads but viewer is blank | `--genome hg38` without internet at view time | Use `--fasta` + `--standalone` |
+| `tabix` index missing for a track | igv-reports looked for `<track>.tbi`, not present | Re-run `tabix -p gff` (GFF3 / GTF) or `tabix -p bed` on the bgzipped file |
+| `samtools index` errors mid-render | BAM index stale (BAM modified after `.bai`) | `samtools index -@ 4 file.bam` |
+| Output HTML size much larger than expected | Some region accidentally spans Mb-scale (e.g., a row with start=0 end=chrom_length); flanking compounds this | Validate the sites BED — `awk '$3-$2 > 1e6'` to find offenders |
+
+## See also
+
+- Official docs: https://github.com/igvteam/igv-reports
+- igv.js track config schema: https://github.com/igvteam/igv.js/wiki/Tracks-2.0
+- This skill's `references/databases_config_paths.md` for which YAML keys
+  hold which tracks per genome.
diff --git a/igv-reports/references/databases_config_paths.md b/igv-reports/references/databases_config_paths.md
new file mode 100644
index 0000000..466d13f
--- /dev/null
+++ b/igv-reports/references/databases_config_paths.md
@@ -0,0 +1,70 @@
+# Databases-config YAML schema (for `--db-config` / `$IGV_REPORTS_DB_CONFIG`)
+
+Optional. Without a databases YAML the driver still works — pass `--fasta` and
+`--no-default-tracks` (plus any `--extra-track` you need) on every call.
+
+The YAML is convenient when running across many regions/cohorts on the same
+genome build: one file maps a short `--genome <id>` flag to the FASTA + the
+default annotation tracks (CpG islands, gencode, RepeatMasker), so each
+invocation stays short.
+
+## Schema
+
+```yaml
+reference_genomes:
+  local:
+    <genome_id>:
+      fasta:         <path>          # required
+      gtf:           <path>          # gencode .gtf.gz or .gff3.gz (bgzip + tabix preferred)
+      sizes:         <path>          # chrom.sizes (optional)
+      CpGIslands:    <path>          # .bed (uncompressed or bgzip)
+      repMaskerBed:  <path>          # .bed.gz (bgzip + tabix)
+```
+
+`<genome_id>` is the value you pass to `--genome`. Suggested IDs and aliases:
+
+| `--genome` value     | YAML key            | Common alias |
+|----------------------|---------------------|--------------|
+| `hg38`               | `hg38`              | GRCh38       |
+| `mm10`               | `mm10`              | GRCm38       |
+| `mm39`               | `mm39`              | GRCm39       |
+| `t2t` / `chm13`      | `t2t_CHM13v2_plusY` | T2T-CHM13v2  |
+| `grch37` / `hg19`    | `GRCh37`            | hg19         |
+
+The driver normalizes the input alias to the canonical YAML key. Extend
+`GENOME_ALIASES` in `scripts/build_igvreports.py` if you need additional builds.
+
+## Default-track resolution
+
+For the `--genome` you pass, the driver tries to load three default tracks:
+
+1. **CpG islands** → `CpGIslands` key
+2. **Gene annotation** → `gtf` key (prefers a sibling `*.gff3.gz` if present)
+3. **RepeatMasker** → `repMaskerBed` key
+
+Any track absent from the YAML for that genome is logged as a warning and
+skipped — the report still builds, just without that track.
+
+## Gencode preference: GFF3 over GTF
+
+If `gtf` points at `gencode.<version>.annotation.gtf.gz` and a sibling
+`gencode.<version>.annotation.gff3.gz` exists in the same directory, the
+driver prefers the GFF3 — it carries the full transcript / exon / CDS / UTR
+detail that's most useful for read-level inspection at SV / fusion / integration
+junctions. The GTF (gene-level) loads as a fallback.
+
+Override with `--gencode-from-yaml` to force the YAML's `gtf` path regardless.
+
+## EPDnew (methylation-specific)
+
+`EPDnewCoding` / `EPDnewNonCoding` keys (BED.gz, bgzip + tabix) are
+**not** auto-loaded — methylation-specific. Reference them explicitly via a
+`--track-config tracks.json` entry when building a methylation viewer (see
+`references/methylation_ont.md`).
+
+## Missing tracks → workflow
+
+1. Build or locate the BED / GFF3 / GTF.
+2. If it needs bgzip + tabix conversion, run `scripts/prep_track.sh <path>`.
+3. Add the path to your `databases_config.yaml` under the appropriate key, or
+   pass it via `--extra-track <path>` for a one-off run.
diff --git a/igv-reports/references/methylation_ont.md b/igv-reports/references/methylation_ont.md
new file mode 100644
index 0000000..8764546
--- /dev/null
+++ b/igv-reports/references/methylation_ont.md
@@ -0,0 +1,243 @@
+---
+name: methylation_ont
+genome: hg38 | mm10 | mm39 | t2t
+assay: ONT 5mC + 5hmC (CpG)
+worked_example: ../examples/methylation_ont/
+---
+
+# ONT methylation viewer — cheat-sheet
+
+Targeted reference for building an igv-reports HTML that shows per-read
+5mC/5hmC base-modification calls (BAM, basemod2 coloring) plus per-sample
+methylation-fraction bedGraph tracks at fixed promoter / gene / DMR windows.
+
+When this skill needs to build a methylation viewer, the **default path
+(positional `--tracks`) is wrong** — methylation viewers need named,
+colored, y-axis-locked tracks. The right path is:
+
+```bash
+build_igvreports.py --track-config tracks.json ...
+```
+
+with `tracks.json` either generated from a YAML spec (see worked example)
+or hand-written from `tracks.template.json`.
+
+## The four-thing checklist
+
+### 1. BAM tracks need `colorBy: "basemod2"`
+
+```json
+{
+  "name": "<sample>",
+  "url": "<bam>",
+  "indexURL": "<bam>.bai",
+  "format": "bam",
+  "type": "alignment",
+  "colorBy": "basemod2",
+  "showSoftClips": false,
+  "displayMode": "COLLAPSED"
+}
+```
+
+Without `colorBy: "basemod2"`, the BAM renders as plain alignments
+without the per-base 5mC/5hmC colors that are the whole point of the
+view. `displayMode: "COLLAPSED"` keeps the BAM panel short so the
+bedGraph summary tracks below stay visible.
+
+### 2. bedGraph tracks need fixed `min: 0, max: 100`
+
+```json
+{
+  "name": "<sample> 5mC",
+  "url": "<bedgraph>",
+  "format": "bedgraph",
+  "type": "wig",
+  "color": "rgb(0,68,136)",
+  "min": 0, "max": 100
+}
+```
+
+modkit's bedmethyl output is **percent (0..100)**, not fraction (0..1) —
+the y-axis ceiling must be 100. IGV's per-track autoscale defaults
+differ per track and hide real cross-sample differences (one sample
+might autoscale to 0..82, the next to 0..100; same bar height means
+different methylation). Lock all samples' bedGraph tracks to the same
+0..100 range. See `rules/igv.md` for the original incident.
+
+**Use bedGraph, not bigwig.** igv-reports' Python slicer (`utils.getreader`)
+dispatches on file extension and has no `.bw` reader — runs fail with
+`Exception: Unknown file format`. Pre-slice bigwigs over the report
+regions with `bigWigToBedGraph -chrom=<chr> -start=<start> -end=<end> <bw> <bg>`,
+one output per region, then `cat >>` them into a single bedGraph (UCSC
+`bigWigToBedGraph` opens `/dev/stdout` with `O_TRUNC` between calls —
+piping multiple invocations loses everything but the last region).
+
+### 3. `--flanking 0` when sites encode the desired window
+
+For methylation viewers the sites BED almost always carries the desired
+window directly (a promoter span, a DMR, a gene body). Adding 300 bp of
+flanking adds nothing and shifts the initial viewer frame. Pass
+`--flanking 0` and let the BED row coordinates be the frame.
+
+The 300 bp default is right for the SV/integration breakpoint workflow
+this skill was extracted from — there the BED row is a one-base
+breakpoint and you need flanking to see read support.
+
+### 4. Sites BED with `#chrom\tstart\tend\tname` comment header is fine
+
+The skill's older docs say "headerless" because non-`#` header rows
+crash `create_report` with `ValueError: invalid literal for int()`.
+A line starting with `#` is treated as a comment and is fine — and
+matches CLAUDE.md's "BED-like outputs must have a `#`-prefixed
+header" rule. Use:
+
+```
+#chrom	start	end	name
+chr2	25246000	25259000	DNMT3A_2_promoter
+```
+
+Pair this with `--info-columns name` so the `name` column shows up in
+the report's variant table.
+
+## Track ordering
+
+Render order is top-to-bottom in the viewer; put annotation FIRST so
+gene tracks anchor the user's eye at the top, then per-sample
+BAM + 5mC + 5hmC triplets stacked below in sample-group order. The worked example
+follows: gencode → EPDnew → CpGIslands → RepeatMasker → (per-sample:
+BAM, 5mC, 5hmC).
+
+## Colors (Okabe-Ito, group-paired)
+
+For two-group studies (e.g., normal vs tumor) pick two color pairs out
+of the Okabe-Ito palette so groups are pre-attentively distinguishable:
+
+| Group  | 5mC color           | 5hmC color           |
+|--------|---------------------|----------------------|
+| Group A (normal) | `rgb(0,68,136)` blue | `rgb(204,121,167)` reddish-purple |
+| Group B (tumor)  | `rgb(213,94,0)` vermillion | `rgb(230,159,0)` orange |
+
+Annotation track colors (also Okabe-Ito): EPDnew = vermillion
+`rgb(213,94,0)`, CpG islands = bluish-green `rgb(0,158,115)`,
+RepeatMasker = sky-blue `rgb(86,180,233)`.
+
+`scripts/generate_tracks_json.py` reads these from a `group_colors:`
+map in the YAML spec, so a new group only needs one entry.
+
+## EPDnew promoter track (hg38)
+
+If your `databases_config.yaml` carries EPDnew for hg38, the suggested
+keys are:
+
+```yaml
+reference_genomes:
+  local:
+    hg38:
+      EPDnewCoding:    <path-to>/Hs_EPDnew.hg38.bed.gz
+      EPDnewNonCoding: <path-to>/HsNC_EPDnew.hg38.bed.gz
+```
+
+Source: <https://epd.expasy.org/epd/human/human_database.php?db=human>
+
+The skill driver doesn't load these by default — they're a methylation-
+specific track. Either reference them directly from `tracks.json` or add
+an `EPDnew` entry to a custom `annotation:` section in your YAML spec.
+mm10 / mm39 / t2t builds don't ship with EPDnew.
+
+## Reference-fasta vs `--genome hg38`
+
+Always pass `--fasta` (skill driver default), never create_report's own `--genome hg38`
+(distinct from the driver's `--genome`). The igv.js bundled genome IDs require internet
+at view + render time; `--fasta` + `--standalone` produces a fully-offline HTML. See
+`references/best_practices.md` Reference section.
+
+## When to use the apptainer SIF (mostly automatic)
+
+The driver auto-detects whether to run via an apptainer SIF or PATH
+`create_report` based on `SLURM_JOB_ID` and `$IGV_REPORTS_SIF`:
+
+| Environment | Default | Why |
+|---|---|---|
+| Local / login node (`SLURM_JOB_ID` unset) | PATH `create_report` | No cold-start tax; simplest path. |
+| Compute node under SLURM (`SLURM_JOB_ID` set) AND `$IGV_REPORTS_SIF` points at an existing SIF | apptainer + SIF | Fresh node = cold NFS cache = 1-2 M page faults on conda init (~2.5 us each). The SIF reads once into RAM, then stays warm. |
+| Compute node BUT no SIF set/found | falls back to PATH `create_report` (logged) | Safe default; no surprise SIF-not-found error. |
+
+Override either way with `--apptainer` / `--no-apptainer`. The decision
+(auto vs. explicit) is logged at run start so post-mortems are unambiguous.
+
+To set up the SIF once, pull from the Galaxy depot:
+
+```bash
+export IGV_REPORTS_SIF=/path/to/igv-reports_1.16.0.sif
+wget -O "$IGV_REPORTS_SIF" \
+  'https://depot.galaxyproject.org/singularity/igv-reports:1.16.0--pyh7cba7a3_0'
+```
+
+**Mandatory `--cleanenv` for the SIF (driver handles it).** Host RHEL 8
+exports `SSL_CERT_FILE=/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem`
+which doesn't exist inside the Galaxy-depot SIF. `create_report`'s
+standalone build path makes an HTTPS GET (likely for the IGV.js
+ideogram CDN) that crashes with `[SSL: CERTIFICATE_VERIFY_FAILED]`
+mid-render. The driver always invokes `singularity exec --cleanenv ...`
+to scrub host env vars before they enter the SIF, so users don't need
+to remember the flag. If you call create_report from the SIF directly
+(bypassing the driver), include `--cleanenv` yourself. See
+`rules/apptainer_env_leak.md` for the full pattern.
+
+## Worked example
+
+`../examples/methylation_ont/` is the canonical end-to-end run:
+- 4 COLO829 ONT samples (2 normal-blood + 2 tumor)
+- 2 promoter windows (DNMT3A_2 + EZH2)
+- 5mC + 5hmC bedGraph per sample (8 bedGraph files, pre-sliced)
+- gencode + EPDnew + CGI + rmsk annotation slices
+
+Run `bash examples/methylation_ont/build.sh` to regenerate the HTML;
+read `examples/methylation_ont/recipe.md` for the slot-by-slot guide
+to adapting it.
+
+## Post-render verification
+
+After building the HTML, run `scripts/verify_report.py` to confirm the
+embedded content matches your inputs (region count, coordinates, track
+names). For methylation viewers this catches the worst silent failure
+mode — a render that succeeded for the wrong samples — which the input-
+side validation alone can't catch.
+
+```bash
+python scripts/verify_report.py \
+    --html         methylation_report.hg38.html \
+    --sites        sites.hg38.bed \
+    --track-config tracks.json \
+    --min-size-mb  1.0 \
+    --out          methylation_report.verify.tsv \
+    --fail-on-fail
+```
+
+For `--track-config` builds the check uses the JSON's `name` fields; in
+the YAML spec consumed by `generate_tracks_json.py`, those names are the
+`name:` keys in `annotation:` and the auto-generated `<sample>`,
+`<sample> 5mC`, `<sample> 5hmC` labels per sample. Picking specific
+sample names in the YAML therefore drives the verifier's coverage —
+generic names like "sample1" weaken the check.
+
+**For cohort methylation runs** (multi-patient × per-sample HTMLs +
+`index.html`), the cohort verifier (`scripts/verify_cohort.py`) is the
+more relevant tool: it additionally catches sample-swap bugs (sample-2's
+BAMs accidentally ending up in sample-1's HTML), missing samples, and
+`index.html` drift. The methylation workflow is especially vulnerable to
+sample-swap typos because each patient has multiple ONT runs with similar-
+looking flowcell IDs (e.g., `PAU59807` vs `PAU61427`). Auto-invoked by
+`build_igvreports.py --samplesheet`; see SKILL.md "Cohort-level
+verification" for details.
+
+## Cross-references
+
+- `rules/igv.md` — bigwig-can't-be-sliced, y-axis-autoscale, UCSC
+  `/dev/stdout` truncation; the rules that motivate this cheat-sheet.
+- `rules/apptainer_vs_conda.md` — when the `--apptainer` flag pays off.
+- `references/best_practices.md` — generic create_report flag reference;
+  sites BED, tracks, reference, performance, pitfalls table.
+- `examples/methylation_ont/recipe.md` — full slot-by-slot example doc.
+- `CLAUDE.md` §3A — upstream ONT methylation pipeline (pod5 → dorado →
+  modkit pileup → bedGraph + bigwig).
diff --git a/igv-reports/scripts/build_igvreports.py b/igv-reports/scripts/build_igvreports.py
new file mode 100755
index 0000000..d3f5ff7
--- /dev/null
+++ b/igv-reports/scripts/build_igvreports.py
@@ -0,0 +1,1234 @@
+#!/usr/bin/env python3
+"""build_igvreports.py — generic driver for the igv-reports skill.
+
+Author: Samuel Ahuno
+Purpose:
+  Build self-contained HTML genomic-region reports with create_report
+  (igv-reports). Two run modes:
+
+    1. Single — direct CLI: --sites BED + --bam BAM(s) [+--vcf VCF]
+       → one HTML at --output.
+
+    2. Cohort — TSV samplesheet: one HTML per row + an index.html.
+       Samplesheet columns (tab-separated, with header):
+         sample   bam_tumor   bam_normal   vcf   sites_bed
+       Optional sixth column: extra_tracks (comma-separated paths).
+
+  Either way, the driver:
+    - Resolves CpG islands, gencode, and RepeatMasker paths from
+      databases_config.yaml for the chosen genome (skipping any not
+      configured for that genome, with a warning).
+    - Validates that the sites BED is well-formed (`#`-prefixed header lines are fine).
+    - Calls create_report with --flanking 300 --standalone by default.
+    - Writes a logs/ entry capturing the resolved track list, the full
+      command, the flanking value, and per-region embedded data sizes.
+
+Usage:
+  python build_igvreports.py --sites SITES.hg38.bed \\
+      --bam tumor.bam normal.bam --vcf calls.vcf \\
+      --genome hg38 --fasta /path/to/hg38.fa \\
+      --no-default-tracks --output report.hg38.html
+
+  python build_igvreports.py --samplesheet sheet.tsv \\
+      --genome hg38 --fasta /path/to/hg38.fa \\
+      --no-default-tracks --output-dir results/cohort/
+
+Defaults can be shipped via a YAML at IGV_REPORTS_DB_CONFIG (see the
+references/databases_config_paths.md schema), in which case --fasta /
+--no-default-tracks are not needed.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import logging
+import os
+import shutil
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime
+from pathlib import Path
+
+try:
+    import yaml  # PyYAML
+except ImportError:
+    print("ERROR: PyYAML not available. Install with: pip install pyyaml", file=sys.stderr)
+    sys.exit(2)
+
+# Optional: $IGV_REPORTS_DB_CONFIG names a YAML mapping genome IDs to FASTA + CGI +
+# gencode + RepeatMasker paths so --genome resolves tracks automatically (read once
+# at import; unset/empty → None). Without it, pass --fasta and --no-default-tracks.
+_DB_CONFIG_ENV = os.environ.get("IGV_REPORTS_DB_CONFIG")
+DEFAULT_DBCONFIG = Path(_DB_CONFIG_ENV) if _DB_CONFIG_ENV else None
+DEFAULT_FLANKING = 300  # bp; presumably the --flanking default — confirm at the argparse definition
+# Optional: $IGV_REPORTS_SIF names an apptainer SIF for offline / HPC runs (unset/empty → None).
+# Galaxy depot: https://depot.galaxyproject.org/singularity/igv-reports:1.16.0--pyh7cba7a3_0
+_SIF_ENV = os.environ.get("IGV_REPORTS_SIF")
+IGVREPORTS_SIF = Path(_SIF_ENV) if _SIF_ENV else None
+
+
+def apptainer_bind_args() -> list[str]:
+    """Turn `$IGV_REPORTS_BIND` into singularity `--bind <path>` argv tokens.
+
+    The variable holds colon-separated host paths. Empty segments and paths
+    that do not exist are dropped, so an unset or empty variable yields [].
+    """
+    configured = os.environ.get("IGV_REPORTS_BIND", "").split(":")
+    # "".split(":") is [""], which the emptiness test below filters out.
+    usable = [entry for entry in configured if entry and Path(entry).exists()]
+    bind_argv: list[str] = []
+    for entry in usable:
+        bind_argv += ["--bind", entry]
+    return bind_argv
+
+# Accepted --genome spellings mapped to the canonical key that is looked up
+# under reference_genomes.local in the databases YAML.
+GENOME_ALIASES = dict(
+    [
+        ("hg38", "hg38"), ("GRCh38", "hg38"),
+        ("mm10", "mm10"), ("GRCm38", "mm10"),
+        ("mm39", "mm39"), ("GRCm39", "mm39"),
+        ("t2t", "t2t_CHM13v2_plusY"),
+        ("chm13", "t2t_CHM13v2_plusY"),
+        ("T2T", "t2t_CHM13v2_plusY"),
+        ("T2T-CHM13", "t2t_CHM13v2_plusY"),
+        ("t2t_CHM13v2_plusY", "t2t_CHM13v2_plusY"),
+        ("GRCh37", "GRCh37"), ("hg19", "GRCh37"),
+    ]
+)
+
+
+def setup_logger(log_path: Path) -> logging.Logger:
+    """Return the "igv_reports" logger with a file handler and a stderr handler.
+
+    Handlers from an earlier call are closed before being replaced (no fd leak).
+    """
+    log_path.parent.mkdir(parents=True, exist_ok=True)
+    fmt = logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    log = logging.getLogger("igv_reports")
+    log.setLevel(logging.INFO)
+    for stale in list(log.handlers):  # clear() alone would leave the old log file open
+        log.removeHandler(stale)
+        stale.close()
+    for handler in (logging.FileHandler(log_path), logging.StreamHandler(sys.stderr)):
+        handler.setFormatter(fmt)
+        log.addHandler(handler)
+    return log
+
+
+def resolve_genome(genome: str) -> str:
+    """Return the canonical YAML key for an alias (exact match first, then case-insensitive)."""
+    folded = {alias.casefold(): key for alias, key in GENOME_ALIASES.items()}
+    canon = GENOME_ALIASES.get(genome) or folded.get(str(genome).casefold())
+    if not canon:
+        raise SystemExit(f"ERROR: unknown genome '{genome}'. Supported: {sorted(set(GENOME_ALIASES.values()))}")
+    return canon
+
+
+def load_db_config(path: Path | None) -> dict:
+    """Load the databases YAML and return its top-level content.
+
+    When `path` is None (no db-config configured) or the file is missing, a
+    warning goes to stderr and {} is returned; an empty YAML also yields {}.
+    Without a YAML, pass --fasta and --no-default-tracks on the driver."""
+    if path is None or not path.exists():
+        sys.stderr.write(
+            f"[build_igvreports] WARNING: db-config not found at {path}\n"
+            "  Set $IGV_REPORTS_DB_CONFIG to point at your YAML, or pass\n"
+            "  --fasta PATH and --no-default-tracks to bypass it entirely.\n"
+        )
+        return {}
+    with path.open() as fh:
+        cfg = yaml.safe_load(fh) or {}
+    return cfg
+
+
+def resolve_default_tracks(cfg: dict, genome: str, log: logging.Logger) -> list[str]:
+    """Return the default annotation tracks that exist on disk for `genome`.
+
+    Order is fixed: CpG islands, gencode, RepeatMasker (keys `CpGIslands`,
+    `gtf`, `repMaskerBed` under reference_genomes.local.<genome>). A track
+    that is unconfigured or absent on disk is skipped with a warning; an
+    empty cfg or unknown genome returns []. For gencode, a tabix-indexed
+    `.gff3.gz` beside the configured `.gtf.gz` is preferred when present.
+    """
+    g = ((cfg.get("reference_genomes") or {}).get("local") or {}).get(genome) or {}
+    if not g:
+        log.warning(
+            f"no entry for genome '{genome}' in db-config — skipping default tracks. "
+            "Pass --extra-track or --track-config for annotation tracks."
+        )
+        return []
+
+    tracks: list[str] = []
+    cgi = g.get("CpGIslands")
+    if cgi and Path(cgi).exists():
+        tracks.append(cgi)
+    else:
+        log.warning(f"CpG islands track missing for {genome} (key=CpGIslands, value={cgi})")
+
+    gtf = g.get("gtf")
+    gencode_track: str | None = None
+    siblings: list[Path] = []
+    if gtf and Path(gtf).name.endswith(".gtf.gz"):  # same stem, any gencode version
+        siblings.append(Path(gtf).with_name(Path(gtf).name[: -len(".gtf.gz")] + ".gff3.gz"))
+    if gtf and genome == "hg38":  # legacy fixed-name sibling, kept as a second choice
+        siblings.append(Path(gtf).parent / "gencode.v47.annotation.gff3.gz")
+    for sibling in siblings:
+        if sibling.exists() and sibling.with_name(sibling.name + ".tbi").exists():
+            gencode_track = str(sibling)
+            log.info(f"  {genome}: using full gencode annotation: {sibling}")
+            break
+    if gencode_track is None and gtf and Path(gtf).exists():
+        gencode_track = gtf
+    if gencode_track:
+        tracks.append(gencode_track)
+    else:
+        log.warning(f"Gencode track missing for {genome} (key=gtf, value={gtf})")
+
+    rmsk = g.get("repMaskerBed")
+    if rmsk and Path(rmsk).exists():
+        tracks.append(rmsk)
+    else:
+        log.warning(f"RepeatMasker track missing for {genome} (key=repMaskerBed, value={rmsk})")
+
+    return tracks
+
+
+def fasta_for(cfg: dict, genome: str) -> str:
+    """Return the FASTA path configured for `genome` in the db-config.
+    Exits when the genome entry, the FASTA file, or its `.fai` index is missing."""
+    try:
+        fasta = cfg["reference_genomes"]["local"][genome].get("fasta")
+    except (KeyError, TypeError, AttributeError):  # AttributeError: entry present but null
+        raise SystemExit(
+            f"ERROR: db-config has no '{genome}' entry to resolve FASTA from.\n"
+            "       Pass --fasta PATH explicitly, or set $IGV_REPORTS_DB_CONFIG\n"
+            "       to a YAML that defines reference_genomes.local.<genome>.fasta."
+        ) from None
+    if not fasta or not Path(fasta).exists():
+        raise SystemExit(f"ERROR: FASTA missing for {genome}: {fasta}")
+    if not Path(fasta + ".fai").exists():
+        raise SystemExit(
+            f"ERROR: FASTA index missing for {fasta} — run `samtools faidx {fasta}`"
+        )
+    return fasta
+
+
+def validate_bams(bams: list[Path]) -> None:
+    """Exit early unless every BAM exists and has an index file beside it.
+
+    For `x.bam` any of `x.bam.bai`, `x.bam.csi`, `x.bai` or `x.csi` counts.
+    create_report slices BAMs per region through that index; without it the
+    failure is an obscure pysam error several layers in, so it is reported
+    here with the command that fixes it."""
+    for bam in bams:
+        if not bam.exists():
+            raise SystemExit(f"ERROR: BAM not found: {bam}")
+        index_suffixes = (bam.suffix + ".bai", bam.suffix + ".csi", ".bai", ".csi")
+        if any(bam.with_suffix(suffix).exists() for suffix in index_suffixes):
+            continue
+        raise SystemExit(
+            f"ERROR: BAM index missing for {bam} — create_report cannot slice it.\n"
+            f"       Fix: samtools index {bam}\n"
+            f"       (or `samtools index -c {bam}` for a .csi index on contigs >512 Mb)"
+        )
+
+
+def validate_sites_bed(bed: Path) -> None:
+    """Check the sites BED up front so create_report does not crash mid-run.
+
+    Blank lines and lines starting with `#` or `track ` are ignored, mirroring
+    what create_report's positional BED parser skips (so the lab's
+    `#chrom\\tstart\\tend\\tname` header passes). Every other row needs at least
+    three tab-separated columns, integer start/end and start < end — a bare
+    `chrom\\tstart\\tend` header therefore fails here with a clear message.
+    Any violation, or a missing file, ends the run via SystemExit."""
+    if not bed.exists():
+        raise SystemExit(f"ERROR: sites BED not found: {bed}")
+    skipped_prefixes = ("#", "track ")
+    with bed.open() as handle:
+        for lineno, raw in enumerate(handle, start=1):
+            record = raw.rstrip("\n")
+            if record == "" or record.startswith(skipped_prefixes):
+                continue
+            fields = record.split("\t")
+            if len(fields) < 3:
+                raise SystemExit(f"ERROR: {bed}:{lineno}: BED needs >=3 tab-separated columns; got {fields!r}")
+            try:
+                begin, stop = int(fields[1]), int(fields[2])
+            except ValueError:
+                raise SystemExit(
+                    f"ERROR: {bed}:{lineno}: non-numeric start/end — likely a header row.\n"
+                    "       igv-reports' BED parser is positional and chokes on non-comment\n"
+                    "       headers. Prefix the header with `#` (skipped by create_report\n"
+                    "       and matches the lab's BED-output convention) or strip it."
+                )
+            if begin >= stop:
+                raise SystemExit(f"ERROR: {bed}:{lineno}: start ({begin}) >= end ({stop})")
+
+
+def find_create_report() -> str:
+    """Return the path of the `create_report` executable found on PATH, or exit
+    with install / SIF instructions when there is none."""
+    located = shutil.which("create_report")
+    if not located:
+        raise SystemExit(
+            "ERROR: create_report not on PATH.\n"
+            "  Install:              pip install -U 'igv-reports>=1.16.0'\n"
+            "  Offline / air-gapped: point IGV_REPORTS_SIF at an igv-reports SIF\n"
+            "                        and rerun with --apptainer (Galaxy depot:\n"
+            "                        https://depot.galaxyproject.org/singularity/igv-reports:1.16.0--pyh7cba7a3_0)"
+        )
+    return located
+
+
+def apptainer_create_report_prefix(sif: Path | None) -> list[str]:
+    """Return the argv prefix that runs create_report inside the igv-reports SIF.
+
+    The prefix is `singularity exec --cleanenv [--bind <path> ...] <sif>
+    create_report`; bind tokens come from `apptainer_bind_args()`.
+
+    `--cleanenv` keeps host environment variables out of the container. On
+    RHEL 8 hosts SSL_CERT_FILE / SSL_CERT_DIR point at paths that don't exist
+    inside Galaxy-depot SIFs, and create_report's standalone-HTML build makes
+    an HTTPS GET that then aborts with `[SSL: CERTIFICATE_VERIFY_FAILED]`
+    (see rules/apptainer_env_leak.md).
+
+    Exits via SystemExit, with pull instructions, when `sif` is None
+    ($IGV_REPORTS_SIF unset) or the SIF file does not exist.
+    """
+    if sif is None:
+        raise SystemExit(
+            "ERROR: --apptainer requested but $IGV_REPORTS_SIF is not set.\n"
+            "       Set IGV_REPORTS_SIF to a SIF path and rerun, e.g.:\n"
+            "         export IGV_REPORTS_SIF=/path/to/igv-reports_1.16.0.sif\n"
+            "       Pull the SIF first if needed:\n"
+            "         wget -O \"$IGV_REPORTS_SIF\" \\\n"
+            "           'https://depot.galaxyproject.org/singularity/igv-reports:1.16.0--pyh7cba7a3_0'"
+        )
+    if not sif.exists():
+        raise SystemExit(
+            f"ERROR: apptainer SIF not found: {sif}\n"
+            "       Pull with one of:\n"
+            f"         apptainer pull {sif} \\\n"
+            "           docker://igv-org/igv-reports:1.16.0\n"
+            f"         wget -O {sif} \\\n"
+            "           'https://depot.galaxyproject.org/singularity/igv-reports:1.16.0--pyh7cba7a3_0'\n"
+            "       Or set $IGV_REPORTS_SIF to a SIF you already have."
+        )
+    return ["singularity", "exec", "--cleanenv", *apptainer_bind_args(), str(sif), "create_report"]
+
+
+def _read_sites_bed_rows(sites: Path) -> list[dict]:
+    """Parse the sites BED into one dict per data row.
+
+    Blank lines, lines starting with `#`, `track ` or `browser `, and rows with
+    fewer than three tab-separated columns are skipped. Each dict carries
+    `chrom`, integer `start` / `end`, `name` and a 1-based `bed_row_idx`. The
+    name is the stripped 4th column when non-empty, else `region_<idx>` with
+    the index zero-padded to three digits."""
+    non_data_prefixes = ("#", "track ", "browser ")
+    rows: list[dict] = []
+    with sites.open() as handle:
+        for raw in handle:
+            record = raw.rstrip("\n")
+            if not record or record.startswith(non_data_prefixes):
+                continue
+            fields = record.split("\t")
+            if len(fields) < 3:
+                continue
+            label = fields[3].strip() if len(fields) >= 4 else ""
+            rows.append(
+                {"chrom": fields[0], "start": int(fields[1]), "end": int(fields[2]), "name": label}
+            )
+    for position, row in enumerate(rows, start=1):
+        row["bed_row_idx"] = position
+        if not row["name"]:
+            row["name"] = f"region_{position:03d}"
+    return rows
+
+
+def _write_igver_regions_bed(rows: list[dict], flanking: int, out: Path) -> None:
+    """Write `rows` to `out` as a 4-column BED widened by `flanking` on both sides.
+    The start is clamped at 0 and column 4 carries each row's `name` (its UID)."""
+    with out.open("w") as handle:
+        handle.writelines(
+            f"{row['chrom']}\t{max(0, row['start'] - flanking)}\t"
+            f"{row['end'] + flanking}\t{row['name']}\n"
+            for row in rows
+        )
+
+
+def _write_igver_input_list(tracks: list[str], out: Path) -> None:
+    """Write the track paths to `out`, one per line; this is the file handed
+    to igver through `-i`."""
+    with out.open("w") as handle:
+        handle.writelines(f"{track}\n" for track in tracks)
+
+
+def _resolve_igver_cmd(override: str | None) -> list[str]:
+    """Return the argv prefix for igver: the explicit override, else $IGVER_CMD,
+    else `igver` on PATH; exits with install hints when none resolves.
+    Command strings are split with shell quoting rules (quoted paths may hold
+    spaces, e.g. `apptainer exec '/my sifs/igver.sif' igver`); blanks are ignored."""
+    import shlex  # local import: shlex is not among this module's top-level imports
+    for candidate in (override, os.environ.get("IGVER_CMD")):
+        argv = shlex.split(candidate) if candidate else []
+        if argv:
+            return argv
+    on_path = shutil.which("igver")
+    if on_path:
+        return [on_path]
+    raise SystemExit(
+        "ERROR: igver not found.\n"
+        "  Install: pip install igver\n"
+        "  Override: --igver-cmd 'apptainer exec /path/to/igver.sif igver'\n"
+        "  Or set $IGVER_CMD"
+    )
+
+
+def build_pngs_with_igver(
+    sites: Path,
+    tracks: list[str],
+    genome: str,
+    flanking: int,
+    out_dir: Path,
+    log: logging.Logger,
+    html_path: Path,
+    igver_cmd: str | None = None,
+    dpi: int = 300,
+    display_mode: str = "collapse",
+    panel_height: int | None = None,
+    fmt: str = "png",
+) -> Path:
+    """Render one static image per sites-BED row with igver and write a manifest.
+
+    Args: `sites` and `tracks` are the BED and track paths to render; `genome`
+    is passed to igver `-g`; `flanking` pads each row (start clamped at 0);
+    `html_path` is recorded in the manifest only; `igver_cmd` overrides how
+    igver is launched; `dpi`, `display_mode`, `panel_height` and `fmt` map to
+    igver `--dpi`, `-d`, `-p` and `-f`.
+
+    Returns the path of `out_dir/manifest.tsv`. Exits (SystemExit) when the
+    BED has no data rows, igver returns non-zero, or any expected image is
+    missing or zero-byte after the run.
+
+    Files written under `out_dir`:
+      igver_regions.bed  - flanked BED with UIDs in col 4 (igver -r)
+      igver_input.txt    - track paths, one per line (igver -i)
+      png/               - igver output (igver -o), expected to be named
+                           <chrom>-<start>-<end>.<uid>.<ext>
+      manifest.tsv       - one row per BED row: coordinates, image + HTML paths
+    """
+    out_dir.mkdir(parents=True, exist_ok=True)
+    png_dir = out_dir / "png"
+    png_dir.mkdir(parents=True, exist_ok=True)
+
+    rows = _read_sites_bed_rows(sites)
+    if not rows:
+        raise SystemExit(f"ERROR: no data rows found in sites BED: {sites}")
+
+    regions_bed = out_dir / "igver_regions.bed"
+    _write_igver_regions_bed(rows, flanking, regions_bed)
+    input_txt = out_dir / "igver_input.txt"
+    _write_igver_input_list(tracks, input_txt)
+
+    cmd = list(_resolve_igver_cmd(igver_cmd)) + [
+        "-i", str(input_txt),
+        "-r", str(regions_bed),
+        "-o", str(png_dir),
+        "-g", genome,
+        "-d", display_mode,
+        "--dpi", str(dpi),
+        "-f", fmt,
+        "--no-singularity",
+    ]
+    if panel_height is not None:
+        cmd.extend(["-p", str(panel_height)])
+
+    log.info(f"  igver: dpi={dpi} display={display_mode} fmt={fmt} regions={len(rows)}")
+    log.info(f"  igver cmd: {' '.join(cmd)}")
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    if proc.returncode != 0:
+        log.error(f"igver FAILED for {sites}")
+        log.error(f"stdout: {proc.stdout}")
+        log.error(f"stderr: {proc.stderr}")
+        raise SystemExit(proc.returncode)
+
+    # PNG filename convention is set by igver's _parse_bed_file:
+    # `<chrom>-<start>-<end>.<uid>.<ext>`. We reconstruct it here.
+    ext = "svg" if fmt in ("svg", "pdf") else fmt  # NOTE(review): fmt="pdf" is looked up as ".svg" — confirm
+    manifest = out_dir / "manifest.tsv"
+    with manifest.open("w") as fh:
+        fh.write(
+            "#bed_row_idx\tuid\tchrom\tstart_orig\tend_orig\t"
+            "start_flanked\tend_flanked\tregion\tpng_path\thtml_path\thtml_table_row\n"
+        )
+        for r in rows:
+            start_f = max(0, r["start"] - flanking)
+            end_f = r["end"] + flanking
+            fname = f"{r['chrom']}-{start_f}-{end_f}.{r['name']}.{ext}"
+            png_rel = (png_dir / fname).resolve()  # absolute path, despite the _rel name
+            html_rel = html_path.resolve()  # absolute path, despite the _rel name
+            fh.write(
+                f"{r['bed_row_idx']}\t{r['name']}\t{r['chrom']}\t"
+                f"{r['start']}\t{r['end']}\t{start_f}\t{end_f}\t"
+                f"{r['chrom']}:{start_f}-{end_f}\t{png_rel}\t{html_rel}\t"
+                f"{r['bed_row_idx']}\n"
+            )
+
+    log.info(f"  png manifest: {manifest} ({len(rows)} rows)")
+
+    # Inline existence check — igver exits 0 even when it fails to render
+    # (silent exit-0 failure documented in the upstream skill's notes).
+    # We can't trust the exit code, so verify every expected PNG path is
+    # on disk and non-empty before returning. Without this a "successful"
+    # build silently ships an empty png/ dir. The manifest is already on disk.
+    missing: list[str] = []
+    empty: list[str] = []
+    for r in rows:
+        start_f = max(0, r["start"] - flanking)
+        end_f = r["end"] + flanking
+        fname = f"{r['chrom']}-{start_f}-{end_f}.{r['name']}.{ext}"
+        p = png_dir / fname
+        if not p.exists():
+            missing.append(fname)
+        elif p.stat().st_size == 0:
+            empty.append(fname)
+    if missing or empty:
+        log.error(
+            f"igver returned exit 0 but {len(missing)} expected PNG(s) are missing "
+            f"and {len(empty)} are zero-byte (out of {len(rows)} regions). "
+            "This is a documented silent-failure mode of igver."
+        )
+        if missing:
+            log.error(f"  missing: {missing[:5]}{'...' if len(missing) > 5 else ''}")
+        if empty:
+            log.error(f"  empty:   {empty[:5]}{'...' if len(empty) > 5 else ''}")
+        raise SystemExit(
+            f"ERROR: igver produced {len(rows) - len(missing) - len(empty)} of "
+            f"{len(rows)} PNGs (silent exit-0 failure). Check the igver install "
+            "path — `pip install igver` egg-link lacks the IGV Java binary; use "
+            "an apptainer SIF via --igver-cmd or $IGVER_CMD."
+        )
+
+    return manifest
+
+
+def build_one(
+    sites: Path,
+    bams: list[Path],
+    vcf: Path | None,
+    extra_tracks: list[Path],
+    fasta: str,
+    default_tracks: list[str],
+    output: Path,
+    title: str,
+    flanking: int,
+    log: logging.Logger,
+    track_config: Path | None = None,
+    report_type: str | None = None,
+    info_columns: list[str] | None = None,
+    use_apptainer: bool = False,
+    also_png: bool = False,
+    igver_cmd: str | None = None,
+    png_dpi: int = 300,
+    png_display_mode: str = "collapse",
+    png_out_dir: Path | None = None,
+) -> Path:
+    """Run create_report for one site set and return the HTML path.
+
+    Two track modes:
+      * Default — positional `--tracks <path> <path> ...`. Used when
+        `track_config` is None. BAM + VCF + extra + default annotations,
+        in render order top-to-bottom.
+      * track-config — `--track-config <json>`. Used when `track_config`
+        is provided. The JSON is the source of truth; default_tracks,
+        bams, vcf, extra_tracks are IGNORED (they go in the JSON instead).
+        This is the path required for ONT methylation viewers (named
+        tracks, per-track color/min/max/colorBy/displayMode).
+
+    When `also_png` is set, `build_pngs_with_igver` is invoked afterwards
+    with the same sites and flanking. On the positional path it receives
+    the exact track list handed to create_report; on the track-config path
+    it receives every non-http(s) `url` found in the JSON. If the JSON
+    cannot be read/parsed or has no local tracks, the PNG step is skipped
+    with a warning and the HTML path is still returned.
+
+    Returns:
+        `output`, the HTML path passed in.
+
+    Raises:
+        SystemExit: with create_report's exit status when it is non-zero.
+    """
+    validate_sites_bed(sites)
+    # Only validate BAMs on the positional --tracks path. The --track-config
+    # JSON has its own track-resolution semantics and may reference BAMs by
+    # arbitrary url:; create_report itself will fail loudly there if needed.
+    if track_config is None:
+        validate_bams(bams)
+    output.parent.mkdir(parents=True, exist_ok=True)
+
+    create_report_cmd = (
+        apptainer_create_report_prefix(IGVREPORTS_SIF) if use_apptainer
+        else [find_create_report()]
+    )
+
+    cmd: list[str] = list(create_report_cmd) + [
+        str(sites),
+        "--fasta", fasta,
+        "--flanking", str(flanking),
+    ]
+
+    # Single source of truth for the positional track order; reused verbatim
+    # by the PNG sidecar below so HTML and PNG can never drift apart.
+    ordered_tracks: list[str] = []
+    if track_config is not None:
+        cmd.extend(["--track-config", str(track_config)])
+        log.info(f"  track-config: {track_config}  (defaults+bams+vcf bypassed)")
+        if bams or vcf or extra_tracks or default_tracks:
+            log.warning(
+                "--track-config supplied; ignoring --bam/--vcf/--extra-track and "
+                "auto-resolved default tracks. Put everything in the JSON instead."
+            )
+    else:
+        # Track ordering: BAMs (data) -> VCF (calls) -> extra -> defaults (annotation, last).
+        ordered_tracks = [str(b) for b in bams]
+        if vcf:
+            ordered_tracks.append(str(vcf))
+        ordered_tracks.extend(str(t) for t in extra_tracks)
+        ordered_tracks.extend(default_tracks)
+        cmd.extend(["--tracks", *ordered_tracks])
+        log.info("  tracks (in render order):")
+        for i, t in enumerate(ordered_tracks, start=1):
+            log.info(f"    {i:>2}. {t}")
+
+    if report_type:
+        cmd.extend(["--type", report_type])
+    if info_columns:
+        cmd.extend(["--info-columns", *info_columns])
+
+    cmd.extend([
+        "--standalone",
+        "--title", title,
+        "--output", str(output),
+    ])
+
+    log.info(f"  cmd: {' '.join(cmd)}")
+    log.info(f"  flanking_bp: {flanking}")
+
+    proc = subprocess.run(cmd, capture_output=True, text=True)
+    if proc.returncode != 0:
+        log.error(f"create_report FAILED for {sites}")
+        log.error(f"stdout: {proc.stdout}")
+        log.error(f"stderr: {proc.stderr}")
+        raise SystemExit(proc.returncode)
+
+    if output.exists():
+        log.info(f"  HTML: {output} ({output.stat().st_size / 1024 / 1024:.2f} MB)")
+
+    if not also_png:
+        return output
+
+    # PNG sidecar — same regions, same tracks, written next to the HTML.
+    # On the --track-config path we extract every `url` from the JSON
+    # (file resources only — http(s) URLs are skipped since igver can't
+    # consume them); on the positional path we reuse the same ordered
+    # list we just passed to create_report.
+    if track_config is not None:
+        try:
+            with track_config.open() as fh:
+                cfg_tracks = json.load(fh)
+        except (OSError, ValueError) as e:
+            # OSError: unreadable file; ValueError: JSONDecodeError / bad encoding.
+            log.warning(f"--also-png: unable to parse --track-config JSON: {e} — skipping PNG step")
+            return output
+        png_tracks: list[str] = []
+        # Anything that is not a list of dicts contributes no tracks.
+        for t in cfg_tracks if isinstance(cfg_tracks, list) else []:
+            url = t.get("url") if isinstance(t, dict) else None
+            if url and not str(url).startswith(("http://", "https://")):
+                png_tracks.append(str(url))
+        if not png_tracks:
+            log.warning("--also-png: track-config has no local-path tracks — skipping PNG step")
+            return output
+    else:
+        png_tracks = ordered_tracks
+
+    out_dir = png_out_dir if png_out_dir is not None else (
+        output.parent / f"png_{output.stem}"
+    )
+    # The genome tag is inferred from a `<sample>.<genome>.html` style name.
+    # NOTE(review): an output stem without a dot silently falls back to
+    # "hg38"; confirm that is acceptable for non-hg38 single-sample runs.
+    parts = output.stem.split(".")
+    genome_tag = parts[-1] if len(parts) >= 2 else "hg38"
+    build_pngs_with_igver(
+        sites=sites,
+        tracks=png_tracks,
+        genome=genome_tag,
+        flanking=flanking,
+        out_dir=out_dir,
+        log=log,
+        html_path=output,
+        igver_cmd=igver_cmd,
+        dpi=png_dpi,
+        display_mode=png_display_mode,
+    )
+
+    return output
+
+
+def parse_samplesheet(path: Path) -> list[dict]:
+    """Parse a tab-separated samplesheet into one dict per data row.
+
+    The first line is the header; a leading `#` (optionally followed by
+    whitespace) is tolerated and column names are stripped. Data lines whose
+    first column is blank are skipped. Rows shorter than the header simply
+    lack the trailing keys.
+
+    Returns:
+        List of `{column: value}` dicts in file order (empty if the sheet
+        has no data rows).
+
+    Raises:
+        SystemExit: if the sheet has data rows but the header lacks
+            `sample` / `sites_bed`, or a data row has no value for either.
+    """
+    required = ("sample", "sites_bed")
+    numbered: list[tuple[int, dict]] = []
+    with path.open() as fh:
+        header = [
+            name.strip()
+            for name in fh.readline().lstrip("#").rstrip("\n").split("\t")
+        ]
+        # Data starts on physical line 2; keep the number for error messages.
+        for lineno, ln in enumerate(fh, start=2):
+            cols = ln.rstrip("\n").split("\t")
+            if not cols[0].strip():
+                continue
+            numbered.append((lineno, dict(zip(header, cols))))
+
+    # Validate against the header itself, not the first row: a short first
+    # row must not be reported as "missing columns".
+    if numbered and not set(required).issubset(header):
+        raise SystemExit(
+            f"ERROR: samplesheet must have columns: sample, sites_bed (got {header}).\n"
+            "       Optional columns: bam_tumor, bam_normal, vcf, extra_tracks (comma-separated)."
+        )
+    for lineno, row in numbered:
+        missing = [k for k in required if not (row.get(k) or "").strip()]
+        if missing:
+            raise SystemExit(
+                f"ERROR: samplesheet {path} line {lineno}: missing value for "
+                f"{', '.join(missing)}"
+            )
+    return [row for _, row in numbered]
+
+
+def derive_log_path(out_dir: Path, override: Path | None = None) -> Path:
+    """Return a timestamped `run_<YYYYmmdd_HHMMSS>.log` path in the log dir.
+
+    With an explicit `override`, that directory is used as-is. Otherwise the
+    preferred location is `logs/` next to the resolved `out_dir` (the
+    `results/<run>/{reports,logs}/` layout); if that sibling cannot be
+    created, `out_dir/logs/` is used instead. The chosen directory is
+    created before returning.
+    """
+    if override is None:
+        resolved = out_dir.resolve()
+        log_dir = resolved.parent / "logs"
+        try:
+            log_dir.mkdir(parents=True, exist_ok=True)
+        except OSError:
+            # Covers PermissionError too (it is an OSError subclass).
+            log_dir = resolved / "logs"
+    else:
+        log_dir = override
+    log_dir.mkdir(parents=True, exist_ok=True)
+    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    return log_dir / f"run_{stamp}.log"
+
+
+def write_index(report_paths: dict[str, Path], out: Path, title: str) -> Path:
+    """Write a minimal HTML index linking to each per-sample report.
+
+    Args:
+        report_paths: sample label -> report path; only the file name of
+            each path is used for the link, so reports are expected to sit
+            in the same directory as `out`.
+        out: destination HTML file (overwritten).
+        title: used for both `<title>` and the `<h1>` heading.
+
+    Returns:
+        `out`.
+    """
+    # Local imports keep this block self-contained.
+    from html import escape
+    from urllib.parse import quote
+
+    # Sample labels and the title come from user input (samplesheet / CLI);
+    # escape them so `<`, `&` or quotes cannot break or inject markup, and
+    # percent-encode the link target so spaces/`#`/`?` in names still resolve.
+    safe_title = escape(title)
+    items = "\n".join(
+        f'  <li><a href="{escape(quote(p.name))}">{escape(s)}</a></li>'
+        for s, p in sorted(report_paths.items())
+    )
+    out.write_text(
+        "<!doctype html>\n<html><head><title>"
+        + safe_title
+        + "</title><style>body{font-family:Arial,sans-serif;margin:2em}li{margin:0.4em 0}</style></head><body>\n"
+        f"<h1>{safe_title}</h1>\n<ul>\n{items}\n</ul>\n</body></html>\n"
+    )
+    return out
+
+
+def run_anchors_generate(
+    samplesheet: Path,
+    sites_files: list[Path],
+    out: Path,
+    fail_on_fail: bool,
+    log: logging.Logger,
+) -> None:
+    """Invoke `verify_anchors.py generate` once per distinct sites BED in the
+    cohort, merging into a single anchors TSV at `out`. Most cohorts share
+    one sites BED so this collapses to a single call; multi-sites cohorts
+    get one anchor block per sites file.
+
+    The first block that succeeds is copied whole (header included); later
+    blocks contribute only their non-empty, non-`#` lines. Each call writes
+    to a temporary `<out stem>.part<i>.tsv` which is removed afterwards.
+
+    Raises:
+        SystemExit: with the generator's exit status when it is non-zero
+            and `fail_on_fail` is set. Otherwise a failing sites file is
+            logged and skipped.
+    """
+    script = Path(__file__).resolve().parent / "verify_anchors.py"
+    if not script.exists():
+        log.warning(f"anchors generate: script not found at {script} — skipping")
+        return
+    out.parent.mkdir(parents=True, exist_ok=True)
+    out.write_text("")  # truncate; per-sites blocks appended below
+    merged_any = False  # has a block (and thus the header) been written yet?
+    for i, sites in enumerate(sites_files):
+        block = out.with_suffix(f".part{i}.tsv")
+        cmd = [
+            sys.executable, str(script), "generate",
+            "--samplesheet", str(samplesheet),
+            "--sites", str(sites),
+            "--out", str(block),
+        ]
+        log.info(f"anchors generate: {' '.join(cmd)}")
+        proc = subprocess.run(cmd, capture_output=True, text=True)
+        for line in (proc.stdout or "").splitlines():
+            log.info(f"  anchors > {line}")
+        for line in (proc.stderr or "").splitlines():
+            log.info(f"  anchors (stderr) > {line}")
+        if proc.returncode != 0:
+            # Don't leave a partial part-file behind for a failed block.
+            block.unlink(missing_ok=True)
+            if fail_on_fail:
+                raise SystemExit(proc.returncode)
+            log.warning(f"anchors generate exited {proc.returncode}; continuing")
+            continue
+        text = block.read_text()
+        # Merge: keep header from the first *successful* block (not merely
+        # index 0, which may have failed), body rows from all.
+        if not merged_any:
+            if text and not text.endswith("\n"):
+                text += "\n"  # keep later appended rows on their own lines
+            out.write_text(text)
+            merged_any = True
+        else:
+            with out.open("a") as fh:
+                for ln in text.splitlines():
+                    if not ln or ln.startswith("#"):
+                        continue
+                    fh.write(ln + "\n")
+        block.unlink()
+    if sites_files and not merged_any:
+        log.warning("anchors generate: no sites file produced anchors; output is empty")
+    log.info(f"anchors generate: wrote {out}")
+
+
+def run_anchors_verify(
+    samplesheet: Path,
+    reports_dir: Path,
+    genome: str,
+    anchors: Path,
+    fail_on_fail: bool,
+    log: logging.Logger,
+) -> None:
+    """Run `verify_anchors.py verify-cohort` against a finished cohort build.
+
+    The sibling script is executed with the current interpreter and told to
+    write `cohort_verify_anchors.tsv` into `reports_dir`. Its stdout/stderr
+    are mirrored into `log`. A missing script or anchors TSV is a logged
+    no-op. A non-zero exit raises SystemExit only when `fail_on_fail` is
+    set; otherwise it is logged as a warning.
+    """
+    verifier = Path(__file__).resolve().parent / "verify_anchors.py"
+    if not verifier.exists():
+        log.warning(f"anchors verify: script not found at {verifier} — skipping")
+        return
+    if not anchors.exists():
+        log.warning(f"anchors verify: anchors TSV missing: {anchors} — skipping")
+        return
+
+    tsv_path = reports_dir / "cohort_verify_anchors.tsv"
+    argv = [sys.executable, str(verifier), "verify-cohort"]
+    argv += ["--samplesheet", str(samplesheet)]
+    argv += ["--reports-dir", str(reports_dir)]
+    argv += ["--genome", genome]
+    argv += ["--anchors", str(anchors)]
+    argv += ["--out", str(tsv_path)]
+    if fail_on_fail:
+        argv += ["--fail-on-fail"]
+
+    log.info(f"anchors verify: {' '.join(argv)}")
+    result = subprocess.run(argv, capture_output=True, text=True)
+    for out_line in (result.stdout or "").splitlines():
+        log.info(f"  anchors > {out_line}")
+    for err_line in (result.stderr or "").splitlines():
+        log.info(f"  anchors (stderr) > {err_line}")
+
+    status = result.returncode
+    log.info(f"anchors verify: TSV={tsv_path} exit={status}")
+    if status == 0:
+        return
+    if fail_on_fail:
+        raise SystemExit(status)
+    log.warning(f"anchors verify exited {status}; --fail-on-fail not set, continuing")
+
+
+def run_cohort_verify(
+    samplesheet: Path,
+    reports_dir: Path,
+    genome: str,
+    db_config: Path,
+    fail_on_fail: bool,
+    log: logging.Logger,
+) -> None:
+    """Run the sibling verify_cohort.py once a cohort build has finished.
+
+    The verifier is asked to write `cohort_verify.tsv` and
+    `cohort_verify.summary.md` into `reports_dir`. Its stdout is mirrored
+    into `log` at INFO and its stderr at WARNING. A missing script is a
+    logged no-op. A non-zero exit raises SystemExit only when
+    `fail_on_fail` is set; otherwise it is logged and the build continues.
+    """
+    verifier = Path(__file__).resolve().parent / "verify_cohort.py"
+    if not verifier.exists():
+        log.warning(f"verify_cohort: script not found at {verifier} — skipping")
+        return
+
+    tsv_path = reports_dir / "cohort_verify.tsv"
+    summary_path = reports_dir / "cohort_verify.summary.md"
+    argv = [sys.executable, str(verifier)]
+    argv += ["--samplesheet", str(samplesheet)]
+    argv += ["--reports-dir", str(reports_dir)]
+    argv += ["--genome", genome]
+    argv += ["--db-config", str(db_config)]
+    argv += ["--out", str(tsv_path)]
+    argv += ["--summary", str(summary_path)]
+    if fail_on_fail:
+        argv += ["--fail-on-fail"]
+
+    log.info(f"verify_cohort: running {' '.join(argv)}")
+    result = subprocess.run(argv, capture_output=True, text=True)
+    # Keep the run log as the single audit trail: replay verifier output into it.
+    for out_line in (result.stdout or "").splitlines():
+        log.info(f"  verify_cohort > {out_line}")
+    for err_line in (result.stderr or "").splitlines():
+        log.warning(f"  verify_cohort (stderr) > {err_line}")
+
+    status = result.returncode
+    log.info(f"verify_cohort: TSV={tsv_path} summary={summary_path} exit={status}")
+    if status == 0:
+        return
+    if fail_on_fail:
+        raise SystemExit(status)
+    log.warning(f"verify_cohort: exited {status} but --fail-on-fail not set; continuing")
+
+
+def main() -> None:
+    """CLI entry point.
+
+    Parses arguments, resolves the genome, FASTA and default tracks, sets up
+    the run log, then either builds one report (`--sites`) or a cohort
+    (`--samplesheet`, optionally in parallel via `--jobs`). Cohort mode also
+    writes an index.html and, unless disabled, runs the cohort verifier and
+    the optional anchors generate/verify step.
+
+    Raises:
+        SystemExit: on invalid argument combinations, missing FASTA/index,
+            any failed sample build, or a failing verifier when
+            `--fail-on-fail` is set.
+    """
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--genome", required=True, help="hg38 | mm10 | mm39 | t2t | GRCh37 (alias-tolerant)")
+    ap.add_argument("--db-config", default=(str(DEFAULT_DBCONFIG) if DEFAULT_DBCONFIG else None), help=(
+        "YAML resolving genome -> {fasta, CpGIslands, gtf, repMaskerBed}. "
+        "Schema: see references/databases_config_paths.md. "
+        "Override via $IGV_REPORTS_DB_CONFIG, or skip entirely with --fasta + --no-default-tracks."
+    ))
+    ap.add_argument("--fasta", help=(
+        "Explicit FASTA path; bypasses --db-config for FASTA lookup. "
+        "Required when --db-config is not set or doesn't list the chosen genome. "
+        "Requires a sibling .fai (run `samtools faidx`)."
+    ))
+    ap.add_argument("--no-default-tracks", action="store_true", help=(
+        "Skip the CpG-islands/gencode/RepeatMasker auto-tracks from --db-config. "
+        "Combine with --fasta and --extra-track to operate without a databases YAML."
+    ))
+    ap.add_argument("--flanking", type=int, default=DEFAULT_FLANKING)
+    ap.add_argument("--extra-track", action="append", default=[], help="(repeat) extra track path; rendered above default annotations")
+
+    mode = ap.add_mutually_exclusive_group(required=True)
+    mode.add_argument("--samplesheet", help="TSV: sample, [bam_tumor, bam_normal, vcf,] sites_bed[, extra_tracks]")
+    mode.add_argument("--sites", help="path to sites BED for single-sample mode")
+
+    ap.add_argument("--bam", nargs="*", default=[], help="BAM/CRAM tracks (single-sample mode)")
+    ap.add_argument("--vcf", help="VCF track (single-sample mode)")
+
+    ap.add_argument("--output", help="output HTML path (single-sample mode)")
+    ap.add_argument("--output-dir", help="output dir for cohort mode (default: ./reports)")
+    ap.add_argument("--title", default=None, help="report title; defaults to sample name + genome")
+
+    ap.add_argument(
+        "--track-config",
+        help="path to a tracks.json (igv.js track config). When set, the JSON is "
+             "passed straight to create_report --track-config and all default "
+             "tracks / --bam / --vcf / --extra-track are bypassed. Use this for "
+             "ONT methylation viewers — see examples/methylation_ont/.",
+    )
+    ap.add_argument(
+        "--type",
+        dest="report_type",
+        choices=["mutation", "fusion", "junction"],
+        default=None,
+        help="create_report --type. Sets viewer behaviour at each site.",
+    )
+    ap.add_argument(
+        "--info-columns",
+        nargs="*",
+        default=[],
+        help="VCF INFO or BED columns to surface in the variant table. "
+             "For BED sites, 'name' is the most useful.",
+    )
+    ap.add_argument(
+        "--also-png",
+        action="store_true",
+        help="After create_report finishes, invoke igver against the same "
+             "sites BED + track list to produce per-region PNGs alongside "
+             "the HTML. PNGs land in <html_parent>/png_<sample>/png/ with "
+             "filename `<chr-start-end>.<uid>.png` (uid = BED `name` col, "
+             "auto-assigned `region_<idx>` when missing). A manifest TSV "
+             "bridges PNG ↔ HTML rows. Requires `igver` on PATH or "
+             "$IGVER_CMD / --igver-cmd override.",
+    )
+    ap.add_argument(
+        "--igver-cmd",
+        default=None,
+        help="Override the igver invocation. Resolution order: this flag, "
+             "$IGVER_CMD, `igver` on PATH. Pass the full command including "
+             "any apptainer wrapper, e.g. 'apptainer exec /path/to/igver.sif igver'.",
+    )
+    ap.add_argument(
+        "--png-dpi",
+        type=int,
+        default=300,
+        help="DPI for igver PNG output (default 300; bump to 600 for slide-quality).",
+    )
+    ap.add_argument(
+        "--png-display-mode",
+        choices=["expand", "collapse", "squish"],
+        default="collapse",
+        help="igver `-d` flag. Default 'collapse' to match the HTML's BAM "
+             "BAM_DEFAULTS displayMode. Use 'expand' for per-read SV inspection.",
+    )
+    ap.add_argument(
+        "--apptainer",
+        action=argparse.BooleanOptionalAction,
+        default=None,
+        help="Run create_report from inside the apptainer SIF pointed to "
+             "by $IGV_REPORTS_SIF (dedicated igv-reports 1.16.0 SIF, ~83 MB; "
+             "pull from the Galaxy depot). Skips the NFS conda cold-start "
+             "tax on HPC. Default: auto-detect — on if SLURM_JOB_ID is set "
+             "AND $IGV_REPORTS_SIF points at an existing SIF, off otherwise. "
+             "Override either way with --apptainer / --no-apptainer.",
+    )
+    ap.add_argument(
+        "--log-dir",
+        help="explicit log directory. Default: sibling 'logs/' of the output "
+             "dir (matches results/<run>/{reports,logs}/ lab layout); falls "
+             "back to <out_dir>/logs/ when the sibling is unwritable.",
+    )
+    ap.add_argument(
+        "--jobs",
+        "-j",
+        type=int,
+        default=1,
+        help="Number of parallel per-sample builds in cohort (--samplesheet) "
+             "mode. Each worker invokes create_report in a subprocess, so the "
+             "win comes from running multiple slicers concurrently against "
+             "different BAMs. I/O-bound on the BAM-slice step, so threads "
+             "scale well to ~min(N_samples, N_cores). Default 1 (sequential, "
+             "preserves prior behavior). Has no effect in single-sample mode.",
+    )
+    ap.add_argument(
+        "--verify",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help="Run scripts/verify_cohort.py at the end of cohort builds "
+             "(--samplesheet mode). Single-sample (--sites) mode is unaffected "
+             "and emits no cohort verify TSV. Default: on. Use --no-verify to "
+             "skip. The verifier inherits --fail-on-fail.",
+    )
+    ap.add_argument(
+        "--fail-on-fail",
+        action="store_true",
+        help="Propagated to verify_cohort.py and verify_anchors.py: exit "
+             "nonzero if any verifier check is FAIL. Only meaningful with "
+             "--verify / --anchors-mode and --samplesheet.",
+    )
+    ap.add_argument(
+        "--anchors-mode",
+        choices=["off", "generate", "verify"],
+        default="off",
+        help="Content (read-count) verification — opt-in because it shells "
+             "out to samtools per (sample, region) and is slow. 'generate' "
+             "runs samtools view -c against source BAMs at build time and "
+             "freezes the counts to --anchors (becomes regression fixture). "
+             "'verify' decodes each BAM slice from the built HTMLs and "
+             "compares to --anchors. 'off' (default) skips. See "
+             "examples/anchor_verify_demo/.",
+    )
+    ap.add_argument(
+        "--anchors",
+        help="Path to anchors TSV. With --anchors-mode generate: output. "
+             "With --anchors-mode verify: input. Ignored when mode=off.",
+    )
+
+    args = ap.parse_args()
+
+    # Fail fast on an incomplete anchors request instead of discovering it
+    # only after the whole cohort has been built. Anchors are a cohort-mode
+    # feature, so single-sample runs are not affected.
+    if args.samplesheet and args.anchors_mode != "off" and not args.anchors:
+        raise SystemExit("ERROR: --anchors PATH required when --anchors-mode != off")
+
+    genome = resolve_genome(args.genome)
+    # Only load db-config when something actually needs it (fasta lookup or
+    # default tracks). Saves the warning noise + lets a fully-explicit
+    # --fasta + --no-default-tracks invocation run with no YAML at all.
+    need_db_config = (not args.fasta) or (not args.no_default_tracks)
+    if need_db_config and not args.db_config:
+        raise SystemExit(
+            "ERROR: need a databases YAML to resolve FASTA / default tracks.\n"
+            "  Either:\n"
+            "    - pass --fasta /path/to/genome.fa --no-default-tracks "
+            "[--extra-track ...], or\n"
+            "    - set $IGV_REPORTS_DB_CONFIG / --db-config to a YAML matching\n"
+            "      the schema in references/databases_config_paths.md."
+        )
+    cfg = load_db_config(Path(args.db_config)) if need_db_config else {}
+    if args.fasta:
+        fasta = args.fasta
+        if not Path(fasta).exists():
+            raise SystemExit(f"ERROR: --fasta path not found: {fasta}")
+        if not Path(fasta + ".fai").exists():
+            raise SystemExit(
+                f"ERROR: FASTA index missing for {fasta} — run `samtools faidx {fasta}`"
+            )
+    else:
+        fasta = fasta_for(cfg, genome)
+
+    # Logger placed alongside the output. See derive_log_path docstring.
+    if args.samplesheet:
+        out_dir = Path(args.output_dir or "reports")
+    else:
+        if not args.output:
+            raise SystemExit("ERROR: --output required in single-sample mode")
+        out_dir = Path(args.output).parent
+    out_dir.mkdir(parents=True, exist_ok=True)
+    log_path = derive_log_path(out_dir, Path(args.log_dir) if args.log_dir else None)
+    log = setup_logger(log_path)
+
+    log.info(f"=== igv-reports skill, genome={genome} ===")
+    log.info(f"db_config: {args.db_config}")
+    log.info(f"fasta:     {fasta}")
+    log.info(f"flanking:  {args.flanking} bp (default {DEFAULT_FLANKING})")
+
+    # Resolve --apptainer auto-detect. Tri-state:
+    #   user said --apptainer        -> True
+    #   user said --no-apptainer     -> False
+    #   user said nothing (None)     -> True iff SLURM_JOB_ID is in env AND
+    #                                   $IGV_REPORTS_SIF points at an existing SIF
+    # Rationale: on a fresh SLURM compute node, the NFS conda cold-start tax
+    # (~1-2 M page faults, ~2.5 us each) is large; the dedicated SIF skips it.
+    # On the login node, conda is usually warm and the simpler path wins.
+    # See rules/apptainer_vs_conda.md.
+    slurm_job = os.environ.get("SLURM_JOB_ID")
+    if args.apptainer is None:
+        # Auto-enable SIF mode only when both (a) we're on a SLURM compute
+        # node where the conda cold-start tax bites, AND (b) $IGV_REPORTS_SIF
+        # points at an existing SIF. The existence check protects users from
+        # a confusing SIF-not-found error when they didn't ask for apptainer.
+        sif_ok = IGVREPORTS_SIF is not None and IGVREPORTS_SIF.exists()
+        args.apptainer = bool(slurm_job) and sif_ok
+        if args.apptainer:
+            decision = f"auto-enabled (SLURM_JOB_ID={slurm_job}, SIF={IGVREPORTS_SIF})"
+        elif slurm_job and IGVREPORTS_SIF is None:
+            decision = (
+                f"auto-disabled (SLURM_JOB_ID={slurm_job} set, but $IGV_REPORTS_SIF unset; "
+                f"falling back to PATH create_report)"
+            )
+        elif slurm_job:
+            decision = (
+                f"auto-disabled (SLURM_JOB_ID={slurm_job} set, but SIF not found at "
+                f"{IGVREPORTS_SIF}; falling back to PATH create_report)"
+            )
+        else:
+            decision = "auto-disabled (no SLURM_JOB_ID; PATH create_report path)"
+        log.info(f"apptainer: {decision}")
+    else:
+        log.info(f"apptainer: {args.apptainer} (explicit)")
+
+    if args.no_default_tracks:
+        default_tracks: list[str] = []
+        log.info("default tracks: skipped (--no-default-tracks)")
+    else:
+        default_tracks = resolve_default_tracks(cfg, genome, log)
+        log.info(f"default tracks resolved: {len(default_tracks)}")
+        for t in default_tracks:
+            log.info(f"  - {t}")
+
+    extra_tracks = [Path(p) for p in args.extra_track]
+
+    track_config = Path(args.track_config) if args.track_config else None
+    if track_config is not None and not track_config.exists():
+        raise SystemExit(f"ERROR: --track-config file not found: {track_config}")
+
+    if args.sites:
+        title = args.title or f"{Path(args.sites).stem} ({genome})"
+        build_one(
+            sites=Path(args.sites),
+            bams=[Path(b) for b in args.bam],
+            vcf=Path(args.vcf) if args.vcf else None,
+            extra_tracks=extra_tracks,
+            fasta=fasta,
+            default_tracks=default_tracks,
+            output=Path(args.output),
+            title=title,
+            flanking=args.flanking,
+            log=log,
+            track_config=track_config,
+            report_type=args.report_type,
+            info_columns=args.info_columns,
+            use_apptainer=args.apptainer,
+            also_png=args.also_png,
+            igver_cmd=args.igver_cmd,
+            png_dpi=args.png_dpi,
+            png_display_mode=args.png_display_mode,
+        )
+    else:
+        rows = parse_samplesheet(Path(args.samplesheet))
+        n_jobs = max(1, args.jobs)
+        n_workers = min(n_jobs, len(rows)) if rows else 1
+        run_mode = "sequential" if n_workers == 1 else f"parallel ({n_workers} workers)"
+        log.info(f"cohort: {len(rows)} samples from {args.samplesheet} — {run_mode}")
+        report_paths: dict[str, Path] = {}
+        failures: list[tuple[str, str]] = []  # (sample, error_message)
+
+        def _build_row(row: dict) -> tuple[str, Path]:
+            """Build one sample. Runs in a worker thread when --jobs > 1.
+
+            Returns (sample, out_html). Raises on build failure — caught by
+            the executor and surfaced via future.exception() in the caller."""
+            sample = row["sample"]
+            sites = Path(row["sites_bed"])
+            bams = [Path(row[k]) for k in ("bam_tumor", "bam_normal") if row.get(k)]
+            vcf = Path(row["vcf"]) if row.get("vcf") else None
+            sample_extras = list(extra_tracks)
+            if row.get("extra_tracks"):
+                sample_extras += [Path(p.strip()) for p in row["extra_tracks"].split(",") if p.strip()]
+            out_html = out_dir / f"{sample}.{genome}.html"
+            title = args.title or f"{sample} ({genome})"
+            log.info(f"=== {sample} ===")
+            build_one(
+                sites=sites, bams=bams, vcf=vcf, extra_tracks=sample_extras,
+                fasta=fasta, default_tracks=default_tracks,
+                output=out_html, title=title, flanking=args.flanking, log=log,
+                track_config=track_config,
+                report_type=args.report_type,
+                info_columns=args.info_columns,
+                use_apptainer=args.apptainer,
+                also_png=args.also_png,
+                igver_cmd=args.igver_cmd,
+                png_dpi=args.png_dpi,
+                png_display_mode=args.png_display_mode,
+            )
+            return sample, out_html
+
+        # ThreadPoolExecutor is the right primitive here: build_one() spends
+        # nearly all its wall time inside subprocess.run(create_report), which
+        # releases the GIL — so threads scale linearly to the number of
+        # concurrent create_report processes the host can support. Don't use
+        # ProcessPoolExecutor: build_one() captures a non-picklable logger.
+        if n_workers == 1:
+            for row in rows:
+                try:
+                    sample, out_html = _build_row(row)
+                    report_paths[sample] = out_html
+                except SystemExit as exc:
+                    failures.append((row.get("sample", "?"), f"exit={exc.code}"))
+                except Exception as exc:
+                    failures.append((row.get("sample", "?"), f"{type(exc).__name__}: {exc}"))
+        else:
+            with ThreadPoolExecutor(max_workers=n_workers) as pool:
+                future_to_sample = {
+                    pool.submit(_build_row, row): row.get("sample", "?") for row in rows
+                }
+                # as_completed lets failures surface immediately while other
+                # samples continue building. We collect all errors and decide
+                # whether to fail the whole run at the end.
+                for fut in as_completed(future_to_sample):
+                    sample_name = future_to_sample[fut]
+                    try:
+                        sample, out_html = fut.result()
+                        report_paths[sample] = out_html
+                    except SystemExit as exc:
+                        failures.append((sample_name, f"exit={exc.code}"))
+                    except Exception as exc:
+                        failures.append((sample_name, f"{type(exc).__name__}: {exc}"))
+
+        if failures:
+            log.error(f"cohort: {len(failures)} of {len(rows)} samples FAILED:")
+            for s, err in failures:
+                log.error(f"  - {s}: {err}")
+            # Always raise on build failures — these aren't verifier soft-fails,
+            # they're missing HTMLs. --fail-on-fail is for verifier behavior.
+            raise SystemExit(1)
+
+        idx = write_index(report_paths, out_dir / "index.html", f"igv-reports cohort ({genome})")
+        log.info(f"Wrote cohort index: {idx}")
+
+        if not args.verify:
+            log.info("verify_cohort: skipped (--no-verify)")
+        elif not args.db_config:
+            # Reachable with --fasta + --no-default-tracks and no YAML.
+            # run_cohort_verify always forwards --db-config, and
+            # Path(None) would raise TypeError after every build finished.
+            log.warning(
+                "verify_cohort: skipped (no --db-config available to pass to the verifier)"
+            )
+        else:
+            run_cohort_verify(
+                samplesheet=Path(args.samplesheet),
+                reports_dir=out_dir,
+                genome=genome,
+                db_config=Path(args.db_config),
+                fail_on_fail=args.fail_on_fail,
+                log=log,
+            )
+
+        if args.anchors_mode != "off":
+            # --anchors presence was already validated right after parsing.
+            anchors_path = Path(args.anchors)
+            if args.anchors_mode == "generate":
+                sites_files = sorted({Path(r["sites_bed"]) for r in rows if r.get("sites_bed")})
+                run_anchors_generate(
+                    samplesheet=Path(args.samplesheet),
+                    sites_files=sites_files,
+                    out=anchors_path,
+                    fail_on_fail=args.fail_on_fail,
+                    log=log,
+                )
+            else:  # verify
+                run_anchors_verify(
+                    samplesheet=Path(args.samplesheet),
+                    reports_dir=out_dir,
+                    genome=genome,
+                    anchors=anchors_path,
+                    fail_on_fail=args.fail_on_fail,
+                    log=log,
+                )
+
+    log.info("=== DONE: build_igvreports.py completed successfully ===")
+
+
+# Script entry point: run the CLI only when executed directly, not on import.
+if __name__ == "__main__":
+    main()
diff --git a/igv-reports/scripts/generate_tracks_json.py b/igv-reports/scripts/generate_tracks_json.py
new file mode 100755
index 0000000..baedd42
--- /dev/null
+++ b/igv-reports/scripts/generate_tracks_json.py
@@ -0,0 +1,346 @@
+#!/usr/bin/env python3
+"""generate_tracks_json.py — build an igv-reports tracks.json from a YAML spec.
+
+Author: Samuel Ahuno
+Purpose:
+  ONT methylation viewers need named, colored, y-axis-locked tracks that
+  the positional `create_report --tracks` API cannot express. The path is
+  `--track-config <json>`, but hand-writing that JSON for 4-8 samples
+  with 5mC + 5hmC bedGraph pairs each is tedious and error-prone.
+
+  This helper consumes a small YAML spec (see
+  examples/methylation_ont/tracks_spec.example.yaml) and emits the JSON
+  with the right defaults baked in:
+
+    * BAM tracks  -> colorBy=basemod2, showSoftClips=false, displayMode=COLLAPSED
+    * bedGraph    -> type=wig, min=0, max=100 (methylation percent)
+    * Annotation  -> displayMode honored, color honored
+    * Group color -> reads from `group_colors:` map keyed by sample.group
+
+Usage:
+  python generate_tracks_json.py \
+      --spec examples/methylation_ont/tracks_spec.example.yaml \
+      --run-dir examples/methylation_ont \
+      --out examples/methylation_ont/tracks.json
+
+  --run-dir is prepended to any relative `url:` path in the spec, so the
+  emitted JSON has absolute paths that create_report can resolve from any
+  working directory.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+try:
+    import yaml
+except ImportError:
+    print("ERROR: PyYAML not available. Install with: pip install pyyaml", file=sys.stderr)
+    sys.exit(2)
+
+
+BAM_DEFAULTS = {
+    "format": "bam",
+    "type": "alignment",
+    "colorBy": "basemod2",
+    "showSoftClips": False,
+    "displayMode": "COLLAPSED",
+}
+
+BEDGRAPH_DEFAULTS = {
+    "format": "bedgraph",
+    "type": "wig",
+    "min": 0,
+    "max": 100,
+}
+
+
+# YAML shortcut keys (annotation: - default: <KEY>) map to the
+# databases_config.yaml field for each genome plus display metadata.
+# Colors are Okabe-Ito where chosen — colorblind-safe. format/displayMode
+# match what build_igvreports.py emits on the non-track-config path.
+ANNOTATION_DEFAULTS = {
+    "cgi": {
+        "display_name": "CpG islands",
+        "yaml_key": "CpGIslands",
+        "format": "bed",
+        "displayMode": "EXPANDED",
+        "color": "rgb(0,158,115)",       # Okabe-Ito green
+    },
+    "gencode": {
+        "display_name": "Gencode",
+        "yaml_key": "gtf",
+        "format": "gff",                  # works for .gtf.gz and .gff3.gz
+        "displayMode": "EXPANDED",
+        "color": None,                    # IGV.js renders its own gene-track palette
+    },
+    "repmasker": {
+        "display_name": "RepeatMasker",
+        "yaml_key": "repMaskerBed",
+        "format": "bed",
+        "displayMode": "COLLAPSED",
+        "color": None,
+    },
+    "epdnew_coding": {
+        "display_name": "EPDnew (coding)",
+        "yaml_key": "EPDnewCoding",
+        "format": "bed",
+        "displayMode": "EXPANDED",
+        "color": "rgb(213,94,0)",         # Okabe-Ito vermillion
+    },
+    "epdnew_noncoding": {
+        "display_name": "EPDnew (non-coding)",
+        "yaml_key": "EPDnewNonCoding",
+        "format": "bed",
+        "displayMode": "EXPANDED",
+        "color": "rgb(86,180,233)",       # Okabe-Ito sky blue
+    },
+}
+
+
+def load_db_config(path: Path) -> dict:
+    """Parse the databases YAML at `path`; a missing file yields {} plus a
+    stderr warning. Kept in step with its twin in build_igvreports.py."""
+    if path.exists():
+        with path.open() as handle:
+            parsed = yaml.safe_load(handle)
+        return parsed or {}
+    sys.stderr.write(
+        f"[generate_tracks_json] WARNING: db-config not found at {path}\n"
+        "  Annotation entries using `default:` shortcuts will fail to resolve.\n"
+        "  Use explicit `url:` paths, or set $IGV_REPORTS_DB_CONFIG.\n")
+    return {}
+
+
+def resolve_annotation_default(default_key: str, genome: str, cfg: dict) -> dict:
+    """Look up a built-in annotation by short key (`cgi`, `gencode`, ...) for
+    `genome` under `reference_genomes.local.<genome>` of the databases YAML.
+
+    Returns a partial track dict with `display_name` / `url` / `format` /
+    `displayMode` always set and `indexURL` / `color` only when applicable;
+    the caller merges it with per-entry overrides from the spec.
+
+    Raises SystemExit if the key is unknown, the genome or its field is absent
+    (including sections left empty in the YAML), or the resolved path doesn't
+    exist on disk."""
+    if default_key not in ANNOTATION_DEFAULTS:
+        valid = ", ".join(sorted(ANNOTATION_DEFAULTS))
+        raise SystemExit(
+            f"ERROR: unknown annotation default '{default_key}'. Valid: {valid}"
+        )
+    meta = ANNOTATION_DEFAULTS[default_key]
+    # `or {}` at every level: a YAML key with no value loads as None, not {}.
+    g = ((cfg.get("reference_genomes") or {}).get("local") or {}).get(genome) or {}
+    if not g:
+        raise SystemExit(
+            f"ERROR: db-config has no entry for genome '{genome}' "
+            f"(needed to resolve `default: {default_key}`)."
+        )
+    yaml_key = meta["yaml_key"]
+    raw = g.get(yaml_key)
+    if not raw:
+        raise SystemExit(
+            f"ERROR: db-config has no '{yaml_key}' for genome '{genome}' "
+            f"(needed to resolve `default: {default_key}`)."
+        )
+    # For hg38 gencode, prefer the bgzip+tabix .gff3.gz sibling if present
+    # (mirrors build_igvreports.py:resolve_default_tracks gencode handling).
+    url = raw
+    if default_key == "gencode" and genome == "hg38":
+        sibling = Path(raw).parent / "gencode.v47.annotation.gff3.gz"
+        if sibling.exists() and (sibling.parent / (sibling.name + ".tbi")).exists():
+            url = str(sibling)
+    if not Path(url).exists():
+        raise SystemExit(
+            f"ERROR: resolved path missing on disk for `default: {default_key}` "
+            f"({genome}): {url}"
+        )
+    # indexURL: include only if it actually exists. tabix .tbi is the standard
+    # sibling for bgzipped tracks; igv.js falls back gracefully when absent.
+    index_url = next((c for c in (url + ".tbi", url + ".csi") if Path(c).exists()), None)
+
+    track: dict = {
+        "display_name": meta["display_name"],
+        "url": url,
+        "format": meta["format"],
+        "displayMode": meta["displayMode"],
+    }
+    if index_url is not None:
+        track["indexURL"] = index_url
+    if meta["color"] is not None:
+        track["color"] = meta["color"]
+    return track
+
+
+def abspath_relative_to(p: str, run_dir: Path) -> str:
+    """Return `p` as an absolute path string.
+
+    Relative inputs are anchored at `run_dir` and resolved; absolute ones pass through."""
+    candidate = Path(p)
+    return str(candidate if candidate.is_absolute() else (run_dir / candidate).resolve())
+
+
+def build_annotation_tracks(spec: dict, run_dir: Path, cfg: dict | None = None) -> list[dict]:
+    """Build the annotation-track list. Each entry in `spec["annotation"]`
+    is either:
+
+      Explicit (existing behavior):
+        - name: "Gencode v47"
+          url: /abs/or/relative/path.gff3.gz
+          indexURL: /abs/or/relative/path.gff3.gz.tbi  (optional)
+          format: gff                                  (optional, default bed)
+          displayMode: EXPANDED                        (optional)
+          color: "rgb(...)"                            (optional)
+
+      Shortcut (needs top-level `genome:` in spec and a loaded `cfg`):
+        - default: gencode    # any key of ANNOTATION_DEFAULTS
+          name: "Gencode v47"  # OPTIONAL override of the canned display name
+          color: "rgb(...)"    # OPTIONAL override of the canned color
+          displayMode: COLLAPSED  # OPTIONAL override
+
+    Shortcut entries are resolved via resolve_annotation_default() for the spec's
+    `genome:` (SystemExit if it is missing); a blank override keeps the canned
+    value. Explicit paths resolve against `run_dir`; `name`/`url` are required."""
+    out: list[dict] = []
+    genome = spec.get("genome")
+    for a in spec.get("annotation") or []:  # `annotation:` with no value loads as None
+        if "default" in a:
+            if not genome:
+                raise SystemExit(
+                    "ERROR: annotation entry uses `default:` but spec is missing "
+                    "top-level `genome:` — add e.g. `genome: hg38` to the YAML."
+                )
+            resolved = resolve_annotation_default(a["default"], genome, cfg or {})
+            track = {
+                "name": a.get("name") or resolved["display_name"],
+                "url": resolved["url"],
+                "format": a.get("format") or resolved["format"],
+                "type": "annotation",
+                "displayMode": a.get("displayMode") or resolved["displayMode"],
+            }
+            if "indexURL" in resolved:
+                track["indexURL"] = resolved["indexURL"]
+            if a.get("color") or resolved.get("color"):
+                track["color"] = a.get("color") or resolved.get("color")
+            out.append(track)
+            continue
+        # Explicit-path entry — blank optional keys fall back to the defaults.
+        track = {
+            "name": a["name"],
+            "url": abspath_relative_to(a["url"], run_dir),
+            "format": a.get("format") or "bed",
+            "type": "annotation",
+            "displayMode": a.get("displayMode") or "EXPANDED",
+        }
+        if a.get("indexURL"):
+            track["indexURL"] = abspath_relative_to(a["indexURL"], run_dir)
+        if a.get("color"):
+            track["color"] = a["color"]
+        out.append(track)
+    return out
+
+
+def build_sample_tracks(spec: dict, run_dir: Path) -> list[dict]:
+    """Build the per-sample tracks declared under `spec["samples"]`.
+
+    Per sample, in this order and only for the keys it sets:
+      * `bam`           -> alignment track (BAM_DEFAULTS); index assumed at `<bam>.bai`
+      * `bedgraph_5mC`  -> wig track "<name> 5mC"  (BEDGRAPH_DEFAULTS)
+      * `bedgraph_5hmC` -> wig track "<name> 5hmC" (BEDGRAPH_DEFAULTS)
+
+    bedGraph colors are read from `spec["group_colors"][group]["5mC" | "5hmC"]`,
+    with `group` defaulting to "default". Relative paths resolve against
+    `run_dir`. Raises KeyError if a sample has no `name`."""
+    # `or {}` / `or []`: a YAML key written with no value loads as None.
+    group_colors = spec.get("group_colors") or {}
+    out: list[dict] = []
+    for s in spec.get("samples") or []:
+        name = s["name"]
+        group = s.get("group", "default")
+        gc = group_colors.get(group) or {}
+
+        # BAM (per-read basemod2 view).
+        if s.get("bam"):
+            bam_abs = abspath_relative_to(s["bam"], run_dir)
+            track = {"name": name, "url": bam_abs, "indexURL": bam_abs + ".bai"}
+            track.update(BAM_DEFAULTS)
+            out.append(track)
+
+        # 5mC then 5hmC bedGraph: same track shape, only the label differs.
+        for mod in ("5mC", "5hmC"):
+            path = s.get(f"bedgraph_{mod}")
+            if not path:
+                continue
+            track = {"name": f"{name} {mod}", "url": abspath_relative_to(path, run_dir)}
+            track.update(BEDGRAPH_DEFAULTS)
+            if gc.get(mod):
+                track["color"] = gc[mod]
+            out.append(track)
+
+    return out
+
+
+def main() -> None:
+    """CLI entry point: read the YAML spec, build annotation + sample tracks, write tracks.json."""
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--spec", required=True, help="YAML spec (see tracks_spec.example.yaml)")
+    ap.add_argument("--run-dir", required=True, help="dir that relative urls in spec are resolved against")
+    ap.add_argument("--out", required=True, help="output tracks.json path")
+    ap.add_argument("--db-config", default=os.environ.get("IGV_REPORTS_DB_CONFIG"), help=(
+        "Databases YAML used to resolve `annotation: - default: <key>` shortcuts "
+        "(cgi/gencode/repmasker/epdnew_coding/epdnew_noncoding) for the spec's "
+        "`genome:`. Defaults to $IGV_REPORTS_DB_CONFIG. "
+        "Not loaded if no shortcut entries appear. The YAML schema is "
+        "`reference_genomes.local.<genome>.{CpGIslands,gtf,repMaskerBed,"
+        "EPDnewCoding,EPDnewNonCoding}` — see references/databases_config_paths.md."
+    ))
+    ap.add_argument("--force", action="store_true",
+                    help="overwrite --out if it already exists (default: refuse and exit 2 so hand-edits aren't clobbered)")
+    args = ap.parse_args()
+
+    spec_path = Path(args.spec)
+    if not spec_path.exists():
+        raise SystemExit(f"ERROR: spec not found: {spec_path}")
+    run_dir = Path(args.run_dir).resolve()
+    if not run_dir.exists():
+        raise SystemExit(f"ERROR: run-dir not found: {run_dir}")
+
+    with spec_path.open() as fh:
+        spec = yaml.safe_load(fh) or {}  # an empty spec file loads as None
+
+    # Only load the db-config if any annotation entry uses the shortcut form;
+    # specs that hand-paste paths remain self-contained.
+    needs_cfg = any("default" in a for a in spec.get("annotation") or [])
+    cfg: dict = {}
+    if needs_cfg:
+        if not args.db_config:
+            raise SystemExit(
+                "ERROR: spec has `default:` annotation shortcuts but --db-config "
+                "is not set and $IGV_REPORTS_DB_CONFIG is empty.\n"
+                "       Pass --db-config /path/to/databases.yaml, or convert the "
+                "shortcuts to explicit `url:` entries."
+            )
+        cfg = load_db_config(Path(args.db_config))
+
+    tracks = build_annotation_tracks(spec, run_dir, cfg) + build_sample_tracks(spec, run_dir)
+
+    out_path = Path(args.out)
+    if out_path.exists() and not args.force:
+        # Exit status 2, as --force's help promises (SystemExit(<str>) would exit 1).
+        print(f"ERROR: {out_path} already exists. A user may have hand-edited it after generation.\n"
+              "       Pass --force to overwrite, or move the existing file aside and rerun.", file=sys.stderr)
+        raise SystemExit(2)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with out_path.open("w") as fh:
+        json.dump(tracks, fh, indent=2)
+        fh.write("\n")
+
+    print(f"Wrote {len(tracks)} tracks to {out_path}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/igv-reports/scripts/prep_track.sh b/igv-reports/scripts/prep_track.sh
new file mode 100755
index 0000000..42b2b01
--- /dev/null
+++ b/igv-reports/scripts/prep_track.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+# prep_track.sh — convert a plain-gzip GFF3/GTF/BED.gz into a properly
+# bgzipped + tabix-indexed track that igv-reports can load.
+#
+# Author: Samuel Ahuno
+# Why: igv-reports parses tracks by extension and needs bgzip+tabix.
+# Plain gzip with `.gz` extension trips it with a UnicodeDecodeError or
+# silently fails. Tabix indexing additionally requires position-sorted
+# records within each chromosome, which gencode/many-other distributions
+# do not guarantee — they interleave records by feature type.
+#
+# Pipeline: backup -> gunzip -> sort by chr+pos (preserving header) ->
+# bgzip in place -> tabix -p <gff|bed> (GTF input uses the gff preset).
+#
+# Usage:
+#   prep_track.sh <track.gff3.gz | track.gtf.gz | track.bed.gz>
+#   prep_track.sh <input.gz> --out <sibling.bgz.gz>
+#
+# In-place mode (default):
+#   <input>                              (replaced with new bgzip)
+#   <input>.tbi                          (new tabix index)
+#   <input>.bak.original_gzip            (backup of the original .gz)
+#
+# Sibling mode (--out PATH; non-destructive):
+#   <input>                              (unchanged)
+#   <out>                                (new bgzip — same extension family as input)
+#   <out>.tbi                            (new tabix index)
+#   (no backup created — original is left as-is)
+
+set -euo pipefail
+
+INPUT=""
+OUT=""
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --out)
+            [[ $# -lt 2 ]] && { echo "ERROR: --out requires a path" >&2; exit 2; }
+            OUT=$2; shift 2 ;;
+        --out=*)
+            OUT=${1#--out=}; shift ;;
+        -h|--help)
+            sed -n '2,28p' "$0" >&2; exit 0 ;;
+        --)
+            shift; break ;;
+        -*)
+            echo "ERROR: unknown flag: $1" >&2; exit 2 ;;
+        *)
+            if [[ -z "$INPUT" ]]; then INPUT=$1
+            else echo "ERROR: unexpected positional arg: $1" >&2; exit 2
+            fi
+            shift ;;
+    esac
+done
+
+if [[ -z "$INPUT" ]]; then
+    echo "Usage: $0 <track.gff3.gz | track.gtf.gz | track.bed.gz> [--out <sibling.gz>]" >&2
+    exit 2
+fi
+if [[ ! -f "$INPUT" ]]; then
+    echo "ERROR: file not found: $INPUT" >&2
+    exit 2
+fi
+if [[ -n "$OUT" && -e "$OUT" ]]; then
+    echo "ERROR: --out target already exists: $OUT — refusing to overwrite. Move it aside and rerun." >&2
+    exit 2
+fi
+
+# Detect format by suffix.
+case "$INPUT" in
+    *.gff3.gz|*.gff.gz)  FMT=gff ;;
+    *.gtf.gz)            FMT=gff ;;   # tabix preset for GTF is named "gff"
+    *.bed.gz|*.bedgraph.gz) FMT=bed ;;
+    *) echo "ERROR: unsupported extension: $INPUT (need .gff3.gz, .gtf.gz, .bed.gz, .bedgraph.gz)" >&2; exit 2 ;;
+esac
+
+# Need bgzip / tabix (htslib) plus sort / gunzip / awk / file (system utilities).
+for tool in bgzip tabix sort gunzip awk file; do
+    if ! command -v "$tool" >/dev/null 2>&1; then
+        echo "ERROR: $tool not on PATH. bgzip/tabix come with htslib; sort/gunzip/awk/file are standard system utilities." >&2
+        exit 2
+    fi
+done
+
+# Resolve where the final bgzip + .tbi will land. In sibling mode we never
+# touch the original. In in-place mode the target IS the original, with a
+# backup taken first.
+if [[ -n "$OUT" ]]; then
+    TARGET=$OUT
+    mkdir -p "$(dirname "$TARGET")"
+else
+    TARGET=$INPUT
+fi
+
+# Detect if already bgzip — skip the whole conversion if it is and just
+# rebuild the index. (In sibling mode this means: copy + index, leaving
+# the original untouched.)
+if file "$INPUT" | grep -q "extra field"; then
+    if [[ "$TARGET" != "$INPUT" ]]; then
+        cp -p "$INPUT" "$TARGET"
+        echo "[$(date '+%F %T')] $INPUT already bgzip; copied to $TARGET, rebuilding tabix index."
+    else
+        echo "[$(date '+%F %T')] $INPUT is already bgzip; rebuilding tabix index only."
+    fi
+    rm -f "${TARGET}.tbi"
+    tabix -p "$FMT" "$TARGET"
+    echo "[$(date '+%F %T')] DONE: ${TARGET}.tbi"
+    exit 0
+fi
+
+# In-place mode: take a backup of the original. In sibling mode no backup is
+# needed since the original is never modified.
+if [[ "$TARGET" == "$INPUT" ]]; then
+    BACKUP="${INPUT}.bak.original_gzip"
+    if [[ -f "$BACKUP" ]]; then
+        echo "[$(date '+%F %T')] backup already exists: $BACKUP — refusing to overwrite. Move it aside and rerun if you want a fresh backup."
+    else
+        cp -p "$INPUT" "$BACKUP"
+        echo "[$(date '+%F %T')] backed up to $BACKUP"
+    fi
+fi
+
+# Decompress to a sibling-of-INPUT temp (always, regardless of target).
+TMP="${INPUT%.gz}.unsorted.tmp"
+SORTED="${INPUT%.gz}.sorted.tmp"
+gunzip -c "$INPUT" > "$TMP"
+echo "[$(date '+%F %T')] decompressed to $TMP ($(stat -c %s "$TMP") bytes)"
+
+# Sort: preserve any leading # header lines, sort body by chr (column 1)
+# then numeric pos (column 4 for GFF/GTF; column 2 for BED).
+#   * -t $'\t': fields are tab-separated; by default sort splits on blanks, so a
+#     space inside an earlier column would shift the position key.
+#   * LC_ALL=C: locale-independent order. `|| true`: header-only input is not an error.
+case "$FMT" in gff) POS_COL=4 ;; bed) POS_COL=2 ;; esac
+
+(grep '^#' "$TMP" || true) > "$SORTED"
+{ grep -v '^#' "$TMP" || true; } \
+    | LC_ALL=C sort -t $'\t' -k1,1 -k${POS_COL},${POS_COL}n -S 2G --parallel=4 \
+    >> "$SORTED"
+echo "[$(date '+%F %T')] sorted by chr,pos (col $POS_COL) into $SORTED"
+
+# bgzip and index. Sibling mode: SORTED -> TARGET. In-place: SORTED -> TARGET (== INPUT).
+# Compress with `bgzip -c` straight into TARGET instead of renaming the sorted
+# text to "${TARGET%.gz}" first: when --out lacks a `.gz` suffix that name IS
+# the target, so clearing the target would delete the freshly sorted data.
+# In in-place mode the original plain-gzip file is removed here (a backup exists).
+rm -f "$TARGET"
+bgzip -@ 4 -c "$SORTED" > "$TARGET"
+rm -f "$SORTED" "$TMP"
+echo "[$(date '+%F %T')] bgzipped: $TARGET ($(stat -c %s "$TARGET") bytes)"
+
+rm -f "${TARGET}.tbi"
+tabix -p "$FMT" "$TARGET"
+echo "[$(date '+%F %T')] indexed: ${TARGET}.tbi ($(stat -c %s "${TARGET}.tbi") bytes)"
+
+# Sanity check: pull the first contig's first 100 kb and confirm tabix returns rows.
+FIRST_CONTIG=$(tabix -l "$TARGET" | awk 'NR==1')  # index lookup; the old zcat|awk-exit pipe hit SIGPIPE + pipefail on big files
+if [[ -n "$FIRST_CONTIG" ]]; then
+    N=$(tabix "$TARGET" "${FIRST_CONTIG}:1-100000" | wc -l)
+    echo "[$(date '+%F %T')] sanity: ${FIRST_CONTIG}:1-100000 returns $N row(s)"
+fi
+
+if [[ "$TARGET" == "$INPUT" ]]; then
+    echo "[$(date '+%F %T')] DONE — track ready for igv-reports. Original preserved at $BACKUP"
+else
+    echo "[$(date '+%F %T')] DONE — sibling track ready at $TARGET. Original $INPUT untouched."
+fi
diff --git a/igv-reports/scripts/verify_anchors.py b/igv-reports/scripts/verify_anchors.py
new file mode 100755
index 0000000..d11efc8
--- /dev/null
+++ b/igv-reports/scripts/verify_anchors.py
@@ -0,0 +1,822 @@
+#!/usr/bin/env python3
+"""verify_anchors.py — content verifier for create_report HTMLs.
+
+Author: Samuel Ahuno
+Purpose:
+  The structural verifier (verify_report / verify_cohort) confirms the HTML
+  *says* the right thing: region count, coords, track names. It cannot
+  confirm the embedded BAM slices actually contain the data they claim to.
+  Failure modes it misses:
+
+    1. Sample swap — track name says `p17424_1.sorted` but the slice was
+       cut from `p17424_3.sorted.bam` (cohort loop wired the wrong path;
+       Path.stem matched and the structural check passed).
+    2. Silent empty slice — region rendered, slice is 0 reads (failed
+       index, BAM corruption, coords outside coverage).
+    3. Regression across create_report versions — flanking/slicing logic
+       changes silently between releases.
+
+  This verifier closes the gap by re-running `samtools view -c` against
+  both the source BAM (at generate time) and the embedded slice (at
+  verify time), then comparing counts.
+
+  Anchor TSV format (`#`-prefixed header, lab BED-output convention):
+
+    #sample	track_name	track_type	chrom	start	end	expected	tolerance	min	max	notes
+
+  - `tolerance` and `min`/`max` are mutually exclusive per row; if `min`
+    or `max` is non-empty it wins. Blank tolerance falls back to
+    --tolerance flag default (0.05).
+  - `expected` is the count from `samtools view -c -F 1536 source.bam
+    chrom:start-end` at generate time. Generate writes it; verify reads it.
+
+Subcommands:
+  generate      — walk (sample × region) grid, count reads from source BAMs,
+                  write an anchors.tsv that becomes a regression fixture.
+  verify        — given one HTML + anchors.tsv, decode each anchor's BAM
+                  slice and count it, compare to expected.
+  verify-cohort — apply `verify` across all HTMLs in a cohort.
+
+Container resolution (samtools):
+  1. --samtools-sif PATH
+  2. $SAMTOOLS_SIF, then $SAMTOOLS_SIF_DEFAULT (env vars)
+  3. `samtools` on PATH
+  4. Hard error
+
+Typical use:
+  # at build time, freeze the regression fixture
+  python verify_anchors.py generate \\
+      --samplesheet sheet.tsv \\
+      --sites sites.hg38.bed \\
+      --out anchors.hg38.tsv
+
+  # any time after, audit a built HTML
+  python verify_anchors.py verify \\
+      --html report.hg38.html \\
+      --anchors anchors.hg38.tsv \\
+      --out verify_anchors.tsv \\
+      --fail-on-fail
+
+  # cohort-wide
+  python verify_anchors.py verify-cohort \\
+      --samplesheet sheet.tsv \\
+      --reports-dir results/<run>/reports/ \\
+      --genome hg38 \\
+      --anchors anchors.hg38.tsv \\
+      --out cohort_verify_anchors.tsv
+"""
+
+from __future__ import annotations
+
+import argparse
+import base64
+import dataclasses
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+# Same-dir imports — reuse verify_report's HTML parser helpers.
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import verify_report as vr
+
+
+_SAMTOOLS_SIF_ENV = os.environ.get("SAMTOOLS_SIF_DEFAULT")
+DEFAULT_SAMTOOLS_SIF = Path(_SAMTOOLS_SIF_ENV) if _SAMTOOLS_SIF_ENV else None
+
+
+def _apptainer_bind_args() -> list[str]:
+    """Translate $IGV_REPORTS_BIND into `--bind <path>` argv tokens.
+
+    The variable is a colon-separated path list, mirroring build_igvreports.py.
+    Unset or empty means no binds; entries that are empty or do not exist on
+    disk are dropped without comment, so the result may be [].
+    """
+    bind_spec = os.environ.get("IGV_REPORTS_BIND", "")
+    existing = [entry for entry in bind_spec.split(":") if entry and Path(entry).exists()]
+    args: list[str] = []
+    for entry in existing:
+        args += ["--bind", entry]
+    return args
+# Match igv-reports BamReader default exclude flag (see igv_reports/bam.py):
+# 1536 = 0x200 QC-fail + 0x400 PCR/optical duplicate; supplementary (0x800) is kept.
+EXCLUDE_FLAGS = "1536"
+DEFAULT_TOLERANCE = 0.05
+ANCHOR_HEADER = [
+    "sample", "track_name", "track_type", "chrom", "start", "end",
+    "expected", "tolerance", "min", "max", "notes",
+]
+# Supported track_type values. `bam` = samtools-view read count;
+# `bedgraph` = data row count in the wig/bedGraph slice (CpG count for
+# methylation, peak count for ChIP coverage, etc.).
+VALID_TRACK_TYPES = {"bam", "bedgraph"}
+
+
+@dataclasses.dataclass
+class AnchorRow:
+    sample: str               # `sample` column of the anchors TSV
+    track_name: str           # `track_name` column
+    chrom: str                # chrom/start/end are combined by `region` below
+    start: int
+    end: int
+    expected: int             # count recorded at generate time (see module docstring)
+    track_type: str = "bam"   # bam | bedgraph; bam keeps backwards compat
+    tolerance: str = ""       # blank => fall back to --tolerance flag
+    min_count: str = ""       # blank => not used; written to the TSV `min` column
+    max_count: str = ""       # blank => not used; written to the TSV `max` column
+    notes: str = ""
+
+    @property
+    def region(self) -> str:
+        return f"{self.chrom}:{self.start}-{self.end}"  # e.g. "chr1:100-200"
+
+
+@dataclasses.dataclass
+class AnchorCheck:
+    sample: str
+    track_name: str
+    region: str           # NOTE(review): presumably AnchorRow.region ("chrom:start-end") — confirm
+    status: str           # PASS | FAIL | SKIP
+    observed: str = ""    # NOTE(review): presumably the count measured at verify time, as text — confirm
+    expected: str = ""
+    details: str = ""
+
+
+# ---------------------------------------------------------------------------
+# samtools resolution
+# ---------------------------------------------------------------------------
+
+def resolve_samtools(sif: Path | None) -> list[str]:
+    """Build the argv prefix that invokes samtools.
+
+    Lookup order: the `sif` argument, $SAMTOOLS_SIF, DEFAULT_SAMTOOLS_SIF (taken
+    from $SAMTOOLS_SIF_DEFAULT; skipped unless that file exists), then a
+    `samtools` binary on PATH, which triggers a stderr warning. A SIF chosen
+    by the first two routes that is missing on disk raises SystemExit, as
+    does finding no samtools at all."""
+    image = sif
+    if image is None and os.environ.get("SAMTOOLS_SIF"):
+        image = Path(os.environ["SAMTOOLS_SIF"])
+    if image is None and DEFAULT_SAMTOOLS_SIF is not None and DEFAULT_SAMTOOLS_SIF.exists():
+        image = DEFAULT_SAMTOOLS_SIF
+    if image is not None and not image.exists():
+        raise SystemExit(f"ERROR: samtools SIF not found: {image}")
+    if image is not None:
+        container = ["singularity", "exec", "--cleanenv"]
+        return container + _apptainer_bind_args() + [str(image), "samtools"]
+
+    # No container image: fall back to whatever samtools is on PATH.
+    on_path = shutil.which("samtools")
+    if not on_path:
+        raise SystemExit(
+            "ERROR: no samtools found. Provide --samtools-sif, set $SAMTOOLS_SIF, "
+            "or install samtools on PATH."
+        )
+    sys.stderr.write(
+        f"[verify_anchors] WARNING: falling back to PATH samtools at {on_path}; "
+        "SIF preferred for HPC cold-start cost (rules/apptainer_vs_conda.md)\n"
+    )
+    return [on_path]
+
+
+def samtools_count(samtools_cmd: list[str], bam: Path, region: str) -> int:
+    """Count reads in `region` of `bam` via `samtools view -c -F EXCLUDE_FLAGS`.
+
+    Raises RuntimeError (with samtools' stderr) on a non-zero exit."""
+    argv = [*samtools_cmd, "view", "-c", "-F", EXCLUDE_FLAGS, str(bam), region]
+    result = subprocess.run(argv, capture_output=True, text=True)
+    if result.returncode == 0:
+        return int(result.stdout.strip())
+    raise RuntimeError(
+        f"samtools view -c failed (exit {result.returncode}) for {bam} {region}: "
+        f"{result.stderr.strip()}"
+    )
+
+
+def samtools_index(samtools_cmd: list[str], bam: Path) -> None:
+    """Index `bam` with `samtools index`; raise RuntimeError if it fails."""
+    argv = [*samtools_cmd, "index", str(bam)]
+    result = subprocess.run(argv, capture_output=True, text=True)
+    if result.returncode == 0:
+        return
+    # Surface samtools' own stderr so the caller sees why indexing failed.
+    raise RuntimeError(
+        f"samtools index failed (exit {result.returncode}) for {bam}: "
+        f"{result.stderr.strip()}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# bedGraph / wig counting
+# ---------------------------------------------------------------------------
+
+# Lines opening with one of these tokens are wig/bedGraph headers or comments
+# (track / browser / fixedStep / variableStep declarations, `#`), not data rows.
+_WIG_HEADER_PREFIXES = ("track", "browser", "fixedStep", "variableStep", "#")
+
+
+def _is_wig_data_line(line: str) -> bool:
+    """Report whether `line` counts as a wig/bedGraph data row.
+
+    Blank lines and lines that, after stripping surrounding whitespace, start
+    with a header prefix are rejected; anything else is treated as data.
+    """
+    stripped = line.strip()
+    is_header = stripped.startswith(_WIG_HEADER_PREFIXES)
+    return bool(stripped) and not is_header
+
+
+def bedgraph_count_source(track_path: Path, chrom: str, start: int, end: int) -> int:
+    """Count data rows in `track_path` (bedGraph or wig) overlapping the
+    region [start, end) on `chrom`.
+
+    Handles three input shapes:
+      - bgzip+tabix indexed (`.bg.gz` / `.bedgraph.gz` + sibling `.tbi`):
+        delegate to `tabix` (query converted to its 1-based inclusive form).
+      - Plain gzip: stream-decompress, linear scan filtering on chrom + overlap.
+      - Plain text: linear scan filtering on chrom + overlap.
+
+    Overlap rule matches IGV/igv-reports: row [r_start, r_end) overlaps
+    query [q_start, q_end) iff r_start < q_end AND r_end > q_start.
+
+    Raises FileNotFoundError if `track_path` is absent. Returns 0 for a
+    region with no overlapping rows."""
+    import gzip
+    if not track_path.exists():
+        raise FileNotFoundError(f"bedGraph track not found: {track_path}")
+
+    tbi = track_path.with_suffix(track_path.suffix + ".tbi")
+    if track_path.suffix == ".gz" and tbi.exists() and shutil.which("tabix"):
+        # Fast path — tabix-indexed bgzip. tabix regions are 1-based inclusive, so
+        # the 0-based half-open query [start, end) is start+1..end; count its rows.
+        proc = subprocess.run(
+            ["tabix", str(track_path), f"{chrom}:{start + 1}-{end}"],
+            capture_output=True, text=True,
+        )
+        if proc.returncode != 0:
+            raise RuntimeError(
+                f"tabix failed for {track_path} {chrom}:{start + 1}-{end}: {proc.stderr.strip()}"
+            )
+        return sum(1 for ln in proc.stdout.splitlines() if _is_wig_data_line(ln))
+
+    # Slow path — linear scan. Open with gzip if .gz, else text. Filter on
+    # chrom first (cheap) before parsing positions.
+    opener = gzip.open if track_path.suffix == ".gz" else open
+    count = 0
+    with opener(track_path, "rt") as fh:
+        for line in fh:
+            if not _is_wig_data_line(line):
+                continue
+            cols = line.rstrip("\n").split("\t")
+            if len(cols) < 3:
+                continue
+            if cols[0] != chrom:
+                continue
+            try:
+                r_start = int(cols[1])
+                r_end = int(cols[2])
+            except ValueError:
+                continue
+            if r_start < end and r_end > start:
+                count += 1
+    return count
+
+
+def bedgraph_count_slice(slice_bytes: bytes) -> int:
+    """Count the data rows inside one embedded wig/bedGraph slice.
+
+    `slice_bytes` is the decoded payload of a slice pulled out of an
+    igv-reports HTML (via decode_track_slice()). It is normally gzipped wig
+    text; when gunzip rejects it the bytes are read as plain text instead.
+    Undecodable bytes are replaced rather than raising."""
+    import gzip
+    payload = slice_bytes
+    try:
+        payload = gzip.decompress(slice_bytes)
+    except (OSError, gzip.BadGzipFile):
+        # Not gzip: leave `payload` as the raw bytes (uncompressed wig slice).
+        pass
+    rows = payload.decode("utf-8", errors="replace").splitlines()
+    return len([row for row in rows if _is_wig_data_line(row)])
+
+
+# ---------------------------------------------------------------------------
+# anchors.tsv I/O
+# ---------------------------------------------------------------------------
+
+def write_anchors(anchors: list[AnchorRow], out: Path) -> None:
+    """Write `anchors` to `out` as TSV under a `#`-prefixed ANCHOR_HEADER line,
+    creating parent directories as needed. Columns follow ANCHOR_HEADER order."""
+    def as_row(a: AnchorRow) -> str:
+        fields = (a.sample, a.track_name, a.track_type, a.chrom, str(a.start), str(a.end),
+                  str(a.expected), a.tolerance, a.min_count, a.max_count, a.notes)
+        return "\t".join(fields)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    body = ["#" + "\t".join(ANCHOR_HEADER), *map(as_row, anchors)]
+    out.write_text("\n".join(body) + "\n")
+
+
+def load_anchors(path: Path) -> list[AnchorRow]:
+    """Parse an anchors TSV into AnchorRow objects.
+
+    The first `#`-prefixed line names the columns; other `#` lines and
+    blank lines are ignored, and short rows are padded with "". The
+    optional numeric fields (tolerance/min/max) are validated here so a
+    mis-tabbed row fails at load time, not deep inside decide_status().
+    Exits via SystemExit if the file is missing, a data row precedes the
+    header, or a row is malformed."""
+    if not path.exists():
+        raise SystemExit(f"ERROR: anchors TSV not found: {path}")
+    parsed: list[AnchorRow] = []
+    columns: list[str] | None = None
+    with path.open() as fh:
+        for lineno, raw in enumerate(fh, start=1):
+            text = raw.rstrip("\n")
+            if not text:
+                continue
+            if text.startswith("#"):
+                if columns is None:  # only the first comment line is the header
+                    columns = text.lstrip("#").split("\t")
+                continue
+            if columns is None:
+                raise SystemExit(f"{path}:{lineno}: data row before header — anchors TSV needs a `#`-prefixed header")
+            cols = text.split("\t")
+            cols += [""] * (len(columns) - len(cols))
+            record = dict(zip(columns, cols))
+            try:
+                optional: dict[str, str] = {}
+                for key, cast in (("tolerance", float), ("min", int), ("max", int)):
+                    optional[key] = (record.get(key, "") or "").strip()
+                    if optional[key]:
+                        cast(optional[key])  # validation only; the text is what gets stored
+                # track_type was added 2026-05-19; older anchor files
+                # without the column default to "bam" so they keep working.
+                kind = (record.get("track_type", "") or "bam").strip() or "bam"
+                if kind not in VALID_TRACK_TYPES:
+                    raise ValueError(
+                        f"unknown track_type '{kind}' (valid: {sorted(VALID_TRACK_TYPES)})"
+                    )
+                parsed.append(AnchorRow(
+                    sample=record["sample"],
+                    track_name=record["track_name"],
+                    track_type=kind,
+                    chrom=record["chrom"],
+                    start=int(record["start"]),
+                    end=int(record["end"]),
+                    expected=int(record["expected"]),
+                    tolerance=optional["tolerance"],
+                    min_count=optional["min"],
+                    max_count=optional["max"],
+                    notes=record.get("notes", "") or "",
+                ))
+            except (KeyError, ValueError) as e:
+                raise SystemExit(
+                    f"{path}:{lineno}: malformed anchor row: {e}\n"
+                    f"  row was: {cols!r}\n"
+                    f"  expected columns: {ANCHOR_HEADER}\n"
+                    f"  hint: TSV reader requires explicit tab separation — "
+                    "if you generate the row with awk, pass `-F'\\t'`."
+                )
+    return parsed
+
+
+# ---------------------------------------------------------------------------
+# samplesheet → (sample, track_path) iteration (shared with build_igvreports)
+# ---------------------------------------------------------------------------
+
+def parse_samplesheet(path: Path) -> list[dict]:
+    """Read a tab-separated samplesheet into one dict per data row.
+
+    Mirrors build_igvreports.parse_samplesheet without importing it (avoids
+    pulling in PyYAML for code paths that don't need it). The first line
+    is the header (leading `#` stripped); rows whose first column is blank
+    are dropped, and each kept row is zipped against the header."""
+    with path.open() as handle:
+        columns = next(handle, "").lstrip("#").rstrip("\n").split("\t")
+        # Split lazily; the comprehension drains the file before it closes.
+        fields_iter = (raw.rstrip("\n").split("\t") for raw in handle)
+        return [dict(zip(columns, fields)) for fields in fields_iter if fields[0].strip()]
+
+
+def sample_bam_paths(row: dict) -> list[tuple[str, Path]]:
+    """List the alignment tracks declared in a samplesheet row.
+
+    Returns [(track_name, path), ...]: first the non-blank `bam_tumor` and
+    `bam_normal` columns, then every comma-separated `extra_tracks` entry
+    ending in `.bam` or `.cram`. track_name = Path.stem (matches
+    igv-reports' positional auto-naming — see
+    verify_report.expected_track_labels)."""
+    declared = [
+        value.strip()
+        for value in (row.get(col) for col in ("bam_tumor", "bam_normal"))
+        if value and value.strip()
+    ]
+    extras = (item.strip() for item in (row.get("extra_tracks") or "").split(","))
+    declared.extend(item for item in extras if item.endswith((".bam", ".cram")))
+    # Path.stem drops only the final suffix: "a/b/T1.bam" -> "T1".
+    return [(Path(item).stem, Path(item)) for item in declared]
+
+
+# Extensions whose rows we count. .wig is included because igv-reports
+# treats both wig and bedGraph as "wig" format under the hood (tracks.py:60-61).
+_BEDGRAPH_EXTS = (".bedgraph", ".bedgraph.gz", ".bg", ".bg.gz", ".wig", ".wig.gz")
+
+
+def _is_bedgraph(path: str) -> bool:
+    """True if `path` ends (case-insensitively) with a wig/bedGraph extension."""
+    return path.lower().endswith(_BEDGRAPH_EXTS)
+
+
+def sample_bedgraph_paths(row: dict) -> list[tuple[str, Path]]:
+    """Return [(track_name, bedgraph_path), ...] for the bedGraph/wig
+    entries in a row's `extra_tracks` (comma-separated, mirrors the
+    build_igvreports samplesheet schema).
+
+    track_name is Path.stem with one further suffix removed when that stem
+    still ends in `.bedgraph`, `.wig` or `.bg`, so both `foo.bedgraph` and
+    `foo.bedgraph.gz` yield `foo`.
+
+    NOTE(review): sample_bam_paths names tracks by plain Path.stem; confirm
+    igv-reports really renders `foo.bedgraph.gz` as `foo` and not
+    `foo.bedgraph`, otherwise find_track() will never match these anchors.
+    The inner-suffix test is case-sensitive, unlike _is_bedgraph()."""
+    inner_suffixes = (".bedgraph", ".wig", ".bg")
+    pairs: list[tuple[str, Path]] = []
+    for entry in map(str.strip, (row.get("extra_tracks") or "").split(",")):
+        if _is_bedgraph(entry):
+            track_path = Path(entry)
+            # foo.bedgraph.gz -> Path.stem = 'foo.bedgraph'; drop one more level.
+            name = track_path.stem
+            name = name.rsplit(".", 1)[0] if name.endswith(inner_suffixes) else name
+            pairs.append((name, track_path))
+    return pairs
+
+
+# ---------------------------------------------------------------------------
+# Slice extraction from embedded session
+# ---------------------------------------------------------------------------
+
+_DATA_URL_RE = re.compile(r"data:[^;]+;base64,(.+)", flags=re.DOTALL)
+
+
+def decode_track_slice(track_url: str, dest: Path) -> Path:
+    """Write the base64 payload of a `data:...;base64,...` URL to `dest`.
+
+    Any mediatype is accepted. Per igv_reports/datauri.py, BAM slices come
+    back from pysam.view starting with the BGZF magic (0x1f 0x8b), so
+    igv-reports tags them `data:application/gzip;base64,...` — the bytes
+    are what matters, not the declared type. Raises ValueError when
+    `track_url` is not such a URL; returns `dest`.
+    """
+    payload = _DATA_URL_RE.match(track_url)
+    if payload is None:
+        raise ValueError("track url is not a data: base64 URL")
+    dest.write_bytes(base64.b64decode(payload.group(1)))
+    return dest
+
+
+def locate_session_entry(
+    session_dict: dict, table_json: dict, chrom: str, start: int, end: int,
+) -> tuple[str, dict | None, str]:
+    """Find and decode the session for the table row at (chrom, start+1, end).
+    HTML stores 1-based start (per verify_report comment); BED is 0-based,
+    hence the +1. The first matching row's index keys `session_dict`.
+    Returns (outcome, session_or_none, detail) where outcome is one of:
+      'absent'   — no tableJson row matches this region → caller should SKIP
+      'broken'   — tableJson lacks Chrom/Start/End, or the row matched but
+                   its session is missing/undecodable → caller FAILs
+      'ok'       — session decoded; second element is the dict
+    """
+    columns = table_json.get("headers", [])
+    try:
+        i_chrom, i_start, i_end = (columns.index(h) for h in ("Chrom", "Start", "End"))
+    except ValueError as e:
+        return ("broken", None, f"tableJson missing expected column: {e}")
+    target = (chrom, start + 1, end)
+    matches = (
+        n for n, cells in enumerate(table_json.get("rows", []))
+        if (cells[i_chrom], int(cells[i_start]), int(cells[i_end])) == target
+    )
+    idx = next(matches, None)
+    if idx is None:
+        return ("absent", None, f"no tableJson row matched ({chrom}, {start+1}, {end})")
+    entry = session_dict.get(str(idx))
+    if entry is None:
+        return ("broken", None, f"sessionDictionary has no entry for row index {idx}")
+    session = vr.decode_session_entry(entry)
+    if session is None:
+        return ("broken", None, f"session entry {idx} failed to gunzip/decode")
+    return ("ok", session, "")
+
+
+def find_track(session: dict, track_name: str) -> dict | None:
+    """Return the first entry of session["tracks"] whose "name" equals
+    `track_name`, or None if there is no such track."""
+    tracks = session.get("tracks", [])
+    return next((trk for trk in tracks if trk.get("name") == track_name), None)
+
+
+# ---------------------------------------------------------------------------
+# Status decision
+# ---------------------------------------------------------------------------
+
+def decide_status(anchor: AnchorRow, observed: int, default_tol: float) -> tuple[str, str]:
+    """Return (status, details) for one observed count; status is PASS or FAIL.
+
+    min/max wins over tolerance when present: each bound that is set is
+    tested on its own and all must hold. Otherwise the relative deviation
+    from `expected` must not exceed the row tolerance (or `default_tol`
+    when the row's is blank); expected == 0 passes only on observed == 0."""
+    verdict = {True: "PASS", False: "FAIL"}
+    bounds = [
+        ("min", anchor.min_count, lambda limit: observed >= limit),
+        ("max", anchor.max_count, lambda limit: observed <= limit),
+    ]
+    outcomes = [(label, raw, holds(int(raw))) for label, raw, holds in bounds if raw]
+    if outcomes:
+        notes = [f"{label}={raw} {'OK' if good else 'FAIL'}" for label, raw, good in outcomes]
+        return verdict[all(good for _, _, good in outcomes)], "; ".join(notes)
+    tol = float(anchor.tolerance) if anchor.tolerance else default_tol
+    if anchor.expected == 0:
+        return verdict[observed == 0], f"expected=0, observed={observed}"
+    diff_ratio = abs(observed - anchor.expected) / anchor.expected
+    return verdict[diff_ratio <= tol], f"diff_ratio={diff_ratio:.3f} (tol={tol:.3f})"
+
+
+# ---------------------------------------------------------------------------
+# Subcommand: generate
+# ---------------------------------------------------------------------------
+
+def cmd_generate(args: argparse.Namespace) -> None:
+    """`generate` subcommand: freeze per-region source counts into an anchors TSV.
+
+    For every samplesheet row and every sites-BED region, counts reads in each
+    BAM track (samtools) and data rows in each bedGraph/wig track, and writes
+    one AnchorRow per (sample, track, region) to args.out. Missing files and
+    failed counts are reported on stderr and skipped. Exits if the
+    samplesheet or the sites BED has no data rows."""
+    samtools_cmd = resolve_samtools(Path(args.samtools_sif) if args.samtools_sif else None)
+    rows = parse_samplesheet(Path(args.samplesheet))
+    bed_rows = vr.load_sites_bed(Path(args.sites))
+    if not rows:
+        raise SystemExit("ERROR: samplesheet has no data rows")
+    if not bed_rows:
+        raise SystemExit("ERROR: sites BED has no data rows")
+
+    def count_bam(track: Path, b: dict, region: str) -> int:
+        return samtools_count(samtools_cmd, track, region)
+
+    def count_bedgraph(track: Path, b: dict, region: str) -> int:
+        return bedgraph_count_source(track, b["chrom"], b["start"], b["end"])
+
+    log = sys.stderr.write
+    anchors: list[AnchorRow] = []
+    for row in rows:
+        sample = row["sample"]
+        # (track_type, label in the "missing" message, unit, caught errors, tracks, counter)
+        plans = (
+            ("bam", "BAM", "reads", (RuntimeError,), sample_bam_paths(row), count_bam),
+            ("bedgraph", "bedGraph", "rows", (FileNotFoundError, RuntimeError), sample_bedgraph_paths(row), count_bedgraph),
+        )
+        if not any(plan[4] for plan in plans):
+            log(f"[generate] {sample}: no BAM or bedGraph tracks in row — skipping\n")
+            continue
+        for track_type, label, unit, errors, tracks, counter in plans:
+            for track_name, track in tracks:
+                if not track.exists():
+                    log(f"[generate] {sample}/{track_name}: {label} missing: {track}\n")
+                    continue
+                for b in bed_rows:
+                    region = f"{b['chrom']}:{b['start']}-{b['end']}"
+                    try:
+                        count = counter(track, b, region)
+                    except errors as e:
+                        log(f"[generate] {sample}/{track_name} {region}: {e}\n")
+                        continue
+                    anchors.append(AnchorRow(
+                        sample=sample, track_name=track_name, track_type=track_type,
+                        chrom=b["chrom"], start=b["start"], end=b["end"],
+                        expected=count, notes=b["name"] or "",
+                    ))
+                    log(f"[generate] {sample}/{track_name} {region}: {count} {unit}\n")
+
+    out = Path(args.out)
+    write_anchors(anchors, out)
+    log(f"[generate] wrote {len(anchors)} anchors -> {out}\n")
+
+
+# ---------------------------------------------------------------------------
+# Subcommand: verify (single HTML)
+# ---------------------------------------------------------------------------
+
+def verify_one_html(
+    html_path: Path, anchors: list[AnchorRow], samtools_cmd: list[str],
+    default_tol: float,
+) -> list[AnchorCheck]:
+    """Verify `anchors` against one HTML, returning one AnchorCheck each.
+    Anchors whose HTML, region row or track_name is absent are SKIPped (the
+    caller is trusted to pass the right anchor subset); the rest PASS/FAIL."""
+    checks: list[AnchorCheck] = []
+    if not html_path.is_file():  # no report at all: nothing to audit, SKIP every anchor
+        for a in anchors:
+            checks.append(AnchorCheck(
+                a.sample, a.track_name, a.region, "SKIP",
+                details=f"HTML missing: {html_path}",
+            ))
+        return checks
+    html_text = html_path.read_text()
+    table_json = vr.parse_table_json(html_text)
+    session_dict = vr.parse_session_dictionary(html_text)
+    if table_json is None or session_dict is None:  # report exists but lacks its data: FAIL every anchor
+        for a in anchors:
+            checks.append(AnchorCheck(
+                a.sample, a.track_name, a.region, "FAIL",
+                details="tableJson or sessionDictionary missing from HTML",
+            ))
+        return checks
+    with tempfile.TemporaryDirectory(prefix="verify_anchors_") as td:  # holds decoded BAM slices; removed on exit
+        tmp = Path(td)
+        for a in anchors:
+            outcome, session, locate_detail = locate_session_entry(
+                session_dict, table_json, a.chrom, a.start, a.end,
+            )
+            if outcome == "absent":  # region never rendered in this HTML
+                checks.append(AnchorCheck(
+                    a.sample, a.track_name, a.region, "SKIP",
+                    details=locate_detail,
+                ))
+                continue
+            if outcome == "broken":  # row found but its session is unusable
+                checks.append(AnchorCheck(
+                    a.sample, a.track_name, a.region, "FAIL",
+                    expected=str(a.expected),
+                    details=locate_detail,
+                ))
+                continue
+            assert session is not None  # outcome == "ok"
+            track = find_track(session, a.track_name)
+            if track is None:
+                checks.append(AnchorCheck(
+                    a.sample, a.track_name, a.region, "SKIP",
+                    details=f"track '{a.track_name}' not in HTML session",
+                ))
+                continue
+            url = track.get("url", "")  # "" fails the data-URL match below and ends up as a FAIL row
+            if a.track_type == "bedgraph":
+                # wig/bedGraph slices are gzip(text) base64-encoded by
+                # igv_reports/datauri.py. Count data rows in the embedded
+                # slice; no samtools needed.
+                try:
+                    m = _DATA_URL_RE.match(url)
+                    if not m:
+                        raise ValueError("track url is not a data: base64 URL")
+                    raw = base64.b64decode(m.group(1))
+                    observed = bedgraph_count_slice(raw)
+                except (ValueError, RuntimeError) as e:  # bad URL or base64 payload surfaces as ValueError
+                    checks.append(AnchorCheck(
+                        a.sample, a.track_name, a.region, "FAIL",
+                        expected=str(a.expected),
+                        details=f"bedGraph slice decode/count failed: {e}",
+                    ))
+                    continue
+            else:
+                # BAM (default): write the slice to disk, index it, count reads in the anchor region.
+                slice_path = tmp / f"{a.sample}__{a.track_name}__{a.chrom}_{a.start}_{a.end}.bam"
+                try:
+                    decode_track_slice(url, slice_path)
+                    samtools_index(samtools_cmd, slice_path)
+                    observed = samtools_count(samtools_cmd, slice_path, a.region)
+                except (ValueError, RuntimeError) as e:  # RuntimeError presumably from the samtools helpers — confirm
+                    checks.append(AnchorCheck(
+                        a.sample, a.track_name, a.region, "FAIL",
+                        expected=str(a.expected),
+                        details=f"slice decode/count failed: {e}",
+                    ))
+                    continue
+            status, details = decide_status(a, observed, default_tol)
+            checks.append(AnchorCheck(
+                a.sample, a.track_name, a.region, status,
+                observed=str(observed), expected=str(a.expected),
+                details=details,
+            ))
+    return checks
+
+
+def cmd_verify(args: argparse.Namespace) -> None:
+    """`verify` subcommand: audit one HTML; exit 1 on FAIL if --fail-on-fail."""
+    samtools = resolve_samtools(Path(args.samtools_sif) if args.samtools_sif else None)
+    results = verify_one_html(Path(args.html), load_anchors(Path(args.anchors)), samtools, args.tolerance)
+    write_checks(results, Path(args.out) if args.out else None)
+    if args.fail_on_fail and "FAIL" in {c.status for c in results}:
+        sys.exit(1)
+
+
+# ---------------------------------------------------------------------------
+# Subcommand: verify-cohort
+# ---------------------------------------------------------------------------
+
+def cmd_verify_cohort(args: argparse.Namespace) -> None:
+    """`verify-cohort` subcommand: audit each samplesheet sample's HTML
+    (<reports-dir>/<sample>.<genome>.html) against that sample's anchors.
+
+    Samples without anchors, and anchor samples absent from the samplesheet,
+    get a SKIP row. Exits 1 if --fail-on-fail is set and any check FAILed."""
+    samtools_cmd = resolve_samtools(Path(args.samtools_sif) if args.samtools_sif else None)
+    anchors = load_anchors(Path(args.anchors))
+    rows = parse_samplesheet(Path(args.samplesheet))
+    reports_dir, genome = Path(args.reports_dir), args.genome
+
+    # Bucket the anchors per sample so each HTML is checked against its own only.
+    grouped: dict[str, list[AnchorRow]] = {}
+    for anchor in anchors:
+        grouped.setdefault(anchor.sample, []).append(anchor)
+
+    results: list[AnchorCheck] = []
+    for row in rows:
+        sample = row["sample"]
+        mine = grouped.get(sample)
+        if mine:
+            report = reports_dir / f"{sample}.{genome}.html"
+            results.extend(verify_one_html(report, mine, samtools_cmd, args.tolerance))
+        else:
+            results.append(AnchorCheck(
+                sample, "*", "*", "SKIP",
+                details="no anchors for this sample in anchors.tsv",
+            ))
+
+    # Surface anchor samples that don't match any samplesheet row.
+    results.extend(
+        AnchorCheck(orphan, "*", "*", "SKIP", details="anchor sample not present in samplesheet")
+        for orphan in sorted(set(grouped) - {r["sample"] for r in rows})
+    )
+    write_checks(results, Path(args.out) if args.out else None)
+    if args.fail_on_fail and any(c.status == "FAIL" for c in results):
+        sys.exit(1)
+
+
+# ---------------------------------------------------------------------------
+# Output
+# ---------------------------------------------------------------------------
+
+def write_checks(checks: list[AnchorCheck], out: Path | None) -> None:
+    """Render `checks` as a TSV: one header line, then one row per check.
+
+    The text always goes to stdout; when `out` is given it is also written
+    there (parent directories are created as needed)."""
+    fields = ("sample", "track_name", "region", "status", "observed", "expected", "details")
+    rows = ["\t".join(getattr(c, f) for f in fields) for c in checks]
+    text = "\n".join(["\t".join(fields), *rows]) + "\n"
+    if out:
+        out.parent.mkdir(parents=True, exist_ok=True)
+        out.write_text(text)
+    sys.stdout.write(text)
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    """CLI entry point: parse the subcommand line and dispatch to its handler."""
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    commands = parser.add_subparsers(dest="cmd", required=True)
+
+    # `generate`: samplesheet + sites BED -> anchors TSV
+    gen = commands.add_parser("generate", help="freeze samtools view -c counts into anchors.tsv")
+    gen.add_argument("--samplesheet", required=True)
+    gen.add_argument("--sites", required=True)
+    gen.add_argument("--out", required=True, help="path to write anchors TSV")
+    gen.add_argument("--samtools-sif", help="explicit samtools SIF path")
+    gen.set_defaults(func=cmd_generate)
+
+    # `verify`: one HTML vs anchors TSV
+    single = commands.add_parser("verify", help="audit one HTML against anchors.tsv")
+    single.add_argument("--html", required=True)
+    single.add_argument("--anchors", required=True)
+    single.add_argument("--out", help="write checks TSV here in addition to stdout")
+    single.add_argument("--samtools-sif")
+    single.add_argument("--tolerance", type=float, default=DEFAULT_TOLERANCE, help=f"default ratio tolerance when row tolerance/min/max blank (default {DEFAULT_TOLERANCE})")
+    single.add_argument("--fail-on-fail", action="store_true")
+    single.set_defaults(func=cmd_verify)
+
+    # `verify-cohort`: every samplesheet sample's HTML vs anchors TSV
+    cohort = commands.add_parser("verify-cohort", help="audit all HTMLs in a cohort against anchors.tsv")
+    cohort.add_argument("--samplesheet", required=True)
+    cohort.add_argument("--reports-dir", required=True)
+    cohort.add_argument("--genome", required=True)
+    cohort.add_argument("--anchors", required=True)
+    cohort.add_argument("--out")
+    cohort.add_argument("--samtools-sif")
+    cohort.add_argument("--tolerance", type=float, default=DEFAULT_TOLERANCE)
+    cohort.add_argument("--fail-on-fail", action="store_true")
+    cohort.set_defaults(func=cmd_verify_cohort)
+
+    args = parser.parse_args()
+    args.func(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/igv-reports/scripts/verify_cohort.py b/igv-reports/scripts/verify_cohort.py
new file mode 100755
index 0000000..18b3a07
--- /dev/null
+++ b/igv-reports/scripts/verify_cohort.py
@@ -0,0 +1,632 @@
+#!/usr/bin/env python3
+"""verify_cohort.py — cohort-level structural verifier for create_report runs.
+
+Author: Samuel Ahuno
+Purpose:
+  Catches sample-to-HTML mismatches in cohort mode. Per-sample verification
+  (verify_report.py) confirms each HTML is internally consistent, but it
+  has no notion of WHICH sample an HTML *should* belong to. This verifier
+  re-reads the samplesheet and cross-checks every HTML against the row that
+  produced it, plus scans for cross-sample contamination.
+
+Threat model — failure modes this catches that per-sample verify cannot:
+  * Wrong BAM embedded under right filename (samplesheet typo, copy-paste).
+  * Tumor/normal slot swap.
+  * Missing HTML for a samplesheet row (cohort loop silently skipped).
+  * Index.html lying — links to a sample that doesn't exist, or omits one.
+  * Sample-2's BAM accidentally winding up inside sample-1's HTML.
+
+Checks emitted (per sample, plus two cohort-global rows tagged sample="*"):
+  Per-sample (delegated to verify_report.py for the structural ones):
+    * html_exists, html_min_size, region_count, region_coords,
+      region_sessions, tracks_present  -- run verify_report.py against
+      each sample's HTML using that sample's row as input
+  Cohort-specific (added here):
+    C2 sample_tracks_match           -- the HTML's session contains every
+                                        track basename declared in this row
+    C3 no_cross_sample_contamination -- the HTML's session contains NO
+                                        basename that belongs to another
+                                        row's track columns but not this
+                                        row (default-track basenames from
+                                        databases_config.yaml are excluded)
+    C4 sample_id_embedded            -- the `sample` column value appears in
+                                        the HTML's <title> or filename
+  Cohort-global (one row each, sample='*'):
+    C1 cohort_html_coverage          -- every samplesheet sample has exactly
+                                        one matching HTML; flag missing+extras
+    C5 index_consistency             -- index.html (if present) links exactly
+                                        the samplesheet sample set; each link
+                                        target exists and is non-empty
+
+Output:
+  TSV with columns: sample / check / status / observed / expected / details
+  (also printed to stdout). Optional --summary <path>.md emits a one-page
+  rollup: total samples, PASS/FAIL counts per check, contamination incidents
+  listed by sample.
+
+Exit code: 0, or 1 if --fail-on-fail is set and any row is FAIL.
+
+Typical use (auto-invoked by build_igvreports.py --samplesheet, but can be
+run standalone too):
+
+  python verify_cohort.py \\
+      --samplesheet samplesheet.tsv \\
+      --reports-dir results/<run>/reports/ \\
+      --genome hg38 \\
+      --out results/<run>/reports/cohort_verify.tsv \\
+      --summary results/<run>/reports/cohort_verify.summary.md \\
+      --fail-on-fail
+
+Skill location:
+  <repo-root>/
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import re
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+# Same-dir imports — both verify_report.py and build_igvreports.py live here.
+sys.path.insert(0, str(Path(__file__).resolve().parent))
+import verify_report as vr
+import build_igvreports as bir
+
+
+_DBCONFIG_ENV = os.environ.get("IGV_REPORTS_DB_CONFIG")  # environment variable, read once at import time
+DEFAULT_DBCONFIG = Path(_DBCONFIG_ENV) if _DBCONFIG_ENV else None  # None when the variable is unset or empty
+DEFAULT_TRACK_COLUMNS = ["bam_tumor", "bam_normal", "vcf", "extra_tracks"]  # presumably the default `track_columns` for row_track_paths — confirm
+
+
+@dataclass
+class CohortCheck:  # one result row: sample / check name / status / observed / expected / details
+    sample: str          # "*" for cohort-global checks
+    name: str            # check identifier, e.g. "cohort_html_coverage"
+    status: str          # PASS | FAIL | SKIP
+    observed: str = ""   # what was found, as display text
+    expected: str = ""   # what was required, as display text
+    details: str = ""    # free-text explanation; may stay empty
+
+
+# ---------------------------------------------------------------------------
+# Samplesheet inspection
+# ---------------------------------------------------------------------------
+
+def row_track_paths(row: dict, track_columns: list[str]) -> list[str]:
+    """Collect the track paths a samplesheet row declares, in column order.
+
+    Every column in `track_columns` holds a single path, except
+    `extra_tracks`, which is a comma-separated list (per the
+    build_igvreports.py convention). Blank or missing cells and empty list
+    items are ignored; all paths are whitespace-stripped."""
+    collected: list[str] = []
+    for column in track_columns:
+        cell = (row.get(column) or "").strip()
+        pieces = cell.split(",") if column == "extra_tracks" else [cell]
+        collected.extend(piece.strip() for piece in pieces if piece.strip())
+    return collected
+
+
+def track_labels_of(paths: list[str]) -> set[str]:
+    """Return the names igv-reports auto-assigns to positional --tracks for
+    these paths: the basename minus ONE final suffix (verified against
+    create_report 1.16.2 — see verify_report.expected_track_labels)."""
+    return set(map(lambda p: Path(p).stem, paths))
+
+
+# ---------------------------------------------------------------------------
+# Cohort-global checks (C1, C5)
+# ---------------------------------------------------------------------------
+
+def check_html_coverage(rows: list[dict], reports_dir: Path, genome: str) -> CohortCheck:
+    """C1: every samplesheet sample has a `<sample>.<genome>.html` in
+    `reports_dir` and no other `*.<genome>.html` file is present.
+
+    FAIL details list up to five missing and five unexpected file names,
+    with " ..." appended when a list was cut short."""
+    wanted = {f"{r['sample']}.{genome}.html" for r in rows}
+    present = {p.name for p in reports_dir.glob(f"*.{genome}.html")}
+
+    def preview(label: str, names: list[str]) -> str:
+        return f"{label}: {', '.join(names[:5])}" + (" ..." if len(names) > 5 else "")
+
+    problems = [
+        preview(label, names)
+        for label, names in (("missing", sorted(wanted - present)), ("unexpected", sorted(present - wanted)))
+        if names
+    ]
+    return CohortCheck(
+        "*", "cohort_html_coverage", "FAIL" if problems else "PASS",
+        observed=f"{len(present)} HTMLs", expected=f"{len(wanted)} HTMLs",
+        details="; ".join(problems),
+    )
+
+
+def check_index_consistency(rows: list[dict], reports_dir: Path) -> CohortCheck:
+    """C5: index.html links exactly the samplesheet sample set, and every
+    link target exists and is at least 1024 bytes.
+
+    The link text is taken as the sample name (if a label repeats, the last
+    href wins). SKIPs when `reports_dir` has no index.html. FAIL details
+    show at most five items per category, followed by " ..." when a list
+    was cut short — the same convention check_html_coverage uses."""
+    index = reports_dir / "index.html"
+    if not index.exists():
+        return CohortCheck(
+            "*", "index_consistency", "SKIP",
+            details=f"no {index.name} present (cohort write_index() not invoked)",
+        )
+    # build_igvreports.write_index() emits <li><a href="<file>">SAMPLE</a></li>.
+    # Match <a href="..."> ... </a> and pull both the href and the link text.
+    found: dict[str, str] = {  # sample -> href
+        m.group(2).strip(): m.group(1)
+        for m in re.finditer(r'<a href="([^"]+)">([^<]+)</a>', index.read_text())
+    }
+    expected_samples = {r["sample"] for r in rows}
+    indexed_samples = set(found)
+    broken_links = []
+    for sample, href in found.items():
+        target = reports_dir / href
+        if not target.exists() or target.stat().st_size < 1024:
+            broken_links.append(f"{sample}->{href}")
+
+    def listed(label: str, items: list[str]) -> str:
+        # Cap at five entries but say so: a silently truncated list reads as complete.
+        return f"{label}: {', '.join(items[:5])}" + (" ..." if len(items) > 5 else "")
+
+    problems = [
+        ("missing from index", sorted(expected_samples - indexed_samples)),
+        ("unexpected in index", sorted(indexed_samples - expected_samples)),
+        ("broken", broken_links),
+    ]
+    details = [listed(label, items) for label, items in problems if items]
+    return CohortCheck(
+        "*", "index_consistency", "FAIL" if details else "PASS",
+        observed=f"{len(found)} links",
+        expected=f"{len(expected_samples)} samples",
+        details="; ".join(details),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Per-sample checks (delegate to verify_report + add C2, C3, C4)
+# ---------------------------------------------------------------------------
+
+def per_sample_structural(sample: str, html_path: Path, sites_path: Path,
+                          tracks: list[str], min_size_mb: float) -> list[CohortCheck]:
+    """Run verify_report.py's 6 structural checks against one sample's HTML.
+
+    Checks that cannot run because the HTML (or the sites BED) is missing
+    are still emitted, as SKIP rows, so the result always has six rows."""
+    region_checks = ("region_count", "region_coords", "region_sessions", "tracks_present")
+
+    def skipped(names: tuple[str, ...], why: str) -> list[CohortCheck]:
+        return [CohortCheck(sample, n, "SKIP", details=why) for n in names]
+
+    results = [_wrap(sample, vr.check_html_exists(html_path))]
+    if not html_path.is_file():
+        return results + skipped(("html_min_size", *region_checks), "HTML missing")
+    results.append(_wrap(sample, vr.check_html_min_size(html_path, min_size_mb)))
+    if not sites_path.exists():
+        return results + skipped(region_checks, f"sites BED missing: {sites_path}")
+    html_text = html_path.read_text()
+    table_json, session_dict = vr.parse_table_json(html_text), vr.parse_session_dictionary(html_text)
+    bed_rows = vr.load_sites_bed(sites_path)
+    results.append(_wrap(sample, vr.check_region_count(bed_rows, table_json)))
+    results.append(_wrap(sample, vr.check_region_coords(bed_rows, table_json)))
+    results.append(_wrap(sample, vr.check_region_sessions(table_json, session_dict)))
+    results.append(_wrap(sample, vr.check_tracks_present(session_dict, vr.expected_track_labels(tracks, None))))
+    return results
+
+
+def _wrap(sample: str, c: vr.Check) -> CohortCheck:
+    """Re-tag a single-report `vr.Check` as a CohortCheck owned by `sample`."""
+    payload = (c.name, c.status, c.observed, c.expected, c.details)
+    return CohortCheck(sample, *payload)
+
+
+def session_track_names(html_path: Path) -> set[str]:
+    """Decode the first sessionDictionary entry and return its track names.
+
+    "First" means the lexicographically smallest key. Returns an empty set
+    when the HTML is missing, cannot be decoded as text, has no (or a
+    malformed) sessionDictionary blob, or the session entry fails to decode.
+    """
+    if not html_path.is_file():
+        return set()
+    try:
+        sd = vr.parse_session_dictionary(html_path.read_text())
+    except ValueError:
+        # Covers json.JSONDecodeError (malformed blob) and UnicodeDecodeError
+        # (unreadable HTML): honor the "empty set on failure" contract so the
+        # calling check reports a result instead of crashing the run.
+        return set()
+    if not sd:
+        return set()
+    session = vr.decode_session_entry(sd[min(sd)])
+    if session is None:
+        return set()
+    return {t.get("name") for t in session.get("tracks", []) if t.get("name")}
+
+
+def check_sample_tracks_match(sample: str, html_path: Path, row_tracks: list[str]) -> CohortCheck:
+    """C2: every track label derived from this sample's samplesheet row must
+    be present among the track names of this HTML's embedded session.
+
+    SKIP when the HTML is missing or the row declares no track paths; FAIL
+    lists up to five missing labels; PASS otherwise. (Labels come from
+    track_labels_of — igv-reports auto-names positional tracks by Path.stem.)"""
+    check = "sample_tracks_match"
+    if not html_path.is_file():
+        return CohortCheck(sample, check, "SKIP", details="HTML missing")
+    wanted = sorted(track_labels_of(row_tracks))
+    if not wanted:
+        return CohortCheck(sample, check, "SKIP",
+                           details="no track paths in samplesheet row")
+    present = session_track_names(html_path)
+    absent = [label for label in wanted if label not in present]
+    total = len(wanted)
+    if not absent:
+        return CohortCheck(sample, check, "PASS", observed=f"{total}/{total} found")
+    overflow = " ..." if len(absent) > 5 else ""
+    return CohortCheck(
+        sample, check, "FAIL",
+        observed=f"{total - len(absent)}/{total} found",
+        expected=f"{total}/{total} found",
+        details="missing: " + ", ".join(absent[:5]) + overflow,
+    )
+
+
+def check_no_cross_sample_contamination(
+    sample: str,
+    html_path: Path,
+    this_row_labels: set[str],
+    other_rows_labels: set[str],
+    allow_list: set[str],
+) -> CohortCheck:
+    """C3: this sample's HTML must not embed a track name that is declared
+    only by OTHER samplesheet rows.
+
+    Labels shared with this row, or present in the default-track allow
+    list, are never suspect. The HTML is only parsed when at least one
+    suspect label exists. Labels are Path.stem (igv-reports's auto-naming
+    for positional tracks)."""
+    check = "no_cross_sample_contamination"
+    if not html_path.is_file():
+        return CohortCheck(sample, check, "SKIP", details="HTML missing")
+    foreign = {
+        label for label in other_rows_labels
+        if label not in this_row_labels and label not in allow_list
+    }
+    if not foreign:
+        return CohortCheck(sample, check, "PASS", observed="0 suspect labels in scope")
+    hits = sorted(foreign & session_track_names(html_path))
+    if hits:
+        overflow = " ..." if len(hits) > 5 else ""
+        return CohortCheck(
+            sample, check, "FAIL",
+            observed=f"{len(hits)} contamination incidents",
+            details="found: " + ", ".join(hits[:5]) + overflow,
+        )
+    return CohortCheck(
+        sample, check, "PASS",
+        observed=f"{len(foreign)} other-sample labels scanned, 0 found",
+    )
+
+
+def check_sample_id_embedded(sample: str, html_path: Path) -> CohortCheck:
+    """C4: the sample id appears in the HTML's embedded <title>.
+
+    Filename is intentionally NOT checked. The filename is what the cohort
+    loop named the file; the title is what `create_report --title` baked
+    INTO the HTML at render time. For swap detection, only the title is a
+    real signal — a copy-paste of sample_2.html over sample_1.html leaves
+    the filename as `sample_1.hg38.html` but the title still says
+    `sample_2 (hg38)`. Build_igvreports.py's default title pattern is
+    `<sample> (<genome>)`, so this works out of the box.
+
+    The id must appear as a whole token: it may not be directly preceded or
+    followed by an ASCII letter or digit. A plain substring test would let
+    `sample_1` pass against a title of `sample_10 (hg38)`, which is exactly
+    the swap this check exists to catch.
+
+    If --title is overridden and omits the sample id, this check will FAIL
+    — which is the right behavior for a verifier that doesn't know the
+    user's intent.
+
+    Returns SKIP when the HTML is missing or no <title> is found in the
+    first 16384 characters, PASS/FAIL otherwise."""
+    if not html_path.is_file():
+        return CohortCheck(sample, "sample_id_embedded", "SKIP", details="HTML missing")
+    # Read just the head so we don't load and scan 25 MB for a string.
+    # read(16384) on a text stream returns at most that many characters, the
+    # same prefix a full read followed by a [:16384] slice would give.
+    with html_path.open() as fh:
+        head = fh.read(16384)
+    m = re.search(r"<title>([^<]*)</title>", head, flags=re.IGNORECASE)
+    if not m:
+        return CohortCheck(
+            sample, "sample_id_embedded", "SKIP",
+            details="no <title> tag in HTML head; cannot verify",
+        )
+    title = m.group(1)
+    # re.escape: sample ids routinely contain regex metacharacters ('.', '+').
+    whole_id = r"(?<![A-Za-z0-9])" + re.escape(sample) + r"(?![A-Za-z0-9])"
+    if re.search(whole_id, title):
+        return CohortCheck(sample, "sample_id_embedded", "PASS",
+                           observed=f"in <title>: {title!r}")
+    return CohortCheck(
+        sample, "sample_id_embedded", "FAIL",
+        observed=f"title={title!r}",
+        details=f"sample id {sample!r} not in <title> — likely a swap or wrong --title",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Allow-list (default tracks resolved from databases_config.yaml) —
+# implemented by resolve_default_track_labels(), defined after the PNG-side
+# checks below.
+# ---------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------
+# PNG-side checks (opt-in, only fire when a manifest exists alongside the HTML)
+# ---------------------------------------------------------------------------
+
+def find_png_manifest(reports_dir: Path, sample: str, genome: str) -> Path | None:
+    """Locate the per-sample PNG manifest, if one exists.
+
+    The path probed is
+      <reports_dir>/png_<sample>.<genome>/manifest.tsv
+    (the convention used by build_igvreports.py's `build_pngs_with_igver`
+    for --also-png builds). Returns that path when it exists, else None.
+    """
+    manifest = reports_dir.joinpath(f"png_{sample}.{genome}", "manifest.tsv")
+    if manifest.exists():
+        return manifest
+    return None
+
+
+def _parse_png_manifest(manifest: Path) -> list[dict]:
+    """Load a PNG manifest TSV as a list of {column: value} dicts.
+
+    Column names are taken from the file's first line with its leading `#`
+    characters removed (the schema is fixed at write time by
+    build_igvreports.py:build_pngs_with_igver). Blank lines, later `#`
+    lines, and rows whose field count differs from the header are dropped."""
+    with manifest.open() as fh:
+        cols = fh.readline().lstrip("#").rstrip("\n").split("\t")
+        records: list[dict] = []
+        for raw in fh:
+            text = raw.rstrip("\n")
+            if text == "" or text.startswith("#"):
+                continue
+            fields = text.split("\t")
+            if len(fields) == len(cols):
+                records.append(dict(zip(cols, fields)))
+    return records
+
+
+def check_png_count_matches_bed(
+    sample: str, manifest: Path, sites_path: Path,
+) -> CohortCheck:
+    """P1 — the manifest must have exactly as many rows as the sites BED has
+    data rows.
+
+    A mismatch points at a partial igver run (e.g. SIGKILL mid-way),
+    filename collisions that overwrote earlier PNGs, or a stale manifest
+    from a previous build. An unreadable manifest or BED is itself a FAIL."""
+    check = "png_count_matches_bed"
+    try:
+        manifest_rows = _parse_png_manifest(manifest)
+    except Exception as e:
+        return CohortCheck(sample, check, "FAIL",
+                           details=f"manifest unreadable: {e}")
+    try:
+        bed_rows = bir._read_sites_bed_rows(sites_path)
+    except Exception as e:
+        return CohortCheck(sample, check, "FAIL",
+                           details=f"sites BED unreadable: {e}")
+    n_png, n_bed = len(manifest_rows), len(bed_rows)
+    if n_png != n_bed:
+        return CohortCheck(sample, check, "FAIL",
+                           observed=str(n_png), expected=str(n_bed),
+                           details="manifest row count != sites BED data row count")
+    return CohortCheck(sample, check, "PASS",
+                       observed=str(n_png), expected=str(n_bed))
+
+
+def check_pngs_exist_and_nonempty(
+    sample: str, manifest: Path, min_size_kb: float = 10.0,
+) -> CohortCheck:
+    """P2 — every PNG path in the manifest must be a regular file at least
+    `min_size_kb` kB in size.
+
+    igver can produce a near-empty file on a region with no data in any
+    track; we want those flagged rather than silently shipped.
+
+    A row whose `png_path` is blank or absent counts as missing. (An empty
+    string would otherwise become Path(""), i.e. the current directory,
+    which "exists".) FAIL rows report up to three offenders per category;
+    an unreadable manifest is itself a FAIL."""
+    try:
+        rows = _parse_png_manifest(manifest)
+    except Exception as e:
+        return CohortCheck(sample, "pngs_exist_and_nonempty", "FAIL",
+                           details=f"manifest unreadable: {e}")
+    floor_bytes = min_size_kb * 1024
+    missing: list[str] = []
+    tiny: list[str] = []
+    for r in rows:
+        raw_path = r.get("png_path", "")
+        p = Path(raw_path)
+        if not raw_path or not p.is_file():
+            missing.append(p.name or "<no png_path>")
+            continue
+        size = p.stat().st_size
+        if size < floor_bytes:
+            tiny.append(f"{p.name} ({size} B)")
+    if not missing and not tiny:
+        return CohortCheck(sample, "pngs_exist_and_nonempty", "PASS",
+                           observed=f"{len(rows)} pngs all present and >= {min_size_kb:.1f} kB")
+    parts = []
+    if missing:
+        parts.append(f"missing: {missing[:3]}{'...' if len(missing) > 3 else ''}")
+    if tiny:
+        parts.append(f"below threshold: {tiny[:3]}{'...' if len(tiny) > 3 else ''}")
+    return CohortCheck(sample, "pngs_exist_and_nonempty", "FAIL",
+                       observed=f"missing={len(missing)} tiny={len(tiny)}",
+                       # Report the threshold actually applied; `:g` renders
+                       # the default 10.0 as "10".
+                       expected=f"all PNGs present, >= {min_size_kb:g} kB",
+                       details="; ".join(parts))
+
+
+def check_png_html_row_alignment(
+    sample: str, manifest: Path, html_path: Path,
+) -> CohortCheck:
+    """P3 — audit-trail check between the PNG manifest and the HTML report.
+
+    Passes only if every manifest row's `html_path` equals the resolved
+    path of `html_path` AND the `html_table_row` values are exactly
+    1, 2, ..., N in manifest order (no skips, no duplicates), so that row N
+    clicked in the HTML corresponds to the PNG named in manifest row N.
+    A malformed `html_table_row` is reported before a wrong-HTML mismatch."""
+    check = "png_html_row_alignment"
+    try:
+        manifest_rows = _parse_png_manifest(manifest)
+    except Exception as e:
+        return CohortCheck(sample, check, "FAIL",
+                           details=f"manifest unreadable: {e}")
+    if not manifest_rows:
+        return CohortCheck(sample, check, "FAIL",
+                           details="manifest has no data rows")
+    target_html = str(html_path.resolve())
+    n_foreign = sum(1 for row in manifest_rows if row.get("html_path") != target_html)
+    try:
+        row_numbers = [int(row["html_table_row"]) for row in manifest_rows]
+    except (KeyError, ValueError) as e:
+        return CohortCheck(sample, check, "FAIL",
+                           details=f"manifest html_table_row malformed: {e}")
+    if n_foreign:
+        return CohortCheck(sample, check, "FAIL",
+                           details=f"{n_foreign} manifest rows reference a different HTML")
+    n_rows = len(manifest_rows)
+    if row_numbers == list(range(1, n_rows + 1)):
+        return CohortCheck(sample, check, "PASS",
+                           observed=f"{n_rows} aligned rows")
+    return CohortCheck(sample, check, "FAIL",
+                       observed=f"{row_numbers[:5]}{'...' if len(row_numbers) > 5 else ''}",
+                       expected=f"contiguous 1..{n_rows}",
+                       details="html_table_row indices not contiguous")
+
+
+def resolve_default_track_labels(db_config: Path, genome: str) -> set[str]:
+    """Build the default-track allow-list for the contamination check.
+
+    Delegates to the driver's own helpers (load_db_config, resolve_genome,
+    resolve_default_tracks) so the allow-list stays in sync with what was
+    actually loaded, and returns Path.stem of each resolved default track
+    (matches igv-reports's auto-naming convention — see track_labels_of).
+    Returns an empty set if the driver raises SystemExit while resolving."""
+    import logging
+    probe_log = logging.getLogger("verify_cohort.allow_list_probe")
+    probe_log.addHandler(logging.NullHandler())
+    config = bir.load_db_config(db_config)
+    canonical = bir.resolve_genome(genome)
+    try:
+        default_paths = bir.resolve_default_tracks(config, canonical, probe_log)
+    except SystemExit:
+        # genome not in db_config — fail open with an empty allow-list; the
+        # contamination check will then be over-conservative, never under.
+        return set()
+    stems: set[str] = set()
+    for track_path in default_paths:
+        stems.add(Path(track_path).stem)
+    return stems
+
+
+# ---------------------------------------------------------------------------
+# Output
+# ---------------------------------------------------------------------------
+
+def write_tsv(checks: list[CohortCheck], out: Path | None) -> None:
+    """Emit the cohort report as TSV: always to stdout, and to `out` if given.
+
+    One header line, then one line per check with the columns
+    sample / check / status / observed / expected / details. Tabs, carriage
+    returns and newlines inside a field are replaced by a single space each
+    so a stray control character (e.g. from an exception message in
+    `details`) cannot shift columns or split a row. The file copy is written
+    as UTF-8 (details may contain non-ASCII punctuation); parent directories
+    of `out` are created as needed.
+    """
+    flatten = str.maketrans({"\t": " ", "\r": " ", "\n": " "})
+    lines = ["sample\tcheck\tstatus\tobserved\texpected\tdetails"]
+    for c in checks:
+        fields = (c.sample, c.name, c.status, c.observed, c.expected, c.details)
+        lines.append("\t".join(str(f).translate(flatten) for f in fields))
+    text = "\n".join(lines) + "\n"
+    if out:
+        out.parent.mkdir(parents=True, exist_ok=True)
+        out.write_text(text, encoding="utf-8")
+    sys.stdout.write(text)
+
+
+def write_summary(checks: list[CohortCheck], rows: list[dict], out: Path) -> None:
+    """Write a one-page markdown rollup of the cohort verification to `out`.
+
+    Sections: headline counts (number of samplesheet `rows`, total checks by
+    status), a per-check PASS/FAIL/SKIP table sorted by check name, and a
+    table of every FAIL row (or a "verified clean" note when there are none).
+
+    A status other than PASS/FAIL/SKIP is counted in the grand total but has
+    no column of its own in the per-check table. Cell text has `|` escaped
+    and line breaks flattened so it cannot break the markdown table. The
+    file is written as UTF-8; parent directories are created as needed.
+    """
+    statuses = ("PASS", "FAIL", "SKIP")
+    by_status = dict.fromkeys(statuses, 0)
+    by_check: dict[str, dict[str, int]] = {}
+    fail_rows = []
+    for c in checks:
+        by_status[c.status] = by_status.get(c.status, 0) + 1
+        # .get() keeps an unexpected status from raising KeyError here while
+        # the overall tally above would have accepted it.
+        per_check = by_check.setdefault(c.name, dict.fromkeys(statuses, 0))
+        per_check[c.status] = per_check.get(c.status, 0) + 1
+        if c.status == "FAIL":
+            fail_rows.append(c)
+
+    def cell(value: object) -> str:
+        """Make `value` safe to place inside one markdown table cell."""
+        return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")
+
+    lines = [
+        "# Cohort verification summary\n",
+        f"- samples: **{len(rows)}**",
+        f"- total checks: {sum(by_status.values())} (PASS={by_status['PASS']}, FAIL={by_status['FAIL']}, SKIP={by_status['SKIP']})",
+        "",
+        "## Per-check totals",
+        "",
+        "| check | PASS | FAIL | SKIP |",
+        "|---|---:|---:|---:|",
+    ]
+    for check_name in sorted(by_check):
+        s = by_check[check_name]
+        lines.append(f"| {cell(check_name)} | {s['PASS']} | {s['FAIL']} | {s['SKIP']} |")
+    lines.append("")
+    if fail_rows:
+        lines.append("## Failures")
+        lines.append("")
+        lines.append("| sample | check | observed | expected | details |")
+        lines.append("|---|---|---|---|---|")
+        for c in fail_rows:
+            lines.append(
+                f"| {cell(c.sample)} | {cell(c.name)} | {cell(c.observed)} "
+                f"| {cell(c.expected)} | {cell(c.details)} |"
+            )
+    else:
+        lines.append("## Failures\n\nNone — cohort verified clean.\n")
+    out.parent.mkdir(parents=True, exist_ok=True)
+    out.write_text("\n".join(lines) + "\n", encoding="utf-8")
+
+
+# ---------------------------------------------------------------------------
+# Driver
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    """CLI entry point: verify every sample of a cohort build and report.
+
+    Steps, in order:
+      1. Parse arguments; exit with an error message if the samplesheet or
+         reports directory is missing, or the samplesheet has no data rows.
+      2. Resolve the default-track allow-list (only when a db-config is given).
+      3. Emit the cohort-level HTML coverage check.
+      4. For each samplesheet row: the structural checks, track match,
+         cross-sample contamination, embedded sample id, and — only if a PNG
+         manifest exists for the sample — the three PNG checks.
+      5. Emit the index consistency check.
+      6. Write the TSV (stdout, plus --out if given) and the optional
+         --summary markdown; exit 1 if --fail-on-fail is set and any check
+         is FAIL.
+    """
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--samplesheet", required=True, help="TSV that drove the cohort build (same one passed to build_igvreports.py --samplesheet)")
+    ap.add_argument("--reports-dir", required=True, help="dir containing <sample>.<genome>.html files (and optional index.html)")
+    ap.add_argument("--genome", required=True, help="genome tag (hg38 | mm10 | mm39 | t2t | GRCh37)")
+    ap.add_argument("--db-config", default=str(DEFAULT_DBCONFIG) if DEFAULT_DBCONFIG else None,
+                    help="databases_config.yaml to resolve default-track allow-list. "
+                         "Defaults to $IGV_REPORTS_DB_CONFIG; the allow-list check is skipped "
+                         "if neither is set.")
+    ap.add_argument(
+        "--track-columns", nargs="*", default=DEFAULT_TRACK_COLUMNS,
+        help=f"samplesheet columns containing track paths (default: {DEFAULT_TRACK_COLUMNS}). "
+             "`extra_tracks` is parsed comma-separated if present.",
+    )
+    ap.add_argument("--min-size-mb", type=float, default=0.5, help="per-sample HTML min size (passed through to verify_report)")
+    ap.add_argument("--png-min-size-kb", type=float, default=10.0,
+                    help="PNG min size threshold (only used when --also-png manifests are present). "
+                         "Defaults to 10 KB — empty IGV screenshots are typically <2 KB, "
+                         "useful ones >= 50 KB.")
+    ap.add_argument("--out", help="write the TSV report here in addition to stdout")
+    ap.add_argument("--summary", help="write a one-page markdown rollup here")
+    ap.add_argument("--fail-on-fail", action="store_true", help="exit nonzero if any check is FAIL")
+    args = ap.parse_args()
+
+    # Validate inputs up front so a typo'd path fails fast with a clear message.
+    samplesheet = Path(args.samplesheet)
+    reports_dir = Path(args.reports_dir)
+    if not samplesheet.exists():
+        raise SystemExit(f"ERROR: samplesheet not found: {samplesheet}")
+    if not reports_dir.is_dir():
+        raise SystemExit(f"ERROR: reports-dir not found: {reports_dir}")
+
+    rows = bir.parse_samplesheet(samplesheet)
+    if not rows:
+        raise SystemExit(f"ERROR: samplesheet has no data rows: {samplesheet}")
+
+    # When neither --db-config nor $IGV_REPORTS_DB_CONFIG is set, skip the
+    # contamination allow-list (no false positives, no false negatives — we
+    # just can't claim a track is a "known annotation" without the YAML).
+    allow_list = (resolve_default_track_labels(Path(args.db_config), args.genome)
+                  if args.db_config else set())
+
+    # Pre-compute track-label sets per sample for the contamination check.
+    # Labels are Path.stem of each track path, matching igv-reports's auto-
+    # naming (see track_labels_of).
+    per_sample_labels: dict[str, set[str]] = {
+        r["sample"]: track_labels_of(row_track_paths(r, args.track_columns)) for r in rows
+    }
+    # Union of every row's labels; each sample's "other rows" set is derived
+    # from this inside the loop below.
+    all_labels = set().union(*per_sample_labels.values()) if per_sample_labels else set()
+
+    checks: list[CohortCheck] = []
+    # C1 cohort_html_coverage
+    checks.append(check_html_coverage(rows, reports_dir, args.genome))
+
+    # Per-sample: 6 structural (verify_report) + C2 + C3 + C4
+    for r in rows:
+        sample = r["sample"]
+        # Report filename convention: <sample>.<genome>.html inside reports_dir.
+        html_path = reports_dir / f"{sample}.{args.genome}.html"
+        sites_path = Path(r["sites_bed"])
+        tracks = row_track_paths(r, args.track_columns)
+
+        checks.extend(per_sample_structural(sample, html_path, sites_path, tracks, args.min_size_mb))
+        checks.append(check_sample_tracks_match(sample, html_path, tracks))
+
+        this_labels = per_sample_labels[sample]
+        # Labels shared with this sample are removed here, so only labels
+        # exclusive to other rows can be flagged as contamination.
+        other_labels = all_labels - this_labels
+        checks.append(check_no_cross_sample_contamination(sample, html_path, this_labels, other_labels, allow_list))
+        checks.append(check_sample_id_embedded(sample, html_path))
+
+        # PNG-side checks fire only when build_igvreports.py was run with
+        # --also-png (detected via the per-sample manifest). Cohorts without
+        # PNGs see no extra rows; cohorts with PNGs get three more checks.
+        manifest = find_png_manifest(reports_dir, sample, args.genome)
+        if manifest is not None:
+            checks.append(check_png_count_matches_bed(sample, manifest, sites_path))
+            checks.append(check_pngs_exist_and_nonempty(sample, manifest, args.png_min_size_kb))
+            checks.append(check_png_html_row_alignment(sample, manifest, html_path))
+
+    # C5 index_consistency
+    checks.append(check_index_consistency(rows, reports_dir))
+
+    # The TSV always goes to stdout; --out adds a file copy.
+    out_path = Path(args.out) if args.out else None
+    write_tsv(checks, out_path)
+    if args.summary:
+        write_summary(checks, rows, Path(args.summary))
+
+    # Without --fail-on-fail, FAIL rows are reported but do not cause a
+    # nonzero exit here.
+    if args.fail_on_fail and any(c.status == "FAIL" for c in checks):
+        sys.exit(1)
+
+
+# Run the verifier only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/igv-reports/scripts/verify_report.py b/igv-reports/scripts/verify_report.py
new file mode 100755
index 0000000..744920e
--- /dev/null
+++ b/igv-reports/scripts/verify_report.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+"""verify_report.py — post-render structural verifier for create_report HTMLs.
+
+Author: Samuel Ahuno
+Purpose:
+  Validates that a self-contained create_report HTML actually contains what
+  its inputs declared. Catches the "silent garbage" failure mode where the
+  HTML builds (exit 0, plausible file size) but the content doesn't match the
+  user's intent: wrong region count, wrong coordinates, missing tracks, or a
+  catastrophic empty render.
+
+Dual role:
+  - CLI: `python verify_report.py --html ... --sites ... [--track-config ...]`
+  - Library: importable helpers (parse_table_json, parse_session_dictionary,
+    decode_session_entry, load_sites_bed, expected_track_labels, the
+    `check_*` functions, and the Check dataclass). verify_cohort.py imports
+    these to do per-sample checks + add cross-sample assertions.
+
+Checks emitted (one TSV row per check, ordered):
+  1. html_exists           Output file is a regular file.
+  2. html_min_size         Output >= --min-size-mb (default 0.5 MB).
+  3. region_count          tableJson rows count == sites BED data-row count.
+  4. region_coords         Each BED row finds a matching (chrom, start+1, end[, name])
+                           in the embedded tableJson. BED is 0-based half-open;
+                           create_report stores 1-based start in the table.
+  5. region_sessions       sessionDictionary has an entry for each tableJson row.
+  6. tracks_present        For --track-config <json>: each track's `name` field
+                           appears in the decoded session's tracks[].name list.
+                           For --tracks <path...>: each path's Path.stem appears
+                           in the decoded session's tracks[].name list.
+                           igv-reports strips ONE final suffix when auto-naming
+                           positional tracks, e.g.
+                           `x.5mC.bedgraph` -> `x.5mC`,
+                           `gencode.v47.annotation.gff3.gz` ->
+                           `gencode.v47.annotation.gff3`.
+                           Skipped if neither flag is given.
+                           NOTE: --standalone embeds slices as data: URLs, so
+                           original URL paths are absent from the session — we
+                           match on track NAMES, which are preserved.
+
+Output:
+  TSV with columns: check / status / observed / expected / details
+  status is one of PASS / FAIL / SKIP.
+
+Exit code:
+  0 always, unless --fail-on-fail is set and at least one row is FAIL.
+
+Typical use:
+  python verify_report.py \\
+      --html report.hg38.html \\
+      --sites sites.hg38.bed \\
+      --track-config tracks.json \\
+      --out verify.tsv \\
+      --min-size-mb 1.0 \\
+      --fail-on-fail
+"""
+
+from __future__ import annotations
+
+import argparse
+import base64
+import dataclasses
+import gzip
+import json
+import re
+import sys
+from pathlib import Path
+
+
+@dataclasses.dataclass
+class Check:
+    """Outcome of one named verification check; one TSV row per instance."""
+    # Check identifier, e.g. "html_exists" or "region_count".
+    name: str
+    status: str  # PASS | FAIL | SKIP
+    # Free-text columns of the report; each defaults to empty when a check
+    # has nothing to say for that column.
+    observed: str = ""
+    expected: str = ""
+    details: str = ""
+
+
+# ---------------------------------------------------------------------------
+# Sites-BED loader (mirrors create_report's #-skip behavior)
+# ---------------------------------------------------------------------------
+
+def load_sites_bed(path: Path) -> list[dict]:
+    """Parse a sites BED into a list of {chrom, start, end, name} dicts.
+
+    Empty lines and lines starting with '#' or 'track ' are skipped. `start`
+    and `end` are ints; `name` is column 4 when present, otherwise None.
+    Exits (SystemExit, message prefixed with `<path>:<line number>`) on a
+    data row with fewer than three tab-separated columns or a non-integer
+    start/end."""
+    records: list[dict] = []
+    with path.open() as fh:
+        for lineno, raw in enumerate(fh, start=1):
+            text = raw.rstrip("\n")
+            if text == "" or text.startswith(("#", "track ")):
+                continue
+            fields = text.split("\t")
+            if len(fields) < 3:
+                raise SystemExit(f"{path}:{lineno}: BED row has <3 columns")
+            try:
+                start, end = int(fields[1]), int(fields[2])
+            except ValueError as e:
+                raise SystemExit(f"{path}:{lineno}: non-numeric start/end: {e}")
+            name = fields[3] if len(fields) >= 4 else None
+            records.append({"chrom": fields[0], "start": start, "end": end, "name": name})
+    return records
+
+
+# ---------------------------------------------------------------------------
+# HTML extractors
+# ---------------------------------------------------------------------------
+
+def _extract_balanced_blob(text: str, anchor: str, opener: str = "{") -> str | None:
+    """Return the bracket-balanced substring that follows `anchor`.
+
+    Scanning starts at the first `opener` at or after the first occurrence
+    of `anchor` and stops at the closer that brings the nesting depth back
+    to zero ("}" for "{", "]" for any other opener). Brackets inside
+    double-quoted strings are ignored, and the character after a backslash
+    is always skipped. Returns None when the anchor or opener is absent or
+    the blob never closes."""
+    closer = {"{": "}"}.get(opener, "]")
+    anchor_at = text.find(anchor)
+    if anchor_at < 0:
+        return None
+    begin = text.find(opener, anchor_at)
+    if begin < 0:
+        return None
+    nesting = 0
+    inside_string = False
+    skip_next = False
+    for pos in range(begin, len(text)):
+        ch = text[pos]
+        if skip_next:
+            # Previous character was a backslash: this one is escaped.
+            skip_next = False
+        elif ch == "\\":
+            skip_next = True
+        elif ch == '"':
+            inside_string = not inside_string
+        elif inside_string:
+            pass
+        elif ch == opener:
+            nesting += 1
+        elif ch == closer:
+            nesting -= 1
+            if nesting == 0:
+                return text[begin:pos + 1]
+    return None
+
+
+def parse_table_json(html: str) -> dict | None:
+    """Extract and JSON-decode the object assigned after `tableJson = `.
+    Returns None when no such blob is found; json.loads errors propagate."""
+    blob = _extract_balanced_blob(html, "tableJson = ", "{")
+    return json.loads(blob) if blob else None
+
+
+def parse_session_dictionary(html: str) -> dict | None:
+    """Extract and JSON-decode the object assigned after `sessionDictionary = `.
+    Returns None when no such blob is found; json.loads errors propagate."""
+    blob = _extract_balanced_blob(html, "sessionDictionary = ", "{")
+    return json.loads(blob) if blob else None
+
+
+def decode_session_entry(data_url: str) -> dict | None:
+    """Decode one sessionDictionary value into the session dict it carries.
+
+    The value is expected to look like 'data:application/gzip;base64,XXXX':
+    the payload after the prefix is base64-decoded, gunzipped and parsed as
+    JSON. Any failure along the way (wrong prefix, bad base64, bad gzip,
+    bad JSON, non-string input) yields None — deliberately non-fatal."""
+    try:
+        parsed = re.match(r"data:application/gzip;base64,(.+)", data_url, flags=re.DOTALL)
+        if parsed is None:
+            return None
+        compressed = base64.b64decode(parsed.group(1))
+        payload = gzip.decompress(compressed)
+        return json.loads(payload)
+    except Exception:
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Track-input parser
+# ---------------------------------------------------------------------------
+
+def expected_track_labels(tracks: list[str] | None, track_config: Path | None) -> list[str]:
+    """List the track NAMES expected in the embedded igv.js session.
+
+    Names rather than URLs are matched because `--standalone` replaces every
+    track URL with an inlined `data:...` URL after slicing, so URL paths are
+    unrecoverable from the embedded session; names are preserved.
+
+    - `track_config` given and the file exists: the non-empty `name` of each
+      entry in that JSON, verbatim. `tracks` is then ignored.
+    - otherwise, for positional `tracks`: Path(p).stem of each path
+      (igv-reports strips ONE final suffix when auto-naming positional
+      tracks — verified 2026-05-16 against create_report 1.16.2:
+      `colo829bl_PAU59807.5mC.bedgraph` -> `colo829bl_PAU59807.5mC`,
+      `gencode.v47.annotation.gff3.gz` -> `gencode.v47.annotation.gff3`,
+      `x.bam` -> `x`).
+
+    An empty list means 'check skipped'.
+    """
+    if track_config and track_config.exists():
+        with track_config.open() as fh:
+            entries = json.load(fh)
+        return [entry.get("name") for entry in entries if entry.get("name")]
+    return [Path(track).stem for track in tracks or []]
+
+
+# ---------------------------------------------------------------------------
+# Individual checks
+# ---------------------------------------------------------------------------
+
+def check_html_exists(html: Path) -> Check:
+    """html_exists: PASS when `html` is a regular file, FAIL otherwise."""
+    shown = str(html)
+    if not html.is_file():
+        return Check("html_exists", "FAIL", observed=shown, details="not a regular file")
+    return Check("html_exists", "PASS", observed=shown)
+
+
+def check_html_min_size(html: Path, floor_mb: float) -> Check:
+    """html_min_size: PASS when the file is at least `floor_mb` MiB
+    (bytes / 1024 / 1024). The file must exist — stat() errors propagate."""
+    size_mb = html.stat().st_size / 1024 / 1024
+    if size_mb >= floor_mb:
+        verdict = "PASS"
+    else:
+        verdict = "FAIL"
+    return Check("html_min_size", verdict,
+                 observed=f"{size_mb:.2f} MB", expected=f">= {floor_mb:.2f} MB")
+
+
+def check_region_count(bed_rows: list[dict], table_json: dict | None) -> Check:
+    """region_count: the number of tableJson rows must equal the number of
+    BED data rows. FAIL outright when tableJson was not found."""
+    if table_json is None:
+        return Check("region_count", "FAIL", details="tableJson not found in HTML")
+    in_html = len(table_json.get("rows", []))
+    in_bed = len(bed_rows)
+    verdict = "FAIL" if in_html != in_bed else "PASS"
+    return Check("region_count", verdict, observed=str(in_html), expected=str(in_bed))
+
+
+def check_region_coords(bed_rows: list[dict], table_json: dict | None) -> Check:
+    """region_coords: every BED row must have a tableJson row with the same
+    (chrom, start+1, end), and the same name when both sides carry one.
+
+    The +1 converts BED's 0-based half-open start to the 1-based start the
+    HTML table stores. Table columns are located via the `Chrom`, `Start`,
+    `End` and optional `Name` headers. FAIL lists up to five offenders."""
+    if table_json is None:
+        return Check("region_coords", "FAIL", details="tableJson not found")
+    headers = table_json.get("headers", [])
+    try:
+        i_chrom, i_start, i_end = (headers.index(h) for h in ("Chrom", "Start", "End"))
+    except ValueError as e:
+        return Check("region_coords", "FAIL", details=f"missing column in tableJson headers: {e}")
+    i_name = headers.index("Name") if "Name" in headers else None
+
+    # Map each HTML coordinate triple to its name (None without a Name
+    # column); a repeated triple keeps the name of its last row.
+    name_by_coords: dict = {}
+    for row in table_json.get("rows", []):
+        coords = (row[i_chrom], int(row[i_start]), int(row[i_end]))
+        name_by_coords[coords] = row[i_name] if i_name is not None else None
+
+    problems: list[str] = []
+    for b in bed_rows:
+        region = f"{b['chrom']}:{b['start']}-{b['end']}"
+        coords = (b["chrom"], b["start"] + 1, b["end"])
+        if coords not in name_by_coords:
+            problems.append(region)
+        elif i_name is not None and b["name"] is not None and name_by_coords[coords] != b["name"]:
+            # If both have a name, names must match.
+            problems.append(f"{region} name mismatch (BED={b['name']!r}, HTML={name_by_coords[coords]!r})")
+    total = len(bed_rows)
+    if not problems:
+        return Check("region_coords", "PASS", observed=f"{total}/{total} matched")
+    return Check(
+        "region_coords", "FAIL",
+        observed=f"{total - len(problems)}/{total} matched",
+        expected=f"{total}/{total} matched",
+        details="; ".join(problems[:5]) + (" ..." if len(problems) > 5 else ""),
+    )
+
+
+def check_region_sessions(table_json: dict | None, session_dict: dict | None) -> Check:
+    """region_sessions: sessionDictionary must hold a key for every table
+    row, keys being the stringified row indices "0".."N-1". Extra session
+    keys are tolerated. FAIL when either blob is missing."""
+    if table_json is None or session_dict is None:
+        return Check("region_sessions", "FAIL", details="tableJson or sessionDictionary missing")
+    n_rows = len(table_json.get("rows", []))
+    have = set(session_dict.keys())
+    absent = {str(i) for i in range(n_rows)} - have
+    if not absent:
+        return Check(
+            "region_sessions", "PASS",
+            observed=str(len(session_dict)),
+            expected=f">={n_rows} (one per row)",
+        )
+    return Check(
+        "region_sessions", "FAIL",
+        observed=f"keys={sorted(have)[:5]}...",
+        expected=f"keys 0..{n_rows-1}",
+        details=f"missing keys: {sorted(absent)[:5]}",
+    )
+
+
+def check_tracks_present(
+    session_dict: dict | None,
+    expected_labels: list[str],
+) -> Check:
+    if not expected_labels:
+        return Check("tracks_present", "SKIP", details="neither --tracks nor --track-config provided")
+    if session_dict is None or not session_dict:
+        return Check("tracks_present", "FAIL", details="sessionDictionary missing or empty")
+    # Decode the first available session entry. Track names are identical
+    # across per-region sessions (only the data: URL slices differ).
+    sample_key = sorted(session_dict.keys())[0]
+    session = decode_session_entry(session_dict[sample_key])
+    if session is None:
+        return Check("tracks_present", "FAIL", details="failed to decode/gunzip session entry")
+    session_track_names = {t.get("name") for t in session.get("tracks", []) if t.get("name")}
+    misses = [lab for lab in expected_labels if lab not in session_track_names]
+    if misses:
+        return Check(
+            "tracks_present", "FAIL",
+            observed=f"{len(expected_labels) - len(misses)}/{len(expected_labels)} found",
+            expected=f"{len(expected_labels)}/{len(expected_labels)} found",
+            details="missing: " + ", ".join(misses[:5]) + (" ..." if len(misses) > 5 else ""),
+        )
+    return Check(
+        "tracks_present", "PASS",
+        observed=f"{len(expected_labels)}/{len(expected_labels)} found",
+    )
+
+
+# ---------------------------------------------------------------------------
+# Driver
+# ---------------------------------------------------------------------------
+
+def write_tsv(checks: list[Check], out: Path | None) -> None:
+    lines = ["check\tstatus\tobserved\texpected\tdetails"]
+    for c in checks:
+        lines.append(f"{c.name}\t{c.status}\t{c.observed}\t{c.expected}\t{c.details}")
+    text = "\n".join(lines) + "\n"
+    if out:
+        out.parent.mkdir(parents=True, exist_ok=True)
+        out.write_text(text)
+    # Always also emit to stdout for piping / inspection.
+    sys.stdout.write(text)
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    ap.add_argument("--html", required=True, help="path to the create_report HTML to verify")
+    ap.add_argument("--sites", required=True, help="path to the sites BED that was passed to create_report")
+    ap.add_argument("--tracks", nargs="*", default=[], help="track paths that were passed to create_report (--tracks mode)")
+    ap.add_argument("--track-config", help="track config JSON that was passed to create_report (--track-config mode)")
+    ap.add_argument("--min-size-mb", type=float, default=0.5, help="minimum acceptable HTML size in MB (default: 0.5)")
+    ap.add_argument("--out", help="write the TSV report here in addition to stdout")
+    ap.add_argument("--fail-on-fail", action="store_true", help="exit nonzero if any check is FAIL")
+    args = ap.parse_args()
+
+    html_path = Path(args.html)
+    sites_path = Path(args.sites)
+    out_path = Path(args.out) if args.out else None
+    track_config = Path(args.track_config) if args.track_config else None
+
+    checks: list[Check] = [check_html_exists(html_path)]
+
+    # If the HTML doesn't exist, every downstream check would crash; mark them SKIP and bail.
+    if checks[0].status == "FAIL":
+        checks.append(Check("html_min_size", "SKIP", details="HTML missing"))
+        checks.append(Check("region_count", "SKIP", details="HTML missing"))
+        checks.append(Check("region_coords", "SKIP", details="HTML missing"))
+        checks.append(Check("region_sessions", "SKIP", details="HTML missing"))
+        checks.append(Check("tracks_present", "SKIP", details="HTML missing"))
+        write_tsv(checks, out_path)
+        if args.fail_on_fail:
+            sys.exit(1)
+        return
+
+    checks.append(check_html_min_size(html_path, args.min_size_mb))
+
+    html_text = html_path.read_text()
+    table_json = parse_table_json(html_text)
+    session_dict = parse_session_dictionary(html_text)
+    bed_rows = load_sites_bed(sites_path)
+
+    checks.append(check_region_count(bed_rows, table_json))
+    checks.append(check_region_coords(bed_rows, table_json))
+    checks.append(check_region_sessions(table_json, session_dict))
+    checks.append(check_tracks_present(session_dict, expected_track_labels(args.tracks, track_config)))
+
+    write_tsv(checks, out_path)
+
+    if args.fail_on_fail and any(c.status == "FAIL" for c in checks):
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/igv-reports/tests/.gitignore b/igv-reports/tests/.gitignore
new file mode 100644
index 0000000..ae835ac
--- /dev/null
+++ b/igv-reports/tests/.gitignore
@@ -0,0 +1,11 @@
+# pytest + Python cache directories
+__pycache__/
+.pytest_cache/
+
+# Integration scratch outputs (per scenarios.sh trap cleanup, these only
+# linger on test failure or when KEEP_REPORTS=1)
+integration/*/reports/
+integration/*/samplesheet*.tsv
+integration/*/sites*.bed
+integration/*/anchors*.tsv
+integration/*/logs/
diff --git a/igv-reports/tests/fixtures/README.md b/igv-reports/tests/fixtures/README.md
new file mode 100644
index 0000000..1c0078b
--- /dev/null
+++ b/igv-reports/tests/fixtures/README.md
@@ -0,0 +1,51 @@
+# tests/fixtures
+
+Committed test fixtures derived from publicly released bioinformatics data.
+Used by the smoke + integration test layers; safe to redistribute.
+
+## tiny_colo829.hg38.bam
+
+A 457 KB BAM (+ 85 KB `.bai`) sliced from Oxford Nanopore Technologies'
+publicly released COLO829BL (matched normal) ONT reference dataset.
+
+| Property | Value |
+|---|---|
+| **Source dataset** | ONT COLO829 / COLO829BL R10.4.1 5kHz sup basecalls |
+| **ENA project** | PRJEB57425 |
+| **Source flowcell** | PAU59807 (COLO829BL) |
+| **Basecaller** | Dorado, model `dna_r10.4.1_e8.2_400bps_sup@v5.0.0`, `5mCG_5hmCG@latest,6mA@latest` |
+| **Reference** | hg38 (`Homo_sapiens_assembly38.fasta`) |
+| **Slice regions** | `chr2:25245000-25248000` (around DNMT3A), `chr7:148882000-148886000` (around EZH2) |
+| **Subsample** | 20% reads, seed 42 (`samtools view --subsample 0.2 --subsample-seed 42`) |
+| **Filtering** | `-F 1536` (drops QC-failed reads (0x200) + PCR/optical duplicates (0x400); supplementary alignments (0x800) are kept — matches igv-reports' BamReader default) |
+| **License** | The source data is openly released by ONT; this slice inherits that status. Slicing/subsampling is non-creative transformation. |
+
+## Anchor sanity counts (used by smoke + integration tests)
+
+| Region | `samtools view -c -F 1536` |
+|---|---|
+| `chr2:25246500-25246501` | **5** |
+| `chr7:148884000-148884001` | **9** |
+
+These counts are the contract: any change to the fixture (regeneration with
+different params, etc.) must preserve these exact integers, or update the
+constants in `tests/smoke/test_slice_count.py` and the integration `scenarios.sh`.
+
+## Regenerate
+
+```bash
+bash tests/fixtures/build_fixtures.sh
+```
+
+Requires `samtools` (via PATH or `$SAMTOOLS_SIF`) and a local copy of the ONT
+COLO829 release pointed to by `$COLO829BL_BAM`. Public source: ENA project
+PRJEB57425.
+
+## Why these regions
+
+The two sites are coding mutations in well-known cancer driver genes
+(DNMT3A R882, EZH2 Y646) at coordinates the demos already use. Picking
+real loci keeps the test data biologically interpretable and lets the same
+fixture exercise both the parser layer (anchors named for real variants
+read naturally) and the slice-decode layer (read counts you can sanity-check
+in IGV against the source BAM if needed).
diff --git a/igv-reports/tests/fixtures/build_fixtures.sh b/igv-reports/tests/fixtures/build_fixtures.sh
new file mode 100755
index 0000000..54a7d89
--- /dev/null
+++ b/igv-reports/tests/fixtures/build_fixtures.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# build_fixtures.sh — regenerate tests/fixtures/tiny_colo829.hg38.bam from
+# the publicly released ONT COLO829BL reads.
+#
+# The output BAM is committed to the repo (it's small public data — see
+# fixtures/README.md). Regenerate only when you need to expand the slice
+# regions, change subsample rate, or update for a new basecaller version.
+# If the output counts change, also update tests/smoke/test_slice_count.py
+# anchor constants and any integration scenarios.sh expected values.
+set -euo pipefail
+
+FIX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Resolve samtools. SAM is grown in place: bash < 4.4 with `set -u` aborts on expanding an empty array.
+if [[ -n "${SAMTOOLS_SIF:-}" && -f "${SAMTOOLS_SIF}" ]]; then
+    SAM=(apptainer exec --cleanenv)
+    if [[ -n "${IGV_REPORTS_BIND:-}" ]]; then
+        IFS=':' read -ra BP <<< "${IGV_REPORTS_BIND}"
+        for p in ${BP[@]+"${BP[@]}"}; do [[ -d "$p" ]] && SAM+=(--bind "$p"); done
+    fi
+    SAM+=("${SAMTOOLS_SIF}" samtools)
+elif command -v samtools >/dev/null 2>&1; then
+    SAM=(samtools)
+else
+    echo "ERROR: no samtools available — install (pip/conda) or set SAMTOOLS_SIF" >&2
+    exit 1
+fi
+
+# Source BAM — must be supplied via env. Public source: ENA project PRJEB57425
+# (ONT COLO829 release).
+SRC="${COLO829BL_BAM:-}"
+if [[ -z "${SRC}" || ! -f "${SRC}" ]]; then
+    echo "ERROR: source BAM not provided." >&2
+    echo "       Set COLO829BL_BAM=<path-to-COLO829BL-ONT-BAM> and re-run." >&2
+    echo "       Public source: ENA project PRJEB57425." >&2
+    exit 1
+fi
+
+OUT="${FIX_DIR}/tiny_colo829.hg38.bam"
+
+echo "[build_fixtures] source: ${SRC}"
+echo "[build_fixtures] output: ${OUT}"
+echo "[build_fixtures] regions: chr2:25245000-25248000 (DNMT3A), chr7:148882000-148886000 (EZH2)"
+echo "[build_fixtures] subsample: 0.2, seed 42"
+
+"${SAM[@]}" view -bh -F 1536 --subsample 0.2 --subsample-seed 42 \
+    "${SRC}" \
+    chr2:25245000-25248000 chr7:148882000-148886000 \
+    -o "${OUT}"
+"${SAM[@]}" index "${OUT}"
+
+echo "[build_fixtures] sizes:"
+ls -lh "${OUT}" "${OUT}.bai"
+
+echo "[build_fixtures] anchor counts (must remain stable across regens):"
+chr2_n=$("${SAM[@]}" view -c -F 1536 "${OUT}" chr2:25246500-25246501)
+chr7_n=$("${SAM[@]}" view -c -F 1536 "${OUT}" chr7:148884000-148884001)
+echo "  chr2:25246500-25246501 = ${chr2_n}"
+echo "  chr7:148884000-148884001 = ${chr7_n}"
+
+if [[ "${chr2_n}" != "5" || "${chr7_n}" != "9" ]]; then
+    echo >&2
+    echo "WARNING: anchor counts have changed from the committed fixture's contract" >&2
+    echo "         (chr2=5, chr7=9). Update tests/smoke/test_slice_count.py and any" >&2
+    echo "         integration scenarios.sh expected values, then commit both the new" >&2
+    echo "         BAM and the updated test constants together." >&2
+fi
diff --git a/igv-reports/tests/fixtures/tiny_colo829.hg38.bam b/igv-reports/tests/fixtures/tiny_colo829.hg38.bam
new file mode 100644
index 0000000..beaeca8
Binary files /dev/null and b/igv-reports/tests/fixtures/tiny_colo829.hg38.bam differ
diff --git a/igv-reports/tests/fixtures/tiny_colo829.hg38.bam.bai b/igv-reports/tests/fixtures/tiny_colo829.hg38.bam.bai
new file mode 100644
index 0000000..0920cae
Binary files /dev/null and b/igv-reports/tests/fixtures/tiny_colo829.hg38.bam.bai differ
diff --git a/igv-reports/tests/integration/anchor_verify/README.md b/igv-reports/tests/integration/anchor_verify/README.md
new file mode 100644
index 0000000..2c4b311
--- /dev/null
+++ b/igv-reports/tests/integration/anchor_verify/README.md
@@ -0,0 +1,92 @@
+# anchor_verify_demo — regression test for `verify_anchors.py`
+
+End-to-end check that the anchor-based content verifier catches the four
+failure modes it's designed to catch. Self-asserting — exits nonzero on any
+mismatch.
+
+## What it does
+
+1. Generates a 2-sample samplesheet (TSV) pointing at two real COLO829 ONT BAMs.
+2. Calls `verify_anchors.py generate` to freeze `samtools view -c` counts
+   into `anchors.hg38.tsv` (the regression fixture).
+3. Calls `build_igvreports.py --samplesheet ... --no-verify` to produce
+   `reports/sample_{1,2}.hg38.html` + `index.html`.
+4. Runs `verify_anchors.py verify-cohort` against the clean cohort (all PASS).
+5. Runs four corruption scenarios, each asserting the expected outcome:
+
+   | Scenario | Corruption | Expected |
+   |---|---|---|
+   | A | Mutate anchors `expected` to 9999 (real ~56) | `sample_1/chr2/FAIL` (diff_ratio) |
+   | B | Set anchor `min=1000` (real ~56) | `sample_1/chr2/FAIL` (min bound) |
+   | C | Mangle a session's base64 payload (`H4sI` → `XXXX`) | `sample_1/*/FAIL` (decode), sample_2 PASS |
+   | D | Drop an anchor row | row absent from output, others PASS |
+
+6. Cleans up generated `reports/`, samplesheet, sites BED, anchors TSVs, and
+   `logs/` on exit (set `KEEP_REPORTS=1` to leave them).
+
+## Run
+
+```bash
+bash tests/integration/anchor_verify/scenarios.sh
+```
+
+Or as part of the full test suite:
+
+```bash
+bash tests/run_all.sh                  # all layers
+bash tests/run_all.sh --integration-only
+```
+
+Runtime: **~6-8 min cold** (the cohort build dominates); **~15 s** when the
+cohort is cached. Set `REBUILD=1` to force a rebuild of the HTMLs; otherwise
+existing HTMLs in `reports/` are reused so verifier iteration is seconds.
+
+Disk: ~10 MB temp under `reports/`, auto-cleaned via `trap`.
+
+## Why these scenarios
+
+Together with the clean baseline (scenario 0), the four scenarios cover every status the verifier emits:
+
+- **PASS** (scenario 0): observed within tolerance of expected, or within
+  `min`/`max` bounds.
+- **FAIL — tolerance** (A): observed read count differs from expected beyond
+  the per-row tolerance (default 5%). Catches the silent sample-swap case
+  where the wrong source BAM was wired into the build pipeline — same track
+  name, different read counts.
+- **FAIL — bound** (B): `min`/`max` columns let you assert "this integration
+  site should have ≥20 reads supporting it" — a stronger claim than
+  tolerance, useful for known-positive sites.
+- **FAIL — broken decode** (C): the HTML's session entry can't be gunzipped
+  or its inner BAM data URL can't be base64-decoded. Catches arbitrary HTML
+  tampering or `create_report` version drift that breaks the embedding format.
+- **SKIP** (D): an anchor row references a `(sample, region)` pair that the
+  HTML doesn't render. Dropped silently because anchor TSVs are intentionally
+  re-usable across runs — a region that exists in one cohort's anchors but
+  not in another cohort's HTMLs is benign, not a build failure.
+
+## BAM paths (parameterized)
+
+There are no built-in BAM paths; supply the two input BAMs via
+environment variables:
+
+```bash
+IGV_REPORTS_TEST_BAM_1=/path/to/sample1.bam \
+IGV_REPORTS_TEST_BAM_2=/path/to/sample2.bam \
+    bash tests/integration/anchor_verify/scenarios.sh
+```
+
+The verifier doesn't care which BAMs, only that they're different so
+scenarios A-C have the contrast they need. If either variable is unset or
+its file is missing, the script exits **77** (Automake skipped-test
+convention) and `run_all.sh` reports it as a skip, not a failure.
+
+## Why this is `integration`, not `smoke` or `unit`
+
+This test depends on real BAMs and on `create_report` actually running, so
+it can't fit in `tests/smoke/` (which uses only the committed COLO829 slice
+fixture and runs in seconds) or `tests/unit/` (parser-only, no I/O).
+
+For the parser-level regression checks that gave rise to this verifier,
+see [tests/unit/test_verify_anchors.py](../../unit/test_verify_anchors.py).
+For the samtools/decode round-trip, see
+[tests/smoke/test_slice_count.py](../../smoke/test_slice_count.py).
diff --git a/igv-reports/tests/integration/anchor_verify/scenarios.sh b/igv-reports/tests/integration/anchor_verify/scenarios.sh
new file mode 100755
index 0000000..fcb95b9
--- /dev/null
+++ b/igv-reports/tests/integration/anchor_verify/scenarios.sh
@@ -0,0 +1,191 @@
+#!/usr/bin/env bash
+# scenarios.sh — end-to-end integration test for scripts/verify_anchors.py.
+#
+# Builds a 2-sample cohort, freezes BAM-read-count anchors from the source
+# BAMs, verifies the clean cohort, then runs four corruption scenarios and
+# asserts each triggers the expected PASS / FAIL / SKIP outcomes.
+#
+# Runtime: ~6-8 min cold (cohort build dominates); ~15 s when cohort is cached.
+# Disk: ~10 MB under ./reports/ (auto-cleaned on success unless KEEP_REPORTS=1).
+#
+# BAM source — two different indexed BAMs (any organism, any size). There are
+# no built-in defaults; both paths must be supplied via env vars:
+#   IGV_REPORTS_TEST_BAM_1, _2
+# The test SKIPs (exit 77) when either variable is unset or its file is missing.
+set -euo pipefail
+
+EX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SKILL_DIR="$(cd "${EX_DIR}/../../.." && pwd)"
+BUILD="${SKILL_DIR}/scripts/build_igvreports.py"
+ANCHORS="${SKILL_DIR}/scripts/verify_anchors.py"
+
+# BAM sources — must be supplied via env vars. No defaults: integration tests
+# require two indexed BAMs from a public release.
+BAM_S1="${IGV_REPORTS_TEST_BAM_1:-}"
+BAM_S2="${IGV_REPORTS_TEST_BAM_2:-}"
+
+for bam in "${BAM_S1}" "${BAM_S2}"; do
+    if [[ -z "${bam}" || ! -f "${bam}" ]]; then
+        echo "SKIP: integration test needs two indexed BAMs." >&2
+        echo "      Set IGV_REPORTS_TEST_BAM_{1,2} to paths of two .bam files (each with sibling .bai)." >&2
+        exit 77   # Automake skipped-test convention (not a POSIX-defined status)
+    fi
+done
+
+SHEET="${EX_DIR}/samplesheet.hg38.tsv"
+SITES="${EX_DIR}/sites.hg38.bed"
+OUTDIR="${EX_DIR}/reports"
+ANCHORS_TSV="${EX_DIR}/anchors.hg38.tsv"
+
+cleanup() {
+    # Delete every generated artifact unless KEEP_REPORTS is set to a non-empty value.
+    if [[ -z "${KEEP_REPORTS:-}" ]]; then
+        rm -rf "${OUTDIR}" "${SHEET}" "${SITES}" "${ANCHORS_TSV}" "${EX_DIR}/logs" \
+               "${EX_DIR}/anchors.corrupted.tsv" "${EX_DIR}/anchors.min.tsv" "${EX_DIR}/anchors.subset.tsv"
+    else
+        echo "(KEEP_REPORTS set — leaving artifacts in ${OUTDIR} and ${EX_DIR}/anchors* for inspection)"
+    fi
+}
+trap 'rc=$?; if [[ $rc -eq 0 ]]; then cleanup; else echo "(scenarios.sh exited $rc — leaving artifacts for debug)"; fi' EXIT
+
+# Requires `create_report` (pip install igv-reports) and `samtools` on PATH.
+# If you use a conda env, activate it before invoking this script.
+
+# Defensive restore: a previous run may have died mid-corruption leaving
+# .bak files. Bring HTMLs back to their original state before we start.
+for f in "${OUTDIR}"/*.hg38.html.bak; do
+    [[ -f "$f" ]] && mv "$f" "${f%.bak}" && echo "(restored ${f%.bak} from .bak)"
+done 2>/dev/null || true  # best-effort: an unmatched glob or failed mv must not abort the run
+
+# --- 1. Inputs -----------------------------------------------------------------
+# Two SNV-style point sites; --flanking 300 keeps BAM slicing in seconds even
+# at 167 GB source BAMs. We're testing the verifier, not the renderer.
+cat >"${SITES}" <<EOF
+#chrom	start	end	name
+chr2	25246500	25246501	DNMT3A_SNV
+chr7	148884000	148884001	EZH2_SNV
+EOF
+
+printf 'sample\tbam_tumor\tsites_bed\n'  >"${SHEET}"
+printf 'sample_1\t%s\t%s\n' "${BAM_S1}" "${SITES}" >>"${SHEET}"
+printf 'sample_2\t%s\t%s\n' "${BAM_S2}" "${SITES}" >>"${SHEET}"
+
+# --- 2. Generate anchors from source BAMs --------------------------------------
+echo "=== generate: freezing samtools-view counts as anchors ==="
+python "${ANCHORS}" generate \
+    --samplesheet "${SHEET}" \
+    --sites "${SITES}" \
+    --out "${ANCHORS_TSV}" 2>&1 | tail -6  # show only the last 6 lines of the combined stdout+stderr
+echo
+
+# --- 3. Build cohort -----------------------------------------------------------
+if [[ -z "${REBUILD:-}" \
+      && -f "${OUTDIR}/sample_1.hg38.html" \
+      && -f "${OUTDIR}/sample_2.hg38.html" ]]; then
+    echo "=== reusing existing cohort in ${OUTDIR} (set REBUILD=1 to force) ==="
+else
+    echo "=== building 2-sample cohort (this takes ~5-7 min on warm node) ==="
+    python "${BUILD}" \
+        --samplesheet "${SHEET}" \
+        --genome hg38 \
+        --flanking 300 \
+        --type mutation \
+        --info-columns name \
+        --output-dir "${OUTDIR}" \
+        --no-apptainer \
+        --no-verify  # auto-verify is structural; we exercise the anchor verifier ourselves below
+fi
+echo
+
+assert_status() {
+    # Usage: assert_status <sample> <region> <expected_status> <verify_tsv>  (reads col 4 of the first matching row)
+    local want_sample="$1" want_region="$2" want_status="$3" report="$4" got
+    got=$(awk -F'\t' -v s="$want_sample" -v r="$want_region" '$1==s && $3==r {print $4; exit}' "$report")
+    if [[ "$got" == "$want_status" ]]; then
+        echo "  OK   sample=$want_sample region=$want_region status=$got"
+        return 0
+    fi
+    echo "  FAIL ASSERTION: sample=$want_sample region=$want_region expected=$want_status actual=${got:-<missing>}"
+    return 1
+}
+
+# --- 4. Scenario 0: clean cohort, all PASS -------------------------------------
+echo "=== scenario 0: clean — all anchors expected PASS ==="
+python "${ANCHORS}" verify-cohort \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --anchors "${ANCHORS_TSV}" \
+    --out "${OUTDIR}/scenario0.tsv" \
+    --fail-on-fail >/dev/null
+echo "  baseline: 4/4 PASS (verify-cohort exited 0)"
+echo
+
+# --- 5. Scenario A: tolerance violation ----------------------------------------
+echo "=== scenario A: corrupt expected count outside tolerance — FAIL on diff_ratio ==="
+awk -F'\t' 'BEGIN{OFS="\t"} /^#/{print; next} NR==2 {$7=9999; print; next} {print}' "${ANCHORS_TSV}" > "${EX_DIR}/anchors.corrupted.tsv"
+python "${ANCHORS}" verify-cohort \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --anchors "${EX_DIR}/anchors.corrupted.tsv" \
+    --out "${OUTDIR}/A.tsv" >/dev/null || true
+assert_status "sample_1" "chr2:25246500-25246501" "FAIL" "${OUTDIR}/A.tsv"
+assert_status "sample_1" "chr7:148884000-148884001" "PASS" "${OUTDIR}/A.tsv"
+echo
+
+# --- 6. Scenario B: min/max bound violation ------------------------------------
+echo "=== scenario B: anchor min=1000 (real count ~56) — FAIL on min ==="
+awk -F'\t' 'BEGIN{OFS="\t"} /^#/{print; next} NR==2 {$9=1000; print; next} {print}' "${ANCHORS_TSV}" > "${EX_DIR}/anchors.min.tsv"
+python "${ANCHORS}" verify-cohort \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --anchors "${EX_DIR}/anchors.min.tsv" \
+    --out "${OUTDIR}/B.tsv" >/dev/null || true
+assert_status "sample_1" "chr2:25246500-25246501" "FAIL" "${OUTDIR}/B.tsv"
+echo
+
+# --- 7. Scenario C: corrupt data URL inside HTML — FAIL on decode --------------
+echo "=== scenario C: mangle a session's base64 payload — FAIL on session decode ==="
+cp "${OUTDIR}/sample_1.hg38.html" "${OUTDIR}/sample_1.hg38.html.bak"
+# Replace one base64 chunk inside a session data URL. The H4sI prefix is the
+# base64-encoded gzip magic 0x1f 0x8b 0x08; mangling it breaks the gunzip step
+# that decodes the session, simulating arbitrary HTML tampering.
+sed -i 's|data:application/gzip;base64,H4sI|data:application/gzip;base64,XXXX|g' "${OUTDIR}/sample_1.hg38.html"
+python "${ANCHORS}" verify-cohort \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --anchors "${ANCHORS_TSV}" \
+    --out "${OUTDIR}/C.tsv" >/dev/null || true
+# Both regions in sample_1 should FAIL (sed hits every session URL in the file).
+assert_status "sample_1" "chr2:25246500-25246501" "FAIL" "${OUTDIR}/C.tsv"
+assert_status "sample_1" "chr7:148884000-148884001" "FAIL" "${OUTDIR}/C.tsv"
+# sample_2 unaffected.
+assert_status "sample_2" "chr2:25246500-25246501" "PASS" "${OUTDIR}/C.tsv"
+mv "${OUTDIR}/sample_1.hg38.html.bak" "${OUTDIR}/sample_1.hg38.html"
+echo
+
+# --- 8. Scenario D: anchor missing for a (sample, region) — SKIP not FAIL ------
+echo "=== scenario D: drop sample_1's chr2 anchor — that region SKIPs, others PASS ==="
+awk -F'\t' 'BEGIN{OFS="\t"} /^#/{print; next} !($1=="sample_1" && $4=="chr2"){print}' "${ANCHORS_TSV}" > "${EX_DIR}/anchors.subset.tsv"
+python "${ANCHORS}" verify-cohort \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --anchors "${EX_DIR}/anchors.subset.tsv" \
+    --out "${OUTDIR}/D.tsv" \
+    --fail-on-fail >/dev/null
+# The dropped anchor shouldn't appear at all. Match the full region string: that is what column 3 holds.
+n_rows=$(awk -F'\t' 'NR>1 && $1=="sample_1" && $3=="chr2:25246500-25246501"' "${OUTDIR}/D.tsv" | wc -l)
+if [[ "${n_rows}" -ne 0 ]]; then
+    echo "  FAIL ASSERTION: sample_1/chr2 should NOT appear (dropped anchor) but got ${n_rows} rows"
+    exit 1
+fi
+echo "  OK   sample_1/chr2 anchor dropped — no row emitted"
+assert_status "sample_1" "chr7:148884000-148884001" "PASS" "${OUTDIR}/D.tsv"
+assert_status "sample_2" "chr2:25246500-25246501" "PASS" "${OUTDIR}/D.tsv"
+echo
+
+echo "=== all 4 scenarios PASSED — verify_anchors.py behaves as expected ==="
diff --git a/igv-reports/tests/integration/cohort_verify/README.md b/igv-reports/tests/integration/cohort_verify/README.md
new file mode 100644
index 0000000..05de1d0
--- /dev/null
+++ b/igv-reports/tests/integration/cohort_verify/README.md
@@ -0,0 +1,79 @@
+# cohort_verify_demo — regression test for `verify_cohort.py`
+
+End-to-end check that the cohort verifier catches the four failure modes
+it's designed to catch. Self-asserting — exits nonzero on any mismatch.
+
+## What it does
+
+1. Generates a 3-sample samplesheet (TSV) pointing at three real COLO829 ONT BAMs.
+2. Calls `build_igvreports.py --samplesheet ... --no-verify` to produce
+   `reports/sample_{1,2,3}.hg38.html` + `index.html`.
+3. Runs `verify_cohort.py` against the clean cohort (expects all PASS).
+4. Runs four corruption scenarios, each asserting the expected check FAILs:
+
+   | Scenario | Corruption | Expected FAILs |
+   |---|---|---|
+   | A | Delete `sample_3.hg38.html` | `*/cohort_html_coverage`, `sample_3/html_exists` |
+   | B | Replace `sample_1.hg38.html` with sample_2's content | `sample_1/sample_tracks_match`, `sample_1/no_cross_sample_contamination`, `sample_1/sample_id_embedded` |
+   | C | Drop one `<li>` from `index.html` | `*/index_consistency` |
+   | D | Truncate `sample_2.hg38.html` to 1 KB | `sample_2/html_min_size`, `sample_2/region_count` |
+
+5. Cleans up generated `reports/`, samplesheet, sites BED, and logs/ on exit.
+
+## Run
+
+```bash
+bash tests/integration/cohort_verify/scenarios.sh
+```
+
+Or as part of the full test suite:
+
+```bash
+bash tests/run_all.sh                  # all layers
+bash tests/run_all.sh --integration-only
+```
+
+## BAM paths (parameterized)
+
+There are no built-in BAM paths; supply the three input BAMs via
+environment variables:
+
+```bash
+IGV_REPORTS_TEST_BAM_1=/path/to/sample1.bam \
+IGV_REPORTS_TEST_BAM_2=/path/to/sample2.bam \
+IGV_REPORTS_TEST_BAM_3=/path/to/sample3.bam \
+    bash tests/integration/cohort_verify/scenarios.sh
+```
+
+If any of the three variables is unset or its file is missing, the script exits
+**77** (Automake skipped-test convention) and `run_all.sh` reports it as a
+skip, not a failure.
+
+Runtime: ~60-90 s on a warm node (3-sample cohort build at 1-bp point-variant
+sites + 4 reverify cycles). Per-sample HTML ends up ~3-5 MB. Cold-cache
+network reads of the underlying ONT BAMs can extend this to 2-3 min on
+first invocation.
+
+Disk: ~15 MB temporary under `reports/`, auto-cleaned via `trap`.
+
+The sites BED uses 1-bp point-variant style coordinates (not 13 kb promoter
+windows like the methylation example) so BAM slicing stays fast — we're
+testing the verifier, not the renderer. Adapt for other workflows if you
+want to exercise wider windows.
+
+## How to provide BAMs
+
+Set `IGV_REPORTS_TEST_BAM_{1,2,3}` to paths of three indexed BAMs you have
+access to. The verifier doesn't care which BAMs — it only requires that the
+three rows in the samplesheet declare *different* BAMs (so scenario B's
+contamination check has signal). Without those env vars, the test exits 77
+(the Automake skipped-test convention).
+
+## Why this is `integration`, not `smoke` or `unit`
+
+This test depends on real BAMs and on `create_report` actually running, so
+it can't fit in `tests/smoke/` (which uses only the committed COLO829 slice
+fixture and runs in seconds) or `tests/unit/` (parser-only, no I/O).
+
+For the parser-level regression checks that gave rise to this verifier,
+see [tests/unit/test_verify_report.py](../../unit/test_verify_report.py).
diff --git a/igv-reports/tests/integration/cohort_verify/scenarios.sh b/igv-reports/tests/integration/cohort_verify/scenarios.sh
new file mode 100755
index 0000000..63ff7d2
--- /dev/null
+++ b/igv-reports/tests/integration/cohort_verify/scenarios.sh
@@ -0,0 +1,172 @@
+#!/usr/bin/env bash
+# scenarios.sh — end-to-end integration test for scripts/verify_cohort.py.
+#
+# Builds a 3-sample cohort, snapshots a clean verify pass, then runs four
+# corruption scenarios and asserts each triggers the expected check FAILs.
+# Exit nonzero if any assertion misses.
+#
+# Runtime: ~6-8 min cold (cohort build dominates); ~30 s when cohort is cached.
+# Disk: ~15 MB under ./reports/ (auto-cleaned on success unless KEEP_REPORTS=1).
+#
+# BAM source — three different indexed BAMs (any organism, any size). There
+# are no built-in defaults; the paths must be supplied via env vars:
+#   IGV_REPORTS_TEST_BAM_1, _2, _3
+# The test SKIPs (exit 77) when any of them is unset or not an existing file.
+set -euo pipefail
+
+EX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SKILL_DIR="$(cd "${EX_DIR}/../../.." && pwd)"
+BUILD="${SKILL_DIR}/scripts/build_igvreports.py"
+VERIFY="${SKILL_DIR}/scripts/verify_cohort.py"
+
+# BAM sources — must be supplied via env vars. No defaults: integration tests
+# require three indexed BAMs from a public ONT release (e.g. COLO829 from EBI).
+BAM_S1="${IGV_REPORTS_TEST_BAM_1:-}"
+BAM_S2="${IGV_REPORTS_TEST_BAM_2:-}"
+BAM_S3="${IGV_REPORTS_TEST_BAM_3:-}"
+
+for bam in "${BAM_S1}" "${BAM_S2}" "${BAM_S3}"; do
+    if [[ -z "${bam}" || ! -f "${bam}" ]]; then
+        echo "SKIP: integration test needs three indexed BAMs." >&2
+        echo "      Set IGV_REPORTS_TEST_BAM_{1,2,3} to paths of three .bam files (each with sibling .bai)." >&2
+        exit 77   # skipped-test convention (GNU Automake), not a failure
+    fi
+done
+
+SHEET="${EX_DIR}/samplesheet.hg38.tsv"
+SITES="${EX_DIR}/sites.hg38.bed"
+OUTDIR="${EX_DIR}/reports"
+
+cleanup() {
+    if [[ -n "${KEEP_REPORTS:-}" ]]; then
+        echo "(KEEP_REPORTS set — leaving artifacts in ${OUTDIR} for inspection)"
+        return
+    fi
+    rm -rf "${OUTDIR}" "${SHEET}" "${SITES}" "${EX_DIR}/logs"
+}
+# Cleanup only on success. On failure keep artifacts for debugging, but roll back any *.bak so a rerun never reuses a corrupted cohort.
+trap 'rc=$?; if [[ $rc -eq 0 ]]; then cleanup; else for f in "${OUTDIR}"/*.bak; do [[ -e "$f" ]] && mv -f "$f" "${f%.bak}"; done; echo "(scenarios.sh exited $rc — leaving artifacts in ${OUTDIR} for debug)"; fi' EXIT
+
+# Requires `create_report` (pip install igv-reports) and `samtools` on PATH.
+# If you use a conda env, activate it before invoking this script.
+
+# --- 1. Generate fresh inputs --------------------------------------------------
+# Point-variant style sites: 1-bp wide each, --flanking 300 = ~600 bp windows.
+# Keeps BAM slicing fast (seconds) even with 100+ GB ONT BAMs and full
+# annotation tracks. We're testing the verifier, not the renderer; tiny
+# windows are sufficient.
+cat >"${SITES}" <<EOF
+#chrom	start	end	name
+chr2	25246500	25246501	DNMT3A_SNV
+chr7	148884000	148884001	EZH2_SNV
+EOF
+
+printf 'sample\tbam_tumor\tsites_bed\n' >"${SHEET}"
+printf 'sample_1\t%s\t%s\n' "${BAM_S1}" "${SITES}" >>"${SHEET}"
+printf 'sample_2\t%s\t%s\n' "${BAM_S2}" "${SITES}" >>"${SHEET}"
+printf 'sample_3\t%s\t%s\n' "${BAM_S3}" "${SITES}" >>"${SHEET}"
+
+# --- 2. Build cohort (3 HTMLs + index.html) -----------------------------------
+# Skip rebuild if a complete cohort is already on disk (set REBUILD=1 to force).
+# Lets you iterate on the verifier in seconds instead of waiting ~12 min to
+# regenerate HTMLs that haven't changed.
+if [[ -z "${REBUILD:-}" \
+      && -f "${OUTDIR}/sample_1.hg38.html" \
+      && -f "${OUTDIR}/sample_2.hg38.html" \
+      && -f "${OUTDIR}/sample_3.hg38.html" \
+      && -f "${OUTDIR}/index.html" ]]; then
+    echo "=== reusing existing cohort in ${OUTDIR} (set REBUILD=1 to force rebuild) ==="
+else
+    echo "=== building cohort ==="
+    python "${BUILD}" \
+        --samplesheet "${SHEET}" \
+        --genome hg38 \
+        --flanking 300 \
+        --type mutation \
+        --info-columns name \
+        --output-dir "${OUTDIR}" \
+        --no-apptainer \
+        --no-verify  # don't auto-verify during build — we exercise the verifier explicitly below
+fi
+echo
+
+assert_status() {
+    # assert_status <sample> <check> <expected_status> <verify_tsv>
+    local sample="$1" check="$2" expected="$3" tsv="$4"
+    local actual
+    actual=$(awk -F'\t' -v s="$sample" -v c="$check" '$1==s && $2==c {print $3; exit}' "$tsv")
+    if [[ "$actual" != "$expected" ]]; then
+        echo "  FAIL ASSERTION: sample=$sample check=$check expected=$expected actual=${actual:-<missing>}"
+        return 1
+    fi
+    echo "  OK   sample=$sample check=$check status=$actual"
+}
+
+# --- 3. Baseline verify (all PASS) --------------------------------------------
+echo "=== scenario 0: baseline (all PASS expected) ==="
+python "${VERIFY}" \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --out "${OUTDIR}/baseline.tsv" \
+    --fail-on-fail >/dev/null
+echo "  baseline: all PASS (verify exited 0)"
+echo
+
+# --- 4. Scenario A: missing HTML ----------------------------------------------
+echo "=== scenario A: delete sample_3's HTML — C1 cohort_html_coverage should FAIL ==="
+mv "${OUTDIR}/sample_3.hg38.html" "${OUTDIR}/sample_3.hg38.html.bak"
+python "${VERIFY}" \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --out "${OUTDIR}/A.tsv" >/dev/null || true
+assert_status "*"        "cohort_html_coverage" "FAIL" "${OUTDIR}/A.tsv"
+assert_status "sample_3" "html_exists"          "FAIL" "${OUTDIR}/A.tsv"
+mv "${OUTDIR}/sample_3.hg38.html.bak" "${OUTDIR}/sample_3.hg38.html"
+echo
+
+# --- 5. Scenario B: sample swap (sample_1.html now contains sample_2 data) ---
+echo "=== scenario B: swap sample_1<-sample_2 — sample_tracks_match + id_embedded + contamination should FAIL on sample_1 ==="
+cp "${OUTDIR}/sample_1.hg38.html" "${OUTDIR}/sample_1.hg38.html.bak"
+cp "${OUTDIR}/sample_2.hg38.html" "${OUTDIR}/sample_1.hg38.html"
+python "${VERIFY}" \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --out "${OUTDIR}/B.tsv" >/dev/null || true
+assert_status "sample_1" "sample_tracks_match"          "FAIL" "${OUTDIR}/B.tsv"
+assert_status "sample_1" "no_cross_sample_contamination" "FAIL" "${OUTDIR}/B.tsv"
+assert_status "sample_1" "sample_id_embedded"           "FAIL" "${OUTDIR}/B.tsv"
+mv "${OUTDIR}/sample_1.hg38.html.bak" "${OUTDIR}/sample_1.hg38.html"
+echo
+
+# --- 6. Scenario C: corrupt index.html ----------------------------------------
+echo "=== scenario C: drop one <li> from index.html — C5 index_consistency should FAIL ==="
+cp "${OUTDIR}/index.html" "${OUTDIR}/index.html.bak"
+sed '/href="sample_2.hg38.html"/d' "${OUTDIR}/index.html.bak" >"${OUTDIR}/index.html"  # no `sed -i`: its syntax differs between GNU and BSD sed
+python "${VERIFY}" \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --out "${OUTDIR}/C.tsv" >/dev/null || true
+assert_status "*" "index_consistency" "FAIL" "${OUTDIR}/C.tsv"
+mv "${OUTDIR}/index.html.bak" "${OUTDIR}/index.html"
+echo
+
+# --- 7. Scenario D: tiny HTML (truncation) ------------------------------------
+echo "=== scenario D: truncate sample_2.html to 1 KB — html_min_size + parse failures expected ==="
+cp "${OUTDIR}/sample_2.hg38.html" "${OUTDIR}/sample_2.hg38.html.bak"
+head -c 1024 "${OUTDIR}/sample_2.hg38.html.bak" > "${OUTDIR}/sample_2.hg38.html"
+python "${VERIFY}" \
+    --samplesheet "${SHEET}" \
+    --reports-dir "${OUTDIR}" \
+    --genome hg38 \
+    --min-size-mb 1.0 \
+    --out "${OUTDIR}/D.tsv" >/dev/null || true
+assert_status "sample_2" "html_min_size"   "FAIL" "${OUTDIR}/D.tsv"
+assert_status "sample_2" "region_count"    "FAIL" "${OUTDIR}/D.tsv"
+mv "${OUTDIR}/sample_2.hg38.html.bak" "${OUTDIR}/sample_2.hg38.html"
+echo
+
+echo "=== all 4 scenarios PASSED — verify_cohort.py behaves as expected ==="
diff --git a/igv-reports/tests/integration/end_to_end/README.md b/igv-reports/tests/integration/end_to_end/README.md
new file mode 100644
index 0000000..85fad18
--- /dev/null
+++ b/igv-reports/tests/integration/end_to_end/README.md
@@ -0,0 +1,62 @@
+# tests/integration/end_to_end
+
+End-to-end smoke test against the **committed** `tests/fixtures/tiny_colo829.hg38.bam`
+fixture (457 KB). Unlike the other integration scenarios (`anchor_verify`,
+`cohort_verify`), this one needs no shared-storage access and runs in
+~30 s — so it ships in CI.
+
+## What it exercises (not via mocks)
+
+1. **`build_igvreports.py --bam ...`** actually invokes `create_report`
+   against the fixture BAM with a synthesized minimal FASTA. Produces a
+   real ~2 MB HTML.
+2. **`verify_report.py`** parses the HTML's `tableJson` + `sessionDictionary`,
+   confirms region count + track presence.
+3. **`verify_anchors.py generate`** counts reads in the source BAM at the
+   three sites; asserts that the two frozen sites match the contract documented
+   in `tests/fixtures/README.md` (`chr2:25246500-25246501 = 5`,
+   `chr7:148884000-148884001 = 9`). The third site's count is not frozen.
+4. **`verify_anchors.py verify`** decodes the embedded BAM slices from the
+   freshly-built HTML and confirms the same counts — closes the loop on the
+   create_report ↔ source-BAM round trip.
+5. **`--also-png` (optional)** runs the same pipeline with the PNG sidecar
+   path. SKIPs cleanly when `igver` isn't installed or fails (the
+   documented silent-failure mode in `rules/igv.md`).
+
+## What it catches that unit tests don't
+
+- `create_report` flag rename / removal on upstream version bumps
+- HTML structural changes from upstream (e.g. session-dict layout drift)
+- Driver regressions on the **non-mock** code path
+- Off-MSKCC portability bugs — the test runs against the committed fixture
+  with no `shared-storage` dependency, so CI exercises the same code paths
+  external users would hit
+
+## Runtime
+
+| Step | Duration |
+|---|---|
+| `create_report` (3 regions × 1 BAM × 300 bp flanking) | ~2 s |
+| structural `verify_report.py` | <1 s |
+| `verify_anchors.py generate` + `verify` | ~5 s |
+| `--also-png` (if igver available) | ~5 s |
+| **total** | **~14 s** |
+
+## Prereqs
+
+- `create_report` on PATH (`pip install -U 'igv-reports>=1.16.0'`)
+- `samtools` on PATH (provided by the smoke layer prereqs)
+- `python3` on PATH
+
+If `create_report` is missing the test exits 77 (skipped) rather than
+failing — same convention as the other integration scenarios.
+
+## Knobs
+
+- `KEEP_REPORTS=1` — leave the `out/` directory in place after a successful
+  run for manual inspection.
+- `IGV_REPORTS_PY=/path/to/python` — pin the python interpreter (this
+  script otherwise falls back to `python3` on PATH).
+- `IGVER_CMD='apptainer exec /path/to/igver.sif igver'` — provide a working
+  `igver` invocation so step 8 (`--also-png`) actually exercises the PNG
+  pipeline rather than SKIPping.
diff --git a/igv-reports/tests/integration/end_to_end/scenarios.sh b/igv-reports/tests/integration/end_to_end/scenarios.sh
new file mode 100755
index 0000000..ad83100
--- /dev/null
+++ b/igv-reports/tests/integration/end_to_end/scenarios.sh
@@ -0,0 +1,248 @@
+#!/usr/bin/env bash
+# scenarios.sh — end-to-end smoke test using the COMMITTED tiny_colo829 fixture.
+#
+# Author: Samuel Ahuno
+# Purpose:
+#   The other integration scenarios (anchor_verify, cohort_verify) require
+#   167 GB lab BAMs and take 6-8 min. This one uses the 457 KB
+#   tests/fixtures/tiny_colo829.hg38.bam fixture so the full pipeline runs
+#   in ~30 s on any machine with `create_report` on PATH.
+#
+#   What it exercises end-to-end (not via mocks):
+#     1. build_igvreports.py invokes create_report against the fixture
+#     2. The resulting HTML is parseable by verify_report.py (structural)
+#     3. verify_anchors.py generate → frozen counts (chr2=5, chr7=9 per
+#        tests/fixtures/README.md)
+#     4. verify_anchors.py verify → PASS on the freshly built HTML
+#     5. If `igver` is on PATH: --also-png produces non-empty per-region
+#        PNGs and the manifest. Otherwise that step is SKIPped (logged).
+#
+# Catches: create_report flag drift, HTML-format upstream changes, driver
+# regressions on the non-mock path, off-MSKCC portability bugs (the
+# fixture is committed; no shared-storage required).
+#
+# Runtime: ~30 s. Disk: ~185 MB under ./out/, nearly all of it the synthesized N-only FASTA (auto-cleaned on success).
+set -euo pipefail
+
+EX_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SKILL_DIR="$(cd "${EX_DIR}/../../.." && pwd)"
+BUILD="${SKILL_DIR}/scripts/build_igvreports.py"
+ANCHORS="${SKILL_DIR}/scripts/verify_anchors.py"
+VR="${SKILL_DIR}/scripts/verify_report.py"
+FIXTURE="${SKILL_DIR}/tests/fixtures/tiny_colo829.hg38.bam"
+
+# Prerequisite: create_report must be on PATH (provided by `pip install igv-reports`).
+if ! command -v create_report >/dev/null 2>&1; then
+    echo "SKIP: end-to-end test needs create_report on PATH." >&2
+    echo "      Install with: pip install -U 'igv-reports>=1.16.0'" >&2
+    exit 77
+fi
+
+# Prerequisite: the committed fixture must be readable.
+if [[ ! -f "${FIXTURE}" ]] || [[ ! -f "${FIXTURE}.bai" ]]; then
+    echo "ERROR: fixture missing or unindexed: ${FIXTURE}" >&2
+    echo "       Regenerate with: bash tests/fixtures/build_fixtures.sh" >&2
+    exit 1
+fi
+
+# Pick the python that can import pyyaml + the skill's scripts/: IGV_REPORTS_PY
+# if set, else python3 on PATH (unlike tests/run_all.sh, no conda env is probed).
+PY="${IGV_REPORTS_PY:-}"
+if [[ -z "${PY}" ]]; then
+    if command -v python3 >/dev/null 2>&1; then
+        PY=$(command -v python3)
+    else
+        echo "ERROR: no python3 on PATH" >&2; exit 2
+    fi
+fi
+
+OUTDIR="${EX_DIR}/out"
+cleanup() {
+    if [[ -n "${KEEP_REPORTS:-}" ]]; then
+        echo "(KEEP_REPORTS set — leaving artifacts in ${OUTDIR})"
+        return
+    fi
+    rm -rf "${OUTDIR}"
+}
+trap 'rc=$?; if [[ $rc -eq 0 ]]; then cleanup; else echo "(scenarios.sh exited $rc — leaving ${OUTDIR} for debug)"; fi' EXIT
+
+rm -rf "${OUTDIR}"
+mkdir -p "${OUTDIR}"
+
+# --- 1. Inputs --------------------------------------------------------------
+# Three sites, all within the fixture's two slice regions:
+#   * chr2:25246500-25246501  (DNMT3A R882 SNV, frozen anchor count = 5)
+#   * chr7:148884000-148884001 (EZH2 Y646 SNV, frozen anchor count = 9)
+#   * chr2:25247500-25247501  (second DNMT3A locus, count not frozen)
+# Frozen counts are the contract per tests/fixtures/README.md.
+SITES="${OUTDIR}/sites.hg38.bed"
+cat >"${SITES}" <<EOF
+#chrom	start	end	name
+chr2	25246500	25246501	DNMT3A_R882
+chr7	148884000	148884001	EZH2_Y646
+chr2	25247500	25247501	DNMT3A_2nd
+EOF
+
+# Reference FASTA: in CI we won't have hg38 locally. Skip the --fasta
+# resolution and use --no-default-tracks; create_report will then need a
+# --fasta path. We supply a synthesized FASTA covering both regions so
+# create_report can compute its own slice without internet access.
+FASTA="${OUTDIR}/tiny.hg38.fa"
+${PY} -c "
+# Minimal multi-contig FASTA covering the fixture's coverage windows.
+# Only the size matters for create_report's region slicing — bases don't
+# need to be biologically real; the BAM's reads carry the actual signal.
+contigs = [
+    ('chr2', 30_000_000),
+    ('chr7', 150_000_000),
+]
+with open('${FASTA}', 'w') as fh:
+    for name, length in contigs:
+        fh.write(f'>{name}\n')
+        n_per_line = 60
+        for i in range(0, length, n_per_line):
+            fh.write('N' * min(n_per_line, length - i) + '\n')
+"
+samtools faidx "${FASTA}"
+
+# --- 2. Build HTML (the actual end-to-end step) -----------------------------
+echo "=== build: invoke create_report against fixture BAM ==="
+HTML="${OUTDIR}/sample.hg38.html"
+${PY} "${BUILD}" \
+    --sites "${SITES}" \
+    --bam "${FIXTURE}" \
+    --genome hg38 \
+    --fasta "${FASTA}" \
+    --no-default-tracks \
+    --flanking 300 \
+    --type mutation \
+    --info-columns name \
+    --output "${HTML}" \
+    --no-apptainer \
+    --no-verify 2>&1 | tail -8
+echo
+
+# --- 3. Assertion: HTML exists, plausible size ------------------------------
+if [[ ! -f "${HTML}" ]]; then
+    echo "FAIL: HTML not produced at ${HTML}"; exit 1
+fi
+size=$(wc -c <"${HTML}" | tr -d '[:space:]')  # POSIX byte count; `stat -c %s` is GNU-only
+if [[ "${size}" -lt 50000 ]]; then
+    echo "FAIL: HTML suspiciously small (${size} bytes) — expected >= 50 KB"
+    exit 1
+fi
+echo "  OK   HTML: ${HTML} (${size} bytes)"
+echo
+
+# --- 4. Structural verify ---------------------------------------------------
+echo "=== verify_report.py: structural check ==="
+${PY} "${VR}" \
+    --html "${HTML}" \
+    --sites "${SITES}" \
+    --tracks "${FIXTURE}" \
+    --min-size-mb 0.05 \
+    --out "${OUTDIR}/verify_report.tsv" \
+    --fail-on-fail >/dev/null
+echo "  OK   structural verify PASS"
+echo
+
+# --- 5. Generate frozen anchors ---------------------------------------------
+echo "=== verify_anchors.py generate: BAM read counts ==="
+SHEET="${OUTDIR}/samplesheet.tsv"
+printf 'sample\tbam_tumor\tsites_bed\n'  >"${SHEET}"
+printf 'sample\t%s\t%s\n' "${FIXTURE}" "${SITES}" >>"${SHEET}"
+
+ANCHORS_TSV="${OUTDIR}/anchors.hg38.tsv"
+${PY} "${ANCHORS}" generate \
+    --samplesheet "${SHEET}" \
+    --sites "${SITES}" \
+    --out "${ANCHORS_TSV}" 2>&1 | tail -6
+echo
+
+# --- 6. Assertion: frozen anchor counts match contract ----------------------
+# Contract is in tests/fixtures/README.md. Any drift here is the loudest
+# signal that the fixture changed, the BAM filter changed, or the test
+# environment is using a different samtools.
+expected_chr2=5
+expected_chr7=9
+actual_chr2=$(awk -F'\t' '$4=="chr2" && $5==25246500 {print $7}' "${ANCHORS_TSV}")
+actual_chr7=$(awk -F'\t' '$4=="chr7" && $5==148884000 {print $7}' "${ANCHORS_TSV}")
+if [[ "${actual_chr2}" != "${expected_chr2}" ]]; then
+    echo "FAIL: chr2:25246500-25246501 expected=${expected_chr2} got=${actual_chr2}"
+    exit 1
+fi
+if [[ "${actual_chr7}" != "${expected_chr7}" ]]; then
+    echo "FAIL: chr7:148884000-148884001 expected=${expected_chr7} got=${actual_chr7}"
+    exit 1
+fi
+echo "  OK   anchor contract: chr2=5 chr7=9 (matches tests/fixtures/README.md)"
+echo
+
+# --- 7. verify_anchors against the just-built HTML --------------------------
+echo "=== verify_anchors.py verify: HTML slice round-trip ==="
+${PY} "${ANCHORS}" verify \
+    --html "${HTML}" \
+    --anchors "${ANCHORS_TSV}" \
+    --out "${OUTDIR}/verify_anchors.tsv" \
+    --fail-on-fail >/dev/null
+echo "  OK   anchor verify PASS (HTML slice counts match source BAM counts)"
+echo
+
+# --- 8. Optional: --also-png exercises the full HTML+PNG pipeline -----------
+# Skip semantics: this step is best-effort and never causes the test to FAIL.
+# `igver` may be on PATH as a `pip install igver` egg-link shim WITHOUT the
+# underlying IGV Java binary — exits 0 but produces no PNGs (the documented
+# silent-failure mode in rules/igv.md). Our --also-png driver catches that
+# via the inline existence check and raises SystemExit. Here we treat any
+# such failure as SKIP rather than propagate it, since a non-working igver
+# install isn't a regression in this skill's code.
+if command -v igver >/dev/null 2>&1 || [[ -n "${IGVER_CMD:-}" ]]; then
+    echo "=== --also-png: HTML + per-region PNGs (igver available) ==="
+    HTML_PNG="${OUTDIR}/png_sample.hg38.html"
+    if ${PY} "${BUILD}" \
+            --sites "${SITES}" \
+            --bam "${FIXTURE}" \
+            --genome hg38 \
+            --fasta "${FASTA}" \
+            --no-default-tracks \
+            --flanking 300 \
+            --type mutation \
+            --info-columns name \
+            --output "${HTML_PNG}" \
+            --no-apptainer \
+            --no-verify \
+            --also-png \
+            --png-dpi 100 >"${OUTDIR}/also_png.log" 2>&1; then
+        # --also-png returned 0 — assert the manifest + PNGs are real.
+        MANIFEST="${OUTDIR}/png_png_sample.hg38/manifest.tsv"
+        if [[ ! -f "${MANIFEST}" ]]; then
+            echo "FAIL: --also-png exited 0 but no manifest at ${MANIFEST}"
+            exit 1
+        fi
+        n_regions=$(awk -F'\t' 'NR>1 && !/^#/' "${MANIFEST}" | wc -l)
+        if [[ "${n_regions}" -ne 3 ]]; then
+            echo "FAIL: manifest has ${n_regions} regions, expected 3"
+            exit 1
+        fi
+        png_one=$(awk -F'\t' 'NR==2 {print $9}' "${MANIFEST}")
+        if [[ ! -s "${png_one}" ]]; then
+            echo "FAIL: PNG missing or empty: ${png_one}"
+            exit 1
+        fi
+        png_size=$(wc -c <"${png_one}" | tr -d '[:space:]')  # POSIX byte count; `stat -c %s` is GNU-only
+        echo "  OK   manifest: ${n_regions} regions; spot-check ${png_one##*/} = ${png_size} bytes"
+    else
+        # Driver caught the silent-failure mode; surface the diagnostic but
+        # don't fail the test — broken igver install is environment-level.
+        echo "  SKIP (igver invocation failed — likely missing IGV Java binary or wrong PATH)"
+        echo "       see ${OUTDIR}/also_png.log for the driver's diagnostic."
+        if grep -Eq "silent exit-0 failure|Failed to generate all PNG files" "${OUTDIR}/also_png.log" 2>/dev/null; then
+            echo "       (confirmed: this is the documented igver silent-failure mode)"
+        fi
+    fi
+else
+    echo "=== --also-png: SKIP (igver not on PATH; set \$IGVER_CMD or install via apptainer SIF) ==="
+fi
+echo
+
+echo "=== end-to-end PASS — full pipeline (create_report → verify → optional igver) ==="
diff --git a/igv-reports/tests/run_all.sh b/igv-reports/tests/run_all.sh
new file mode 100755
index 0000000..1356656
--- /dev/null
+++ b/igv-reports/tests/run_all.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+# tests/run_all.sh — orchestrate the three test layers in order.
+#
+# Author: Samuel Ahuno
+# Purpose:
+#   1. unit (~1 s)      — pure-Python parser tests; pytest.
+#   2. smoke (~3 s)     — samtools subprocess + slice-decode round-trip
+#                          against the committed fixture; pytest.
+#   3. integration      — full cohort build + verify-cohort + verify-anchors
+#                          end-to-end; bash scenarios.sh under each demo.
+#                          A scenario that exits 77 (e.g. cohort_verify when
+#                          the IGV_REPORTS_TEST_BAM_* env vars are unset) is
+#                          counted as a skip, not a failure.
+#
+# Usage:
+#   bash tests/run_all.sh              # all three layers
+#   bash tests/run_all.sh --unit-only  # layer 1 only — instant feedback
+#   bash tests/run_all.sh --no-integration  # layers 1 + 2 (fast everywhere)
+#   bash tests/run_all.sh --integration-only  # layer 3 only — for the slow lane
+#
+# Exit code:
+#   0  — every requested layer passed (or was legitimately skipped).
+#   1+ — at least one layer failed; output preserved for debugging.
+set -euo pipefail
+
+TESTS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SKILL_DIR="$(cd "${TESTS_DIR}/.." && pwd)"
+
+RUN_UNIT=1
+RUN_SMOKE=1
+RUN_INTEGRATION=1
+
+for arg in "$@"; do
+    case "$arg" in
+        --unit-only)         RUN_SMOKE=0; RUN_INTEGRATION=0 ;;
+        --no-integration)    RUN_INTEGRATION=0 ;;
+        --integration-only)  RUN_UNIT=0; RUN_SMOKE=0 ;;
+        -h|--help)
+            sed -n '3,20p' "$0"
+            exit 0
+            ;;
+        *)
+            echo "ERROR: unknown flag: $arg" >&2
+            echo "       Use --help to see options." >&2
+            exit 2
+            ;;
+    esac
+done
+
+# Pick a Python with pytest. Prefer the snakemake conda env (where all
+# project tooling lives); fall back to PATH `python3`.
+PY="${IGV_REPORTS_PY:-}"
+if [[ -z "${PY}" ]]; then
+    if [[ -x /home/ahunos/miniforge3/envs/snakemake/bin/python ]]; then
+        PY=/home/ahunos/miniforge3/envs/snakemake/bin/python
+    elif command -v python3 >/dev/null 2>&1; then
+        PY=$(command -v python3)
+    else
+        echo "ERROR: no python3 available. Set IGV_REPORTS_PY=<path-to-python>" >&2
+        exit 2
+    fi
+fi
+
+FAILS=0
+SKIPS=0
+
+run_layer() {
+    local name="$1"; shift
+    local desc="$1"; shift
+    echo "=== ${name}: ${desc} ==="
+    if "$@"; then
+        echo "    ${name} PASS"
+    else
+        local rc=$?
+        if [[ $rc -eq 77 ]]; then
+            echo "    ${name} SKIP (exit 77 — see message above)"
+            SKIPS=$((SKIPS + 1))
+        else
+            echo "    ${name} FAIL (exit ${rc})"
+            FAILS=$((FAILS + 1))
+        fi
+    fi
+    echo
+}
+
+# --- Layer 1: unit ---------------------------------------------------------
+if [[ $RUN_UNIT -eq 1 ]]; then
+    run_layer "unit" "pure-Python parsers" \
+        "${PY}" -m pytest "${TESTS_DIR}/unit/" -q
+fi
+
+# --- Layer 2: smoke --------------------------------------------------------
+if [[ $RUN_SMOKE -eq 1 ]]; then
+    run_layer "smoke" "samtools + slice-decode round-trip" \
+        "${PY}" -m pytest "${TESTS_DIR}/smoke/" -q
+fi
+
+# --- Layer 3: integration --------------------------------------------------
+# Each scenarios.sh exits 77 if its required BAMs aren't available; we treat
+# that as a skip rather than a failure so the suite is portable.
+if [[ $RUN_INTEGRATION -eq 1 ]]; then
+    # end_to_end: uses the committed tiny_colo829 fixture (~30 s, runs in CI).
+    run_layer "integration / end_to_end" "full pipeline against committed fixture" \
+        bash "${TESTS_DIR}/integration/end_to_end/scenarios.sh"
+    run_layer "integration / cohort_verify" "cohort structural verifier scenarios" \
+        bash "${TESTS_DIR}/integration/cohort_verify/scenarios.sh"
+    run_layer "integration / anchor_verify" "anchor content verifier scenarios" \
+        bash "${TESTS_DIR}/integration/anchor_verify/scenarios.sh"
+fi
+
+echo "=== summary ==="
+echo "  failures: ${FAILS}"
+echo "  skips:    ${SKIPS}"
+
+if [[ $FAILS -gt 0 ]]; then
+    exit 1
+fi
+exit 0
diff --git a/igv-reports/tests/smoke/test_slice_count.py b/igv-reports/tests/smoke/test_slice_count.py
new file mode 100644
index 0000000..9c72586
--- /dev/null
+++ b/igv-reports/tests/smoke/test_slice_count.py
@@ -0,0 +1,154 @@
+"""Smoke tests for verify_anchors.py — exercises the samtools subprocess
+path and the end-to-end slice-decode-and-count flow against the committed
+COLO829 BAM fixture.
+
+Author: Samuel Ahuno
+Purpose:
+  The unit tests cover the parsers with synthetic inputs. These smoke
+  tests confirm the verifier's samtools shellouts actually work end-to-end:
+
+    1. `samtools_count` returns the right integer for a real BAM region.
+    2. `samtools_index` produces a usable index on a fresh BAM.
+    3. The full decode round-trip (read fixture BAM → base64 wrap into a
+       fake data: URL → decode_track_slice writes it back out → re-index
+       → re-count) preserves the original count exactly.
+
+  Skipped (not failed) if samtools is unavailable — so this works in a
+  CI sandbox without the SIF or PATH samtools.
+
+Run:
+  cd igv-reports
+  pytest tests/smoke/ -v
+"""
+
+from __future__ import annotations
+
+import base64
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+import pytest
+
+SCRIPTS = Path(__file__).resolve().parents[2] / "scripts"
+sys.path.insert(0, str(SCRIPTS))
+import verify_anchors as va  # noqa: E402
+
+FIXTURE = Path(__file__).resolve().parents[1] / "fixtures" / "tiny_colo829.hg38.bam"
+
+# Anchor sanity counts — must match fixtures/README.md and build_fixtures.sh.
+ANCHOR_CHR2 = ("chr2:25246500-25246501", 5)
+ANCHOR_CHR7 = ("chr7:148884000-148884001", 9)
+
+
+def _samtools_cmd() -> list[str] | None:
+    """Resolve samtools the same way verify_anchors does, but return None
+    instead of raising when nothing is available. Lets us SKIP gracefully."""
+    try:
+        return va.resolve_samtools(None)
+    except SystemExit:
+        # No SIF and no PATH samtools — environment can't run smoke tests.
+        return None
+
+
+@pytest.fixture(scope="module")
+def samtools_cmd():
+    cmd = _samtools_cmd()
+    if cmd is None:
+        pytest.skip("no samtools available (set SAMTOOLS_SIF or install samtools)")
+    if not FIXTURE.exists():
+        pytest.skip(
+            f"fixture missing: {FIXTURE} — regenerate with "
+            "bash tests/fixtures/build_fixtures.sh"
+        )
+    return cmd
+
+
+# ---------------------------------------------------------------------------
+# samtools_count + samtools_index against the committed fixture
+# ---------------------------------------------------------------------------
+
+@pytest.mark.parametrize("region,expected", [ANCHOR_CHR2, ANCHOR_CHR7])
+def test_samtools_count_matches_fixture_anchor(samtools_cmd, region, expected):
+    observed = va.samtools_count(samtools_cmd, FIXTURE, region)
+    assert observed == expected, (
+        f"fixture anchor drift: {region} should be {expected}, got {observed}. "
+        "Either the committed BAM was regenerated with different params "
+        "(see tests/fixtures/build_fixtures.sh) or the count filter changed."
+    )
+
+
+def test_samtools_index_creates_usable_index(samtools_cmd, tmp_path):
+    """Copy the fixture to tmp_path WITHOUT its .bai, then have verify_anchors
+    re-index it. After indexing, samtools_count must succeed."""
+    bam_copy = tmp_path / "no_bai.bam"
+    shutil.copy(FIXTURE, bam_copy)
+    # confirm no index exists yet
+    assert not (tmp_path / "no_bai.bam.bai").exists()
+    va.samtools_index(samtools_cmd, bam_copy)
+    assert (tmp_path / "no_bai.bam.bai").exists()
+    # count works now
+    assert va.samtools_count(samtools_cmd, bam_copy, ANCHOR_CHR2[0]) == ANCHOR_CHR2[1]
+
+
+def test_samtools_count_missing_bam_raises(samtools_cmd, tmp_path):
+    """A missing BAM path should produce a clear RuntimeError, not crash silently.
+
+    Note: samtools tolerates malformed region strings (treats them as unknown
+    references and returns 0 with a stderr warning + exit 0). The only
+    reliable error trigger is a missing/unreadable BAM file."""
+    missing = tmp_path / "does_not_exist.bam"
+    with pytest.raises(RuntimeError, match="samtools view -c failed"):
+        va.samtools_count(samtools_cmd, missing, "chr1:1-100")
+
+
+# ---------------------------------------------------------------------------
+# Slice decode round-trip (the central correctness claim of verify_anchors)
+# ---------------------------------------------------------------------------
+
+def test_full_decode_roundtrip(samtools_cmd, tmp_path):
+    """End-to-end: emulate what igv-reports does to embed a BAM slice in an
+    HTML data: URL, then have verify_anchors decode it back out and confirm
+    the read count is preserved.
+
+    This is the critical correctness claim: if the verifier's slice decode
+    silently corrupts the BAM bytes, every anchor verify would silently
+    pass when it shouldn't. Catching that here means we trust the
+    integration tests further down."""
+    # 1. Read the committed BAM raw, wrap it in a data: URL exactly the
+    #    way igv_reports/datauri.py does (mediatype application/gzip
+    #    because BAM is BGZF gzip — see decision in datauri.get_data_uri).
+    raw = FIXTURE.read_bytes()
+    data_url = "data:application/gzip;base64," + base64.b64encode(raw).decode()
+
+    # 2. Decode it via the production code path.
+    decoded = tmp_path / "decoded.bam"
+    va.decode_track_slice(data_url, decoded)
+
+    # 3. Bytes must match exactly.
+    assert decoded.read_bytes() == raw
+
+    # 4. samtools should treat the decoded file as a real BAM — index it
+    #    and count the same anchors.
+    va.samtools_index(samtools_cmd, decoded)
+    for region, expected in (ANCHOR_CHR2, ANCHOR_CHR7):
+        assert va.samtools_count(samtools_cmd, decoded, region) == expected
+
+
+def test_resolve_samtools_explicit_sif_missing(tmp_path):
+    """Passing a non-existent SIF path explicitly must fail loudly, not fall
+    back silently to PATH samtools."""
+    fake_sif = tmp_path / "does_not_exist.sif"
+    with pytest.raises(SystemExit, match="samtools SIF not found"):
+        va.resolve_samtools(fake_sif)
+
+
+def test_resolve_samtools_env_var(tmp_path, monkeypatch):
+    """$SAMTOOLS_SIF env var honored when no --samtools-sif passed."""
+    # Use a real-ish path that doesn't exist to verify it's the *path* the
+    # env-resolution picks up, not some unrelated SIF.
+    fake = tmp_path / "env_sif.sif"
+    monkeypatch.setenv("SAMTOOLS_SIF", str(fake))
+    with pytest.raises(SystemExit, match=str(fake)):
+        va.resolve_samtools(None)
diff --git a/igv-reports/tests/unit/test_build_pngs.py b/igv-reports/tests/unit/test_build_pngs.py
new file mode 100644
index 0000000..54a913c
--- /dev/null
+++ b/igv-reports/tests/unit/test_build_pngs.py
@@ -0,0 +1,338 @@
+"""Unit tests for the --also-png plumbing in build_igvreports.py.
+
+Author: Samuel Ahuno
+Purpose:
+  Exercises the helpers that bridge the HTML build to igver: sites BED
+  parsing + UID assignment, flanked regions BED writer, input.txt writer,
+  igver-cmd resolution, and manifest writing.
+
+  We don't actually invoke igver here — the manifest writer reconstructs
+  filenames from the same convention igver uses (validated against
+  igver's _parse_bed_file source: `<chrom>-<start>-<end>.<name>.<ext>`).
+  Cross-artifact consistency depends on this filename contract; if igver
+  ever changes it, this test plus verify_cohort will catch the drift.
+
+Run:
+  pytest tests/unit/test_build_pngs.py -v
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+SCRIPTS = Path(__file__).resolve().parents[2] / "scripts"
+sys.path.insert(0, str(SCRIPTS))
+
+import build_igvreports as b  # noqa: E402
+
+
+def _write_bed(path: Path, rows: list[tuple]) -> None:
+    """Write `rows` — (chrom, start, end[, name][, ...]) tuples — as tab-separated lines."""
+    bed_lines = ("\t".join(map(str, fields)) + "\n" for fields in rows)
+    with path.open("w") as handle:
+        handle.writelines(bed_lines)
+
+
+# ----- _read_sites_bed_rows -----
+
+
+def test_read_sites_bed_assigns_uids_when_name_missing(tmp_path):
+    # Nameless rows get auto-UIDs zero-padded to 3 digits, so directory
+    # listings sort right (`region_010` must not sort before `region_2`).
+    sites = tmp_path / "sites.hg38.bed"
+    _write_bed(sites, [("chr1", 100, 200), ("chr2", 300, 400)])
+    parsed = b._read_sites_bed_rows(sites)
+    assert len(parsed) == 2
+    first, second = parsed
+    # bed_row_idx counts rows from 1.
+    assert (first["name"], first["bed_row_idx"]) == ("region_001", 1)
+    assert (second["name"], second["bed_row_idx"]) == ("region_002", 2)
+
+
+def test_read_sites_bed_preserves_existing_names(tmp_path):
+    # A name already present in column 4 is kept as-is, not replaced by an auto-UID.
+    sites = tmp_path / "sites.hg38.bed"
+    _write_bed(sites, [("chr2", 100, 200, "DNMT3A_full_gene"), ("chr7", 300, 400, "TP53")])
+    parsed = b._read_sites_bed_rows(sites)
+    assert [parsed[0]["name"], parsed[1]["name"]] == ["DNMT3A_full_gene", "TP53"]
+
+
+def test_read_sites_bed_skips_comment_and_track_lines(tmp_path):
+    # `#` comment, `track` and `browser` lines are headers, not data rows.
+    sites = tmp_path / "sites.hg38.bed"
+    sites.write_text("".join([
+        "#chrom\tstart\tend\tname\n",
+        "track name=foo\n",
+        "browser dense\n",
+        "chr1\t100\t200\treal_row\n",
+    ]))
+    parsed = b._read_sites_bed_rows(sites)
+    assert len(parsed) == 1 and parsed[0]["name"] == "real_row"
+
+
+def test_read_sites_bed_handles_mixed_named_and_unnamed(tmp_path):
+    # A nameless row's auto-UID reflects its file position, keeping manifests deterministic.
+    entries = [
+        ("chr1", 100, 200, "named_first"),
+        ("chr2", 300, 400),
+        ("chr3", 500, 600, "named_third"),
+    ]
+    sites = tmp_path / "sites.hg38.bed"
+    _write_bed(sites, entries)
+    names = [row["name"] for row in b._read_sites_bed_rows(sites)]
+    assert names == ["named_first", "region_002", "named_third"]
+
+
+# ----- _write_igver_regions_bed -----
+
+
+def test_write_igver_regions_bed_applies_flanking(tmp_path):
+    # Subtracting the 300 bp flank pushes both starts below zero, so each
+    # start is expected to clamp to 0 while the ends simply grow by 300.
+    site_a = {"chrom": "chr1", "start": 100, "end": 200, "name": "A", "bed_row_idx": 1}
+    site_b = {"chrom": "chr2", "start": 50, "end": 150, "name": "B", "bed_row_idx": 2}
+    target = tmp_path / "igver_regions.bed"
+    b._write_igver_regions_bed([site_a, site_b], flanking=300, out=target)
+    written = target.read_text().splitlines()
+    # A: 100 - 300 = -200 -> clamped to 0; end 200 + 300 = 500.
+    assert written[0] == "chr1\t0\t500\tA"
+    # B: 50 - 300 = -250 -> clamped to 0; end 150 + 300 = 450.
+    assert written[1] == "chr2\t0\t450\tB"
+
+
+def test_write_igver_regions_bed_zero_flanking_passes_rows_verbatim(tmp_path):
+    # With flanking=0 the coordinates are written exactly as given.
+    site = {"chrom": "chrX", "start": 1000, "end": 2000, "name": "promoter", "bed_row_idx": 1}
+    b._write_igver_regions_bed([site], flanking=0, out=tmp_path / "igver_regions.bed")
+    assert (tmp_path / "igver_regions.bed").read_text().strip() == "chrX\t1000\t2000\tpromoter"
+
+
+# ----- _write_igver_input_list -----
+
+
+def test_write_igver_input_list_one_path_per_line(tmp_path):
+    # Each track path lands on its own line, in the order given.
+    track_paths = ["/path/to/tumor.bam", "/path/to/normal.bam", "/path/to/calls.vcf"]
+    b._write_igver_input_list(track_paths, tmp_path / "igver_input.txt")
+    assert (tmp_path / "igver_input.txt").read_text().splitlines() == track_paths
+
+
+# ----- _resolve_igver_cmd -----
+
+
+def test_resolve_igver_cmd_explicit_override_wins():
+    explicit = "apptainer exec /path/to/igver.sif igver"  # returned split on whitespace
+    assert b._resolve_igver_cmd(explicit) == ["apptainer", "exec", "/path/to/igver.sif", "igver"]
+
+
+def test_resolve_igver_cmd_env_var_falls_back(monkeypatch):
+    # For the $IGVER_CMD branch to fire, which() must not find igver; mock it to None.
+    monkeypatch.setenv("IGVER_CMD", "/usr/local/bin/igver --debug")
+    with patch.object(b.shutil, "which", return_value=None):
+        with patch.object(b.Path, "exists", return_value=False):
+            assert b._resolve_igver_cmd(None) == ["/usr/local/bin/igver", "--debug"]
+
+
+def test_resolve_igver_cmd_path_lookup(monkeypatch):
+    monkeypatch.delenv("IGVER_CMD", raising=False)  # drop any env override first
+    with patch.object(b.shutil, "which", return_value="/usr/bin/igver") as which_mock:
+        assert b._resolve_igver_cmd(None) == [which_mock.return_value]
+
+
+def test_resolve_igver_cmd_raises_when_not_found(monkeypatch):
+    monkeypatch.delenv("IGVER_CMD", raising=False)
+    with patch.object(b.shutil, "which", return_value=None), \
+         patch.object(b.Path, "exists", return_value=False), \
+         pytest.raises(SystemExit, match="igver not found"):
+        b._resolve_igver_cmd(None)  # env var unset, which() and Path.exists both miss
+
+
+# ----- build_pngs_with_igver — mocked subprocess -----
+
+
+def _fake_igver_run(cmd, **kwargs):
+    """Stand-in for subprocess.run that mimics a successful igver call: reads
+    the output dir (`-o`), regions BED (`-r`) and optional format (`-f`,
+    default png) from `cmd`, then writes a non-empty fake image per region as
+    `<chr>-<start>-<end>.<uid>.<ext>` — the convention real igver uses."""
+    import subprocess
+    def flag_value(flag):
+        return cmd[cmd.index(flag) + 1]
+    target_dir = Path(flag_value("-o"))
+    target_dir.mkdir(parents=True, exist_ok=True)
+    requested = flag_value("-f") if "-f" in cmd else "png"
+    suffix = "svg" if requested in ("svg", "pdf") else requested
+    for record in Path(flag_value("-r")).read_text().splitlines():
+        if record and not record.startswith("#"):
+            chrom, start, end, name = record.split("\t")[:4]
+            (target_dir / f"{chrom}-{start}-{end}.{name}.{suffix}").write_bytes(b"PNG\x00" * 4096)
+    return subprocess.CompletedProcess(args=cmd, returncode=0, stdout="", stderr="")
+
+
+def test_build_pngs_with_igver_writes_manifest_and_inputs(tmp_path, monkeypatch):
+    # Full driver run against a mocked igver. The mock really writes the
+    # expected output files, because the inline existence check rejects an
+    # igver run that produces zero PNGs.
+    sites_bed = tmp_path / "sites.hg38.bed"
+    _write_bed(sites_bed, [
+        ("chr1", 100, 200, "alpha"),
+        ("chr2", 300, 400, "beta"),
+    ])
+    report = tmp_path / "sample.hg38.html"
+    report.write_text("<html/>")
+    work_dir = tmp_path / "png_sample.hg38"
+    build_args = dict(
+        sites=sites_bed,
+        tracks=["/data/sample.bam", "/data/calls.vcf"],
+        genome="hg38",
+        flanking=300,
+        out_dir=work_dir,
+        log=logging.getLogger("test"),
+        html_path=report,
+        igver_cmd=None,
+        dpi=300,
+        display_mode="collapse",
+    )
+
+    monkeypatch.setenv("IGVER_CMD", "/usr/bin/true")
+    with patch.object(b.shutil, "which", return_value="/usr/bin/true"):
+        with patch.object(b.subprocess, "run", side_effect=_fake_igver_run):
+            manifest = b.build_pngs_with_igver(**build_args)
+
+    # 1. Intermediate files exist and hold the flanked regions / track list.
+    flanked_bed = work_dir / "igver_regions.bed"
+    track_list = work_dir / "igver_input.txt"
+    assert flanked_bed.exists() and track_list.exists()
+    assert flanked_bed.read_text() == "chr1\t0\t500\talpha\nchr2\t0\t700\tbeta\n"
+    assert track_list.read_text() == "/data/sample.bam\n/data/calls.vcf\n"
+
+    # 2. Manifest: a header line, then one row per region following the
+    #    PNG-filename convention (validated against igver source).
+    header, *body = manifest.read_text().splitlines()
+    assert header.startswith("#bed_row_idx\tuid\tchrom\t")
+    assert len(body) == 2
+
+    fields = body[0].split("\t")
+    leading = [
+        "1",            # bed_row_idx
+        "alpha",        # uid
+        "chr1",         # chrom
+        "100",          # start_orig
+        "200",          # end_orig
+        "0",            # start_flanked (clamped)
+        "500",          # end_flanked
+        "chr1:0-500",   # flanked window as chrom:start-end
+    ]
+    assert fields[:8] == leading
+    assert fields[8].endswith("/png/chr1-0-500.alpha.png"), fields[8]
+    assert fields[9].endswith("/sample.hg38.html"), fields[9]
+    assert fields[10] == "1"             # html_table_row matches bed_row_idx
+
+
+def test_build_pngs_with_igver_detects_silent_exit_0_failure(tmp_path, monkeypatch):
+    # The motivating bug: igver via `pip install` egg-link prints
+    # `[ERROR] Failed to generate all PNG files after 2 iterations.`, then exits 0
+    # with an empty output dir. A returncode check misses it; the inline check must not.
+    bed = tmp_path / "sites.hg38.bed"
+    _write_bed(bed, [("chr1", 100, 200, "alpha"), ("chr2", 300, 400, "beta")])
+    html_path = tmp_path / "sample.hg38.html"; html_path.write_text("<html/>")
+    log = logging.getLogger("test")
+    # Mocked run = exit 0 with no files written; no real /usr/bin/true needed.
+    silent = b.subprocess.CompletedProcess(args=[], returncode=0, stdout="", stderr="")
+    monkeypatch.setenv("IGVER_CMD", "/usr/bin/true")
+    with patch.object(b.shutil, "which", return_value="/usr/bin/true"), \
+         patch.object(b.subprocess, "run", return_value=silent):
+        with pytest.raises(SystemExit, match="silent exit-0 failure"):
+            b.build_pngs_with_igver(
+                sites=bed, tracks=["/data/sample.bam"], genome="hg38",
+                flanking=0, out_dir=tmp_path / "out", log=log, html_path=html_path,
+            )
+
+
+def test_build_pngs_with_igver_detects_partial_silent_failure(tmp_path, monkeypatch):
+    # Mid-batch failure: igver exits 0 after rendering only one of the two
+    # regions. The inline check must fail, otherwise the manifest would
+    # reference a PNG that does not exist.
+    sites_bed = tmp_path / "sites.hg38.bed"
+    _write_bed(sites_bed, [("chr1", 100, 200, "alpha"), ("chr2", 300, 400, "beta")])
+    report = tmp_path / "sample.hg38.html"
+    report.write_text("<html/>")
+
+    def render_first_only(cmd, **kwargs):
+        # Emit the PNG for the first region; the second is deliberately absent.
+        import subprocess
+        png_dir = Path(cmd[cmd.index("-o") + 1])
+        png_dir.mkdir(parents=True, exist_ok=True)
+        (png_dir / "chr1-100-200.alpha.png").write_bytes(b"PNG\x00" * 4096)
+        return subprocess.CompletedProcess(args=cmd, returncode=0, stdout="", stderr="")
+
+    monkeypatch.setenv("IGVER_CMD", "/usr/bin/true")
+    with patch.object(b.shutil, "which", return_value="/usr/bin/true"):
+        with patch.object(b.subprocess, "run", side_effect=render_first_only):
+            with pytest.raises(SystemExit, match="silent exit-0 failure"):
+                b.build_pngs_with_igver(
+                    sites=sites_bed, tracks=["/data/sample.bam"], genome="hg38", flanking=0,
+                    out_dir=tmp_path / "out", log=logging.getLogger("test"), html_path=report,
+                )
+
+
+def test_build_pngs_with_igver_detects_zero_byte_png(tmp_path, monkeypatch):
+    # Disk-full / truncated-write case: the PNG is on disk but empty, hence
+    # unusable, so the inline check has to reject it.
+    sites_bed = tmp_path / "sites.hg38.bed"
+    _write_bed(sites_bed, [("chr1", 100, 200, "alpha")])
+    report = tmp_path / "sample.hg38.html"
+    report.write_text("<html/>")
+
+    def empty_png_run(cmd, **kwargs):
+        import subprocess
+        png_dir = Path(cmd[cmd.index("-o") + 1])
+        png_dir.mkdir(parents=True, exist_ok=True)
+        (png_dir / "chr1-100-200.alpha.png").write_bytes(b"")  # zero-byte
+        return subprocess.CompletedProcess(args=cmd, returncode=0, stdout="", stderr="")
+
+    monkeypatch.setenv("IGVER_CMD", "/usr/bin/true")
+    with patch.object(b.shutil, "which", return_value="/usr/bin/true"):
+        with patch.object(b.subprocess, "run", side_effect=empty_png_run):
+            with pytest.raises(SystemExit, match="silent exit-0 failure"):
+                b.build_pngs_with_igver(
+                    sites=sites_bed, tracks=["/data/sample.bam"], genome="hg38", flanking=0,
+                    out_dir=tmp_path / "out", log=logging.getLogger("test"), html_path=report,
+                )
+
+
+def test_build_pngs_with_igver_propagates_igver_failure(tmp_path, monkeypatch):
+    # A non-zero igver exit must become SystemExit so the caller (and verify_cohort)
+    # sees a failed build instead of an empty PNG dir in a "verified" cohort.
+    bed = tmp_path / "sites.hg38.bed"
+    _write_bed(bed, [("chr1", 100, 200, "alpha")])
+    html_path = tmp_path / "sample.hg38.html"; html_path.write_text("<html/>")
+    log = logging.getLogger("test")
+    # Mocked failing run (exit 1, nothing written); no real /usr/bin/false needed.
+    failed = b.subprocess.CompletedProcess(args=[], returncode=1, stdout="", stderr="")
+    monkeypatch.setenv("IGVER_CMD", "/usr/bin/false")
+    with patch.object(b.shutil, "which", return_value="/usr/bin/false"), \
+         patch.object(b.subprocess, "run", return_value=failed):
+        with pytest.raises(SystemExit) as exc:
+            b.build_pngs_with_igver(
+                sites=bed, tracks=["/data/sample.bam"], genome="hg38", flanking=0,
+                out_dir=tmp_path / "out", log=log, html_path=html_path,
+            )
+    assert exc.value.code != 0
+
+
+def test_build_pngs_with_igver_errors_on_empty_bed(tmp_path):
+    # A sites BED holding only a comment line has no data rows and must abort.
+    header_only = tmp_path / "sites.hg38.bed"
+    header_only.write_text("# header only\n")
+    with pytest.raises(SystemExit, match="no data rows"):
+        b.build_pngs_with_igver(
+            sites=header_only, tracks=["/data/sample.bam"], genome="hg38", flanking=0,
+            out_dir=tmp_path / "out", log=logging.getLogger("test"), html_path=tmp_path / "x.html",
+        )
diff --git a/igv-reports/tests/unit/test_generate_tracks_json.py b/igv-reports/tests/unit/test_generate_tracks_json.py
new file mode 100644
index 0000000..bf562ee
--- /dev/null
+++ b/igv-reports/tests/unit/test_generate_tracks_json.py
@@ -0,0 +1,226 @@
+"""Unit tests for generate_tracks_json.py — annotation-default resolver.
+
+Author: Samuel Ahuno
+Purpose:
+  Exercises the `default:` shortcut path added in the methylation-pathway
+  polish round. Without these tests a future Claude session could easily
+  break the resolver by adding a 6th key without updating the lookup.
+
+Covers:
+  * Happy path: each known default key resolves against a synthetic cfg.
+  * hg38 gencode-sibling preference (.gff3.gz over .gtf.gz when present).
+  * indexURL: included when .tbi exists; omitted otherwise.
+  * Unknown default key -> SystemExit with valid-keys hint.
+  * Missing genome in cfg -> SystemExit.
+  * Missing path on disk -> SystemExit.
+  * build_annotation_tracks() routes `default:` entries through the resolver
+    and preserves backwards compat for explicit `url:` entries.
+  * `default:` entry without top-level `genome:` -> SystemExit.
+
+Run:
+  cd igv-reports && pytest tests/unit/test_generate_tracks_json.py -v
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+SCRIPTS = Path(__file__).resolve().parents[2] / "scripts"
+sys.path.insert(0, str(SCRIPTS))
+
+import generate_tracks_json as g  # noqa: E402
+
+
+def _fake_cfg(genome: str, paths: dict[str, str]) -> dict:
+    """Return a minimal mirror of the databases YAML, keyed by genome.
+
+    `paths` maps the YAML keys (CpGIslands, gtf, repMaskerBed, EPDnewCoding,
+    EPDnewNonCoding) to filesystem paths for that genome."""
+    return dict(reference_genomes=dict(local={genome: paths}))
+
+
+def _touch(path: Path) -> Path:
+    """Write an empty file at `path`, creating parent directories, and return `path`."""
+    path.parent.mkdir(exist_ok=True, parents=True)
+    path.write_bytes(bytes())
+    return path
+
+
+# ----- happy path: each known default key resolves -----
+
+
+def test_resolve_cgi(tmp_path):
+    # `cgi` maps to the CpGIslands path and carries its canned display settings.
+    cgi_bed = _touch(tmp_path / "hg38_CpGIslands.bed")
+    databases = _fake_cfg("hg38", {"CpGIslands": str(cgi_bed)})
+    track = g.resolve_annotation_default("cgi", "hg38", databases)
+    canned = {"url": str(cgi_bed), "display_name": "CpG islands",
+              "format": "bed", "color": "rgb(0,158,115)"}
+    assert {key: track[key] for key in canned} == canned
+    assert "indexURL" not in track  # no .tbi alongside
+
+
+def test_resolve_repmasker(tmp_path):
+    # A .tbi sitting next to the data file must surface as indexURL.
+    rmsk_bed = _touch(tmp_path / "rmsk.bed.gz")
+    rmsk_index = _touch(tmp_path / "rmsk.bed.gz.tbi")
+    databases = _fake_cfg("hg38", {"repMaskerBed": str(rmsk_bed)})
+    track = g.resolve_annotation_default("repmasker", "hg38", databases)
+    assert (track["url"], track["indexURL"]) == (str(rmsk_bed), str(rmsk_index))
+    assert track["displayMode"] == "COLLAPSED"
+
+
+def test_resolve_gencode_hg38_prefers_gff3_sibling(tmp_path):
+    # The YAML `gtf` entry names a .gtf.gz, yet the same directory also holds
+    # gencode.v47.annotation.gff3.gz plus its .tbi; the resolver should
+    # switch to that bgzip+tabix .gff3.gz sibling.
+    yaml_gtf = _touch(tmp_path / "gencode.v47.annotation.gtf.gz")
+    gff3 = _touch(tmp_path / "gencode.v47.annotation.gff3.gz")
+    gff3_index = _touch(tmp_path / "gencode.v47.annotation.gff3.gz.tbi")
+    databases = _fake_cfg("hg38", {"gtf": str(yaml_gtf)})
+    track = g.resolve_annotation_default("gencode", "hg38", databases)
+    assert track["url"] == str(gff3), "expected hg38 gencode to prefer .gff3.gz sibling"
+    assert track["indexURL"] == str(gff3_index)
+
+
+def test_resolve_gencode_mm10_uses_gtf(tmp_path):
+    # Sibling preference only fires for hg38; for mm10 the YAML-named gtf
+    # path should come back verbatim.
+    yaml_gtf = _touch(tmp_path / "gencode.vM25.annotation.gtf.gz")
+    databases = _fake_cfg("mm10", {"gtf": str(yaml_gtf)})
+    resolved_url = g.resolve_annotation_default("gencode", "mm10", databases)["url"]
+    assert resolved_url == str(yaml_gtf)
+
+
+def test_resolve_epdnew_coding_and_noncoding(tmp_path):
+    # Coding and non-coding EPDnew entries resolve to their own files and use
+    # distinct Okabe-Ito colors so the two tracks read separately.
+    files = {
+        "EPDnewCoding": _touch(tmp_path / "Hs_EPDnew.hg38.bed.gz"),
+        "EPDnewNonCoding": _touch(tmp_path / "HsNC_EPDnew.hg38.bed.gz"),
+    }
+    databases = _fake_cfg("hg38", {key: str(path) for key, path in files.items()})
+    coding_track = g.resolve_annotation_default("epdnew_coding", "hg38", databases)
+    noncoding_track = g.resolve_annotation_default("epdnew_noncoding", "hg38", databases)
+    assert coding_track["url"] == str(files["EPDnewCoding"])
+    assert noncoding_track["url"] == str(files["EPDnewNonCoding"])
+    assert coding_track["color"] != noncoding_track["color"]
+
+
+# ----- error paths -----
+
+
+def test_unknown_default_key_lists_valid_keys():
+    # The error should enumerate every valid key so a typo can be fixed
+    # without having to read the source.
+    valid_keys = ("cgi", "gencode", "repmasker", "epdnew_coding", "epdnew_noncoding")
+    with pytest.raises(SystemExit) as exc:
+        g.resolve_annotation_default("DOES_NOT_EXIST", "hg38", _fake_cfg("hg38", {}))
+    unlisted = [key for key in valid_keys if key not in str(exc.value)]
+    assert not unlisted
+
+
+def test_missing_genome_in_cfg(tmp_path):
+    # The cfg only has an hg38 entry, so a GRCh37 request must abort.
+    databases = _fake_cfg("hg38", {"CpGIslands": str(_touch(tmp_path / "hg38_CpGIslands.bed"))})
+    with pytest.raises(SystemExit, match="no entry for genome 'GRCh37'"):
+        g.resolve_annotation_default("cgi", "GRCh37", databases)
+
+
+def test_missing_yaml_key_for_genome(tmp_path):
+    # The mm39 entry below has no repMaskerBed key, so `default: repmasker`
+    # must fail with a clear error rather than silently emitting no track.
+    databases = _fake_cfg("mm39", {"CpGIslands": "/tmp/fake_cgi"})
+    with pytest.raises(SystemExit, match="repMaskerBed"):
+        g.resolve_annotation_default("repmasker", "mm39", databases)
+
+
+def test_missing_path_on_disk(tmp_path):
+    absent = tmp_path / "nonexistent.bed"  # configured but never created
+    with pytest.raises(SystemExit, match="resolved path missing on disk"):
+        g.resolve_annotation_default("cgi", "hg38", _fake_cfg("hg38", {"CpGIslands": str(absent)}))
+
+
+# ----- build_annotation_tracks() integration -----
+
+
+def test_build_annotation_tracks_shortcut(tmp_path):
+    # A bare `default: cgi` entry expands into exactly one annotation track.
+    cgi_bed = _touch(tmp_path / "hg38_CpGIslands.bed")
+    databases = _fake_cfg("hg38", {"CpGIslands": str(cgi_bed)})
+    request = {"genome": "hg38", "annotation": [{"default": "cgi"}]}
+    tracks = g.build_annotation_tracks(request, tmp_path, databases)
+    assert len(tracks) == 1
+    (only,) = tracks
+    assert (only["name"], only["url"]) == ("CpG islands", str(cgi_bed))
+    assert (only["type"], only["color"]) == ("annotation", "rgb(0,158,115)")
+
+
+def test_build_annotation_tracks_shortcut_with_overrides(tmp_path):
+    # `default:` still drives path resolution while the user overrides the
+    # canned display name, color and display mode.
+    cgi_bed = _touch(tmp_path / "hg38_CpGIslands.bed")
+    databases = _fake_cfg("hg38", {"CpGIslands": str(cgi_bed)})
+    overrides = {
+        "name": "My CpG view",
+        "color": "rgb(0,0,0)",
+        "displayMode": "COLLAPSED",
+    }
+    request = {
+        "genome": "hg38",
+        "annotation": [{"default": "cgi", **overrides}],
+    }
+    track = g.build_annotation_tracks(request, tmp_path, databases)[0]
+    # Each user-supplied value must come through unchanged ...
+    for key, value in overrides.items():
+        assert track[key] == value
+    # ... while the url is still the one resolved by the shortcut.
+    assert track["url"] == str(cgi_bed)
+
+
+def test_build_annotation_tracks_explicit_path_unchanged(tmp_path):
+    # Backwards-compat: an explicit `url:` entry must not need a cfg and must
+    # produce the same shape as before this round of changes.
+    custom_bed = _touch(tmp_path / "my_custom.bed")
+    entry = {
+        "name": "My custom track",
+        "url": str(custom_bed),
+        "format": "bed",
+        "color": "rgb(1,2,3)",
+    }
+    tracks = g.build_annotation_tracks({"annotation": [entry]}, tmp_path, {})
+    # `type` and `displayMode` are absent from the entry but expected in the output.
+    expected = {
+        "name": "My custom track",
+        "url": str(custom_bed),
+        "format": "bed",
+        "type": "annotation",
+        "displayMode": "EXPANDED",
+        "color": "rgb(1,2,3)",
+    }
+    assert tracks[0] == expected
+
+
+def test_build_annotation_tracks_mixed(tmp_path):
+    # Explicit and shortcut entries may share one spec; the output keeps the
+    # input order.
+    cgi_bed = _touch(tmp_path / "hg38_CpGIslands.bed")
+    custom_bed = _touch(tmp_path / "custom.bed")
+    databases = _fake_cfg("hg38", {"CpGIslands": str(cgi_bed)})
+    entries = [
+        {"name": "Custom first", "url": str(custom_bed)},
+        {"default": "cgi"},
+    ]
+    request = {"genome": "hg38", "annotation": entries}
+    tracks = g.build_annotation_tracks(request, tmp_path, databases)
+    names = [track["name"] for track in tracks]
+    assert names == ["Custom first", "CpG islands"]
+
+
+def test_shortcut_without_top_level_genome(tmp_path):
+    genome_less = {"annotation": [{"default": "cgi"}]}  # no top-level `genome:` key
+    with pytest.raises(SystemExit, match="top-level `genome:`"):
+        g.build_annotation_tracks(genome_less, tmp_path, {})
diff --git a/igv-reports/tests/unit/test_verify_anchors.py b/igv-reports/tests/unit/test_verify_anchors.py
new file mode 100644
index 0000000..fa7b514
--- /dev/null
+++ b/igv-reports/tests/unit/test_verify_anchors.py
@@ -0,0 +1,534 @@
+"""Unit tests for verify_anchors.py — parser layer only.
+
+Author: Samuel Ahuno
+Purpose:
+  Fast pytest suite covering the pure-Python parsing/decision logic in
+  verify_anchors.py. No subprocess, no real BAM, no shared-storage
+  dependency. Runs in ~1 s on any machine with pytest.
+
+  These tests catch the parser regressions that bit during the original
+  iteration: status-taxonomy conflation between SKIP and FAIL, mis-tabbed
+  TSV rows being silently mis-parsed, decide_status confusing tolerance
+  with notes when columns are out of order.
+
+Run:
+  cd igv-reports
+  pytest tests/unit/ -v
+"""
+
+from __future__ import annotations
+
+import base64
+import gzip
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+# Make scripts/ importable without installing the skill as a package.
+SCRIPTS = Path(__file__).resolve().parents[2] / "scripts"
+sys.path.insert(0, str(SCRIPTS))
+import verify_anchors as va  # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# load_anchors
+# ---------------------------------------------------------------------------
+
+def _write_tsv(tmp_path: Path, body: str) -> Path:
+    tsv = tmp_path / "anchors.tsv"  # `body` is written verbatim; the path is returned
+    tsv.write_text(body)
+    return tsv
+
+
+def test_load_anchors_full_row(tmp_path):
+    """A 10-column row, notes included, parses into the expected fields."""
+    header = "#sample\ttrack_name\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n"
+    record = "s1\ttumor\tchr2\t25246500\t25246501\t56\t0.05\t\t\tDNMT3A\n"
+    anchors = va.load_anchors(_write_tsv(tmp_path, header + record))
+    assert len(anchors) == 1
+    row = anchors[0]
+    assert (row.sample, row.track_name) == ("s1", "tumor")
+    assert (row.chrom, row.start, row.end) == ("chr2", 25246500, 25246501)
+    assert row.expected == 56
+    # tolerance and the min/max bounds come back as the raw TSV strings.
+    assert row.tolerance == "0.05"
+    assert row.min_count == ""
+    assert row.max_count == ""
+    assert row.notes == "DNMT3A"
+
+
+def test_load_anchors_min_max_row(tmp_path):
+    # Explicit min/max bounds come back as the strings found in the TSV.
+    header = "#sample\ttrack_name\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n"
+    record = "s1\ttumor\tchrX\t100\t200\t50\t\t20\t100\thigh-conf\n"
+    anchors = va.load_anchors(_write_tsv(tmp_path, header + record))
+    first = anchors[0]
+    assert first.min_count == "20"
+    assert first.max_count == "100"
+
+
+def test_load_anchors_missing_header_errors(tmp_path):
+    """A data row that appears before any header aborts with a clear error."""
+    headerless = _write_tsv(tmp_path, "s1\ttumor\tchr1\t0\t100\t10\t\t\t\t\n")
+    with pytest.raises(SystemExit, match="data row before header"):
+        va.load_anchors(headerless)
+
+
+def test_load_anchors_bad_tolerance_fails_fast(tmp_path):
+    """A mis-tabbed row whose notes text lands in the tolerance column must be
+    rejected at load time, with a hint, not crash later inside decide_status."""
+    header = "#sample\ttrack_name\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n"
+    shifted = "s1\ttumor\tchr2\t100\t200\t10\tNOT_A_NUMBER\t\t\tDNMT3A\n"
+    tsv = _write_tsv(tmp_path, header + shifted)
+    with pytest.raises(SystemExit) as excinfo:
+        va.load_anchors(tsv)
+    message = str(excinfo.value)
+    assert "malformed anchor row" in message
+    # The hint is expected to mention awk (the -F'\t' advice).
+    assert "awk" in message
+
+
+def test_load_anchors_bad_min_fails_fast(tmp_path):
+    # A non-numeric value in the `min` column is rejected while loading.
+    header = "#sample\ttrack_name\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n"
+    bad_min = "s1\ttumor\tchr2\t100\t200\t10\t\tNAH\t\t\n"
+    tsv = _write_tsv(tmp_path, header + bad_min)
+    with pytest.raises(SystemExit, match="malformed anchor row"):
+        va.load_anchors(tsv)
+
+
+def test_load_anchors_missing_file(tmp_path):
+    with pytest.raises(SystemExit, match="anchors TSV not found"):
+        va.load_anchors(tmp_path.joinpath("does_not_exist.tsv"))  # path never created
+
+
+def test_load_anchors_skips_blank_lines(tmp_path):
+    body = "".join([
+        "#sample\ttrack_name\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n",
+        "\n",  # blank line between header and data
+        "s1\ttumor\tchr1\t0\t100\t10\t\t\t\t\n",
+        "\n",  # trailing blank line
+    ])
+    anchors = va.load_anchors(_write_tsv(tmp_path, body))
+    assert len(anchors) == 1
+
+
+# ---------------------------------------------------------------------------
+# decide_status
+# ---------------------------------------------------------------------------
+
+def _anchor(expected=10, tolerance="", min_count="", max_count=""):
+    """Build an AnchorRow on a fixed chr1:0-100 locus; only the count fields vary."""
+    fixed_locus = dict(sample="s", track_name="t", chrom="chr1", start=0, end=100)
+    thresholds = dict(expected=expected, tolerance=tolerance,
+                      min_count=min_count, max_count=max_count)
+    return va.AnchorRow(**fixed_locus, **thresholds)
+
+
+def test_decide_status_pass_within_default_tolerance():
+    # observed 104 against expected 100, judged with the 0.05 default tolerance.
+    verdict, _ = va.decide_status(_anchor(expected=100), observed=104, default_tol=0.05)
+    assert verdict == "PASS"
+
+
+def test_decide_status_fail_outside_default_tolerance():
+    # observed 110 against expected 100, judged with the 0.05 default tolerance.
+    verdict, explanation = va.decide_status(_anchor(expected=100), observed=110, default_tol=0.05)
+    assert verdict == "FAIL"
+    assert "diff_ratio" in explanation
+
+
+def test_decide_status_per_row_tolerance_overrides_default():
+    """A row tolerance of 0.20 lets observed=115 pass although the 0.05 default would not."""
+    lenient = _anchor(expected=100, tolerance="0.20")
+    verdict, _ = va.decide_status(lenient, observed=115, default_tol=0.05)
+    assert verdict == "PASS"
+
+
+def test_decide_status_min_bound_pass():
+    row = _anchor(expected=50, min_count="20")  # observed 50 meets the min of 20
+    verdict, explanation = va.decide_status(row, observed=50, default_tol=0.05)
+    assert verdict == "PASS"
+    assert "min=20 OK" in explanation
+
+
+def test_decide_status_min_bound_fail():
+    row = _anchor(expected=50, min_count="100")  # observed 50 falls short of the min of 100
+    verdict, explanation = va.decide_status(row, observed=50, default_tol=0.05)
+    assert verdict == "FAIL"
+    assert "min=100 FAIL" in explanation
+
+
+def test_decide_status_min_max_combined():
+    # With min=20 and max=80: 50 sits inside the bounds, 10 is below the
+    # min and 100 is above the max.
+    bounded = _anchor(min_count="20", max_count="80")
+    cases = ((50, "PASS"), (10, "FAIL"), (100, "FAIL"))
+    for observed, wanted in cases:
+        verdict, _ = va.decide_status(bounded, observed=observed, default_tol=0.05)
+        assert verdict == wanted
+
+
+def test_decide_status_bounds_override_tolerance():
+    """Once a min/max bound is present, the tolerance is ignored."""
+    # 52 is within the 0.50 tolerance of expected=50, yet it violates min=100.
+    row = _anchor(expected=50, tolerance="0.50", min_count="100")
+    verdict, _ = va.decide_status(row, observed=52, default_tol=0.05)
+    assert verdict == "FAIL"  # the min bound wins over tolerance
+
+
+def test_decide_status_zero_expected_exact():
+    # With expected=0, observed 0 passes and observed 1 fails.
+    zero_row = _anchor(expected=0)
+    for observed, wanted in ((0, "PASS"), (1, "FAIL")):
+        verdict, _ = va.decide_status(zero_row, observed=observed, default_tol=0.05)
+        assert verdict == wanted
+
+
+# ---------------------------------------------------------------------------
+# decode_track_slice
+# ---------------------------------------------------------------------------
+
+def test_decode_track_slice_roundtrip(tmp_path):
+    """Decoding a base64 data: URL writes back exactly the original bytes."""
+    original = b"BAM\x01some bytes here"
+    encoded = base64.b64encode(original).decode()
+    target = tmp_path / "out.bin"
+    va.decode_track_slice(f"data:application/gzip;base64,{encoded}", target)
+    assert target.read_bytes() == original
+
+
+def test_decode_track_slice_other_mediatype_accepted(tmp_path):
+    """The mediatype is not validated — only the payload bytes matter."""
+    original = b"\x1f\x8b\x08compressed body"
+    encoded = base64.b64encode(original).decode()
+    target = tmp_path / "out.bin"
+    va.decode_track_slice(f"data:application/octet-stream;base64,{encoded}", target)
+    assert target.read_bytes() == original
+
+
+def test_decode_track_slice_not_a_data_url_raises(tmp_path):
+    with pytest.raises(ValueError, match="not a data: base64 URL"):  # plain http URL
+        va.decode_track_slice("http://example.com/blob.bam", tmp_path.joinpath("out.bin"))
+
+
+# ---------------------------------------------------------------------------
+# find_track
+# ---------------------------------------------------------------------------
+
+def test_find_track_hit():
+    # The track is looked up by its name among the session's tracks.
+    wanted = {"name": "sample.sorted", "url": "data:..."}
+    session = {"tracks": [{"name": "ann.bed"}, wanted]}
+    found = va.find_track(session, "sample.sorted")
+    assert found is not None
+    assert found["url"] == "data:..."
+
+
+def test_find_track_miss():
+    only_other = {"tracks": [{"name": "other"}]}  # no track named "missing"
+    assert va.find_track(only_other, "missing") is None
+
+
+def test_find_track_empty():
+    for trackless in ({}, {"tracks": []}):  # missing key, then empty list
+        assert va.find_track(trackless, "x") is None
+
+
+# ---------------------------------------------------------------------------
+# locate_session_entry — status taxonomy split (was the v1 regression)
+# ---------------------------------------------------------------------------
+
+def _make_table_json(rows):
+    return {"headers": ["Chrom", "Start", "End", "Name"], "rows": rows}
+
+
+def _make_session_dict(entries):
+    """Build a sessionDictionary mapping str(idx) -> a gzipped+b64 data URL
+    that decodes to the given entry dict."""
+    out = {}
+    for idx, entry in entries.items():
+        raw = gzip.compress(json.dumps(entry).encode())
+        out[str(idx)] = "data:application/gzip;base64," + base64.b64encode(raw).decode()
+    return out
+
+
+def test_locate_session_entry_ok():
+    tj = _make_table_json([["chr2", 25246501, 25246501, "x"]])
+    sd = _make_session_dict({0: {"tracks": [{"name": "t"}]}})
+    outcome, sess, det = va.locate_session_entry(sd, tj, "chr2", 25246500, 25246501)
+    assert outcome == "ok"
+    assert sess == {"tracks": [{"name": "t"}]}
+    assert det == ""
+
+
+def test_locate_session_entry_absent_returns_skip_signal():
+    """Anchor for a region that's not in the HTML — caller should SKIP."""
+    tj = _make_table_json([["chr2", 25246501, 25246501, "x"]])
+    sd = _make_session_dict({0: {"tracks": []}})
+    outcome, _, det = va.locate_session_entry(sd, tj, "chr2", 99999999, 99999999)
+    assert outcome == "absent"
+    assert "no tableJson row matched" in det
+
+
+def test_locate_session_entry_broken_missing_session():
+    """Row in tableJson but no corresponding sessionDictionary entry — FAIL."""
+    tj = _make_table_json([["chr2", 25246501, 25246501, "x"]])
+    sd = {}  # no entries at all
+    outcome, _, det = va.locate_session_entry(sd, tj, "chr2", 25246500, 25246501)
+    assert outcome == "broken"
+    assert "no entry for row index" in det
+
+
+def test_locate_session_entry_broken_undecodable():
+    """Row + session entry present but its blob fails base64/gzip decoding — FAIL."""
+    tj = _make_table_json([["chr2", 25246501, 25246501, "x"]])
+    sd = {"0": "data:application/gzip;base64,NOT_VALID_BASE64"}
+    outcome, _, det = va.locate_session_entry(sd, tj, "chr2", 25246500, 25246501)
+    assert outcome == "broken"
+    assert "failed to gunzip/decode" in det
+
+
+def test_locate_session_entry_broken_bad_headers():
+    """tableJson missing the Chrom/Start/End columns we need."""
+    tj = {"headers": ["foo", "bar"], "rows": [["x", "y"]]}
+    sd = {}
+    outcome, _, det = va.locate_session_entry(sd, tj, "chr2", 100, 200)
+    assert outcome == "broken"
+    assert "missing expected column" in det
+
+
+# ---------------------------------------------------------------------------
+# sample_bam_paths — samplesheet column handling
+# ---------------------------------------------------------------------------
+
+def test_sample_bam_paths_tumor_only():
+    row = {"sample": "s1", "bam_tumor": "/x/tumor.sorted.bam"}
+    out = va.sample_bam_paths(row)
+    assert out == [("tumor.sorted", Path("/x/tumor.sorted.bam"))]
+
+
+def test_sample_bam_paths_tumor_and_normal():
+    row = {"sample": "s1", "bam_tumor": "/x/t.bam", "bam_normal": "/x/n.bam"}
+    out = va.sample_bam_paths(row)
+    names = [n for n, _ in out]
+    assert names == ["t", "n"]
+
+
+def test_sample_bam_paths_extras_filtered_to_bam_cram():
+    row = {
+        "sample": "s1",
+        "bam_tumor": "/x/t.bam",
+        "extra_tracks": "/y/extra.bam,/y/annot.bed,/y/other.cram",
+    }
+    out = va.sample_bam_paths(row)
+    names = [n for n, _ in out]
+    # bam_tumor + the .bam + the .cram from extras; .bed should be filtered out
+    assert names == ["t", "extra", "other"]
+
+
+def test_sample_bam_paths_blank_row():
+    row = {"sample": "s1"}
+    assert va.sample_bam_paths(row) == []
+
+
+# ---------------------------------------------------------------------------
+# write_anchors round-trip
+# ---------------------------------------------------------------------------
+
+def test_write_load_round_trip(tmp_path):
+    anchors_in = [
+        va.AnchorRow(sample="s1", track_name="t1", chrom="chr1",
+                     start=0, end=100, expected=42, notes="hi"),
+        va.AnchorRow(sample="s2", track_name="t2", chrom="chr2",
+                     start=200, end=300, expected=7, min_count="3", max_count="20"),
+    ]
+    out = tmp_path / "anchors.tsv"
+    va.write_anchors(anchors_in, out)
+    rows = va.load_anchors(out)
+    assert len(rows) == 2
+    assert rows[0].notes == "hi"
+    assert rows[1].min_count == "3"
+    assert rows[1].max_count == "20"
+
+
+# ---------------------------------------------------------------------------
+# bedGraph / wig anchors (methylation-aware path added 2026-05-19)
+# ---------------------------------------------------------------------------
+
+def _write_bedgraph(path: Path, rows: list[tuple]) -> Path:
+    """Write a 4-col bedGraph (chrom/start/end/value), no header."""
+    path.write_text("".join(f"{r[0]}\t{r[1]}\t{r[2]}\t{r[3]}\n" for r in rows))
+    return path
+
+
+def test_is_wig_data_line():
+    assert va._is_wig_data_line("chr1\t100\t101\t0.5") is True
+    assert va._is_wig_data_line("track name=meth") is False
+    assert va._is_wig_data_line("browser dense") is False
+    assert va._is_wig_data_line("fixedStep chrom=chr1 start=1 step=1") is False
+    assert va._is_wig_data_line("variableStep chrom=chr1") is False
+    assert va._is_wig_data_line("# comment") is False
+    assert va._is_wig_data_line("") is False
+    assert va._is_wig_data_line("   ") is False
+
+
+def test_bedgraph_count_source_plain_text_in_region(tmp_path):
+    # 3 of 4 rows overlap [100, 200); the 4th is on a different chrom.
+    bg = _write_bedgraph(tmp_path / "sample.hg38.bedgraph", [
+        ("chr1", 100, 101, 0.5),
+        ("chr1", 150, 151, 0.8),
+        ("chr1", 199, 200, 0.3),  # overlaps: r_end 200 > q_start 100 and r_start 199 < q_end 200
+        ("chr2", 100, 101, 0.9),  # different chrom
+    ])
+    assert va.bedgraph_count_source(bg, "chr1", 100, 200) == 3
+
+
+def test_bedgraph_count_source_excludes_out_of_region(tmp_path):
+    # Rows must overlap [start, end). Boundary cases.
+    bg = _write_bedgraph(tmp_path / "sample.hg38.bedgraph", [
+        ("chr1", 50, 100, 0.1),    # r_end == q_start -> doesn't overlap (half-open)
+        ("chr1", 100, 150, 0.2),   # r_start == q_start -> overlaps
+        ("chr1", 195, 200, 0.3),   # r_start < q_end == 200 -> overlaps
+        ("chr1", 200, 250, 0.4),   # r_start == q_end -> doesn't overlap (half-open)
+        ("chr1", 1000, 1001, 0.5), # way out
+    ])
+    assert va.bedgraph_count_source(bg, "chr1", 100, 200) == 2
+
+
+def test_bedgraph_count_source_skips_headers_and_comments(tmp_path):
+    bg = tmp_path / "sample.hg38.bedgraph"
+    bg.write_text(
+        "#header comment\n"
+        "track name=test\n"
+        "browser dense\n"
+        "chr1\t100\t101\t0.5\n"
+        "chr1\t150\t151\t0.6\n"
+    )
+    assert va.bedgraph_count_source(bg, "chr1", 0, 1000) == 2
+
+
+def test_bedgraph_count_source_handles_gzipped_input(tmp_path):
+    # Input is plain gzip (not bgzip+tabix), which is expected to take the
+    # linear-scan path. Relies on the module-level gzip import.
+    bg = tmp_path / "sample.hg38.bedgraph.gz"
+    with gzip.open(bg, "wt") as fh:
+        fh.write("chr3\t100\t101\t0.5\n")
+        fh.write("chr3\t150\t151\t0.6\n")
+        fh.write("chr3\t999\t1000\t0.7\n")
+    assert va.bedgraph_count_source(bg, "chr3", 100, 200) == 2
+    assert va.bedgraph_count_source(bg, "chr3", 0, 10000) == 3
+    assert va.bedgraph_count_source(bg, "chr4", 0, 10000) == 0
+
+
+def test_bedgraph_count_source_missing_file_raises(tmp_path):
+    with pytest.raises(FileNotFoundError, match="bedGraph track not found"):
+        va.bedgraph_count_source(tmp_path / "does_not_exist.bg", "chr1", 0, 100)
+
+
+def test_bedgraph_count_slice_decodes_gzipped_payload():
+    # Mimics how igv_reports/datauri.py encodes a wig/bedGraph slice:
+    # gzip(text) base64-encoded. verify_anchors only sees the gzipped
+    # bytes after base64 decoding, so we test the bytes-in entry point.
+    text = (
+        "track name=meth\n"
+        "chr1\t100\t101\t0.5\n"
+        "chr1\t150\t151\t0.6\n"
+        "chr1\t200\t201\t0.7\n"
+    )
+    assert va.bedgraph_count_slice(gzip.compress(text.encode())) == 3
+
+
+def test_bedgraph_count_slice_falls_back_to_uncompressed():
+    # Some create_report versions write small wig slices uncompressed —
+    # the fallback path must accept raw text bytes.
+    text = "chr1\t100\t101\t0.5\nchr1\t200\t201\t0.6\n"
+    assert va.bedgraph_count_slice(text.encode()) == 2
+
+
+def test_bedgraph_count_slice_zero_when_empty():
+    # No data rows in the slice = silent empty-methylation-slice failure.
+    # Caller (verify_one_html) compares to expected via decide_status.
+    assert va.bedgraph_count_slice(gzip.compress(b"track name=meth\n")) == 0
+    assert va.bedgraph_count_slice(b"") == 0
+
+
+# ---------------------------------------------------------------------------
+# Anchor schema: track_type column with backwards compat
+# ---------------------------------------------------------------------------
+
+def test_load_anchors_legacy_no_track_type_defaults_to_bam(tmp_path):
+    # Pre-2026-05-19 anchor files lack the track_type column. Loader must
+    # accept them and default each row to track_type='bam'.
+    p = _write_tsv(tmp_path, (
+        "#sample\ttrack_name\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n"
+        "s1\ttumor\tchr2\t100\t200\t42\t\t\t\t\n"
+    ))
+    rows = va.load_anchors(p)
+    assert rows[0].track_type == "bam"
+
+
+def test_load_anchors_with_track_type_bedgraph(tmp_path):
+    p = _write_tsv(tmp_path, (
+        "#sample\ttrack_name\ttrack_type\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n"
+        "s1\tmeth_track\tbedgraph\tchr2\t100\t200\t8\t\t\t\tDNMT3A_CpGs\n"
+    ))
+    rows = va.load_anchors(p)
+    assert rows[0].track_type == "bedgraph"
+    assert rows[0].expected == 8
+    assert rows[0].notes == "DNMT3A_CpGs"
+
+
+def test_load_anchors_rejects_unknown_track_type(tmp_path):
+    p = _write_tsv(tmp_path, (
+        "#sample\ttrack_name\ttrack_type\tchrom\tstart\tend\texpected\ttolerance\tmin\tmax\tnotes\n"
+        "s1\tt1\tcraaam\tchr1\t0\t100\t5\t\t\t\t\n"
+    ))
+    with pytest.raises(SystemExit, match="unknown track_type 'craaam'"):
+        va.load_anchors(p)
+
+
+def test_write_load_round_trip_preserves_track_type(tmp_path):
+    anchors_in = [
+        va.AnchorRow(sample="s1", track_name="tumor", track_type="bam",
+                     chrom="chr1", start=0, end=100, expected=42),
+        va.AnchorRow(sample="s1", track_name="tumor.5mC", track_type="bedgraph",
+                     chrom="chr1", start=0, end=100, expected=12),
+    ]
+    out = tmp_path / "anchors.tsv"
+    va.write_anchors(anchors_in, out)
+    rows = va.load_anchors(out)
+    assert [r.track_type for r in rows] == ["bam", "bedgraph"]
+
+
+# ---------------------------------------------------------------------------
+# sample_bedgraph_paths: samplesheet → (track_name, bedgraph_path) iteration
+# ---------------------------------------------------------------------------
+
+def test_sample_bedgraph_paths_picks_bedgraph_from_extras():
+    row = {"sample": "s1", "extra_tracks": "/data/x.5mC.bedgraph,/data/x.5hmC.bg"}
+    pairs = va.sample_bedgraph_paths(row)
+    assert pairs == [("x.5mC", Path("/data/x.5mC.bedgraph")),
+                     ("x.5hmC", Path("/data/x.5hmC.bg"))]
+
+
+def test_sample_bedgraph_paths_strips_gz_suffix_from_track_name():
+    # Path.stem of foo.bedgraph.gz is "foo.bedgraph"; igv-reports renders
+    # it as just "foo", so we strip one more level.
+    row = {"sample": "s1", "extra_tracks": "/data/foo.bedgraph.gz"}
+    pairs = va.sample_bedgraph_paths(row)
+    assert pairs[0][0] == "foo"
+
+
+def test_sample_bedgraph_paths_skips_non_bedgraph_extras():
+    # bam/vcf in extra_tracks are NOT bedgraphs — sample_bam_paths handles them.
+    row = {"sample": "s1", "extra_tracks": "/data/x.5mC.bedgraph,/data/y.bam,/data/z.vcf"}
+    pairs = va.sample_bedgraph_paths(row)
+    assert pairs == [("x.5mC", Path("/data/x.5mC.bedgraph"))]
+
+
+def test_sample_bedgraph_paths_empty_when_no_extras():
+    assert va.sample_bedgraph_paths({"sample": "s1"}) == []
+    assert va.sample_bedgraph_paths({"sample": "s1", "extra_tracks": ""}) == []
diff --git a/igv-reports/tests/unit/test_verify_cohort_png.py b/igv-reports/tests/unit/test_verify_cohort_png.py
new file mode 100644
index 0000000..2ac1bc1
--- /dev/null
+++ b/igv-reports/tests/unit/test_verify_cohort_png.py
@@ -0,0 +1,190 @@
+"""Unit tests for the PNG-side checks in verify_cohort.py.
+
+Author: Samuel Ahuno
+Purpose:
+  When build_igvreports.py runs with --also-png, verify_cohort.py picks up
+  the manifest TSV and runs three additional checks. These tests synthesize
+  a valid manifest + matching PNG files in tmp_path, then mutate one thing
+  at a time to confirm each check fires on the right defect.
+
+Run:
+  pytest tests/unit/test_verify_cohort_png.py -v
+"""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+SCRIPTS = Path(__file__).resolve().parents[2] / "scripts"
+sys.path.insert(0, str(SCRIPTS))
+
+import verify_cohort as vc  # noqa: E402
+
+
+def _write_sites_bed(path: Path, rows: list[tuple]) -> None:
+    with path.open("w") as fh:
+        for r in rows:
+            fh.write("\t".join(str(x) for x in r) + "\n")
+
+
+def _write_manifest(path: Path, entries: list[dict]) -> None:
+    """Write a manifest TSV matching the schema build_pngs_with_igver emits."""
+    header = ("#bed_row_idx\tuid\tchrom\tstart_orig\tend_orig\t"
+              "start_flanked\tend_flanked\tregion\tpng_path\thtml_path\thtml_table_row\n")
+    with path.open("w") as fh:
+        fh.write(header)
+        for e in entries:
+            fh.write(
+                f"{e['bed_row_idx']}\t{e['uid']}\t{e['chrom']}\t"
+                f"{e['start_orig']}\t{e['end_orig']}\t"
+                f"{e['start_flanked']}\t{e['end_flanked']}\t"
+                f"{e['region']}\t{e['png_path']}\t{e['html_path']}\t"
+                f"{e['html_table_row']}\n"
+            )
+
+
+def _make_png(path: Path, size_bytes: int = 50_000) -> None:
+    """Create a fake PNG file of the requested size (default 50 KB, above
+    the 10 KB threshold)."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * (size_bytes - 8))
+
+
+@pytest.fixture
+def cohort(tmp_path):
+    """Two-region cohort with a valid manifest + matching PNGs."""
+    bed = tmp_path / "sites.hg38.bed"
+    _write_sites_bed(bed, [
+        ("chr1", 100, 200, "alpha"),
+        ("chr2", 300, 400, "beta"),
+    ])
+    html = tmp_path / "sample.hg38.html"
+    html.write_text("<html/>")
+
+    png_dir = tmp_path / "png_sample.hg38" / "png"
+    png1 = png_dir / "chr1-0-500.alpha.png"
+    png2 = png_dir / "chr2-0-700.beta.png"
+    _make_png(png1)
+    _make_png(png2)
+
+    manifest = tmp_path / "png_sample.hg38" / "manifest.tsv"
+    _write_manifest(manifest, [
+        {"bed_row_idx": 1, "uid": "alpha", "chrom": "chr1",
+         "start_orig": 100, "end_orig": 200,
+         "start_flanked": 0, "end_flanked": 500,
+         "region": "chr1:0-500", "png_path": str(png1.resolve()),
+         "html_path": str(html.resolve()), "html_table_row": 1},
+        {"bed_row_idx": 2, "uid": "beta", "chrom": "chr2",
+         "start_orig": 300, "end_orig": 400,
+         "start_flanked": 0, "end_flanked": 700,
+         "region": "chr2:0-700", "png_path": str(png2.resolve()),
+         "html_path": str(html.resolve()), "html_table_row": 2},
+    ])
+    return {"bed": bed, "html": html, "manifest": manifest, "png_dir": png_dir,
+            "png1": png1, "png2": png2, "tmp": tmp_path}
+
+
+# ----- find_png_manifest -----
+
+
+def test_find_png_manifest_returns_path_when_present(cohort, tmp_path):
+    # Manifest lives at <reports_dir>/png_<sample>.<genome>/manifest.tsv
+    # so we point reports_dir at tmp_path and check `sample` for genome `hg38`.
+    found = vc.find_png_manifest(tmp_path, "sample", "hg38")
+    assert found == cohort["manifest"]
+
+
+def test_find_png_manifest_returns_none_when_absent(tmp_path):
+    assert vc.find_png_manifest(tmp_path, "sample", "hg38") is None
+
+
+# ----- P1: png count matches BED -----
+
+
+def test_p1_pass(cohort):
+    c = vc.check_png_count_matches_bed("sample", cohort["manifest"], cohort["bed"])
+    assert c.status == "PASS"
+    assert c.observed == "2"
+    assert c.expected == "2"
+
+
+def test_p1_fail_when_manifest_short(cohort):
+    # Extend the BED to 3 rows while the manifest still lists only 2.
+    _write_sites_bed(cohort["bed"], [
+        ("chr1", 100, 200, "alpha"),
+        ("chr2", 300, 400, "beta"),
+        ("chr3", 500, 600, "gamma"),
+    ])
+    c = vc.check_png_count_matches_bed("sample", cohort["manifest"], cohort["bed"])
+    assert c.status == "FAIL"
+    assert c.observed == "2"
+    assert c.expected == "3"
+
+
+# ----- P2: pngs exist and non-empty -----
+
+
+def test_p2_pass(cohort):
+    c = vc.check_pngs_exist_and_nonempty("sample", cohort["manifest"])
+    assert c.status == "PASS"
+
+
+def test_p2_fail_on_missing_png(cohort):
+    cohort["png1"].unlink()
+    c = vc.check_pngs_exist_and_nonempty("sample", cohort["manifest"])
+    assert c.status == "FAIL"
+    assert "missing" in c.details
+
+
+def test_p2_fail_on_tiny_png(cohort):
+    # Re-write png1 as a 2 KB file — below the 10 KB threshold.
+    cohort["png1"].write_bytes(b"\x00" * 2048)
+    c = vc.check_pngs_exist_and_nonempty("sample", cohort["manifest"])
+    assert c.status == "FAIL"
+    assert "below threshold" in c.details
+
+
+def test_p2_threshold_can_be_lowered(cohort):
+    # The lab's smallest legitimate igver PNG can be ~5 KB on a no-data
+    # region. Users should be able to opt down without rewriting the check.
+    cohort["png1"].write_bytes(b"\x00" * 6144)
+    c = vc.check_pngs_exist_and_nonempty("sample", cohort["manifest"], min_size_kb=5.0)
+    assert c.status == "PASS"
+
+
+# ----- P3: html-row alignment -----
+
+
+def test_p3_pass(cohort):
+    c = vc.check_png_html_row_alignment("sample", cohort["manifest"], cohort["html"])
+    assert c.status == "PASS"
+
+
+def test_p3_fail_when_html_path_diverges(cohort, tmp_path):
+    # Pass a different HTML path than the manifest references — should fail.
+    other_html = tmp_path / "other.hg38.html"
+    other_html.write_text("<html/>")
+    c = vc.check_png_html_row_alignment("sample", cohort["manifest"], other_html)
+    assert c.status == "FAIL"
+    assert "different HTML" in c.details
+
+
+def test_p3_fail_when_row_indices_not_contiguous(cohort, tmp_path):
+    # Rewrite the manifest with non-contiguous html_table_row indices.
+    png1, png2 = cohort["png1"], cohort["png2"]
+    _write_manifest(cohort["manifest"], [
+        {"bed_row_idx": 1, "uid": "alpha", "chrom": "chr1",
+         "start_orig": 100, "end_orig": 200, "start_flanked": 0, "end_flanked": 500,
+         "region": "chr1:0-500", "png_path": str(png1.resolve()),
+         "html_path": str(cohort["html"].resolve()), "html_table_row": 1},
+        {"bed_row_idx": 2, "uid": "beta", "chrom": "chr2",
+         "start_orig": 300, "end_orig": 400, "start_flanked": 0, "end_flanked": 700,
+         "region": "chr2:0-700", "png_path": str(png2.resolve()),
+         "html_path": str(cohort["html"].resolve()), "html_table_row": 5},  # gap
+    ])
+    c = vc.check_png_html_row_alignment("sample", cohort["manifest"], cohort["html"])
+    assert c.status == "FAIL"
+    assert "contiguous" in c.details
diff --git a/igv-reports/tests/unit/test_verify_report.py b/igv-reports/tests/unit/test_verify_report.py
new file mode 100644
index 0000000..6ce4e43
--- /dev/null
+++ b/igv-reports/tests/unit/test_verify_report.py
@@ -0,0 +1,297 @@
+"""Unit tests for verify_report.py — pure-Python parser helpers.
+
+Author: Samuel Ahuno
+Purpose:
+  Covers the HTML-extraction helpers and individual checks in
+  verify_report.py without needing a real create_report HTML on disk:
+  synthesized fixtures in tmp_path exercise every parser branch.
+
+Run:
+  cd igv-reports
+  pytest tests/unit/ -v
+"""
+
+from __future__ import annotations
+
+import base64
+import gzip
+import json
+import sys
+from pathlib import Path
+
+import pytest
+
+SCRIPTS = Path(__file__).resolve().parents[2] / "scripts"
+sys.path.insert(0, str(SCRIPTS))
+import verify_report as vr  # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# load_sites_bed
+# ---------------------------------------------------------------------------
+
+def test_load_sites_bed_basic(tmp_path):
+    p = tmp_path / "sites.bed"
+    p.write_text(
+        "#chrom\tstart\tend\tname\n"
+        "chr2\t25246500\t25246501\tDNMT3A\n"
+        "chr7\t148884000\t148884001\tEZH2\n"
+    )
+    rows = vr.load_sites_bed(p)
+    assert len(rows) == 2
+    assert rows[0] == {"chrom": "chr2", "start": 25246500, "end": 25246501, "name": "DNMT3A"}
+    assert rows[1]["name"] == "EZH2"
+
+
+def test_load_sites_bed_skips_track_line(tmp_path):
+    p = tmp_path / "sites.bed"
+    p.write_text(
+        'track name=foo description="bar"\n'
+        "chr1\t100\t200\n"
+    )
+    rows = vr.load_sites_bed(p)
+    assert len(rows) == 1
+    assert rows[0]["name"] is None  # 3-col bed; no name
+
+
+def test_load_sites_bed_blank_lines_ok(tmp_path):
+    p = tmp_path / "sites.bed"
+    p.write_text(
+        "#header\n"
+        "\n"
+        "chr1\t100\t200\tx\n"
+        "\n"
+    )
+    assert len(vr.load_sites_bed(p)) == 1
+
+
+def test_load_sites_bed_too_few_cols(tmp_path):
+    p = tmp_path / "sites.bed"
+    p.write_text("chr1\t100\n")
+    with pytest.raises(SystemExit, match="<3 columns"):
+        vr.load_sites_bed(p)
+
+
+def test_load_sites_bed_non_numeric(tmp_path):
+    p = tmp_path / "sites.bed"
+    p.write_text("chr1\tNOPE\t200\n")
+    with pytest.raises(SystemExit, match="non-numeric"):
+        vr.load_sites_bed(p)
+
+
+# ---------------------------------------------------------------------------
+# parse_table_json + parse_session_dictionary (regex extraction)
+# ---------------------------------------------------------------------------
+
+def _fake_html(table_json: dict, session_dict: dict) -> str:
+    """Build a minimal HTML whose JS literals match what create_report emits."""
+    return (
+        "<html><body><script>\n"
+        f"var tableJson = {json.dumps(table_json)};\n"
+        f"var sessionDictionary = {json.dumps(session_dict)};\n"
+        "</script></body></html>\n"
+    )
+
+
+def test_parse_table_json_extracts_dict():
+    tj = {"headers": ["Chrom"], "rows": [["chr1"]]}
+    html = _fake_html(tj, {})
+    out = vr.parse_table_json(html)
+    assert out == tj
+
+
+def test_parse_session_dictionary_extracts_dict():
+    sd = {"0": "data:application/gzip;base64,xxx"}
+    html = _fake_html({"headers": [], "rows": []}, sd)
+    out = vr.parse_session_dictionary(html)
+    assert out == sd
+
+
+def test_parse_table_json_missing_returns_none():
+    assert vr.parse_table_json("<html>nothing here</html>") is None
+
+
+def test_parse_balanced_blob_handles_braces_in_strings():
+    """The brace-balancing scanner must not be tricked by '{' inside string literals."""
+    html = "tableJson = {\"name\": \"value with { brace }\", \"n\": 1};"
+    out = vr.parse_table_json(html)
+    assert out["name"] == "value with { brace }"
+    assert out["n"] == 1
+
+
+def test_parse_balanced_blob_handles_escaped_quotes():
+    """Backslash-escaped quotes must not flip the in_str state prematurely."""
+    html = 'tableJson = {"name": "has \\" quote", "n": 2};'
+    out = vr.parse_table_json(html)
+    assert out["name"] == 'has " quote'
+    assert out["n"] == 2
+
+
+# ---------------------------------------------------------------------------
+# decode_session_entry
+# ---------------------------------------------------------------------------
+
+def _make_data_url(payload: dict) -> str:
+    raw = gzip.compress(json.dumps(payload).encode())
+    return "data:application/gzip;base64," + base64.b64encode(raw).decode()
+
+
+def test_decode_session_entry_roundtrip():
+    payload = {"tracks": [{"name": "t", "url": "data:..."}]}
+    url = _make_data_url(payload)
+    assert vr.decode_session_entry(url) == payload
+
+
+def test_decode_session_entry_bad_prefix_returns_none():
+    assert vr.decode_session_entry("http://example.com/file.bam") is None
+
+
+def test_decode_session_entry_bad_base64_returns_none():
+    """Non-fatal — corrupted entries return None so caller can SKIP gracefully."""
+    assert vr.decode_session_entry("data:application/gzip;base64,!!!notb64!!!") is None
+
+
+# ---------------------------------------------------------------------------
+# expected_track_labels (covers the Path.stem rule)
+# ---------------------------------------------------------------------------
+
+def test_expected_track_labels_from_paths():
+    """Positional --tracks mode: igv-reports auto-names by Path.stem (strips
+    ONE final suffix). Verified against create_report 1.16.2 in the script."""
+    labs = vr.expected_track_labels(
+        ["/x/sample.5mC.bedgraph", "/y/gencode.v47.annotation.gff3.gz", "/z/x.bam"],
+        track_config=None,
+    )
+    assert labs == ["sample.5mC", "gencode.v47.annotation.gff3", "x"]
+
+
+def test_expected_track_labels_from_track_config(tmp_path):
+    """--track-config mode: use the `name` field from the JSON, not the path."""
+    cfg = tmp_path / "tracks.json"
+    cfg.write_text(json.dumps([
+        {"name": "tumor", "url": "/x/tumor.bam"},
+        {"name": "normal", "url": "/x/normal.bam"},
+        {"url": "/x/no-name-track.bam"},  # entries without `name` are silently dropped
+    ]))
+    labs = vr.expected_track_labels([], track_config=cfg)
+    assert labs == ["tumor", "normal"]
+
+
+def test_expected_track_labels_empty():
+    assert vr.expected_track_labels([], None) == []
+    assert vr.expected_track_labels(None, None) == []
+
+
+# ---------------------------------------------------------------------------
+# Individual checks — drive them with synthetic inputs
+# ---------------------------------------------------------------------------
+
+def test_check_html_exists_pass(tmp_path):
+    p = tmp_path / "r.html"
+    p.write_text("x")  # a one-byte file; this test asserts PASS for it
+    assert vr.check_html_exists(p).status == "PASS"
+
+
+def test_check_html_exists_fail(tmp_path):
+    c = vr.check_html_exists(tmp_path / "missing.html")
+    assert c.status == "FAIL"
+
+
+def test_check_html_min_size_pass_fail(tmp_path):
+    p = tmp_path / "r.html"
+    p.write_bytes(b"x" * (2 * 1024 * 1024))  # 2 MB
+    assert vr.check_html_min_size(p, 1.0).status == "PASS"
+    assert vr.check_html_min_size(p, 3.0).status == "FAIL"
+
+
+def test_check_region_count_pass():
+    bed = [{"chrom": "chr1", "start": 0, "end": 100, "name": "x"}]
+    tj = {"headers": ["Chrom"], "rows": [["chr1"]]}
+    assert vr.check_region_count(bed, tj).status == "PASS"
+
+
+def test_check_region_count_fail_when_html_short():
+    bed = [{"chrom": "chr1", "start": 0, "end": 100, "name": None}] * 3
+    tj = {"headers": ["Chrom"], "rows": [["chr1"]]}
+    c = vr.check_region_count(bed, tj)
+    assert c.status == "FAIL"
+    assert c.observed == "1"
+    assert c.expected == "3"
+
+
+def test_check_region_count_missing_table_json_fails():
+    assert vr.check_region_count([{"chrom": "x", "start": 0, "end": 1, "name": None}], None).status == "FAIL"
+
+
+def test_check_region_coords_match_with_name():
+    """HTML stores 1-based start, BED is 0-based half-open."""
+    bed = [{"chrom": "chr2", "start": 25246500, "end": 25246501, "name": "DNMT3A"}]
+    tj = {
+        "headers": ["Chrom", "Start", "End", "Name"],
+        "rows": [["chr2", 25246501, 25246501, "DNMT3A"]],
+    }
+    assert vr.check_region_coords(bed, tj).status == "PASS"
+
+
+def test_check_region_coords_name_mismatch_fails():
+    bed = [{"chrom": "chr2", "start": 100, "end": 200, "name": "EXPECTED"}]
+    tj = {
+        "headers": ["Chrom", "Start", "End", "Name"],
+        "rows": [["chr2", 101, 200, "DIFFERENT"]],
+    }
+    c = vr.check_region_coords(bed, tj)
+    assert c.status == "FAIL"
+    assert "name mismatch" in c.details
+
+
+def test_check_region_coords_off_by_one_aware():
+    """BED 0-based start 100 must match HTML 1-based start 101."""
+    bed = [{"chrom": "chr1", "start": 100, "end": 200, "name": None}]
+    tj = {
+        "headers": ["Chrom", "Start", "End"],
+        "rows": [["chr1", 101, 200]],
+    }
+    assert vr.check_region_coords(bed, tj).status == "PASS"
+
+
+def test_check_region_sessions_pass():
+    tj = {"headers": ["Chrom"], "rows": [["chr1"], ["chr2"]]}
+    sd = {"0": "data:...", "1": "data:..."}
+    assert vr.check_region_sessions(tj, sd).status == "PASS"
+
+
+def test_check_region_sessions_missing_key():
+    tj = {"headers": ["Chrom"], "rows": [["chr1"], ["chr2"]]}
+    sd = {"0": "data:..."}  # missing "1"
+    c = vr.check_region_sessions(tj, sd)
+    assert c.status == "FAIL"
+
+
+def test_check_tracks_present_pass():
+    """Decode the first session entry and confirm all expected labels in tracks[].name."""
+    sd = {"0": _make_data_url({"tracks": [{"name": "tumor"}, {"name": "cpg"}, {"name": "rmsk"}]})}
+    c = vr.check_tracks_present(sd, ["tumor", "cpg"])
+    assert c.status == "PASS"
+
+
+def test_check_tracks_present_missing_track():
+    sd = {"0": _make_data_url({"tracks": [{"name": "tumor"}]})}
+    c = vr.check_tracks_present(sd, ["tumor", "missing_track"])
+    assert c.status == "FAIL"
+    assert "missing_track" in c.details
+
+
+def test_check_tracks_present_skip_when_no_labels():
+    sd = {"0": _make_data_url({"tracks": []})}
+    assert vr.check_tracks_present(sd, []).status == "SKIP"
+
+
+def test_check_tracks_present_empty_session_fails():
+    c = vr.check_tracks_present({}, ["x"])
+    assert c.status == "FAIL"
+
+
+def test_check_tracks_present_undecodable_session_fails():
+    c = vr.check_tracks_present({"0": "data:application/gzip;base64,!!!"}, ["x"])
+    assert c.status == "FAIL"
+    assert "gunzip" in c.details or "decode" in c.details