From 0c859b43d533208b44732a0da38963de444d1d9b Mon Sep 17 00:00:00 2001
From: chrishalcrow <chrishalcrow@gmail.com>
Date: Mon, 11 May 2026 09:26:55 +0100
Subject: [PATCH 1/7] Initial gen figs

---
 .../generate_curation_data.py                 |  16 ++
 notebooks/real_data_figure/pyproject.toml     |  10 ++
 .../real_data_figure/sort_all_real_data.py    |  78 +++++++++
 .../sort_one_piece_of_data.py                 | 151 ++++++++++++++++++
 4 files changed, 255 insertions(+)
 create mode 100644 notebooks/real_data_figure/generate_curation_data.py
 create mode 100644 notebooks/real_data_figure/pyproject.toml
 create mode 100644 notebooks/real_data_figure/sort_all_real_data.py
 create mode 100644 notebooks/real_data_figure/sort_one_piece_of_data.py

diff --git a/notebooks/real_data_figure/generate_curation_data.py b/notebooks/real_data_figure/generate_curation_data.py
new file mode 100644
index 0000000..b23fdc0
--- /dev/null
+++ b/notebooks/real_data_figure/generate_curation_data.py
@@ -0,0 +1,16 @@
+all_curation_data = []
+
+for rec_name, recording, list_of_protocols, in zip(['np2', 'np1'], [np2_recording, np1_recording], [np2_protocols, np1_protocols] ):
+    for protocol_name in list_of_protocols:
+
+        analyzer_path = f"{rec_name}_srt-{protocol_name}_analyzer"
+        analyzer = do_sorting(recording, analyzer_path, protocol_name)
+
+        curation_data = get_automated_labels(analyzer)
+
+        all_curation_data.append(curation_data)    
+
+curation_data_df = pd.DataFrame(all_curation_data, columns=[])
+curation_data_df.to_csv('all_curation_data.csv')
+
+
diff --git a/notebooks/real_data_figure/pyproject.toml b/notebooks/real_data_figure/pyproject.toml
new file mode 100644
index 0000000..d157c9a
--- /dev/null
+++ b/notebooks/real_data_figure/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "real-data-figure"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.13"
+dependencies = [
+    "ipykernel>=7.2.0",
+    "spikeinterface>=0.104.3",
+]
diff --git a/notebooks/real_data_figure/sort_all_real_data.py b/notebooks/real_data_figure/sort_all_real_data.py
new file mode 100644
index 0000000..43b3b47
--- /dev/null
+++ b/notebooks/real_data_figure/sort_all_real_data.py
@@ -0,0 +1,78 @@
+"""
+Based on code from https://github.com/MattNolanLab/nolanlab-ephys
+
+The IBL data is the file
+    sub-UCLA034_ses-3537d970-f515-4786-853f-23de525e110f_desc-raw_ecephys.nwb
+from DANDI dataset 000409 - https://dandiarchive.org/dandiset/000409
+
+The UCL data is the file
+    AL032_2020-01-07
+from https://rdr.ucl.ac.uk/articles/dataset/Chronic_recordings_from_Neuropixels_2_0_probes_in_mice/24411841
+
+The Dus data is the fil
+    ???
+from the DANDI dataset 000939 - https://dandiarchive.org/dandiset/000939
+"""
+import spikeinterface.full as si
+import pandas as pd
+from pathlib import Path
+from .sort_one_piece_of_data import do_sorting
+
+repo_folder = Path("/Users/christopherhalcrow/Work/fromgit/sorting_components_benchmark_paper/")
+raw_data_folder = repo_folder / "notebooks/real_data_figure/raw_data"
+analyzers_folder = repo_folder / "notebooks/real_data_figure/raw_data"
+
+# NP1 data from IBL
+
+np1_protocols = [
+        'kilosort4_motion_correction',
+        'lupin_motion_correction',
+        'spykingcircus2_motion_correction'
+        'tridesclous2A_motion_correction'
+    ]
+np1_data_folder = ""
+np1_analyzer_folders = [
+    analyzers_folder / f"ibl_{protocol}_analyzer" for protocol in np1_protocols
+]
+
+for protocol_name, analyzer_folder in zip(np1_protocols, np1_analyzer_folders):
+    recording = ...
+    do_sorting(recording, analyzer_folder, protocol_name)
+
+# NP2 data from UCL
+
+np2_protocols = [
+        'kilosort4_no_motion_correction',
+        'lupin_no_motion_correction',
+        'spykingcircus2_no_motion_correction'
+        'tridesclous2A_no_motion_correction'
+    ]
+np2_data_folder = ""
+np2_analyzer_folders = [
+    analyzers_folder / f"ucl_{protocol}_analyzer" for protocol in np1_protocols
+]
+
+for protocol_name, analyzer_folder in zip(np2_protocols, np2_analyzer_folders):
+    recording = ...
+    do_sorting(recording, analyzer_folder, protocol_name)
+
+# CN data from Adrian
+
+cn_protocols = [
+        'kilosort4_no_motion_correction',
+        'lupin_no_motion_correction',
+        'spykingcircus2_no_motion_correction'
+        'tridesclous2A_no_motion_correction'
+    ]
+cn_analyzer_folders = [
+    analyzers_folder / f"cn_{protocol}_analyzer" for protocol in cn_protocols
+]
+
+
+for protocol_name, analyzer_folder in zip(cn_protocols, cn_analyzer_folders):
+    recording = ...
+    do_sorting(recording, analyzer_folder, protocol_name)
+
+
+
+
diff --git a/notebooks/real_data_figure/sort_one_piece_of_data.py b/notebooks/real_data_figure/sort_one_piece_of_data.py
new file mode 100644
index 0000000..4625729
--- /dev/null
+++ b/notebooks/real_data_figure/sort_one_piece_of_data.py
@@ -0,0 +1,151 @@
+"""
+Based on code from https://github.com/MattNolanLab/nolanlab-ephys
+"""
+import spikeinterface.full as si
+
+protocols = {
+    "kilosort4_no_motion_correction": {
+        "preprocessing": {
+        },
+        "sorting": {
+            "sorter_name": "kilosort4",
+            "do_correction": False,
+            "use_binary_file": False,
+        },
+        "preprocessing_for_analyzer": {
+            "common_reference": {},
+            "bandpass_filter": {},
+        },
+    },
+    "spykingcircus2_no_motion_correction": {
+        "preprocessing": {},
+        "sorting": {
+            "sorter_name": "spykingcircus2",
+            "apply_motion_correction": False,
+            "cache_preprocessing": {"mode": "folder", "folder": "sk2_pre"},
+        },
+        "preprocessing_for_analyzer": {
+            "bandpass_filter": {},
+            "common_reference": {},
+        },
+    },
+    "tridesclous2A_no_motion_correction": {
+        "preprocessing": {},
+        "sorting": {
+            "sorter_name": "tridesclous2",
+            "cache_preprocessing_mode": "folder",
+        },
+        "preprocessing_for_analyzer": {
+            "bandpass_filter": {},
+            "common_reference": {},
+        },
+    },
+    "lupin_no_motion_correction": {
+        "preprocessing": {},
+        "sorting": {
+            "sorter_name": "spykingcircus2",
+            "apply_motion_correction": False,
+            "cache_preprocessing": {"mode": "folder", "folder": "sk2_pre"},
+        },
+        "preprocessing_for_analyzer": {
+            "bandpass_filter": {},
+            "common_reference": {},
+        },
+    },
+    "kilosort4_motion_correction": {
+        "preprocessing": {
+        },
+        "sorting": {
+            "sorter_name": "kilosort4",
+            "do_correction": True,
+            "use_binary_file": False,
+        },
+        "preprocessing_for_analyzer": {
+            "common_reference": {},
+            "bandpass_filter": {},
+        },
+    },
+    "spykingcircus2_motion_correction": {
+        "preprocessing": {},
+        "sorting": {
+            "sorter_name": "spykingcircus2",
+            "apply_motion_correction": True,
+            "cache_preprocessing": {"mode": "folder", "folder": "sk2_pre"},
+        },
+        "preprocessing_for_analyzer": {
+            "bandpass_filter": {},
+            "common_reference": {},
+        },
+    },
+    "tridesclous2A_motion_correction": {
+        "preprocessing": {},
+        "sorting": {
+            "sorter_name": "tridesclous2",
+            "cache_preprocessing_mode": "folder",
+            "apply_motion_correction": True,
+        },
+        "preprocessing_for_analyzer": {
+            "bandpass_filter": {},
+            "common_reference": {},
+        },
+    },
+    "lupin_no_motion_correction": {
+        "preprocessing": {},
+        "sorting": {
+            "sorter_name": "spykingcircus2",
+            "apply_motion_correction": True,
+            "cache_preprocessing": {"mode": "folder", "folder": "sk2_pre"},
+        },
+        "preprocessing_for_analyzer": {
+            "bandpass_filter": {},
+            "common_reference": {},
+        },
+    },
+}
+
+postprocessing_extensions_to_compute = {
+    "unit_locations": {},
+    "random_spikes": {},
+    "noise_levels": {},
+    "waveforms": {},
+    "templates": {},
+    "spike_amplitudes": {"peak_sign": "both"},
+    "amplitude_scalings": {},
+    "isi_histograms": {},
+    "spike_locations": {"peak_sign": "both"},
+    "correlograms": {},
+    "template_similarity": {"method": "l2"},
+    "quality_metrics": {},
+    "template_metrics": {},
+}
+
+def do_sorting(recording, analyzer_path, protocol_name, n_jobs=8):
+
+    si.set_global_job_kwargs(n_jobs=n_jobs)
+
+    protocol_info = protocols[protocol_name]
+
+    pp_recording = si.apply_preprocessing_pipeline(
+        recording, protocol_info["preprocessing"]
+    )
+    sorting = si.run_sorter(
+        recording=pp_recording,
+        **protocol_info["sorting"],
+        remove_existing_folder=True,
+        verbose=True,
+    )
+
+    preprocessed_recording_for_analyzer = si.apply_preprocessing_pipeline(
+        recording, protocol_info["preprocessing_for_analyzer"]
+    )
+
+    analyzer = si.create_sorting_analyzer(
+        recording=preprocessed_recording_for_analyzer,
+        sorting=sorting,
+        folder=analyzer_path,
+        format="binary_folder",
+        peak_sign="both",
+        radius_um=70,
+    )
+
+    analyzer.compute(postprocessing_extensions_to_compute)

From 8948b084507d3c1c3c82edd71e8285cafe355aa7 Mon Sep 17 00:00:00 2001
From: chrishalcrow <chrishalcrow@gmail.com>
Date: Mon, 11 May 2026 11:03:01 +0100
Subject: [PATCH 2/7] Get pipeline working

---
 .../generate_curation_data.py                 | 69 +++++++++++++++---
 notebooks/real_data_figure/pyproject.toml     |  8 +++
 .../real_data_figure/sort_all_real_data.py    | 70 ++++++++++++++-----
 .../sort_one_piece_of_data.py                 |  8 +--
 4 files changed, 122 insertions(+), 33 deletions(-)

diff --git a/notebooks/real_data_figure/generate_curation_data.py b/notebooks/real_data_figure/generate_curation_data.py
index b23fdc0..e301bd6 100644
--- a/notebooks/real_data_figure/generate_curation_data.py
+++ b/notebooks/real_data_figure/generate_curation_data.py
@@ -1,16 +1,67 @@
-all_curation_data = []
+"""
+Generates curation results from computed analyzers.
 
-for rec_name, recording, list_of_protocols, in zip(['np2', 'np1'], [np2_recording, np1_recording], [np2_protocols, np1_protocols] ):
-    for protocol_name in list_of_protocols:
+Once you have the analyzers, run this code by `cd`ing into the `real_data_figure` 
+folder, then running
+>>> uv run generate_curation_data.py
 
-        analyzer_path = f"{rec_name}_srt-{protocol_name}_analyzer"
-        analyzer = do_sorting(recording, analyzer_path, protocol_name)
+"""
 
-        curation_data = get_automated_labels(analyzer)
+import spikeinterface.full as si
+import numpy as np
+import pandas as pd
+from pathlib import Path
 
-        all_curation_data.append(curation_data)    
+repo_folder = Path("/home/nolanlab/fromgit/sorting_components_benchmark_paper/")
+real_data_figure_folder = repo_folder / "notebooks/real_data_figure"
+analyzers_folder = real_data_figure_folder / "analyzers"
 
-curation_data_df = pd.DataFrame(all_curation_data, columns=[])
-curation_data_df.to_csv('all_curation_data.csv')
+dataset_protocols = {
+    'IBL': ['kilosort4_motion_correction', 'lupin_motion_correction', 'tridesclous2_motion_correction','spykingcircus2_motion_correction'],
+    'ucl': ['kilosort4_no_motion_correction', 'lupin_no_motion_correction', 'tridesclous2_no_motion_correction','spykingcircus2_no_motion_correction'],
+    'Duszkiewicz': ['kilosort4_no_motion_correction', 'lupin_no_motion_correction', 'tridesclous2_no_motion_correction','spykingcircus2_no_motion_correction'],
+}
 
+bombcell_labels = ['good', 'mua', 'noise', 'non_soma_good', 'non_soma_mua']
+unitrefine_labels = ['sua', 'mua', 'noise']
+merge_presets = ['slay']
 
+for dataset_name, protocols in dataset_protocols.items():
+    bombcell_results = []
+    unitrefine_results = []
+    all_protocols_data = []
+    for protocol in protocols:
+
+        analyzer_path = analyzers_folder / f"{dataset_name}_{protocol}_analyzer"
+        if analyzer_path.is_dir():
+            analyzer = si.load_sorting_analyzer(analyzer_path)
+        else:
+            analyzer = si.load_sorting_analyzer(str(analyzer_path) + '.zarr')
+        
+        bombcell_unit_label = si.bombcell_label_units(analyzer, split_non_somatic_good_mua=True)['bombcell_label'].values
+        bombcell_results = {label: np.sum(bombcell_unit_label == label) for label in bombcell_labels}
+
+        # You need to download the UnitRefine models `noise_neural_classifier_lightweight` and `sua_mua_classifier_lightweight` from
+        # https://huggingface.co/AnoushkaJain3
+        unitrefine_unit_label = si.unitrefine_label_units(analyzer, noise_neural_classifier='/home/nolanlab/Downloads/noise_neural_classifier_lightweight', sua_mua_classifier='/home/nolanlab/Downloads/sua_mua_classifier_lightweight')
+        unitrefine_results = {label: np.sum(unitrefine_unit_label['unitrefine_label'] == label) for label in unitrefine_labels}
+
+        merge_results = {merge_preset: len(si.compute_merge_unit_groups(analyzer, preset=merge_preset)) for merge_preset in merge_presets}
+
+        protocol_data = [
+            protocol,
+            analyzer.get_num_units(),
+            bombcell_results['good'] + bombcell_results['non_soma_good'],
+            unitrefine_results['sua'],
+            bombcell_results['mua'] + bombcell_results['non_soma_mua'],
+            unitrefine_results['mua'],
+            merge_results['slay'],
+            bombcell_results['noise'],
+            unitrefine_results['noise'],
+        ]
+
+        all_protocols_data.append(protocol_data)
+
+    results = pd.DataFrame(all_protocols_data, columns=["sorter", "total units", "bombcell good", "unitrefine sua", "bombcell mua", "unitrefine mua", "# slay merges", "bombcell noise", "unitrefine noise"], index=None)
+
+    results.to_csv(real_data_figure_folder / f"curation_results/{dataset_name}_results.csv", index=False)
diff --git a/notebooks/real_data_figure/pyproject.toml b/notebooks/real_data_figure/pyproject.toml
index d157c9a..91238fc 100644
--- a/notebooks/real_data_figure/pyproject.toml
+++ b/notebooks/real_data_figure/pyproject.toml
@@ -6,5 +6,13 @@ readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
     "ipykernel>=7.2.0",
+    "pandas>=3.0.2",
+    "scikit-learn==1.6",
+    "skops>=0.14.0",
     "spikeinterface>=0.104.3",
 ]
+
+[dependency-groups]
+dev = [
+    "ruff>=0.15.12",
+]
diff --git a/notebooks/real_data_figure/sort_all_real_data.py b/notebooks/real_data_figure/sort_all_real_data.py
index 43b3b47..08122b0 100644
--- a/notebooks/real_data_figure/sort_all_real_data.py
+++ b/notebooks/real_data_figure/sort_all_real_data.py
@@ -8,52 +8,89 @@
 The UCL data is the file
     AL032_2020-01-07
 from https://rdr.ucl.ac.uk/articles/dataset/Chronic_recordings_from_Neuropixels_2_0_probes_in_mice/24411841
+This needs to be untarred into a folder.
 
-The Dus data is the fil
-    ???
+The Dus data is the file
+    A3702-191126.nwb
 from the DANDI dataset 000939 - https://dandiarchive.org/dandiset/000939
+
+For this code to run, put all files in the "raw_data" folder and change the "repo_folder" below, to point at
+your local copy of this repo. Your file organisation should look like
+
+sorting_components_benchmark_paper/   <-- `repo_folder` points here
+   notebooks/
+       real_data_figure/
+           sort_all_real_data.py
+           raw_data/
+               sub-UCLA034_ses-3537d970-f515-4786-853f-23de525e110f_desc-raw_ecephys.nwb
+               A3702-191126.nwb
+               AL032_2020-01-07/
+                   ???
+           analyzers/
+           curation_results/
+
+`cd` to the `real_data_figure` folder and run
+>>> uv run sort_all_real_data.py
+
+After this has run the `analyzers` folder should contain the following 12 analyzers:
+
+analyzers/
+    Duszkiewicz_kilosort4_no_motion_correction_analyzer
+    Duszkiewicz_lupin_no_motion_correction_analyzer
+    Duszkiewicz_spykingcircus2_no_motion_correction_analyzer
+    Duszkiewicz_tridesclous2_no_motion_correction_analyzer
+    IBL_kilosort4_motion_correction_analyzer
+    IBL_lupin_motion_correction_analyzer
+    IBL_spykingcircus2_motion_correction_analyzer
+    IBL_tridesclous2_motion_correction_analyzer
+    ucl_kilosort4_no_motion_correction_analyzer
+    ucl_lupin_no_motion_correction_analyzer
+    ucl_spykingcircus2_no_motion_correction_analyzer
+    ucl_tridesclous2_no_motion_correction_analyzer 
+
+
 """
+
 import spikeinterface.full as si
-import pandas as pd
 from pathlib import Path
-from .sort_one_piece_of_data import do_sorting
+from sort_one_piece_of_data import do_sorting
 
+# edit this to point to the `sorting_components_benchmark_paper` on your own computer
 repo_folder = Path("/Users/christopherhalcrow/Work/fromgit/sorting_components_benchmark_paper/")
+
 raw_data_folder = repo_folder / "notebooks/real_data_figure/raw_data"
 analyzers_folder = repo_folder / "notebooks/real_data_figure/raw_data"
 
 # NP1 data from IBL
-
 np1_protocols = [
         'kilosort4_motion_correction',
         'lupin_motion_correction',
         'spykingcircus2_motion_correction'
         'tridesclous2A_motion_correction'
     ]
-np1_data_folder = ""
+np1_data_folder = raw_data_folder / 'sub-UCLA034_ses-3537d970-f515-4786-853f-23de525e110f_desc-raw_ecephys.nwb'
 np1_analyzer_folders = [
-    analyzers_folder / f"ibl_{protocol}_analyzer" for protocol in np1_protocols
+    analyzers_folder / f"IBL_{protocol}_analyzer" for protocol in np1_protocols
 ]
 
+recording = si.read_nwb_recording(np1_data_folder)
 for protocol_name, analyzer_folder in zip(np1_protocols, np1_analyzer_folders):
-    recording = ...
     do_sorting(recording, analyzer_folder, protocol_name)
 
 # NP2 data from UCL
-
 np2_protocols = [
         'kilosort4_no_motion_correction',
         'lupin_no_motion_correction',
         'spykingcircus2_no_motion_correction'
         'tridesclous2A_no_motion_correction'
     ]
-np2_data_folder = ""
+np2_data_folder = raw_data_folder / 'AL032_2020-01-07'
 np2_analyzer_folders = [
     analyzers_folder / f"ucl_{protocol}_analyzer" for protocol in np1_protocols
 ]
 
+recording = si.read_cbin_ibl(np2_data_folder)
 for protocol_name, analyzer_folder in zip(np2_protocols, np2_analyzer_folders):
-    recording = ...
     do_sorting(recording, analyzer_folder, protocol_name)
 
 # CN data from Adrian
@@ -65,14 +102,9 @@
         'tridesclous2A_no_motion_correction'
     ]
 cn_analyzer_folders = [
-    analyzers_folder / f"cn_{protocol}_analyzer" for protocol in cn_protocols
+    analyzers_folder / f"Duszkiewicz_{protocol}_analyzer" for protocol in cn_protocols
 ]
-
-
+cn_data_folder = raw_data_folder / 'A3702-191126.nwb'
+recording = si.read_nwb_recording(cn_data_folder)
 for protocol_name, analyzer_folder in zip(cn_protocols, cn_analyzer_folders):
-    recording = ...
     do_sorting(recording, analyzer_folder, protocol_name)
-
-
-
-
diff --git a/notebooks/real_data_figure/sort_one_piece_of_data.py b/notebooks/real_data_figure/sort_one_piece_of_data.py
index 4625729..4ea0dc8 100644
--- a/notebooks/real_data_figure/sort_one_piece_of_data.py
+++ b/notebooks/real_data_figure/sort_one_piece_of_data.py
@@ -43,9 +43,8 @@
     "lupin_no_motion_correction": {
         "preprocessing": {},
         "sorting": {
-            "sorter_name": "spykingcircus2",
+            "sorter_name": "lupin",
             "apply_motion_correction": False,
-            "cache_preprocessing": {"mode": "folder", "folder": "sk2_pre"},
         },
         "preprocessing_for_analyzer": {
             "bandpass_filter": {},
@@ -89,12 +88,11 @@
             "common_reference": {},
         },
     },
-    "lupin_no_motion_correction": {
+    "lupin_motion_correction": {
         "preprocessing": {},
         "sorting": {
-            "sorter_name": "spykingcircus2",
+            "sorter_name": "lupin",
             "apply_motion_correction": True,
-            "cache_preprocessing": {"mode": "folder", "folder": "sk2_pre"},
         },
         "preprocessing_for_analyzer": {
             "bandpass_filter": {},

From 3da03a1058f7aed675c6d9a7f86aaca25a5a01a7 Mon Sep 17 00:00:00 2001
From: chrishalcrow <chrishalcrow@gmail.com>
Date: Mon, 11 May 2026 14:07:40 +0100
Subject: [PATCH 3/7] Add more figure generation stuff

---
 .../Duszkiewicz_probe.png                     | Bin 0 -> 2257 bytes
 .../drift_maps_and_probes/IBL_probe.png       | Bin 0 -> 2162 bytes
 .../drift_maps_and_probes/ucl_probe.png       | Bin 0 -> 2757 bytes
 notebooks/real_data_figure/figure.typ         | 106 +++++++++++++++++
 .../generate_curation_data.py                 |   8 +-
 .../real_data_figure/make_drift_plots.py      | 111 ++++++++++++++++++
 notebooks/real_data_figure/pyproject.toml     |   1 +
 7 files changed, 225 insertions(+), 1 deletion(-)
 create mode 100644 notebooks/real_data_figure/drift_maps_and_probes/Duszkiewicz_probe.png
 create mode 100644 notebooks/real_data_figure/drift_maps_and_probes/IBL_probe.png
 create mode 100644 notebooks/real_data_figure/drift_maps_and_probes/ucl_probe.png
 create mode 100644 notebooks/real_data_figure/figure.typ
 create mode 100644 notebooks/real_data_figure/make_drift_plots.py

diff --git a/notebooks/real_data_figure/drift_maps_and_probes/Duszkiewicz_probe.png b/notebooks/real_data_figure/drift_maps_and_probes/Duszkiewicz_probe.png
new file mode 100644
index 0000000000000000000000000000000000000000..42d8e42e26e6beb78aa32d360f576c6f2931257c
GIT binary patch
literal 2257
zcmZ`*d00~U8it%nZ|0!m+|bf6>`ru1QOQhEG&L<Ub4%Qk+;bOXTt*SQE$R)@QOvql
zX2v)(DkV+flE%!VrIMOU*&<>fnYrVF8#)KAwEH-Joadb9_nr6sz3=;dXUTMSa!}c_
zeG3Q#QaN(i&J{R2gFxW(N}GYJ@I3N);ABod<Vkjm2_Yx=lY&9V{K>JAG33aw0G;??
zQe0R}G}^$((BOcc&S^3^HqIP{iu!zqK@2Gr^@q<7tpLkEVh?-8fj|&{`2qgLsw4~q
zQnEZ^chEiY+6ey*$!$P0?QPq+*KiFC*YSEE-7s4f@hm41?bff-e;eImG9~;G($s%9
z_pIN+n45vIT*t_UK6;R~<zf8KMWV)zn%}3Ei=}3CJ<Qnn9AU_3Cb6t@kZeQ2m=2?K
z5;mXPs-&rEtP^D2n3fGLnygLb^72ulH&n_FJnQ7(TrOpqF9@W}2p-A)7dq{^%oF^W
zI%Z6Z%4+D&dNev1xCmW=cTGyU-z`qU1oZ_X*m2p|Km=aYRzNN9k7zRP1D2?hhVx5a
zKM;;G!EN2Fcv&HaP~+VKb<4pic$j#ckf@$nli|$FH{y@#z<;Z~h_GdXYxoE4+>@a$
z7xZGp)%<q%B-7krF2laZ_Nz~-cPZF-`8}o$qkm$1%<C>V?q(|Yv1ctmy8`O0sa^Pa
z=gS-tYpQ5NEbft>ZcT@@Tk9rcRHs?EYt!dYkd($eEJJ)f14%(J)udl2y_|2f!hF&%
zR~<i}ovySRBe3D-b1C0%nsfZBS>n+e3U|ioj;>KUUoN>TXHCuT?yt|o_ODg4!GO$c
zU?ZN78KCzI6wn<#RG|IL)eI^Pgd5ycsh0{_-@K1H%dc&$6L5#(HL|8;CW;6qCWD=r
zVg0H1{Wutj2?<WxBgDaQVfB`FI&%H14Pl6bX@9G=Fw+{Veun)pZ}O@Vqa!1EjgleW
zJ1l1vh)v9hOi0pN1xN_6jDyL80doBU6=aRz@vi(t`CbUqO>?aV8C(=7tYo>W<&#e2
zz=k@X0`7RmY$S!+0>%w~b4zb09LRIkN>xX?@1U($Z)Rmjy6bi#L`^>JA8Wx$nxene
z8Vb*Dv%3k59?(waKH)mb;G``za@K^TV@?3~x{9VM5GSxB5KJ5s@+C@BwOL{u5>%Ku
z`$Xv`!yt_(l&(B^|EIGVfN!7^%|^=Dv|*fp5C`OQ7IAUi#C;s`1o*}y4jA89tK_Ou
zKUz&baX>rw(ks_d%7<Vzxl4vY+~xL6r#4t;9QSCx!67YsvUx3)%`hSC4}2?Cwxs&D
zfg|pFTU0L~W>Q}c&E+yb+@^e+>yS%aZk64zdT>Xr?tUUNq_w4f;w{k%cH>&%gVg`-
zK^_<ORQ1M9$t+_7gLS{*K<>fB?|2~=zKI&0ot+d)iU5rAtyHDg&|mU<N2K}}SQ>oA
z1lW{p=lk#KsuI!Ir9ROC^8@f0^u7h31xi*lnD97~75#Vu^5hkr6j!1N8Il2uii=|R
z-qrzkY*=A^KapB8x`3<7mg7UPSDHc?QqGL1^!c4<@3-M<)L;4XX~iSO{SH-Coix-3
zLVZ`4``$6UH`lv-TZHsQfYhtHgk)4L9qmDmSRSo~))8g)DUzDwIoe`l2afZ+u|MFX
z$!!IFhwAkjAcc6AKEE}QZtq&$6MbLgI^_Cmv5!Wh`OxF(9mJ?bZ>>74R{a}knFhb2
zqM}2Pd}@I^EuKDY8KXjB6ku9By?y7CR7^n%%2)pU)uORoe=AwH){{VcmHDcOE-5X?
zE9~I^$;Hn{9F1sC?Y)#R)~hn_Cc^>dd8YY_`>W5NQ7n5N-d@@L%amUgKm3V%#J@*-
z>oDVOCz@=T4f6p{k3BuHzxmDszlPlBdla<!Ho?cmFD{pAihr}`ID2t3Gc!G@cd^f0
zqB*aP_%~QVWaT_8l-1U)Xx@C{Qgo3-aVUZpL6it@Pr39aS3r>*j{p*h-6b9QS(2u%
zu3iyOE$?!P*5(L{bF+l4p$WL;wtL%~v*No3hKvXWs3-=(igI%9{k97;_l)RF`@O{$
zWq?QhQ<^2Dp~`>e3w(Dr1I77HFXcGp(+tRsom->G%C^fB4^kbH#v1&}QmVg&qWrX;
z;p6w)LS}ggE6f4{TD&k<QV1DGVhV~BX|qWTsDWPnAEjr}wb=9R`n%N(;kPP!GE6Zs
z)t4lrll<NGoNPz##KgqQS&{uP){=6TZyw%|$HC`l@NJC2y6|_Q1I0`JvM!gM@4lW4
z%h{qaA#h2ZST24jO+k5B`5X%&D>r?u1%qX^SVb*;aF5o$FO<C5=fs!(3u*`th>kb-
z0DB>tUK)B&HYbkjU3ZgGj%=o7B>p~y=pB7X=a2ArJ8-fi<q8rP$B9pAE%hTloOGG^
zT6h_H7^dCm%U<3T=fD0%@%i)g=`FUlP_zs(Bb!-V*WaEF{9S;K9CET_*_`^%Uk{cE
AcmMzZ

literal 0
HcmV?d00001

diff --git a/notebooks/real_data_figure/drift_maps_and_probes/IBL_probe.png b/notebooks/real_data_figure/drift_maps_and_probes/IBL_probe.png
new file mode 100644
index 0000000000000000000000000000000000000000..9ea22a5a97618db30842dc2664e7a55e353692bf
GIT binary patch
literal 2162
zcmai#e>B_Y9>>F}*0^nTn^x(m(Ym@(v6<;jOV*Z>s+eg>uox+=wj_v15V7UdrX^h&
zwHE1B#x0dbtCLEEt7)cHSu4S$#LtApuZmFGBzv5am7RO{$M-zn^L#%)UZ3~-Io~Ji
z3#9i(6H5~a1hNs~<M}06eIXEo^TzAJTb*RnRj|MaUMNB!E}D=Wkq`y(iy+|7;Rxqq
zB0nHTB_zh+;#{0Qc6Qq5_`zuc0iOtiLSyw2PPl|q(B{yjc2MOVybn4N0@)m~^cYma
zD`Fs!4SfjDgU3ll6B22hm3V8$i~Rac1&3_w3Y<#=PSbkKnrw573sHx{Y+r0TU<I4P
z?{*4+wx<@E_RLy$`<;2-T{2*3V^;1gC0!EG8l_SB=Rz6xJK91fkCe#Cl|MYWFa12v
z!qj=|*N#o73>E}y@X)ZvT=QSt6xFI1GmqoRw8^?GXq;9jhU*4;Uk+B`rmy)@`WSxp
z9o>)T2UU4OQwo=Q2PB?nM*O=4)?2S@3D+&aihijsW3@NFhK<Cx3PTIJ%vM@tl<6$D
zs(Dj7E7jPYVD0OWicm#(lSs}*?q$h3PBF>Z=u0Y$;H>c*s_O;T(|(lx8oQuC>w0eH
zGGeYa1E+8a4?tjD)ju)!Fm0CcuT*FF1~skZHgRqG`^=^-4Idh28(Em*aXuLPu^1y)
z_4a4_5Y$qFokPFtx}^~3t^K*?_*N6rMuR0KvW@Cb8imywVh%5h<AiQi?smU*{b!HE
zDf$q%b<eH-44K?$FrS(6=e&?LOm+_B;<5^Jk!VBQ|H1}1x#n+S8`rz7<E)JS+$y&V
zkrHyykCHjNGTGl_8vqJDlW))r4B_uk`5)mA`pMQ{rygDsPcKTJ86Ib>7X?V!dURRJ
zZzOst*I>7r0P*zLKt(6Cfqvro%;BWmlAtdF88&)Tfr@}aAA&Ui39GSJ8<o#NKI<?!
zqi`vEYqC`#tFeKKZyWW{Fsyb(%Ko_(DS`R_2YZc|T$^PtPx<Jvf%!l@y(oWszU-KJ
zvv<F}y7-SVz1nMz;A-r@81+{#-yHWnI~;^FA4$FU);eD7jINsQ+C~?rKcyegB^$k3
zcp-eDZGLs0*NCs~Xz8$kZ@-cfs63tUuPjLyPB1wcH!$_#4rP0D@#rxa3*~r}%T98k
ze3~Pq!w*XX_jSr-(M4)u<SgD(+SS8;IcB?<su?A~MqmBL%XE{nx4R7<X{(7I_3A{F
z$S=D;E~SbD(u>q2i=CUXudlP=4M#3{$ET1iCx}*5C|x{3#Oj`C3Xwm*<SXu>97|os
z9u(8|mpAf6&~n<)!DH?A<(S*Z8{gg7r)2PD(QGN-m(n#iN8B;DX(%BLt?*JhH(cfC
z)Ne_3n?K43Sd5ns-^eS37hg#qIAI015KNQmMcd1<v9Ymyu4F#)H9<q|_6NT&Y9jo8
z?@08Fue*<Rult2gRnSTFJ<+wwp5e52X+zD=<c{W5;xqhy+QIC0Z(rW!n_+b5(CuUk
ziA3VdsGI#>)~UVK{nNyQw0k9Rp3Gk!Kl2okd#8Y2$6{@185#)jSf9h75A{u?vg+#c
zhdH}jczvN<QiF!bDQY|xANDS~z)MMXpil0!+wbyw8G-ak_T%rAs?@I0wD)P7*FQaD
zZat~VJINv@Cl7IzQ~cptt#n*|hdka!90Z-q<o_&gW~8TCc6h7dE_5MPV?@AWca5<T
z*d$o-=#+YBu;Sv60^49Hvo!hEHtsJetwLmD@0hiES89RyGbWFO-_nvdvhys)o?K0A
z_)5Y_2lmsH8&20CkJq|YpT)p|<Nk2%-CC7H)(HWpaXhTJ^4u=&BQgcJsw=#5wd%e6
znyFw{rnUP{zI`(ML)T#Yk8<iRkTtc5Nu@KJGmpD7!-=4p*5pLHGO$gYm7g|Td6P`J
zz=pAMQ777zflt65rEssB;~@?!Mxm>JR4r~3hVQ;s^`3*K@Z?RhX1vw$ckm^q`hC@+
zC^0E=#JcILXbd9?efMFET;aKw5#+QV^ta^qx7-Cy3s6xq^~D8SV$Cz5IIG8FgTGu2
z4$e`T!ZVijE2r@h8y29{-QELZG81Z$BFieUrvQ7ebiE4|WQrJ+m_t$BnBjadt9#Ic
zaG_Fl;L^Atff3An6!PE#DG38l|AgMrFyg_)<kw}X1!PSm>w7m0hm{M4`iy0p&gfby
zT`g`{h{f<gKh|ZPL@S6o->JZL-gkNXo?W_dUK-J^MO2uwZI`Zp2*L~LS##*?Z~p+q
Ce*Pc;

literal 0
HcmV?d00001

diff --git a/notebooks/real_data_figure/drift_maps_and_probes/ucl_probe.png b/notebooks/real_data_figure/drift_maps_and_probes/ucl_probe.png
new file mode 100644
index 0000000000000000000000000000000000000000..6358233e8d2326c53522e81c833b778352e69fc8
GIT binary patch
literal 2757
zcmc&$XH-+!8V#b9&_$Y}kdOgU3=xqg$PhzQh(v-0Lko}yNRxyr0!s)*j37>=SwU%n
zQLun?=>`i$5QI1yK@kKYM4G@m869T+z8`PBA9vkz&)R37{q6nTvrdWw)>;a>847_w
zq-<?4N5C5cfe6vXL<FN(PD<cqLbD{$9D^vda8d{vVo#z4`v=kdeLdB~$RSkUpuj!I
z-6*7yj=B$x7ECqK)eTrfAcI0q=r$5VUjP>IU>j#D1R_HcJVHh0g}x97bhj<W0v~bZ
z>s59jw_PDo#`4R-BY$k~$dFqfnWiJsf&6_`W%T{%28x1us?J&Gk7ngrvv8lzR>hzv
z-n&mVEk6#pSnBAs>09jER&BPsqp5kSDJ{6xjfJj5%`6KgiqIjO);Wm%4ukw0$ntmn
z7*jL1on87qV!pa;y)YTR8ng5+W+6U$E>jKP4_%ROpNn0o<LBtkDt0f3J~x=*=}xWY
zvL4nI8<tEqsIAWSTim4^$<MH2SM0GPpB|ex`}d4x_PUJcE^?S#jhWZY=Q&G62~wue
z_VqVp<Is>%*;23)LKSOJU582<(q7XeNg)K`hlND;#5GEKpsF2z;h{pxosexX!2*q+
z3{XJjA>A02|3x}DUhGd1xdxB0!nLAMSwg0QUE-|&DYHTKEZ^$&RsmEB9@jX5y(v)g
z7Y|A}A_39~gQWdE14J6HA7_;86F|3MAn^lN(SO_3;34u&OepJsK+l?!RI$D-PT>Nm
zVKPLdVjuk{GvH~I{8J9p2myKod;Pn701cEClB9$~*LdPp{^Ai7FRa`-x#`!EUt$Di
z0iFMUJQ@NLvKFKVtXRt^>%cuu(G%ez^>vjN>rq+5|BhAQt?_zb#XlJ(dDP-UMFA89
z_`k`zGE)j3LyqI1^Kgj%aVK6yQCBacvykWZu$Jy$&cIu~emu0Jg$hgh@#@Z_$LVM4
zGCx#(CI&JcBQw56y%kJ_KDvBzZzs#nK{TVlmo>;ick456h%a6yl4;vIq9^B7^|qyv
zH`^^;CjNH8Q~Yeqs<Tyq;kD-Yaee#Y_8rdryM`OfiC=G-Ue-KR<Y=lC@@94%1y4kO
zUx*1tV=Y&uH9WU_Gs_bDef`FUNf=2w!WM2}R*IE(g=asf)Q&R}FgQfTy;YgVjh;2A
z62rR6)VBLBNTnV!zUa<zgW^wbWn~tcJg}v0h55K7Ih?HCEi4uJGJ_S8zxO=dn{DT<
zu2h>(@$MyIHYEV=e2Vgt9hXfgaI9d9bKfB(4`f>}BX8+fxv@4L_Tq-(S598Cg@58X
zdlhqfZ$|<~Ab+X8z<9qJxqe^^D<A&xp0Q2n>}g8v(PLa`jXZ>RmCIt<H|D_JPKh1p
z4@TeXP8&!S08<$KI61w|jGC~VasH0!=9;GD?VO^6CyK8ejxn)!!k4@9-w8)Da?+Ov
z-vy*_d+yJjQN|o@r%X+?IxWAYFh!!=v-SCXzIKyM>VAOt^5=;O(pgs4g@TQiCgwzY
z8`<@GGQ;6H>=Aee0!KW|*9)y8JS#T#MMhqlAv_DLbtE2kN>0N%ZaXyaX|x}uTLg`K
zV`}5tn{$X`bsvb4u<hwxpx{8R2w}6N&QDa8t3%Li24!R7&B3UW>-)vlrL>v!Zwjb%
zL1x(TFG$Nt$PANDY1$9_=q_mvYT@N1(9#Qy&V}074@I_vLlUo4?izGkdq@>_WOdab
z8U6+B;@0C6t_*OaysGoMqx+5p5HJ!d3|j1lon|4scUKF)OXQ8|jJbS*&h0&#P|AJS
z%N%Y#t|}2p7`x_RsUtoRG~wQZZ#!7qluXzN$VXfThuydC)#%V025n=ToQmD!XH`!4
zeEm+TST?p$uWs<GenoHrhlJ62!Oq%0&X+43MGT-<$Kjs1c1qR7k_QzM?xsp#fEfh&
z@|SZV(I&^8tC4;?w^}vlrAIT_w;%9md7}ZM%{j8UT#fg}54r&rG}}(Iuhm+yD{pST
zZCB~!T#MK8^u>ZeZv_^<KbqkLd;9w6at!M^kmQ`q7~XS9sL6n!;OP~OC=zx?W|o|+
zYb{Af*!J#Ybi|SF@XN|{!bB9f?UMo0<KfH?DV%v5*VpCP{v9W4lR{zDud!u4ZzfT2
z=>%1SsH?5ChI>+df;PtZp5E&-;o4N=;hH=;nLQRq+EOYZ-sBMA(AC1AX}v@uE;U#7
z;K{$`-d+GQuZVSJv8i1ojH-B7)=<qKO3djrqLEz2u034&lds;?CPp?sH^z_<G0`e)
zZ#~>1W{QhR9-B|@1H4zvU?=p72nvBF=aUb>G_{WQWZt4RW(y97nX2pQhe9ugQs#Ed
zZ8(9p6Hy4@Bw1V$PvKV19jHAnoEQ+Sc`K;!)BxDHe%GGd6IVqQ*k0Pzc0oV5idgz=
zWNaRzkZJz3I6djdO<vfChfhi_#14T^eS0!KeCZ-`-}z)OSZYfiwXjPS<sa_|vyg3v
z`>iU+e-{X=->H>a(HDPi1Q$E_UhxH;rh=z|L&H>TV5IsxSA)4Cs2ue51OAzu8(nIv
zm2@hMx8U-WT7A>vD~QENsXNSJI(k1lgc2*GFg;`S$f@RuaZ!=kQ;wo8dT<EBy1?W~
zYkaV6536N5PM=b|zx=uPbc9^@=uI~@&?J}A2mHf$BRhG)fn-n+$BnwsbtA6~Fh|H|
zXf0I7pKG}f{^w<Jr{?_hG+>UD@}tJw^yhxO$s2hrK#a4=DVVBzVq{9+*ftq9=+3oo
zr<pK|YnC0RbgnpW0fPH8wRJU9gIcYz&pW<=a%!L*JuY8n#<7m3OW@$VMprcuHQpa<
zRzy&GI1w_Z6skLKhUO^adm{|1#og#N{2W$S#|yF3#2TndTAQADT})u-DDbYPzT7+;
z;cmhhdzabWP@Uc%i|hKRtL*pQwSC;8rNE~&4dei(<Imj<@~@|d!&u=j^A353g8t*s
zF}PW>!+ka*m>8i1pCJ|&KRY<ny8^CR$_eSiQCjsYbH|wVE2L-YplL-T7k?mfnYnF`
z%{^t&<z!9L)~G2ITv#pxkz$HeE0a-v1gn((IMo!hIBUC2P3=tV`S)Tf(D?ahlHzOE
ega3LkuV#t5NbuP8Ed<;sA-0xS%w4p{+5Z3^<+)t|

literal 0
HcmV?d00001

diff --git a/notebooks/real_data_figure/figure.typ b/notebooks/real_data_figure/figure.typ
new file mode 100644
index 0000000..472dc7c
--- /dev/null
+++ b/notebooks/real_data_figure/figure.typ
@@ -0,0 +1,106 @@
+#set page("us-letter")
+#set text(size: 9pt, font: "New Computer Modern")
+
+#table(
+  columns: (1fr, 0.087fr, 0.55fr),
+  // Fractional column widths: results table, probe image, drift map
+  align: center,
+  gutter: 0pt,
+  stroke: none,
+
+  box(width: 100%)[
+    #table(
+      columns: (1.3fr, 1fr, 1fr, 1fr, 1fr, 1fr, 1.2fr, 1fr, 1fr),
+      inset: 4pt,
+      stroke: 0.5pt + gray,
+      fill: (x, y) => {
+        if x == 2 or x == 3 {
+          green.lighten(85%)
+        } else if x == 4 or x == 5 or x == 6 {
+          orange.lighten(85%)
+        } else if x == 7 or x == 8 {
+          red.lighten(85%)
+        } else if x == 0 or x == 1 {
+          gray.lighten(80%)
+        }
+      },
+      [Sorter], [Tot units], [BC good], [UR sua], [BC mua], [UR mua], [SLAy merges], [BC noise], [UR noise],
+    )
+  ],
+  align()[],
+  align()[],
+
+  // --- Row 1 ---
+
+  box(width: 100%)[
+    *Lebedeva et al., Chronic, NP2.0, 38 mins*
+    #table(
+      columns: (1.3fr, 1fr, 1fr, 1fr, 1fr, 1fr, 1.2fr, 1fr, 1fr),
+      inset: 4pt,
+      stroke: 0.5pt + gray,
+      fill: (x, y) => {
+        if x == 2 or x == 3 {
+          green.lighten(85%)
+        } else if x == 4 or x == 5 or x == 6 {
+          orange.lighten(85%)
+        } else if x == 7 or x == 8 {
+          red.lighten(85%)
+        }
+      },
+      [KS4], [808], [246], [288], [489], [312], [5], [73], [208],
+      [Lupin], [683], [216], [259], [460], [338], [2], [7], [86],
+      [TDC2], [617], [119], [246], [491], [369], [13], [7], [2],
+      [SC2], [270], [102], [133], [166], [137], [1], [2], [0],
+    )
+  ],
+  align()[#image("drift_maps_and_probes/ucl_probe.png", height: 12.6%)],
+  image("drift_maps_and_probes/ucl_drift.svg"),
+  // --- Row 2 ---
+  box(width: 100%)[
+    *IBL, Acute, NP1.0, 67 mins*
+    #table(
+      columns: (1.3fr, 1fr, 1fr, 1fr, 1fr, 1fr, 1.2fr, 1fr, 1fr),
+      inset: 4pt,
+      stroke: 0.5pt + gray,
+      fill: (x, y) => {
+        if x == 2 or x == 3 {
+          green.lighten(85%)
+        } else if x == 4 or x == 5 or x == 6 {
+          orange.lighten(85%)
+        } else if x == 7 or x == 8 {
+          red.lighten(85%)
+        }
+      },
+      [KS4], [1050], [210], [459], [673], [354], [24], [167], [237],
+      [Lupin], [864], [209], [379], [601], [278], [6], [54], [207],
+      [TDC2], [954], [124], [417], [778], [504], [33], [52], [33],
+      [SC2], [458], [97], [170], [333], [271], [0], [28], [17],
+    )
+  ],
+  align()[#image("drift_maps_and_probes/IBL_probe.png", height: 11.9%)],
+  image("drift_maps_and_probes/IBL_drift.svg"),
+  // --- Row 3 ---
+  box(width: 100%)[
+    *Duszkiewicz et al., Chronic, CN 156H5, 211 mins*
+    #table(
+      columns: (1.3fr, 1fr, 1fr, 1fr, 1fr, 1fr, 1.2fr, 1fr, 1fr),
+      inset: 4pt,
+      stroke: 0.5pt + gray,
+      fill: (x, y) => {
+        if x == 2 or x == 3 {
+          green.lighten(85%)
+        } else if x == 4 or x == 5 or x == 6 {
+          orange.lighten(85%)
+        } else if x == 7 or x == 8 {
+          red.lighten(85%)
+        }
+      },
+      [KS4], [174], [41], [71], [98], [68], [2], [35], [35],
+      [Lupin], [162], [56], [96], [103], [63], [4], [3], [3],
+      [TDC2], [191], [11], [60], [180], [128], [9], [0], [3],
+      [SC2], [58], [4], [9], [53], [47], [0], [1], [2],
+    )
+  ],
+  align()[#image("drift_maps_and_probes/Duszkiewicz_probe.png", height: 12.3%)],
+  image("drift_maps_and_probes/Duszkiewicz_drift.svg"),
+)
diff --git a/notebooks/real_data_figure/generate_curation_data.py b/notebooks/real_data_figure/generate_curation_data.py
index e301bd6..d0998fa 100644
--- a/notebooks/real_data_figure/generate_curation_data.py
+++ b/notebooks/real_data_figure/generate_curation_data.py
@@ -64,4 +64,10 @@
 
     results = pd.DataFrame(all_protocols_data, columns=["sorter", "total units", "bombcell good", "unitrefine sua", "bombcell mua", "unitrefine mua", "# slay merges", "bombcell noise", "unitrefine noise"], index=None)
 
-    results.to_csv(real_data_figure_folder / f"curation_results/{dataset_name}_results.csv", index=False)
+    results.to_csv(real_data_figure_folder / f"curation_results/{dataset_name}_results.csv", index=False)  
+
+    # Print every results row as one line of Typst table cells: `[a], [b], ...`
+    for _, row_values in results.iterrows():
+        # each value is wrapped in brackets and followed by ", "
+        typst_cells = "".join(f"[{cell}], " for cell in row_values)
+        print(typst_cells)
diff --git a/notebooks/real_data_figure/make_drift_plots.py b/notebooks/real_data_figure/make_drift_plots.py
new file mode 100644
index 0000000..0986fb5
--- /dev/null
+++ b/notebooks/real_data_figure/make_drift_plots.py
@@ -0,0 +1,111 @@
+import matplotlib.pyplot as plt
+import matplotlib.cm as cm
+import matplotlib.colors as mcolors
+import spikeinterface.full as si
+
+from pathlib import Path
+
+repo_folder = Path("/home/nolanlab/fromgit/sorting_components_benchmark_paper/")  # machine-specific absolute path to the repository checkout
+real_data_figure_folder = repo_folder / "notebooks/real_data_figure"
+analyzers_folder = real_data_figure_folder / "analyzers"
+drift_maps_folder = real_data_figure_folder / "drift_maps_and_probes"
+# NOTE(review): `bombcell_labels` is not referenced anywhere else in this script.
+bombcell_labels = ['good', 'mua', 'noise', 'non_soma_good', 'non_soma_mua']
+# NOTE(review): this module-level default is overwritten per dataset inside the loop below.
+protocol = 'no_motion_correction'
+# Font size applied to the axis/colorbar labels and tick labels.
+FONT_SIZE = 18
+# Per-dataset plot settings, keyed by the dataset name used in the analyzer folder and output file names.
+plotting_settings = {
+    'ucl': {
+        'protocol': 'no_motion_correction',  # suffix of the analyzer folder name to load
+        'vmin': -600,  # lower limit of the colorbar normalisation (upper limit is 0)
+        'scatter_decimate': 20,  # forwarded to si.plot_drift_raster_map
+        'cbar_ticks': [-600,-500,-400,-300,-200,-100,0],  # colorbar tick values
+        'cbar_ticklabels': ['600','','400','','200','','0'],  # colorbar tick labels (sign dropped)
+        'yticklabels': ['','2.9', '', '3.1', '', '3.3', '', '3.5'],  # y tick labels; the axis is labelled 'Depth [mm]'
+        'xticks_s': [0,600,1200,1800],  # x tick positions in seconds; labelled in minutes
+    },
+    'IBL': {
+        'protocol': 'motion_correction',
+        'vmin': -457.829994,
+        'scatter_decimate': 20,
+        'cbar_ticks': [-600,-500,-400,-300,-200,-100,0], 
+        'cbar_ticklabels': ['600','','400','','200','','0'],
+        'yticklabels': ['', '0', '', '1', '', '2', '', '3', ''],
+        'xticks_s': [0,900,1800,2700,3600],
+    },
+    'Duszkiewicz': {
+        'protocol': 'no_motion_correction',
+        'vmin': -380,
+        'scatter_decimate': 5,
+        'cbar_ticks': [-400,-300,-200,-100,0],
+        'cbar_ticklabels': [400,300,200,100,0],
+        'yticklabels': ['', '', '0.2', '', '0.4', '', '0.6', '', '0.8'],
+        'xticks_s': [0,3000,6000,9000,12000],
+    }
+}
+
+for dataset_name, dataset_settings in plotting_settings.items():
+    # Build and save one drift raster map per dataset, using bombcell 'good' units only.
+    protocol = dataset_settings['protocol']
+
+    # Load the Kilosort4 analyzer; fall back to the '.zarr' path when no plain folder exists.
+    analyzer_path = analyzers_folder / f'{dataset_name}_kilosort4_{protocol}_analyzer'
+    if analyzer_path.is_dir():
+        analyzer = si.load_sorting_analyzer(analyzer_path)
+    else:
+        analyzer = si.load_sorting_analyzer(str(analyzer_path) + '.zarr')
+
+    print(analyzer.get_total_duration())
+
+    bombcell_unit_labels = si.bombcell_label_units(analyzer, split_non_somatic_good_mua=True)['bombcell_label'].values
+    good_units = analyzer.unit_ids[bombcell_unit_labels == 'good']
+    analyzer_good = analyzer.select_units(good_units)
+
+    cmap_name = 'inferno'
+
+    fig = si.plot_drift_raster_map(
+        sorting_analyzer=analyzer_good,
+        cmap=cmap_name,
+        alpha=0.10,
+        scatter_decimate=dataset_settings['scatter_decimate'],
+        figsize=(8, 4.5),
+    )
+    # 1. Colour scale limits: per-dataset vmin up to 0.
+    vmin = dataset_settings['vmin']
+    vmax = 0
+
+    # 2. Stand-alone mappable so a colorbar can be drawn for that scale.
+    norm = mcolors.Normalize(vmin=vmin, vmax=vmax)
+    sm = cm.ScalarMappable(cmap=plt.get_cmap(cmap_name), norm=norm)
+    sm.set_array([])  # Required for the colorbar to initialize correctly
+
+    # 3. Attach the colorbar to the first axes of the figure.
+    ax = fig.figure.get_axes()[0]
+    cbar = fig.figure.colorbar(sm, ax=ax)
+
+    # 4. Rasterize the scatter artists (PathCollection objects) on that axes.
+    for artist in ax.get_children():
+        if isinstance(artist, plt.matplotlib.collections.PathCollection):
+            artist.set_rasterized(True)
+
+    # 5. Label the colorbar. Tick positions are set explicitly so each label
+    # lands on its intended value; ticks outside [vmin, vmax] are dropped.
+    cbar_ticks = dataset_settings['cbar_ticks']
+    shown = [(t, lab) for t, lab in zip(cbar_ticks, dataset_settings['cbar_ticklabels']) if vmin <= t <= vmax]
+    cbar.set_label('Abs peak amplitude [uV]', fontsize=FONT_SIZE)
+    cbar.set_ticks([t for t, _ in shown], labels=[lab for _, lab in shown])
+    cbar.ax.tick_params(labelsize=FONT_SIZE)  # Font size for colorbar ticks
+
+    ax.set_ylabel('Depth [mm]', fontsize=FONT_SIZE)
+    ax.set_yticklabels(dataset_settings['yticklabels'], fontsize=FONT_SIZE)
+
+    xticks_s = dataset_settings['xticks_s']
+    ax.set_xticks(xticks_s)
+    ax.set_xticklabels([int(xtick/60) for xtick in xticks_s], fontsize=FONT_SIZE)
+    ax.set_xlabel('Time [min]', fontsize=FONT_SIZE)
+
+    ax.set_title(label=None)
+
+    fig.figure.savefig(drift_maps_folder / f'{dataset_name}_drift.svg',  bbox_inches='tight')
diff --git a/notebooks/real_data_figure/pyproject.toml b/notebooks/real_data_figure/pyproject.toml
index 91238fc..e53fa3a 100644
--- a/notebooks/real_data_figure/pyproject.toml
+++ b/notebooks/real_data_figure/pyproject.toml
@@ -6,6 +6,7 @@ readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
     "ipykernel>=7.2.0",
+    "matplotlib>=3.10.9",
     "pandas>=3.0.2",
     "scikit-learn==1.6",
     "skops>=0.14.0",

From 8a7ba6b6b30c2beabb5a3453ae6c330405a7495d Mon Sep 17 00:00:00 2001
From: chrishalcrow <chrishalcrow@gmail.com>
Date: Mon, 11 May 2026 14:15:47 +0100
Subject: [PATCH 4/7] Bug in ibl read

---
 notebooks/real_data_figure/pyproject.toml        | 5 +++++
 notebooks/real_data_figure/sort_all_real_data.py | 7 +++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/notebooks/real_data_figure/pyproject.toml b/notebooks/real_data_figure/pyproject.toml
index e53fa3a..e80d7e0 100644
--- a/notebooks/real_data_figure/pyproject.toml
+++ b/notebooks/real_data_figure/pyproject.toml
@@ -5,10 +5,15 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
+    "h5py>=3.16.0",
+    "hdbscan>=0.8.42",
     "ipykernel>=7.2.0",
+    "kilosort==4.1.2",
     "matplotlib>=3.10.9",
+    "numba>=0.65.1",
     "pandas>=3.0.2",
     "scikit-learn==1.6",
+    "scipy>=1.17.1",
     "skops>=0.14.0",
     "spikeinterface>=0.104.3",
 ]
diff --git a/notebooks/real_data_figure/sort_all_real_data.py b/notebooks/real_data_figure/sort_all_real_data.py
index 08122b0..ff66c3a 100644
--- a/notebooks/real_data_figure/sort_all_real_data.py
+++ b/notebooks/real_data_figure/sort_all_real_data.py
@@ -48,7 +48,6 @@
     ucl_spykingcircus2_no_motion_correction_analyzer
     ucl_tridesclous2_no_motion_correction_analyzer 
 
-
 """
 
 import spikeinterface.full as si
@@ -56,7 +55,7 @@
 from sort_one_piece_of_data import do_sorting
 
 # edit this to point to the `sorting_components_benchmark_paper` on your own computer
-repo_folder = Path("/Users/christopherhalcrow/Work/fromgit/sorting_components_benchmark_paper/")
+repo_folder = Path("/home/nolanlab/fromgit/sorting_components_benchmark_paper/")
 
 raw_data_folder = repo_folder / "notebooks/real_data_figure/raw_data"
 analyzers_folder = repo_folder / "notebooks/real_data_figure/raw_data"
@@ -73,7 +72,7 @@
     analyzers_folder / f"IBL_{protocol}_analyzer" for protocol in np1_protocols
 ]
 
-recording = si.read_nwb_recording(np1_data_folder)
+recording = si.read_nwb_recording(np1_data_folder, electrical_series_path = 'acquisition/ElectricalSeriesProbe00AP').frame_slice(start_frame=0, end_frame=30000*60)
 for protocol_name, analyzer_folder in zip(np1_protocols, np1_analyzer_folders):
     do_sorting(recording, analyzer_folder, protocol_name)
 
@@ -89,7 +88,7 @@
     analyzers_folder / f"ucl_{protocol}_analyzer" for protocol in np1_protocols
 ]
 
-recording = si.read_cbin_ibl(np2_data_folder)
+recording = si.read_cbin_ibl(np2_data_folder).frame_slice(start_frame=0, end_frame=30000*60)
 for protocol_name, analyzer_folder in zip(np2_protocols, np2_analyzer_folders):
     do_sorting(recording, analyzer_folder, protocol_name)
 

From 056173bce672c69ce0e1b7ab3912cb29b29f25e3 Mon Sep 17 00:00:00 2001
From: chrishalcrow <chrishalcrow@gmail.com>
Date: Tue, 19 May 2026 09:46:58 +0100
Subject: [PATCH 5/7] Update filepaths

---
 notebooks/real_data_figure/sort_all_real_data.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/notebooks/real_data_figure/sort_all_real_data.py b/notebooks/real_data_figure/sort_all_real_data.py
index ff66c3a..d34799e 100644
--- a/notebooks/real_data_figure/sort_all_real_data.py
+++ b/notebooks/real_data_figure/sort_all_real_data.py
@@ -10,8 +10,8 @@
 from https://rdr.ucl.ac.uk/articles/dataset/Chronic_recordings_from_Neuropixels_2_0_probes_in_mice/24411841
 This needs to be untarred into a folder.
 
-The Dus data is the file
-    A3720-191126.nwb
+The Duszkiewicz data is the file
+    sub-A3702/sub-A3702_ses-191126_behavior+ecephys.nwb
 from the DANDI dataset 000939 - https://dandiarchive.org/dandiset/000939
 
 For this code to run, put all files in the "raw_data" folder and change the "repo_folder" below, to point at
@@ -23,9 +23,9 @@
            sort_all_real_data.py
            raw_data/
                sub-UCLA034_ses-3537d970-f515-4786-853f-23de525e110f_desc-raw_ecephys.nwb
-               A3702-191126.nwb
+               sub-A3702_ses-191126_behavior+ecephys.nwb
                AL032_2020-01-07/
-                   ???
+                   ...
             analyzers/
             curation_results/
 
@@ -103,7 +103,7 @@
 cn_analyzer_folders = [
     analyzers_folder / f"Duszkiewicz_{protocol}_analyzer" for protocol in cn_protocols
 ]
-cn_data_folder = raw_data_folder / 'A3702-191126.nwb'
+cn_data_folder = raw_data_folder / 'sub-A3702_ses-191126_behavior+ecephys.nwb'
 recording = si.read_nwb_recording(cn_data_folder)
 for protocol_name, analyzer_folder in zip(cn_protocols, cn_analyzer_folders):
     do_sorting(recording, analyzer_folder, protocol_name)

From 42e1e98fb475d37900d7927908000dbd4f24bed4 Mon Sep 17 00:00:00 2001
From: chrishalcrow <chrishalcrow@gmail.com>
Date: Tue, 19 May 2026 09:50:13 +0100
Subject: [PATCH 6/7] remove debug stuff

---
 notebooks/real_data_figure/sort_all_real_data.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/notebooks/real_data_figure/sort_all_real_data.py b/notebooks/real_data_figure/sort_all_real_data.py
index d34799e..6a57860 100644
--- a/notebooks/real_data_figure/sort_all_real_data.py
+++ b/notebooks/real_data_figure/sort_all_real_data.py
@@ -49,7 +49,6 @@
     ucl_tridesclous2_no_motion_correction_analyzer 
 
 """
-
 import spikeinterface.full as si
 from pathlib import Path
 from sort_one_piece_of_data import do_sorting
@@ -72,7 +71,7 @@
     analyzers_folder / f"IBL_{protocol}_analyzer" for protocol in np1_protocols
 ]
 
-recording = si.read_nwb_recording(np1_data_folder, electrical_series_path = 'acquisition/ElectricalSeriesProbe00AP').frame_slice(start_frame=0, end_frame=30000*60)
+recording = si.read_nwb_recording(np1_data_folder, electrical_series_path = 'acquisition/ElectricalSeriesProbe00AP')
 for protocol_name, analyzer_folder in zip(np1_protocols, np1_analyzer_folders):
     do_sorting(recording, analyzer_folder, protocol_name)
 
@@ -88,12 +87,11 @@
     analyzers_folder / f"ucl_{protocol}_analyzer" for protocol in np1_protocols
 ]
 
-recording = si.read_cbin_ibl(np2_data_folder).frame_slice(start_frame=0, end_frame=30000*60)
+recording = si.read_cbin_ibl(np2_data_folder)
 for protocol_name, analyzer_folder in zip(np2_protocols, np2_analyzer_folders):
     do_sorting(recording, analyzer_folder, protocol_name)
 
-# CN data from Adrian
-
+# CN data from Duszkiewicz
 cn_protocols = [
         'kilosort4_no_motion_correction',
         'lupin_no_motion_correction',
@@ -104,6 +102,6 @@
     analyzers_folder / f"Duszkiewicz_{protocol}_analyzer" for protocol in cn_protocols
 ]
 cn_data_folder = raw_data_folder / 'sub-A3702_ses-191126_behavior+ecephys.nwb'
-recording = si.read_nwb_recording(cn_data_folder)
+recording = si.read_nwb_recording(cn_data_folder, electrical_series_path = 'acquisition/ElectricalSeries')
 for protocol_name, analyzer_folder in zip(cn_protocols, cn_analyzer_folders):
     do_sorting(recording, analyzer_folder, protocol_name)

From 894e011c3e40782513a0b53195e8c57ff28cb4cb Mon Sep 17 00:00:00 2001
From: chrishalcrow <chrishalcrow@gmail.com>
Date: Tue, 19 May 2026 09:55:21 +0100
Subject: [PATCH 7/7] add reproduce steps

---
 README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 60fc076..74a4612 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,11 @@ Figure 7 can be reproduce using:
 
 Figure 8 can be reproduce using:
 
-  * TODO Chris
-  * TODO Chris
+  * `cd notebooks/real_data_figure` : move to the folder where the environment is defined
+  * `uv run sort_all_real_data.py` : Sort three datasets using four sorters
+  * `uv run generate_curation_data.py` : Use UnitRefine, Bombcell and SLAy to curate the sorting output
+  * `uv run make_drift_plots.py` : Make the drift and probe plots
+  * `figure.typ` : Generate the plot
 
 
 Supplementary figure can be reproduce using::