def compute_mean_params(parameter_samples: dict[str, np.ndarray]) -> dict[str, float]:
    """Mean of each parameter across the accepted-fit pool.

    Classical (non-robust) counterpart to :func:`compute_median_params`. It
    operates on the pooled ``FitResult.parameter_samples`` mapping (one flat
    array of accepted-fit values per parameter key) rather than raw
    ``FitAttempt`` objects, because that pool is what downstream consumers
    (the fitted-parameters table, distribution plots) actually carry.

    Parameters
    ----------
    parameter_samples : dict[str, np.ndarray]
        One flat array of accepted-fit values per parameter key. Any
        array-like accepted by ``np.asarray(..., dtype=float)`` works.

    Returns
    -------
    dict[str, float]
        Parameter key -> arithmetic mean of its accepted-fit values. A key
        whose pool is empty maps to ``nan``.
    """
    mean_params: dict[str, float] = {}
    for key, values in parameter_samples.items():
        # Coerce exactly like compute_std_params so both summaries accept the
        # same inputs (lists, tuples, integer arrays, ...).
        arr = np.asarray(values, dtype=float)
        # np.mean of an empty array also yields nan, but it emits a
        # "Mean of empty slice" RuntimeWarning; return nan explicitly instead.
        mean_params[key] = float(arr.mean()) if arr.size else float('nan')
    return mean_params


def compute_std_params(parameter_samples: dict[str, np.ndarray]) -> dict[str, float]:
    """Standard deviation of each parameter across the accepted-fit pool.

    Classical (non-robust) counterpart to :func:`compute_mad`. Uses the sample
    standard deviation (``ddof=1``), the conventional estimator that pairs
    with the arithmetic mean. A pool with fewer than two values has no
    estimable spread and returns ``0.0`` for that key.

    Parameters
    ----------
    parameter_samples : dict[str, np.ndarray]
        One flat array of accepted-fit values per parameter key. Any
        array-like accepted by ``np.asarray(..., dtype=float)`` works.

    Returns
    -------
    dict[str, float]
        Parameter key -> sample standard deviation of its accepted-fit values.
    """
    std_params: dict[str, float] = {}
    for key, values in parameter_samples.items():
        arr = np.asarray(values, dtype=float)
        # ddof=1 divides by (n - 1), which is zero or negative for n < 2;
        # guard so those pools report 0.0 instead of nan plus a warning.
        std_params[key] = float(np.std(arr, ddof=1)) if arr.size > 1 else 0.0
    return std_params

Uncertainty — what the ± value means

- -

The ± value next to each fitted parameter tells you how -much that parameter varies across all the acceptable fits the -fitter found. A small ± means the fitter consistently lands on -the same value; a large one means there is real spread.

+_HEADERS = ['Parameter', 'Median \u00b1 MAD', 'Mean \u00b1 STDEV', 'Units'] -

Technically, the reported value is the median of the -acceptable fits, and the ± is their median absolute -deviation — a robust measure of spread that is not thrown off -by a few outliers.

+_UNCERTAINTY_HELP_HTML = """ +

The two ± summaries — what they mean

+ +

Each parameter is summarised across all accepted fits the fitter +found (no re-fitting). Two pairs are shown so you can judge both the typical +value and how much it varies:

+ + + +

MAD is not the same as STDEV

+

They measure spread in different ways. For a clean, bell-shaped (Gaussian) +pool they agree after a fixed scaling: STDEV ≈ 1.48 × MAD. +When the two pairs roughly satisfy that, the accepted fits are well behaved and +either summary is fine.

+

When they disagree — typically STDEV much larger than +1.48 × MAD, or the mean sitting far from the median — a +few outlying or skewed fits are inflating the classical numbers. Trust the +robust Median ± MAD in that case, and inspect the spread in the +distribution (box-and-whisker) plot.

Average mode

-

Your replicas are averaged into one curve, which is then fit many -times from different starting points. The ± reflects how -precisely the fitter can pin down the parameter on that averaged curve. -This is a measure of numerical precision — it does not -capture replica-to-replica variation.

+

Your replicas are averaged into one curve, which is then fit many times from +different starting points. The spread reflects how precisely the fitter can pin +down the parameter on that averaged curve — a measure of numerical +precision. It does not capture replica-to-replica variation.

Per-replica mode

-

Each replica is fit independently, and every acceptable fit from -every replica is collected together. The ± now reflects the -full spread — including differences between replicas. This is a -measure of experimental reproducibility, which is typically the -number you would report in a publication.

- -

The Median Fit curve

-

The curve drawn on the plot uses the median parameter values from all -acceptable fits. It is labelled Median Fit because it -represents the middle of the distribution, not the single “best” -attempt.

+

Each replica is fit independently and every acceptable fit from every +replica is pooled together. The spread now includes differences between +replicas — a measure of experimental reproducibility, typically +the number you would report in a publication.

Which mode am I using?

-

The column header tells you: ± Uncertainty -(optimiser) in average mode, or ± Uncertainty -(pool N=…, … replicas) in per-replica -mode. Switch between modes in Fit Configuration → “Fit per -replica”.

+

The caption under the table says so — average mode or +per-replica mode — along with how many accepted fits (N) the +statistics were computed from. Switch modes in Fit Configuration → +“Fit per replica”.

""" @@ -82,13 +93,34 @@ """ +def _magnitude(value) -> float: + """Return the float magnitude of a Quantity or plain number.""" + return float(value.magnitude) if isinstance(value, Quantity) else float(value) + + +def _fmt_mag(magnitude: float, unit_str: str) -> str: + """Format a magnitude for display, stripping any unit (shown separately).""" + if unit_str: + return f'{Q_(magnitude, unit_str):.3g~H}'.rsplit(' ', 1)[0] + return f'{magnitude:.3g}' + + +def _make_cell(html: str) -> QLabel: + """Build a centred rich-text cell label for the parameters table.""" + lbl = QLabel(html) + lbl.setAlignment(Qt.AlignmentFlag.AlignCenter) + lbl.setTextFormat(Qt.TextFormat.RichText) + return lbl + + class FitSummaryWidget(QWidget): """Read-only display of ``FitResult`` statistics. Layout ------ - ``QGroupBox("Fitted Parameters")`` with columns: - Parameter | Value | +/- Uncertainty | Units + Parameter | Median +/- MAD | Mean +/- STDEV | Units, plus a caption + noting the fit mode and accepted-fit count. - ``QGroupBox("Fit Quality")`` with RMSE, R-squared, Fits passing. 
""" @@ -97,18 +129,21 @@ def __init__(self, parent=None): self._params_group = InfoGroupBox( 'Fitted Parameters', - 'Uncertainty: what the \u00b1 value means', + 'Median \u00b1 MAD vs Mean \u00b1 STDEV', _UNCERTAINTY_HELP_HTML, ) - self._table = QTableWidget(0, 4) - self._uncertainty_header_default = '\u00b1 Uncertainty (optimiser)' - self._table.setHorizontalHeaderLabels(['Parameter', 'Value', self._uncertainty_header_default, 'Units']) + self._table = QTableWidget(0, len(_HEADERS)) + self._table.setHorizontalHeaderLabels(_HEADERS) header = self._table.horizontalHeader() header.setStretchLastSection(False) header.setSectionResizeMode(QHeaderView.ResizeMode.Interactive) self._table.setEditTriggers(QTableWidget.EditTrigger.NoEditTriggers) + self._caption = QLabel() + self._caption.setWordWrap(True) + self._caption.setStyleSheet('color: gray;') params_layout = QVBoxLayout(self._params_group) params_layout.addWidget(self._table) + params_layout.addWidget(self._caption) self._quality_group = InfoGroupBox('Fit Quality', 'Fit Quality', _FIT_QUALITY_HELP_HTML) quality_layout = QFormLayout(self._quality_group) @@ -127,6 +162,9 @@ def update_result(self, result: FitResult) -> None: """Populate the widget from a ``FitResult``. Resolves units from ``ASSAY_REGISTRY`` using ``result.assay_type``. + Shows two stat pairs per parameter: the robust median/MAD (reusing the + stored values) and the classical mean/STDEV (computed from + ``result.parameter_samples``; no re-fit). Parameters ---------- @@ -137,54 +175,47 @@ def update_result(self, result: FitResult) -> None: if assay_type is not None: units = ASSAY_REGISTRY[assay_type].units + # The fit mode and pool size used to live in the uncertainty-column + # header; with two stat pairs that context moves to a caption. 
if result.uncertainty_source == 'replicate': # JSON-compat magic value pool_size = result.metadata.get('pool_size', result.n_passing) n_reps = result.metadata.get('n_replicas_fit', '?') - header = f'\u00b1 Uncertainty (pool N={pool_size}, {n_reps} replicas)' + self._caption.setText( + f'Statistics across N = {pool_size} accepted fits pooled from {n_reps} replicas (per-replica mode).' + ) else: - header = self._uncertainty_header_default - self._table.setHorizontalHeaderLabels(['Parameter', 'Value', header, 'Units']) + self._caption.setText(f'Statistics across N = {result.n_passing} accepted fits (average mode).') params = result.parameters uncertainties = result.uncertainties + samples = result.parameter_samples + means = compute_mean_params(samples) if samples else None + stds = compute_std_params(samples) if samples else None self._table.setRowCount(len(params)) - for row, (key, value) in enumerate(params.items()): - unc = uncertainties.get(key, float('nan')) + for row, key in enumerate(params): unit_str = units.get(key, '') - - lbl_name = QLabel(fmt_param(key)) - lbl_name.setAlignment(Qt.AlignmentFlag.AlignCenter) - self._table.setCellWidget(row, 0, lbl_name) - - val_mag = float(value.magnitude) if isinstance(value, Quantity) else float(value) - unc_mag = float(unc.magnitude) if isinstance(unc, Quantity) else float(unc) - - # Use Pint HTML formatter for proper superscript notation - if unit_str: - val_html = f'{Q_(val_mag, unit_str):.3g~H}' - unc_html = f'{Q_(unc_mag, unit_str):.3g~H}' + median_mag = _magnitude(params[key]) + mad_mag = _magnitude(uncertainties.get(key, float('nan'))) + + self._table.setCellWidget(row, 0, _make_cell(fmt_param(key))) + self._table.setCellWidget( + row, + 1, + _make_cell(f'{_fmt_mag(median_mag, unit_str)} \u00b1 {_fmt_mag(mad_mag, unit_str)}'), + ) + + if means is not None and key in means: + mean_cell = _make_cell(f'{_fmt_mag(means[key], unit_str)} \u00b1 {_fmt_mag(stds[key], unit_str)}') else: - val_html = f'{val_mag:.3g}' - 
unc_html = f'{unc_mag:.3g}' - # Strip unit from the HTML — units shown in separate column - val_display = val_html.rsplit(' ', 1)[0] if unit_str else val_html - unc_display = unc_html.rsplit(' ', 1)[0] if unit_str else unc_html - - lbl_val = QLabel(val_display) - lbl_val.setAlignment(Qt.AlignmentFlag.AlignCenter) - lbl_val.setTextFormat(Qt.TextFormat.RichText) - self._table.setCellWidget(row, 1, lbl_val) - - lbl_unc = QLabel(unc_display) - lbl_unc.setAlignment(Qt.AlignmentFlag.AlignCenter) - lbl_unc.setTextFormat(Qt.TextFormat.RichText) - self._table.setCellWidget(row, 2, lbl_unc) - - unit_html = fmt_unit_html(unit_str) - lbl_unit = QLabel(unit_html) - lbl_unit.setAlignment(Qt.AlignmentFlag.AlignCenter) - self._table.setCellWidget(row, 3, lbl_unit) + mean_cell = _make_cell('\u2014') + mean_cell.setToolTip( + 'Mean \u00b1 STDEV needs the accepted-fit pool, which is ' + 'unavailable for this result (e.g. imported from an older file).' + ) + self._table.setCellWidget(row, 2, mean_cell) + + self._table.setCellWidget(row, 3, _make_cell(fmt_unit_html(unit_str))) rmse_html = f'{Q_(result.rmse, "au"):.3g~H}' self._rmse_label.setTextFormat(Qt.TextFormat.RichText) @@ -218,7 +249,8 @@ def _autosize_columns(self) -> None: def clear(self) -> None: """Reset all fields to their empty state.""" self._table.setRowCount(0) - self._table.setHorizontalHeaderLabels(['Parameter', 'Value', self._uncertainty_header_default, 'Units']) + self._table.setHorizontalHeaderLabels(_HEADERS) + self._caption.clear() self._rmse_label.setText('\u2014') self._r2_label.setText('\u2014') self._passing_label.setText('\u2014') diff --git a/tests/unit/gui/test_fit_summary_stats.py b/tests/unit/gui/test_fit_summary_stats.py new file mode 100644 index 0000000..60616c6 --- /dev/null +++ b/tests/unit/gui/test_fit_summary_stats.py @@ -0,0 +1,107 @@ +"""FitSummaryWidget: the Median ± MAD and Mean ± STDEV stat pairs. 
+ +Math correctness of the aggregation lives in ``test_filters_mean_std.py``; +here we verify the widget wiring — that the pool feeds the classical pair, the +robust pair comes from the stored median/MAD, and the mode caption is right. +""" + +import numpy as np +import pytest + +pytest.importorskip('PyQt6') + + +def _make_result(**overrides): + from core.pipeline.fit_pipeline import FitResult + + x = np.linspace(0, 1e-4, 10) + base = dict( + parameters={'Ka_guest': 2.0, 'I0': 100.0}, + uncertainties={'Ka_guest': 0.5, 'I0': 5.0}, + rmse=0.01, + r_squared=0.99, + n_passing=3, + n_total=10, + x_fit=x, + y_fit=x, + assay_type='GDA', + model_name='equilibrium_4param', + parameter_samples={ + 'Ka_guest': np.array([1.0, 2.0, 3.0]), + 'I0': np.array([95.0, 100.0, 105.0]), + }, + ) + base.update(overrides) + return FitResult(**base) + + +def test_table_has_two_stat_pair_columns(qapp): + from gui.plotting.fit_summary_widget import FitSummaryWidget + + widget = FitSummaryWidget() + widget.update_result(_make_result()) + + headers = [widget._table.horizontalHeaderItem(c).text() for c in range(widget._table.columnCount())] + assert headers == ['Parameter', 'Median ± MAD', 'Mean ± STDEV', 'Units'] + assert widget._table.rowCount() == 2 + + +def test_stat_pairs_reflect_pool_and_stored_values(qapp): + """UNKNOWN_TYPE has no registry units, so values format as plain %.3g and we + can assert the exact cell text. 
Median/MAD come from the stored Quantities; + Mean/STDEV are computed from the pool [1,2,3] -> mean 2, sample STDEV 1.""" + from gui.plotting.fit_summary_widget import FitSummaryWidget + + widget = FitSummaryWidget() + widget.update_result( + _make_result( + assay_type='UNKNOWN_TYPE', + parameters={'p': 2.0}, + uncertainties={'p': 0.5}, + parameter_samples={'p': np.array([1.0, 2.0, 3.0])}, + ) + ) + + assert widget._table.cellWidget(0, 1).text() == '2 ± 0.5' # Median ± MAD + assert widget._table.cellWidget(0, 2).text() == '2 ± 1' # Mean ± STDEV + + +def test_mean_stdev_unavailable_without_pool(qapp): + from gui.plotting.fit_summary_widget import FitSummaryWidget + + widget = FitSummaryWidget() + widget.update_result(_make_result(parameter_samples=None)) + + # No pool -> classical pair cannot be computed; robust pair still shows. + assert widget._table.cellWidget(0, 2).text() == '—' + assert '±' in widget._table.cellWidget(0, 1).text() + + +def test_caption_distinguishes_modes(qapp): + from gui.plotting.fit_summary_widget import FitSummaryWidget + + widget = FitSummaryWidget() + + widget.update_result(_make_result()) + assert 'average mode' in widget._caption.text() + + widget.update_result( + _make_result( + uncertainty_source='replicate', + metadata={'pool_size': 9, 'n_replicas_fit': 3}, + ) + ) + caption = widget._caption.text() + assert 'per-replica mode' in caption + assert '9' in caption and '3' in caption + + +def test_clear_resets_caption(qapp): + from gui.plotting.fit_summary_widget import FitSummaryWidget + + widget = FitSummaryWidget() + widget.update_result(_make_result()) + widget.clear() + + assert widget._table.rowCount() == 0 + assert widget._caption.text() == '' diff --git a/tests/unit/test_filters_mean_std.py b/tests/unit/test_filters_mean_std.py new file mode 100644 index 0000000..ce27719 --- /dev/null +++ b/tests/unit/test_filters_mean_std.py @@ -0,0 +1,58 @@ +"""Mean / STDEV aggregation over the accepted-fit pool. 
+ +Companion to the median/MAD tests in ``test_optimizer.py``. These pin the +classical (non-robust) summaries shown in the Fitted Parameters table, +computed directly from ``FitResult.parameter_samples`` — no re-fitting. + +Expected values are hand-derived (not recomputed with the code's own formula). +""" + +import numpy as np +import pytest + +from core.optimizer.filters import compute_mean_params, compute_std_params + + +def test_mean_is_arithmetic_average(): + samples = {'Ka': np.array([1.0, 2.0, 3.0]), 'I0': np.array([10.0, 20.0])} + means = compute_mean_params(samples) + assert means['Ka'] == pytest.approx(2.0) # (1+2+3)/3 + assert means['I0'] == pytest.approx(15.0) # (10+20)/2 + + +def test_std_is_sample_stdev(): + # [1,2,3]: mean 2, squared devs 1+0+1=2, /(3-1)=1, sqrt=1. + stds = compute_std_params({'Ka': np.array([1.0, 2.0, 3.0])}) + assert stds['Ka'] == pytest.approx(1.0) + + +def test_std_single_sample_is_zero(): + # One accepted fit -> spread cannot be estimated; report 0.0 (as MAD does). + stds = compute_std_params({'Ka': np.array([7.5])}) + assert stds['Ka'] == 0.0 + + +def test_mean_and_std_pulled_by_outlier(): + """A skewed pool: mean and STDEV are dragged toward the outlier, whereas the + robust median (2.5) and MAD (1.0, asserted in test_optimizer) stay put — the + reason both pairs are shown side by side.""" + pool = np.array([1.0, 2.0, 3.0, 100.0]) + mean = compute_mean_params({'Ka': pool})['Ka'] + std = compute_std_params({'Ka': pool})['Ka'] + + # Hand-computed: mean = 106/4 = 26.5. + assert mean == pytest.approx(26.5) + # Hand-computed sample STDEV: devs from 26.5 are -25.5,-24.5,-23.5,73.5; + # squares sum to 7205; /(4-1)=2401.667; sqrt = 49.0068. + assert std == pytest.approx(49.0068, abs=1e-3) + # Classical pair sits far from the robust pair (median 2.5, MAD 1.0). 
+ assert mean > 25 + assert std > 40 + + +def test_multiple_keys_independent(): + means = compute_mean_params({'a': np.array([0.0, 4.0]), 'b': np.array([5.0, 5.0])}) + stds = compute_std_params({'a': np.array([0.0, 4.0]), 'b': np.array([5.0, 5.0])}) + assert means == {'a': pytest.approx(2.0), 'b': pytest.approx(5.0)} + assert stds['a'] == pytest.approx(np.sqrt(8.0)) # devs ±2 -> 8/(2-1)=8 + assert stds['b'] == 0.0 # identical values -> no spread