From 5360fbd13b246d1e9bad016ec97701d92bf1e529 Mon Sep 17 00:00:00 2001 From: Paulo Sousa Date: Fri, 8 May 2026 11:04:40 +0100 Subject: [PATCH] compare: always apply arch filter when an architecture is set The four `TS.QUERYINDEX` filter-construction sites in compare.py guarded the `arch={baseline,comparison}_architecture` filter with `!= ARCH_X86`, so the filter was only ever appended for non-x86_64 runs. Since both flags default to `ARCH` (= x86_64), the common case x86_64-vs-x86_64 ran with no `arch=` filter at all. In RTS, every modern series carries an explicit `arch={x86_64,aarch64}` label and multiple architectures often coexist under the same `test_name` (e.g. `Ops/sec/Totals` for x86_64 and `Ops/sec/Totalsarch=aarch64` for aarch64). Without an arch filter, `TS.QUERYINDEX` returns both, the downstream "taking the first one" warning fires, and the picks aren't guaranteed to match between baseline and comparison sides. Concrete symptom observed on a recent run: a confident-looking `-29.5%` regression where the baseline median was the x86_64 series (18 651) and the comparison median was the aarch64 series (13 154). Apples-to-apples x86_64-vs-x86_64 was actually -3.6% (within the joan-side 10.5% CV). Replace the four `!= ARCH_X86` guards with `is not None` so the filter is always applied when an architecture is set. After the fix the filter list now contains `arch=x86_64` (verifiable in the `Baseline/Comparison filters: [...]` log line) and `Still multiple time-series after filtering` warnings drop to zero on the same workload. The fix touches all four sites: the throughput query in `from_rts_to_regression_table`, and the three latency-analysis queries in `check_client_side_latency`, `perform_variance_and_p99_analysis`, and `check_latency_for_unstable_throughput`. Co-Authored-By: Claude Opus 4.7 (1M context) --- redisbench_admin/compare/compare.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/redisbench_admin/compare/compare.py b/redisbench_admin/compare/compare.py index 8216d97..0c85d44 100644 --- a/redisbench_admin/compare/compare.py +++ b/redisbench_admin/compare/compare.py @@ -1386,7 +1386,13 @@ def from_rts_to_regression_table( filters_baseline.append("github_repo={}".format(tf_github_repo)) if running_platform is not None: filters_baseline.append("running_platform={}".format(running_platform)) - if baseline_architecture != ARCH_X86: + if baseline_architecture is not None: + # Always apply the arch filter, including for x86_64. Multiple + # architectures coexist under the same test_name in RTS (each + # series carries an explicit arch={x86_64,aarch64} label), so + # without this filter TS.QUERYINDEX returns both and the + # downstream "take the first one" fallback can silently + # cross-compare arches and produce fake regressions/improvements. filters_baseline.append(f"arch={baseline_architecture}") filters_comparison = [ "{}={}".format(by_str_comparison, comparison_str), @@ -1401,7 +1407,8 @@ def from_rts_to_regression_table( filters_comparison.append("github_repo={}".format(tf_github_repo)) if running_platform is not None: filters_comparison.append("running_platform={}".format(running_platform)) - if comparison_architecture != ARCH_X86: + if comparison_architecture is not None: + # See note above on filters_baseline. filters_comparison.append(f"arch={comparison_architecture}") baseline_timeseries = rts.ts().queryindex(filters_baseline) comparison_timeseries = rts.ts().queryindex(filters_comparison) @@ -1933,9 +1940,10 @@ def check_client_side_latency( if running_platform is not None: filters_baseline.append(f"running_platform={running_platform}") filters_comparison.append(f"running_platform={running_platform}") - if baseline_architecture != ARCH_X86: + # Always apply arch filter (see note at the throughput-query site). + if baseline_architecture is not None: filters_baseline.append(f"arch={baseline_architecture}") - if comparison_architecture != ARCH_X86: + if comparison_architecture is not None: filters_comparison.append(f"arch={comparison_architecture}") # Query for client-side latency time-series @@ -2207,9 +2215,10 @@ def perform_variance_and_p99_analysis( if running_platform is not None: filters_baseline.append(f"running_platform={running_platform}") filters_comparison.append(f"running_platform={running_platform}") - if baseline_architecture != ARCH_X86: + # Always apply arch filter (see note at the throughput-query site). + if baseline_architecture is not None: filters_baseline.append(f"arch={baseline_architecture}") - if comparison_architecture != ARCH_X86: + if comparison_architecture is not None: filters_comparison.append(f"arch={comparison_architecture}") # Query for p99 latency time-series @@ -2495,9 +2504,10 @@ def check_latency_for_unstable_throughput( if running_platform is not None: filters_baseline.append(f"running_platform={running_platform}") filters_comparison.append(f"running_platform={running_platform}") - if baseline_architecture != ARCH_X86: + # Always apply arch filter (see note at the throughput-query site). + if baseline_architecture is not None: filters_baseline.append(f"arch={baseline_architecture}") - if comparison_architecture != ARCH_X86: + if comparison_architecture is not None: filters_comparison.append(f"arch={comparison_architecture}") # Query for p50 latency time-series