From 5360fbd13b246d1e9bad016ec97701d92bf1e529 Mon Sep 17 00:00:00 2001
From: Paulo Sousa <paulo.sousa@redis.com>
Date: Fri, 8 May 2026 11:04:40 +0100
Subject: [PATCH] compare: always apply arch filter when an architecture is set

The four `TS.QUERYINDEX` filter-construction sites in
compare.py guarded the `arch={baseline,comparison}_architecture`
filter with `!= ARCH_X86`, so the filter was only ever appended for
non-x86_64 runs. Since both flags default to `ARCH` (= x86_64), the
common case x86_64-vs-x86_64 ran with no `arch=` filter at all.

In RTS, every modern series carries an explicit `arch={x86_64,aarch64}`
label and multiple architectures often coexist under the same
`test_name` (e.g. `Ops/sec/Totals` for x86_64 and
`Ops/sec/Totalsarch=aarch64` for aarch64). Without an arch filter,
`TS.QUERYINDEX` returns both, the downstream
"taking the first one" warning fires, and the picks aren't guaranteed
to match between baseline and comparison sides. Concrete symptom
observed on a recent run: a confident-looking
`-29.5%` regression where the baseline median was the x86_64 series
(18 651) and the comparison median was the aarch64 series (13 154).
Apples-to-apples x86_64-vs-x86_64 was actually -3.6% (within the
joan-side 10.5% CV).

Replace the four `!= ARCH_X86` guards with `is not None` so the
filter is always applied when an architecture is set. After the fix
the filter list now contains `arch=x86_64` (verifiable in the
`Baseline/Comparison filters: [...]` log line) and
`Still multiple time-series after filtering` warnings drop to zero
on the same workload.

The fix touches all four sites: the throughput query in
`from_rts_to_regression_table`, and the three latency-analysis
queries in `check_client_side_latency`,
`perform_variance_and_p99_analysis`, and
`check_latency_for_unstable_throughput`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 redisbench_admin/compare/compare.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/redisbench_admin/compare/compare.py b/redisbench_admin/compare/compare.py
index 8216d97..0c85d44 100644
--- a/redisbench_admin/compare/compare.py
+++ b/redisbench_admin/compare/compare.py
@@ -1386,7 +1386,13 @@ def from_rts_to_regression_table(
             filters_baseline.append("github_repo={}".format(tf_github_repo))
         if running_platform is not None:
             filters_baseline.append("running_platform={}".format(running_platform))
-        if baseline_architecture != ARCH_X86:
+        if baseline_architecture is not None:
+            # Always apply the arch filter, including for x86_64. Multiple
+            # architectures coexist under the same test_name in RTS (each
+            # series carries an explicit arch={x86_64,aarch64} label), so
+            # without this filter TS.QUERYINDEX returns both and the
+            # downstream "take the first one" fallback can silently
+            # cross-compare arches and produce fake regressions/improvements.
             filters_baseline.append(f"arch={baseline_architecture}")
         filters_comparison = [
             "{}={}".format(by_str_comparison, comparison_str),
@@ -1401,7 +1407,8 @@ def from_rts_to_regression_table(
             filters_comparison.append("github_repo={}".format(tf_github_repo))
         if running_platform is not None:
             filters_comparison.append("running_platform={}".format(running_platform))
-        if comparison_architecture != ARCH_X86:
+        if comparison_architecture is not None:
+            # See note above on filters_baseline.
             filters_comparison.append(f"arch={comparison_architecture}")
         baseline_timeseries = rts.ts().queryindex(filters_baseline)
         comparison_timeseries = rts.ts().queryindex(filters_comparison)
@@ -1933,9 +1940,10 @@ def check_client_side_latency(
             if running_platform is not None:
                 filters_baseline.append(f"running_platform={running_platform}")
                 filters_comparison.append(f"running_platform={running_platform}")
-            if baseline_architecture != ARCH_X86:
+            # Always apply arch filter (see note at the throughput-query site).
+            if baseline_architecture is not None:
                 filters_baseline.append(f"arch={baseline_architecture}")
-            if comparison_architecture != ARCH_X86:
+            if comparison_architecture is not None:
                 filters_comparison.append(f"arch={comparison_architecture}")
 
             # Query for client-side latency time-series
@@ -2207,9 +2215,10 @@ def perform_variance_and_p99_analysis(
         if running_platform is not None:
             filters_baseline.append(f"running_platform={running_platform}")
             filters_comparison.append(f"running_platform={running_platform}")
-        if baseline_architecture != ARCH_X86:
+        # Always apply arch filter (see note at the throughput-query site).
+        if baseline_architecture is not None:
             filters_baseline.append(f"arch={baseline_architecture}")
-        if comparison_architecture != ARCH_X86:
+        if comparison_architecture is not None:
             filters_comparison.append(f"arch={comparison_architecture}")
 
         # Query for p99 latency time-series
@@ -2495,9 +2504,10 @@ def check_latency_for_unstable_throughput(
         if running_platform is not None:
             filters_baseline.append(f"running_platform={running_platform}")
             filters_comparison.append(f"running_platform={running_platform}")
-        if baseline_architecture != ARCH_X86:
+        # Always apply arch filter (see note at the throughput-query site).
+        if baseline_architecture is not None:
             filters_baseline.append(f"arch={baseline_architecture}")
-        if comparison_architecture != ARCH_X86:
+        if comparison_architecture is not None:
             filters_comparison.append(f"arch={comparison_architecture}")
 
         # Query for p50 latency time-series