Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 94 additions & 16 deletions superset/mcp_service/chart/tool/get_chart_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,21 @@ def _resolve_metrics_and_groupby(
return _resolve_metrics(form_data, viz_type), _resolve_groupby(form_data)


def _extract_x_axis_col(form_data: dict[str, Any]) -> str | None:
"""Return the x_axis column name from form_data, or None if not set.

``x_axis`` may be stored as a plain column-name string or as an adhoc
column dict (``{"column_name": "...", ...}``).
"""
x_axis = form_data.get("x_axis")
if isinstance(x_axis, str) and x_axis:
return x_axis
if isinstance(x_axis, dict):
col_name = x_axis.get("column_name")
return col_name if isinstance(col_name, str) and col_name else None
return None


def _resolve_engine(
datasource_id: Any,
datasource_type: str,
Expand All @@ -162,6 +177,58 @@ def _resolve_engine(
return "base"


def _build_single_query_dict(
form_data: dict[str, Any],
columns: list[Any],
metrics: list[Any],
) -> dict[str, Any]:
"""Build one query entry for QueryContextFactory from form_data fields."""
qd: dict[str, Any] = {"columns": columns, "metrics": metrics}
if time_range := form_data.get("time_range"):
qd["time_range"] = time_range
if filters := form_data.get("filters"):
qd["filters"] = filters
if (row_limit := form_data.get("row_limit")) is not None:
qd["row_limit"] = row_limit
Comment on lines +191 to +192
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: Query dict construction always reads row_limit and never supports row_limit_b, so the secondary mixed_timeseries query cannot honor its own row limit. This causes incorrect SQL LIMIT behavior for query B. Use the secondary-specific limit when building the secondary query. [logic error]

Severity Level: Major ⚠️
- ❌ Secondary series row limits ignored in get_chart_sql output.
- ⚠️ SQL preview shows incorrect LIMIT for mixed_timeseries B.
Steps of Reproduction ✅
1. Configure a `mixed_timeseries` chart whose Explore form_data contains both `row_limit`
and a different `row_limit_b` (per-series secondary row limit). Example persisted
configuration appears in `superset/examples/featured_charts/charts/Mixed.yaml:80-81` where
both `row_limit` and `row_limit_b` are defined.

2. Use the MCP `get_chart_sql` tool for this chart in a context that goes through the
form_data fallback, e.g. request SQL for an unsaved Explore state via `form_data_key` so
`_handle_unsaved_chart_sql` (`superset/mcp_service/chart/tool/get_chart_sql.py:24-56`)
loads the cached form_data and calls `_sql_from_form_data(form_data, chart=None)` at lines
39-56.

3. `_sql_from_form_data` invokes `_build_query_context_from_form_data` (lines 218-305),
which builds the primary query via `_build_single_query_dict(form_data, groupby, metrics)`
and the secondary mixed_timeseries query via `_build_mixed_timeseries_secondary` (lines
196-215). Inside `_build_mixed_timeseries_secondary`, the secondary query dict is
constructed with `qd = _build_single_query_dict(form_data, groupby_b, metrics_b)` at line
212.

4. `_build_single_query_dict` (lines 180-193) always reads `row_limit` from
`form_data.get("row_limit")` and, if set, assigns `qd["row_limit"] = row_limit` (lines
191-192). There is no handling of `row_limit_b`, so both the primary query and secondary
query use the same `row_limit` value. When `ChartDataCommand` executes this `QueryContext`
(`_sql_from_form_data` lines 44-49), the generated SQL for series B uses the primary row
limit instead of the configured `row_limit_b`, making the SQL preview inconsistent with
the chart configuration.

Fix in Cursor | Fix in VSCode Claude

(Use Cmd/Ctrl + Click for best experience)

Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** superset/mcp_service/chart/tool/get_chart_sql.py
**Line:** 191:192
**Comment:**
	*Logic Error: Query dict construction always reads `row_limit` and never supports `row_limit_b`, so the secondary `mixed_timeseries` query cannot honor its own row limit. This causes incorrect SQL LIMIT behavior for query B. Use the secondary-specific limit when building the secondary query.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix
👍 | 👎

return qd


def _build_mixed_timeseries_secondary(
form_data: dict[str, Any],
x_axis_col: str | None,
engine: str = "base",
) -> dict[str, Any]:
"""Build the secondary query dict for the ``mixed_timeseries`` viz type.

``mixed_timeseries`` has two independent series layers; the secondary
layer uses ``metrics_b`` / ``groupby_b`` instead of the primary fields.
Secondary-specific overrides (``time_range_b``, ``row_limit_b``,
``adhoc_filters_b``) replace the corresponding primary values so the
generated SQL accurately reflects each series' independent configuration.
"""
metrics_b: list[Any] = list(form_data.get("metrics_b") or [])
raw_b = form_data.get("groupby_b") or []
groupby_b: list[Any] = [raw_b] if isinstance(raw_b, str) else list(raw_b)
if x_axis_col and x_axis_col not in groupby_b:
groupby_b = [x_axis_col] + groupby_b
qd = _build_single_query_dict(form_data, groupby_b, metrics_b)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: The secondary query for mixed_timeseries is built from the original form_data without translating _b controls to base keys, so secondary-only filters (for example adhoc_filters_b after preprocessing) are never applied. This makes query B SQL use primary filter state and can return incorrect results. Build a secondary form-data view (equivalent to retainFormDataSuffix(..., "_b")) before constructing query B. [logic error]

Severity Level: Major ⚠️
- ❌ get_chart_sql misrepresents mixed_timeseries secondary series filters.
- ⚠️ Unsaved mixed_timeseries SQL omits secondary adhoc_filters_b.
Steps of Reproduction ✅
1. Create or edit a `mixed_timeseries` chart in Explore so its form_data (as stored in
`Slice.params` / cached via `GetFormDataCommand`) contains both primary and secondary
series, including `metrics`, `groupby`, `metrics_b`, `groupby_b`, and a secondary-only
adhoc filter in `adhoc_filters_b`. This matches how mixed charts are migrated/built in
`superset/migrations/shared/migrate_viz/processors.py:17-28` where `adhoc_filters_b` is
populated from `adhoc_filters`.

2. From an MCP client, request SQL for this chart using only `form_data_key` (unsaved
state), triggering `_handle_unsaved_chart_sql` at
`superset/mcp_service/chart/tool/get_chart_sql.py:24-56`, which loads the cached form_data
and calls `_sql_from_form_data(form_data, chart=None)` at lines 39-56.

3. `_sql_from_form_data` calls `_build_query_context_from_form_data` (lines 218-305).
Inside that function, adhoc filters are preprocessed by `merge_extra_filters` and
`split_adhoc_filters_into_base_filters` at lines 250-263, but
`split_adhoc_filters_into_base_filters` (`superset/utils/core.py:1387-1400`) only reads
the `adhoc_filters` key, never `adhoc_filters_b`, so secondary-only adhoc filters remain
unprocessed on `form_data` and are not converted into `filters`/`where`/`having`.

4. Still in `_build_query_context_from_form_data`, the secondary query dict for
mixed_timeseries is built by `_build_mixed_timeseries_secondary` (lines 196-215), which
calls `qd = _build_single_query_dict(form_data, groupby_b, metrics_b)` at line 212.
`_build_single_query_dict` (lines 180-193) pulls `filters` and `time_range` from the
shared `form_data`, so the secondary query's `qd["filters"]` is based only on primary
`adhoc_filters` that were split into base filters, while `adhoc_filters_b` is never
applied. When `ChartDataCommand` later runs on this `QueryContext` (`_sql_from_form_data`
lines 44-49), the rendered SQL for query B ignores its intended secondary-only filters.

Fix in Cursor | Fix in VSCode Claude

(Use Cmd/Ctrl + Click for best experience)

Prompt for AI Agent 🤖
This is a comment left during a code review.

**Path:** superset/mcp_service/chart/tool/get_chart_sql.py
**Line:** 212:212
**Comment:**
	*Logic Error: The secondary query for `mixed_timeseries` is built from the original `form_data` without translating `_b` controls to base keys, so secondary-only filters (for example `adhoc_filters_b` after preprocessing) are never applied. This makes query B SQL use primary filter state and can return incorrect results. Build a secondary form-data view (equivalent to `retainFormDataSuffix(..., "_b")`) before constructing query B.

Validate the correctness of the flagged issue. If correct, How can I resolve this? If you propose a fix, implement it and please make it concise.
Once fix is implemented, also check other comments on the same PR, and ask user if the user wants to fix the rest of the comments as well. if said yes, then fetch all the comments validate the correctness and implement a minimal fix
👍 | 👎

if time_range_b := form_data.get("time_range_b"):
qd["time_range"] = time_range_b
if (row_limit_b := form_data.get("row_limit_b")) is not None:
qd["row_limit"] = row_limit_b
# Process adhoc_filters_b into concrete filter clauses for the secondary
# query, mirroring how split_adhoc_filters_into_base_filters handles the
# primary adhoc_filters in _build_query_context_from_form_data.
if adhoc_filters_b := form_data.get("adhoc_filters_b"):
from superset.utils.core import split_adhoc_filters_into_base_filters

secondary_fd: dict[str, Any] = {"adhoc_filters": adhoc_filters_b}
split_adhoc_filters_into_base_filters(secondary_fd, engine)
if secondary_filters := secondary_fd.get("filters"):
qd["filters"] = secondary_filters
return qd


def _build_query_context_from_form_data(
form_data: dict[str, Any],
chart: "Slice | None" = None,
Expand Down Expand Up @@ -209,22 +276,33 @@ def _build_query_context_from_form_data(
merge_extra_filters(form_data)
split_adhoc_filters_into_base_filters(form_data, engine)

# Build query dict with temporal and filter fields.
# QueryObjectFactory.create() accepts time_range as a top-level kwarg
# and converts it to from_dttm/to_dttm for the QueryObject.
query_dict: dict[str, Any] = {
"columns": groupby,
"metrics": metrics,
}

if time_range := form_data.get("time_range"):
query_dict["time_range"] = time_range

if filters := form_data.get("filters"):
query_dict["filters"] = filters
viz_type: str = (
form_data.get("viz_type")
or (getattr(chart, "viz_type", "") if chart else "")
or ""
)
is_timeseries = (
viz_type.startswith("echarts_timeseries") or viz_type == "mixed_timeseries"
)

if (row_limit := form_data.get("row_limit")) is not None:
query_dict["row_limit"] = row_limit
# For echarts_timeseries_* and mixed_timeseries charts the temporal
# column is stored in x_axis rather than groupby. Prepend it so the
# generated SQL includes the time axis.
x_axis_col: str | None = None
if is_timeseries:
x_axis_col = _extract_x_axis_col(form_data)
if x_axis_col and x_axis_col not in groupby:
groupby = [x_axis_col] + groupby

queries: list[dict[str, Any]] = [
_build_single_query_dict(form_data, groupby, metrics)
]

# mixed_timeseries exposes two independent query layers (primary and
# secondary). Build the second query from metrics_b / groupby_b so
# that get_chart_sql returns SQL for both and neither is silently lost.
if viz_type == "mixed_timeseries":
queries.append(_build_mixed_timeseries_secondary(form_data, x_axis_col, engine))

# Ensure datasource fields satisfy DatasourceDict typing requirements.
# datasource_id must be int | str; datasource_type must be str.
Expand All @@ -238,7 +316,7 @@ def _build_query_context_from_form_data(

return factory.create(
datasource={"id": resolved_id, "type": resolved_type_str},
queries=[query_dict],
queries=queries,
form_data=form_data,
result_type=ChartDataResultType.QUERY,
force=False,
Expand Down
Loading
Loading