From 393906c657e218cb0230ecf5cecc711f5fefe1fc Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Wed, 6 May 2026 22:53:03 +0000 Subject: [PATCH 1/7] fix(mcp): guard against NaN in ASCII chart bar/line renderers When a dataset contains NULL values they become float NaN after numeric conversion. The horizontal and vertical bar chart renderers passed NaN into int(), raising ValueError. Likewise the line chart extractor would include NaN in the value list, corrupting min/max calculations. Three-layer fix: 1. Filter NaN during value extraction in _generate_ascii_bar_chart and _extract_time_series_data so NaN rows are silently skipped. 2. Add _is_nan_value() guard before int() in _generate_horizontal_bar_chart (bar_length) and _generate_vertical_bar_chart (bar_height) as defence-in-depth; NaN bar size is treated as 0 (no bar drawn). Fixes: ValueError: cannot convert float NaN to integer --- superset/mcp_service/chart/ascii_charts.py | 22 +++- .../mcp_service/chart/test_ascii_charts.py | 104 ++++++++++++++++++ 2 files changed, 122 insertions(+), 4 deletions(-) create mode 100644 tests/unit_tests/mcp_service/chart/test_ascii_charts.py diff --git a/superset/mcp_service/chart/ascii_charts.py b/superset/mcp_service/chart/ascii_charts.py index b5aa4d71f491..0acbe8d72330 100644 --- a/superset/mcp_service/chart/ascii_charts.py +++ b/superset/mcp_service/chart/ascii_charts.py @@ -79,7 +79,11 @@ def _generate_ascii_bar_chart(data: list[Any], width: int, height: int) -> str: label_val = None for _key, val in row.items(): - if isinstance(val, (int, float)) and numeric_val is None: + if ( + isinstance(val, (int, float)) + and not _is_nan_value(val) + and numeric_val is None + ): numeric_val = val elif isinstance(val, str) and label_val is None: label_val = val @@ -121,7 +125,10 @@ def _generate_horizontal_bar_chart( # Calculate bar length if max_val > min_val: normalized = (value - min_val) / (max_val - min_val) - bar_length = max(1, int(normalized * max_bar_width)) + if _is_nan_value(normalized): + bar_length = 0 + else: + bar_length = max(1, int(normalized * max_bar_width)) else: bar_length = 1 @@ -164,7 +171,10 @@ def _generate_vertical_bar_chart( # noqa: C901 for col, value in enumerate(values): if max_val > min_val: normalized = (value - min_val) / (max_val - min_val) - bar_height = max(1, int(normalized * chart_height)) + if _is_nan_value(normalized): + bar_height = 0 + else: + bar_height = max(1, int(normalized * chart_height)) else: bar_height = 1 @@ -274,7 +284,11 @@ def _extract_time_series_data(data: list[Any]) -> tuple[list[float], list[str]]: label_val = None for key, val in row.items(): - if isinstance(val, (int, float)) and numeric_val is None: + if ( + isinstance(val, (int, float)) + and not _is_nan_value(val) + and numeric_val is None + ): numeric_val = val elif isinstance(val, str) and label_val is None: # Use the key name if it looks like a date/time field diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py new file mode 100644 index 000000000000..8d9635fb3436 --- /dev/null +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Unit tests for ascii_charts.py NaN/null value handling.""" + +from superset.mcp_service.chart.ascii_charts import generate_ascii_chart + + +def test_bar_chart_with_null_values_does_not_raise() -> None: + """Bar chart renderer must not crash when dataset rows contain NaN values.""" + data = [ + {"category": "A", "value": 10.0}, + {"category": "B", "value": float("nan")}, + {"category": "C", "value": 30.0}, + ] + result = generate_ascii_chart(data, "bar") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_bar_chart_with_all_null_values_returns_fallback() -> None: + """Bar chart with no valid numeric rows should return a fallback message.""" + data = [ + {"category": "A", "value": float("nan")}, + {"category": "B", "value": float("nan")}, + ] + result = generate_ascii_chart(data, "bar") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_line_chart_with_null_values_does_not_raise() -> None: + """Line chart renderer must not crash when dataset rows contain NaN values.""" + data = [ + {"date": "2024-01", "sales": 100.0}, + {"date": "2024-02", "sales": float("nan")}, + {"date": "2024-03", "sales": 300.0}, + ] + result = generate_ascii_chart(data, "line") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_horizontal_bar_chart_nan_rows_are_skipped() -> None: + """NaN rows must be silently skipped; valid rows render normally.""" + data = [ + {"label": "Alpha", "amount": 50.0}, + {"label": "Beta", "amount": float("nan")}, + {"label": "Gamma", "amount": 150.0}, + ] + result = generate_ascii_chart(data, "bar") + # Valid labels should appear in output; NaN row should not crash + assert "Alpha" in result or "Gamma" in result + + +def test_column_chart_with_null_values_does_not_raise() -> None: + """Column (vertical bar) chart must not crash on NaN values.""" + data = [ + {"x": "Q1", "y": 10.0}, + {"x": "Q2", "y": float("nan")}, + {"x": "Q3", "y": 30.0}, + {"x": "Q4", "y": 40.0}, + ] + result = generate_ascii_chart(data, "column") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_timeseries_bar_with_null_values_does_not_raise() -> None: + """echarts_timeseries_bar chart type must not crash on NaN values.""" + data = [ + {"ts": "2024-01-01", "count": 5.0}, + {"ts": "2024-01-02", "count": float("nan")}, + {"ts": "2024-01-03", "count": 15.0}, + ] + result = generate_ascii_chart(data, "echarts_timeseries_bar") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_chart_with_none_values_does_not_raise() -> None: + """None (SQL NULL) values should be treated identically to NaN.""" + data = [ + {"name": "X", "metric": 100.0}, + {"name": "Y", "metric": None}, + {"name": "Z", "metric": 200.0}, + ] + result = generate_ascii_chart(data, "bar") + assert isinstance(result, str) + assert len(result) > 0 From fb4ccc9215ac80ab9d1226ddb7f8a46ca42d2445 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Wed, 6 May 2026 23:44:44 +0000 Subject: [PATCH 2/7] test(mcp): fix label assertion in NaN bar chart test Vertical bar chart (chosen when avg label length <= 8) truncates labels to 3 characters, so 'Alpha' never appears literally. Use longer labels (> 8 chars average) to force horizontal layout, where the full label is preserved up to 15 characters. --- tests/unit_tests/mcp_service/chart/test_ascii_charts.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py index 8d9635fb3436..29cabb9617c8 100644 --- a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -57,10 +57,12 @@ def test_line_chart_with_null_values_does_not_raise() -> None: def test_horizontal_bar_chart_nan_rows_are_skipped() -> None: """NaN rows must be silently skipped; valid rows render normally.""" + # Use labels longer than 8 chars to force horizontal layout, where full + # label text is preserved (vertical layout truncates to 3 chars). data = [ - {"label": "Alpha", "amount": 50.0}, - {"label": "Beta", "amount": float("nan")}, - {"label": "Gamma", "amount": 150.0}, + {"label": "Alpha Category", "amount": 50.0}, + {"label": "Beta Category", "amount": float("nan")}, + {"label": "Gamma Category", "amount": 150.0}, ] result = generate_ascii_chart(data, "bar") # Valid labels should appear in output; NaN row should not crash From 436e7ae520bde07b475c472f1aec51a06ba02c31 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 7 May 2026 00:02:44 +0000 Subject: [PATCH 3/7] test(mcp): strengthen all-NaN fallback assertion in bar chart test Assert the exact fallback message 'No numeric data found for bar chart' instead of just checking len(result) > 0, which would pass even on unrelated error strings. --- tests/unit_tests/mcp_service/chart/test_ascii_charts.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py index 29cabb9617c8..d050af04103d 100644 --- a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -33,14 +33,13 @@ def test_bar_chart_with_null_values_does_not_raise() -> None: def test_bar_chart_with_all_null_values_returns_fallback() -> None: - """Bar chart with no valid numeric rows should return a fallback message.""" + """Bar chart with no valid numeric rows should return the no-data fallback.""" data = [ {"category": "A", "value": float("nan")}, {"category": "B", "value": float("nan")}, ] result = generate_ascii_chart(data, "bar") - assert isinstance(result, str) - assert len(result) > 0 + assert result == "No numeric data found for bar chart" def test_line_chart_with_null_values_does_not_raise() -> None: From 32192df9321ebd9c733f1cce20b7c82023f8fa33 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 7 May 2026 02:49:07 +0000 Subject: [PATCH 4/7] test(mcp): strengthen NaN bar chart test assertions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - test_horizontal_bar_chart_nan_rows_are_skipped: replace weak `or` assertion with separate `assert "Alpha" in result`, `assert "Gamma" in result`, and `assert "Beta" not in result`, making the test deterministic and verifying the NaN row is actually excluded - test_bar_chart_with_all_null_values_returns_fallback: add `isinstance(result, str)` and `assert "█" not in result` to explicitly verify no bar content is rendered in the fallback path --- tests/unit_tests/mcp_service/chart/test_ascii_charts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py index d050af04103d..823badf8fb71 100644 --- a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -39,7 +39,9 @@ def test_bar_chart_with_all_null_values_returns_fallback() -> None: {"category": "B", "value": float("nan")}, ] result = generate_ascii_chart(data, "bar") + assert isinstance(result, str) assert result == "No numeric data found for bar chart" + assert "█" not in result # No bars should be rendered in the fallback path def test_line_chart_with_null_values_does_not_raise() -> None: @@ -64,8 +66,10 @@ def test_horizontal_bar_chart_nan_rows_are_skipped() -> None: {"label": "Gamma Category", "amount": 150.0}, ] result = generate_ascii_chart(data, "bar") - # Valid labels should appear in output; NaN row should not crash - assert "Alpha" in result or "Gamma" in result + # Both valid labels must appear; the NaN row (Beta) must be absent + assert "Alpha" in result + assert "Gamma" in result + assert "Beta" not in result def test_column_chart_with_null_values_does_not_raise() -> None: From 429e14a6eff5c63d021ed8135c2ee90ba24b49ec Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 7 May 2026 02:50:35 +0000 Subject: [PATCH 5/7] test(mcp): assert horizontal layout in NaN bar chart skipping test Add `assert "Horizontal Bar Chart" in result` to make it explicit that the horizontal renderer is chosen (avg label length 14 > 8 threshold). Horizontal mode preserves full label text; vertical would truncate to 3 chars, invalidating the "Alpha"/"Gamma" presence assertions. --- tests/unit_tests/mcp_service/chart/test_ascii_charts.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py index 823badf8fb71..f2d2843f2a84 100644 --- a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -66,7 +66,10 @@ def test_horizontal_bar_chart_nan_rows_are_skipped() -> None: {"label": "Gamma Category", "amount": 150.0}, ] result = generate_ascii_chart(data, "bar") - # Both valid labels must appear; the NaN row (Beta) must be absent + # avg label length is 14 (> 8 threshold), so horizontal layout is chosen; + # horizontal mode preserves full label text unlike vertical (3-char truncation). + assert "Horizontal Bar Chart" in result + # Both valid labels must appear in full; the NaN row (Beta) must be absent assert "Alpha" in result assert "Gamma" in result assert "Beta" not in result From f80e3b32343cf07d724743dd7c719a34a121ccc2 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 7 May 2026 03:43:12 +0000 Subject: [PATCH 6/7] fix(mcp): exclude booleans from numeric extraction in ASCII chart renderers bool is a subclass of int, so isinstance(True, (int, float)) returns True. Without an explicit bool guard the extractor would lock onto a boolean column (is_active, flag, etc.) and ignore the real numeric metric, producing 0/1 bars instead of actual values. Add not isinstance(val, bool) to the numeric guard in both _generate_ascii_bar_chart and _extract_time_series_data. --- superset/mcp_service/chart/ascii_charts.py | 2 ++ .../mcp_service/chart/test_ascii_charts.py | 33 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/superset/mcp_service/chart/ascii_charts.py b/superset/mcp_service/chart/ascii_charts.py index 0acbe8d72330..1507f40e917f 100644 --- a/superset/mcp_service/chart/ascii_charts.py +++ b/superset/mcp_service/chart/ascii_charts.py @@ -81,6 +81,7 @@ def _generate_ascii_bar_chart(data: list[Any], width: int, height: int) -> str: for _key, val in row.items(): if ( isinstance(val, (int, float)) + and not isinstance(val, bool) and not _is_nan_value(val) and numeric_val is None ): @@ -286,6 +287,7 @@ def _extract_time_series_data(data: list[Any]) -> tuple[list[float], list[str]]: for key, val in row.items(): if ( isinstance(val, (int, float)) + and not isinstance(val, bool) and not _is_nan_value(val) and numeric_val is None ): diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py index f2d2843f2a84..bf7ce0a2e317 100644 --- a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -110,3 +110,36 @@ def test_chart_with_none_values_does_not_raise() -> None: result = generate_ascii_chart(data, "bar") assert isinstance(result, str) assert len(result) > 0 + + +def test_bar_chart_skips_boolean_columns() -> None: + """Boolean fields must not be selected as the numeric metric. + + bool is a subclass of int, so isinstance(True, (int, float)) is True. + Without an explicit bool guard the extractor would lock onto a boolean + column (e.g. is_active=True -> 1) and ignore the real numeric metric. + """ + data = [ + {"label": "Alpha Category", "is_active": True, "revenue": 500.0}, + {"label": "Beta Category", "is_active": False, "revenue": 1500.0}, + {"label": "Gamma Category", "is_active": True, "revenue": 1000.0}, + ] + result = generate_ascii_chart(data, "bar") + # If booleans are correctly skipped, revenue (500/1500/1000) drives the + # bars. The max value is 1500, so we expect at least one K-formatted value. + assert "1.5K" in result or "1500" in result or "1.0K" in result + + +def test_line_chart_skips_boolean_columns() -> None: + """Boolean fields must not be selected as numeric points in line charts.""" + data = [ + {"date": "2024-01", "is_active": True, "sales": 100.0}, + {"date": "2024-02", "is_active": False, "sales": 200.0}, + {"date": "2024-03", "is_active": True, "sales": 300.0}, + ] + result = generate_ascii_chart(data, "line") + assert isinstance(result, str) + assert len(result) > 0 + # If booleans were selected, the range would be 0-1; if revenue is + # selected the range includes values up to 300. + assert "300" in result or "200" in result From a560f84f4c546534ee2e007fd0ff87d3dfd1f280 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 7 May 2026 16:44:53 +0000 Subject: [PATCH 7/7] fix(mcp): apply consistent NaN+bool guards to scatter extractor and strengthen tests - Replace old `val != val` NaN idiom and add bool guard in _extract_scatter_data to match the pattern used in bar/line extractors - Add scatter chart NaN and boolean test coverage - Remove redundant bar fallback assertion; strengthen bool-skip negative check --- superset/mcp_service/chart/ascii_charts.py | 19 +++++------ .../mcp_service/chart/test_ascii_charts.py | 32 ++++++++++++++++++- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/superset/mcp_service/chart/ascii_charts.py b/superset/mcp_service/chart/ascii_charts.py index 1507f40e917f..d8af0be3d6eb 100644 --- a/superset/mcp_service/chart/ascii_charts.py +++ b/superset/mcp_service/chart/ascii_charts.py @@ -536,9 +536,11 @@ def _extract_scatter_data( if data and isinstance(data[0], dict): # Find the first two numeric columns for key, val in data[0].items(): - if isinstance(val, (int, float)) and not ( - isinstance(val, float) and (val != val) - ): # Exclude NaN + if ( + isinstance(val, (int, float)) + and not isinstance(val, bool) + and not _is_nan_value(val) + ): numeric_columns.append(key) if len(numeric_columns) >= 2: @@ -550,15 +552,14 @@ def _extract_scatter_data( if isinstance(row, dict): x_val = row.get(x_column) y_val = row.get(y_column) - # Check for valid numbers (not NaN) if ( isinstance(x_val, (int, float)) + and not isinstance(x_val, bool) + and not _is_nan_value(x_val) and isinstance(y_val, (int, float)) - and not ( - isinstance(x_val, float) and (x_val != x_val) - ) # Not NaN - and not (isinstance(y_val, float) and (y_val != y_val)) - ): # Not NaN + and not isinstance(y_val, bool) + and not _is_nan_value(y_val) + ): x_values.append(x_val) y_values.append(y_val) diff --git a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py index bf7ce0a2e317..45371f0f07a8 100644 --- a/tests/unit_tests/mcp_service/chart/test_ascii_charts.py +++ b/tests/unit_tests/mcp_service/chart/test_ascii_charts.py @@ -41,7 +41,6 @@ def test_bar_chart_with_all_null_values_returns_fallback() -> None: result = generate_ascii_chart(data, "bar") assert isinstance(result, str) assert result == "No numeric data found for bar chart" - assert "█" not in result # No bars should be rendered in the fallback path def test_line_chart_with_null_values_does_not_raise() -> None: @@ -128,6 +127,9 @@ def test_bar_chart_skips_boolean_columns() -> None: # If booleans are correctly skipped, revenue (500/1500/1000) drives the # bars. The max value is 1500, so we expect at least one K-formatted value. assert "1.5K" in result or "1500" in result or "1.0K" in result + # The scale min/max would be "0.0" and "1.0" only if booleans were chosen; + # with revenue selected the scale starts at 500 (never "Scale: 0.0"). + assert "Scale: 0.0" not in result def test_line_chart_skips_boolean_columns() -> None: @@ -143,3 +145,31 @@ def test_line_chart_skips_boolean_columns() -> None: # If booleans were selected, the range would be 0-1; if revenue is # selected the range includes values up to 300. assert "300" in result or "200" in result + + +def test_scatter_chart_with_nan_values_does_not_raise() -> None: + """Scatter chart renderer must not crash when dataset rows contain NaN values.""" + data = [ + {"x": 1.0, "y": 2.0}, + {"x": float("nan"), "y": 4.0}, + {"x": 5.0, "y": float("nan")}, + {"x": 7.0, "y": 8.0}, + ] + result = generate_ascii_chart(data, "scatter") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_scatter_chart_skips_boolean_columns() -> None: + """Boolean fields must not be selected as X/Y axes in scatter charts.""" + data = [ + {"is_active": True, "x": 10.0, "y": 20.0}, + {"is_active": False, "x": 30.0, "y": 40.0}, + {"is_active": True, "x": 50.0, "y": 60.0}, + ] + result = generate_ascii_chart(data, "scatter") + # If booleans are correctly skipped, x/y (10-50 / 20-60) drive the axes; + # boolean-driven axes would be confined to 0-1. + assert isinstance(result, str) + assert len(result) > 0 + assert "10" in result or "30" in result or "50" in result