From 373c366907727989ec842bda436917c621f2fd63 Mon Sep 17 00:00:00 2001 From: Elizabeth Thompson Date: Fri, 1 May 2026 23:10:32 +0000 Subject: [PATCH 1/3] fix(mcp): exclude self-referencing filter columns from get_schema output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After #39638, chart/dataset/dashboard ModelGetSchemaCore instances had no exclude_filter_columns set, so get_schema(model_type='chart|dataset|dashboard') still advertised created_by_fk and owner in filter_columns. LLMs would call get_schema to discover available filters, see those columns, and pass real user IDs directly — bypassing the created_by_me/owned_by_me server-side injection added in #39638. Pass SELF_REFERENCING_FILTER_COLUMNS as exclude_filter_columns for chart, dataset, and dashboard schema cores (the database core already excluded its user-directory columns via DATABASE_EXCLUDE_COLUMNS). Co-Authored-By: Claude Sonnet 4.6 --- superset/mcp_service/system/tool/get_schema.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/superset/mcp_service/system/tool/get_schema.py b/superset/mcp_service/system/tool/get_schema.py index c2bb9b715cad..a64a91d28bce 100644 --- a/superset/mcp_service/system/tool/get_schema.py +++ b/superset/mcp_service/system/tool/get_schema.py @@ -56,6 +56,7 @@ from superset.mcp_service.privacy import ( PrivacyError, remove_chart_data_model_columns, + SELF_REFERENCING_FILTER_COLUMNS, user_can_view_data_model_metadata, ) @@ -77,6 +78,7 @@ def _get_chart_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: search_columns=CHART_SEARCH_COLUMNS, default_sort="changed_on", default_sort_direction="desc", + exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS), logger=logger, ) @@ -96,6 +98,7 @@ def _get_dataset_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: search_columns=DATASET_SEARCH_COLUMNS, default_sort="changed_on", default_sort_direction="desc", + exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS), logger=logger, ) @@ -115,6 +118,7 @@ def _get_dashboard_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: search_columns=DASHBOARD_SEARCH_COLUMNS, default_sort="changed_on", default_sort_direction="desc", + exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS), logger=logger, ) From f6ef071d2b9cf4822c2f3901eb8722c267977913 Mon Sep 17 00:00:00 2001 From: Elizabeth Thompson Date: Fri, 1 May 2026 23:43:52 +0000 Subject: [PATCH 2/3] test(mcp): add get_schema filter_columns exclusion tests for chart and dataset The existing test only covered dashboard. Add parallel tests for chart and dataset to verify that created_by_fk, owner, and created_by_fk_or_owner are excluded from filter_columns even when the DAO returns them. Co-Authored-By: Claude Sonnet 4.6 --- .../system/tool/test_get_schema.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/unit_tests/mcp_service/system/tool/test_get_schema.py b/tests/unit_tests/mcp_service/system/tool/test_get_schema.py index 65ffd633f34c..714b93c7f607 100644 --- a/tests/unit_tests/mcp_service/system/tool/test_get_schema.py +++ b/tests/unit_tests/mcp_service/system/tool/test_get_schema.py @@ -355,6 +355,62 @@ async def test_get_schema_omits_user_directory_columns( assert field not in info["filter_columns"] assert field not in info["sortable_columns"] + @patch("superset.daos.chart.ChartDAO.get_filterable_columns_and_operators") + @pytest.mark.asyncio + async def test_get_schema_chart_omits_self_referencing_filter_columns( + self, mock_filters, mcp_server + ): + """Test that chart schema does not advertise self-referencing filter columns. + + Even if the DAO returns created_by_fk or owner, they must be excluded so + LLMs cannot discover and use them to enumerate user IDs. + """ + mock_filters.return_value = { + "slice_name": ["eq", "ilike"], + "created_by_fk": ["eq"], + "owner": ["eq", "in"], + } + + async with Client(mcp_server) as client: + result = await client.call_tool( + "get_schema", {"request": {"model_type": "chart"}} + ) + + data = json.loads(result.content[0].text) + info = data["schema_info"] + + assert "slice_name" in info["filter_columns"] + for field in ("created_by_fk", "owner", "created_by_fk_or_owner"): + assert field not in info["filter_columns"] + + @patch("superset.daos.dataset.DatasetDAO.get_filterable_columns_and_operators") + @pytest.mark.asyncio + async def test_get_schema_dataset_omits_self_referencing_filter_columns( + self, mock_filters, mcp_server + ): + """Test that dataset schema does not advertise self-referencing filter columns. + + Even if the DAO returns created_by_fk or owner, they must be excluded so + LLMs cannot discover and use them to enumerate user IDs. + """ + mock_filters.return_value = { + "table_name": ["eq", "ilike"], + "created_by_fk": ["eq"], + "owner": ["eq", "in"], + } + + async with Client(mcp_server) as client: + result = await client.call_tool( + "get_schema", {"request": {"model_type": "dataset"}} + ) + + data = json.loads(result.content[0].text) + info = data["schema_info"] + + assert "table_name" in info["filter_columns"] + for field in ("created_by_fk", "owner", "created_by_fk_or_owner"): + assert field not in info["filter_columns"] + class TestGetSchemaEdgeCases: """Test edge cases for get_schema tool.""" From c0ef691886b23bdc7b931a6e261f44f712093917 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 7 May 2026 15:37:16 +0000 Subject: [PATCH 3/3] test(mcp): add created_by_fk_or_owner to filter mocks and dashboard test Add `created_by_fk_or_owner` to the mock return values in the chart and dataset self-referencing filter tests so the exclusion is actually exercised (previously the assertion was vacuously true since the key was never in the mock). Also add a matching test for the dashboard schema core, which received the same exclude_filter_columns fix but had no corresponding coverage. --- .../system/tool/test_get_schema.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/unit_tests/mcp_service/system/tool/test_get_schema.py b/tests/unit_tests/mcp_service/system/tool/test_get_schema.py index 714b93c7f607..1df8dc2ca48b 100644 --- a/tests/unit_tests/mcp_service/system/tool/test_get_schema.py +++ b/tests/unit_tests/mcp_service/system/tool/test_get_schema.py @@ -369,6 +369,7 @@ async def test_get_schema_chart_omits_self_referencing_filter_columns( "slice_name": ["eq", "ilike"], "created_by_fk": ["eq"], "owner": ["eq", "in"], + "created_by_fk_or_owner": ["eq"], } async with Client(mcp_server) as client: @@ -397,6 +398,7 @@ async def test_get_schema_dataset_omits_self_referencing_filter_columns( "table_name": ["eq", "ilike"], "created_by_fk": ["eq"], "owner": ["eq", "in"], + "created_by_fk_or_owner": ["eq"], } async with Client(mcp_server) as client: @@ -411,6 +413,35 @@ async def test_get_schema_dataset_omits_self_referencing_filter_columns( for field in ("created_by_fk", "owner", "created_by_fk_or_owner"): assert field not in info["filter_columns"] + @patch("superset.daos.dashboard.DashboardDAO.get_filterable_columns_and_operators") + @pytest.mark.asyncio + async def test_get_schema_dashboard_omits_self_referencing_filter_columns( + self, mock_filters, mcp_server + ): + """Test dashboard schema omits self-referencing filter columns. + + Even if the DAO returns created_by_fk or owner, they must be excluded + so LLMs cannot discover and use them to enumerate user IDs. + """ + mock_filters.return_value = { + "dashboard_title": ["eq", "ilike"], + "created_by_fk": ["eq"], + "owner": ["eq", "in"], + "created_by_fk_or_owner": ["eq"], + } + + async with Client(mcp_server) as client: + result = await client.call_tool( + "get_schema", {"request": {"model_type": "dashboard"}} + ) + + data = json.loads(result.content[0].text) + info = data["schema_info"] + + assert "dashboard_title" in info["filter_columns"] + for field in ("created_by_fk", "owner", "created_by_fk_or_owner"): + assert field not in info["filter_columns"] + class TestGetSchemaEdgeCases: """Test edge cases for get_schema tool."""