diff --git a/superset/mcp_service/system/tool/get_schema.py b/superset/mcp_service/system/tool/get_schema.py
index c2bb9b715cad..a64a91d28bce 100644
--- a/superset/mcp_service/system/tool/get_schema.py
+++ b/superset/mcp_service/system/tool/get_schema.py
@@ -56,6 +56,7 @@
 from superset.mcp_service.privacy import (
     PrivacyError,
     remove_chart_data_model_columns,
+    SELF_REFERENCING_FILTER_COLUMNS,
     user_can_view_data_model_metadata,
 )
 
@@ -77,6 +78,7 @@ def _get_chart_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]:
         search_columns=CHART_SEARCH_COLUMNS,
         default_sort="changed_on",
         default_sort_direction="desc",
+        exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS),
         logger=logger,
     )
 
@@ -96,6 +98,7 @@ def _get_dataset_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]:
         search_columns=DATASET_SEARCH_COLUMNS,
         default_sort="changed_on",
         default_sort_direction="desc",
+        exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS),
         logger=logger,
     )
 
@@ -115,6 +118,7 @@ def _get_dashboard_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]:
         search_columns=DASHBOARD_SEARCH_COLUMNS,
         default_sort="changed_on",
         default_sort_direction="desc",
+        exclude_filter_columns=set(SELF_REFERENCING_FILTER_COLUMNS),
         logger=logger,
     )
 
diff --git a/tests/unit_tests/mcp_service/system/tool/test_get_schema.py b/tests/unit_tests/mcp_service/system/tool/test_get_schema.py
index 65ffd633f34c..1df8dc2ca48b 100644
--- a/tests/unit_tests/mcp_service/system/tool/test_get_schema.py
+++ b/tests/unit_tests/mcp_service/system/tool/test_get_schema.py
@@ -355,6 +355,93 @@ async def test_get_schema_omits_user_directory_columns(
                 assert field not in info["filter_columns"]
                 assert field not in info["sortable_columns"]
 
+    @patch("superset.daos.chart.ChartDAO.get_filterable_columns_and_operators")
+    @pytest.mark.asyncio
+    async def test_get_schema_chart_omits_self_referencing_filter_columns(
+        self, mock_filters, mcp_server
+    ):
+        """Test that chart schema does not advertise self-referencing filter columns.
+
+        Even if the DAO returns created_by_fk or owner, they must be excluded so
+        LLMs cannot discover and use them to enumerate user IDs.
+        """
+        mock_filters.return_value = {
+            "slice_name": ["eq", "ilike"],
+            "created_by_fk": ["eq"],
+            "owner": ["eq", "in"],
+            "created_by_fk_or_owner": ["eq"],
+        }
+
+        async with Client(mcp_server) as client:
+            result = await client.call_tool(
+                "get_schema", {"request": {"model_type": "chart"}}
+            )
+
+            data = json.loads(result.content[0].text)
+            info = data["schema_info"]
+
+            assert "slice_name" in info["filter_columns"]
+            for field in ("created_by_fk", "owner", "created_by_fk_or_owner"):
+                assert field not in info["filter_columns"]
+
+    @patch("superset.daos.dataset.DatasetDAO.get_filterable_columns_and_operators")
+    @pytest.mark.asyncio
+    async def test_get_schema_dataset_omits_self_referencing_filter_columns(
+        self, mock_filters, mcp_server
+    ):
+        """Test that dataset schema does not advertise self-referencing filter columns.
+
+        Even if the DAO returns created_by_fk or owner, they must be excluded so
+        LLMs cannot discover and use them to enumerate user IDs.
+        """
+        mock_filters.return_value = {
+            "table_name": ["eq", "ilike"],
+            "created_by_fk": ["eq"],
+            "owner": ["eq", "in"],
+            "created_by_fk_or_owner": ["eq"],
+        }
+
+        async with Client(mcp_server) as client:
+            result = await client.call_tool(
+                "get_schema", {"request": {"model_type": "dataset"}}
+            )
+
+            data = json.loads(result.content[0].text)
+            info = data["schema_info"]
+
+            assert "table_name" in info["filter_columns"]
+            for field in ("created_by_fk", "owner", "created_by_fk_or_owner"):
+                assert field not in info["filter_columns"]
+
+    @patch("superset.daos.dashboard.DashboardDAO.get_filterable_columns_and_operators")
+    @pytest.mark.asyncio
+    async def test_get_schema_dashboard_omits_self_referencing_filter_columns(
+        self, mock_filters, mcp_server
+    ):
+        """Test dashboard schema omits self-referencing filter columns.
+
+        Even if the DAO returns created_by_fk or owner, they must be excluded
+        so LLMs cannot discover and use them to enumerate user IDs.
+        """
+        mock_filters.return_value = {
+            "dashboard_title": ["eq", "ilike"],
+            "created_by_fk": ["eq"],
+            "owner": ["eq", "in"],
+            "created_by_fk_or_owner": ["eq"],
+        }
+
+        async with Client(mcp_server) as client:
+            result = await client.call_tool(
+                "get_schema", {"request": {"model_type": "dashboard"}}
+            )
+
+            data = json.loads(result.content[0].text)
+            info = data["schema_info"]
+
+            assert "dashboard_title" in info["filter_columns"]
+            for field in ("created_by_fk", "owner", "created_by_fk_or_owner"):
+                assert field not in info["filter_columns"]
+
 
 class TestGetSchemaEdgeCases:
     """Test edge cases for get_schema tool."""