From 0091d76fb8dd157f1d6cf8115aca4652585b2357 Mon Sep 17 00:00:00 2001 From: Matt Fitzgerald Date: Tue, 5 May 2026 19:04:32 +1000 Subject: [PATCH 1/4] fix(plugin-chart-ag-grid-table): use display text for filter and sort on HTML cells The Table V2 chart's TextCellRenderer renders cells containing HTML markup (e.g. anchor tags) via dangerouslySetInnerHTML, but the column's filterValueGetter and comparator were not configured for string columns. As a result both filter and sort operated on the raw HTML string instead of the visible text: - Column sort ordered rows by raw HTML lexically, so the URL inside the markup dictated ordering instead of the visible label (e.g. all rows with a `https://aaa/...` URL sorted before rows with `https://bbb/...` regardless of the label shown to the user). - The text "Contains" filter matched against the raw HTML, causing false positives on URL fragments and false negatives when typing the visible label. Add a small `htmlTextFilterValueGetter` utility (with a matching `htmlTextComparator`) that extracts visible text from HTML values via DOMParser + sanitizeHtml, and wire it into the colDef for string columns. Pass-through for non-HTML values. --- .../utils/htmlTextFilterValueGetter.test.ts | 74 +++++++++++++++++++ .../src/utils/htmlTextFilterValueGetter.ts | 52 +++++++++++++ .../src/utils/useColDefs.ts | 10 +++ 3 files changed, 136 insertions(+) create mode 100644 superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts create mode 100644 superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts new file mode 100644 index 000000000000..ad91575e13ce --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { ValueGetterParams } from '@superset-ui/core/components/ThemedAgGridReact'; +import htmlTextFilterValueGetter, { + htmlTextComparator, +} from './htmlTextFilterValueGetter'; + +const makeParams = (value: unknown): ValueGetterParams => + ({ + data: { foo: value }, + colDef: { field: 'foo' }, + }) as unknown as ValueGetterParams; + +test('htmlTextFilterValueGetter extracts visible text from HTML anchor', () => { + expect( + htmlTextFilterValueGetter( + makeParams( + 'S18_3232', + ), + ), + ).toBe('S18_3232'); +}); + +test('htmlTextFilterValueGetter strips nested HTML markup', () => { + expect( + htmlTextFilterValueGetter( + makeParams('
Hello World
'), + ), + ).toBe('Hello World'); +}); + +test('htmlTextFilterValueGetter passes plain strings through', () => { + expect(htmlTextFilterValueGetter(makeParams('plain value'))).toBe( + 'plain value', + ); +}); + +test('htmlTextFilterValueGetter passes non-string values through', () => { + expect(htmlTextFilterValueGetter(makeParams(42))).toBe(42); + expect(htmlTextFilterValueGetter(makeParams(null))).toBeNull(); + expect(htmlTextFilterValueGetter(makeParams(undefined))).toBeUndefined(); +}); + +test('htmlTextComparator orders by visible text, not raw HTML', () => { + // URL prefixes (zzz vs bbb) would flip the order under raw-HTML sort, + // but the visible labels (S700_4002 vs S72_3212) sort the other way. + const left = 'S700_4002'; + const right = 'S72_3212'; + expect(htmlTextComparator(left, right)).toBeLessThan(0); +}); + +test('htmlTextComparator handles nulls and numbers', () => { + expect(htmlTextComparator(null, null)).toBe(0); + expect(htmlTextComparator(null, 'x')).toBeLessThan(0); + expect(htmlTextComparator('x', null)).toBeGreaterThan(0); + expect(htmlTextComparator(1, 2)).toBeLessThan(0); + expect(htmlTextComparator(2, 1)).toBeGreaterThan(0); +}); diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts new file mode 100644 index 000000000000..eb7f291c6537 --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { isProbablyHTML, sanitizeHtml } from '@superset-ui/core'; +import { ValueGetterParams } from '@superset-ui/core/components/ThemedAgGridReact'; + +const stripHtmlToText = (html: string): string => { + const doc = new DOMParser().parseFromString(sanitizeHtml(html), 'text/html'); + return (doc.body.textContent || '').trim(); +}; + +/** + * Returns the visible-text representation of an HTML cell value so AG Grid + * filters and sort operate on what the user sees, not the underlying markup. + * Pass-through for non-HTML values. + */ +const htmlTextFilterValueGetter = (params: ValueGetterParams) => { + const raw = params.data?.[params.colDef.field as string]; + if (typeof raw === 'string' && isProbablyHTML(raw)) { + return stripHtmlToText(raw); + } + return raw; +}; + +export const htmlTextComparator = (a: unknown, b: unknown): number => { + const toText = (v: unknown) => + typeof v === 'string' && isProbablyHTML(v) ? stripHtmlToText(v) : v; + const aT = toText(a); + const bT = toText(b); + if (aT == null && bT == null) return 0; + if (aT == null) return -1; + if (bT == null) return 1; + if (typeof aT === 'number' && typeof bT === 'number') return aT - bT; + return String(aT).localeCompare(String(bT)); +}; + +export default htmlTextFilterValueGetter; diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts index 48f713aabbeb..9de6c840da34 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts @@ -32,6 +32,9 @@ import { } from '../types'; import getCellClass from './getCellClass'; import filterValueGetter from './filterValueGetter'; +import htmlTextFilterValueGetter, { + htmlTextComparator, +} from './htmlTextFilterValueGetter'; import dateFilterComparator from './dateFilterComparator'; import DateWithFormatter from './DateWithFormatter'; import { getAggFunc } from './getAggFunc'; @@ -317,6 +320,13 @@ export const useColDefs = ({ ...(isPercentMetric && { filterValueGetter, }), + ...(dataType === GenericDataType.String && { + // HTML cells (e.g. anchor markup) are rendered by TextCellRenderer + // via dangerouslySetInnerHTML; without these the set-filter dropdown + // shows raw markup and sort orders by raw HTML lexically. + filterValueGetter: htmlTextFilterValueGetter, + comparator: htmlTextComparator, + }), ...(dataType === GenericDataType.Temporal && { // Use dateFilterValueGetter so AG Grid correctly identifies null dates for blank filter filterValueGetter: dateFilterValueGetter, From f91fa870c6a17399e0b53cc0ca0b402d845ec39b Mon Sep 17 00:00:00 2001 From: Matt Fitzgerald Date: Wed, 6 May 2026 08:11:58 +1000 Subject: [PATCH 2/4] fix(plugin-chart-ag-grid-table): preserve default string ordering and document client-side scope Address review feedback: - `htmlTextComparator` now uses codepoint comparison (`<` / `>`) instead of `localeCompare`, so it matches AG Grid's default string comparator semantics for plain (non-HTML) values. Locale-aware comparison flipped the order of e.g. `'Z'` vs `'a'`, which would have changed sort behavior for plain string columns the PR description claims are unaffected. Added a test asserting the default ordering is preserved. - Added a comment in `useColDefs.ts` documenting the client-side scope: when `serverPagination` is enabled, sorting is delegated to the server via the existing `comparator: () => 0` override, so HTML normalization does not apply. Server-paginated tables with HTML columns are out of scope for this fix and would require server-side handling. --- .../src/utils/htmlTextFilterValueGetter.test.ts | 9 +++++++++ .../src/utils/htmlTextFilterValueGetter.ts | 10 +++++++++- .../src/utils/useColDefs.ts | 11 +++++++++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts index ad91575e13ce..0de5e95c8418 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts @@ -72,3 +72,12 @@ test('htmlTextComparator handles nulls and numbers', () => { expect(htmlTextComparator(1, 2)).toBeLessThan(0); expect(htmlTextComparator(2, 1)).toBeGreaterThan(0); }); + +test('htmlTextComparator preserves default codepoint ordering for plain strings', () => { + // AG Grid's default string comparator orders by codepoint, so 'Z' (90) + // sorts before 'a' (97). A locale-aware comparator would flip this — + // verify we match the default so plain string columns are unaffected. + expect(htmlTextComparator('Z', 'a')).toBeLessThan(0); + expect(htmlTextComparator('a', 'Z')).toBeGreaterThan(0); + expect(htmlTextComparator('apple', 'banana')).toBeLessThan(0); +}); diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts index eb7f291c6537..626eb8be126b 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts @@ -37,6 +37,13 @@ const htmlTextFilterValueGetter = (params: ValueGetterParams) => { return raw; }; +/** + * Comparator that mirrors AG Grid's default string comparator (codepoint + * order, nulls first), but extracts visible text from HTML values first + * so HTML cells sort by their displayed label. Plain (non-HTML) values + * pass through unchanged, preserving default ordering — e.g. 'Z' still + * sorts before 'a' as it does under the default comparator. + */ export const htmlTextComparator = (a: unknown, b: unknown): number => { const toText = (v: unknown) => typeof v === 'string' && isProbablyHTML(v) ? stripHtmlToText(v) : v; @@ -46,7 +53,8 @@ export const htmlTextComparator = (a: unknown, b: unknown): number => { if (aT == null) return -1; if (bT == null) return 1; if (typeof aT === 'number' && typeof bT === 'number') return aT - bT; - return String(aT).localeCompare(String(bT)); + if (aT === bT) return 0; + return aT < bT ? -1 : 1; }; export default htmlTextFilterValueGetter; diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts index 9de6c840da34..8bea76701947 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts @@ -322,8 +322,15 @@ export const useColDefs = ({ }), ...(dataType === GenericDataType.String && { // HTML cells (e.g. anchor markup) are rendered by TextCellRenderer - // via dangerouslySetInnerHTML; without these the set-filter dropdown - // shows raw markup and sort orders by raw HTML lexically. + // via dangerouslySetInnerHTML; without these the filter and sort + // operate on raw HTML so the URL inside the markup dictates order + // and the "Contains" filter matches against the raw HTML string. + // + // Scope: client-side only. When `serverPagination` is enabled, a + // later spread overrides `comparator` with `() => 0` so sorting is + // delegated to the server; the database does not know to extract + // visible text from HTML, so server-paginated tables with HTML + // columns are out of scope for this fix. filterValueGetter: htmlTextFilterValueGetter, comparator: htmlTextComparator, }), From dc4a3bf9baaa13125eb0d9eb904e6f2ca2509573 Mon Sep 17 00:00:00 2001 From: Matt Fitzgerald Date: Wed, 6 May 2026 10:50:18 +1000 Subject: [PATCH 3/4] fix(plugin-chart-ag-grid-table): memoize HTML stripping and gate to client-side mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address two bot review comments: - **Performance:** `stripHtmlToText` ran `DOMParser` + `sanitizeHtml` on every comparator call. Sorts invoke the comparator O(n log n) times, which adds up on large tables. Cache extracted text by raw HTML string in a module-level Map; bounded by the number of unique HTML values in the dataset. - **Server-pagination consistency:** Gate the string-column branch on `!serverPagination`. In server-pagination mode, the typed filter value is converted into a backend query that operates on the raw HTML stored in the database; stripping HTML client-side via `filterValueGetter` caused a mismatch where the typed text was searched against visible labels client-side but raw HTML server-side, leading to false positives or empty results. Server-paginated tables with HTML columns remain out of scope (they revert to pre-PR behavior — same backend query path as before, no inconsistency). Sort in server-pagination mode is already delegated to the server via the existing `comparator: () => 0` override, so removing the `htmlTextComparator` here loses nothing in that mode. --- .../src/utils/htmlTextFilterValueGetter.ts | 12 ++++++- .../src/utils/useColDefs.ts | 32 +++++++++++-------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts index 626eb8be126b..864daf3e9c85 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts @@ -19,9 +19,19 @@ import { isProbablyHTML, sanitizeHtml } from '@superset-ui/core'; import { ValueGetterParams } from '@superset-ui/core/components/ThemedAgGridReact'; +// Cache extracted text by raw HTML string. Sort comparators run O(n log n) +// times against the same set of cell values, so reparsing on every call is +// a measurable cost on large tables. Bounded by the number of unique HTML +// values in the dataset. +const htmlTextCache = new Map(); + const stripHtmlToText = (html: string): string => { + const cached = htmlTextCache.get(html); + if (cached !== undefined) return cached; const doc = new DOMParser().parseFromString(sanitizeHtml(html), 'text/html'); - return (doc.body.textContent || '').trim(); + const text = (doc.body.textContent || '').trim(); + htmlTextCache.set(html, text); + return text; }; /** diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts index 8bea76701947..cc99891602f9 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts @@ -320,20 +320,24 @@ export const useColDefs = ({ ...(isPercentMetric && { filterValueGetter, }), - ...(dataType === GenericDataType.String && { - // HTML cells (e.g. anchor markup) are rendered by TextCellRenderer - // via dangerouslySetInnerHTML; without these the filter and sort - // operate on raw HTML so the URL inside the markup dictates order - // and the "Contains" filter matches against the raw HTML string. - // - // Scope: client-side only. When `serverPagination` is enabled, a - // later spread overrides `comparator` with `() => 0` so sorting is - // delegated to the server; the database does not know to extract - // visible text from HTML, so server-paginated tables with HTML - // columns are out of scope for this fix. - filterValueGetter: htmlTextFilterValueGetter, - comparator: htmlTextComparator, - }), + ...(dataType === GenericDataType.String && + !serverPagination && { + // HTML cells (e.g. anchor markup) are rendered by TextCellRenderer + // via dangerouslySetInnerHTML; without these the filter and sort + // operate on raw HTML so the URL inside the markup dictates order + // and the "Contains" filter matches against the raw HTML string. + // + // Gated on !serverPagination: in server-pagination mode sort and + // filter are both delegated to the backend (which sees raw HTML + // in the database), so applying the visible-text getter only on + // the client would create a mismatch where the typed filter + // value is stripped client-side but the server query still + // operates on the raw HTML. Server-paginated tables with HTML + // columns are out of scope for this fix and would require + // server-side handling. + filterValueGetter: htmlTextFilterValueGetter, + comparator: htmlTextComparator, + }), ...(dataType === GenericDataType.Temporal && { // Use dateFilterValueGetter so AG Grid correctly identifies null dates for blank filter filterValueGetter: dateFilterValueGetter, From ab6c9e317245bac1e457542a6d8075b105d30ca4 Mon Sep 17 00:00:00 2001 From: Matt Fitzgerald Date: Thu, 7 May 2026 08:06:43 +1000 Subject: [PATCH 4/4] fix(plugin-chart-ag-grid-table): cache normalized comparator value per raw string Address @richardfogaca's perf follow-up: even with the previous `stripHtmlToText` cache, `htmlTextComparator` still invoked `isProbablyHTML` on every comparison call, and `isProbablyHTML` itself runs `DOMParser.parseFromString` for HTML-looking strings. For an N-row sort that's `O(N log N)` DOMParser-or-equivalent invocations per side. Move the cache one level up so it stores the comparator-ready form per raw string. `toComparableText(raw)` runs detection + extraction once per unique input and the comparator (and getter) become Map lookups on the hot path. Plain (non-HTML) strings cache to themselves, so they also skip repeated detection cost during sort. Cache is module-level and bounded by the count of unique string cell values seen in the session. --- .../src/utils/htmlTextFilterValueGetter.ts | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts index 864daf3e9c85..52e3a3887e4a 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts @@ -19,19 +19,26 @@ import { isProbablyHTML, sanitizeHtml } from '@superset-ui/core'; import { ValueGetterParams } from '@superset-ui/core/components/ThemedAgGridReact'; -// Cache extracted text by raw HTML string. Sort comparators run O(n log n) -// times against the same set of cell values, so reparsing on every call is -// a measurable cost on large tables. Bounded by the number of unique HTML -// values in the dataset. -const htmlTextCache = new Map(); - const stripHtmlToText = (html: string): string => { - const cached = htmlTextCache.get(html); - if (cached !== undefined) return cached; const doc = new DOMParser().parseFromString(sanitizeHtml(html), 'text/html'); - const text = (doc.body.textContent || '').trim(); - htmlTextCache.set(html, text); - return text; + return (doc.body.textContent || '').trim(); +}; + +// Cache the comparator-ready form per raw string. Both the HTML-detection +// step (`isProbablyHTML`, which itself invokes DOMParser for HTML-looking +// values) and the extraction step (`stripHtmlToText`, also DOMParser) are +// expensive; sort runs `O(n log n)` comparator calls against the same set +// of cell values. Memoizing the combined detection + extraction means each +// unique cell value pays the cost once per session. Module-level scope; +// bounded by the count of unique string cell values seen. +const comparableTextCache = new Map(); + +const toComparableText = (raw: string): string => { + const cached = comparableTextCache.get(raw); + if (cached !== undefined) return cached; + const normalized = isProbablyHTML(raw) ? stripHtmlToText(raw) : raw; + comparableTextCache.set(raw, normalized); + return normalized; }; /** @@ -41,10 +48,7 @@ const stripHtmlToText = (html: string): string => { */ const htmlTextFilterValueGetter = (params: ValueGetterParams) => { const raw = params.data?.[params.colDef.field as string]; - if (typeof raw === 'string' && isProbablyHTML(raw)) { - return stripHtmlToText(raw); - } - return raw; + return typeof raw === 'string' ? toComparableText(raw) : raw; }; /** @@ -56,7 +60,7 @@ const htmlTextFilterValueGetter = (params: ValueGetterParams) => { */ export const htmlTextComparator = (a: unknown, b: unknown): number => { const toText = (v: unknown) => - typeof v === 'string' && isProbablyHTML(v) ? stripHtmlToText(v) : v; + typeof v === 'string' ? toComparableText(v) : v; const aT = toText(a); const bT = toText(b); if (aT == null && bT == null) return 0;