diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts new file mode 100644 index 000000000000..0de5e95c8418 --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.test.ts @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { ValueGetterParams } from '@superset-ui/core/components/ThemedAgGridReact'; +import htmlTextFilterValueGetter, { + htmlTextComparator, +} from './htmlTextFilterValueGetter'; + +const makeParams = (value: unknown): ValueGetterParams => + ({ + data: { foo: value }, + colDef: { field: 'foo' }, + }) as unknown as ValueGetterParams; + +test('htmlTextFilterValueGetter extracts visible text from HTML anchor', () => { + expect( + htmlTextFilterValueGetter( + makeParams( + 'S18_3232', + ), + ), + ).toBe('S18_3232'); +}); + +test('htmlTextFilterValueGetter strips nested HTML markup', () => { + expect( + htmlTextFilterValueGetter( + makeParams('
Hello World
'), + ), + ).toBe('Hello World'); +}); + +test('htmlTextFilterValueGetter passes plain strings through', () => { + expect(htmlTextFilterValueGetter(makeParams('plain value'))).toBe( + 'plain value', + ); +}); + +test('htmlTextFilterValueGetter passes non-string values through', () => { + expect(htmlTextFilterValueGetter(makeParams(42))).toBe(42); + expect(htmlTextFilterValueGetter(makeParams(null))).toBeNull(); + expect(htmlTextFilterValueGetter(makeParams(undefined))).toBeUndefined(); +}); + +test('htmlTextComparator orders by visible text, not raw HTML', () => { + // URL prefixes (zzz vs bbb) would flip the order under raw-HTML sort, + // but the visible labels (S700_4002 vs S72_3212) sort the other way. + const left = 'S700_4002'; + const right = 'S72_3212'; + expect(htmlTextComparator(left, right)).toBeLessThan(0); +}); + +test('htmlTextComparator handles nulls and numbers', () => { + expect(htmlTextComparator(null, null)).toBe(0); + expect(htmlTextComparator(null, 'x')).toBeLessThan(0); + expect(htmlTextComparator('x', null)).toBeGreaterThan(0); + expect(htmlTextComparator(1, 2)).toBeLessThan(0); + expect(htmlTextComparator(2, 1)).toBeGreaterThan(0); +}); + +test('htmlTextComparator preserves default codepoint ordering for plain strings', () => { + // AG Grid's default string comparator orders by codepoint, so 'Z' (90) + // sorts before 'a' (97). A locale-aware comparator would flip this — + // verify we match the default so plain string columns are unaffected. + expect(htmlTextComparator('Z', 'a')).toBeLessThan(0); + expect(htmlTextComparator('a', 'Z')).toBeGreaterThan(0); + expect(htmlTextComparator('apple', 'banana')).toBeLessThan(0); +}); diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts new file mode 100644 index 000000000000..52e3a3887e4a --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/htmlTextFilterValueGetter.ts @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { isProbablyHTML, sanitizeHtml } from '@superset-ui/core'; +import { ValueGetterParams } from '@superset-ui/core/components/ThemedAgGridReact'; + +const stripHtmlToText = (html: string): string => { + const doc = new DOMParser().parseFromString(sanitizeHtml(html), 'text/html'); + return (doc.body.textContent || '').trim(); +}; + +// Cache the comparator-ready form per raw string. Both the HTML-detection +// step (`isProbablyHTML`, which itself invokes DOMParser for HTML-looking +// values) and the extraction step (`stripHtmlToText`, also DOMParser) are +// expensive; sort runs `O(n log n)` comparator calls against the same set +// of cell values. Memoizing the combined detection + extraction means each +// unique cell value pays the cost once per session. Module-level scope; +// bounded by the count of unique string cell values seen. +const comparableTextCache = new Map(); + +const toComparableText = (raw: string): string => { + const cached = comparableTextCache.get(raw); + if (cached !== undefined) return cached; + const normalized = isProbablyHTML(raw) ? stripHtmlToText(raw) : raw; + comparableTextCache.set(raw, normalized); + return normalized; +}; + +/** + * Returns the visible-text representation of an HTML cell value so AG Grid + * filters and sort operate on what the user sees, not the underlying markup. + * Pass-through for non-HTML values. + */ +const htmlTextFilterValueGetter = (params: ValueGetterParams) => { + const raw = params.data?.[params.colDef.field as string]; + return typeof raw === 'string' ? toComparableText(raw) : raw; +}; + +/** + * Comparator that mirrors AG Grid's default string comparator (codepoint + * order, nulls first), but extracts visible text from HTML values first + * so HTML cells sort by their displayed label. Plain (non-HTML) values + * pass through unchanged, preserving default ordering — e.g. 'Z' still + * sorts before 'a' as it does under the default comparator. + */ +export const htmlTextComparator = (a: unknown, b: unknown): number => { + const toText = (v: unknown) => + typeof v === 'string' ? toComparableText(v) : v; + const aT = toText(a); + const bT = toText(b); + if (aT == null && bT == null) return 0; + if (aT == null) return -1; + if (bT == null) return 1; + if (typeof aT === 'number' && typeof bT === 'number') return aT - bT; + if (aT === bT) return 0; + return aT < bT ? -1 : 1; +}; + +export default htmlTextFilterValueGetter; diff --git a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts index 48f713aabbeb..cc99891602f9 100644 --- a/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts +++ b/superset-frontend/plugins/plugin-chart-ag-grid-table/src/utils/useColDefs.ts @@ -32,6 +32,9 @@ import { } from '../types'; import getCellClass from './getCellClass'; import filterValueGetter from './filterValueGetter'; +import htmlTextFilterValueGetter, { + htmlTextComparator, +} from './htmlTextFilterValueGetter'; import dateFilterComparator from './dateFilterComparator'; import DateWithFormatter from './DateWithFormatter'; import { getAggFunc } from './getAggFunc'; @@ -317,6 +320,24 @@ export const useColDefs = ({ ...(isPercentMetric && { filterValueGetter, }), + ...(dataType === GenericDataType.String && + !serverPagination && { + // HTML cells (e.g. anchor markup) are rendered by TextCellRenderer + // via dangerouslySetInnerHTML; without these the filter and sort + // operate on raw HTML so the URL inside the markup dictates order + // and the "Contains" filter matches against the raw HTML string. + // + // Gated on !serverPagination: in server-pagination mode sort and + // filter are both delegated to the backend (which sees raw HTML + // in the database), so applying the visible-text getter only on + // the client would create a mismatch where the typed filter + // value is stripped client-side but the server query still + // operates on the raw HTML. Server-paginated tables with HTML + // columns are out of scope for this fix and would require + // server-side handling. + filterValueGetter: htmlTextFilterValueGetter, + comparator: htmlTextComparator, + }), ...(dataType === GenericDataType.Temporal && { // Use dateFilterValueGetter so AG Grid correctly identifies null dates for blank filter filterValueGetter: dateFilterValueGetter,