diff --git a/use-timescale/extensions/pg-textsearch.md b/use-timescale/extensions/pg-textsearch.md index 9879aedc00..e5ed49c289 100644 --- a/use-timescale/extensions/pg-textsearch.md +++ b/use-timescale/extensions/pg-textsearch.md @@ -8,14 +8,18 @@ products: [cloud, self_hosted] import EA1125 from "versionContent/_partials/_early_access_11_25.mdx"; import SINCE010 from "versionContent/_partials/_since_0_1_0.mdx"; +import SINCE040 from "versionContent/_partials/_since_0_4_0.mdx"; import IntegrationPrereqs from "versionContent/_partials/_integration-prereqs.mdx"; # Optimize full text search with BM25 -$PG full-text search at scale consistently hits a wall where performance degrades catastrophically. +$PG full-text search at scale consistently hits a wall where performance degrades catastrophically. $COMPANY's [pg_textsearch][pg_textsearch-github-repo] brings modern [BM25][bm25-wiki]-based full-text search directly into $PG, -with a memtable architecture for efficient indexing and ranking. `pg_textsearch` integrates seamlessly with SQL and -provides better search quality and performance than the $PG built-in full-text search. +with a memtable architecture for efficient indexing and ranking. `pg_textsearch` integrates seamlessly with SQL and +provides better search quality and performance than the $PG built-in full-text search. With Block-Max WAND optimization, +`pg_textsearch` delivers up to **4x faster top-k queries** compared to naive BM25 implementations. Advanced compression +using delta encoding and bitpacking reduces index sizes by **41%** while improving query performance by 10-20% for +shorter queries. BM25 scores in `pg_textsearch` are returned as negative values, where lower (more negative) numbers indicate better matches. `pg_textsearch` implements the following: @@ -117,7 +121,8 @@ You have created a BM25 index for full-text search. 
## Optimize search queries for performance -Use efficient query patterns to leverage BM25 ranking and optimize search performance. +Use efficient query patterns to leverage BM25 ranking and optimize search performance. The `<@>` operator with `to_bm25query()` +provides BM25-based ranking scores. `to_bm25query()` takes two parameters: the search query text and the index name. @@ -163,7 +168,7 @@ Use efficient query patterns to leverage BM25 ranking and optimize search perfor FROM products WHERE price < 500 AND description <@> to_bm25query('ergonomic', 'products_search_idx') < -0.5 - ORDER BY description <@> to_bm25query('ergonomic', 'products_search_idx') + ORDER BY score LIMIT 5; ``` @@ -350,9 +355,22 @@ Customize `pg_textsearch` behavior for your specific use case and data character -- Set default query limit when no LIMIT clause is present (default 1000) SET pg_textsearch.default_limit = 5000; + + -- Enable Block-Max WAND optimization for faster top-k queries (enabled by default) + SET pg_textsearch.enable_bmw = true; + + -- Log block skip statistics for debugging query performance (disabled by default; set to true to enable) + SET pg_textsearch.log_bmw_stats = false; ``` + ```sql + -- Enable segment compression using delta encoding and bitpacking (enabled by default) + -- Reduces index size by ~41% with 10-20% query performance improvement for shorter queries + SET pg_textsearch.compress_segments = on; + ``` + + 1. **Configure language-specific text processing** You can create multiple BM25 indexes on the same column with different language configurations: