-
Notifications
You must be signed in to change notification settings - Fork 11
Add post-processing SQL over semantic query results #135
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
552c2d9
c3565e5
02194ec
6cc3958
b542ae7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -118,8 +118,9 @@ def rewrite(self, sql: str, strict: bool = True) -> str: | |
| # Check if this is a CTE-based query or has subqueries | ||
| has_ctes = parsed.args.get("with") is not None | ||
| has_subquery_in_from = self._has_subquery_in_from(parsed) | ||
| has_subquery_in_joins = any(isinstance(join.this, exp.Subquery) for join in (parsed.args.get("joins") or [])) | ||
|
|
||
| if has_ctes or has_subquery_in_from: | ||
| if has_ctes or has_subquery_in_from or has_subquery_in_joins: | ||
| # Handle CTEs and subqueries | ||
| return self._rewrite_with_ctes_or_subqueries(parsed) | ||
|
|
||
|
|
@@ -1851,41 +1852,89 @@ def _has_subquery_in_from(self, select: exp.Select) -> bool: | |
| def _rewrite_with_ctes_or_subqueries(self, parsed: exp.Select) -> str: | ||
| """Rewrite query that contains CTEs or subqueries. | ||
|
|
||
| Strategy: | ||
| 1. Rewrite each CTE that references semantic models | ||
| 2. Rewrite subqueries in FROM clause | ||
| 3. Return the modified SQL | ||
| Recursively walks the query tree bottom-up, rewriting any | ||
| SELECT whose FROM target resolves to a semantic model. | ||
| Outer queries are left as plain SQL, so post-processing | ||
| (CASE, window functions, arithmetic, etc.) works naturally. | ||
| """ | ||
| # Handle CTEs | ||
| if parsed.args.get("with"): | ||
| with_clause = parsed.args["with"] | ||
| for cte in with_clause.expressions: | ||
| # Each CTE has a name (alias) and a query (this) | ||
| self._rewrite_select_tree(parsed) | ||
|
|
||
| # If the root SELECT itself references a semantic model, it must | ||
| # still go through _rewrite_simple_query (which enforces the | ||
| # explicit JOIN guard and performs semantic rewriting). | ||
| if self._references_semantic_model(parsed): | ||
| # Save user-defined CTEs before _rewrite_simple_query replaces | ||
| # the entire query with fresh generator output. | ||
| original_with = parsed.args.get("with") | ||
|
|
||
| rewritten_sql = self._rewrite_simple_query(parsed) | ||
|
|
||
| if original_with: | ||
| # Merge user CTEs into the generated SQL so references | ||
| # from filters/expressions (e.g. IN (SELECT ... FROM cte)) | ||
| # remain valid. | ||
| rewritten = sqlglot.parse_one(rewritten_sql, dialect=self.dialect) | ||
| gen_with = rewritten.args.get("with") | ||
| if gen_with: | ||
| # Check for CTE name collisions between user and generated CTEs | ||
| user_names = {cte.alias for cte in original_with.expressions} | ||
| for gen_cte in gen_with.expressions: | ||
| if gen_cte.alias in user_names: | ||
| raise ValueError( | ||
| f"CTE name '{gen_cte.alias}' conflicts with an internally " | ||
| f"generated name. Please choose a different CTE name." | ||
| ) | ||
|
|
||
| user_ctes = [cte.copy() for cte in original_with.expressions] | ||
| gen_with.set("expressions", user_ctes + list(gen_with.expressions)) | ||
|
Comment on lines
+1888
to
+1889
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When a root semantic query includes Useful? React with 👍 / 👎. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When a root semantic query already has a user CTE name that matches an internally generated CTE (for example Useful? React with 👍 / 👎. |
||
| # Preserve WITH RECURSIVE from the original query | ||
| if original_with.args.get("recursive"): | ||
| gen_with.set("recursive", True) | ||
| else: | ||
| rewritten.set("with", original_with.copy()) | ||
| return rewritten.sql(dialect=self.dialect) | ||
|
|
||
| return rewritten_sql | ||
|
|
||
| return parsed.sql(dialect=self.dialect) | ||
|
|
||
| def _rewrite_select_tree(self, select: exp.Select): | ||
| """Recursively rewrite semantic subqueries and CTEs (bottom-up). | ||
|
|
||
| At each level: recurse into children first, then rewrite this | ||
| node if it directly references a semantic model. | ||
| """ | ||
| # Recurse into CTEs | ||
| if select.args.get("with"): | ||
| for cte in select.args["with"].expressions: | ||
| cte_query = cte.this | ||
| if isinstance(cte_query, exp.Select): | ||
| # Check if this CTE references a semantic model | ||
| self._rewrite_select_tree(cte_query) | ||
| if self._references_semantic_model(cte_query): | ||
| # Rewrite the CTE query | ||
| rewritten_cte_sql = self._rewrite_simple_query(cte_query) | ||
| # Parse the rewritten SQL and replace the CTE query | ||
| rewritten_cte = sqlglot.parse_one(rewritten_cte_sql, dialect=self.dialect) | ||
| cte.set("this", rewritten_cte) | ||
|
|
||
| # Handle subquery in FROM | ||
| from_clause = parsed.args.get("from") | ||
| rewritten_sql = self._rewrite_simple_query(cte_query) | ||
| cte.set("this", sqlglot.parse_one(rewritten_sql, dialect=self.dialect)) | ||
|
|
||
| # Recurse into FROM subquery | ||
| from_clause = select.args.get("from") | ||
| if from_clause and isinstance(from_clause.this, exp.Subquery): | ||
| subquery = from_clause.this | ||
| subquery_select = subquery.this | ||
| if isinstance(subquery_select, exp.Select) and self._references_semantic_model(subquery_select): | ||
| # Rewrite the subquery | ||
| rewritten_subquery_sql = self._rewrite_simple_query(subquery_select) | ||
| rewritten_subquery = sqlglot.parse_one(rewritten_subquery_sql, dialect=self.dialect) | ||
| subquery.set("this", rewritten_subquery) | ||
|
|
||
| # Return the modified SQL | ||
| # Note: Individual CTEs/subqueries are already instrumented by _rewrite_simple_query -> generator | ||
| # The outer query wrapper doesn't need separate instrumentation | ||
| return parsed.sql(dialect=self.dialect) | ||
| if isinstance(subquery_select, exp.Select): | ||
| self._rewrite_select_tree(subquery_select) | ||
| if self._references_semantic_model(subquery_select): | ||
| rewritten_sql = self._rewrite_simple_query(subquery_select) | ||
| subquery.set("this", sqlglot.parse_one(rewritten_sql, dialect=self.dialect)) | ||
|
|
||
| # Recurse into JOIN subqueries | ||
| for join in select.args.get("joins") or []: | ||
| join_expr = join.this | ||
| if isinstance(join_expr, exp.Subquery): | ||
| join_select = join_expr.this | ||
| if isinstance(join_select, exp.Select): | ||
| self._rewrite_select_tree(join_select) | ||
| if self._references_semantic_model(join_select): | ||
| rewritten_sql = self._rewrite_simple_query(join_select) | ||
| join_expr.set("this", sqlglot.parse_one(rewritten_sql, dialect=self.dialect)) | ||
|
|
||
| def _references_semantic_model(self, select: exp.Select) -> bool: | ||
| """Check if a SELECT statement references any semantic models.""" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Routing every query with a JOIN subquery through
_rewrite_with_ctes_or_subqueriesskips_rewrite_simple_queryfor the rootSELECT, because_rewrite_select_treeonly rewrites child scopes. For queries whose rootFROMis a semantic model (for example,FROM orders o JOIN (SELECT ...) s), metric references likeo.revenueare no longer rewritten through the semantic pipeline or rejected by the explicit JOIN guard, so execution can now fail with binder errors or silently return raw row-level columns instead of semantic aggregates.Useful? React with 👍 / 👎.