From 082fc08f0f444fe8eb5881c22a36af149ab6d0b3 Mon Sep 17 00:00:00 2001 From: Mark Gordon Date: Thu, 30 Apr 2026 23:36:57 -0700 Subject: [PATCH] Add polymorphic field support --- CHANGELOG.md | 6 + subsetter/_version.py | 2 +- subsetter/common.py | 2 +- subsetter/config_model.py | 27 +++++ subsetter/metadata.py | 4 + subsetter/plan_model.py | 75 ++++++------- subsetter/planner.py | 139 ++++++++++++++++++++--- tests/data/big_join.yaml | 19 ++-- tests/data/datasets/poly.yaml | 36 ++++++ tests/data/fk_chain.yaml | 168 ++++++++++++++-------------- tests/data/fk_chain_compact.yaml | 168 ++++++++++++++-------------- tests/data/instruments.yaml | 19 ++-- tests/data/poly.yaml | 149 ++++++++++++++++++++++++ tests/data/user_orders.yaml | 38 +++---- tests/data/user_orders_compact.yaml | 38 +++---- tests/test_live.py | 5 + 16 files changed, 605 insertions(+), 290 deletions(-) create mode 100644 tests/data/datasets/poly.yaml create mode 100644 tests/data/poly.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 29a7346..e3ce38d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# v0.4.5 + +- Added support for polymorphic foreign keys +- Removed Python 3.8, 3.9 support and added 3.13, 3.14 support +- Updated dependencies + # v0.4.4 - Improved query performance following foreign key relationships diff --git a/subsetter/_version.py b/subsetter/_version.py index cd1ee63..98a433b 100644 --- a/subsetter/_version.py +++ b/subsetter/_version.py @@ -1 +1 @@ -__version__ = "0.4.4" +__version__ = "0.4.5" diff --git a/subsetter/common.py b/subsetter/common.py index 43e29b8..dc6cc5d 100644 --- a/subsetter/common.py +++ b/subsetter/common.py @@ -241,7 +241,7 @@ def _push(key: Any, value: Any): data = stack.pop() if isinstance(data, BaseModel): yield data - for field, _ in data.model_fields.items(): + for field, _ in data.__class__.model_fields.items(): _push(field, getattr(data, field)) if isinstance(data, list): diff --git a/subsetter/config_model.py b/subsetter/config_model.py 
index 90a79fa..767dc85 100644 --- a/subsetter/config_model.py +++ b/subsetter/config_model.py @@ -43,6 +43,32 @@ def check_columns_match(self): raise ValueError("each column in src_columns must be unique") return self + class PolymorphicFKConfig(ForbidBaseModel): + class KeyDestination(ForbidBaseModel): + table: str + columns: List[str] + + table: str + columns: List[str] + discriminator_column: str + destinations: dict[str, KeyDestination] + + @model_validator(mode="after") + def check_columns_match(self): + col_count = len(self.columns) + if not col_count: + raise ValueError("columns cannot be empty") + if len(set(self.columns)) != col_count: + raise ValueError("each column in columns must be unique") + for key_dest in self.destinations.values(): + if len(key_dest.columns) != col_count: + raise ValueError( + "columns and destination columns must be the same length" + ) + if len(set(key_dest.columns)) != col_count: + raise ValueError("each column in destination columns must be unique") + return self + class ColumnConstraint(ForbidBaseModel): column: str operator: SQLKnownOperator @@ -55,6 +81,7 @@ class ColumnConstraint(ForbidBaseModel): passthrough: List[str] = [] ignore_fks: List[IgnoreFKConfig] = [] extra_fks: List[ExtraFKConfig] = [] + polymorphic_fks: List[PolymorphicFKConfig] = [] infer_foreign_keys: Literal["none", "schema", "all"] = "none" include_dependencies: bool = True diff --git a/subsetter/metadata.py b/subsetter/metadata.py index 6bcb932..b471e7b 100644 --- a/subsetter/metadata.py +++ b/subsetter/metadata.py @@ -22,6 +22,8 @@ class ForeignKey: dst_schema: str dst_table: str dst_columns: Tuple[str, ...]
+ src_discriminator: Optional[Tuple[str, str]] = None + dst_discriminator: Optional[Tuple[str, str]] = None @classmethod def from_schema(cls, fk: sa.ForeignKeyConstraint) -> "ForeignKey": @@ -188,6 +190,8 @@ def compute_reverse_keys(self) -> None: dst_schema=table.schema, dst_table=table.name, dst_columns=fk.columns, + src_discriminator=fk.dst_discriminator, + dst_discriminator=fk.src_discriminator, ) ) diff --git a/subsetter/plan_model.py b/subsetter/plan_model.py index 3e61d37..48a3f62 100644 --- a/subsetter/plan_model.py +++ b/subsetter/plan_model.py @@ -237,6 +237,8 @@ class SQLLeftJoin(BaseModel): left_columns: List[str] right_columns: List[str] half_unique: bool = True + left_discriminator: List[str] = [] + right_discriminator: List[str] = [] class SQLStatementSelect(BaseModel): @@ -245,8 +247,9 @@ class SQLStatementSelect(BaseModel): from_: SQLTableIdentifier = Field(..., alias="from") where: Optional[SQLWhereClause] = None limit: Optional[int] = None - joins: Optional[List[SQLLeftJoin]] = None - joins_outer: bool = False + + # Joins are combined in CNF format - inner lists of joins must have one matching joined row + joins: List[List[SQLLeftJoin]] = [] model_config = ConfigDict(populate_by_name=True) @@ -259,50 +262,47 @@ def build(self, context: SQLBuildContext): else: stmt = sa.select(table_obj) - if self.joins: - joined_cols: List[sa.ColumnElement] = [] - joined: sa.FromClause = table_obj - exists_constraints: List[sa.ColumnExpressionArgument] = [] - for join in self.joins: # pylint: disable=not-an-iterable + joined: sa.FromClause = table_obj + join_and_conditions = [] + for join_list in self.joins: + join_or_conditions: List[sa.ColumnExpressionArgument] = [] + for join in join_list: right = join.right.build(context).alias() + join_on = [ + table_obj.c[lft_col] == right.c[rht_col] + for lft_col, rht_col in zip(join.left_columns, join.right_columns) + ] + if join.left_discriminator: + disc_col, disc_val = join.left_discriminator + 
join_on.append(table_obj.c[disc_col] == disc_val) + if join.right_discriminator: + disc_col, disc_val = join.right_discriminator + join_on.append(right.c[disc_col] == disc_val) + if join.half_unique and table_obj.primary_key: joined = joined.join( right, - onclause=sa.and_( - *( - table_obj.c[lft_col] == right.c[rht_col] - for lft_col, rht_col in zip( - join.left_columns, join.right_columns - ) - ) - ), - isouter=self.joins_outer, - ) - joined_cols.extend( - right.c[rht_col] for rht_col in join.right_columns + onclause=sa.and_(*join_on), + isouter=len(join_list) > 1, ) - else: - exists_constraints.append( - sa.exists().where( - *( - table_obj.c[lft_col] == right.c[rht_col] - for lft_col, rht_col in zip( - join.left_columns, join.right_columns - ) - ) + if len(join_list) > 1: + join_or_conditions.extend( + right.c[rht_col].is_not(None) + for rht_col in join.right_columns ) - ) + else: + join_or_conditions.append(sa.exists().where(*join_on)) + + if join_or_conditions: + join_and_conditions.append(sa.or_(*join_or_conditions)) - stmt = stmt.select_from(joined) - if joined is not table_obj: - stmt = stmt.group_by(*table_obj.primary_key.columns) + stmt = stmt.select_from(joined) + if joined is not table_obj: + stmt = stmt.group_by(*table_obj.primary_key.columns) - if self.joins_outer: - exists_constraints.extend(col.is_not(None) for col in joined_cols) - stmt = stmt.where(sa.or_(*exists_constraints)) - elif exists_constraints: - stmt = stmt.where(sa.and_(*exists_constraints)) + if join_and_conditions: + stmt = stmt.where(sa.and_(*join_and_conditions)) if self.where: stmt = stmt.where(self.where.build(context, table_obj)) @@ -329,7 +329,6 @@ def simplify(self) -> "SQLStatementSelect": kwargs["limit"] = self.limit if self.joins: kwargs["joins"] = self.joins - kwargs["joins_outer"] = self.joins_outer return SQLStatementSelect(**kwargs) # type: ignore diff --git a/subsetter/planner.py b/subsetter/planner.py index 66b17e3..898f059 100644 --- a/subsetter/planner.py +++ 
b/subsetter/planner.py @@ -69,6 +69,7 @@ def _plan_internal(self) -> SubsetPlan: ) self._remove_ignore_fks() self._add_extra_fks() + self._add_polymorphic_fks() if self.config.include_dependencies: self._check_ignore_tables() self._check_passthrough_tables() @@ -202,6 +203,85 @@ def _add_extra_fks(self) -> None: ), ) + def _add_polymorphic_fks(self) -> None: + """Add in configured polymorphic foreign keys requested.""" + for index, poly_fk in enumerate(self.config.polymorphic_fks): + src_schema, src_table_name = parse_table_name(poly_fk.table) + table = self.meta.tables.get((src_schema, src_table_name)) + if table is None: + LOGGER.warning( + "Found no source table %s.%s referenced in polymorphic_fks[%d]", + src_schema, + src_table_name, + index, + ) + continue + + src_missing_cols = { + col for col in poly_fk.columns if col not in table.table_obj.columns + } + if src_missing_cols: + LOGGER.warning( + "Columns %s do not exist in %s.%s referenced in polymorphic_fks[%d]", + src_missing_cols, + src_schema, + src_table_name, + index, + ) + continue + + if poly_fk.discriminator_column not in table.table_obj.columns: + LOGGER.warning( + "Column %s does not exist in %s.%s referenced in polymorphic_fks[%d].discriminator_column", + poly_fk.discriminator_column, + src_schema, + src_table_name, + index, + ) + continue + + for discriminator_value, key_dest in poly_fk.destinations.items(): + dst_schema, dst_table_name = parse_table_name(key_dest.table) + dst_table = self.meta.tables.get((dst_schema, dst_table_name)) + if dst_table is None: + LOGGER.warning( + "Found no destination table %s.%s referenced in polymorphic_fks[%d].destinations[%s]", + dst_schema, + dst_table_name, + index, + discriminator_value, + ) + continue + + dst_missing_cols = { + col + for col in key_dest.columns + if col not in dst_table.table_obj.columns + } + if dst_missing_cols: + LOGGER.warning( + "Columns %s do not exist in %s.%s referenced in polymorphic_fks[%d].destinations[%s]", + dst_missing_cols, + dst_schema,
dst_table_name, + index, + discriminator_value, + ) + continue + + table.foreign_keys.append( + ForeignKey( + columns=tuple(poly_fk.columns), + dst_schema=dst_schema, + dst_table=dst_table_name, + dst_columns=tuple(key_dest.columns), + src_discriminator=( + poly_fk.discriminator_column, + discriminator_value, + ), + ), + ) + def _remove_ignore_fks(self) -> None: """Remove requested foreign keys""" for ignore_fk in self.config.ignore_fks: @@ -322,24 +402,51 @@ def _is_distinct(table_obj: sa.Table, cols: Iterable[str]) -> bool: return True return False + # Create joins in conjunctive normal form. + fks_to_join = [] + + # reverse foreign keys just get OR'ed together + if rev_foreign_keys: + fks_to_join.append(rev_foreign_keys) + + # forward foreign keys get AND'ed except for polymorphic fks which OR when using the same + # discriminator column. + fk_disc_index: dict[str, int] = {} + for fk in foreign_keys: + if fk.src_discriminator: + disc_col = fk.src_discriminator[0] + if disc_col in fk_disc_index: + fks_to_join[fk_disc_index[disc_col]].append(fk) + else: + fk_disc_index[disc_col] = len(fks_to_join) + fks_to_join.append([fk]) + else: + fks_to_join.append([fk]) + fk_joins = [] - for fk in foreign_keys or rev_foreign_keys: - dst_table = self.meta.tables[(fk.dst_schema, fk.dst_table)] - half_unique = _is_distinct(table.table_obj, fk.columns) or _is_distinct( - dst_table.table_obj, fk.dst_columns - ) - fk_joins.append( - SQLLeftJoin( - right=SQLTableIdentifier( - table_schema=fk.dst_schema, - table_name=fk.dst_table, - sampled=True, - ), - left_columns=list(fk.columns), - right_columns=list(fk.dst_columns), - half_unique=half_unique, + for fk_join_list in fks_to_join: + or_joins = [] + for fk in fk_join_list: + dst_table = self.meta.tables[(fk.dst_schema, fk.dst_table)] + half_unique = _is_distinct(table.table_obj, fk.columns) or _is_distinct( + dst_table.table_obj, fk.dst_columns ) - ) + + or_joins.append( + SQLLeftJoin( + right=SQLTableIdentifier( + 
table_schema=fk.dst_schema, + table_name=fk.dst_table, + sampled=True, + ), + left_columns=list(fk.columns), + right_columns=list(fk.dst_columns), + half_unique=half_unique, + left_discriminator=list(fk.src_discriminator or ()), + right_discriminator=list(fk.dst_discriminator or ()), + ) + ) + fk_joins.append(or_joins) conf_constraints = self.config.table_constraints.get( f"{table.schema}.{table.name}", [] diff --git a/tests/data/big_join.yaml b/tests/data/big_join.yaml index 756467e..10adce8 100644 --- a/tests/data/big_join.yaml +++ b/tests/data/big_join.yaml @@ -36,16 +36,15 @@ expected_plan: schema: test table: users joins: - - half_unique: false - left_columns: - - state - right: - sampled: true - schema: test - table: homes - right_columns: - - state - joins_outer: true + - - half_unique: false + left_columns: + - state + right: + sampled: true + schema: test + table: homes + right_columns: + - state type: select expected_sample: diff --git a/tests/data/datasets/poly.yaml b/tests/data/datasets/poly.yaml new file mode 100644 index 0000000..e8d1080 --- /dev/null +++ b/tests/data/datasets/poly.yaml @@ -0,0 +1,36 @@ +tables: + test.musicians: + primary_key: [id] + columns: [id, name|str, instrument_type|str, instrument, sample] + test.owners: + primary_key: [id] + columns: [id, name|str, instrument_type|str, instrument] + test.pianos: + primary_key: [id] + columns: [id, name|str] + test.trumpets: + primary_key: [id] + columns: [id, name|str] +data: + test.musicians: + - [1, jack, piano, 1, 0] + - [2, peter, piano, 2, 1] + - [3, jasmine, trumpet, 1, 0] + - [4, walter, trumpet, 2, 0] + - [5, david, piano, 3, 0] + - [6, jackson, trumpet, 3, 1] + test.owners: + - [1, bob, trumpet, 2] + - [2, alice, trumpet, 1] + - [3, bobby, piano, 1] + - [4, robby, piano, 3] + - [5, rob, trumpet, 3] + - [6, robert, piano, 2] + test.pianos: + - [1, gold] + - [2, silver] + - [3, bronze] + test.trumpets: + - [1, uno] + - [2, dos] + - [3, tres] diff --git a/tests/data/fk_chain.yaml 
b/tests/data/fk_chain.yaml index 8802909..de9b2a5 100644 --- a/tests/data/fk_chain.yaml +++ b/tests/data/fk_chain.yaml @@ -18,16 +18,15 @@ expected_plan: schema: test table: bookmark joins: - - half_unique: true - left_columns: - - user_id - right: - sampled: true - schema: test - table: users - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - user_id + right: + sampled: true + schema: test + table: users + right_columns: + - id type: select test.friends: statement: @@ -47,25 +46,24 @@ expected_plan: schema: test table: referal_owners joins: - - half_unique: true - left_columns: - - referal_id - right: - sampled: true - schema: test - table: referals - right_columns: - - id - - half_unique: true - left_columns: - - source_website_id - right: - sampled: true - schema: test - table: websites - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - referal_id + right: + sampled: true + schema: test + table: referals + right_columns: + - id + - - half_unique: true + left_columns: + - source_website_id + right: + sampled: true + schema: test + table: websites + right_columns: + - id type: select test.referals: statement: @@ -73,16 +71,15 @@ expected_plan: schema: test table: referals joins: - - half_unique: true - left_columns: - - website_id - right: - sampled: true - schema: test - table: websites - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - website_id + right: + sampled: true + schema: test + table: websites + right_columns: + - id type: select test.users: statement: @@ -90,25 +87,24 @@ expected_plan: schema: test table: users joins: - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: friends - right_columns: - - friend_a - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: friends - right_columns: - - friend_b - joins_outer: true + - - half_unique: true + left_columns: + - 
id + right: + sampled: true + schema: test + table: friends + right_columns: + - friend_a + - half_unique: true + left_columns: + - id + right: + sampled: true + schema: test + table: friends + right_columns: + - friend_b type: select test.visits: statement: @@ -116,16 +112,15 @@ expected_plan: schema: test table: visits joins: - - half_unique: true - left_columns: - - user_id - right: - sampled: true - schema: test - table: users - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - user_id + right: + sampled: true + schema: test + table: users + right_columns: + - id type: select test.websites: statement: @@ -133,25 +128,24 @@ expected_plan: schema: test table: websites joins: - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: bookmark - right_columns: - - website_id - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: visits - right_columns: - - website_id - joins_outer: true + - - half_unique: true + left_columns: + - id + right: + sampled: true + schema: test + table: bookmark + right_columns: + - website_id + - half_unique: true + left_columns: + - id + right: + sampled: true + schema: test + table: visits + right_columns: + - website_id type: select expected_sample: diff --git a/tests/data/fk_chain_compact.yaml b/tests/data/fk_chain_compact.yaml index 1396b68..f9b8bea 100644 --- a/tests/data/fk_chain_compact.yaml +++ b/tests/data/fk_chain_compact.yaml @@ -21,16 +21,15 @@ expected_plan: schema: test table: bookmark joins: - - half_unique: true - left_columns: - - user_id - right: - sampled: true - schema: test - table: users - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - user_id + right: + sampled: true + schema: test + table: users + right_columns: + - id type: select test.friends: statement: @@ -50,25 +49,24 @@ expected_plan: schema: test table: referal_owners joins: - - half_unique: true - 
left_columns: - - referal_id - right: - sampled: true - schema: test - table: referals - right_columns: - - id - - half_unique: true - left_columns: - - source_website_id - right: - sampled: true - schema: test - table: websites - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - referal_id + right: + sampled: true + schema: test + table: referals + right_columns: + - id + - - half_unique: true + left_columns: + - source_website_id + right: + sampled: true + schema: test + table: websites + right_columns: + - id type: select test.referals: statement: @@ -76,16 +74,15 @@ expected_plan: schema: test table: referals joins: - - half_unique: true - left_columns: - - website_id - right: - sampled: true - schema: test - table: websites - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - website_id + right: + sampled: true + schema: test + table: websites + right_columns: + - id type: select test.users: statement: @@ -93,25 +90,24 @@ expected_plan: schema: test table: users joins: - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: friends - right_columns: - - friend_a - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: friends - right_columns: - - friend_b - joins_outer: true + - - half_unique: true + left_columns: + - id + right: + sampled: true + schema: test + table: friends + right_columns: + - friend_a + - half_unique: true + left_columns: + - id + right: + sampled: true + schema: test + table: friends + right_columns: + - friend_b type: select test.visits: statement: @@ -119,16 +115,15 @@ expected_plan: schema: test table: visits joins: - - half_unique: true - left_columns: - - user_id - right: - sampled: true - schema: test - table: users - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - user_id + right: + sampled: true + schema: test + table: users + right_columns: + - 
id type: select test.websites: statement: @@ -136,25 +131,24 @@ expected_plan: schema: test table: websites joins: - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: bookmark - right_columns: - - website_id - - half_unique: true - left_columns: - - id - right: - sampled: true - schema: test - table: visits - right_columns: - - website_id - joins_outer: true + - - half_unique: true + left_columns: + - id + right: + sampled: true + schema: test + table: bookmark + right_columns: + - website_id + - half_unique: true + left_columns: + - id + right: + sampled: true + schema: test + table: visits + right_columns: + - website_id type: select expected_sample: diff --git a/tests/data/instruments.yaml b/tests/data/instruments.yaml index b4b49c8..bfc7d75 100644 --- a/tests/data/instruments.yaml +++ b/tests/data/instruments.yaml @@ -28,16 +28,15 @@ expected_plan: schema: test table: instruments joins: - - half_unique: true - left_columns: - - owner_id - right: - sampled: true - schema: test - table: owners - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - owner_id + right: + sampled: true + schema: test + table: owners + right_columns: + - id type: select test.owners: statement: diff --git a/tests/data/poly.yaml b/tests/data/poly.yaml new file mode 100644 index 0000000..9284570 --- /dev/null +++ b/tests/data/poly.yaml @@ -0,0 +1,149 @@ +dataset: poly + +plan_config: + targets: + test.musicians: + in: + sample: [1] + polymorphic_fks: + - table: test.musicians + columns: [instrument] + discriminator_column: instrument_type + destinations: + piano: + table: test.pianos + columns: [id] + trumpet: + table: test.trumpets + columns: [id] + - table: test.owners + columns: [instrument] + discriminator_column: instrument_type + destinations: + piano: + table: test.pianos + columns: [id] + trumpet: + table: test.trumpets + columns: [id] + select: + - test.* + +sample_config: {} + +expected_plan: + 
passthrough: [] + queries: + test.musicians: + statement: + from: + schema: test + table: musicians + type: select + where: + columns: + - sample + type: in + values: + - - 1 + test.owners: + statement: + from: + schema: test + table: owners + joins: + - - half_unique: true + left_columns: + - instrument + left_discriminator: + - instrument_type + - piano + right: + sampled: true + schema: test + table: pianos + right_columns: + - id + right_discriminator: [] + - half_unique: true + left_columns: + - instrument + left_discriminator: + - instrument_type + - trumpet + right: + sampled: true + schema: test + table: trumpets + right_columns: + - id + right_discriminator: [] + type: select + test.pianos: + statement: + from: + schema: test + table: pianos + joins: + - - half_unique: true + left_columns: + - id + left_discriminator: [] + right: + sampled: true + schema: test + table: musicians + right_columns: + - instrument + right_discriminator: + - instrument_type + - piano + type: select + test.trumpets: + statement: + from: + schema: test + table: trumpets + joins: + - - half_unique: true + left_columns: + - id + left_discriminator: [] + right: + sampled: true + schema: test + table: musicians + right_columns: + - instrument + right_discriminator: + - instrument_type + - trumpet + type: select + +expected_sample: + test_out.musicians: + - id: 2 + name: peter + instrument_type: piano + instrument: 2 + sample: 1 + - id: 6 + name: jackson + instrument_type: trumpet + instrument: 3 + sample: 1 + test_out.pianos: + - id: 2 + name: silver + test_out.trumpets: + - id: 3 + name: tres + test_out.owners: + - id: 5 + name: rob + instrument_type: trumpet + instrument: 3 + - id: 6 + name: robert + instrument_type: piano + instrument: 2 diff --git a/tests/data/user_orders.yaml b/tests/data/user_orders.yaml index 8bad5a8..e228d07 100644 --- a/tests/data/user_orders.yaml +++ b/tests/data/user_orders.yaml @@ -18,16 +18,15 @@ expected_plan: schema: test table: order_status joins: - - 
half_unique: true - left_columns: - - order_id - right: - sampled: true - schema: test - table: orders - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - order_id + right: + sampled: true + schema: test + table: orders + right_columns: + - id type: select test.orders: statement: @@ -35,16 +34,15 @@ expected_plan: schema: test table: orders joins: - - half_unique: true - left_columns: - - user_id - right: - sampled: true - schema: test - table: users - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - user_id + right: + sampled: true + schema: test + table: users + right_columns: + - id type: select test.users: statement: diff --git a/tests/data/user_orders_compact.yaml b/tests/data/user_orders_compact.yaml index 6405e2f..7d6e424 100644 --- a/tests/data/user_orders_compact.yaml +++ b/tests/data/user_orders_compact.yaml @@ -20,16 +20,15 @@ expected_plan: schema: test table: order_status joins: - - half_unique: true - left_columns: - - order_id - right: - sampled: true - schema: test - table: orders - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - order_id + right: + sampled: true + schema: test + table: orders + right_columns: + - id type: select test.orders: statement: @@ -37,16 +36,15 @@ expected_plan: schema: test table: orders joins: - - half_unique: true - left_columns: - - user_id - right: - sampled: true - schema: test - table: users - right_columns: - - id - joins_outer: false + - - half_unique: true + left_columns: + - user_id + right: + sampled: true + schema: test + table: users + right_columns: + - id type: select test.users: statement: diff --git a/tests/test_live.py b/tests/test_live.py index 1de3ec2..428b254 100644 --- a/tests/test_live.py +++ b/tests/test_live.py @@ -113,3 +113,8 @@ def test_instruments(db_config): @pytest.mark.parametrize("db_config", DATABASE_CONFIGURATIONS, indirect=True) def test_big_join(db_config): 
do_dataset_test(db_config, "big_join") + + +@pytest.mark.parametrize("db_config", DATABASE_CONFIGURATIONS, indirect=True) +def test_poly_join(db_config): + do_dataset_test(db_config, "poly")