From 5d48fcd5b1f5a4c194c9cf93aa5ab56704c8cc36 Mon Sep 17 00:00:00 2001 From: kould Date: Mon, 4 May 2026 19:43:46 +0800 Subject: [PATCH] feat: support intersect set operations --- docs/features.md | 1 + src/binder/select.rs | 34 ++--- src/execution/dql/mod.rs | 2 +- .../dql/{except.rs => set_membership.rs} | 59 +++++---- src/execution/mod.rs | 23 ++-- src/optimizer/rule/implementation/mod.rs | 2 +- .../rule/normalization/column_pruning.rs | 8 +- .../normalization/compilation_in_advance.rs | 4 +- src/optimizer/rule/normalization/mod.rs | 2 +- src/orm/README.md | 5 +- src/orm/mod.rs | 53 ++++++++ src/planner/mod.rs | 4 +- src/planner/operator/mod.rs | 13 +- .../operator/{except.rs => set_membership.rs} | 28 ++++- tests/macros-test/src/main.rs | 9 ++ tests/slt/set_operation.slt | 119 ++++++++++++++++++ tests/slt/union.slt | 85 ------------- 17 files changed, 295 insertions(+), 156 deletions(-) rename src/execution/dql/{except.rs => set_membership.rs} (63%) rename src/planner/operator/{except.rs => set_membership.rs} (73%) create mode 100644 tests/slt/set_operation.slt delete mode 100644 tests/slt/union.slt diff --git a/docs/features.md b/docs/features.md index 86c8df79..e366ff00 100644 --- a/docs/features.md +++ b/docs/features.md @@ -190,6 +190,7 @@ If `unsafe_txdb_checkpoint` is not enabled, `build_rocksdb()` returns an explici - [x] Describe - [x] Union - [x] EXCEPT +- [x] INTERSECT ### DML - [x] Insert diff --git a/src/binder/select.rs b/src/binder/select.rs index bd9d2178..2ef20df6 100644 --- a/src/binder/select.rs +++ b/src/binder/select.rs @@ -41,10 +41,10 @@ use crate::execution::dql::join::joins_nullable; use crate::expression::simplify::ConstantCalculator; use crate::expression::visitor_mut::{walk_mut_expr, PositionShift, VisitorMut}; use crate::expression::{AliasType, BinaryOperator}; -use crate::planner::operator::except::ExceptOperator; use crate::planner::operator::function_scan::FunctionScanOperator; use crate::planner::operator::insert::InsertOperator; use crate::planner::operator::join::JoinCondition; +use crate::planner::operator::set_membership::{SetMembershipKind, SetMembershipOperator}; use crate::planner::operator::sort::{SortField, SortOperator}; use crate::planner::operator::union::UnionOperator; use crate::planner::{Childrens, LogicalPlan, SchemaOutput}; @@ -756,15 +756,14 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' )?) } } - SetOperator::Except => { - if is_all { - Ok(ExceptOperator::build( - left_schema.clone(), - right_schema.clone(), - left_plan, - right_plan, - )) - } else { + SetOperator::Except | SetOperator::Intersect => { + let kind = match op { + SetOperator::Except => SetMembershipKind::Except, + SetOperator::Intersect => SetMembershipKind::Intersect, + _ => unreachable!(), + }; + + if !is_all { let left_distinct_exprs = left_schema .iter() .cloned() @@ -782,14 +781,15 @@ impl<'a: 'b, 'b, T: Transaction, A: AsRef<[(&'static str, DataValue)]>> Binder<' right_plan = self.bind_distinct(right_plan, right_distinct_exprs)?; left_schema = left_plan.output_schema(); right_schema = right_plan.output_schema(); - - Ok(ExceptOperator::build( - left_schema.clone(), - right_schema.clone(), - left_plan, - right_plan, - )) } + + Ok(SetMembershipOperator::build( + kind, + left_schema.clone(), + right_schema.clone(), + left_plan, + right_plan, + )) } set_operator => Err(DatabaseError::UnsupportedStmt(format!( "set operator: {set_operator:?}" diff --git a/src/execution/dql/mod.rs b/src/execution/dql/mod.rs index 3e7ea856..e271df5e 100644 --- a/src/execution/dql/mod.rs +++ b/src/execution/dql/mod.rs @@ -15,7 +15,6 @@ pub(crate) mod aggregate; pub(crate) mod describe; pub(crate) mod dummy; -pub(crate) mod except; pub(crate) mod explain; pub(crate) mod filter; pub(crate) mod function_scan; @@ -27,6 +26,7 @@ pub(crate) mod projection; pub(crate) mod scalar_apply; pub(crate) mod scalar_subquery; pub(crate) mod seq_scan; +pub(crate) mod set_membership; pub(crate) mod show_table; pub(crate) mod show_view; pub(crate) mod sort; diff --git a/src/execution/dql/except.rs b/src/execution/dql/set_membership.rs similarity index 63% rename from src/execution/dql/except.rs rename to src/execution/dql/set_membership.rs index c2dd2e3f..19cdc170 100644 --- a/src/execution/dql/except.rs +++ b/src/execution/dql/set_membership.rs @@ -16,33 +16,39 @@ use crate::errors::DatabaseError; use crate::execution::{ build_read, ExecArena, ExecId, ExecNode, ExecutionCaches, ExecutorNode, ReadExecutor, }; +use crate::planner::operator::set_membership::SetMembershipKind; use crate::planner::LogicalPlan; use crate::storage::Transaction; use crate::types::tuple::Tuple; use ahash::{HashMap, HashMapExt}; -pub struct Except { + +pub struct SetMembership { + kind: SetMembershipKind, left_plan: LogicalPlan, right_plan: LogicalPlan, left_input: ExecId, right_input: ExecId, - except_col: HashMap, + right_counts: HashMap, built: bool, } -impl From<(LogicalPlan, LogicalPlan)> for Except { - fn from((left_input, right_input): (LogicalPlan, LogicalPlan)) -> Self { - Except { +impl From<(SetMembershipKind, LogicalPlan, LogicalPlan)> for SetMembership { + fn from( + (kind, left_input, right_input): (SetMembershipKind, LogicalPlan, LogicalPlan), + ) -> Self { + SetMembership { + kind, left_plan: left_input, right_plan: right_input, left_input: 0, right_input: 0, - except_col: HashMap::new(), + right_counts: HashMap::new(), built: false, } } } -impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Except { +impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for SetMembership { fn into_executor( mut self, arena: &mut ExecArena<'a, T>, @@ -51,12 +57,12 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for Except { ) -> ExecId { self.left_input = build_read(arena, self.left_plan.take(), cache, transaction); self.right_input = build_read(arena, self.right_plan.take(), cache, transaction); - arena.push(ExecNode::Except(self)) + arena.push(ExecNode::SetMembership(self)) } } -impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Except { - type Input = (LogicalPlan, LogicalPlan); +impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for SetMembership { + type Input = (SetMembershipKind, LogicalPlan, LogicalPlan); fn into_executor( input: Self::Input, @@ -68,11 +74,11 @@ impl<'a, T: Transaction + 'a> ExecutorNode<'a, T> for Except { } fn next_tuple(&mut self, arena: &mut ExecArena<'a, T>) -> Result<(), DatabaseError> { - Except::next_tuple(self, arena) + SetMembership::next_tuple(self, arena) } } -impl Except { +impl SetMembership { pub(crate) fn next_tuple<'a, T: Transaction + 'a>( &mut self, arena: &mut ExecArena<'a, T>, @@ -80,7 +86,7 @@ impl Except { if !self.built { while arena.next_tuple(self.right_input)? { *self - .except_col + .right_counts .entry(arena.result_tuple().clone()) .or_insert(0) += 1; } @@ -92,17 +98,28 @@ impl Except { arena.finish(); return Ok(()); } - let tuple = arena.result_tuple(); - if let Some(count) = self.except_col.get_mut(tuple) { - if *count > 0 { - *count -= 1; - continue; - } + let matched = self.consume_right_match(arena.result_tuple()); + let should_emit = match self.kind { + SetMembershipKind::Except => !matched, + SetMembershipKind::Intersect => matched, + }; + + if should_emit { + arena.resume(); + return Ok(()); } + } + } - arena.resume(); - return Ok(()); + fn consume_right_match(&mut self, tuple: &Tuple) -> bool { + if let Some(count) = self.right_counts.get_mut(tuple) { + if *count > 0 { + *count -= 1; + return true; + } } + + false } } diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 385f823b..8df214d6 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -41,7 +41,6 @@ use crate::execution::dql::aggregate::simple_agg::SimpleAggExecutor; use crate::execution::dql::aggregate::stream_distinct::StreamDistinctExecutor; use crate::execution::dql::describe::Describe; use crate::execution::dql::dummy::Dummy; -use crate::execution::dql::except::Except; use crate::execution::dql::explain::Explain; use crate::execution::dql::filter::Filter; use crate::execution::dql::function_scan::FunctionScan; @@ -51,6 +50,7 @@ use crate::execution::dql::limit::Limit; use crate::execution::dql::projection::Projection; use crate::execution::dql::scalar_subquery::ScalarSubquery; use crate::execution::dql::seq_scan::SeqScan; +use crate::execution::dql::set_membership::SetMembership; use crate::execution::dql::show_table::ShowTables; use crate::execution::dql::show_view::ShowViews; use crate::execution::dql::sort::Sort; @@ -127,7 +127,6 @@ pub(crate) enum ExecNode<'a, T: Transaction + 'a> { DropTable(DropTable), DropView(DropView), Dummy(Dummy), - Except(Except), Explain(Explain), Filter(Filter), FunctionScan(FunctionScan), @@ -141,6 +140,7 @@ pub(crate) enum ExecNode<'a, T: Transaction + 'a> { Projection(Projection), ScalarApply(ScalarApply), ScalarSubquery(ScalarSubquery), + SetMembership(SetMembership), SeqScan(SeqScan<'a, T>), ShowTables(ShowTables), ShowViews(ShowViews), @@ -206,7 +206,6 @@ impl<'a, T: Transaction + 'a> ExecNode<'a, T> { } ExecNode::DropView(exec) => >::next_tuple(exec, arena), ExecNode::Dummy(exec) => >::next_tuple(exec, arena), - ExecNode::Except(exec) => >::next_tuple(exec, arena), ExecNode::Explain(exec) => >::next_tuple(exec, arena), ExecNode::Filter(exec) => >::next_tuple(exec, arena), ExecNode::FunctionScan(exec) => { @@ -236,6 +235,9 @@ impl<'a, T: Transaction + 'a> ExecNode<'a, T> { ExecNode::ScalarSubquery(exec) => { >::next_tuple(exec, arena) } + ExecNode::SetMembership(exec) => { + >::next_tuple(exec, arena) + } ExecNode::SeqScan(exec) => { as ExecutorNode<'a, T>>::next_tuple(exec, arena) } @@ -783,12 +785,15 @@ pub(crate) fn build_read<'a, T: Transaction + 'a>( cache, transaction, ), - Operator::Except(_) => >::into_executor( - childrens.pop_twins(), - arena, - cache, - transaction, - ), + Operator::SetMembership(op) => { + let (left, right) = childrens.pop_twins(); + >::into_executor( + (op.kind, left, right), + arena, + cache, + transaction, + ) + } _ => unreachable!(), } } diff --git a/src/optimizer/rule/implementation/mod.rs b/src/optimizer/rule/implementation/mod.rs index d3bdb3ee..580f792e 100644 --- a/src/optimizer/rule/implementation/mod.rs +++ b/src/optimizer/rule/implementation/mod.rs @@ -120,7 +120,7 @@ impl ImplementationRuleRootTag { | Operator::ShowView | Operator::Explain | Operator::Describe(_) - | Operator::Except(_) + | Operator::SetMembership(_) | Operator::Union(_) | Operator::CreateIndex(_) | Operator::CreateView(_) diff --git a/src/optimizer/rule/normalization/column_pruning.rs b/src/optimizer/rule/normalization/column_pruning.rs index 186eee86..b627666d 100644 --- a/src/optimizer/rule/normalization/column_pruning.rs +++ b/src/optimizer/rule/normalization/column_pruning.rs @@ -139,7 +139,7 @@ impl ColumnPruning { .map(|column| column.summary()), ); } - Operator::Except(op) => { + Operator::SetMembership(op) => { referenced_columns.extend( op.left_schema_ref .iter() @@ -281,7 +281,7 @@ impl ColumnPruning { | Operator::ShowView | Operator::Describe(_) | Operator::Union(_) - | Operator::Except(_) + | Operator::SetMembership(_) | Operator::AddColumn(_) | Operator::ChangeColumn(_) | Operator::DropColumn(_) @@ -503,7 +503,7 @@ impl ColumnPruning { | Operator::Join(_) | Operator::Filter(_) | Operator::Union(_) - | Operator::Except(_) + | Operator::SetMembership(_) | Operator::TopK(_) => { if matches!( operator, @@ -582,7 +582,7 @@ impl ColumnPruning { } changed = true; } - } else if matches!(operator, Operator::Union(_) | Operator::Except(_)) { + } else if matches!(operator, Operator::Union(_) | Operator::SetMembership(_)) { let mut child_required = required_columns; Self::extend_operator_referenced_columns(operator, &mut child_required); changed |= Self::apply_twins(child_required, all_referenced, childrens, arena)?; diff --git a/src/optimizer/rule/normalization/compilation_in_advance.rs b/src/optimizer/rule/normalization/compilation_in_advance.rs index cb6b0aa4..7b0dee7f 100644 --- a/src/optimizer/rule/normalization/compilation_in_advance.rs +++ b/src/optimizer/rule/normalization/compilation_in_advance.rs @@ -107,7 +107,7 @@ pub(crate) fn evaluator_bind_current(plan: &mut LogicalPlan) -> Result<(), Datab | Operator::CopyFromFile(_) | Operator::CopyToFile(_) | Operator::Union(_) - | Operator::Except(_) => (), + | Operator::SetMembership(_) => (), } Ok(()) @@ -125,7 +125,7 @@ impl EvaluatorBind { | Operator::MarkApply(_) | Operator::Join(_) | Operator::Union(_) - | Operator::Except(_) + | Operator::SetMembership(_) ) { Self::_apply(right)?; } diff --git a/src/optimizer/rule/normalization/mod.rs b/src/optimizer/rule/normalization/mod.rs index 4cabfde4..4b26db5c 100644 --- a/src/optimizer/rule/normalization/mod.rs +++ b/src/optimizer/rule/normalization/mod.rs @@ -135,7 +135,7 @@ impl NormalizationRuleRootTag { | Operator::FunctionScan(_) | Operator::Update(_) | Operator::Union(_) - | Operator::Except(_) => None, + | Operator::SetMembership(_) => None, } } } diff --git a/src/orm/README.md b/src/orm/README.md index 53c398ed..dc44a011 100644 --- a/src/orm/README.md +++ b/src/orm/README.md @@ -101,7 +101,7 @@ The usual flow is: - keep full-model output, or switch into `project::

()`, `project_value(...)`, or `project_tuple(...)` - once the output shape is fixed, compose set queries with `union(...)`, - `except(...)`, and optional `.all()` + `except(...)`, `intersect(...)`, and optional `.all()` If you need an explicit relation alias, call `.alias("name")` on a source or pending join, and re-qualify fields with `Field::qualify("name")` where @@ -143,9 +143,10 @@ Set operations are available after the output shape is fixed: - model rows: `from::().union(...)` - single values: `project_value(...).union(...)` - tuples: `project_tuple(...).except(...)` +- intersections: `project_value(...).intersect(...)` - struct projections: `project::

().union(...)` -Call `.all()` after `union(...)` or `except(...)` when you want multiset +Call `.all()` after `union(...)`, `except(...)`, or `intersect(...)` when you want multiset semantics instead of the default distinct result. After a set query is formed, you can still apply result-level methods such as diff --git a/src/orm/mod.rs b/src/orm/mod.rs index 52df9d11..8e903d42 100644 --- a/src/orm/mod.rs +++ b/src/orm/mod.rs @@ -1739,6 +1739,22 @@ pub trait QueryOperand: private::Sealed + Sized { ), ) } + + fn intersect(self, rhs: R) -> SetQueryBuilder + where + R: QueryOperand, + { + let (source, left_query) = self.into_query_parts(); + SetQueryBuilder::new( + source, + set_operation_query( + left_query, + rhs.into_query(), + SetOperator::Intersect, + SetQuantifier::Distinct, + ), + ) + } } impl IntoJoinColumns for Field { @@ -1969,6 +1985,24 @@ impl FromBuilder { QueryOperand::except(self, rhs) } + /// Builds an `INTERSECT` set query with another query of the same shape. + /// + /// ```rust,ignore + /// let ordered_user_ids = database + /// .from::() + /// .project_value(User::id()) + /// .intersect(database.from::().project_value(Order::user_id())) + /// .fetch::()?; + /// # Ok::<(), kite_sql::errors::DatabaseError>(()) + /// ``` + pub fn intersect(self, rhs: R) -> SetQueryBuilder + where + Self: QueryOperand, + R: QueryOperand::Shape>, + { + QueryOperand::intersect(self, rhs) + } + /// Inserts the current query result into a target model table. /// /// Use this when the query output is a partial projection and you want to @@ -2252,6 +2286,25 @@ impl SetQueryBuilder { QueryOperand::except(self, rhs) } + /// Appends `INTERSECT` to the current set query. + /// + /// ```rust,ignore + /// let ids = database + /// .from::() + /// .project_value(User::id()) + /// .union(database.from::().project_value(Order::user_id())) + /// .intersect(database.from::().project_value(Wallet::id())) + /// .fetch::()?; + /// # Ok::<(), kite_sql::errors::DatabaseError>(()) + /// ``` + pub fn intersect(self, rhs: R) -> Self + where + Self: QueryOperand, + R: QueryOperand::Shape>, + { + QueryOperand::intersect(self, rhs) + } + /// Inserts the current query result into a target model table. /// /// Use this when the query output is a partial projection and you want to diff --git a/src/planner/mod.rs b/src/planner/mod.rs index 4c18ccf3..3a7d5310 100644 --- a/src/planner/mod.rs +++ b/src/planner/mod.rs @@ -15,7 +15,7 @@ pub mod operator; use crate::catalog::{ColumnCatalog, ColumnRef, TableName}; -use crate::planner::operator::except::ExceptOperator; +use crate::planner::operator::set_membership::SetMembershipOperator; use crate::planner::operator::union::UnionOperator; use crate::planner::operator::values::ValuesOperator; use crate::planner::operator::{Operator, PhysicalOption}; @@ -215,7 +215,7 @@ impl LogicalPlan { left_schema_ref: schema_ref, .. }) - | Operator::Except(ExceptOperator { + | Operator::SetMembership(SetMembershipOperator { left_schema_ref: schema_ref, .. }) => SchemaOutput::SchemaRef(schema_ref.clone()), diff --git a/src/planner/operator/mod.rs b/src/planner/operator/mod.rs index 7b45b6c3..5ea733d3 100644 --- a/src/planner/operator/mod.rs +++ b/src/planner/operator/mod.rs @@ -25,7 +25,6 @@ pub mod describe; pub mod drop_index; pub mod drop_table; pub mod drop_view; -pub mod except; pub mod filter; pub mod function_scan; pub mod insert; @@ -35,6 +34,7 @@ pub mod mark_apply; pub mod project; pub mod scalar_apply; pub mod scalar_subquery; +pub mod set_membership; pub mod sort; pub mod table_scan; pub mod top_k; @@ -64,10 +64,10 @@ use crate::planner::operator::describe::DescribeOperator; use crate::planner::operator::drop_index::DropIndexOperator; use crate::planner::operator::drop_table::DropTableOperator; use crate::planner::operator::drop_view::DropViewOperator; -use crate::planner::operator::except::ExceptOperator; use crate::planner::operator::function_scan::FunctionScanOperator; use crate::planner::operator::insert::InsertOperator; use crate::planner::operator::join::JoinCondition; +use crate::planner::operator::set_membership::SetMembershipOperator; use crate::planner::operator::sort::SortField; use crate::planner::operator::top_k::TopKOperator; use crate::planner::operator::truncate::TruncateOperator; @@ -100,7 +100,7 @@ pub enum Operator { ShowView, Explain, Describe(DescribeOperator), - Except(ExceptOperator), + SetMembership(SetMembershipOperator), Union(UnionOperator), // DML Insert(InsertOperator), @@ -219,7 +219,7 @@ impl Operator { left_schema_ref: schema_ref, .. }) - | Operator::Except(ExceptOperator { + | Operator::SetMembership(SetMembershipOperator { left_schema_ref: schema_ref, .. }) => { @@ -329,9 +329,10 @@ impl Operator { left_schema_ref, _right_schema_ref, }) - | Operator::Except(ExceptOperator { + | Operator::SetMembership(SetMembershipOperator { left_schema_ref, _right_schema_ref, + .. }) => left_schema_ref .iter() .chain(_right_schema_ref.iter()) @@ -426,7 +427,7 @@ impl fmt::Display for Operator { Operator::CopyFromFile(op) => write!(f, "{op}"), Operator::CopyToFile(op) => write!(f, "{op}"), Operator::Union(op) => write!(f, "{op}"), - Operator::Except(op) => write!(f, "{op}"), + Operator::SetMembership(op) => write!(f, "{op}"), } } } diff --git a/src/planner/operator/except.rs b/src/planner/operator/set_membership.rs similarity index 73% rename from src/planner/operator/except.rs rename to src/planner/operator/set_membership.rs index 8e5570ca..5f34549d 100644 --- a/src/planner/operator/except.rs +++ b/src/planner/operator/set_membership.rs @@ -20,22 +20,40 @@ use kite_sql_serde_macros::ReferenceSerialization; use std::fmt; use std::fmt::Formatter; +#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash, ReferenceSerialization)] +pub enum SetMembershipKind { + Except, + Intersect, +} + +impl SetMembershipKind { + fn name(self) -> &'static str { + match self { + Self::Except => "Except", + Self::Intersect => "Intersect", + } + } +} + #[derive(Debug, PartialEq, Eq, Clone, Hash, ReferenceSerialization)] -pub struct ExceptOperator { +pub struct SetMembershipOperator { + pub kind: SetMembershipKind, pub left_schema_ref: SchemaRef, // mainly use `left_schema` as output and `right_schema` for `column pruning` pub _right_schema_ref: SchemaRef, } -impl ExceptOperator { +impl SetMembershipOperator { pub fn build( + kind: SetMembershipKind, left_schema_ref: SchemaRef, right_schema_ref: SchemaRef, left_plan: LogicalPlan, right_plan: LogicalPlan, ) -> LogicalPlan { LogicalPlan::new( - Operator::Except(ExceptOperator { + Operator::SetMembership(SetMembershipOperator { + kind, left_schema_ref, _right_schema_ref: right_schema_ref, }), @@ -47,7 +65,7 @@ impl ExceptOperator { } } -impl fmt::Display for ExceptOperator { +impl fmt::Display for SetMembershipOperator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let schema = self .left_schema_ref @@ -55,7 +73,7 @@ impl fmt::Display for ExceptOperator { .map(|column| column.name().to_string()) .join(", "); - write!(f, "Except: [{schema}]")?; + write!(f, "{}: [{schema}]", self.kind.name())?; Ok(()) } diff --git a/tests/macros-test/src/main.rs b/tests/macros-test/src/main.rs index d0fb32a1..a0eed830 100644 --- a/tests/macros-test/src/main.rs +++ b/tests/macros-test/src/main.rs @@ -946,6 +946,15 @@ mod test { .collect::, _>>()?; assert_eq!(ordered_union_ids, vec![1, 1, 2]); + let ordered_customer_ids = database + .from::() + .project_value(User::id()) + .intersect(database.from::().project_value(Order::user_id())) + .asc(User::id()) + .fetch::()? + .collect::, _>>()?; + assert_eq!(ordered_customer_ids, vec![1, 2]); + let users_without_orders = database .from::() .in_subquery( diff --git a/tests/slt/set_operation.slt b/tests/slt/set_operation.slt new file mode 100644 index 00000000..84d0a0f2 --- /dev/null +++ b/tests/slt/set_operation.slt @@ -0,0 +1,119 @@ +statement ok +create table set_left(id int primary key, v int) + +statement ok +create table set_right(id int primary key, v int) + +statement ok +insert into set_left values (1,1), (2,1), (3,2), (4,3), (5,NULL), (6,NULL), (7,5) + +statement ok +insert into set_right values (1,1), (2,1), (3,1), (4,3), (5,4), (6,NULL) + +query I rowsort +select v from set_left union select v from set_right +---- +1 +2 +3 +4 +5 +null + +query I rowsort +select v from set_left union all select v from set_right +---- +1 +1 +1 +1 +1 +2 +3 +3 +4 +5 +null +null +null + +# Edge case: tuple values should be compared as whole set values across set +# operators, rather than flattening their fields or comparing only one slot. +query T rowsort +select (v, id) from set_left where id = 1 or id = 2 +union +select (v, id) from set_left where id = 2 or id = 1 +---- +(1, 1) +(1, 2) + +query T rowsort +select (v, id) from set_left where id = 1 or id = 2 or id = 3 +except +select (v, id) from set_left where id = 1 or id = 3 +---- +(1, 2) + +query T rowsort +select (v, id) from set_left where id = 1 or id = 2 or id = 3 +intersect +select (v, id) from set_left where id = 2 or id = 3 or id = 4 +---- +(1, 2) +(2, 3) + +query I rowsort +select v from set_left except select v from set_right +---- +2 +5 + +# Edge case: EXCEPT ALL must subtract right-side counts instead of removing +# every matching value; one NULL remains because the left has two and the right has one. +query I rowsort +select v from set_left except all select v from set_right +---- +2 +5 +null + +query I rowsort +select v from set_left intersect select v from set_right +---- +1 +3 +null + +# Edge case: INTERSECT ALL emits min(left_count, right_count), so value 1 +# appears twice even though the right side has three copies. +query I rowsort +select v from set_left intersect all select v from set_right +---- +1 +1 +3 +null + +# Edge case: set membership treats NULL as a set value for duplicate +# elimination/matching, unlike ordinary NULL = NULL predicate evaluation. +query I rowsort +select v from set_left where v is null intersect select v from set_right where v is null +---- +null + +query I rowsort +select v from set_left where v is null except select v from set_right where v is null +---- + +query I +select v from set_left intersect select v from set_right order by v desc +---- +3 +1 +null + +statement ok +drop table set_right + +statement ok +drop table set_left diff --git a/tests/slt/union.slt b/tests/slt/union.slt deleted file mode 100644 index a767c831..00000000 --- a/tests/slt/union.slt +++ /dev/null @@ -1,85 +0,0 @@ -query I rowsort -select 1 union select 2 ----- -1 -2 - -query I rowsort -select 1 union select 2 + 1 ----- -1 -3 - -query I rowsort -select 1 union select 1 ----- -1 - -query I rowsort -select 1 union all select 1 ----- -1 -1 - -query T rowsort -select (1, 2) union select (2, 1) union select (1, 2) ----- -(1, 2) -(2, 1) - -statement ok -create table t1(id int primary key, v1 int unique) - -statement ok -insert into t1 values (1,1), (2,2), (3,3), (4,4) - -query I rowsort -select id from t1 union select v1 from t1 ----- -1 -2 -3 -4 - -query I rowsort -select id from t1 union all select v1 from t1 ----- -1 -1 -2 -2 -3 -3 -4 -4 - -query I -select id from t1 union all select v1 from t1 order by id desc limit 3 offset 1 ----- -4 -3 -3 - -statement ok -create table t2(id int primary key, v1 int unique) - -statement ok -insert into t2 values (2,20), (4,40) - -query I rowsort -select id from t1 except select id from t2 ----- -1 -3 - -query I -select id from t1 except select id from t2 order by id desc ----- -3 -1 - -statement ok -drop table t2 - -statement ok -drop table t1