From cac8914ec3fbf9c589fbc154abffc1dd020eff45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 19:41:35 +0100 Subject: [PATCH 01/16] Upgrade hashbrown to 0.16 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 10fc88b7057c..0a8838ddff3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -156,7 +156,7 @@ flate2 = "1.1.5" futures = "0.3" glob = "0.3.0" half = { version = "2.7.0", default-features = false } -hashbrown = { version = "0.14.5", features = ["raw"] } +hashbrown = { version = "0.16", features = ["raw"] } hex = { version = "0.4.3" } indexmap = "2.12.1" insta = { version = "1.45.0", features = ["glob", "filters"] } From c2ec4ed0b167f2e937036976cda0684c4f62fac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 19:42:40 +0100 Subject: [PATCH 02/16] Upgrade hashbrown to 0.16 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0a8838ddff3a..8bd9f0b15c67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -156,7 +156,7 @@ flate2 = "1.1.5" futures = "0.3" glob = "0.3.0" half = { version = "2.7.0", default-features = false } -hashbrown = { version = "0.16", features = ["raw"] } +hashbrown = { version = "0.16.1", features = ["raw"] } hex = { version = "0.4.3" } indexmap = "2.12.1" insta = { version = "1.45.0", features = ["glob", "filters"] } From b35306ccdd1ba9d1ef0759a312359f94de6cac18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 19:43:15 +0100 Subject: [PATCH 03/16] Upgrade hashbrown to 0.16 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 8bd9f0b15c67..d801bb6114b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -156,7 +156,7 @@ flate2 = "1.1.5" futures = "0.3" glob = "0.3.0" half = { version = "2.7.0", default-features = false } -hashbrown = { version = "0.16.1", features = ["raw"] } +hashbrown = { version = "0.16.1" } hex = { version = "0.4.3" } indexmap = "2.12.1" insta = { version = "1.45.0", features = ["glob", "filters"] } From 97fe22a492d3af21b675185bc5c3215987f253d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 19:44:36 +0100 Subject: [PATCH 04/16] Upgrade hashbrown to 0.16 --- Cargo.lock | 25 ++++++++++++++++--------- datafusion/common/src/lib.rs | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 077efb0fae34..4c58e82d8592 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2013,7 +2013,7 @@ dependencies = [ "chrono", "criterion", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "hex", "indexmap 2.12.1", "insta", @@ -2495,7 +2495,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap 2.12.1", "insta", "itertools 0.14.0", @@ -2530,7 +2530,7 @@ dependencies = [ "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "itertools 0.14.0", "parking_lot", ] @@ -2578,7 +2578,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap 2.12.1", "insta", "itertools 0.14.0", @@ -3120,6 +3120,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3371,10 +3377,6 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash 0.8.12", - "allocator-api2", -] [[package]] name = "hashbrown" @@ -3384,7 +3386,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -3392,6 +3394,11 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heck" diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 3bec9bd35cbd..2dbc08688652 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -85,7 +85,7 @@ pub use functional_dependencies::{ aggregate_functional_dependencies, get_required_group_by_exprs_indices, get_target_functional_dependencies, }; -use hashbrown::hash_map::DefaultHashBuilder; +use hashbrown::DefaultHashBuilder; pub use join_type::{JoinConstraint, JoinSide, JoinType}; pub use nested_struct::cast_column; pub use null_equality::NullEquality; From 1b6e84e5e1e1dff0a95ccdbe856d0d5d45c14bcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 19:46:35 +0100 Subject: [PATCH 05/16] Upgrade hashbrown to 0.16 --- datafusion/physical-expr/src/utils/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index cd476ee3b31a..2cdc326f5dd3 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -229,7 +229,7 @@ pub fn collect_columns(expr: &Arc) -> HashSet { let mut columns = HashSet::::new(); expr.apply(|expr| { if let Some(column) = expr.as_any().downcast_ref::() { - columns.get_or_insert_owned(column); + columns.get_or_insert_with(column, |c| c.clone()); } Ok(TreeNodeRecursion::Continue) }) From a0d74ec8884e3f06a1375404ee0be2d1b1707387 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 20:06:31 +0100 Subject: [PATCH 06/16] Fix nondeterministic behavior --- datafusion/physical-expr/src/expressions/case.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 582d6a141a5c..e42d16bd296c 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -145,10 +145,13 @@ impl CaseBody { } // Construct a mapping from the original column index to the projected column index. - let column_index_map = used_column_indices - .iter() + let mut sorted_used_column_indices = + used_column_indices.into_iter().collect::>(); + sorted_used_column_indices.sort_unstable(); + let column_index_map = sorted_used_column_indices + .into_iter() .enumerate() - .map(|(projected, original)| (*original, projected)) + .map(|(projected, original)| (original, projected)) .collect::>(); // Construct the projected body by rewriting each expression from the original body From 7d3af7af5103962bad7ddf8a8189d7d3cb0c2900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 20:19:42 +0100 Subject: [PATCH 07/16] Update test --- datafusion/functions-aggregate/src/array_agg.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 849106212495..6c61a86f6e84 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -1111,7 +1111,7 @@ mod tests { ])])?; // without compaction, the size is 17112 - assert_eq!(acc.size(), 2184); + assert_eq!(acc.size(), 2192); Ok(()) } From e6fea007682096ac5fbc39cc2cd063daab19a8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 20:56:45 +0100 Subject: [PATCH 08/16] Fix 2 --- .../physical-expr-common/src/sort_expr.rs | 11 +++++++- .../physical-expr/src/equivalence/class.rs | 25 +++++++++++++------ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index db30dd6ed26e..cca7920827e5 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -353,7 +353,7 @@ impl From for PhysicalSortExpr { /// 1. It is non-degenerate, meaning it contains at least one element. /// 2. It is duplicate-free, meaning it does not contain multiple entries for /// the same column. -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct LexOrdering { /// Vector of sort expressions representing the lexicographical ordering. exprs: Vec, @@ -363,6 +363,15 @@ pub struct LexOrdering { set: HashSet>, } +impl fmt::Debug for LexOrdering { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("LexOrdering") + .field("exprs", &self.exprs) + .field("set", &self.exprs.iter().map(|e| &e.expr).collect::>()) + .finish() + } +} + impl LexOrdering { /// Creates a new [`LexOrdering`] from the given vector of sort expressions. /// If the vector is empty, returns `None`. diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 91d339910b58..35da3fdf00d5 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::fmt::Display; +use std::fmt::{self, Debug, Display, Formatter}; use std::ops::Deref; use std::sync::Arc; use std::vec::IntoIter; @@ -46,7 +46,7 @@ pub enum AcrossPartitions { } impl Display for AcrossPartitions { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { AcrossPartitions::Heterogeneous => write!(f, "(heterogeneous)"), AcrossPartitions::Uniform(value) => { @@ -118,7 +118,7 @@ impl ConstExpr { pub fn format_list(input: &[ConstExpr]) -> impl Display + '_ { struct DisplayableList<'a>(&'a [ConstExpr]); impl Display for DisplayableList<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { let mut first = true; for const_expr in self.0 { if first { @@ -142,7 +142,7 @@ impl PartialEq for ConstExpr { } impl Display for ConstExpr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "{}", self.expr)?; write!(f, "{}", self.across_partitions) } @@ -277,7 +277,7 @@ impl IntoIterator for EquivalenceClass { } impl Display for EquivalenceClass { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{{")?; write!(f, "members: {}", format_physical_expr_list(&self.exprs))?; if let Some(across) = &self.constant { @@ -300,7 +300,7 @@ type AugmentedMapping<'a> = IndexMap< /// A collection of distinct `EquivalenceClass`es. This object supports fast /// lookups of expressions and their equivalence classes. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Default)] pub struct EquivalenceGroup { /// A mapping from expressions to their equivalence class key. map: HashMap, usize>, @@ -308,6 +308,17 @@ pub struct EquivalenceGroup { classes: Vec, } +impl Debug for EquivalenceGroup { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let mut map_entries: Vec<_> = self.map.iter().collect(); + map_entries.sort_by_key(|(expr, _)| format!("{:?}", expr)); + f.debug_struct("EquivalenceGroup") + .field("map", &map_entries) + .field("classes", &self.classes) + .finish() + } +} + impl EquivalenceGroup { /// Creates an equivalence group from the given equivalence classes. pub fn new(classes: impl IntoIterator) -> Self { @@ -876,7 +887,7 @@ impl IntoIterator for EquivalenceGroup { } impl Display for EquivalenceGroup { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "[")?; let mut iter = self.iter(); if let Some(cls) = iter.next() { From 646da343774521307484fe274b66b8a920b41f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 20:57:12 +0100 Subject: [PATCH 09/16] Fix 2 --- datafusion/physical-expr-common/src/sort_expr.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index cca7920827e5..c04fe454da6f 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -367,7 +367,10 @@ impl fmt::Debug for LexOrdering { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("LexOrdering") .field("exprs", &self.exprs) - .field("set", &self.exprs.iter().map(|e| &e.expr).collect::>()) + .field( + "set", + &self.exprs.iter().map(|e| &e.expr).collect::>(), + ) .finish() } } From 52361bb99e444936889c4304ccefb3d2c0eda2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 22:17:21 +0100 Subject: [PATCH 10/16] Fix 3 --- .../physical-expr/src/equivalence/class.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 35da3fdf00d5..0c5c21e17508 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -311,9 +311,22 @@ pub struct EquivalenceGroup { impl Debug for EquivalenceGroup { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { let mut map_entries: Vec<_> = self.map.iter().collect(); - map_entries.sort_by_key(|(expr, _)| format!("{:?}", expr)); + map_entries.sort_by_key(|(expr, _)| format!("{expr:?}")); + let map: BTreeMap<_, _> = map_entries + .iter() + .map(|(k, v)| (format!("{k:?}"), *v)) + .collect(); + struct DeterministicMap<'a>(&'a [(&'a Arc, &'a usize)]); + impl Debug for DeterministicMap<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_map() + .entries(self.0.iter().map(|&(k, v)| (k, v))) + .finish() + } + } + f.debug_struct("EquivalenceGroup") - .field("map", &map_entries) + .field("map", &DeterministicMap(&map_entries)) .field("classes", &self.classes) .finish() } From b39ab4ce7ac293fc2d85ce0e780d54883bb729b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 22:19:19 +0100 Subject: [PATCH 11/16] Fix 3 --- .../physical-expr/src/equivalence/class.rs | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 0c5c21e17508..257f515a4f18 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -27,7 +27,7 @@ use crate::projection::ProjectionTargets; use crate::{PhysicalExpr, PhysicalExprRef, PhysicalSortExpr, PhysicalSortRequirement}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{HashMap, JoinType, Result, ScalarValue}; +use datafusion_common::{JoinType, Result, ScalarValue}; use datafusion_physical_expr_common::physical_expr::format_physical_expr_list; use indexmap::{IndexMap, IndexSet}; @@ -303,30 +303,16 @@ type AugmentedMapping<'a> = IndexMap< #[derive(Clone, Default)] pub struct EquivalenceGroup { /// A mapping from expressions to their equivalence class key. - map: HashMap, usize>, + map: IndexMap, usize>, /// The equivalence classes in this group. classes: Vec, } impl Debug for EquivalenceGroup { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - let mut map_entries: Vec<_> = self.map.iter().collect(); - map_entries.sort_by_key(|(expr, _)| format!("{expr:?}")); - let map: BTreeMap<_, _> = map_entries - .iter() - .map(|(k, v)| (format!("{k:?}"), *v)) - .collect(); - struct DeterministicMap<'a>(&'a [(&'a Arc, &'a usize)]); - impl Debug for DeterministicMap<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.debug_map() - .entries(self.0.iter().map(|&(k, v)| (k, v))) - .finish() - } - } f.debug_struct("EquivalenceGroup") - .field("map", &DeterministicMap(&map_entries)) + .field("map", &self.map) .field("classes", &self.classes) .finish() } @@ -460,7 +446,7 @@ impl EquivalenceGroup { let cls = self.classes.swap_remove(idx); // Remove its entries from the lookup table: for expr in cls.iter() { - self.map.remove(expr); + self.map.swap_remove(expr); } // Update the lookup table for the moved class: if idx < self.classes.len() { @@ -472,7 +458,7 @@ impl EquivalenceGroup { /// Updates the entry in lookup table for the given equivalence class with /// the given index. fn update_lookup_table( - map: &mut HashMap, usize>, + map: &mut IndexMap, usize>, cls: &EquivalenceClass, idx: usize, ) { From 6267b1e59466907c4591e3b7d4446e876fff07a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 22:25:34 +0100 Subject: [PATCH 12/16] Fix 3 --- Cargo.lock | 1 + datafusion/physical-expr-common/Cargo.toml | 1 + .../physical-expr-common/src/sort_expr.rs | 22 +++++-------------- .../physical-expr/src/equivalence/class.rs | 10 --------- 4 files changed, 7 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c58e82d8592..1675f26e8a0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2531,6 +2531,7 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "hashbrown 0.16.1", + "indexmap 2.12.1", "itertools 0.14.0", "parking_lot", ] diff --git a/datafusion/physical-expr-common/Cargo.toml b/datafusion/physical-expr-common/Cargo.toml index d292da212e6c..a81eafe19695 100644 --- a/datafusion/physical-expr-common/Cargo.toml +++ b/datafusion/physical-expr-common/Cargo.toml @@ -47,5 +47,6 @@ chrono = { workspace = true } datafusion-common = { workspace = true } datafusion-expr-common = { workspace = true } hashbrown = { workspace = true } +indexmap = { workspace = true } itertools = { workspace = true } parking_lot = { workspace = true } diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index c04fe454da6f..fa961981c048 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -31,7 +31,7 @@ use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; use datafusion_common::{HashSet, Result}; use datafusion_expr_common::columnar_value::ColumnarValue; - +use indexmap::IndexSet; /// Represents Sort operation for a column in a RecordBatch /// /// Example: @@ -353,26 +353,14 @@ impl From for PhysicalSortExpr { /// 1. It is non-degenerate, meaning it contains at least one element. /// 2. It is duplicate-free, meaning it does not contain multiple entries for /// the same column. -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct LexOrdering { /// Vector of sort expressions representing the lexicographical ordering. exprs: Vec, /// Set of expressions in the lexicographical ordering, used to ensure /// that the ordering is duplicate-free. Note that the elements in this /// set are the same underlying physical expressions as in `exprs`. - set: HashSet>, -} - -impl fmt::Debug for LexOrdering { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.debug_struct("LexOrdering") - .field("exprs", &self.exprs) - .field( - "set", - &self.exprs.iter().map(|e| &e.expr).collect::>(), - ) - .finish() - } + set: IndexSet>, } impl LexOrdering { @@ -383,7 +371,7 @@ impl LexOrdering { let mut candidate = Self { // not valid yet; valid publicly-returned instance must be non-empty exprs: Vec::new(), - set: HashSet::new(), + set: IndexSet::new(), }; for expr in exprs { candidate.push(expr); @@ -433,7 +421,7 @@ impl LexOrdering { return false; } for PhysicalSortExpr { expr, .. } in self.exprs[len..].iter() { - self.set.remove(expr); + self.set.swap_remove(expr); } self.exprs.truncate(len); true diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 257f515a4f18..54757cc5b031 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -308,16 +308,6 @@ pub struct EquivalenceGroup { classes: Vec, } -impl Debug for EquivalenceGroup { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - - f.debug_struct("EquivalenceGroup") - .field("map", &self.map) - .field("classes", &self.classes) - .finish() - } -} - impl EquivalenceGroup { /// Creates an equivalence group from the given equivalence classes. pub fn new(classes: impl IntoIterator) -> Self { From 0c34cb64fddad6b6710cdf8660042139e6a7e3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 22:29:08 +0100 Subject: [PATCH 13/16] Simplify --- datafusion/physical-expr/src/equivalence/class.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 54757cc5b031..78478fc13ed4 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::fmt::{self, Debug, Display, Formatter}; +use std::fmt::Display; use std::ops::Deref; use std::sync::Arc; use std::vec::IntoIter; @@ -46,7 +46,7 @@ pub enum AcrossPartitions { } impl Display for AcrossPartitions { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { AcrossPartitions::Heterogeneous => write!(f, "(heterogeneous)"), AcrossPartitions::Uniform(value) => { @@ -118,7 +118,7 @@ impl ConstExpr { pub fn format_list(input: &[ConstExpr]) -> impl Display + '_ { struct DisplayableList<'a>(&'a [ConstExpr]); impl Display for DisplayableList<'_> { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let mut first = true; for const_expr in self.0 { if first { @@ -142,7 +142,7 @@ impl PartialEq for ConstExpr { } impl Display for ConstExpr { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.expr)?; write!(f, "{}", self.across_partitions) } @@ -277,7 +277,7 @@ impl IntoIterator for EquivalenceClass { } impl Display for EquivalenceClass { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{{")?; write!(f, "members: {}", format_physical_expr_list(&self.exprs))?; if let Some(across) = &self.constant { @@ -300,7 +300,7 @@ type AugmentedMapping<'a> = IndexMap< /// A collection of distinct `EquivalenceClass`es. This object supports fast /// lookups of expressions and their equivalence classes. -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default)] pub struct EquivalenceGroup { /// A mapping from expressions to their equivalence class key. map: IndexMap, usize>, @@ -876,7 +876,7 @@ impl IntoIterator for EquivalenceGroup { } impl Display for EquivalenceGroup { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "[")?; let mut iter = self.iter(); if let Some(cls) = iter.next() { From 417f7d02257d94d0ef25742ac5ba47e45a6dd38a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 22:30:42 +0100 Subject: [PATCH 14/16] Simplify --- datafusion/physical-expr/src/expressions/case.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index e42d16bd296c..5616b7f9e8a8 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -29,10 +29,11 @@ use arrow::datatypes::{DataType, Schema, UInt32Type, UnionMode}; use arrow::error::ArrowError; use datafusion_common::cast::as_boolean_array; use datafusion_common::{ - DataFusionError, HashMap, HashSet, Result, ScalarValue, assert_or_internal_err, - exec_err, internal_datafusion_err, internal_err, + DataFusionError, HashMap, Result, ScalarValue, assert_or_internal_err, exec_err, + internal_datafusion_err, internal_err, }; use datafusion_expr::ColumnarValue; +use indexmap::IndexSet; use std::borrow::Cow; use std::hash::Hash; use std::{any::Any, sync::Arc}; @@ -122,7 +123,7 @@ impl CaseBody { /// Derives a [ProjectedCaseBody] from this [CaseBody]. fn project(&self) -> Result { // Determine the set of columns that are used in all the expressions of the case body. - let mut used_column_indices = HashSet::::new(); + let mut used_column_indices = IndexSet::::new(); let mut collect_column_indices = |expr: &Arc| { expr.apply(|expr| { if let Some(column) = expr.as_any().downcast_ref::() { @@ -145,10 +146,7 @@ impl CaseBody { } // Construct a mapping from the original column index to the projected column index. - let mut sorted_used_column_indices = - used_column_indices.into_iter().collect::>(); - sorted_used_column_indices.sort_unstable(); - let column_index_map = sorted_used_column_indices + let column_index_map = used_column_indices .into_iter() .enumerate() .map(|(projected, original)| (original, projected)) From 6472572ee9344803f824e18e2592e9b20c095b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 22:40:46 +0100 Subject: [PATCH 15/16] Simplify --- datafusion/physical-expr/src/expressions/case.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 5616b7f9e8a8..758317d3d279 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -29,11 +29,11 @@ use arrow::datatypes::{DataType, Schema, UInt32Type, UnionMode}; use arrow::error::ArrowError; use datafusion_common::cast::as_boolean_array; use datafusion_common::{ - DataFusionError, HashMap, Result, ScalarValue, assert_or_internal_err, exec_err, + DataFusionError, Result, ScalarValue, assert_or_internal_err, exec_err, internal_datafusion_err, internal_err, }; use datafusion_expr::ColumnarValue; -use indexmap::IndexSet; +use indexmap::{IndexMap, IndexSet}; use std::borrow::Cow; use std::hash::Hash; use std::{any::Any, sync::Arc}; @@ -147,10 +147,10 @@ impl CaseBody { // Construct a mapping from the original column index to the projected column index. let column_index_map = used_column_indices - .into_iter() + .iter() .enumerate() - .map(|(projected, original)| (original, projected)) - .collect::>(); + .map(|(projected, original)| (*original, projected)) + .collect::>(); // Construct the projected body by rewriting each expression from the original body // using the column index mapping. From 8e80b3dccf5a70799074c023148eaade3797f90c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Mon, 29 Dec 2025 22:44:53 +0100 Subject: [PATCH 16/16] WIP --- datafusion/functions-aggregate/src/array_agg.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 6c61a86f6e84..9b2e7429ab3b 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -1111,7 +1111,7 @@ mod tests { ])])?; // without compaction, the size is 17112 - assert_eq!(acc.size(), 2192); + assert_eq!(acc.size(), 2224); Ok(()) }