diff --git a/Cargo.lock b/Cargo.lock index 077efb0fae349..1675f26e8a0f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2013,7 +2013,7 @@ dependencies = [ "chrono", "criterion", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "hex", "indexmap 2.12.1", "insta", @@ -2495,7 +2495,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap 2.12.1", "insta", "itertools 0.14.0", @@ -2530,7 +2530,8 @@ dependencies = [ "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", + "hashbrown 0.16.1", + "indexmap 2.12.1", "itertools 0.14.0", "parking_lot", ] @@ -2578,7 +2579,7 @@ dependencies = [ "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap 2.12.1", "insta", "itertools 0.14.0", @@ -3120,6 +3121,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3371,10 +3378,6 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash 0.8.12", - "allocator-api2", -] [[package]] name = "hashbrown" @@ -3384,7 +3387,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -3392,6 +3395,11 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heck" diff --git a/Cargo.toml b/Cargo.toml index 10fc88b7057c8..d801bb6114b20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -156,7 +156,7 @@ flate2 = "1.1.5" futures = "0.3" glob = "0.3.0" half = { version = "2.7.0", default-features = false } -hashbrown = { version = "0.14.5", features = ["raw"] } +hashbrown = { version = "0.16.1" } hex = { version = "0.4.3" } indexmap = "2.12.1" insta = { version = "1.45.0", features = ["glob", "filters"] } diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 3bec9bd35cbd0..2dbc08688652f 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -85,7 +85,7 @@ pub use functional_dependencies::{ aggregate_functional_dependencies, get_required_group_by_exprs_indices, get_target_functional_dependencies, }; -use hashbrown::hash_map::DefaultHashBuilder; +use hashbrown::DefaultHashBuilder; pub use join_type::{JoinConstraint, JoinSide, JoinType}; pub use nested_struct::cast_column; pub use null_equality::NullEquality; diff --git a/datafusion/functions-aggregate/src/array_agg.rs b/datafusion/functions-aggregate/src/array_agg.rs index 8491062124954..9b2e7429ab3b0 100644 --- a/datafusion/functions-aggregate/src/array_agg.rs +++ b/datafusion/functions-aggregate/src/array_agg.rs @@ -1111,7 +1111,7 @@ mod tests { ])])?; // without compaction, the size is 17112 - assert_eq!(acc.size(), 2184); + assert_eq!(acc.size(), 2224); Ok(()) } diff --git a/datafusion/physical-expr-common/Cargo.toml b/datafusion/physical-expr-common/Cargo.toml index d292da212e6c8..a81eafe196959 100644 --- a/datafusion/physical-expr-common/Cargo.toml +++ b/datafusion/physical-expr-common/Cargo.toml @@ -47,5 +47,6 @@ chrono = { workspace = true } datafusion-common = { workspace = true } datafusion-expr-common = { workspace = true } hashbrown = { workspace = true } +indexmap = { workspace = true } itertools = { workspace = true } parking_lot = { workspace = true } diff --git a/datafusion/physical-expr-common/src/sort_expr.rs b/datafusion/physical-expr-common/src/sort_expr.rs index db30dd6ed26e2..fa961981c0488 100644 --- a/datafusion/physical-expr-common/src/sort_expr.rs +++ b/datafusion/physical-expr-common/src/sort_expr.rs @@ -31,7 +31,7 @@ use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; use datafusion_common::{HashSet, Result}; use datafusion_expr_common::columnar_value::ColumnarValue; - +use indexmap::IndexSet; /// Represents Sort operation for a column in a RecordBatch /// /// Example: @@ -353,14 +353,14 @@ impl From for PhysicalSortExpr { /// 1. It is non-degenerate, meaning it contains at least one element. /// 2. It is duplicate-free, meaning it does not contain multiple entries for /// the same column. -#[derive(Debug, Clone)] +#[derive(Clone, Debug)] pub struct LexOrdering { /// Vector of sort expressions representing the lexicographical ordering. exprs: Vec, /// Set of expressions in the lexicographical ordering, used to ensure /// that the ordering is duplicate-free. Note that the elements in this /// set are the same underlying physical expressions as in `exprs`. - set: HashSet>, + set: IndexSet>, } impl LexOrdering { @@ -371,7 +371,7 @@ impl LexOrdering { let mut candidate = Self { // not valid yet; valid publicly-returned instance must be non-empty exprs: Vec::new(), - set: HashSet::new(), + set: IndexSet::new(), }; for expr in exprs { candidate.push(expr); @@ -421,7 +421,7 @@ impl LexOrdering { return false; } for PhysicalSortExpr { expr, .. } in self.exprs[len..].iter() { - self.set.remove(expr); + self.set.swap_remove(expr); } self.exprs.truncate(len); true diff --git a/datafusion/physical-expr/src/equivalence/class.rs b/datafusion/physical-expr/src/equivalence/class.rs index 91d339910b589..78478fc13ed4f 100644 --- a/datafusion/physical-expr/src/equivalence/class.rs +++ b/datafusion/physical-expr/src/equivalence/class.rs @@ -27,7 +27,7 @@ use crate::projection::ProjectionTargets; use crate::{PhysicalExpr, PhysicalExprRef, PhysicalSortExpr, PhysicalSortRequirement}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; -use datafusion_common::{HashMap, JoinType, Result, ScalarValue}; +use datafusion_common::{JoinType, Result, ScalarValue}; use datafusion_physical_expr_common::physical_expr::format_physical_expr_list; use indexmap::{IndexMap, IndexSet}; @@ -303,7 +303,7 @@ type AugmentedMapping<'a> = IndexMap< #[derive(Clone, Debug, Default)] pub struct EquivalenceGroup { /// A mapping from expressions to their equivalence class key. - map: HashMap, usize>, + map: IndexMap, usize>, /// The equivalence classes in this group. classes: Vec, } @@ -436,7 +436,7 @@ impl EquivalenceGroup { let cls = self.classes.swap_remove(idx); // Remove its entries from the lookup table: for expr in cls.iter() { - self.map.remove(expr); + self.map.swap_remove(expr); } // Update the lookup table for the moved class: if idx < self.classes.len() { @@ -448,7 +448,7 @@ impl EquivalenceGroup { /// Updates the entry in lookup table for the given equivalence class with /// the given index. fn update_lookup_table( - map: &mut HashMap, usize>, + map: &mut IndexMap, usize>, cls: &EquivalenceClass, idx: usize, ) { diff --git a/datafusion/physical-expr/src/expressions/case.rs b/datafusion/physical-expr/src/expressions/case.rs index 582d6a141a5ca..758317d3d2798 100644 --- a/datafusion/physical-expr/src/expressions/case.rs +++ b/datafusion/physical-expr/src/expressions/case.rs @@ -29,10 +29,11 @@ use arrow::datatypes::{DataType, Schema, UInt32Type, UnionMode}; use arrow::error::ArrowError; use datafusion_common::cast::as_boolean_array; use datafusion_common::{ - DataFusionError, HashMap, HashSet, Result, ScalarValue, assert_or_internal_err, - exec_err, internal_datafusion_err, internal_err, + DataFusionError, Result, ScalarValue, assert_or_internal_err, exec_err, + internal_datafusion_err, internal_err, }; use datafusion_expr::ColumnarValue; +use indexmap::{IndexMap, IndexSet}; use std::borrow::Cow; use std::hash::Hash; use std::{any::Any, sync::Arc}; @@ -122,7 +123,7 @@ impl CaseBody { /// Derives a [ProjectedCaseBody] from this [CaseBody]. fn project(&self) -> Result { // Determine the set of columns that are used in all the expressions of the case body. - let mut used_column_indices = HashSet::::new(); + let mut used_column_indices = IndexSet::::new(); let mut collect_column_indices = |expr: &Arc| { expr.apply(|expr| { if let Some(column) = expr.as_any().downcast_ref::() { @@ -149,7 +150,7 @@ impl CaseBody { .iter() .enumerate() .map(|(projected, original)| (*original, projected)) - .collect::>(); + .collect::>(); // Construct the projected body by rewriting each expression from the original body // using the column index mapping. diff --git a/datafusion/physical-expr/src/utils/mod.rs b/datafusion/physical-expr/src/utils/mod.rs index cd476ee3b31a3..2cdc326f5dd36 100644 --- a/datafusion/physical-expr/src/utils/mod.rs +++ b/datafusion/physical-expr/src/utils/mod.rs @@ -229,7 +229,7 @@ pub fn collect_columns(expr: &Arc) -> HashSet { let mut columns = HashSet::::new(); expr.apply(|expr| { if let Some(column) = expr.as_any().downcast_ref::() { - columns.get_or_insert_owned(column); + columns.get_or_insert_with(column, |c| c.clone()); } Ok(TreeNodeRecursion::Continue) })