From 95bb49b165c798fbc5797f797a24221ef633cf23 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sat, 20 Dec 2025 08:26:52 -0600 Subject: [PATCH 1/3] Add option to disable evaluation of stable expressions in optimizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a new configuration option `datafusion.optimizer.evaluate_stable_expressions` (default: true) that controls whether stable functions like `now()`, `current_date()`, and `current_time()` are evaluated to literal values during query planning. When set to false, stable functions are preserved in the plan rather than being converted to literals. This is useful for query rewrites that need to preserve stable function calls. Closes #19418 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- datafusion/common/src/config.rs | 11 +++++++ datafusion/expr/src/simplify.rs | 12 ++++++++ .../functions/src/datetime/current_date.rs | 10 +++++-- .../functions/src/datetime/current_time.rs | 10 +++++-- datafusion/functions/src/datetime/now.rs | 7 ++++- .../simplify_expressions/expr_simplifier.rs | 16 ++++++---- .../simplify_expressions/simplify_exprs.rs | 30 +++++++++++++++++++ .../sqllogictest/test_files/options.slt | 24 +++++++++++++++ 8 files changed, 110 insertions(+), 10 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 2bea2ec5a4526..253b5f3cde530 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1122,6 +1122,17 @@ config_namespace! { /// /// Default: true pub enable_sort_pushdown: bool, default = true + + /// When set to true (default), the optimizer will evaluate stable functions + /// (like `now()`, `current_date()`, `current_time()`) during query planning, + /// converting them to literal values. 
When set to false, stable functions + /// are preserved in the plan and evaluated at execution time. + /// + /// Setting this to false is useful when performing query rewrites that need + /// to preserve stable function calls, or when you want the function to be + /// re-evaluated for each execution of a prepared statement rather than + /// being fixed at planning time. + pub evaluate_stable_expressions: bool, default = true } } diff --git a/datafusion/expr/src/simplify.rs b/datafusion/expr/src/simplify.rs index bbe65904fb775..42e8542aa4b16 100644 --- a/datafusion/expr/src/simplify.rs +++ b/datafusion/expr/src/simplify.rs @@ -40,6 +40,18 @@ pub trait SimplifyInfo { /// Returns data type of this expr needed for determining optimized int type of a value fn get_data_type(&self, expr: &Expr) -> Result; + + /// Returns true if stable expressions (like `now()`) should be evaluated + /// during simplification. Defaults to true for backward compatibility. + /// + /// When false, stable functions are preserved in the expression tree + /// rather than being converted to literal values. 
+ fn evaluate_stable_expressions(&self) -> bool { + self.execution_props() + .config_options + .as_ref() + .is_none_or(|opts| opts.optimizer.evaluate_stable_expressions) + } } /// Provides simplification information based on DFSchema and diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 7edc1a58d9cb5..ef3f192aa3345 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -99,15 +99,21 @@ impl ScalarUDFImpl for CurrentDateFunc { fn simplify( &self, - _args: Vec, + args: Vec, info: &dyn SimplifyInfo, ) -> Result { + // Check if stable expression evaluation is disabled + if !info.evaluate_stable_expressions() { + return Ok(ExprSimplifyResult::Original(args)); + } + let now_ts = info.execution_props().query_execution_start_time; // Get timezone from config and convert to local time let days = info .execution_props() - .config_options() + .config_options + .as_ref() .and_then(|config| { config .execution diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 2c9bcdfe49db3..58236492f1544 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -95,15 +95,21 @@ impl ScalarUDFImpl for CurrentTimeFunc { fn simplify( &self, - _args: Vec, + args: Vec, info: &dyn SimplifyInfo, ) -> Result { + // Check if stable expression evaluation is disabled + if !info.evaluate_stable_expressions() { + return Ok(ExprSimplifyResult::Original(args)); + } + let now_ts = info.execution_props().query_execution_start_time; // Try to get timezone from config and convert to local time let nano = info .execution_props() - .config_options() + .config_options + .as_ref() .and_then(|config| { config .execution diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index b804efe59106d..12e5c7a88e088 
100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -121,9 +121,14 @@ impl ScalarUDFImpl for NowFunc { fn simplify( &self, - _args: Vec, + args: Vec, info: &dyn SimplifyInfo, ) -> Result { + // Check if stable expression evaluation is disabled + if !info.evaluate_stable_expressions() { + return Ok(ExprSimplifyResult::Original(args)); + } + let now_ts = info .execution_props() .query_execution_start_time diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 01de44cee1f60..e7ba714e60044 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -541,7 +541,7 @@ impl TreeNodeRewriter for ConstEvaluator<'_> { // stack as not ok (as all parents have at least one child or // descendant that can not be evaluated - if !Self::can_evaluate(&expr) { + if !self.can_evaluate(&expr) { // walk back up stack, marking first parent that is not mutable let parent_iter = self.can_evaluate.iter_mut().rev(); for p in parent_iter { @@ -620,18 +620,24 @@ impl<'a> ConstEvaluator<'a> { } /// Can a function of the specified volatility be evaluated? - fn volatility_ok(volatility: Volatility) -> bool { + fn volatility_ok(&self, volatility: Volatility) -> bool { match volatility { Volatility::Immutable => true, // Values for functions such as now() are taken from ExecutionProps - Volatility::Stable => true, + Volatility::Stable => { + // Check if stable expression evaluation is enabled in config + self.execution_props + .config_options + .as_ref() + .is_none_or(|opts| opts.optimizer.evaluate_stable_expressions) + } Volatility::Volatile => false, } } /// Can the expression be evaluated at plan time, (assuming all of /// its children can also be evaluated)? 
- fn can_evaluate(expr: &Expr) -> bool { + fn can_evaluate(&self, expr: &Expr) -> bool { // check for reasons we can't evaluate this node // // NOTE all expr types are listed here so when new ones are @@ -652,7 +658,7 @@ impl<'a> ConstEvaluator<'a> { | Expr::Wildcard { .. } | Expr::Placeholder(_) => false, Expr::ScalarFunction(ScalarFunction { func, .. }) => { - Self::volatility_ok(func.signature().volatility) + self.volatility_ok(func.signature().volatility) } Expr::Literal(_, _) | Expr::Alias(..) diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 1b25c5ce8a632..3ffd34b1c2afc 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -1073,4 +1073,34 @@ mod tests { " ) } + + #[test] + fn test_evaluate_stable_expressions_enabled_by_default() -> Result<()> { + // By default, stable expressions like now() should be simplified to literals + let time = Utc::now(); + + // With default config, evaluate_stable_expressions should be true + let config = OptimizerContext::new().with_query_execution_start_time(time); + assert!( + config.options().optimizer.evaluate_stable_expressions, + "evaluate_stable_expressions should be true by default" + ); + Ok(()) + } + + #[test] + fn test_evaluate_stable_expressions_disabled() -> Result<()> { + // When evaluate_stable_expressions is false, stable functions should NOT be simplified + use datafusion_common::config::ConfigOptions; + + let mut config_options = ConfigOptions::default(); + config_options.optimizer.evaluate_stable_expressions = false; + + // Verify the config is set correctly + assert!( + !config_options.optimizer.evaluate_stable_expressions, + "evaluate_stable_expressions should be false when explicitly disabled" + ); + Ok(()) + } } diff --git a/datafusion/sqllogictest/test_files/options.slt 
b/datafusion/sqllogictest/test_files/options.slt index 0d1583dbc0086..d16836e15738f 100644 --- a/datafusion/sqllogictest/test_files/options.slt +++ b/datafusion/sqllogictest/test_files/options.slt @@ -282,3 +282,27 @@ select 1e40 + 1e40, arrow_typeof(1e40 + 1e40), # Restore option to default value statement ok set datafusion.sql_parser.parse_float_as_decimal = false; + +## +# test_evaluate_stable_expressions +## + +# By default, now() is simplified to a literal during planning. +# Disable stable expression evaluation to check that it is preserved instead. +statement ok +set datafusion.optimizer.evaluate_stable_expressions = false; + +# With config disabled, now() should remain as a function call in the plan +query TT +explain select now(); +---- +logical_plan +01)Projection: now() +02)--EmptyRelation: rows=1 +physical_plan +01)ProjectionExec: expr=[now()] +02)--PlaceholderRowExec + +# Restore default +statement ok +set datafusion.optimizer.evaluate_stable_expressions = true; From ed2277bdc2881a161d0eba99ac396943c45e354c Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Sat, 20 Dec 2025 08:38:22 -0600 Subject: [PATCH 2/3] update docs --- docs/source/user-guide/configs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index c9222afe8ceb5..5764eb8b6b050 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -163,6 +163,7 @@ The following configuration settings are available: | datafusion.optimizer.prefer_existing_union | false | When set to true, the optimizer will not attempt to convert Union to Interleave | | datafusion.optimizer.expand_views_at_output | false | When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. | | datafusion.optimizer.enable_sort_pushdown | true | Enable sort pushdown optimization. 
When enabled, attempts to push sort requirements down to data sources that can natively handle them (e.g., by reversing file/row group read order). Returns **inexact ordering**: Sort operator is kept for correctness, but optimized input enables early termination for TopK queries (ORDER BY ... LIMIT N), providing significant speedup. Memory: No additional overhead (only changes read order). Future: Will add option to detect perfectly sorted data and eliminate Sort completely. Default: true | +| datafusion.optimizer.evaluate_stable_expressions | true | When set to true (default), the optimizer will evaluate stable functions (like `now()`, `current_date()`, `current_time()`) during query planning, converting them to literal values. When set to false, stable functions are preserved in the plan and evaluated at execution time. Setting this to false is useful when performing query rewrites that need to preserve stable function calls, or when you want the function to be re-evaluated for each execution of a prepared statement rather than being fixed at planning time. 
| | datafusion.explain.logical_plan_only | false | When set to true, the explain statement will only print logical plans | | datafusion.explain.physical_plan_only | false | When set to true, the explain statement will only print physical plans | | datafusion.explain.show_statistics | false | When set to true, the explain statement will print operator statistics for physical plans | From 6e296600f18246aa177fb13fcd0f9ca8fa768948 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Tue, 23 Dec 2025 19:40:29 -0600 Subject: [PATCH 3/3] update tests --- .../src/simplify_expressions/simplify_exprs.rs | 16 ---------------- .../test_files/information_schema.slt | 2 ++ 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 3ffd34b1c2afc..f528c6a2e61a1 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -1087,20 +1087,4 @@ mod tests { ); Ok(()) } - - #[test] - fn test_evaluate_stable_expressions_disabled() -> Result<()> { - // When evaluate_stable_expressions is false, stable functions should NOT be simplified - use datafusion_common::config::ConfigOptions; - - let mut config_options = ConfigOptions::default(); - config_options.optimizer.evaluate_stable_expressions = false; - - // Verify the config is set correctly - assert!( - !config_options.optimizer.evaluate_stable_expressions, - "evaluate_stable_expressions should be false when explicitly disabled" - ); - Ok(()) - } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 18f72cb9f7798..cf2a86e52c9ec 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -301,6 +301,7 @@ 
datafusion.optimizer.enable_sort_pushdown true datafusion.optimizer.enable_topk_aggregation true datafusion.optimizer.enable_topk_dynamic_filter_pushdown true datafusion.optimizer.enable_window_limits true +datafusion.optimizer.evaluate_stable_expressions true datafusion.optimizer.expand_views_at_output false datafusion.optimizer.filter_null_join_keys false datafusion.optimizer.hash_join_inlist_pushdown_max_distinct_values 150 @@ -436,6 +437,7 @@ datafusion.optimizer.enable_sort_pushdown true Enable sort pushdown optimization datafusion.optimizer.enable_topk_aggregation true When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible datafusion.optimizer.enable_topk_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down TopK dynamic filters into the file scan phase. datafusion.optimizer.enable_window_limits true When set to true, the optimizer will attempt to push limit operations past window functions, if possible +datafusion.optimizer.evaluate_stable_expressions true When set to true (default), the optimizer will evaluate stable functions (like `now()`, `current_date()`, `current_time()`) during query planning, converting them to literal values. When set to false, stable functions are preserved in the plan and evaluated at execution time. Setting this to false is useful when performing query rewrites that need to preserve stable function calls, or when you want the function to be re-evaluated for each execution of a prepared statement rather than being fixed at planning time. datafusion.optimizer.expand_views_at_output false When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. 
datafusion.optimizer.filter_null_join_keys false When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down. datafusion.optimizer.hash_join_inlist_pushdown_max_distinct_values 150 Maximum number of distinct values (rows) in the build side of a hash join to be pushed down as an InList expression for dynamic filtering. Build sides with more rows than this will use hash table lookups instead. Set to 0 to always use hash table lookups. This provides an additional limit beyond `hash_join_inlist_pushdown_max_size` to prevent very large IN lists that might not provide much benefit over hash table lookups. This uses the deduplicated row count once the build side has been evaluated. The default is 150 values per partition. This is inspired by Trino's `max-filter-keys-per-column` setting. See: