diff --git a/datafusion-examples/examples/builtin_functions/function_factory.rs b/datafusion-examples/examples/builtin_functions/function_factory.rs index 7eff0d0b5c484..106c53cdf7f12 100644 --- a/datafusion-examples/examples/builtin_functions/function_factory.rs +++ b/datafusion-examples/examples/builtin_functions/function_factory.rs @@ -24,7 +24,7 @@ use datafusion::error::Result; use datafusion::execution::context::{ FunctionFactory, RegisterFunction, SessionContext, SessionState, }; -use datafusion::logical_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion::logical_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion::logical_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion::logical_expr::{ ColumnarValue, CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, @@ -145,7 +145,7 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { let replacement = Self::replacement(&self.expr, &args)?; diff --git a/datafusion-examples/examples/query_planning/expr_api.rs b/datafusion-examples/examples/query_planning/expr_api.rs index 47de669023f7c..386273c72817b 100644 --- a/datafusion-examples/examples/query_planning/expr_api.rs +++ b/datafusion-examples/examples/query_planning/expr_api.rs @@ -175,8 +175,9 @@ fn simplify_demo() -> Result<()> { // the ExecutionProps carries information needed to simplify // expressions, such as the current time (to evaluate `now()` // correctly) - let props = ExecutionProps::new(); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::default() + .with_schema(schema) + .with_current_time(); let simplifier = ExprSimplifier::new(context); // And then call the simplify_expr function: @@ -191,7 +192,9 @@ fn simplify_demo() -> Result<()> { // here are some other examples of what DataFusion is capable of let schema = Schema::new(vec![make_field("i", DataType::Int64)]).to_dfschema_ref()?; - let context = SimplifyContext::new(&props).with_schema(schema.clone()); + let context = SimplifyContext::default() + .with_schema(Arc::clone(&schema)) + .with_current_time(); let simplifier = ExprSimplifier::new(context); // basic arithmetic simplification @@ -551,7 +554,9 @@ fn type_coercion_demo() -> Result<()> { assert!(physical_expr.evaluate(&batch).is_ok()); // 2. Type coercion with `ExprSimplifier::coerce`. - let context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema.clone())); + let context = SimplifyContext::default() + .with_schema(Arc::new(df_schema.clone())) + .with_current_time(); let simplifier = ExprSimplifier::new(context); let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?; let physical_expr = datafusion::physical_expr::create_physical_expr( diff --git a/datafusion-examples/examples/udf/advanced_udaf.rs b/datafusion-examples/examples/udf/advanced_udaf.rs index fbb9e652486ce..16d9a9f7cbdf7 100644 --- a/datafusion-examples/examples/udf/advanced_udaf.rs +++ b/datafusion-examples/examples/udf/advanced_udaf.rs @@ -34,7 +34,7 @@ use datafusion::logical_expr::{ Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, expr::AggregateFunction, function::{AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs}, - simplify::SimplifyInfo, + simplify::SimplifyContext, }; use datafusion::prelude::*; @@ -421,7 +421,7 @@ impl AggregateUDFImpl for SimplifiedGeoMeanUdaf { /// Optionally replaces a UDAF with another expression during query optimization. fn simplify(&self) -> Option { - let simplify = |aggregate_function: AggregateFunction, _: &dyn SimplifyInfo| { + let simplify = |aggregate_function: AggregateFunction, _: &SimplifyContext| { // Replaces the UDAF with `GeoMeanUdaf` as a placeholder example to demonstrate the `simplify` method. // In real-world scenarios, you might create UDFs from built-in expressions. Ok(Expr::AggregateFunction(AggregateFunction::new_udf( diff --git a/datafusion-examples/examples/udf/advanced_udwf.rs b/datafusion-examples/examples/udf/advanced_udwf.rs index e8d3a75b29dec..ffe06eac8f6a6 100644 --- a/datafusion-examples/examples/udf/advanced_udwf.rs +++ b/datafusion-examples/examples/udf/advanced_udwf.rs @@ -32,7 +32,7 @@ use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion::logical_expr::function::{ PartitionEvaluatorArgs, WindowFunctionSimplification, WindowUDFFieldArgs, }; -use datafusion::logical_expr::simplify::SimplifyInfo; +use datafusion::logical_expr::simplify::SimplifyContext; use datafusion::logical_expr::{ Expr, LimitEffect, PartitionEvaluator, Signature, WindowFrame, WindowFunctionDefinition, WindowUDF, WindowUDFImpl, @@ -198,7 +198,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { /// this function will simplify `SimplifySmoothItUdf` to `AggregateUDF` for `Avg` /// default implementation will not be called (left as `todo!()`) fn simplify(&self) -> Option { - let simplify = |window_function: WindowFunction, _: &dyn SimplifyInfo| { + let simplify = |window_function: WindowFunction, _: &SimplifyContext| { Ok(Expr::from(WindowFunction { fun: WindowFunctionDefinition::AggregateUDF(avg_udaf()), params: WindowFunctionParams { diff --git a/datafusion-examples/examples/udf/simple_udtf.rs b/datafusion-examples/examples/udf/simple_udtf.rs index 087b8ba73af5c..18eb8887a34a4 100644 --- a/datafusion-examples/examples/udf/simple_udtf.rs +++ b/datafusion-examples/examples/udf/simple_udtf.rs @@ -28,7 +28,6 @@ use datafusion::common::{ScalarValue, plan_err}; use datafusion::datasource::TableProvider; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::error::Result; -use datafusion::execution::context::ExecutionProps; use datafusion::logical_expr::simplify::SimplifyContext; use datafusion::logical_expr::{Expr, TableType}; use datafusion::optimizer::simplify_expressions::ExprSimplifier; @@ -142,8 +141,7 @@ impl TableFunctionImpl for LocalCsvTableFunc { .get(1) .map(|expr| { // try to simplify the expression, so 1+2 becomes 3, for example - let execution_props = ExecutionProps::new(); - let info = SimplifyContext::new(&execution_props); + let info = SimplifyContext::default(); let expr = ExprSimplifier::new(info).simplify(expr.clone())?; if let Expr::Literal(ScalarValue::Int64(Some(limit)), _) = expr { diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 55a031d870122..f67e7e4517d2b 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -1134,6 +1134,12 @@ impl TryFrom for DFSchema { } } +impl From for SchemaRef { + fn from(dfschema: DFSchema) -> Self { + Arc::clone(&dfschema.inner) + } +} + // Hashing refers to a subset of fields considered in PartialEq. impl Hash for DFSchema { fn hash(&self, state: &mut H) { diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index a769bb01b4354..5ca5c051ff220 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -93,9 +93,9 @@ use datafusion_expr::{ logical_plan::{DdlStatement, Statement}, planner::ExprPlanner, }; -use datafusion_optimizer::Analyzer; use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; use datafusion_optimizer::simplify_expressions::ExprSimplifier; +use datafusion_optimizer::{Analyzer, OptimizerContext}; use datafusion_optimizer::{AnalyzerRule, OptimizerRule}; use datafusion_session::SessionStore; @@ -749,12 +749,19 @@ impl SessionContext { ); } } - // Store the unoptimized plan into the session state. Although storing the - // optimized plan or the physical plan would be more efficient, doing so is - // not currently feasible. This is because `now()` would be optimized to a - // constant value, causing each EXECUTE to yield the same result, which is - // incorrect behavior. - self.state.write().store_prepared(name, fields, input)?; + // Optimize the plan without evaluating expressions like now() + let optimizer_context = OptimizerContext::new_with_config_options( + Arc::clone(self.state().config().options()), + ) + .without_query_execution_start_time(); + let plan = self.state().optimizer().optimize( + Arc::unwrap_or_clone(input), + &optimizer_context, + |_1, _2| {}, + )?; + self.state + .write() + .store_prepared(name, fields, Arc::new(plan))?; self.return_empty_dataframe() } LogicalPlan::Statement(Statement::Execute(execute)) => { @@ -1394,7 +1401,12 @@ impl SessionContext { })?; let state = self.state.read(); - let context = SimplifyContext::new(state.execution_props()); + let context = SimplifyContext::default() + .with_schema(Arc::clone(prepared.plan.schema())) + .with_config_options(Arc::clone(state.config_options())) + .with_query_execution_start_time( + state.execution_props().query_execution_start_time, + ); let simplifier = ExprSimplifier::new(context); // Only allow literals as parameters for now. diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 6a9ebcdf51250..7cdbc77ae90c3 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -57,10 +57,8 @@ use datafusion_expr::planner::ExprPlanner; #[cfg(feature = "sql")] use datafusion_expr::planner::{RelationPlanner, TypePlanner}; use datafusion_expr::registry::{FunctionRegistry, SerializerRegistry}; -use datafusion_expr::simplify::SimplifyInfo; -use datafusion_expr::{ - AggregateUDF, Explain, Expr, ExprSchemable, LogicalPlan, ScalarUDF, WindowUDF, -}; +use datafusion_expr::simplify::SimplifyContext; +use datafusion_expr::{AggregateUDF, Explain, Expr, LogicalPlan, ScalarUDF, WindowUDF}; use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_optimizer::{ Analyzer, AnalyzerRule, Optimizer, OptimizerConfig, OptimizerRule, @@ -744,13 +742,18 @@ impl SessionState { expr: Expr, df_schema: &DFSchema, ) -> datafusion_common::Result> { - let simplifier = - ExprSimplifier::new(SessionSimplifyProvider::new(self, df_schema)); + let config_options = self.config_options(); + let simplify_context = SimplifyContext::default() + .with_schema(Arc::new(df_schema.clone())) + .with_config_options(Arc::clone(config_options)) + .with_query_execution_start_time( + self.execution_props().query_execution_start_time, + ); + let simplifier = ExprSimplifier::new(simplify_context); // apply type coercion here to ensure types match let mut expr = simplifier.coerce(expr, df_schema)?; // rewrite Exprs to functions if necessary - let config_options = self.config_options(); for rewrite in self.analyzer.function_rewrites() { expr = expr .transform_up(|expr| rewrite.rewrite(expr, df_schema, config_options))? @@ -1834,9 +1837,12 @@ impl ContextProvider for SessionContextProvider<'_> { .get(name) .cloned() .ok_or_else(|| plan_datafusion_err!("table function '{name}' not found"))?; - let dummy_schema = DFSchema::empty(); - let simplifier = - ExprSimplifier::new(SessionSimplifyProvider::new(self.state, &dummy_schema)); + let simplify_context = SimplifyContext::default() + .with_config_options(Arc::clone(self.state.config_options())) + .with_query_execution_start_time( + self.state.execution_props().query_execution_start_time, + ); + let simplifier = ExprSimplifier::new(simplify_context); let args = args .into_iter() .map(|arg| simplifier.simplify(arg)) @@ -2063,7 +2069,7 @@ impl datafusion_execution::TaskContextProvider for SessionState { } impl OptimizerConfig for SessionState { - fn query_execution_start_time(&self) -> DateTime { + fn query_execution_start_time(&self) -> Option> { self.execution_props.query_execution_start_time } @@ -2115,35 +2121,6 @@ impl QueryPlanner for DefaultQueryPlanner { } } -struct SessionSimplifyProvider<'a> { - state: &'a SessionState, - df_schema: &'a DFSchema, -} - -impl<'a> SessionSimplifyProvider<'a> { - fn new(state: &'a SessionState, df_schema: &'a DFSchema) -> Self { - Self { state, df_schema } - } -} - -impl SimplifyInfo for SessionSimplifyProvider<'_> { - fn is_boolean_type(&self, expr: &Expr) -> datafusion_common::Result { - Ok(expr.get_type(self.df_schema)? == DataType::Boolean) - } - - fn nullable(&self, expr: &Expr) -> datafusion_common::Result { - expr.nullable(self.df_schema) - } - - fn execution_props(&self) -> &ExecutionProps { - self.state.execution_props() - } - - fn get_data_type(&self, expr: &Expr) -> datafusion_common::Result { - expr.get_type(self.df_schema) - } -} - #[derive(Debug)] pub(crate) struct PreparedPlan { /// Data types of the parameters diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index 44e884c23a681..0cded68ae63fd 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -174,8 +174,7 @@ impl TestParquetFile { let df_schema = Arc::clone(&self.schema).to_dfschema_ref()?; // run coercion on the filters to coerce types etc. - let props = ExecutionProps::new(); - let context = SimplifyContext::new(&props).with_schema(Arc::clone(&df_schema)); + let context = SimplifyContext::default().with_schema(Arc::clone(&df_schema)); if let Some(filter) = maybe_filter { let simplifier = ExprSimplifier::new(context); let filter = simplifier.coerce(filter, &df_schema).unwrap(); diff --git a/datafusion/core/tests/expr_api/mod.rs b/datafusion/core/tests/expr_api/mod.rs index 90c1b96749b3c..91dd5de7fcd64 100644 --- a/datafusion/core/tests/expr_api/mod.rs +++ b/datafusion/core/tests/expr_api/mod.rs @@ -24,7 +24,6 @@ use arrow::util::pretty::{pretty_format_batches, pretty_format_columns}; use datafusion::prelude::*; use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::ExprFunctionExt; -use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::expr::NullTreatment; use datafusion_expr::simplify::SimplifyContext; use datafusion_functions::core::expr_ext::FieldAccessor; @@ -422,9 +421,7 @@ fn create_simplified_expr_test(expr: Expr, expected_expr: &str) { let df_schema = DFSchema::try_from(batch.schema()).unwrap(); // Simplify the expression first - let props = ExecutionProps::new(); - let simplify_context = - SimplifyContext::new(&props).with_schema(df_schema.clone().into()); + let simplify_context = SimplifyContext::default().with_schema(Arc::new(df_schema)); let simplifier = ExprSimplifier::new(simplify_context).with_max_cycles(10); let simplified = simplifier.simplify(expr).unwrap(); create_expr_test(simplified, expected_expr); diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index a42dfc951da0d..02f2503faf22a 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -23,16 +23,16 @@ use arrow::array::types::IntervalDayTime; use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use chrono::{DateTime, TimeZone, Utc}; -use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*}; +use datafusion::{error::Result, prelude::*}; use datafusion_common::ScalarValue; use datafusion_common::cast::as_int32_array; use datafusion_common::{DFSchemaRef, ToDFSchema}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::logical_plan::builder::table_scan_with_filters; -use datafusion_expr::simplify::SimplifyInfo; +use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{ - Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, ScalarUDF, - Volatility, table_scan, + Cast, ColumnarValue, ExprSchemable, LogicalPlan, LogicalPlanBuilder, Projection, + ScalarUDF, Volatility, table_scan, }; use datafusion_functions::math; use datafusion_optimizer::optimizer::Optimizer; @@ -40,50 +40,6 @@ use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyExpress use datafusion_optimizer::{OptimizerContext, OptimizerRule}; use std::sync::Arc; -/// In order to simplify expressions, DataFusion must have information -/// about the expressions. -/// -/// You can provide that information using DataFusion [DFSchema] -/// objects or from some other implementation -struct MyInfo { - /// The input schema - schema: DFSchemaRef, - - /// Execution specific details needed for constant evaluation such - /// as the current time for `now()` and [VariableProviders] - execution_props: ExecutionProps, -} - -impl SimplifyInfo for MyInfo { - fn is_boolean_type(&self, expr: &Expr) -> Result { - Ok(matches!( - expr.get_type(self.schema.as_ref())?, - DataType::Boolean - )) - } - - fn nullable(&self, expr: &Expr) -> Result { - expr.nullable(self.schema.as_ref()) - } - - fn execution_props(&self) -> &ExecutionProps { - &self.execution_props - } - - fn get_data_type(&self, expr: &Expr) -> Result { - expr.get_type(self.schema.as_ref()) - } -} - -impl From for MyInfo { - fn from(schema: DFSchemaRef) -> Self { - Self { - schema, - execution_props: ExecutionProps::new(), - } - } -} - /// A schema like: /// /// a: Int32 (possibly with nulls) @@ -132,14 +88,10 @@ fn test_evaluate_with_start_time( expected_expr: Expr, date_time: &DateTime, ) { - let execution_props = - ExecutionProps::new().with_query_execution_start_time(*date_time); - - let info: MyInfo = MyInfo { - schema: schema(), - execution_props, - }; - let simplifier = ExprSimplifier::new(info); + let context = SimplifyContext::default() + .with_schema(schema()) + .with_query_execution_start_time(Some(*date_time)); + let simplifier = ExprSimplifier::new(context); let simplified_expr = simplifier .simplify(input_expr.clone()) .expect("successfully evaluated"); @@ -201,7 +153,9 @@ fn to_timestamp_expr(arg: impl Into) -> Expr { #[test] fn basic() { - let info: MyInfo = schema().into(); + let context = SimplifyContext::default() + .with_schema(schema()) + .with_query_execution_start_time(Some(Utc::now())); // The `Expr` is a core concept in DataFusion, and DataFusion can // help simplify it. @@ -210,21 +164,21 @@ fn basic() { // optimize form `a < 5` automatically let expr = col("a").lt(lit(2i32) + lit(3i32)); - let simplifier = ExprSimplifier::new(info); + let simplifier = ExprSimplifier::new(context); let simplified = simplifier.simplify(expr).unwrap(); assert_eq!(simplified, col("a").lt(lit(5i32))); } #[test] fn fold_and_simplify() { - let info: MyInfo = schema().into(); + let context = SimplifyContext::default().with_schema(schema()); // What will it do with the expression `concat('foo', 'bar') == 'foobar')`? let expr = concat(vec![lit("foo"), lit("bar")]).eq(lit("foobar")); // Since datafusion applies both simplification *and* rewriting // some expressions can be entirely simplified - let simplifier = ExprSimplifier::new(info); + let simplifier = ExprSimplifier::new(context); let simplified = simplifier.simplify(expr).unwrap(); assert_eq!(simplified, lit(true)) } @@ -523,6 +477,72 @@ fn multiple_now() -> Result<()> { Ok(()) } +/// Unwraps an alias expression to get the inner expression +fn unrwap_aliases(expr: &Expr) -> &Expr { + match expr { + Expr::Alias(alias) => unrwap_aliases(&alias.expr), + expr => expr, + } +} + +/// Test that `now()` is simplified to a literal when execution start time is set, +/// but remains as an expression when no execution start time is available. +#[test] +fn now_simplification_with_and_without_start_time() { + let plan = LogicalPlanBuilder::empty(false) + .project(vec![now()]) + .unwrap() + .build() + .unwrap(); + + // Case 1: With execution start time set, now() should be simplified to a literal + { + let time = DateTime::::from_timestamp_nanos(123); + let ctx: OptimizerContext = + OptimizerContext::new().with_query_execution_start_time(time); + let optimizer = SimplifyExpressions {}; + let simplified = optimizer + .rewrite(plan.clone(), &ctx) + .expect("rewrite should succeed") + .data; + let LogicalPlan::Projection(Projection { expr, .. }) = simplified else { + panic!("Expected Projection plan"); + }; + assert_eq!(expr.len(), 1); + let simplified = unrwap_aliases(expr.first().unwrap()); + // Should be a literal timestamp + match simplified { + Expr::Literal(ScalarValue::TimestampNanosecond(Some(ts), _), _) => { + assert_eq!(*ts, time.timestamp_nanos_opt().unwrap()); + } + other => panic!("Expected timestamp literal, got: {other:?}"), + } + } + + // Case 2: Without execution start time, now() should remain as a function call + { + let ctx: OptimizerContext = + OptimizerContext::new().without_query_execution_start_time(); + let optimizer = SimplifyExpressions {}; + let simplified = optimizer + .rewrite(plan, &ctx) + .expect("rewrite should succeed") + .data; + let LogicalPlan::Projection(Projection { expr, .. }) = simplified else { + panic!("Expected Projection plan"); + }; + assert_eq!(expr.len(), 1); + let simplified = unrwap_aliases(expr.first().unwrap()); + // Should still be a now() function call + match simplified { + Expr::ScalarFunction(ScalarFunction { func, .. }) => { + assert_eq!(func.name(), "now"); + } + other => panic!("Expected now() function call, got: {other:?}"), + } + } +} + // ------------------------------ // --- Simplifier tests ----- // ------------------------------ @@ -545,11 +565,8 @@ fn expr_test_schema() -> DFSchemaRef { } fn test_simplify(input_expr: Expr, expected_expr: Expr) { - let info: MyInfo = MyInfo { - schema: expr_test_schema(), - execution_props: ExecutionProps::new(), - }; - let simplifier = ExprSimplifier::new(info); + let context = SimplifyContext::default().with_schema(expr_test_schema()); + let simplifier = ExprSimplifier::new(context); let simplified_expr = simplifier .simplify(input_expr.clone()) .expect("successfully evaluated"); @@ -564,11 +581,10 @@ fn test_simplify_with_cycle_count( expected_expr: Expr, expected_count: u32, ) { - let info: MyInfo = MyInfo { - schema: expr_test_schema(), - execution_props: ExecutionProps::new(), - }; - let simplifier = ExprSimplifier::new(info); + let context = SimplifyContext::default() + .with_schema(expr_test_schema()) + .with_query_execution_start_time(Some(Utc::now())); + let simplifier = ExprSimplifier::new(context); let (simplified_expr, count) = simplifier .simplify_with_cycle_count_transformed(input_expr.clone()) .expect("successfully evaluated"); diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index b86cd94a8a9b7..24cade1e80d5a 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -42,7 +42,7 @@ use datafusion_common::{ assert_batches_sorted_eq, assert_contains, exec_datafusion_err, exec_err, not_impl_err, plan_err, }; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ Accumulator, ColumnarValue, CreateFunction, CreateFunctionBody, LogicalPlanBuilder, OperateFunctionArg, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, @@ -699,7 +699,7 @@ impl ScalarUDFImpl for CastToI64UDF { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { // DataFusion should have ensured the function is called with just a // single argument @@ -975,7 +975,7 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { let replacement = Self::replacement(&self.expr, &args, &self.defaults)?; diff --git a/datafusion/expr/src/execution_props.rs b/datafusion/expr/src/execution_props.rs index 20d8f82bf48a2..3bf6978eb60ee 100644 --- a/datafusion/expr/src/execution_props.rs +++ b/datafusion/expr/src/execution_props.rs @@ -16,7 +16,7 @@ // under the License. use crate::var_provider::{VarProvider, VarType}; -use chrono::{DateTime, TimeZone, Utc}; +use chrono::{DateTime, Utc}; use datafusion_common::HashMap; use datafusion_common::alias::AliasGenerator; use datafusion_common::config::ConfigOptions; @@ -33,7 +33,9 @@ use std::sync::Arc; /// done so during predicate pruning and expression simplification #[derive(Clone, Debug)] pub struct ExecutionProps { - pub query_execution_start_time: DateTime, + /// The time at which the query execution started. If `None`, + /// functions like `now()` will not be simplified during optimization. + pub query_execution_start_time: Option>, /// Alias generator used by subquery optimizer rules pub alias_generator: Arc, /// Snapshot of config options when the query started @@ -52,9 +54,7 @@ impl ExecutionProps { /// Creates a new execution props pub fn new() -> Self { ExecutionProps { - // Set this to a fixed sentinel to make it obvious if this is - // not being updated / propagated correctly - query_execution_start_time: Utc.timestamp_nanos(0), + query_execution_start_time: None, alias_generator: Arc::new(AliasGenerator::new()), config_options: None, var_providers: None, @@ -66,7 +66,7 @@ impl ExecutionProps { mut self, query_execution_start_time: DateTime, ) -> Self { - self.query_execution_start_time = query_execution_start_time; + self.query_execution_start_time = Some(query_execution_start_time); self } @@ -79,7 +79,7 @@ impl ExecutionProps { /// Marks the execution of query started timestamp. /// This also instantiates a new alias generator. pub fn mark_start_execution(&mut self, config_options: Arc) -> &Self { - self.query_execution_start_time = Utc::now(); + self.query_execution_start_time = Some(Utc::now()); self.alias_generator = Arc::new(AliasGenerator::new()); self.config_options = Some(config_options); &*self @@ -126,7 +126,7 @@ mod test { fn debug() { let props = ExecutionProps::new(); assert_eq!( - "ExecutionProps { query_execution_start_time: 1970-01-01T00:00:00Z, alias_generator: AliasGenerator { next_id: 1 }, config_options: None, var_providers: None }", + "ExecutionProps { query_execution_start_time: None, alias_generator: AliasGenerator { next_id: 1 }, config_options: None, var_providers: None }", format!("{props:?}") ); } diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index e0235d32292fa..68d2c9073241b 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -67,25 +67,25 @@ pub type StateTypeFunction = /// [crate::udaf::AggregateUDFImpl::simplify] simplifier closure /// A closure with two arguments: /// * 'aggregate_function': [crate::expr::AggregateFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyInfo] +/// * 'info': [crate::simplify::SimplifyContext] /// /// Closure returns simplified [Expr] or an error. pub type AggregateFunctionSimplification = Box< dyn Fn( crate::expr::AggregateFunction, - &dyn crate::simplify::SimplifyInfo, + &crate::simplify::SimplifyContext, ) -> Result, >; /// [crate::udwf::WindowUDFImpl::simplify] simplifier closure /// A closure with two arguments: /// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked -/// * 'info': [crate::simplify::SimplifyInfo] +/// * 'info': [crate::simplify::SimplifyContext] /// /// Closure returns simplified [Expr] or an error. pub type WindowFunctionSimplification = Box< dyn Fn( crate::expr::WindowFunction, - &dyn crate::simplify::SimplifyInfo, + &crate::simplify::SimplifyContext, ) -> Result, >; diff --git a/datafusion/expr/src/simplify.rs b/datafusion/expr/src/simplify.rs index bbe65904fb775..8c68067a55a37 100644 --- a/datafusion/expr/src/simplify.rs +++ b/datafusion/expr/src/simplify.rs @@ -15,92 +15,98 @@ // specific language governing permissions and limitations // under the License. -//! Structs and traits to provide the information needed for expression simplification. +//! Structs to provide the information needed for expression simplification. + +use std::sync::Arc; use arrow::datatypes::DataType; -use datafusion_common::{DFSchemaRef, Result, internal_datafusion_err}; +use chrono::{DateTime, Utc}; +use datafusion_common::config::ConfigOptions; +use datafusion_common::{DFSchema, DFSchemaRef, Result}; -use crate::{Expr, ExprSchemable, execution_props::ExecutionProps}; +use crate::{Expr, ExprSchemable}; -/// Provides the information necessary to apply algebraic simplification to an -/// [Expr]. See [SimplifyContext] for one concrete implementation. -/// -/// This trait exists so that other systems can plug schema -/// information in without having to create `DFSchema` objects. If you -/// have a [`DFSchemaRef`] you can use [`SimplifyContext`] -pub trait SimplifyInfo { - /// Returns true if this Expr has boolean type - fn is_boolean_type(&self, expr: &Expr) -> Result; - - /// Returns true of this expr is nullable (could possibly be NULL) - fn nullable(&self, expr: &Expr) -> Result; - - /// Returns details needed for partial expression evaluation - fn execution_props(&self) -> &ExecutionProps; - - /// Returns data type of this expr needed for determining optimized int type of a value - fn get_data_type(&self, expr: &Expr) -> Result; -} - -/// Provides simplification information based on DFSchema and -/// [`ExecutionProps`]. This is the default implementation used by DataFusion +/// Provides simplification information based on schema, query execution time, +/// and configuration options. /// /// # Example /// See the `simplify_demo` in the [`expr_api` example] /// /// [`expr_api` example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/query_planning/expr_api.rs #[derive(Debug, Clone)] -pub struct SimplifyContext<'a> { - schema: Option, - props: &'a ExecutionProps, +pub struct SimplifyContext { + schema: DFSchemaRef, + query_execution_start_time: Option>, + config_options: Arc, } -impl<'a> SimplifyContext<'a> { - /// Create a new SimplifyContext - pub fn new(props: &'a ExecutionProps) -> Self { +impl Default for SimplifyContext { + fn default() -> Self { Self { - schema: None, - props, + schema: Arc::new(DFSchema::empty()), + query_execution_start_time: None, + config_options: Arc::new(ConfigOptions::default()), } } +} + +impl SimplifyContext { + /// Set the [`ConfigOptions`] for this context + pub fn with_config_options(mut self, config_options: Arc) -> Self { + self.config_options = config_options; + self + } - /// Register a [`DFSchemaRef`] with this context + /// Set the schema for this context pub fn with_schema(mut self, schema: DFSchemaRef) -> Self { - self.schema = Some(schema); + self.schema = schema; self } -} -impl SimplifyInfo for SimplifyContext<'_> { - /// Returns true if this Expr has boolean type - fn is_boolean_type(&self, expr: &Expr) -> Result { - if let Some(schema) = &self.schema - && let Ok(DataType::Boolean) = expr.get_type(schema) - { - return Ok(true); - } + /// Set the query execution start time + pub fn with_query_execution_start_time( + mut self, + query_execution_start_time: Option>, + ) -> Self { + self.query_execution_start_time = query_execution_start_time; + self + } - Ok(false) + /// Set the query execution start to the current time + pub fn with_current_time(mut self) -> Self { + self.query_execution_start_time = Some(Utc::now()); + self + } + + /// Returns the schema + pub fn schema(&self) -> &DFSchemaRef { + &self.schema + } + + /// Returns true if this Expr has boolean type + pub fn is_boolean_type(&self, expr: &Expr) -> Result { + Ok(expr.get_type(&self.schema)? == DataType::Boolean) } /// Returns true if expr is nullable - fn nullable(&self, expr: &Expr) -> Result { - let schema = self.schema.as_ref().ok_or_else(|| { - internal_datafusion_err!("attempt to get nullability without schema") - })?; - expr.nullable(schema.as_ref()) + pub fn nullable(&self, expr: &Expr) -> Result { + expr.nullable(self.schema.as_ref()) } /// Returns data type of this expr needed for determining optimized int type of a value - fn get_data_type(&self, expr: &Expr) -> Result { - let schema = self.schema.as_ref().ok_or_else(|| { - internal_datafusion_err!("attempt to get data type without schema") - })?; - expr.get_type(schema) + pub fn get_data_type(&self, expr: &Expr) -> Result { + expr.get_type(&self.schema) + } + + /// Returns the time at which the query execution started. + /// If `None`, time-dependent functions like `now()` will not be simplified. + pub fn query_execution_start_time(&self) -> Option> { + self.query_execution_start_time } - fn execution_props(&self) -> &ExecutionProps { - self.props + /// Returns the configuration options for the session. + pub fn config_options(&self) -> &Arc { + &self.config_options } } diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index a69176e1173a5..ee38077dbf304 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -668,7 +668,7 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// Or, a closure with two arguments: /// * 'aggregate_function': [AggregateFunction] for which simplified has been invoked - /// * 'info': [crate::simplify::SimplifyInfo] + /// * 'info': [crate::simplify::SimplifyContext] /// /// closure returns simplified [Expr] or an error. /// diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 26d7fc99cb17c..b97c251c9993e 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -19,7 +19,7 @@ use crate::async_udf::AsyncScalarUDF; use crate::expr::schema_name_from_exprs_comma_separated_without_space; -use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; +use crate::simplify::{ExprSimplifyResult, SimplifyContext}; use crate::sort_properties::{ExprProperties, SortProperties}; use crate::udf_eq::UdfEq; use crate::{ColumnarValue, Documentation, Expr, Signature}; @@ -221,7 +221,7 @@ impl ScalarUDF { pub fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { self.inner.simplify(args, info) } @@ -691,7 +691,7 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { Ok(ExprSimplifyResult::Original(args)) } @@ -921,7 +921,7 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { self.inner.simplify(args, info) } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 37055daa1ca4f..8f2b8a0d9bfe5 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -362,7 +362,7 @@ pub trait WindowUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// Or, a closure with two arguments: /// * 'window_function': [crate::expr::WindowFunction] for which simplified has been invoked - /// * 'info': [crate::simplify::SimplifyInfo] + /// * 'info': [crate::simplify::SimplifyContext] /// /// # Notes /// The returned expression must have the same schema as the original diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs index d6c8eabb459e6..a09779d0c936f 100644 --- a/datafusion/functions-aggregate/src/percentile_cont.rs +++ b/datafusion/functions-aggregate/src/percentile_cont.rs @@ -48,7 +48,7 @@ use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_expr::{ expr::{AggregateFunction, Cast, Sort}, function::{AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs}, - simplify::SimplifyInfo, + simplify::SimplifyContext, }; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::filtered_null_mask; @@ -388,7 +388,7 @@ enum PercentileRewriteTarget { #[expect(clippy::needless_pass_by_value)] fn simplify_percentile_cont_aggregate( aggregate_function: AggregateFunction, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { let original_expr = Expr::AggregateFunction(aggregate_function.clone()); let params = &aggregate_function.params; diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 54b94abafb999..97671d4a95f23 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -125,7 +125,7 @@ impl ScalarUDFImpl for ArrayHas { fn simplify( &self, mut args: Vec, - _info: &dyn datafusion_expr::simplify::SimplifyInfo, + _info: &datafusion_expr::simplify::SimplifyContext, ) -> Result { let [haystack, needle] = take_function_args(self.name(), &mut args)?; @@ -684,8 +684,8 @@ mod tests { utils::SingleRowListArrayBuilder, }; use datafusion_expr::{ - ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col, - execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult, + ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, col, lit, + simplify::ExprSimplifyResult, }; use crate::expr_fn::make_array; @@ -701,8 +701,7 @@ mod tests { .build_list_scalar()); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) @@ -725,8 +724,7 @@ mod tests { let haystack = make_array(vec![lit(1), lit(2), lit(3)]); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(Expr::InList(in_list))) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) @@ -749,8 +747,7 @@ mod tests { let haystack = Expr::Literal(ScalarValue::Null, None); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(simplified)) = ArrayHas::new().simplify(vec![haystack, needle], &context) else { @@ -767,8 +764,7 @@ mod tests { let haystack = Expr::Literal(ScalarValue::List(Arc::new(haystack)), None); let needle = col("c"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Simplified(simplified)) = ArrayHas::new().simplify(vec![haystack, needle], &context) else { @@ -783,8 +779,7 @@ mod tests { let haystack = col("c1"); let needle = col("c2"); - let props = ExecutionProps::new(); - let context = datafusion_expr::simplify::SimplifyContext::new(&props); + let context = datafusion_expr::simplify::SimplifyContext::default(); let Ok(ExprSimplifyResult::Original(args)) = ArrayHas::new().simplify(vec![haystack, needle.clone()], &context) diff --git a/datafusion/functions/src/core/arrow_cast.rs b/datafusion/functions/src/core/arrow_cast.rs index a0101dc09da91..f493db5f3e14e 100644 --- a/datafusion/functions/src/core/arrow_cast.rs +++ b/datafusion/functions/src/core/arrow_cast.rs @@ -25,7 +25,7 @@ use datafusion_common::{ use datafusion_common::{exec_datafusion_err, utils::take_function_args}; use std::any::Any; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, @@ -153,7 +153,7 @@ impl ScalarUDFImpl for ArrowCastFunc { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { // convert this into a real cast let target_type = data_type_from_args(&args)?; diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 1404f68570974..359a6f6c9c84c 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -19,7 +19,7 @@ use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::{Result, exec_err, internal_err, plan_err}; use datafusion_expr::binary::try_type_union_resolution; use datafusion_expr::conditional_expressions::CaseBuilder; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, }; @@ -97,7 +97,7 @@ impl ScalarUDFImpl for CoalesceFunc { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { if args.is_empty() { return plan_err!("coalesce must have at least one argument"); diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index 3e961e4da4e75..47a903639dde5 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -421,7 +421,7 @@ impl ScalarUDFImpl for GetFieldFunc { fn simplify( &self, args: Vec, - _info: &dyn datafusion_expr::simplify::SimplifyInfo, + _info: &datafusion_expr::simplify::SimplifyContext, ) -> Result { // Need at least 2 args (base + field) if args.len() < 2 { diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index 0b9968a88fc95..0b4966d4fbdce 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -18,7 +18,7 @@ use crate::core::coalesce::CoalesceFunc; use arrow::datatypes::{DataType, FieldRef}; use datafusion_common::Result; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, @@ -124,7 +124,7 @@ impl ScalarUDFImpl for NVLFunc { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { self.coalesce.simplify(args, info) } diff --git a/datafusion/functions/src/core/nvl2.rs b/datafusion/functions/src/core/nvl2.rs index eda59fe07f57e..0b092c44d502b 100644 --- a/datafusion/functions/src/core/nvl2.rs +++ b/datafusion/functions/src/core/nvl2.rs @@ -21,7 +21,7 @@ use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility, conditional_expressions::CaseBuilder, - simplify::{ExprSimplifyResult, SimplifyInfo}, + simplify::{ExprSimplifyResult, SimplifyContext}, type_coercion::binary::comparison_coercion, }; use datafusion_macros::user_doc; @@ -108,7 +108,7 @@ impl ScalarUDFImpl for NVL2Func { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { let [test, if_non_null, if_null] = take_function_args(self.name(), args)?; diff --git a/datafusion/functions/src/datetime/current_date.rs b/datafusion/functions/src/datetime/current_date.rs index 7edc1a58d9cb5..3e38772720979 100644 --- a/datafusion/functions/src/datetime/current_date.rs +++ b/datafusion/functions/src/datetime/current_date.rs @@ -23,7 +23,7 @@ use arrow::datatypes::DataType::Date32; use chrono::{Datelike, NaiveDate, TimeZone}; use datafusion_common::{Result, ScalarValue, internal_err}; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; @@ -99,23 +99,20 @@ impl ScalarUDFImpl for CurrentDateFunc { fn simplify( &self, - _args: Vec, - info: &dyn SimplifyInfo, + args: Vec, + info: &SimplifyContext, ) -> Result { - let now_ts = info.execution_props().query_execution_start_time; + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; // Get timezone from config and convert to local time let days = info - .execution_props() .config_options() - .and_then(|config| { - config - .execution - .time_zone - .as_ref() - .map(|tz| tz.parse::().ok()) - }) - .flatten() + .execution + .time_zone + .as_ref() + .and_then(|tz| tz.parse::().ok()) .map_or_else( || datetime_to_days(&now_ts), |tz| { diff --git a/datafusion/functions/src/datetime/current_time.rs b/datafusion/functions/src/datetime/current_time.rs index 2c9bcdfe49db3..855c0c13dc6b4 100644 --- a/datafusion/functions/src/datetime/current_time.rs +++ b/datafusion/functions/src/datetime/current_time.rs @@ -22,7 +22,7 @@ use arrow::datatypes::TimeUnit::Nanosecond; use chrono::TimeZone; use chrono::Timelike; use datafusion_common::{Result, ScalarValue, internal_err}; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ScalarUDFImpl, Signature, Volatility, }; @@ -95,23 +95,20 @@ impl ScalarUDFImpl for CurrentTimeFunc { fn simplify( &self, - _args: Vec, - info: &dyn SimplifyInfo, + args: Vec, + info: &SimplifyContext, ) -> Result { - let now_ts = info.execution_props().query_execution_start_time; + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; // Try to get timezone from config and convert to local time let nano = info - .execution_props() .config_options() - .and_then(|config| { - config - .execution - .time_zone - .as_ref() - .map(|tz| tz.parse::().ok()) - }) - .flatten() + .execution + .time_zone + .as_ref() + .and_then(|tz| tz.parse::().ok()) .map_or_else( || datetime_to_time_nanos(&now_ts), |tz| { @@ -143,46 +140,24 @@ fn datetime_to_time_nanos(dt: &chrono::DateTime) -> Option Result { - Ok(false) - } - - fn nullable(&self, _expr: &Expr) -> Result { - Ok(true) - } - - fn execution_props(&self) -> &ExecutionProps { - &self.execution_props - } - - fn get_data_type(&self, _expr: &Expr) -> Result { - Ok(Time64(Nanosecond)) - } - } - - fn set_session_timezone_env(tz: &str, start_time: DateTime) -> MockSimplifyInfo { - let mut config = datafusion_common::config::ConfigOptions::default(); + fn set_session_timezone_env(tz: &str, start_time: DateTime) -> SimplifyContext { + let mut config = ConfigOptions::default(); config.execution.time_zone = if tz.is_empty() { None } else { Some(tz.to_string()) }; - let mut execution_props = - ExecutionProps::new().with_query_execution_start_time(start_time); - execution_props.config_options = Some(Arc::new(config)); - MockSimplifyInfo { execution_props } + let schema = Arc::new(DFSchema::empty()); + SimplifyContext::default() + .with_schema(schema) + .with_config_options(Arc::new(config)) + .with_query_execution_start_time(Some(start_time)) } #[test] diff --git a/datafusion/functions/src/datetime/now.rs b/datafusion/functions/src/datetime/now.rs index b804efe59106d..338a62a118f31 100644 --- a/datafusion/functions/src/datetime/now.rs +++ b/datafusion/functions/src/datetime/now.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use datafusion_common::config::ConfigOptions; use datafusion_common::{Result, ScalarValue, internal_err}; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, @@ -121,16 +121,18 @@ impl ScalarUDFImpl for NowFunc { fn simplify( &self, - _args: Vec, - info: &dyn SimplifyInfo, + args: Vec, + info: &SimplifyContext, ) -> Result { - let now_ts = info - .execution_props() - .query_execution_start_time - .timestamp_nanos_opt(); + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; Ok(ExprSimplifyResult::Simplified(Expr::Literal( - ScalarValue::TimestampNanosecond(now_ts, self.timezone.clone()), + ScalarValue::TimestampNanosecond( + now_ts.timestamp_nanos_opt(), + self.timezone.clone(), + ), None, ))) } diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs index 0c50afa2dffd3..fb57f24257e5b 100644 --- a/datafusion/functions/src/math/log.rs +++ b/datafusion/functions/src/math/log.rs @@ -36,7 +36,7 @@ use datafusion_common::{ Result, ScalarValue, exec_err, internal_err, plan_datafusion_err, plan_err, }; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, Expr, ScalarFunctionArgs, ScalarUDF, @@ -343,7 +343,7 @@ impl ScalarUDFImpl for LogFunc { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { let mut arg_types = args .iter() @@ -430,7 +430,6 @@ fn is_pow(func: &ScalarUDF) -> bool { #[cfg(test)] mod tests { - use std::collections::HashMap; use std::sync::Arc; use super::*; @@ -440,10 +439,8 @@ mod tests { }; use arrow::compute::SortOptions; use arrow::datatypes::{DECIMAL256_MAX_PRECISION, Field}; - use datafusion_common::DFSchema; use datafusion_common::cast::{as_float32_array, as_float64_array}; use datafusion_common::config::ConfigOptions; - use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; #[test] @@ -784,10 +781,7 @@ mod tests { #[test] // Test log() simplification errors fn test_log_simplify_errors() { - let props = ExecutionProps::new(); - let schema = - Arc::new(DFSchema::new_with_metadata(vec![], HashMap::new()).unwrap()); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::default(); // Expect 0 args to error let _ = LogFunc::new().simplify(vec![], &context).unwrap_err(); // Expect 3 args to error @@ -799,10 +793,7 @@ mod tests { #[test] // Test that non-simplifiable log() expressions are unchanged after simplification fn test_log_simplify_original() { - let props = ExecutionProps::new(); - let schema = - Arc::new(DFSchema::new_with_metadata(vec![], HashMap::new()).unwrap()); - let context = SimplifyContext::new(&props).with_schema(schema); + let context = SimplifyContext::default(); // One argument with no simplifications let result = LogFunc::new().simplify(vec![lit(2)], &context).unwrap(); let ExprSimplifyResult::Original(args) = result else { diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs index 33166f6444f2a..fafadd3ba4477 100644 --- a/datafusion/functions/src/math/power.rs +++ b/datafusion/functions/src/math/power.rs @@ -31,7 +31,7 @@ use datafusion_common::types::{NativeType, logical_float64, logical_int64}; use datafusion_common::utils::take_function_args; use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass, Volatility, lit, @@ -346,7 +346,7 @@ impl ScalarUDFImpl for PowerFunc { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { let [base, exponent] = take_function_args("power", args)?; let base_type = info.get_data_type(&base)?; diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index f707c8e0d8c7f..bc06d462c04bd 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -31,7 +31,7 @@ use datafusion_expr::{ }; use datafusion_macros::user_doc; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr_common::operator::Operator; use datafusion_expr_common::type_coercion::binary::BinaryTypeCoercer; use std::any::Any; @@ -158,7 +158,7 @@ impl ScalarUDFImpl for RegexpLikeFunc { fn simplify( &self, mut args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { // Try to simplify regexp_like usage to one of the builtin operators since those have // optimized code paths for the case where the regular expression pattern is a scalar. diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index 42d455a05760a..80de194ad15e3 100644 --- a/datafusion/functions/src/string/concat.rs +++ b/datafusion/functions/src/string/concat.rs @@ -28,7 +28,7 @@ use crate::strings::{ use datafusion_common::cast::{as_string_array, as_string_view_array}; use datafusion_common::{Result, ScalarValue, internal_err, plan_err}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ColumnarValue, Documentation, Expr, Volatility, lit}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature}; use datafusion_macros::user_doc; @@ -277,7 +277,7 @@ impl ScalarUDFImpl for ConcatFunc { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { simplify_concat(args) } diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs index 8fe095c5ce2be..8ed3ade968cc2 100644 --- a/datafusion/functions/src/string/concat_ws.rs +++ b/datafusion/functions/src/string/concat_ws.rs @@ -28,7 +28,7 @@ use crate::strings::{ColumnarValueRef, StringArrayBuilder}; use datafusion_common::cast::{as_string_array, as_string_view_array}; use datafusion_common::{Result, ScalarValue, exec_err, internal_err, plan_err}; use datafusion_expr::expr::ScalarFunction; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::{ColumnarValue, Documentation, Expr, Volatility, lit}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature}; use datafusion_macros::user_doc; @@ -301,7 +301,7 @@ impl ScalarUDFImpl for ConcatWsFunc { fn simplify( &self, args: Vec, - _info: &dyn SimplifyInfo, + _info: &SimplifyContext, ) -> Result { match &args[..] { [delimiter, vals @ ..] => simplify_concat_ws(delimiter, vals), diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index 259612c42997e..ce106ca988a2d 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ b/datafusion/functions/src/string/starts_with.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::array::{ArrayRef, Scalar}; use arrow::compute::kernels::comparison::starts_with as arrow_starts_with; use arrow::datatypes::DataType; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyContext}; use datafusion_expr::type_coercion::binary::{ binary_to_string_coercion, string_coercion, }; @@ -168,7 +168,7 @@ impl ScalarUDFImpl for StartsWithFunc { fn simplify( &self, args: Vec, - info: &dyn SimplifyInfo, + info: &SimplifyContext, ) -> Result { if let Expr::Literal(scalar_value, _) = &args[1] { // Convert starts_with(col, 'prefix') to col LIKE 'prefix%' with proper escaping diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index e8a9c8c83ae93..52d777f874fa8 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -36,7 +36,6 @@ use datafusion_expr::{ BinaryExpr, Cast, EmptyRelation, Expr, FetchType, LogicalPlan, LogicalPlanBuilder, Operator, expr, lit, }; -use datafusion_physical_expr::execution_props::ExecutionProps; /// This struct rewrite the sub query plan by pull up the correlated /// expressions(contains outer reference columns) from the inner subquery's @@ -509,8 +508,7 @@ fn agg_exprs_evaluation_result_on_empty_batch( .data()?; let result_expr = result_expr.unalias(); - let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(Arc::clone(schema)); + let info = SimplifyContext::default().with_schema(Arc::clone(schema)); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; expr_result_map_for_count_bug.insert(e.schema_name().to_string(), result_expr); @@ -543,8 +541,7 @@ fn proj_exprs_evaluation_result_on_empty_batch( .data()?; if result_expr.ne(expr) { - let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(Arc::clone(schema)); + let info = SimplifyContext::default().with_schema(Arc::clone(schema)); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; let expr_name = match expr { @@ -584,8 +581,7 @@ fn filter_exprs_evaluation_result_on_empty_batch( .data()?; let pull_up_expr = if result_expr.ne(filter_expr) { - let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(schema); + let info = SimplifyContext::default().with_schema(schema); let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; match &result_expr { diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index ededcec0a47c9..8740ab072a1f5 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -100,8 +100,9 @@ pub trait OptimizerRule: Debug { /// Options to control the DataFusion Optimizer. pub trait OptimizerConfig { /// Return the time at which the query execution started. This - /// time is used as the value for now() - fn query_execution_start_time(&self) -> DateTime; + /// time is used as the value for `now()`. If `None`, time-dependent + /// functions like `now()` will not be simplified during optimization. + fn query_execution_start_time(&self) -> Option>; /// Return alias generator used to generate unique aliases for subqueries fn alias_generator(&self) -> &Arc; @@ -118,8 +119,9 @@ pub trait OptimizerConfig { #[derive(Debug)] pub struct OptimizerContext { /// Query execution start time that can be used to rewrite - /// expressions such as `now()` to use a literal value instead - query_execution_start_time: DateTime, + /// expressions such as `now()` to use a literal value instead. + /// If `None`, time-dependent functions will not be simplified. + query_execution_start_time: Option>, /// Alias generator used to generate unique aliases for subqueries alias_generator: Arc, @@ -139,7 +141,7 @@ impl OptimizerContext { /// Create a optimizer config with provided [ConfigOptions]. pub fn new_with_config_options(options: Arc) -> Self { Self { - query_execution_start_time: Utc::now(), + query_execution_start_time: Some(Utc::now()), alias_generator: Arc::new(AliasGenerator::new()), options, } @@ -153,13 +155,19 @@ impl OptimizerContext { self } - /// Specify whether the optimizer should skip rules that produce - /// errors, or fail the query + /// Set the query execution start time pub fn with_query_execution_start_time( mut self, - query_execution_tart_time: DateTime, + query_execution_start_time: DateTime, ) -> Self { - self.query_execution_start_time = query_execution_tart_time; + self.query_execution_start_time = Some(query_execution_start_time); + self + } + + /// Clear the query execution start time. When `None`, time-dependent + /// functions like `now()` will not be simplified during optimization. + pub fn without_query_execution_start_time(mut self) -> Self { + self.query_execution_start_time = None; self } @@ -185,7 +193,7 @@ impl Default for OptimizerContext { } impl OptimizerConfig for OptimizerContext { - fn query_execution_start_time(&self) -> DateTime { + fn query_execution_start_time(&self) -> Option> { self.query_execution_start_time } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 01de44cee1f60..55bff5849c5cb 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -27,6 +27,7 @@ use std::collections::HashSet; use std::ops::Not; use std::sync::Arc; +use datafusion_common::config::ConfigOptions; use datafusion_common::{ DFSchema, DataFusionError, Result, ScalarValue, exec_datafusion_err, internal_err, }; @@ -51,7 +52,7 @@ use datafusion_physical_expr::{create_physical_expr, execution_props::ExecutionP use super::inlist_simplifier::ShortenInListSimplifier; use super::utils::*; use crate::analyzer::type_coercion::TypeCoercionRewriter; -use crate::simplify_expressions::SimplifyInfo; +use crate::simplify_expressions::SimplifyContext; use crate::simplify_expressions::regex::simplify_regex_expr; use crate::simplify_expressions::unwrap_cast::{ is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary, @@ -72,7 +73,6 @@ use regex::Regex; /// ``` /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_common::{DataFusionError, ToDFSchema}; -/// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_expr::{col, lit}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; @@ -83,8 +83,7 @@ use regex::Regex; /// .unwrap(); /// /// // Create the simplifier -/// let props = ExecutionProps::new(); -/// let context = SimplifyContext::new(&props).with_schema(schema); +/// let context = SimplifyContext::default().with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Use the simplifier @@ -96,8 +95,8 @@ use regex::Regex; /// let simplified = simplifier.simplify(expr).unwrap(); /// assert_eq!(simplified, col("b").lt(lit(2))); /// ``` -pub struct ExprSimplifier { - info: S, +pub struct ExprSimplifier { + info: SimplifyContext, /// Guarantees about the values of columns. This is provided by the user /// in [ExprSimplifier::with_guarantees()]. guarantees: Vec<(Expr, NullableInterval)>, @@ -111,13 +110,12 @@ pub struct ExprSimplifier { pub const THRESHOLD_INLINE_INLIST: usize = 3; pub const DEFAULT_MAX_SIMPLIFIER_CYCLES: u32 = 3; -impl ExprSimplifier { - /// Create a new `ExprSimplifier` with the given `info` such as an - /// instance of [`SimplifyContext`]. See - /// [`simplify`](Self::simplify) for an example. +impl ExprSimplifier { + /// Create a new `ExprSimplifier` with the given [`SimplifyContext`]. + /// See [`simplify`](Self::simplify) for an example. /// /// [`SimplifyContext`]: datafusion_expr::simplify::SimplifyContext - pub fn new(info: S) -> Self { + pub fn new(info: SimplifyContext) -> Self { Self { info, guarantees: vec![], @@ -142,40 +140,21 @@ impl ExprSimplifier { /// `b > 2` /// /// ``` - /// use arrow::datatypes::DataType; - /// use datafusion_common::DFSchema; + /// use arrow::datatypes::{DataType, Field, Schema}; + /// use datafusion_common::{DFSchema, ToDFSchema}; /// use datafusion_common::Result; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; - /// use datafusion_expr::simplify::SimplifyInfo; /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// use std::sync::Arc; /// - /// /// Simple implementation that provides `Simplifier` the information it needs - /// /// See SimplifyContext for a structure that does this. - /// #[derive(Default)] - /// struct Info { - /// execution_props: ExecutionProps, - /// }; - /// - /// impl SimplifyInfo for Info { - /// fn is_boolean_type(&self, expr: &Expr) -> Result { - /// Ok(false) - /// } - /// fn nullable(&self, expr: &Expr) -> Result { - /// Ok(true) - /// } - /// fn execution_props(&self) -> &ExecutionProps { - /// &self.execution_props - /// } - /// fn get_data_type(&self, expr: &Expr) -> Result { - /// Ok(DataType::Int32) - /// } - /// } - /// + /// // Create a schema and SimplifyContext + /// let schema = Schema::new(vec![Field::new("b", DataType::Int32, true)]) + /// .to_dfschema_ref() + /// .unwrap(); /// // Create the simplifier - /// let simplifier = ExprSimplifier::new(Info::default()); + /// let context = SimplifyContext::default().with_schema(schema); + /// let simplifier = ExprSimplifier::new(context); /// /// // b < 2 /// let b_lt_2 = col("b").gt(lit(2)); @@ -225,7 +204,8 @@ impl ExprSimplifier { mut expr: Expr, ) -> Result<(Transformed, u32)> { let mut simplifier = Simplifier::new(&self.info); - let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?; + let config_options = Some(Arc::clone(self.info.config_options())); + let mut const_evaluator = ConstEvaluator::try_new(config_options)?; let mut shorten_in_list_simplifier = ShortenInListSimplifier::new(); let guarantees_map: HashMap<&Expr, &NullableInterval> = self.guarantees.iter().map(|(k, v)| (k, v)).collect(); @@ -287,7 +267,6 @@ impl ExprSimplifier { /// ```rust /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_expr::{col, lit, Expr}; @@ -302,8 +281,7 @@ impl ExprSimplifier { /// .unwrap(); /// /// // Create the simplifier - /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props).with_schema(schema); + /// let context = SimplifyContext::default().with_schema(schema); /// /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5) /// let expr_x = col("x").gt_eq(lit(3_i64)); @@ -349,7 +327,6 @@ impl ExprSimplifier { /// ```rust /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_expr::{col, lit, Expr}; @@ -364,8 +341,7 @@ impl ExprSimplifier { /// .unwrap(); /// /// // Create the simplifier - /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props).with_schema(schema); + /// let context = SimplifyContext::default().with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Expression: a = c AND 1 = b @@ -410,7 +386,6 @@ impl ExprSimplifier { /// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; - /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// @@ -420,9 +395,7 @@ impl ExprSimplifier { /// .to_dfschema_ref().unwrap(); /// /// // Create the simplifier - /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props) - /// .with_schema(schema); + /// let context = SimplifyContext::default().with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Expression: a IS NOT NULL @@ -500,7 +473,7 @@ impl TreeNodeRewriter for Canonicalizer { /// /// Note it does not handle algebraic rewrites such as `(a or false)` /// --> `a`, which is handled by [`Simplifier`] -struct ConstEvaluator<'a> { +struct ConstEvaluator { /// `can_evaluate` is used during the depth-first-search of the /// `Expr` tree to track if any siblings (or their descendants) were /// non evaluatable (e.g. had a column reference or volatile @@ -514,8 +487,13 @@ struct ConstEvaluator<'a> { /// means there were no non evaluatable siblings (or their /// descendants) so this `Expr` can be evaluated can_evaluate: Vec, - - execution_props: &'a ExecutionProps, + /// Execution properties needed to call [`create_physical_expr`]. + /// `ConstEvaluator` only evaluates expressions without column references + /// (i.e. constant expressions) and doesn't use the variable binding features + /// of `ExecutionProps` (we explicitly filter out [`Expr::ScalarVariable`]). + /// The `config_options` are passed from the session to allow scalar functions + /// to access configuration like timezone. + execution_props: ExecutionProps, input_schema: DFSchema, input_batch: RecordBatch, } @@ -530,7 +508,7 @@ enum ConstSimplifyResult { SimplifyRuntimeError(DataFusionError, Expr), } -impl TreeNodeRewriter for ConstEvaluator<'_> { +impl TreeNodeRewriter for ConstEvaluator { type Node = Expr; fn f_down(&mut self, expr: Expr) -> Result> { @@ -593,11 +571,17 @@ impl TreeNodeRewriter for ConstEvaluator<'_> { } } -impl<'a> ConstEvaluator<'a> { - /// Create a new `ConstantEvaluator`. Session constants (such as - /// the time for `now()` are taken from the passed - /// `execution_props`. - pub fn try_new(execution_props: &'a ExecutionProps) -> Result { +impl ConstEvaluator { + /// Create a new `ConstantEvaluator`. + /// + /// Note: `ConstEvaluator` filters out expressions with scalar variables + /// (like `$var`) and volatile functions, so it creates its own default + /// `ExecutionProps` internally. The filtered expressions will be evaluated + /// at runtime where proper variable bindings are available. + /// + /// The `config_options` parameter is used to pass session configuration + /// (like timezone) to scalar functions during constant evaluation. + pub fn try_new(config_options: Option>) -> Result { // The dummy column name is unused and doesn't matter as only // expressions without column references can be evaluated static DUMMY_COL_NAME: &str = "."; @@ -611,6 +595,9 @@ impl<'a> ConstEvaluator<'a> { let col = new_null_array(&DataType::Null, 1); let input_batch = RecordBatch::try_new(schema, vec![col])?; + let mut execution_props = ExecutionProps::new(); + execution_props.config_options = config_options; + Ok(Self { can_evaluate: vec![], execution_props, @@ -684,11 +671,14 @@ impl<'a> ConstEvaluator<'a> { return ConstSimplifyResult::NotSimplified(s, m); } - let phys_expr = - match create_physical_expr(&expr, &self.input_schema, self.execution_props) { - Ok(e) => e, - Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), - }; + let phys_expr = match create_physical_expr( + &expr, + &self.input_schema, + &self.execution_props, + ) { + Ok(e) => e, + Err(err) => return ConstSimplifyResult::SimplifyRuntimeError(err, expr), + }; let metadata = phys_expr .return_field(self.input_batch.schema_ref()) .ok() @@ -745,17 +735,17 @@ impl<'a> ConstEvaluator<'a> { /// * `false = true` and `true = false` to `false` /// * `!!expr` to `expr` /// * `expr = null` and `expr != null` to `null` -struct Simplifier<'a, S> { - info: &'a S, +struct Simplifier<'a> { + info: &'a SimplifyContext, } -impl<'a, S> Simplifier<'a, S> { - pub fn new(info: &'a S) -> Self { +impl<'a> Simplifier<'a> { + pub fn new(info: &'a SimplifyContext) -> Self { Self { info } } } -impl TreeNodeRewriter for Simplifier<'_, S> { +impl TreeNodeRewriter for Simplifier<'_> { type Node = Expr; /// rewrite the expression simplifying any constant expressions @@ -2117,7 +2107,7 @@ fn inlist_except(mut l1: InList, l2: &InList) -> Result { } /// Returns expression testing a boolean `expr` for being exactly `true` (not `false` or NULL). -fn is_exactly_true(expr: Expr, info: &impl SimplifyInfo) -> Result { +fn is_exactly_true(expr: Expr, info: &SimplifyContext) -> Result { if !info.nullable(&expr)? { Ok(expr) } else { @@ -2133,8 +2123,8 @@ fn is_exactly_true(expr: Expr, info: &impl SimplifyInfo) -> Result { // A / 1 -> A // // Move this function body out of the large match branch avoid stack overflow -fn simplify_right_is_one_case( - info: &S, +fn simplify_right_is_one_case( + info: &SimplifyContext, left: Box, op: &Operator, right: &Expr, @@ -2187,9 +2177,8 @@ mod tests { // ------------------------------ #[test] fn api_basic() { - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); let expr = lit(1) + lit(2); let expected = lit(3); @@ -2199,9 +2188,8 @@ mod tests { #[test] fn basic_coercion() { let schema = test_schema(); - let props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&props).with_schema(Arc::clone(&schema)), + SimplifyContext::default().with_schema(Arc::clone(&schema)), ); // Note expr type is int32 (not int64) @@ -2229,9 +2217,8 @@ mod tests { #[test] fn simplify_and_constant_prop() { - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); // should be able to simplify to false // (i * (1 - 2)) > 0 @@ -2242,9 +2229,8 @@ mod tests { #[test] fn simplify_and_constant_prop_with_case() { - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema())); + ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); // CASE // WHEN i>5 AND false THEN i > 5 @@ -3358,18 +3344,15 @@ mod tests { fn try_simplify(expr: Expr) -> Result { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), - ); + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); simplifier.simplify(expr) } fn coerce(expr: Expr) -> Expr { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(Arc::clone(&schema)), + SimplifyContext::default().with_schema(Arc::clone(&schema)), ); simplifier.coerce(expr, schema.as_ref()).unwrap() } @@ -3380,10 +3363,8 @@ mod tests { fn try_simplify_with_cycle_count(expr: Expr) -> Result<(Expr, u32)> { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), - ); + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); let (expr, count) = simplifier.simplify_with_cycle_count_transformed(expr)?; Ok((expr.data, count)) } @@ -3397,11 +3378,9 @@ mod tests { guarantees: Vec<(Expr, NullableInterval)>, ) -> Expr { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let simplifier = ExprSimplifier::new( - SimplifyContext::new(&execution_props).with_schema(schema), - ) - .with_guarantees(guarantees); + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)) + .with_guarantees(guarantees); simplifier.simplify(expr).unwrap() } @@ -4303,8 +4282,7 @@ mod tests { fn just_simplifier_simplify_null_in_empty_inlist() { let simplify = |expr: Expr| -> Expr { let schema = expr_test_schema(); - let execution_props = ExecutionProps::new(); - let info = SimplifyContext::new(&execution_props).with_schema(schema); + let info = SimplifyContext::default().with_schema(schema); let simplifier = &mut Simplifier::new(&info); expr.rewrite(simplifier) .expect("Failed to simplify expression") @@ -4670,10 +4648,9 @@ mod tests { #[test] fn simplify_common_factor_conjunction_in_disjunction() { - let props = ExecutionProps::new(); let schema = boolean_test_schema(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema)); + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); let a = || col("A"); let b = || col("B"); @@ -5003,9 +4980,8 @@ mod tests { // The simplification should now fail with an error at plan time let schema = test_schema(); - let props = ExecutionProps::new(); let simplifier = - ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema)); + ExprSimplifier::new(SimplifyContext::default().with_schema(schema)); let result = simplifier.simplify(expr); assert!(result.is_err(), "Expected error for invalid cast"); let err_msg = result.unwrap_err().to_string(); diff --git a/datafusion/optimizer/src/simplify_expressions/mod.rs b/datafusion/optimizer/src/simplify_expressions/mod.rs index e238fca32689d..e6bb1eca25b69 100644 --- a/datafusion/optimizer/src/simplify_expressions/mod.rs +++ b/datafusion/optimizer/src/simplify_expressions/mod.rs @@ -27,7 +27,7 @@ mod unwrap_cast; mod utils; // backwards compatibility -pub use datafusion_expr::simplify::{SimplifyContext, SimplifyInfo}; +pub use datafusion_expr::simplify::SimplifyContext; pub use expr_simplifier::*; pub use simplify_exprs::*; diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index 1b25c5ce8a632..f7f100015004a 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -22,7 +22,6 @@ use std::sync::Arc; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result}; use datafusion_expr::Expr; -use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::logical_plan::LogicalPlan; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::utils::merge_schema; @@ -67,17 +66,14 @@ impl OptimizerRule for SimplifyExpressions { plan: LogicalPlan, config: &dyn OptimizerConfig, ) -> Result, DataFusionError> { - let mut execution_props = ExecutionProps::new(); - execution_props.query_execution_start_time = config.query_execution_start_time(); - execution_props.config_options = Some(config.options()); - Self::optimize_internal(plan, &execution_props) + Self::optimize_internal(plan, config) } } impl SimplifyExpressions { fn optimize_internal( plan: LogicalPlan, - execution_props: &ExecutionProps, + config: &dyn OptimizerConfig, ) -> Result> { let schema = if !plan.inputs().is_empty() { DFSchemaRef::new(merge_schema(&plan.inputs())) @@ -100,7 +96,10 @@ impl SimplifyExpressions { Arc::new(DFSchema::empty()) }; - let info = SimplifyContext::new(execution_props).with_schema(schema); + let info = SimplifyContext::default() + .with_schema(schema) + .with_config_options(config.options()) + .with_query_execution_start_time(config.query_execution_start_time()); // Inputs have already been rewritten (due to bottom-up traversal handled by Optimizer) // Just need to rewrite our own expressions diff --git a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs index b2349db8c4605..acf0f32ab2234 100644 --- a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs +++ b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs @@ -58,11 +58,11 @@ use arrow::datatypes::DataType; use datafusion_common::{Result, ScalarValue}; use datafusion_common::{internal_err, tree_node::Transformed}; use datafusion_expr::{BinaryExpr, lit}; -use datafusion_expr::{Cast, Expr, Operator, TryCast, simplify::SimplifyInfo}; +use datafusion_expr::{Cast, Expr, Operator, TryCast, simplify::SimplifyContext}; use datafusion_expr_common::casts::{is_supported_type, try_cast_literal_to_type}; -pub(super) fn unwrap_cast_in_comparison_for_binary( - info: &S, +pub(super) fn unwrap_cast_in_comparison_for_binary( + info: &SimplifyContext, cast_expr: Expr, literal: Expr, op: Operator, @@ -104,10 +104,8 @@ pub(super) fn unwrap_cast_in_comparison_for_binary( } } -pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary< - S: SimplifyInfo, ->( - info: &S, +pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary( + info: &SimplifyContext, expr: &Expr, op: Operator, literal: &Expr, @@ -142,10 +140,8 @@ pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_binary< } } -pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist< - S: SimplifyInfo, ->( - info: &S, +pub(super) fn is_cast_expr_and_support_unwrap_cast_in_comparison_for_inlist( + info: &SimplifyContext, expr: &Expr, list: &[Expr], ) -> bool { @@ -241,7 +237,6 @@ mod tests { use crate::simplify_expressions::ExprSimplifier; use arrow::datatypes::{Field, TimeUnit}; use datafusion_common::{DFSchema, DFSchemaRef}; - use datafusion_expr::execution_props::ExecutionProps; use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{cast, col, in_list, try_cast}; @@ -592,9 +587,8 @@ mod tests { } fn optimize_test(expr: Expr, schema: &DFSchemaRef) -> Expr { - let props = ExecutionProps::new(); let simplifier = ExprSimplifier::new( - SimplifyContext::new(&props).with_schema(Arc::clone(schema)), + SimplifyContext::default().with_schema(Arc::clone(schema)), ); simplifier.simplify(expr).unwrap() diff --git a/datafusion/spark/src/function/conditional/if.rs b/datafusion/spark/src/function/conditional/if.rs index 906b0bc312f2f..e423f8264ecca 100644 --- a/datafusion/spark/src/function/conditional/if.rs +++ b/datafusion/spark/src/function/conditional/if.rs @@ -86,7 +86,7 @@ impl ScalarUDFImpl for SparkIf { fn simplify( &self, args: Vec, - _info: &dyn datafusion_expr::simplify::SimplifyInfo, + _info: &datafusion_expr::simplify::SimplifyContext, ) -> Result { let condition = args[0].clone(); let then_expr = args[1].clone(); diff --git a/datafusion/wasmtest/src/lib.rs b/datafusion/wasmtest/src/lib.rs index b20e6c24ffeaa..c5948bd7343a6 100644 --- a/datafusion/wasmtest/src/lib.rs +++ b/datafusion/wasmtest/src/lib.rs @@ -24,14 +24,12 @@ extern crate wasm_bindgen; -use datafusion_common::{DFSchema, ScalarValue}; -use datafusion_expr::execution_props::ExecutionProps; +use datafusion_common::ScalarValue; use datafusion_expr::lit; use datafusion_expr::simplify::SimplifyContext; use datafusion_optimizer::simplify_expressions::ExprSimplifier; use datafusion_sql::sqlparser::dialect::GenericDialect; use datafusion_sql::sqlparser::parser::Parser; -use std::sync::Arc; use wasm_bindgen::prelude::*; pub fn set_panic_hook() { // When the `console_error_panic_hook` feature is enabled, we can call the @@ -63,10 +61,7 @@ pub fn basic_exprs() { log(&format!("Expr: {expr:?}")); // Simplify Expr (using datafusion-phys-expr and datafusion-optimizer) - let schema = Arc::new(DFSchema::empty()); - let execution_props = ExecutionProps::new(); - let simplifier = - ExprSimplifier::new(SimplifyContext::new(&execution_props).with_schema(schema)); + let simplifier = ExprSimplifier::new(SimplifyContext::default()); let simplified_expr = simplifier.simplify(expr).unwrap(); log(&format!("Simplified Expr: {simplified_expr:?}")); } @@ -82,6 +77,8 @@ pub fn basic_parse() { #[cfg(test)] mod test { + use std::sync::Arc; + use super::*; use datafusion::{ arrow::{ diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 39d52bd5903a4..dc635f8afbfda 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -355,6 +355,101 @@ Instead of silently succeeding. The remove API no longer requires a mutable instance +### `SimplifyInfo` trait removed, `SimplifyContext` now uses builder-style API + +The `SimplifyInfo` trait has been removed and replaced with the concrete `SimplifyContext` struct. This simplifies the expression simplification API and removes the need for trait objects. + +**Who is affected:** + +- Users who implemented custom `SimplifyInfo` implementations +- Users who implemented `ScalarUDFImpl::simplify()` for custom scalar functions +- Users who directly use `SimplifyContext` or `ExprSimplifier` + +**Breaking changes:** + +1. The `SimplifyInfo` trait has been removed entirely +2. `SimplifyContext` no longer takes `&ExecutionProps` - it now uses a builder-style API with direct fields +3. `ScalarUDFImpl::simplify()` now takes `&SimplifyContext` instead of `&dyn SimplifyInfo` +4. Time-dependent function simplification (e.g., `now()`) is now optional - if `query_execution_start_time` is `None`, these functions won't be simplified + +**Migration guide:** + +If you implemented a custom `SimplifyInfo`: + +**Before:** + +```rust,ignore +impl SimplifyInfo for MySimplifyInfo { + fn is_boolean_type(&self, expr: &Expr) -> Result { ... } + fn nullable(&self, expr: &Expr) -> Result { ... } + fn execution_props(&self) -> &ExecutionProps { ... } + fn get_data_type(&self, expr: &Expr) -> Result { ... } +} +``` + +**After:** + +Use `SimplifyContext` directly with the builder-style API: + +```rust,ignore +let context = SimplifyContext::default() + .with_schema(schema) + .with_config_options(config_options) + .with_query_execution_start_time(Some(Utc::now())); // or use .with_current_time() +``` + +If you implemented `ScalarUDFImpl::simplify()`: + +**Before:** + +```rust,ignore +fn simplify( + &self, + args: Vec, + info: &dyn SimplifyInfo, +) -> Result { + let now_ts = info.execution_props().query_execution_start_time; + // ... +} +``` + +**After:** + +```rust,ignore +fn simplify( + &self, + args: Vec, + info: &SimplifyContext, +) -> Result { + // query_execution_start_time is now Option> + // Return Original if time is not set (simplification skipped) + let Some(now_ts) = info.query_execution_start_time() else { + return Ok(ExprSimplifyResult::Original(args)); + }; + // ... +} +``` + +If you created `SimplifyContext` from `ExecutionProps`: + +**Before:** + +```rust,ignore +let props = ExecutionProps::new(); +let context = SimplifyContext::new(&props).with_schema(schema); +``` + +**After:** + +```rust,ignore +let context = SimplifyContext::default() + .with_schema(schema) + .with_config_options(config_options) + .with_current_time(); // Sets query_execution_start_time to Utc::now() +``` + +See [`SimplifyContext` documentation](https://docs.rs/datafusion-expr/latest/datafusion_expr/simplify/struct.SimplifyContext.html) for more details. + ### FFI crate updates Many of the structs in the `datafusion-ffi` crate have been updated to allow easier