From 48d506357dda9090d08826086df1fe0e76668fee Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 28 Dec 2025 17:14:04 +0900 Subject: [PATCH 1/4] Refactor duplicate code in `type_coercion/functions.rs` --- datafusion/expr/src/expr_schema.rs | 24 +- .../expr/src/type_coercion/functions.rs | 370 +++++++++--------- datafusion/ffi/src/udaf/mod.rs | 4 +- datafusion/ffi/src/udf/mod.rs | 12 +- datafusion/ffi/src/udwf/mod.rs | 4 +- .../optimizer/src/analyzer/type_coercion.rs | 15 +- .../physical-expr/src/scalar_function.rs | 8 +- datafusion/sqllogictest/test_files/errors.slt | 4 +- 8 files changed, 210 insertions(+), 231 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 691a8c508f801..dbba0f2914a6d 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -21,13 +21,11 @@ use crate::expr::{ InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, WindowFunctionParams, }; -use crate::type_coercion::functions::{ - data_types_with_scalar_udf, fields_with_aggregate_udf, fields_with_window_udf, -}; +use crate::type_coercion::functions::fields_with_udf; use crate::udf::ReturnFieldArgs; use crate::{LogicalPlan, Projection, Subquery, WindowFunctionDefinition, utils}; use arrow::compute::can_cast_types; -use arrow::datatypes::{DataType, Field, FieldRef}; +use arrow::datatypes::{DataType, Field}; use datafusion_common::datatype::FieldExt; use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ @@ -169,7 +167,7 @@ impl ExprSchemable for Expr { .iter() .map(|e| e.to_field(schema).map(|(_, f)| f)) .collect::>>()?; - let new_fields = fields_with_aggregate_udf(&fields, func) + let new_fields = fields_with_udf(&fields, func.as_ref()) .map_err(|err| { let data_types = fields .iter() @@ -554,7 +552,7 @@ impl ExprSchemable for Expr { .map(|e| e.to_field(schema).map(|(_, f)| f)) .collect::>>()?; // Verify that function is invoked with correct number and type of arguments as defined in `TypeSignature` - let new_fields = fields_with_aggregate_udf(&fields, func) + let new_fields = fields_with_udf(&fields, func.as_ref()) .map_err(|err| { let arg_types = fields .iter() @@ -588,8 +586,8 @@ impl ExprSchemable for Expr { .map(|f| (f.data_type().clone(), f)) .unzip(); // Verify that function is invoked with correct number and type of arguments as defined in `TypeSignature` - let new_data_types = data_types_with_scalar_udf(&arg_types, func) - .map_err(|err| { + let new_fields = + fields_with_udf(&fields, func.as_ref()).map_err(|err| { plan_datafusion_err!( "{} {}", match err { @@ -603,11 +601,6 @@ impl ExprSchemable for Expr { ) ) })?; - let new_fields = fields - .into_iter() - .zip(new_data_types) - .map(|(f, d)| f.retyped(d)) - .collect::>(); let arguments = args .iter() @@ -727,7 +720,7 @@ impl Expr { .map(|f| f.data_type()) .cloned() .collect::>(); - let new_fields = fields_with_aggregate_udf(&fields, udaf) + let new_fields = fields_with_udf(&fields, udaf.as_ref()) .map_err(|err| { plan_datafusion_err!( "{} {}", @@ -755,7 +748,7 @@ impl Expr { .map(|f| f.data_type()) .cloned() .collect::>(); - let new_fields = fields_with_window_udf(&fields, udwf) + let new_fields = fields_with_udf(&fields, udwf.as_ref()) .map_err(|err| { plan_datafusion_err!( "{} {}", @@ -828,6 +821,7 @@ mod tests { use super::*; use crate::{and, col, lit, not, or, out_ref_col_with_metadata, when}; + use arrow::datatypes::FieldRef; use datafusion_common::{DFSchema, ScalarValue, assert_or_internal_err}; macro_rules! test_is_expr_nullable { diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 7be9713f53186..7f19416d50289 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -17,7 +17,7 @@ use super::binary::binary_numeric_coercion; use crate::{AggregateUDF, ScalarUDF, Signature, TypeSignature, WindowUDF}; -use arrow::datatypes::FieldRef; +use arrow::datatypes::{Field, FieldRef}; use arrow::{ compute::can_cast_types, datatypes::{DataType, TimeUnit}, @@ -39,6 +39,61 @@ use datafusion_expr_common::{ use itertools::Itertools as _; use std::sync::Arc; +/// Extension trait to unify common functionality between [`ScalarUDF`], [`AggregateUDF`] +/// and [`WindowUDF`] for use by signature coercion functions. +pub trait UDFCoercionExt { + /// Should delegate to [`ScalarUDF::name`], [`AggregateUDF::name`] or [`WindowUDF::name`]. + fn name(&self) -> &str; + /// Should delegate to [`ScalarUDF::signature`], [`AggregateUDF::signature`] + /// or [`WindowUDF::signature`]. + fn signature(&self) -> &Signature; + /// Should delegate to [`ScalarUDF::coerce_types`], [`AggregateUDF::coerce_types`] + /// or [`WindowUDF::coerce_types`]. + fn coerce_types(&self, arg_types: &[DataType]) -> Result>; +} + +impl UDFCoercionExt for ScalarUDF { + fn name(&self) -> &str { + self.name() + } + + fn signature(&self) -> &Signature { + self.signature() + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.coerce_types(arg_types) + } +} + +impl UDFCoercionExt for AggregateUDF { + fn name(&self) -> &str { + self.name() + } + + fn signature(&self) -> &Signature { + self.signature() + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.coerce_types(arg_types) + } +} + +impl UDFCoercionExt for WindowUDF { + fn name(&self) -> &str { + self.name() + } + + fn signature(&self) -> &Signature { + self.signature() + } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + self.coerce_types(arg_types) + } +} + /// Performs type coercion for scalar function arguments. /// /// Returns the data types to which each argument must be coerced to @@ -46,38 +101,19 @@ use std::sync::Arc; /// /// For more details on coercion in general, please see the /// [`type_coercion`](crate::type_coercion) module. +#[deprecated(since = "52.0.0", note = "use fields_with_udf")] pub fn data_types_with_scalar_udf( current_types: &[DataType], func: &ScalarUDF, ) -> Result> { - let signature = func.signature(); - let type_signature = &signature.type_signature; - - if current_types.is_empty() && type_signature != &TypeSignature::UserDefined { - if type_signature.supports_zero_argument() { - return Ok(vec![]); - } else if type_signature.used_to_support_zero_arguments() { - // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 - return plan_err!( - "'{}' does not support zero arguments. Use TypeSignature::Nullary for zero arguments", - func.name() - ); - } else { - return plan_err!("'{}' does not support zero arguments", func.name()); - } - } - - let valid_types = - get_valid_types_with_scalar_udf(type_signature, current_types, func)?; - - if valid_types + let current_fields = current_types .iter() - .any(|data_type| data_type == current_types) - { - return Ok(current_types.to_vec()); - } - - try_coerce_types(func.name(), valid_types, current_types, type_signature) + .map(|dt| Arc::new(Field::new("f", dt.clone(), true))) + .collect::>(); + Ok(fields_with_udf(¤t_fields, func)? + .iter() + .map(|f| f.data_type().clone()) + .collect()) } /// Performs type coercion for aggregate function arguments. @@ -87,64 +123,88 @@ pub fn data_types_with_scalar_udf( /// /// For more details on coercion in general, please see the /// [`type_coercion`](crate::type_coercion) module. +#[deprecated(since = "52.0.0", note = "use fields_with_udf")] pub fn fields_with_aggregate_udf( current_fields: &[FieldRef], func: &AggregateUDF, ) -> Result> { - let signature = func.signature(); + fields_with_udf(current_fields, func) +} + +/// Performs type coercion for window function arguments. +/// +/// Returns the data types to which each argument must be coerced to +/// match `signature`. +/// +/// For more details on coercion in general, please see the +/// [`type_coercion`](crate::type_coercion) module. +#[deprecated(since = "52.0.0", note = "use fields_with_udf")] +pub fn fields_with_window_udf( + current_fields: &[FieldRef], + func: &WindowUDF, +) -> Result> { + fields_with_udf(current_fields, func) +} + +/// Performs type coercion for function arguments. +/// +/// Returns the data types to which each argument must be coerced to +/// match `signature`. +/// +/// For more details on coercion in general, please see the +/// [`type_coercion`](crate::type_coercion) module. +#[deprecated(since = "52.0.0", note = "use fields_with_udf")] +pub fn data_types( + function_name: impl AsRef, + current_types: &[DataType], + signature: &Signature, +) -> Result> { let type_signature = &signature.type_signature; - if current_fields.is_empty() && type_signature != &TypeSignature::UserDefined { + if current_types.is_empty() && type_signature != &TypeSignature::UserDefined { if type_signature.supports_zero_argument() { return Ok(vec![]); } else if type_signature.used_to_support_zero_arguments() { // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 return plan_err!( - "'{}' does not support zero arguments. Use TypeSignature::Nullary for zero arguments", - func.name() + "function '{}' has signature {type_signature:?} which does not support zero arguments. Use TypeSignature::Nullary for zero arguments", + function_name.as_ref() ); } else { - return plan_err!("'{}' does not support zero arguments", func.name()); + return plan_err!( + "Function '{}' has signature {type_signature:?} which does not support zero arguments", + function_name.as_ref() + ); } } - let current_types = current_fields - .iter() - .map(|f| f.data_type()) - .cloned() - .collect::>(); let valid_types = - get_valid_types_with_aggregate_udf(type_signature, ¤t_types, func)?; + get_valid_types(function_name.as_ref(), type_signature, current_types)?; if valid_types .iter() - .any(|data_type| data_type == ¤t_types) + .any(|data_type| data_type == current_types) { - return Ok(current_fields.to_vec()); + return Ok(current_types.to_vec()); } - let updated_types = - try_coerce_types(func.name(), valid_types, ¤t_types, type_signature)?; - - Ok(current_fields - .iter() - .zip(updated_types) - .map(|(current_field, new_type)| { - current_field.as_ref().clone().with_data_type(new_type) - }) - .map(Arc::new) - .collect()) + try_coerce_types( + function_name.as_ref(), + valid_types, + current_types, + type_signature, + ) } -/// Performs type coercion for window function arguments. +/// Performs type coercion for UDF arguments. /// /// Returns the data types to which each argument must be coerced to /// match `signature`. /// /// For more details on coercion in general, please see the /// [`type_coercion`](crate::type_coercion) module. -pub fn fields_with_window_udf( +pub fn fields_with_udf( current_fields: &[FieldRef], - func: &WindowUDF, + func: &F, ) -> Result> { let signature = func.signature(); let type_signature = &signature.type_signature; @@ -162,14 +222,13 @@ pub fn fields_with_window_udf( return plan_err!("'{}' does not support zero arguments", func.name()); } } - let current_types = current_fields .iter() .map(|f| f.data_type()) .cloned() .collect::>(); - let valid_types = - get_valid_types_with_window_udf(type_signature, ¤t_types, func)?; + + let valid_types = get_valid_types_with_udf(type_signature, ¤t_types, func)?; if valid_types .iter() .any(|data_type| data_type == ¤t_types) @@ -190,69 +249,24 @@ pub fn fields_with_window_udf( .collect()) } -/// Performs type coercion for function arguments. -/// -/// Returns the data types to which each argument must be coerced to -/// match `signature`. -/// -/// For more details on coercion in general, please see the -/// [`type_coercion`](crate::type_coercion) module. -pub fn data_types( - function_name: impl AsRef, - current_types: &[DataType], - signature: &Signature, -) -> Result> { - let type_signature = &signature.type_signature; - - if current_types.is_empty() && type_signature != &TypeSignature::UserDefined { - if type_signature.supports_zero_argument() { - return Ok(vec![]); - } else if type_signature.used_to_support_zero_arguments() { - // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 - return plan_err!( - "function '{}' has signature {type_signature:?} which does not support zero arguments. Use TypeSignature::Nullary for zero arguments", - function_name.as_ref() - ); - } else { - return plan_err!( - "Function '{}' has signature {type_signature:?} which does not support zero arguments", - function_name.as_ref() - ); - } - } - - let valid_types = - get_valid_types(function_name.as_ref(), type_signature, current_types)?; - if valid_types - .iter() - .any(|data_type| data_type == current_types) - { - return Ok(current_types.to_vec()); - } - - try_coerce_types( - function_name.as_ref(), - valid_types, - current_types, - type_signature, - ) -} - fn is_well_supported_signature(type_signature: &TypeSignature) -> bool { - if let TypeSignature::OneOf(signatures) = type_signature { - return signatures.iter().all(is_well_supported_signature); - } - - matches!( - type_signature, + match type_signature { + TypeSignature::OneOf(type_signatures) => { + type_signatures.iter().all(is_well_supported_signature) + } TypeSignature::UserDefined - | TypeSignature::Numeric(_) - | TypeSignature::String(_) - | TypeSignature::Coercible(_) - | TypeSignature::Any(_) - | TypeSignature::Nullary - | TypeSignature::Comparable(_) - ) + | TypeSignature::Numeric(_) + | TypeSignature::String(_) + | TypeSignature::Coercible(_) + | TypeSignature::Any(_) + | TypeSignature::Nullary + | TypeSignature::Comparable(_) => true, + TypeSignature::Variadic(_) + | TypeSignature::VariadicAny + | TypeSignature::Uniform(_, _) + | TypeSignature::Exact(_) + | TypeSignature::ArraySignature(_) => false, + } } fn try_coerce_types( @@ -293,25 +307,27 @@ fn try_coerce_types( ) } -fn get_valid_types_with_scalar_udf( +fn get_valid_types_with_udf( signature: &TypeSignature, current_types: &[DataType], - func: &ScalarUDF, + func: &F, ) -> Result>> { - match signature { + let valid_types = match signature { TypeSignature::UserDefined => match func.coerce_types(current_types) { - Ok(coerced_types) => Ok(vec![coerced_types]), - Err(e) => exec_err!( - "Function '{}' user-defined coercion failed with {:?}", - func.name(), - e.strip_backtrace() - ), + Ok(coerced_types) => vec![coerced_types], + Err(e) => { + return exec_err!( + "Function '{}' user-defined coercion failed with {:?}", + func.name(), + e.strip_backtrace() + ); + } }, TypeSignature::OneOf(signatures) => { let mut res = vec![]; let mut errors = vec![]; for sig in signatures { - match get_valid_types_with_scalar_udf(sig, current_types, func) { + match get_valid_types_with_udf(sig, current_types, func) { Ok(valid_types) => { res.extend(valid_types); } @@ -323,69 +339,15 @@ fn get_valid_types_with_scalar_udf( // Every signature failed, return the joined error if res.is_empty() { - internal_err!( + return internal_err!( "Function '{}' failed to match any signature, errors: {}", func.name(), errors.join(",") - ) + ); } else { - Ok(res) + res } } - _ => get_valid_types(func.name(), signature, current_types), - } -} - -fn get_valid_types_with_aggregate_udf( - signature: &TypeSignature, - current_types: &[DataType], - func: &AggregateUDF, -) -> Result>> { - let valid_types = match signature { - TypeSignature::UserDefined => match func.coerce_types(current_types) { - Ok(coerced_types) => vec![coerced_types], - Err(e) => { - return exec_err!( - "Function '{}' user-defined coercion failed with {:?}", - func.name(), - e.strip_backtrace() - ); - } - }, - TypeSignature::OneOf(signatures) => signatures - .iter() - .filter_map(|t| { - get_valid_types_with_aggregate_udf(t, current_types, func).ok() - }) - .flatten() - .collect::>(), - _ => get_valid_types(func.name(), signature, current_types)?, - }; - - Ok(valid_types) -} - -fn get_valid_types_with_window_udf( - signature: &TypeSignature, - current_types: &[DataType], - func: &WindowUDF, -) -> Result>> { - let valid_types = match signature { - TypeSignature::UserDefined => match func.coerce_types(current_types) { - Ok(coerced_types) => vec![coerced_types], - Err(e) => { - return exec_err!( - "Function '{}' user-defined coercion failed with {:?}", - func.name(), - e.strip_backtrace() - ); - } - }, - TypeSignature::OneOf(signatures) => signatures - .iter() - .filter_map(|t| get_valid_types_with_window_udf(t, current_types, func).ok()) - .flatten() - .collect::>(), _ => get_valid_types(func.name(), signature, current_types)?, }; @@ -1158,10 +1120,27 @@ mod tests { #[test] fn test_fixed_list_wildcard_coerce() -> Result<()> { + struct MockUdf(Signature); + + impl UDFCoercionExt for MockUdf { + fn name(&self) -> &str { + "test" + } + fn signature(&self) -> &Signature { + &self.0 + } + fn coerce_types(&self, _arg_types: &[DataType]) -> Result> { + unimplemented!() + } + } + let inner = Arc::new(Field::new_list_field(DataType::Int32, false)); - let current_types = vec![ - DataType::FixedSizeList(Arc::clone(&inner), 2), // able to coerce for any size - ]; + // able to coerce for any size + let current_fields = vec![Arc::new(Field::new( + "t", + DataType::FixedSizeList(Arc::clone(&inner), 2), + true, + ))]; let signature = Signature::exact( vec![DataType::FixedSizeList( @@ -1171,24 +1150,25 @@ mod tests { Volatility::Stable, ); - let coerced_data_types = data_types("test", ¤t_types, &signature)?; - assert_eq!(coerced_data_types, current_types); + let coerced_fields = fields_with_udf(¤t_fields, &MockUdf(signature))?; + assert_eq!(coerced_fields, current_fields); // make sure it can't coerce to a different size let signature = Signature::exact( vec![DataType::FixedSizeList(Arc::clone(&inner), 3)], Volatility::Stable, ); - let coerced_data_types = data_types("test", ¤t_types, &signature); - assert!(coerced_data_types.is_err()); + let coerced_fields = fields_with_udf(¤t_fields, &MockUdf(signature)); + assert!(coerced_fields.is_err()); // make sure it works with the same type. let signature = Signature::exact( vec![DataType::FixedSizeList(Arc::clone(&inner), 2)], Volatility::Stable, ); - let coerced_data_types = data_types("test", ¤t_types, &signature).unwrap(); - assert_eq!(coerced_data_types, current_types); + let coerced_fields = + fields_with_udf(¤t_fields, &MockUdf(signature)).unwrap(); + assert_eq!(coerced_fields, current_fields); Ok(()) } diff --git a/datafusion/ffi/src/udaf/mod.rs b/datafusion/ffi/src/udaf/mod.rs index c485c9a71bc46..22cbe8cff0fe6 100644 --- a/datafusion/ffi/src/udaf/mod.rs +++ b/datafusion/ffi/src/udaf/mod.rs @@ -28,7 +28,7 @@ use arrow::ffi::FFI_ArrowSchema; use arrow_schema::FieldRef; use datafusion_common::{DataFusionError, Result, ffi_datafusion_err}; use datafusion_expr::function::AggregateFunctionSimplification; -use datafusion_expr::type_coercion::functions::fields_with_aggregate_udf; +use datafusion_expr::type_coercion::functions::fields_with_udf; use datafusion_expr::{ Accumulator, AggregateUDF, AggregateUDFImpl, GroupsAccumulator, Signature, }; @@ -340,7 +340,7 @@ unsafe extern "C" fn coerce_types_fn_wrapper( .map(|dt| Field::new("f", dt.clone(), true)) .map(Arc::new) .collect::>(); - let return_types = rresult_return!(fields_with_aggregate_udf(&arg_fields, udaf)) + let return_types = rresult_return!(fields_with_udf(&arg_fields, udaf.as_ref())) .into_iter() .map(|f| f.data_type().to_owned()) .collect::>(); diff --git a/datafusion/ffi/src/udf/mod.rs b/datafusion/ffi/src/udf/mod.rs index d7da050b35efa..7f18afeccaeb2 100644 --- a/datafusion/ffi/src/udf/mod.rs +++ b/datafusion/ffi/src/udf/mod.rs @@ -28,7 +28,7 @@ use arrow::ffi::{FFI_ArrowSchema, from_ffi, to_ffi}; use arrow_schema::FieldRef; use datafusion_common::config::ConfigOptions; use datafusion_common::{DataFusionError, Result, internal_err}; -use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf; +use datafusion_expr::type_coercion::functions::fields_with_udf; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, @@ -140,8 +140,16 @@ unsafe extern "C" fn coerce_types_fn_wrapper( ) -> FFIResult> { let arg_types = rresult_return!(rvec_wrapped_to_vec_datatype(&arg_types)); + let arg_fields = arg_types + .iter() + .map(|dt| Field::new("f", dt.clone(), true)) + .map(Arc::new) + .collect::>(); let return_types = - rresult_return!(data_types_with_scalar_udf(&arg_types, udf.inner())); + rresult_return!(fields_with_udf(&arg_fields, udf.inner().as_ref())) + .into_iter() + .map(|f| f.data_type().to_owned()) + .collect::>(); rresult!(vec_datatype_to_rvec_wrapped(&return_types)) } diff --git a/datafusion/ffi/src/udwf/mod.rs b/datafusion/ffi/src/udwf/mod.rs index 53aa6c34eba42..dbac00fd43020 100644 --- a/datafusion/ffi/src/udwf/mod.rs +++ b/datafusion/ffi/src/udwf/mod.rs @@ -26,7 +26,7 @@ use arrow::datatypes::{DataType, Schema, SchemaRef}; use arrow_schema::{Field, FieldRef}; use datafusion_common::{Result, ffi_err}; use datafusion_expr::function::WindowUDFFieldArgs; -use datafusion_expr::type_coercion::functions::fields_with_window_udf; +use datafusion_expr::type_coercion::functions::fields_with_udf; use datafusion_expr::{ LimitEffect, PartitionEvaluator, Signature, WindowUDF, WindowUDFImpl, }; @@ -167,7 +167,7 @@ unsafe extern "C" fn coerce_types_fn_wrapper( .map(Arc::new) .collect::>(); - let return_fields = rresult_return!(fields_with_window_udf(&arg_fields, inner)); + let return_fields = rresult_return!(fields_with_udf(&arg_fields, inner.as_ref())); let return_types = return_fields .into_iter() .map(|f| f.data_type().to_owned()) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index bc317e9c201ce..1b6dbd629f866 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -41,9 +41,7 @@ use datafusion_expr::expr_rewriter::coerce_plan_expr_for_schema; use datafusion_expr::expr_schema::cast_subquery; use datafusion_expr::logical_plan::Subquery; use datafusion_expr::type_coercion::binary::{comparison_coercion, like_coercion}; -use datafusion_expr::type_coercion::functions::{ - data_types_with_scalar_udf, fields_with_aggregate_udf, -}; +use datafusion_expr::type_coercion::functions::fields_with_udf; use datafusion_expr::type_coercion::other::{ get_coerce_type_for_case_expression, get_coerce_type_for_list, }; @@ -794,12 +792,15 @@ fn coerce_arguments_for_signature_with_scalar_udf( return Ok(expressions); } - let current_types = expressions + let current_fields = expressions .iter() - .map(|e| e.get_type(schema)) + .map(|e| e.to_field(schema).map(|(_, f)| f)) .collect::>>()?; - let new_types = data_types_with_scalar_udf(¤t_types, func)?; + let new_types = fields_with_udf(¤t_fields, func)? + .into_iter() + .map(|f| f.data_type().clone()) + .collect::>(); expressions .into_iter() @@ -826,7 +827,7 @@ fn coerce_arguments_for_signature_with_aggregate_udf( .map(|e| e.to_field(schema).map(|(_, f)| f)) .collect::>>()?; - let new_types = fields_with_aggregate_udf(¤t_fields, func)? + let new_types = fields_with_udf(¤t_fields, func)? .into_iter() .map(|f| f.data_type().clone()) .collect::>(); diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index e6a6db75bebd7..aa090743ad441 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -43,7 +43,7 @@ use datafusion_common::config::{ConfigEntry, ConfigOptions}; use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::sort_properties::ExprProperties; -use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf; +use datafusion_expr::type_coercion::functions::fields_with_udf; use datafusion_expr::{ ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, Volatility, expr_vec_fmt, @@ -101,11 +101,7 @@ impl ScalarFunctionExpr { .collect::>>()?; // verify that input data types is consistent with function's `TypeSignature` - let arg_types = arg_fields - .iter() - .map(|f| f.data_type().clone()) - .collect::>(); - data_types_with_scalar_udf(&arg_types, &fun)?; + fields_with_udf(&arg_fields, fun.as_ref())?; let arguments = args .iter() diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index 22430774bbca2..be8013ec0fdf3 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -61,7 +61,7 @@ WITH t AS (WITH u as (SELECT 1) SELECT 1) SELECT * from u # select_wildcard_without_table statement error Error during planning: SELECT \* with no tables specified is not valid -SELECT * +SELECT * # invalid_qualified_table_references statement error Error during planning: table 'datafusion\.nonexistentschema\.aggregate_test_100' not found @@ -125,7 +125,7 @@ from aggregate_test_100 order by c9 # WindowFunction wrong signature -statement error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'nth_value' function: coercion from Int32, Int64, Int64 to the signature OneOf\(\[Any\(0\), Any\(1\), Any\(2\)\]\) failed +statement error No function matches the given name and argument types 'nth_value\(Int32, Int64, Int64\)' select c9, nth_value(c5, 2, 3) over (order by c9) as nv1 From 9d922f43f86ff6f3a52dedef44430fe8f65a2da2 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 28 Dec 2025 17:31:04 +0900 Subject: [PATCH 2/4] make diff easier --- .../expr/src/type_coercion/functions.rs | 98 +++++++++---------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 7f19416d50289..e1f2a19672825 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -146,55 +146,6 @@ pub fn fields_with_window_udf( fields_with_udf(current_fields, func) } -/// Performs type coercion for function arguments. -/// -/// Returns the data types to which each argument must be coerced to -/// match `signature`. -/// -/// For more details on coercion in general, please see the -/// [`type_coercion`](crate::type_coercion) module. -#[deprecated(since = "52.0.0", note = "use fields_with_udf")] -pub fn data_types( - function_name: impl AsRef, - current_types: &[DataType], - signature: &Signature, -) -> Result> { - let type_signature = &signature.type_signature; - - if current_types.is_empty() && type_signature != &TypeSignature::UserDefined { - if type_signature.supports_zero_argument() { - return Ok(vec![]); - } else if type_signature.used_to_support_zero_arguments() { - // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 - return plan_err!( - "function '{}' has signature {type_signature:?} which does not support zero arguments. Use TypeSignature::Nullary for zero arguments", - function_name.as_ref() - ); - } else { - return plan_err!( - "Function '{}' has signature {type_signature:?} which does not support zero arguments", - function_name.as_ref() - ); - } - } - - let valid_types = - get_valid_types(function_name.as_ref(), type_signature, current_types)?; - if valid_types - .iter() - .any(|data_type| data_type == current_types) - { - return Ok(current_types.to_vec()); - } - - try_coerce_types( - function_name.as_ref(), - valid_types, - current_types, - type_signature, - ) -} - /// Performs type coercion for UDF arguments. /// /// Returns the data types to which each argument must be coerced to @@ -249,6 +200,55 @@ pub fn fields_with_udf( .collect()) } +/// Performs type coercion for function arguments. +/// +/// Returns the data types to which each argument must be coerced to +/// match `signature`. +/// +/// For more details on coercion in general, please see the +/// [`type_coercion`](crate::type_coercion) module. +#[deprecated(since = "52.0.0", note = "use fields_with_udf")] +pub fn data_types( + function_name: impl AsRef, + current_types: &[DataType], + signature: &Signature, +) -> Result> { + let type_signature = &signature.type_signature; + + if current_types.is_empty() && type_signature != &TypeSignature::UserDefined { + if type_signature.supports_zero_argument() { + return Ok(vec![]); + } else if type_signature.used_to_support_zero_arguments() { + // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 + return plan_err!( + "function '{}' has signature {type_signature:?} which does not support zero arguments. Use TypeSignature::Nullary for zero arguments", + function_name.as_ref() + ); + } else { + return plan_err!( + "Function '{}' has signature {type_signature:?} which does not support zero arguments", + function_name.as_ref() + ); + } + } + + let valid_types = + get_valid_types(function_name.as_ref(), type_signature, current_types)?; + if valid_types + .iter() + .any(|data_type| data_type == current_types) + { + return Ok(current_types.to_vec()); + } + + try_coerce_types( + function_name.as_ref(), + valid_types, + current_types, + type_signature, + ) +} + fn is_well_supported_signature(type_signature: &TypeSignature) -> bool { match type_signature { TypeSignature::OneOf(type_signatures) => { From bed669da0fb9cbe8c07f8cbeb5af7b7f666a45a6 Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 28 Dec 2025 19:04:43 +0900 Subject: [PATCH 3/4] clippy --- datafusion/core/tests/fuzz_cases/window_fuzz.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 2ecfcd84aba98..1212c081ebe00 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -35,7 +35,7 @@ use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::HashMap; use datafusion_common::{Result, ScalarValue}; use datafusion_common_runtime::SpawnedTask; -use datafusion_expr::type_coercion::functions::fields_with_aggregate_udf; +use datafusion_expr::type_coercion::functions::fields_with_udf; use datafusion_expr::{ WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; @@ -451,7 +451,7 @@ fn get_random_function( // Do type coercion first argument let a = args[0].clone(); let dt = a.return_field(schema.as_ref()).unwrap(); - let coerced = fields_with_aggregate_udf(&[dt], udf).unwrap(); + let coerced = fields_with_udf(&[dt], udf.as_ref()).unwrap(); args[0] = cast(a, schema, coerced[0].data_type().clone()).unwrap(); } From 76e9a25d68867e09009ebdf239efe65e09a2b61e Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Sun, 28 Dec 2025 19:09:33 +0900 Subject: [PATCH 4/4] fix test --- datafusion/sqllogictest/test_files/errors.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/errors.slt b/datafusion/sqllogictest/test_files/errors.slt index be8013ec0fdf3..20c1db5cb1511 100644 --- a/datafusion/sqllogictest/test_files/errors.slt +++ b/datafusion/sqllogictest/test_files/errors.slt @@ -125,7 +125,7 @@ from aggregate_test_100 order by c9 # WindowFunction wrong signature -statement error No function matches the given name and argument types 'nth_value\(Int32, Int64, Int64\)' +statement error DataFusion error: Error during planning: Internal error: Function 'nth_value' failed to match any signature select c9, nth_value(c5, 2, 3) over (order by c9) as nv1