From 8fa55703bee2fda264d09b34c623ae10189b456a Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sun, 28 Dec 2025 19:06:36 +0800 Subject: [PATCH 01/12] Introduce ParameterKind and update signature builders Add ParameterKind plus with_parameter/with_parameters builders on Signature to pair parameter names with types or coercions, reusing arity validation. Document the migration path for ergonomic signature construction. Expand signature tests to include new builder success cases and address mismatched counts, duplicate names, and variadic failures. Refactor substr function signature setup to utilize the new parameter builder, reducing duplication in specifying coercions and names. --- datafusion/expr-common/src/signature.rs | 247 +++++++++++++++++++++ datafusion/functions/src/unicode/substr.rs | 30 ++- 2 files changed, 266 insertions(+), 11 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 90bd1415003cd..438d34235ab7d 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1094,6 +1094,29 @@ pub struct Signature { pub parameter_names: Option>, } +/// A helper enum used by [`Signature::with_parameter`] and [`Signature::with_parameters`] +/// to accept either concrete [`DataType`]s (for [`TypeSignature::Exact`] and +/// [`TypeSignature::Uniform`]) or [`Coercion`] rules (for [`TypeSignature::Coercible`]). +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] +pub enum ParameterKind { + /// A concrete [`DataType`] expected by the signature. + DataType(DataType), + /// A [`Coercion`] rule that will be stored on the signature. + Coercion(Coercion), +} + +impl From for ParameterKind { + fn from(value: DataType) -> Self { + Self::DataType(value) + } +} + +impl From for ParameterKind { + fn from(value: Coercion) -> Self { + Self::Coercion(value) + } +} + impl Signature { /// Creates a new Signature from a given type signature and volatility. pub fn new(type_signature: TypeSignature, volatility: Volatility) -> Self { @@ -1333,6 +1356,126 @@ impl Signature { Ok(self) } + /// Add a single named parameter to this signature. + /// + /// This method is useful when constructing signatures alongside parameter + /// names and expected coercions. For bulk construction, prefer + /// [`Signature::with_parameters`]. + pub fn with_parameter(self, name: N, parameter: P) -> Result + where + N: Into + AsRef, + P: Into + Clone, + { + self.with_parameters(&[(name, parameter)]) + } + + /// Add parameter names together with their expected types or coercion rules. + /// + /// This builder reduces duplication when defining both a [`TypeSignature`] + /// and a set of parameter names. It reuses the same arity validation used + /// by [`Signature::with_parameter_names`] and rejects unsupported + /// combinations such as variadic signatures. + /// + /// The provided `parameters` slice should contain a tuple for each + /// parameter where the first element is the parameter name and the second + /// element describes the expected type: + /// + /// * [`ParameterKind::DataType`] is accepted for [`TypeSignature::Exact`] + /// and [`TypeSignature::Uniform`]. + /// * [`ParameterKind::Coercion`] is accepted for [`TypeSignature::Coercible`]. + /// + /// For other fixed-arity signatures, the parameter types are ignored but + /// the names are still validated and stored. This keeps + /// [`Signature::with_parameter_names`] available for backward compatibility + /// while allowing new code to migrate to a single API that pairs names with + /// types. + pub fn with_parameters(mut self, parameters: &[(N, P)]) -> Result + where + N: AsRef, + P: Clone + Into, + { + let names = parameters + .iter() + .map(|(name, _)| name.as_ref().to_string()) + .collect::>(); + + match &mut self.type_signature { + TypeSignature::Exact(types) => { + let new_types = parameters + .iter() + .map(|(_, parameter)| match parameter.clone().into() { + ParameterKind::DataType(data_type) => Ok(data_type), + ParameterKind::Coercion(_) => { + plan_err!("Expected DataType for Exact signature parameters") + } + }) + .collect::>>()?; + + if !types.is_empty() && types.len() != new_types.len() { + return plan_err!( + "Parameter names count ({}) does not match signature arity ({})", + names.len(), + types.len() + ); + } + + *types = new_types; + } + TypeSignature::Coercible(coercions) => { + let new_coercions = parameters + .iter() + .map(|(_, parameter)| match parameter.clone().into() { + ParameterKind::Coercion(coercion) => Ok(coercion), + ParameterKind::DataType(_) => plan_err!( + "Expected Coercion for Coercible signature parameters" + ), + }) + .collect::>>()?; + + if !coercions.is_empty() && coercions.len() != new_coercions.len() { + return plan_err!( + "Parameter names count ({}) does not match signature arity ({})", + names.len(), + coercions.len() + ); + } + + *coercions = new_coercions; + } + TypeSignature::Uniform(arg_count, valid_types) => { + let provided_types = parameters + .iter() + .map(|(_, parameter)| match parameter.clone().into() { + ParameterKind::DataType(data_type) => Ok(data_type), + ParameterKind::Coercion(_) => plan_err!( + "Expected DataType for Uniform signature parameters" + ), + }) + .collect::>>()?; + + for data_type in &provided_types { + if !valid_types.contains(data_type) { + return plan_err!( + "Parameter type {:?} not permitted for Uniform signature {:?}", + data_type, + valid_types + ); + } + } + + if provided_types.len() != *arg_count { + // Delegate to arity validation for consistent messaging + self.validate_parameter_names(&names)?; + } + } + _ => {} + } + + self.validate_parameter_names(&names)?; + self.parameter_names = Some(names); + Ok(self) + } + /// Validate that parameter names are compatible with this signature fn validate_parameter_names(&self, names: &[String]) -> Result<()> { match self.type_signature.arity() { @@ -1587,6 +1730,110 @@ mod tests { ); } + #[test] + fn test_signature_with_parameters_exact() { + let sig = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) + .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]) + .unwrap(); + + assert_eq!( + sig.type_signature, + TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]) + ); + assert_eq!( + sig.parameter_names, + Some(vec!["count".to_string(), "name".to_string()]) + ); + } + + #[test] + fn test_signature_with_parameters_coercible() { + let sig = Signature::new(TypeSignature::Coercible(vec![]), Volatility::Immutable) + .with_parameters(&[ + ( + "str", + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + ), + ( + "start_pos", + Coercion::new_implicit( + TypeSignatureClass::Native(logical_int64()), + vec![TypeSignatureClass::Native(logical_int32())], + NativeType::Int64, + ), + ), + ]) + .unwrap(); + + assert_eq!(sig.type_signature.arity(), Arity::Fixed(2)); + assert_eq!( + sig.parameter_names, + Some(vec!["str".to_string(), "start_pos".to_string()]) + ); + } + + #[test] + fn test_signature_with_parameters_uniform() { + let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) + .with_parameters(&[ + ("x", DataType::Float64), + ("y", DataType::Float64), + ("z", DataType::Float64), + ]) + .unwrap(); + + assert_eq!( + sig.type_signature, + TypeSignature::Uniform(3, vec![DataType::Float64]) + ); + assert_eq!( + sig.parameter_names, + Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) + ); + } + + #[test] + fn test_signature_with_parameters_mismatched_counts() { + let result = Signature::exact(vec![DataType::Int32], Volatility::Immutable) + .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("does not match signature arity") + ); + } + + #[test] + fn test_signature_with_parameters_duplicate_names() { + let result = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) + .with_parameters(&[("count", DataType::Int32), ("count", DataType::Int32)]); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Duplicate parameter name") + ); + } + + #[test] + fn test_signature_with_parameters_variadic_error() { + let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) + .with_parameters(&[("arg", DataType::Int32)]); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("variable arity signature") + ); + } + #[test] fn test_signature_parameter_names_variadic() { let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index cc1d53b3aad67..af1cf855a03d0 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -80,23 +80,31 @@ impl SubstrFunc { vec![TypeSignatureClass::Native(logical_int32())], NativeType::Int64, ); + let parameters = [ + ("str", string.clone()), + ("start_pos", int64.clone()), + ("length", int64.clone()), + ]; Self { signature: Signature::one_of( vec![ - TypeSignature::Coercible(vec![string.clone(), int64.clone()]), - TypeSignature::Coercible(vec![ - string.clone(), - int64.clone(), - int64.clone(), - ]), + TypeSignature::Coercible( + parameters + .iter() + .take(2) + .map(|(_, coercion)| coercion.clone()) + .collect(), + ), + TypeSignature::Coercible( + parameters + .iter() + .map(|(_, coercion)| coercion.clone()) + .collect(), + ), ], Volatility::Immutable, ) - .with_parameter_names(vec![ - "str".to_string(), - "start_pos".to_string(), - "length".to_string(), - ]) + .with_parameters(¶meters) .expect("valid parameter names"), aliases: vec![String::from("substring")], } From 6f331a2dcd7cbfb8e986010c2333fc3524ed56fd Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Sun, 28 Dec 2025 19:26:19 +0800 Subject: [PATCH 02/12] Consolidate parameter extraction in substr.rs Eliminate duplicate iterations over the parameters array by consolidating the extraction logic in substr.rs. --- datafusion/functions/src/unicode/substr.rs | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index af1cf855a03d0..f22cd9ebb271d 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -85,22 +85,15 @@ impl SubstrFunc { ("start_pos", int64.clone()), ("length", int64.clone()), ]; + let coercions: Vec<_> = parameters + .iter() + .map(|(_, coercion)| coercion.clone()) + .collect(); Self { signature: Signature::one_of( vec![ - TypeSignature::Coercible( - parameters - .iter() - .take(2) - .map(|(_, coercion)| coercion.clone()) - .collect(), - ), - TypeSignature::Coercible( - parameters - .iter() - .map(|(_, coercion)| coercion.clone()) - .collect(), - ), + TypeSignature::Coercible(coercions[..2].to_vec()), + TypeSignature::Coercible(coercions), ], Volatility::Immutable, ) From 3d95ac0e37bb5a5c74c1833e528ef5746eff2f06 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 17:10:58 +0800 Subject: [PATCH 03/12] Refactor SubstrFunc signature to use from_parameter_variants --- datafusion/expr-common/src/signature.rs | 315 +++++++++++++++++++++ datafusion/functions/src/unicode/substr.rs | 27 +- 2 files changed, 326 insertions(+), 16 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 438d34235ab7d..0c2c22f9edae5 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1333,6 +1333,174 @@ impl Signature { Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility) } + /// Construct a signature directly from parameter specifications. + /// + /// This is the recommended way to define function signatures as it eliminates + /// duplication by co-locating parameter names with their types or coercion rules. + /// + /// The method automatically infers the appropriate [`TypeSignature`]: + /// - Uses [`TypeSignature::Exact`] for [`DataType`] parameters + /// - Uses [`TypeSignature::Coercible`] for [`Coercion`] parameters + /// + /// # Example + /// ``` + /// # use datafusion_expr_common::signature::{Signature, Volatility, Coercion, TypeSignatureClass}; + /// # use datafusion_common::types::{logical_string, logical_int64, NativeType}; + /// # use arrow::datatypes::DataType; + /// # use datafusion_common::Result; + /// # fn example() -> Result<()> { + /// let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + /// let int64 = Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); + /// + /// // Simple function with coercible parameters + /// let sig = Signature::from_parameters( + /// &[("str", string), ("pos", int64)], + /// Volatility::Immutable + /// )?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Errors + /// Returns an error if parameter names are invalid (e.g., duplicates). + pub fn from_parameters( + parameters: &[(N, P)], + volatility: Volatility, + ) -> Result + where + N: AsRef, + P: Clone + Into, + { + if parameters.is_empty() { + return Ok(Self::nullary(volatility)); + } + + // Determine the type signature based on the first parameter + let first_kind: ParameterKind = parameters[0].1.clone().into(); + let type_signature = match first_kind { + ParameterKind::DataType(_) => { + // All parameters should be DataTypes for Exact signature + TypeSignature::Exact(vec![]) + } + ParameterKind::Coercion(_) => { + // All parameters should be Coercions for Coercible signature + TypeSignature::Coercible(vec![]) + } + }; + + Self::new(type_signature, volatility).with_parameters(parameters) + } + + /// Construct a signature with multiple variants directly from parameter specifications. + /// + /// This is the recommended way to define functions that accept multiple signatures + /// (e.g., optional parameters) as it eliminates duplication and makes the variants + /// explicit. + /// + /// # Example + /// ``` + /// # use datafusion_expr_common::signature::{Signature, Volatility, Coercion, TypeSignatureClass}; + /// # use datafusion_common::types::{logical_string, logical_int64, NativeType}; + /// # use datafusion_common::Result; + /// # fn example() -> Result<()> { + /// let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + /// let int64 = Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); + /// + /// // substr(str, pos) OR substr(str, pos, length) + /// let sig = Signature::from_parameter_variants( + /// vec![ + /// vec![("str", string.clone()), ("start_pos", int64.clone())], + /// vec![("str", string.clone()), ("start_pos", int64.clone()), ("length", int64.clone())], + /// ], + /// Volatility::Immutable + /// )?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Parameter Name Inference + /// The parameter names for the signature are inferred from the **longest variant**. + /// This ensures all parameters are documented. Shorter variants are treated as + /// having optional trailing parameters. + /// + /// # Errors + /// Returns an error if: + /// - No variants are provided + /// - Parameter names are invalid + /// - Type kinds are inconsistent within a variant + pub fn from_parameter_variants( + variants: Vec>, + volatility: Volatility, + ) -> Result + where + N: AsRef, + P: Clone + Into, + { + if variants.is_empty() { + return plan_err!("At least one variant must be provided"); + } + + // Find the longest variant to use for parameter names + let longest_variant = variants + .iter() + .max_by_key(|v| v.len()) + .expect("variants is non-empty"); + + let parameter_names: Vec = longest_variant + .iter() + .map(|(name, _)| name.as_ref().to_string()) + .collect(); + + // Build TypeSignature for each variant + let type_signatures: Result> = variants + .iter() + .map(|variant_params| { + if variant_params.is_empty() { + return Ok(TypeSignature::Nullary); + } + + // Determine type based on first parameter + let first_kind: ParameterKind = variant_params[0].1.clone().into(); + match first_kind { + ParameterKind::DataType(_) => { + let types: Result> = variant_params + .iter() + .map(|(_, p)| match p.clone().into() { + ParameterKind::DataType(dt) => Ok(dt), + ParameterKind::Coercion(_) => plan_err!( + "Cannot mix DataType and Coercion in same variant" + ), + }) + .collect(); + Ok(TypeSignature::Exact(types?)) + } + ParameterKind::Coercion(_) => { + let coercions: Result> = variant_params + .iter() + .map(|(_, p)| match p.clone().into() { + ParameterKind::Coercion(c) => Ok(c), + ParameterKind::DataType(_) => plan_err!( + "Cannot mix DataType and Coercion in same variant" + ), + }) + .collect(); + Ok(TypeSignature::Coercible(coercions?)) + } + } + }) + .collect(); + + let type_signature = if type_signatures.as_ref().unwrap().len() == 1 { + type_signatures?.into_iter().next().unwrap() + } else { + TypeSignature::OneOf(type_signatures?) + }; + + let mut sig = Self::new(type_signature, volatility); + sig.parameter_names = Some(parameter_names); + Ok(sig) + } + /// Add parameter names to this signature, enabling named argument notation. /// /// # Example @@ -1882,6 +2050,153 @@ mod tests { ); } + #[test] + fn test_signature_from_parameters_exact() { + let sig = Signature::from_parameters( + &[("count", DataType::Int32), ("name", DataType::Utf8)], + Volatility::Immutable, + ) + .unwrap(); + + assert_eq!( + sig.type_signature, + TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]) + ); + assert_eq!( + sig.parameter_names, + Some(vec!["count".to_string(), "name".to_string()]) + ); + } + + #[test] + fn test_signature_from_parameters_coercible() { + let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + let int64 = Coercion::new_implicit( + TypeSignatureClass::Native(logical_int64()), + vec![TypeSignatureClass::Native(logical_int32())], + NativeType::Int64, + ); + + let sig = Signature::from_parameters( + &[("str", string), ("pos", int64)], + Volatility::Immutable, + ) + .unwrap(); + + assert_eq!(sig.type_signature.arity(), Arity::Fixed(2)); + assert_eq!( + sig.parameter_names, + Some(vec!["str".to_string(), "pos".to_string()]) + ); + } + + #[test] + fn test_signature_from_parameters_empty() { + let sig = + Signature::from_parameters::<&str, DataType>(&[], Volatility::Immutable) + .unwrap(); + + assert_eq!(sig.type_signature, TypeSignature::Nullary); + assert_eq!(sig.parameter_names, None); + } + + #[test] + fn test_signature_from_parameter_variants_two_variants() { + let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + let int64 = Coercion::new_implicit( + TypeSignatureClass::Native(logical_int64()), + vec![TypeSignatureClass::Native(logical_int32())], + NativeType::Int64, + ); + + let sig = Signature::from_parameter_variants( + vec![ + vec![("str", string.clone()), ("start_pos", int64.clone())], + vec![ + ("str", string.clone()), + ("start_pos", int64.clone()), + ("length", int64.clone()), + ], + ], + Volatility::Immutable, + ) + .unwrap(); + + // Should create OneOf with two variants + assert!(matches!(sig.type_signature, TypeSignature::OneOf(_))); + + // Parameter names should come from the longest variant + assert_eq!( + sig.parameter_names, + Some(vec![ + "str".to_string(), + "start_pos".to_string(), + "length".to_string() + ]) + ); + } + + #[test] + fn test_signature_from_parameter_variants_single_variant() { + let sig = Signature::from_parameter_variants( + vec![vec![("x", DataType::Float64), ("y", DataType::Float64)]], + Volatility::Immutable, + ) + .unwrap(); + + // Single variant should not be wrapped in OneOf + assert_eq!( + sig.type_signature, + TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]) + ); + assert_eq!( + sig.parameter_names, + Some(vec!["x".to_string(), "y".to_string()]) + ); + } + + #[test] + fn test_signature_from_parameter_variants_empty_error() { + let result = Signature::from_parameter_variants::<&str, DataType>( + vec![], + Volatility::Immutable, + ); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("At least one variant") + ); + } + + #[test] + fn test_signature_from_parameter_variants_with_nullary() { + let sig = Signature::from_parameter_variants( + vec![ + vec![], + vec![("count", DataType::Int32)], + vec![("count", DataType::Int32), ("name", DataType::Utf8)], + ], + Volatility::Immutable, + ) + .unwrap(); + + // Should create OneOf including Nullary + assert!(matches!(sig.type_signature, TypeSignature::OneOf(_))); + if let TypeSignature::OneOf(variants) = &sig.type_signature { + assert_eq!(variants.len(), 3); + assert!(matches!(variants[0], TypeSignature::Nullary)); + } + + // Parameter names from longest variant + assert_eq!( + sig.parameter_names, + Some(vec!["count".to_string(), "name".to_string()]) + ); + } + #[test] fn test_signature_nullary_with_empty_names() { let sig = Signature::nullary(Volatility::Immutable) diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index f22cd9ebb271d..930d5a7503ead 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -32,8 +32,8 @@ use datafusion_common::types::{ }; use datafusion_common::{Result, exec_err}; use datafusion_expr::{ - Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, - TypeSignatureClass, Volatility, + Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignatureClass, + Volatility, }; use datafusion_macros::user_doc; @@ -80,25 +80,20 @@ impl SubstrFunc { vec![TypeSignatureClass::Native(logical_int32())], NativeType::Int64, ); - let parameters = [ - ("str", string.clone()), - ("start_pos", int64.clone()), - ("length", int64.clone()), - ]; - let coercions: Vec<_> = parameters - .iter() - .map(|(_, coercion)| coercion.clone()) - .collect(); + Self { - signature: Signature::one_of( + signature: Signature::from_parameter_variants( vec![ - TypeSignature::Coercible(coercions[..2].to_vec()), - TypeSignature::Coercible(coercions), + vec![("str", string.clone()), ("start_pos", int64.clone())], + vec![ + ("str", string.clone()), + ("start_pos", int64.clone()), + ("length", int64.clone()), + ], ], Volatility::Immutable, ) - .with_parameters(¶meters) - .expect("valid parameter names"), + .expect("valid parameter variants"), aliases: vec![String::from("substring")], } } From 5bf34bfac78d1e211144894d31c54ec20fb4a9ad Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 17:30:30 +0800 Subject: [PATCH 04/12] Remove from_parameters method and related tests from Signature implementation --- datafusion/expr-common/src/signature.rs | 212 ------------------------ 1 file changed, 212 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 0c2c22f9edae5..0d40e1d597738 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1333,64 +1333,6 @@ impl Signature { Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility) } - /// Construct a signature directly from parameter specifications. - /// - /// This is the recommended way to define function signatures as it eliminates - /// duplication by co-locating parameter names with their types or coercion rules. - /// - /// The method automatically infers the appropriate [`TypeSignature`]: - /// - Uses [`TypeSignature::Exact`] for [`DataType`] parameters - /// - Uses [`TypeSignature::Coercible`] for [`Coercion`] parameters - /// - /// # Example - /// ``` - /// # use datafusion_expr_common::signature::{Signature, Volatility, Coercion, TypeSignatureClass}; - /// # use datafusion_common::types::{logical_string, logical_int64, NativeType}; - /// # use arrow::datatypes::DataType; - /// # use datafusion_common::Result; - /// # fn example() -> Result<()> { - /// let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); - /// let int64 = Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); - /// - /// // Simple function with coercible parameters - /// let sig = Signature::from_parameters( - /// &[("str", string), ("pos", int64)], - /// Volatility::Immutable - /// )?; - /// # Ok(()) - /// # } - /// ``` - /// - /// # Errors - /// Returns an error if parameter names are invalid (e.g., duplicates). - pub fn from_parameters( - parameters: &[(N, P)], - volatility: Volatility, - ) -> Result - where - N: AsRef, - P: Clone + Into, - { - if parameters.is_empty() { - return Ok(Self::nullary(volatility)); - } - - // Determine the type signature based on the first parameter - let first_kind: ParameterKind = parameters[0].1.clone().into(); - let type_signature = match first_kind { - ParameterKind::DataType(_) => { - // All parameters should be DataTypes for Exact signature - TypeSignature::Exact(vec![]) - } - ParameterKind::Coercion(_) => { - // All parameters should be Coercions for Coercible signature - TypeSignature::Coercible(vec![]) - } - }; - - Self::new(type_signature, volatility).with_parameters(parameters) - } - /// Construct a signature with multiple variants directly from parameter specifications. /// /// This is the recommended way to define functions that accept multiple signatures @@ -1898,110 +1840,6 @@ mod tests { ); } - #[test] - fn test_signature_with_parameters_exact() { - let sig = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) - .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]) - .unwrap(); - - assert_eq!( - sig.type_signature, - TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]) - ); - assert_eq!( - sig.parameter_names, - Some(vec!["count".to_string(), "name".to_string()]) - ); - } - - #[test] - fn test_signature_with_parameters_coercible() { - let sig = Signature::new(TypeSignature::Coercible(vec![]), Volatility::Immutable) - .with_parameters(&[ - ( - "str", - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - ), - ( - "start_pos", - Coercion::new_implicit( - TypeSignatureClass::Native(logical_int64()), - vec![TypeSignatureClass::Native(logical_int32())], - NativeType::Int64, - ), - ), - ]) - .unwrap(); - - assert_eq!(sig.type_signature.arity(), Arity::Fixed(2)); - assert_eq!( - sig.parameter_names, - Some(vec!["str".to_string(), "start_pos".to_string()]) - ); - } - - #[test] - fn test_signature_with_parameters_uniform() { - let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) - .with_parameters(&[ - ("x", DataType::Float64), - ("y", DataType::Float64), - ("z", DataType::Float64), - ]) - .unwrap(); - - assert_eq!( - sig.type_signature, - TypeSignature::Uniform(3, vec![DataType::Float64]) - ); - assert_eq!( - sig.parameter_names, - Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) - ); - } - - #[test] - fn test_signature_with_parameters_mismatched_counts() { - let result = Signature::exact(vec![DataType::Int32], Volatility::Immutable) - .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("does not match signature arity") - ); - } - - #[test] - fn test_signature_with_parameters_duplicate_names() { - let result = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) - .with_parameters(&[("count", DataType::Int32), ("count", DataType::Int32)]); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("Duplicate parameter name") - ); - } - - #[test] - fn test_signature_with_parameters_variadic_error() { - let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) - .with_parameters(&[("arg", DataType::Int32)]); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("variable arity signature") - ); - } - #[test] fn test_signature_parameter_names_variadic() { let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) @@ -2050,56 +1888,6 @@ mod tests { ); } - #[test] - fn test_signature_from_parameters_exact() { - let sig = Signature::from_parameters( - &[("count", DataType::Int32), ("name", DataType::Utf8)], - Volatility::Immutable, - ) - .unwrap(); - - assert_eq!( - sig.type_signature, - TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]) - ); - assert_eq!( - sig.parameter_names, - Some(vec!["count".to_string(), "name".to_string()]) - ); - } - - #[test] - fn test_signature_from_parameters_coercible() { - let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); - let int64 = Coercion::new_implicit( - TypeSignatureClass::Native(logical_int64()), - vec![TypeSignatureClass::Native(logical_int32())], - NativeType::Int64, - ); - - let sig = Signature::from_parameters( - &[("str", string), ("pos", int64)], - Volatility::Immutable, - ) - .unwrap(); - - assert_eq!(sig.type_signature.arity(), Arity::Fixed(2)); - assert_eq!( - sig.parameter_names, - Some(vec!["str".to_string(), "pos".to_string()]) - ); - } - - #[test] - fn test_signature_from_parameters_empty() { - let sig = - Signature::from_parameters::<&str, DataType>(&[], Volatility::Immutable) - .unwrap(); - - assert_eq!(sig.type_signature, TypeSignature::Nullary); - assert_eq!(sig.parameter_names, None); - } - #[test] fn test_signature_from_parameter_variants_two_variants() { let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); From ffd867f39e7257562f376f832167abb740d33f9b Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 17:37:01 +0800 Subject: [PATCH 05/12] Refactor Signature::from_parameter_variant implementation to enhance parameter handling and type signature building --- datafusion/expr-common/src/signature.rs | 127 +++++++++++++++--------- 1 file changed, 80 insertions(+), 47 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 0d40e1d597738..62cf6fece8341 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1382,65 +1382,98 @@ impl Signature { return plan_err!("At least one variant must be provided"); } - // Find the longest variant to use for parameter names - let longest_variant = variants + let parameter_names = Self::extract_parameter_names(&variants); + let type_signatures = Self::build_type_signatures(&variants)?; + let type_signature = Self::consolidate_signatures(type_signatures)?; + + let mut sig = Self::new(type_signature, volatility); + sig.parameter_names = Some(parameter_names); + Ok(sig) + } + + /// Extract parameter names from the longest variant + fn extract_parameter_names(variants: &[Vec<(N, P)>]) -> Vec + where + N: AsRef, + { + variants .iter() .max_by_key(|v| v.len()) - .expect("variants is non-empty"); - - let parameter_names: Vec = longest_variant + .expect("variants is non-empty") .iter() .map(|(name, _)| name.as_ref().to_string()) - .collect(); + .collect() + } - // Build TypeSignature for each variant - let type_signatures: Result> = variants + /// Build TypeSignature for each variant + fn build_type_signatures(variants: &[Vec<(N, P)>]) -> Result> + where + P: Clone + Into, + { + variants .iter() - .map(|variant_params| { - if variant_params.is_empty() { - return Ok(TypeSignature::Nullary); - } + .map(|params| Self::build_variant_signature(params)) + .collect() + } - // Determine type based on first parameter - let first_kind: ParameterKind = variant_params[0].1.clone().into(); - match first_kind { - ParameterKind::DataType(_) => { - let types: Result> = variant_params - .iter() - .map(|(_, p)| match p.clone().into() { - ParameterKind::DataType(dt) => Ok(dt), - ParameterKind::Coercion(_) => plan_err!( - "Cannot mix DataType and Coercion in same variant" - ), - }) - .collect(); - Ok(TypeSignature::Exact(types?)) - } - ParameterKind::Coercion(_) => { - let coercions: Result> = variant_params - .iter() - .map(|(_, p)| match p.clone().into() { - ParameterKind::Coercion(c) => Ok(c), - ParameterKind::DataType(_) => plan_err!( - "Cannot mix DataType and Coercion in same variant" - ), - }) - .collect(); - Ok(TypeSignature::Coercible(coercions?)) - } + /// Build a TypeSignature for a single variant + fn build_variant_signature(params: &[(N, P)]) -> Result + where + P: Clone + Into, + { + if params.is_empty() { + return Ok(TypeSignature::Nullary); + } + + match params[0].1.clone().into() { + ParameterKind::DataType(_) => Self::build_exact_signature(params), + ParameterKind::Coercion(_) => Self::build_coercible_signature(params), + } + } + + /// Build an Exact TypeSignature from DataType parameters + fn build_exact_signature(params: &[(N, P)]) -> Result + where + P: Clone + Into, + { + let types: Result> = params + .iter() + .map(|(_, p)| match p.clone().into() { + ParameterKind::DataType(dt) => Ok(dt), + ParameterKind::Coercion(_) => { + plan_err!("Cannot mix DataType and Coercion in same variant") } }) .collect(); + Ok(TypeSignature::Exact(types?)) + } - let type_signature = if type_signatures.as_ref().unwrap().len() == 1 { - type_signatures?.into_iter().next().unwrap() - } else { - TypeSignature::OneOf(type_signatures?) - }; + /// Build a Coercible TypeSignature from Coercion parameters + fn build_coercible_signature(params: &[(N, P)]) -> Result + where + P: Clone + Into, + { + let coercions: Result> = params + .iter() + .map(|(_, p)| match p.clone().into() { + ParameterKind::Coercion(c) => Ok(c), + ParameterKind::DataType(_) => { + plan_err!("Cannot mix DataType and Coercion in same variant") + } + }) + .collect(); + Ok(TypeSignature::Coercible(coercions?)) + } - let mut sig = Self::new(type_signature, volatility); - sig.parameter_names = Some(parameter_names); - Ok(sig) + /// Consolidate multiple TypeSignatures into a single one (or OneOf) + fn consolidate_signatures( + mut signatures: Vec, + ) -> Result { + match signatures.len() { + 0 => internal_err!("No type signatures provided"), + 1 => Ok(signatures.pop().unwrap()), + _ => Ok(TypeSignature::OneOf(signatures)), + } } /// Add parameter names to this signature, enabling named argument notation. From d3266498231cb28a2d7923389b75d5ab67edb7e1 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 18:18:17 +0800 Subject: [PATCH 06/12] Revert signature.rs --- datafusion/expr-common/src/signature.rs | 334 +++++++----------------- 1 file changed, 99 insertions(+), 235 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 62cf6fece8341..438d34235ab7d 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1333,149 +1333,6 @@ impl Signature { Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility) } - /// Construct a signature with multiple variants directly from parameter specifications. - /// - /// This is the recommended way to define functions that accept multiple signatures - /// (e.g., optional parameters) as it eliminates duplication and makes the variants - /// explicit. - /// - /// # Example - /// ``` - /// # use datafusion_expr_common::signature::{Signature, Volatility, Coercion, TypeSignatureClass}; - /// # use datafusion_common::types::{logical_string, logical_int64, NativeType}; - /// # use datafusion_common::Result; - /// # fn example() -> Result<()> { - /// let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); - /// let int64 = Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); - /// - /// // substr(str, pos) OR substr(str, pos, length) - /// let sig = Signature::from_parameter_variants( - /// vec![ - /// vec![("str", string.clone()), ("start_pos", int64.clone())], - /// vec![("str", string.clone()), ("start_pos", int64.clone()), ("length", int64.clone())], - /// ], - /// Volatility::Immutable - /// )?; - /// # Ok(()) - /// # } - /// ``` - /// - /// # Parameter Name Inference - /// The parameter names for the signature are inferred from the **longest variant**. - /// This ensures all parameters are documented. Shorter variants are treated as - /// having optional trailing parameters. - /// - /// # Errors - /// Returns an error if: - /// - No variants are provided - /// - Parameter names are invalid - /// - Type kinds are inconsistent within a variant - pub fn from_parameter_variants( - variants: Vec>, - volatility: Volatility, - ) -> Result - where - N: AsRef, - P: Clone + Into, - { - if variants.is_empty() { - return plan_err!("At least one variant must be provided"); - } - - let parameter_names = Self::extract_parameter_names(&variants); - let type_signatures = Self::build_type_signatures(&variants)?; - let type_signature = Self::consolidate_signatures(type_signatures)?; - - let mut sig = Self::new(type_signature, volatility); - sig.parameter_names = Some(parameter_names); - Ok(sig) - } - - /// Extract parameter names from the longest variant - fn extract_parameter_names(variants: &[Vec<(N, P)>]) -> Vec - where - N: AsRef, - { - variants - .iter() - .max_by_key(|v| v.len()) - .expect("variants is non-empty") - .iter() - .map(|(name, _)| name.as_ref().to_string()) - .collect() - } - - /// Build TypeSignature for each variant - fn build_type_signatures(variants: &[Vec<(N, P)>]) -> Result> - where - P: Clone + Into, - { - variants - .iter() - .map(|params| Self::build_variant_signature(params)) - .collect() - } - - /// Build a TypeSignature for a single variant - fn build_variant_signature(params: &[(N, P)]) -> Result - where - P: Clone + Into, - { - if params.is_empty() { - return Ok(TypeSignature::Nullary); - } - - match params[0].1.clone().into() { - ParameterKind::DataType(_) => Self::build_exact_signature(params), - ParameterKind::Coercion(_) => Self::build_coercible_signature(params), - } - } - - /// Build an Exact TypeSignature from DataType parameters - fn build_exact_signature(params: &[(N, P)]) -> Result - where - P: Clone + Into, - { - let types: Result> = params - .iter() - .map(|(_, p)| match p.clone().into() { - ParameterKind::DataType(dt) => Ok(dt), - ParameterKind::Coercion(_) => { - plan_err!("Cannot mix DataType and Coercion in same variant") - } - }) - .collect(); - Ok(TypeSignature::Exact(types?)) - } - - /// Build a Coercible TypeSignature from Coercion parameters - fn build_coercible_signature(params: &[(N, P)]) -> Result - where - P: Clone + Into, - { - let coercions: Result> = params - .iter() - .map(|(_, p)| match p.clone().into() { - ParameterKind::Coercion(c) => Ok(c), - ParameterKind::DataType(_) => { - plan_err!("Cannot mix DataType and Coercion in same variant") - } - }) - .collect(); - Ok(TypeSignature::Coercible(coercions?)) - } - - /// Consolidate multiple TypeSignatures into a single one (or OneOf) - fn consolidate_signatures( - mut signatures: Vec, - ) -> Result { - match signatures.len() { - 0 => internal_err!("No type signatures provided"), - 1 => Ok(signatures.pop().unwrap()), - _ => Ok(TypeSignature::OneOf(signatures)), - } - } - /// Add parameter names to this signature, enabling named argument notation. /// /// # Example @@ -1874,147 +1731,154 @@ mod tests { } #[test] - fn test_signature_parameter_names_variadic() { - let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) - .with_parameter_names(vec!["arg".to_string()]); + fn test_signature_with_parameters_exact() { + let sig = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) + .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]) + .unwrap(); - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("variable arity signature") + assert_eq!( + sig.type_signature, + TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]) ); - } - - #[test] - fn test_signature_without_parameter_names() { - let sig = Signature::exact( - vec![DataType::Int32, DataType::Utf8], - Volatility::Immutable, + assert_eq!( + sig.parameter_names, + Some(vec!["count".to_string(), "name".to_string()]) ); - - assert_eq!(sig.parameter_names, None); } #[test] - fn test_signature_uniform_with_parameter_names() { - let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) - .with_parameter_names(vec!["x".to_string(), "y".to_string(), "z".to_string()]) + fn test_signature_with_parameters_coercible() { + let sig = Signature::new(TypeSignature::Coercible(vec![]), Volatility::Immutable) + .with_parameters(&[ + ( + "str", + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + ), + ( + "start_pos", + Coercion::new_implicit( + TypeSignatureClass::Native(logical_int64()), + vec![TypeSignatureClass::Native(logical_int32())], + NativeType::Int64, + ), + ), + ]) .unwrap(); + assert_eq!(sig.type_signature.arity(), Arity::Fixed(2)); assert_eq!( sig.parameter_names, - Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) + Some(vec!["str".to_string(), "start_pos".to_string()]) ); } #[test] - fn test_signature_numeric_with_parameter_names() { - let sig = Signature::numeric(2, Volatility::Immutable) - .with_parameter_names(vec!["a".to_string(), "b".to_string()]) + fn test_signature_with_parameters_uniform() { + let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) + .with_parameters(&[ + ("x", DataType::Float64), + ("y", DataType::Float64), + ("z", DataType::Float64), + ]) .unwrap(); + assert_eq!( + sig.type_signature, + TypeSignature::Uniform(3, vec![DataType::Float64]) + ); assert_eq!( sig.parameter_names, - Some(vec!["a".to_string(), "b".to_string()]) + Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) ); } #[test] - fn test_signature_from_parameter_variants_two_variants() { - let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); - let int64 = Coercion::new_implicit( - TypeSignatureClass::Native(logical_int64()), - vec![TypeSignatureClass::Native(logical_int32())], - NativeType::Int64, - ); + fn test_signature_with_parameters_mismatched_counts() { + let result = Signature::exact(vec![DataType::Int32], Volatility::Immutable) + .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]); - let sig = Signature::from_parameter_variants( - vec![ - vec![("str", string.clone()), ("start_pos", int64.clone())], - vec![ - ("str", string.clone()), - ("start_pos", int64.clone()), - ("length", int64.clone()), - ], - ], - Volatility::Immutable, - ) - .unwrap(); + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("does not match signature arity") + ); + } - // Should create OneOf with two variants - assert!(matches!(sig.type_signature, TypeSignature::OneOf(_))); + #[test] + fn test_signature_with_parameters_duplicate_names() { + let result = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) + .with_parameters(&[("count", DataType::Int32), ("count", DataType::Int32)]); - // Parameter names should come from the longest variant - assert_eq!( - sig.parameter_names, - Some(vec![ - "str".to_string(), - "start_pos".to_string(), - "length".to_string() - ]) + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Duplicate parameter name") ); } #[test] - fn test_signature_from_parameter_variants_single_variant() { - let sig = Signature::from_parameter_variants( - vec![vec![("x", DataType::Float64), ("y", DataType::Float64)]], - Volatility::Immutable, - ) - .unwrap(); + fn test_signature_with_parameters_variadic_error() { + let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) + .with_parameters(&[("arg", DataType::Int32)]); - // Single variant should not be wrapped in OneOf - assert_eq!( - sig.type_signature, - TypeSignature::Exact(vec![DataType::Float64, DataType::Float64]) - ); - assert_eq!( - sig.parameter_names, - Some(vec!["x".to_string(), "y".to_string()]) + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("variable arity signature") ); } #[test] - fn test_signature_from_parameter_variants_empty_error() { - let result = Signature::from_parameter_variants::<&str, DataType>( - vec![], - Volatility::Immutable, - ); + fn test_signature_parameter_names_variadic() { + let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) + .with_parameter_names(vec!["arg".to_string()]); assert!(result.is_err()); assert!( result .unwrap_err() .to_string() - .contains("At least one variant") + .contains("variable arity signature") ); } #[test] - fn test_signature_from_parameter_variants_with_nullary() { - let sig = Signature::from_parameter_variants( - vec![ - vec![], - vec![("count", DataType::Int32)], - vec![("count", DataType::Int32), ("name", DataType::Utf8)], - ], + fn test_signature_without_parameter_names() { + let sig = Signature::exact( + vec![DataType::Int32, DataType::Utf8], Volatility::Immutable, - ) - .unwrap(); + ); - // Should create OneOf including Nullary - assert!(matches!(sig.type_signature, TypeSignature::OneOf(_))); - if let TypeSignature::OneOf(variants) = &sig.type_signature { - assert_eq!(variants.len(), 3); - assert!(matches!(variants[0], TypeSignature::Nullary)); - } + assert_eq!(sig.parameter_names, None); + } + + #[test] + fn test_signature_uniform_with_parameter_names() { + let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) + .with_parameter_names(vec!["x".to_string(), "y".to_string(), "z".to_string()]) + .unwrap(); - // Parameter names from longest variant assert_eq!( sig.parameter_names, - Some(vec!["count".to_string(), "name".to_string()]) + Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) + ); + } + + #[test] + fn test_signature_numeric_with_parameter_names() { + let sig = Signature::numeric(2, Volatility::Immutable) + .with_parameter_names(vec!["a".to_string(), "b".to_string()]) + .unwrap(); + + assert_eq!( + sig.parameter_names, + Some(vec!["a".to_string(), "b".to_string()]) ); } From 0e0698bfd1ee5f22b697d9b0bd1fa63a853b0a31 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 18:19:09 +0800 Subject: [PATCH 07/12] feat(signature): add support for defining functions with multiple parameter variants - Implemented `from_parameter_variants` method for the `Signature` struct to allow the creation of function signatures that accept multiple parameter configurations. - Added internal methods for extracting parameter names, building type signatures for variants, and consolidating multiple type signatures. - Enhanced documentation with examples on how to use the new method effectively and constraints on parameter name inference and variant requirements. --- datafusion/expr-common/src/signature.rs | 143 ++++++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 438d34235ab7d..fae06026976e2 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1333,6 +1333,149 @@ impl Signature { Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility) } + /// Construct a signature with multiple variants directly from parameter specifications. + /// + /// This is the recommended way to define functions that accept multiple signatures + /// (e.g., optional parameters) as it eliminates duplication and makes the variants + /// explicit. + /// + /// # Example + /// ``` + /// # use datafusion_expr_common::signature::{Signature, Volatility, Coercion, TypeSignatureClass}; + /// # use datafusion_common::types::{logical_string, logical_int64, NativeType}; + /// # use datafusion_common::Result; + /// # fn example() -> Result<()> { + /// let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + /// let int64 = Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); + /// + /// // substr(str, pos) OR substr(str, pos, length) + /// let sig = Signature::from_parameter_variants( + /// vec![ + /// vec![("str", string.clone()), ("start_pos", int64.clone())], + /// vec![("str", string.clone()), ("start_pos", int64.clone()), ("length", int64.clone())], + /// ], + /// Volatility::Immutable + /// )?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Parameter Name Inference + /// The parameter names for the signature are inferred from the **longest variant**. + /// This ensures all parameters are documented. Shorter variants are treated as + /// having optional trailing parameters. + /// + /// # Errors + /// Returns an error if: + /// - No variants are provided + /// - Parameter names are invalid + /// - Type kinds are inconsistent within a variant + pub fn from_parameter_variants( + variants: Vec>, + volatility: Volatility, + ) -> Result + where + N: AsRef, + P: Clone + Into, + { + if variants.is_empty() { + return plan_err!("At least one variant must be provided"); + } + + let parameter_names = Self::extract_parameter_names(&variants); + let type_signatures = Self::build_type_signatures(&variants)?; + let type_signature = Self::consolidate_signatures(type_signatures)?; + + let mut sig = Self::new(type_signature, volatility); + sig.parameter_names = Some(parameter_names); + Ok(sig) + } + + /// Extract parameter names from the longest variant + fn extract_parameter_names(variants: &[Vec<(N, P)>]) -> Vec + where + N: AsRef, + { + variants + .iter() + .max_by_key(|v| v.len()) + .expect("variants is non-empty") + .iter() + .map(|(name, _)| name.as_ref().to_string()) + .collect() + } + + /// Build TypeSignature for each variant + fn build_type_signatures(variants: &[Vec<(N, P)>]) -> Result> + where + P: Clone + Into, + { + variants + .iter() + .map(|params| Self::build_variant_signature(params)) + .collect() + } + + /// Build a TypeSignature for a single variant + fn build_variant_signature(params: &[(N, P)]) -> Result + where + P: Clone + Into, + { + if params.is_empty() { + return Ok(TypeSignature::Nullary); + } + + match params[0].1.clone().into() { + ParameterKind::DataType(_) => Self::build_exact_signature(params), + ParameterKind::Coercion(_) => Self::build_coercible_signature(params), + } + } + + /// Build an Exact TypeSignature from DataType parameters + fn build_exact_signature(params: &[(N, P)]) -> Result + where + P: Clone + Into, + { + let types: Result> = params + .iter() + .map(|(_, p)| match p.clone().into() { + ParameterKind::DataType(dt) => Ok(dt), + ParameterKind::Coercion(_) => { + plan_err!("Cannot mix DataType and Coercion in same variant") + } + }) + .collect(); + Ok(TypeSignature::Exact(types?)) + } + + /// Build a Coercible TypeSignature from Coercion parameters + fn build_coercible_signature(params: &[(N, P)]) -> Result + where + P: Clone + Into, + { + let coercions: Result> = params + .iter() + .map(|(_, p)| match p.clone().into() { + ParameterKind::Coercion(c) => Ok(c), + ParameterKind::DataType(_) => { + plan_err!("Cannot mix DataType and Coercion in same variant") + } + }) + .collect(); + Ok(TypeSignature::Coercible(coercions?)) + } + + /// Consolidate multiple TypeSignatures into a single one (or OneOf) + fn consolidate_signatures( + mut signatures: Vec, + ) -> Result { + match signatures.len() { + 0 => internal_err!("No type signatures provided"), + 1 => Ok(signatures.pop().unwrap()), + _ => Ok(TypeSignature::OneOf(signatures)), + } + } + /// Add parameter names to this signature, enabling named argument notation. /// /// # Example From 910f9cdbc3d3a12d7db29fa206f188f699f0a4fe Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 18:23:23 +0800 Subject: [PATCH 08/12] refactor(signature): remove from_parameter(s), with_parameter(s) --- datafusion/expr-common/src/signature.rs | 424 +++++++++++------------- 1 file changed, 199 insertions(+), 225 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index fae06026976e2..5f5b3ba89d1d1 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1094,7 +1094,7 @@ pub struct Signature { pub parameter_names: Option>, } -/// A helper enum used by [`Signature::with_parameter`] and [`Signature::with_parameters`] +/// A helper enum used by [`Signature::from_parameter_variants`] /// to accept either concrete [`DataType`]s (for [`TypeSignature::Exact`] and /// [`TypeSignature::Uniform`]) or [`Coercion`] rules (for [`TypeSignature::Coercible`]). #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] @@ -1499,126 +1499,6 @@ impl Signature { Ok(self) } - /// Add a single named parameter to this signature. - /// - /// This method is useful when constructing signatures alongside parameter - /// names and expected coercions. For bulk construction, prefer - /// [`Signature::with_parameters`]. - pub fn with_parameter(self, name: N, parameter: P) -> Result - where - N: Into + AsRef, - P: Into + Clone, - { - self.with_parameters(&[(name, parameter)]) - } - - /// Add parameter names together with their expected types or coercion rules. - /// - /// This builder reduces duplication when defining both a [`TypeSignature`] - /// and a set of parameter names. It reuses the same arity validation used - /// by [`Signature::with_parameter_names`] and rejects unsupported - /// combinations such as variadic signatures. - /// - /// The provided `parameters` slice should contain a tuple for each - /// parameter where the first element is the parameter name and the second - /// element describes the expected type: - /// - /// * [`ParameterKind::DataType`] is accepted for [`TypeSignature::Exact`] - /// and [`TypeSignature::Uniform`]. - /// * [`ParameterKind::Coercion`] is accepted for [`TypeSignature::Coercible`]. - /// - /// For other fixed-arity signatures, the parameter types are ignored but - /// the names are still validated and stored. This keeps - /// [`Signature::with_parameter_names`] available for backward compatibility - /// while allowing new code to migrate to a single API that pairs names with - /// types. - pub fn with_parameters(mut self, parameters: &[(N, P)]) -> Result - where - N: AsRef, - P: Clone + Into, - { - let names = parameters - .iter() - .map(|(name, _)| name.as_ref().to_string()) - .collect::>(); - - match &mut self.type_signature { - TypeSignature::Exact(types) => { - let new_types = parameters - .iter() - .map(|(_, parameter)| match parameter.clone().into() { - ParameterKind::DataType(data_type) => Ok(data_type), - ParameterKind::Coercion(_) => { - plan_err!("Expected DataType for Exact signature parameters") - } - }) - .collect::>>()?; - - if !types.is_empty() && types.len() != new_types.len() { - return plan_err!( - "Parameter names count ({}) does not match signature arity ({})", - names.len(), - types.len() - ); - } - - *types = new_types; - } - TypeSignature::Coercible(coercions) => { - let new_coercions = parameters - .iter() - .map(|(_, parameter)| match parameter.clone().into() { - ParameterKind::Coercion(coercion) => Ok(coercion), - ParameterKind::DataType(_) => plan_err!( - "Expected Coercion for Coercible signature parameters" - ), - }) - .collect::>>()?; - - if !coercions.is_empty() && coercions.len() != new_coercions.len() { - return plan_err!( - "Parameter names count ({}) does not match signature arity ({})", - names.len(), - coercions.len() - ); - } - - *coercions = new_coercions; - } - TypeSignature::Uniform(arg_count, valid_types) => { - let provided_types = parameters - .iter() - .map(|(_, parameter)| match parameter.clone().into() { - ParameterKind::DataType(data_type) => Ok(data_type), - ParameterKind::Coercion(_) => plan_err!( - "Expected DataType for Uniform signature parameters" - ), - }) - .collect::>>()?; - - for data_type in &provided_types { - if !valid_types.contains(data_type) { - return plan_err!( - "Parameter type {:?} not permitted for Uniform signature {:?}", - data_type, - valid_types - ); - } - } - - if provided_types.len() != *arg_count { - // Delegate to arity validation for consistent messaging - self.validate_parameter_names(&names)?; - } - } - _ => {} - } - - self.validate_parameter_names(&names)?; - self.parameter_names = Some(names); - Ok(self) - } - /// Validate that parameter names are compatible with this signature fn validate_parameter_names(&self, names: &[String]) -> Result<()> { match self.type_signature.arity() { @@ -1873,110 +1753,6 @@ mod tests { ); } - #[test] - fn test_signature_with_parameters_exact() { - let sig = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) - .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]) - .unwrap(); - - assert_eq!( - sig.type_signature, - TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]) - ); - assert_eq!( - sig.parameter_names, - Some(vec!["count".to_string(), "name".to_string()]) - ); - } - - #[test] - fn test_signature_with_parameters_coercible() { - let sig = Signature::new(TypeSignature::Coercible(vec![]), Volatility::Immutable) - .with_parameters(&[ - ( - "str", - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - ), - ( - "start_pos", - Coercion::new_implicit( - TypeSignatureClass::Native(logical_int64()), - vec![TypeSignatureClass::Native(logical_int32())], - NativeType::Int64, - ), - ), - ]) - .unwrap(); - - assert_eq!(sig.type_signature.arity(), Arity::Fixed(2)); - assert_eq!( - sig.parameter_names, - Some(vec!["str".to_string(), "start_pos".to_string()]) - ); - } - - #[test] - fn test_signature_with_parameters_uniform() { - let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) - .with_parameters(&[ - ("x", DataType::Float64), - ("y", DataType::Float64), - ("z", DataType::Float64), - ]) - .unwrap(); - - assert_eq!( - sig.type_signature, - TypeSignature::Uniform(3, vec![DataType::Float64]) - ); - assert_eq!( - sig.parameter_names, - Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) - ); - } - - #[test] - fn test_signature_with_parameters_mismatched_counts() { - let result = Signature::exact(vec![DataType::Int32], Volatility::Immutable) - .with_parameters(&[("count", DataType::Int32), ("name", DataType::Utf8)]); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("does not match signature arity") - ); - } - - #[test] - fn test_signature_with_parameters_duplicate_names() { - let result = Signature::new(TypeSignature::Exact(vec![]), Volatility::Immutable) - .with_parameters(&[("count", DataType::Int32), ("count", DataType::Int32)]); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("Duplicate parameter name") - ); - } - - #[test] - fn test_signature_with_parameters_variadic_error() { - let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) - .with_parameters(&[("arg", DataType::Int32)]); - - assert!(result.is_err()); - assert!( - result - .unwrap_err() - .to_string() - .contains("variable arity signature") - ); - } - #[test] fn test_signature_parameter_names_variadic() { let result = Signature::variadic(vec![DataType::Int32], Volatility::Immutable) @@ -2355,4 +2131,202 @@ mod tests { let sig = TypeSignature::UserDefined; assert_eq!(sig.arity(), Arity::Variable); } + + #[test] + fn test_signature_from_parameter_variants_single_variant() { + // Test with a single variant (Exact signature) + let sig = Signature::from_parameter_variants( + vec![vec![("count", DataType::Int32), ("name", DataType::Utf8)]], + Volatility::Immutable, + ) + .unwrap(); + + assert_eq!( + sig.type_signature, + TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]) + ); + assert_eq!( + sig.parameter_names, + Some(vec!["count".to_string(), "name".to_string()]) + ); + assert_eq!(sig.volatility, Volatility::Immutable); + } + + #[test] + fn test_signature_from_parameter_variants_two_variants() { + // Test with two variants creating a OneOf signature + let sig = Signature::from_parameter_variants( + vec![ + vec![("str", DataType::Utf8), ("pos", DataType::Int64)], + vec![ + ("str", DataType::Utf8), + ("pos", DataType::Int64), + ("len", DataType::Int64), + ], + ], + Volatility::Immutable, + ) + .unwrap(); + + // Should create a OneOf signature with parameter names from longest variant + match &sig.type_signature { + TypeSignature::OneOf(sigs) => { + assert_eq!(sigs.len(), 2); + assert_eq!(sigs[0], TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64])); + assert_eq!( + sigs[1], + TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64, DataType::Int64]) + ); + } + other => panic!("Expected OneOf, got {:?}", other), + } + + // Names should come from the longest variant + assert_eq!( + sig.parameter_names, + Some(vec!["str".to_string(), "pos".to_string(), "len".to_string()]) + ); + } + + #[test] + fn test_signature_from_parameter_variants_with_nullary() { + // Test with a Nullary (no arguments) variant + let sig = Signature::from_parameter_variants( + vec![ + vec![], + vec![("flag", DataType::Boolean)], + ], + Volatility::Stable, + ) + .unwrap(); + + // Should create a OneOf with Nullary and single-arg signatures + match &sig.type_signature { + TypeSignature::OneOf(sigs) => { + assert_eq!(sigs.len(), 2); + assert_eq!(sigs[0], TypeSignature::Nullary); + assert_eq!(sigs[1], TypeSignature::Exact(vec![DataType::Boolean])); + } + other => panic!("Expected OneOf, got {:?}", other), + } + + // Names should come from the longest variant + assert_eq!(sig.parameter_names, Some(vec!["flag".to_string()])); + } + + #[test] + fn test_signature_from_parameter_variants_empty_error() { + // Test that an empty variant list returns an error + let result = Signature::from_parameter_variants::<&str, DataType>(vec![], Volatility::Immutable); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("At least one variant must be provided") + ); + } + + #[test] + fn test_signature_from_parameter_variants_with_coercions() { + // Test with Coercion-based variants for Coercible signatures + let string_coercion = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + let int64_coercion = Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); + + let sig = Signature::from_parameter_variants( + vec![ + vec![("str", string_coercion.clone()), ("pos", int64_coercion.clone())], + vec![ + ("str", string_coercion.clone()), + ("pos", int64_coercion.clone()), + ("len", int64_coercion), + ], + ], + Volatility::Immutable, + ) + .unwrap(); + + // Should create OneOf of Coercible signatures + match &sig.type_signature { + TypeSignature::OneOf(sigs) => { + assert_eq!(sigs.len(), 2); + // First variant has 2 coercions + match &sigs[0] { + TypeSignature::Coercible(coercions) => assert_eq!(coercions.len(), 2), + other => panic!("Expected Coercible, got {:?}", other), + } + // Second variant has 3 coercions + match &sigs[1] { + TypeSignature::Coercible(coercions) => assert_eq!(coercions.len(), 3), + other => panic!("Expected Coercible, got {:?}", other), + } + } + other => panic!("Expected OneOf, got {:?}", other), + } + + // Parameter names from longest variant + assert_eq!( + sig.parameter_names, + Some(vec!["str".to_string(), "pos".to_string(), "len".to_string()]) + ); + } + + #[test] + fn test_signature_from_parameter_variants_mixed_volatility() { + // Test that volatility is set correctly + let volatile_sig = Signature::from_parameter_variants( + vec![vec![("x", DataType::Float64)]], + Volatility::Volatile, + ) + .unwrap(); + + assert_eq!(volatile_sig.volatility, Volatility::Volatile); + + let stable_sig = Signature::from_parameter_variants( + vec![vec![("y", DataType::Int32)]], + Volatility::Stable, + ) + .unwrap(); + + assert_eq!(stable_sig.volatility, Volatility::Stable); + } + + #[test] + fn test_signature_from_parameter_variants_mixed_types_error() { + // Test that mixing DataType and Coercion in same variant returns error + let coercion = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + + let result = Signature::from_parameter_variants( + vec![vec![ + ("str", DataType::Utf8), // DataType + ("pos", coercion), // Coercion - mixed! + ]], + Volatility::Immutable, + ); + + assert!(result.is_err()); + assert!( + result + .unwrap_err() + .to_string() + .contains("Cannot mix DataType and Coercion") + ); + } + + #[test] + fn test_signature_from_parameter_variants_single_variant_single_param() { + // Test with a single parameter in a single variant + let sig = Signature::from_parameter_variants( + vec![vec![("value", DataType::Float32)]], + Volatility::Immutable, + ) + .unwrap(); + + assert_eq!( + sig.type_signature, + TypeSignature::Exact(vec![DataType::Float32]) + ); + assert_eq!(sig.parameter_names, Some(vec!["value".to_string()])); + } } From 5c1333d0dc53380b3f9c6f186bc9cd1717cd9392 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 18:25:07 +0800 Subject: [PATCH 09/12] Add from_paramter_variant tests --- datafusion/expr-common/src/signature.rs | 48 +++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 5f5b3ba89d1d1..17316e9203626 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -2172,10 +2172,17 @@ mod tests { match &sig.type_signature { TypeSignature::OneOf(sigs) => { assert_eq!(sigs.len(), 2); - assert_eq!(sigs[0], TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64])); + assert_eq!( + sigs[0], + TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64]) + ); assert_eq!( sigs[1], - TypeSignature::Exact(vec![DataType::Utf8, DataType::Int64, DataType::Int64]) + TypeSignature::Exact(vec![ + DataType::Utf8, + DataType::Int64, + DataType::Int64 + ]) ); } other => panic!("Expected OneOf, got {:?}", other), @@ -2184,7 +2191,11 @@ mod tests { // Names should come from the longest variant assert_eq!( sig.parameter_names, - Some(vec!["str".to_string(), "pos".to_string(), "len".to_string()]) + Some(vec![ + "str".to_string(), + "pos".to_string(), + "len".to_string() + ]) ); } @@ -2192,10 +2203,7 @@ mod tests { fn test_signature_from_parameter_variants_with_nullary() { // Test with a Nullary (no arguments) variant let sig = Signature::from_parameter_variants( - vec![ - vec![], - vec![("flag", DataType::Boolean)], - ], + vec![vec![], vec![("flag", DataType::Boolean)]], Volatility::Stable, ) .unwrap(); @@ -2217,7 +2225,10 @@ mod tests { #[test] fn test_signature_from_parameter_variants_empty_error() { // Test that an empty variant list returns an error - let result = Signature::from_parameter_variants::<&str, DataType>(vec![], Volatility::Immutable); + let result = Signature::from_parameter_variants::<&str, DataType>( + vec![], + Volatility::Immutable, + ); assert!(result.is_err()); assert!( @@ -2231,12 +2242,17 @@ mod tests { #[test] fn test_signature_from_parameter_variants_with_coercions() { // Test with Coercion-based variants for Coercible signatures - let string_coercion = Coercion::new_exact(TypeSignatureClass::Native(logical_string())); - let int64_coercion = Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); + let string_coercion = + Coercion::new_exact(TypeSignatureClass::Native(logical_string())); + let int64_coercion = + Coercion::new_exact(TypeSignatureClass::Native(logical_int64())); let sig = Signature::from_parameter_variants( vec![ - vec![("str", string_coercion.clone()), ("pos", int64_coercion.clone())], + vec![ + ("str", string_coercion.clone()), + ("pos", int64_coercion.clone()), + ], vec![ ("str", string_coercion.clone()), ("pos", int64_coercion.clone()), @@ -2268,7 +2284,11 @@ mod tests { // Parameter names from longest variant assert_eq!( sig.parameter_names, - Some(vec!["str".to_string(), "pos".to_string(), "len".to_string()]) + Some(vec![ + "str".to_string(), + "pos".to_string(), + "len".to_string() + ]) ); } @@ -2299,8 +2319,8 @@ mod tests { let result = Signature::from_parameter_variants( vec![vec![ - ("str", DataType::Utf8), // DataType - ("pos", coercion), // Coercion - mixed! + ("str", ParameterKind::DataType(DataType::Utf8)), + ("pos", ParameterKind::Coercion(coercion)), ]], Volatility::Immutable, ); From 45e093f262872d2e3abc57e1c0bea85e41c40c5c Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 18:46:30 +0800 Subject: [PATCH 10/12] rearrange from_parameter_variants to bottom --- datafusion/expr-common/src/signature.rs | 114 ++++++++++++------------ 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 17316e9203626..61f32dca53193 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1333,6 +1333,63 @@ impl Signature { Signature::arrays(1, Some(ListCoercion::FixedSizedListToList), volatility) } + /// Add parameter names to this signature, enabling named argument notation. + /// + /// # Example + /// ``` + /// # use datafusion_expr_common::signature::{Signature, Volatility}; + /// # use arrow::datatypes::DataType; + /// let sig = + /// Signature::exact(vec![DataType::Int32, DataType::Utf8], Volatility::Immutable) + /// .with_parameter_names(vec!["count".to_string(), "name".to_string()]); + /// ``` + /// + /// # Errors + /// Returns an error if the number of parameter names doesn't match the signature's arity. + /// For signatures with variable arity (e.g., `Variadic`, `VariadicAny`), parameter names + /// cannot be specified. + pub fn with_parameter_names(mut self, names: Vec>) -> Result { + let names = names.into_iter().map(Into::into).collect::>(); + // Validate that the number of names matches the signature + self.validate_parameter_names(&names)?; + self.parameter_names = Some(names); + Ok(self) + } + + /// Validate that parameter names are compatible with this signature + fn validate_parameter_names(&self, names: &[String]) -> Result<()> { + match self.type_signature.arity() { + Arity::Fixed(expected) => { + if names.len() != expected { + return plan_err!( + "Parameter names count ({}) does not match signature arity ({})", + names.len(), + expected + ); + } + } + Arity::Variable => { + // For UserDefined signatures, allow parameter names + // The function implementer is responsible for validating the names match the actual arguments + if !matches!(self.type_signature, TypeSignature::UserDefined) { + return plan_err!( + "Cannot specify parameter names for variable arity signature: {:?}", + self.type_signature + ); + } + } + } + + let mut seen = std::collections::HashSet::new(); + for name in names { + if !seen.insert(name) { + return plan_err!("Duplicate parameter name: '{}'", name); + } + } + + Ok(()) + } + /// Construct a signature with multiple variants directly from parameter specifications. /// /// This is the recommended way to define functions that accept multiple signatures @@ -1475,63 +1532,6 @@ impl Signature { _ => Ok(TypeSignature::OneOf(signatures)), } } - - /// Add parameter names to this signature, enabling named argument notation. - /// - /// # Example - /// ``` - /// # use datafusion_expr_common::signature::{Signature, Volatility}; - /// # use arrow::datatypes::DataType; - /// let sig = - /// Signature::exact(vec![DataType::Int32, DataType::Utf8], Volatility::Immutable) - /// .with_parameter_names(vec!["count".to_string(), "name".to_string()]); - /// ``` - /// - /// # Errors - /// Returns an error if the number of parameter names doesn't match the signature's arity. - /// For signatures with variable arity (e.g., `Variadic`, `VariadicAny`), parameter names - /// cannot be specified. - pub fn with_parameter_names(mut self, names: Vec>) -> Result { - let names = names.into_iter().map(Into::into).collect::>(); - // Validate that the number of names matches the signature - self.validate_parameter_names(&names)?; - self.parameter_names = Some(names); - Ok(self) - } - - /// Validate that parameter names are compatible with this signature - fn validate_parameter_names(&self, names: &[String]) -> Result<()> { - match self.type_signature.arity() { - Arity::Fixed(expected) => { - if names.len() != expected { - return plan_err!( - "Parameter names count ({}) does not match signature arity ({})", - names.len(), - expected - ); - } - } - Arity::Variable => { - // For UserDefined signatures, allow parameter names - // The function implementer is responsible for validating the names match the actual arguments - if !matches!(self.type_signature, TypeSignature::UserDefined) { - return plan_err!( - "Cannot specify parameter names for variable arity signature: {:?}", - self.type_signature - ); - } - } - } - - let mut seen = std::collections::HashSet::new(); - for name in names { - if !seen.insert(name) { - return plan_err!("Duplicate parameter name: '{}'", name); - } - } - - Ok(()) - } } #[cfg(test)] From 7e48d342eb34e0f7d58459c59b33f928ec14f99d Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 18:50:20 +0800 Subject: [PATCH 11/12] docs(signature): update documentation for supported TypeSignatures and add tests for constructor requirements --- datafusion/expr-common/src/signature.rs | 133 ++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 61f32dca53193..3212c0d7ec07c 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1422,6 +1422,19 @@ impl Signature { /// This ensures all parameters are documented. Shorter variants are treated as /// having optional trailing parameters. /// + /// # Supported TypeSignatures + /// This method can generate: + /// - [`TypeSignature::Nullary`] - via empty parameter list `vec![]` + /// - [`TypeSignature::Exact`] - via [`DataType`] parameters + /// - [`TypeSignature::Coercible`] - via [`Coercion`] parameters + /// - [`TypeSignature::OneOf`] - when multiple variants are provided + /// + /// For other signature types (e.g., [`TypeSignature::Variadic`], [`TypeSignature::Uniform`], + /// [`TypeSignature::Numeric`], [`TypeSignature::String`], [`TypeSignature::Comparable`], + /// [`TypeSignature::Any`], [`TypeSignature::ArraySignature`], [`TypeSignature::UserDefined`]), + /// use the corresponding constructor methods like [`Signature::variadic`], [`Signature::uniform`], + /// [`Signature::numeric`], etc. + /// /// # Errors /// Returns an error if: /// - No variants are provided @@ -2349,4 +2362,124 @@ mod tests { ); assert_eq!(sig.parameter_names, Some(vec!["value".to_string()])); } + + // Tests demonstrating TypeSignatures NOT supported by from_parameter_variants + // These tests show that other constructor methods must be used for these cases + + #[test] + fn test_signature_variadic_requires_separate_constructor() { + // Variadic signatures require Signature::variadic() constructor + // from_parameter_variants cannot express "one or more args of these types" + // Note: Variadic signatures don't support parameter names (variable arity) + let sig = Signature::variadic( + vec![DataType::Utf8, DataType::LargeUtf8], + Volatility::Immutable, + ); + + assert_eq!( + sig.type_signature, + TypeSignature::Variadic(vec![DataType::Utf8, DataType::LargeUtf8]) + ); + assert_eq!(sig.parameter_names, None); + } + + #[test] + fn test_signature_uniform_requires_separate_constructor() { + // Uniform signatures require Signature::uniform() constructor + // from_parameter_variants cannot express "all args must be same type from list" + let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) + .with_parameter_names(vec!["x".to_string(), "y".to_string(), "z".to_string()]) + .unwrap(); + + assert_eq!( + sig.type_signature, + TypeSignature::Uniform(3, vec![DataType::Float64]) + ); + assert_eq!( + sig.parameter_names, + Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) + ); + } + + #[test] + fn test_signature_numeric_requires_separate_constructor() { + // Numeric signatures require Signature::numeric() constructor + // from_parameter_variants cannot express "all numeric types" + let sig = Signature::numeric(2, Volatility::Immutable) + .with_parameter_names(vec!["a".to_string(), "b".to_string()]) + .unwrap(); + + assert_eq!(sig.type_signature, TypeSignature::Numeric(2)); + assert_eq!( + sig.parameter_names, + Some(vec!["a".to_string(), "b".to_string()]) + ); + } + + #[test] + fn test_signature_string_requires_separate_constructor() { + // String signatures require Signature::string() constructor + // from_parameter_variants cannot express "all string types" + let sig = Signature::string(2, Volatility::Immutable) + .with_parameter_names(vec!["str1".to_string(), "str2".to_string()]) + .unwrap(); + + assert_eq!(sig.type_signature, TypeSignature::String(2)); + assert_eq!( + sig.parameter_names, + Some(vec!["str1".to_string(), "str2".to_string()]) + ); + } + + #[test] + fn test_signature_comparable_requires_separate_constructor() { + // Comparable signatures require Signature::comparable() constructor + // from_parameter_variants cannot express "all comparable types" + let sig = Signature::comparable(2, Volatility::Immutable) + .with_parameter_names(vec!["left".to_string(), "right".to_string()]) + .unwrap(); + + assert_eq!(sig.type_signature, TypeSignature::Comparable(2)); + assert_eq!( + sig.parameter_names, + Some(vec!["left".to_string(), "right".to_string()]) + ); + } + + #[test] + fn test_signature_any_requires_separate_constructor() { + // Any signatures require Signature::any() constructor + // from_parameter_variants cannot express "any types" + let sig = Signature::any(3, Volatility::Immutable) + .with_parameter_names(vec!["a".to_string(), "b".to_string(), "c".to_string()]) + .unwrap(); + + assert_eq!(sig.type_signature, TypeSignature::Any(3)); + assert_eq!( + sig.parameter_names, + Some(vec!["a".to_string(), "b".to_string(), "c".to_string()]) + ); + } + + #[test] + fn test_signature_user_defined_requires_separate_constructor() { + // UserDefined signatures require Signature::user_defined() constructor + // from_parameter_variants cannot express custom coercion logic + let sig = Signature::user_defined(Volatility::Immutable); + + assert_eq!(sig.type_signature, TypeSignature::UserDefined); + assert_eq!(sig.parameter_names, None); + } + + #[test] + fn test_signature_array_requires_separate_constructor() { + // ArraySignature requires Signature::array() or similar constructors + // from_parameter_variants cannot express array-specific signatures + let sig = Signature::array(Volatility::Immutable); + + match sig.type_signature { + TypeSignature::ArraySignature(_) => {} // Expected + other => panic!("Expected ArraySignature, got {:?}", other), + } + } } From edc2bb5a3fe825e54fd432c6661ed1f120629fdb Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Tue, 30 Dec 2025 18:53:11 +0800 Subject: [PATCH 12/12] refactor(signature): remove unsupported tests for from_parameter_variants --- datafusion/expr-common/src/signature.rs | 120 ------------------------ 1 file changed, 120 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 3212c0d7ec07c..0647b43e7bbc3 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -2362,124 +2362,4 @@ mod tests { ); assert_eq!(sig.parameter_names, Some(vec!["value".to_string()])); } - - // Tests demonstrating TypeSignatures NOT supported by from_parameter_variants - // These tests show that other constructor methods must be used for these cases - - #[test] - fn test_signature_variadic_requires_separate_constructor() { - // Variadic signatures require Signature::variadic() constructor - // from_parameter_variants cannot express "one or more args of these types" - // Note: Variadic signatures don't support parameter names (variable arity) - let sig = Signature::variadic( - vec![DataType::Utf8, DataType::LargeUtf8], - Volatility::Immutable, - ); - - assert_eq!( - sig.type_signature, - TypeSignature::Variadic(vec![DataType::Utf8, DataType::LargeUtf8]) - ); - assert_eq!(sig.parameter_names, None); - } - - #[test] - fn test_signature_uniform_requires_separate_constructor() { - // Uniform signatures require Signature::uniform() constructor - // from_parameter_variants cannot express "all args must be same type from list" - let sig = Signature::uniform(3, vec![DataType::Float64], Volatility::Immutable) - .with_parameter_names(vec!["x".to_string(), "y".to_string(), "z".to_string()]) - .unwrap(); - - assert_eq!( - sig.type_signature, - TypeSignature::Uniform(3, vec![DataType::Float64]) - ); - assert_eq!( - sig.parameter_names, - Some(vec!["x".to_string(), "y".to_string(), "z".to_string()]) - ); - } - - #[test] - fn test_signature_numeric_requires_separate_constructor() { - // Numeric signatures require Signature::numeric() constructor - // from_parameter_variants cannot express "all numeric types" - let sig = Signature::numeric(2, Volatility::Immutable) - .with_parameter_names(vec!["a".to_string(), "b".to_string()]) - .unwrap(); - - assert_eq!(sig.type_signature, TypeSignature::Numeric(2)); - assert_eq!( - sig.parameter_names, - Some(vec!["a".to_string(), "b".to_string()]) - ); - } - - #[test] - fn test_signature_string_requires_separate_constructor() { - // String signatures require Signature::string() constructor - // from_parameter_variants cannot express "all string types" - let sig = Signature::string(2, Volatility::Immutable) - .with_parameter_names(vec!["str1".to_string(), "str2".to_string()]) - .unwrap(); - - assert_eq!(sig.type_signature, TypeSignature::String(2)); - assert_eq!( - sig.parameter_names, - Some(vec!["str1".to_string(), "str2".to_string()]) - ); - } - - #[test] - fn test_signature_comparable_requires_separate_constructor() { - // Comparable signatures require Signature::comparable() constructor - // from_parameter_variants cannot express "all comparable types" - let sig = Signature::comparable(2, Volatility::Immutable) - .with_parameter_names(vec!["left".to_string(), "right".to_string()]) - .unwrap(); - - assert_eq!(sig.type_signature, TypeSignature::Comparable(2)); - assert_eq!( - sig.parameter_names, - Some(vec!["left".to_string(), "right".to_string()]) - ); - } - - #[test] - fn test_signature_any_requires_separate_constructor() { - // Any signatures require Signature::any() constructor - // from_parameter_variants cannot express "any types" - let sig = Signature::any(3, Volatility::Immutable) - .with_parameter_names(vec!["a".to_string(), "b".to_string(), "c".to_string()]) - .unwrap(); - - assert_eq!(sig.type_signature, TypeSignature::Any(3)); - assert_eq!( - sig.parameter_names, - Some(vec!["a".to_string(), "b".to_string(), "c".to_string()]) - ); - } - - #[test] - fn test_signature_user_defined_requires_separate_constructor() { - // UserDefined signatures require Signature::user_defined() constructor - // from_parameter_variants cannot express custom coercion logic - let sig = Signature::user_defined(Volatility::Immutable); - - assert_eq!(sig.type_signature, TypeSignature::UserDefined); - assert_eq!(sig.parameter_names, None); - } - - #[test] - fn test_signature_array_requires_separate_constructor() { - // ArraySignature requires Signature::array() or similar constructors - // from_parameter_variants cannot express array-specific signatures - let sig = Signature::array(Volatility::Immutable); - - match sig.type_signature { - TypeSignature::ArraySignature(_) => {} // Expected - other => panic!("Expected ArraySignature, got {:?}", other), - } - } }