From 9917de9e274fa8ed3dc2001d25a318a5c3de9291 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Mon, 29 Dec 2025 12:38:00 +0530 Subject: [PATCH 1/3] feat: to_time function --- datafusion/functions/src/datetime/mod.rs | 7 + datafusion/functions/src/datetime/to_time.rs | 440 ++++++++++++++++++ .../test_files/datetime/timestamps.slt | 98 ++++ .../source/user-guide/sql/scalar_functions.md | 40 ++ 4 files changed, 585 insertions(+) create mode 100644 datafusion/functions/src/datetime/to_time.rs diff --git a/datafusion/functions/src/datetime/mod.rs b/datafusion/functions/src/datetime/mod.rs index 6ebea3c1a5570..9872db3faf556 100644 --- a/datafusion/functions/src/datetime/mod.rs +++ b/datafusion/functions/src/datetime/mod.rs @@ -35,6 +35,7 @@ pub mod planner; pub mod to_char; pub mod to_date; pub mod to_local_time; +pub mod to_time; pub mod to_timestamp; pub mod to_unixtime; @@ -50,6 +51,7 @@ make_udf_function!(from_unixtime::FromUnixtimeFunc, from_unixtime); make_udf_function!(to_char::ToCharFunc, to_char); make_udf_function!(to_date::ToDateFunc, to_date); make_udf_function!(to_local_time::ToLocalTimeFunc, to_local_time); +make_udf_function!(to_time::ToTimeFunc, to_time); make_udf_function!(to_unixtime::ToUnixtimeFunc, to_unixtime); make_udf_function!(to_timestamp::ToTimestampFunc, to_timestamp); make_udf_function!(to_timestamp::ToTimestampSecondsFunc, to_timestamp_seconds); @@ -110,6 +112,10 @@ pub mod expr_fn { to_unixtime, "converts a value to seconds since the unix epoch", args, + ),( + to_time, + "converts a string and optional formats to a `Time64(Nanoseconds)`", + args, ),( to_timestamp, "converts a string and optional formats to a `Timestamp(Nanoseconds, None)`", @@ -278,6 +284,7 @@ pub fn functions() -> Vec> { to_char(), to_date(), to_local_time(), + to_time(), to_unixtime(), to_timestamp(), to_timestamp_seconds(), diff --git a/datafusion/functions/src/datetime/to_time.rs b/datafusion/functions/src/datetime/to_time.rs new file mode 100644 index 0000000000000..42fce363d33d0 --- /dev/null +++ b/datafusion/functions/src/datetime/to_time.rs @@ -0,0 +1,440 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::datetime::common::*; +use arrow::array::builder::PrimitiveBuilder; +use arrow::array::cast::AsArray; +use arrow::array::types::Time64NanosecondType; +use arrow::array::{Array, PrimitiveArray, StringArrayType}; +use arrow::datatypes::DataType; +use arrow::datatypes::DataType::*; +use chrono::{NaiveTime, Timelike}; +use datafusion_common::{Result, ScalarValue, exec_err}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; +use datafusion_macros::user_doc; +use std::any::Any; +use std::sync::Arc; + +/// Default time formats to try when parsing without an explicit format +const DEFAULT_TIME_FORMATS: &[&str] = &[ + "%H:%M:%S%.f", // 12:30:45.123456789 + "%H:%M:%S", // 12:30:45 + "%H:%M", // 12:30 +]; + +#[user_doc( + doc_section(label = "Time and Date Functions"), + description = r"Converts a value to a time (`HH:MM:SS.nnnnnnnnn`). +Supports strings as input. +Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. +Returns the corresponding time. + +Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight.", + syntax_example = "to_time('12:30:45', '%H:%M:%S')", + sql_example = r#"```sql +> select to_time('12:30:45'); ++---------------------------+ +| to_time(Utf8("12:30:45")) | ++---------------------------+ +| 12:30:45 | ++---------------------------+ +> select to_time('12-30-45', '%H-%M-%S'); ++--------------------------------------------+ +| to_time(Utf8("12-30-45"),Utf8("%H-%M-%S")) | ++--------------------------------------------+ +| 12:30:45 | ++--------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs) +"#, + standard_argument(name = "expression", prefix = "String"), + argument( + name = "format_n", + description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned." + ) +)] +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct ToTimeFunc { + signature: Signature, +} + +impl Default for ToTimeFunc { + fn default() -> Self { + Self::new() + } +} + +impl ToTimeFunc { + pub fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } + + fn to_time(&self, args: &[ColumnarValue]) -> Result { + let formats: Vec<&str> = if args.len() > 1 { + // Collect format strings from arguments + args[1..] + .iter() + .filter_map(|arg| { + if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(s))) + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(s))) + | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(s))) = arg + { + Some(s.as_str()) + } else { + None + } + }) + .collect() + } else { + DEFAULT_TIME_FORMATS.to_vec() + }; + + match &args[0] { + ColumnarValue::Scalar(ScalarValue::Utf8(s)) + | ColumnarValue::Scalar(ScalarValue::LargeUtf8(s)) + | ColumnarValue::Scalar(ScalarValue::Utf8View(s)) => { + let result = s + .as_ref() + .map(|s| parse_time_with_formats(s, &formats)) + .transpose()?; + Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(result))) + } + ColumnarValue::Array(array) => { + let result = match array.data_type() { + Utf8 => parse_time_array(&array.as_string::(), &formats)?, + LargeUtf8 => parse_time_array(&array.as_string::(), &formats)?, + Utf8View => parse_time_array(&array.as_string_view(), &formats)?, + other => return exec_err!("Unsupported type for to_time: {}", other), + }; + Ok(ColumnarValue::Array(Arc::new(result))) + } + other => exec_err!("Unsupported argument for to_time: {:?}", other), + } + } +} + +impl ScalarUDFImpl for ToTimeFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "to_time" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(Time64(arrow::datatypes::TimeUnit::Nanosecond)) + } + + fn invoke_with_args( + &self, + args: datafusion_expr::ScalarFunctionArgs, + ) -> Result { + let args = args.args; + if args.is_empty() { + return exec_err!("to_time function requires 1 or more arguments, got 0"); + } + + // validate that any args after the first one are Utf8 + if args.len() > 1 { + validate_data_types(&args, "to_time")?; + } + + match args[0].data_type() { + Utf8View | LargeUtf8 | Utf8 => self.to_time(&args), + Null => Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(None))), + other => { + exec_err!("Unsupported data type {} for function to_time", other) + } + } + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +/// Parse time array using the provided formats +fn parse_time_array<'a, A: StringArrayType<'a>>( + array: &A, + formats: &[&str], +) -> Result> { + let mut builder: PrimitiveBuilder = + PrimitiveArray::builder(array.len()); + + for i in 0..array.len() { + if array.is_null(i) { + builder.append_null(); + } else { + let s = array.value(i); + let nanos = parse_time_with_formats(s, formats)?; + builder.append_value(nanos); + } + } + + Ok(builder.finish()) +} + +/// Parse time string using provided formats +fn parse_time_with_formats(s: &str, formats: &[&str]) -> Result { + for format in formats { + if let Ok(time) = NaiveTime::parse_from_str(s, format) { + return Ok(time_to_nanos(time)); + } + } + exec_err!( + "Error parsing '{}' as time. Tried formats: {:?}", + s, + formats + ) +} + +/// Convert NaiveTime to nanoseconds since midnight +fn time_to_nanos(time: NaiveTime) -> i64 { + let hours = time.hour() as i64; + let minutes = time.minute() as i64; + let seconds = time.second() as i64; + let nanos = time.nanosecond() as i64; + + hours * 3_600_000_000_000 + minutes * 60_000_000_000 + seconds * 1_000_000_000 + nanos +} + +#[cfg(test)] +mod tests { + use super::ToTimeFunc; + use arrow::array::{ + Array, GenericStringArray, StringViewArray, Time64NanosecondArray, + }; + use arrow::datatypes::{DataType, Field, TimeUnit}; + use datafusion_common::config::ConfigOptions; + use datafusion_common::{DataFusionError, ScalarValue}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + use std::sync::Arc; + + fn invoke_to_time_with_args( + args: Vec, + number_rows: usize, + ) -> Result { + let arg_fields = args + .iter() + .map(|arg| Field::new("a", arg.data_type(), true).into()) + .collect::>(); + + let args = datafusion_expr::ScalarFunctionArgs { + args, + arg_fields, + number_rows, + return_field: Field::new("f", DataType::Time64(TimeUnit::Nanosecond), true) + .into(), + config_options: Arc::new(ConfigOptions::default()), + }; + ToTimeFunc::new().invoke_with_args(args) + } + + #[test] + fn test_to_time_without_format() { + struct TestCase { + name: &'static str, + time_str: &'static str, + expected_nanos: i64, + } + + let test_cases = vec![ + TestCase { + name: "HH:MM:SS format", + time_str: "12:30:45", + // 12*3600 + 30*60 + 45 = 45045 seconds = 45045000000000 nanos + expected_nanos: 45_045_000_000_000, + }, + TestCase { + name: "HH:MM:SS.f format with milliseconds", + time_str: "12:30:45.123", + expected_nanos: 45_045_123_000_000, + }, + TestCase { + name: "HH:MM:SS.f format with nanoseconds", + time_str: "12:30:45.123456789", + expected_nanos: 45_045_123_456_789, + }, + TestCase { + name: "Midnight", + time_str: "00:00:00", + expected_nanos: 0, + }, + TestCase { + name: "End of day", + time_str: "23:59:59", + expected_nanos: 86_399_000_000_000, + }, + ]; + + for tc in &test_cases { + // Test scalar Utf8 + let sv = ScalarValue::Utf8(Some(tc.time_str.to_string())); + let result = invoke_to_time_with_args(vec![ColumnarValue::Scalar(sv)], 1); + + match result { + Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(Some(val)))) => { + assert_eq!( + val, tc.expected_nanos, + "{}: to_time created wrong value, got {}, expected {}", + tc.name, val, tc.expected_nanos + ); + } + other => panic!( + "{}: Could not convert '{}' to Time: {:?}", + tc.name, tc.time_str, other + ), + } + } + } + + #[test] + fn test_to_time_with_format() { + struct TestCase { + name: &'static str, + time_str: &'static str, + format_str: &'static str, + expected_nanos: i64, + } + + let test_cases = vec![ + TestCase { + name: "Custom dash format", + time_str: "12-30-45", + format_str: "%H-%M-%S", + expected_nanos: 45_045_000_000_000, + }, + TestCase { + name: "Slash format", + time_str: "14/25/30", + format_str: "%H/%M/%S", + expected_nanos: 51_930_000_000_000, + }, + TestCase { + name: "12-hour format with AM/PM", + time_str: "02:30:45 PM", + format_str: "%I:%M:%S %p", + expected_nanos: 52_245_000_000_000, // 14:30:45 + }, + ]; + + for tc in &test_cases { + let time_scalar = ScalarValue::Utf8(Some(tc.time_str.to_string())); + let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string())); + + let result = invoke_to_time_with_args( + vec![ + ColumnarValue::Scalar(time_scalar), + ColumnarValue::Scalar(format_scalar), + ], + 1, + ); + + match result { + Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(Some(val)))) => { + assert_eq!( + val, tc.expected_nanos, + "{}: to_time created wrong value for '{}' with format '{}', got {}, expected {}", + tc.name, tc.time_str, tc.format_str, val, tc.expected_nanos + ); + } + other => panic!( + "{}: Could not convert '{}' with format '{}' to Time: {:?}", + tc.name, tc.time_str, tc.format_str, other + ), + } + } + } + + #[test] + fn test_to_time_array() { + let time_array = GenericStringArray::::from(vec!["12:30:45", "23:59:59"]); + let batch_len = time_array.len(); + let result = invoke_to_time_with_args( + vec![ColumnarValue::Array(Arc::new(time_array))], + batch_len, + ); + + match result { + Ok(ColumnarValue::Array(a)) => { + assert_eq!(a.len(), 2); + + let time_array = + a.as_any().downcast_ref::().unwrap(); + assert_eq!(time_array.value(0), 45_045_000_000_000); // 12:30:45 + assert_eq!(time_array.value(1), 86_399_000_000_000); // 23:59:59 + } + other => panic!("Expected Array result, got {other:?}"), + } + } + + #[test] + fn test_to_time_null_input() { + let null_scalar = ScalarValue::Utf8(None); + let result = + invoke_to_time_with_args(vec![ColumnarValue::Scalar(null_scalar)], 1); + + match result { + Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(None))) => { + // Expected: null input results in null output + } + other => panic!("Expected null Time result, got {other:?}"), + } + } + + #[test] + fn test_to_time_string_view() { + let time_array = StringViewArray::from(vec!["08:15:30"]); + let batch_len = time_array.len(); + let result = invoke_to_time_with_args( + vec![ColumnarValue::Array(Arc::new(time_array))], + batch_len, + ); + + match result { + Ok(ColumnarValue::Array(a)) => { + assert_eq!(a.len(), 1); + let time_array = + a.as_any().downcast_ref::().unwrap(); + // 8*3600 + 15*60 + 30 = 29730 seconds + assert_eq!(time_array.value(0), 29_730_000_000_000); + } + other => panic!("Expected Array result, got {other:?}"), + } + } + + #[test] + fn test_to_time_invalid_string() { + let invalid_scalar = ScalarValue::Utf8(Some("not_a_time".to_string())); + let result = + invoke_to_time_with_args(vec![ColumnarValue::Scalar(invalid_scalar)], 1); + + assert!(result.is_err()); + } +} diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index 90accbce38a6e..7f8d93d11c66b 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -3285,6 +3285,104 @@ select make_time(22, 1, ''); query error Expect TypeSignatureClass::Native\(LogicalType\(Native\(Int32\), Int32\)\) but received NativeType::Float64, DataType: Float64 select make_time(arrow_cast(22, 'Float64'), 1, ''); +########## +## to_time tests +########## + +# Basic time parsing + +query D +select to_time('12:30:45'); +---- +12:30:45 + +query D +select to_time('00:00:00'); +---- +00:00:00 + +query D +select to_time('23:59:59'); +---- +23:59:59 + +query D +select to_time('08:15:30'); +---- +08:15:30 + +# Time with fractional seconds + +query D +select to_time('12:30:45.123'); +---- +12:30:45.123 + +query D +select to_time('12:30:45.123456789'); +---- +12:30:45.123456789 + +# Time with custom format + +query D +select to_time('12-30-45', '%H-%M-%S'); +---- +12:30:45 + +query D +select to_time('14/25/30', '%H/%M/%S'); +---- +14:25:30 + +query D +select to_time('02:30:45 PM', '%I:%M:%S %p'); +---- +14:30:45 + +# Null handling + +query D +select to_time(null); +---- +NULL + +# Return type check + +query T +select arrow_typeof(to_time('12:30:45')); +---- +Time64(ns) + +# Table input + +statement ok +create table time_strings (time_str varchar) as values + ('12:30:45'), + ('23:59:59'), + ('00:00:00'); + +query D +select to_time(time_str) from time_strings; +---- +12:30:45 +23:59:59 +00:00:00 + +statement ok +drop table time_strings; + +# Error cases + +query error Error parsing 'not_a_time' as time +select to_time('not_a_time'); + +query error Error parsing '25:00:00' as time +select to_time('25:00:00'); + +query error does not support zero arguments +select to_time(); + ########## ## to_char tests ########## diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index cf35b9f3c3328..f8fcc549e2f9a 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2394,6 +2394,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo - [to_char](#to_char) - [to_date](#to_date) - [to_local_time](#to_local_time) +- [to_time](#to_time) - [to_timestamp](#to_timestamp) - [to_timestamp_micros](#to_timestamp_micros) - [to_timestamp_millis](#to_timestamp_millis) @@ -2818,6 +2819,45 @@ FROM ( +---------------------------+ ``` +### `to_time` + +Converts a value to a time (`HH:MM:SS.nnnnnnnnn`). +Supports strings as input. +Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. +Returns the corresponding time. + +Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight. + +```sql +to_time('12:30:45', '%H:%M:%S') +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned. + +#### Example + +```sql +> select to_time('12:30:45'); ++---------------------------+ +| to_time(Utf8("12:30:45")) | ++---------------------------+ +| 12:30:45 | ++---------------------------+ +> select to_time('12-30-45', '%H-%M-%S'); ++--------------------------------------------+ +| to_time(Utf8("12-30-45"),Utf8("%H-%M-%S")) | ++--------------------------------------------+ +| 12:30:45 | ++--------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs) + ### `to_timestamp` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. From 893e97f2fddc0398b2223de54b8d0fb5dfd7dfaa Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Mon, 29 Dec 2025 23:50:57 +0530 Subject: [PATCH 2/3] Address reviewer feedback: add constants and timestamp support - Extract magic numbers to NANOS_PER_SECOND, NANOS_PER_MINUTE, NANOS_PER_HOUR constants - Use Arrow's NANOSECONDS constant as base - Add timestamp input support (extracts time portion) - Add SLT tests for: - HH:MM default parsing (no seconds) - Out of range minutes/seconds - Timestamp input (various timestamp types) - Null timestamp handling --- datafusion/functions/src/datetime/to_time.rs | 148 +++++++++++++++++- .../test_files/datetime/timestamps.slt | 68 ++++++++ 2 files changed, 213 insertions(+), 3 deletions(-) diff --git a/datafusion/functions/src/datetime/to_time.rs b/datafusion/functions/src/datetime/to_time.rs index 42fce363d33d0..7dd22f03508bd 100644 --- a/datafusion/functions/src/datetime/to_time.rs +++ b/datafusion/functions/src/datetime/to_time.rs @@ -18,6 +18,7 @@ use crate::datetime::common::*; use arrow::array::builder::PrimitiveBuilder; use arrow::array::cast::AsArray; +use arrow::array::temporal_conversions::NANOSECONDS; use arrow::array::types::Time64NanosecondType; use arrow::array::{Array, PrimitiveArray, StringArrayType}; use arrow::datatypes::DataType; @@ -31,6 +32,15 @@ use datafusion_macros::user_doc; use std::any::Any; use std::sync::Arc; +/// Nanoseconds per second (1 billion) +const NANOS_PER_SECOND: i64 = NANOSECONDS; +/// Nanoseconds per minute +const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND; +/// Nanoseconds per hour +const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE; +/// Nanoseconds per day (used for extracting time from timestamp) +const NANOS_PER_DAY: i64 = 24 * NANOS_PER_HOUR; + /// Default time formats to try when parsing without an explicit format const DEFAULT_TIME_FORMATS: &[&str] = &[ "%H:%M:%S%.f", // 12:30:45.123456789 @@ -41,8 +51,9 @@ const DEFAULT_TIME_FORMATS: &[&str] = &[ #[user_doc( doc_section(label = "Time and Date Functions"), description = r"Converts a value to a time (`HH:MM:SS.nnnnnnnnn`). -Supports strings as input. +Supports strings and timestamps as input. Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. +Timestamps will have the time portion extracted. Returns the corresponding time. Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight.", @@ -60,11 +71,17 @@ Note: `to_time` returns Time64(Nanosecond), which represents the time of day in +--------------------------------------------+ | 12:30:45 | +--------------------------------------------+ +> select to_time('2024-01-15 14:30:45'::timestamp); ++--------------------------------------------------+ +| to_time(Utf8("2024-01-15 14:30:45")) | ++--------------------------------------------------+ +| 14:30:45 | ++--------------------------------------------------+ ``` Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs) "#, - standard_argument(name = "expression", prefix = "String"), + standard_argument(name = "expression", prefix = "String or Timestamp"), argument( name = "format_n", description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order @@ -168,6 +185,11 @@ impl ScalarUDFImpl for ToTimeFunc { match args[0].data_type() { Utf8View | LargeUtf8 | Utf8 => self.to_time(&args), Null => Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(None))), + // Support timestamp input by extracting time portion + Timestamp(_, _) => { + let nanos = extract_time_from_timestamp(&args[0])?; + Ok(nanos) + } other => { exec_err!("Unsupported data type {} for function to_time", other) } @@ -221,7 +243,127 @@ fn time_to_nanos(time: NaiveTime) -> i64 { let seconds = time.second() as i64; let nanos = time.nanosecond() as i64; - hours * 3_600_000_000_000 + minutes * 60_000_000_000 + seconds * 1_000_000_000 + nanos + hours * NANOS_PER_HOUR + + minutes * NANOS_PER_MINUTE + + seconds * NANOS_PER_SECOND + + nanos +} + +/// Extract time portion from timestamp (nanoseconds since midnight) +fn extract_time_from_timestamp(arg: &ColumnarValue) -> Result { + match arg { + ColumnarValue::Scalar(scalar) => { + let nanos = match scalar { + ScalarValue::TimestampNanosecond(Some(ts), _) => *ts % NANOS_PER_DAY, + ScalarValue::TimestampMicrosecond(Some(ts), _) => { + (*ts * 1_000) % NANOS_PER_DAY + } + ScalarValue::TimestampMillisecond(Some(ts), _) => { + (*ts * 1_000_000) % NANOS_PER_DAY + } + ScalarValue::TimestampSecond(Some(ts), _) => { + (*ts * NANOS_PER_SECOND) % NANOS_PER_DAY + } + ScalarValue::TimestampNanosecond(None, _) + | ScalarValue::TimestampMicrosecond(None, _) + | ScalarValue::TimestampMillisecond(None, _) + | ScalarValue::TimestampSecond(None, _) => { + return Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond( + None, + ))); + } + _ => return exec_err!("Unsupported timestamp type for to_time"), + }; + // Handle negative timestamps (before epoch) - normalize to positive time + let normalized_nanos = if nanos < 0 { + nanos + NANOS_PER_DAY + } else { + nanos + }; + Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(Some( + normalized_nanos, + )))) + } + ColumnarValue::Array(array) => { + let len = array.len(); + let mut builder: PrimitiveBuilder = + PrimitiveArray::builder(len); + + match array.data_type() { + Timestamp(arrow::datatypes::TimeUnit::Nanosecond, _) => { + let ts_array = + array.as_primitive::(); + for i in 0..len { + if ts_array.is_null(i) { + builder.append_null(); + } else { + let nanos = ts_array.value(i) % NANOS_PER_DAY; + let normalized = if nanos < 0 { + nanos + NANOS_PER_DAY + } else { + nanos + }; + builder.append_value(normalized); + } + } + } + Timestamp(arrow::datatypes::TimeUnit::Microsecond, _) => { + let ts_array = array + .as_primitive::(); + for i in 0..len { + if ts_array.is_null(i) { + builder.append_null(); + } else { + let nanos = (ts_array.value(i) * 1_000) % NANOS_PER_DAY; + let normalized = if nanos < 0 { + nanos + NANOS_PER_DAY + } else { + nanos + }; + builder.append_value(normalized); + } + } + } + Timestamp(arrow::datatypes::TimeUnit::Millisecond, _) => { + let ts_array = array + .as_primitive::(); + for i in 0..len { + if ts_array.is_null(i) { + builder.append_null(); + } else { + let nanos = (ts_array.value(i) * 1_000_000) % NANOS_PER_DAY; + let normalized = if nanos < 0 { + nanos + NANOS_PER_DAY + } else { + nanos + }; + builder.append_value(normalized); + } + } + } + Timestamp(arrow::datatypes::TimeUnit::Second, _) => { + let ts_array = + array.as_primitive::(); + for i in 0..len { + if ts_array.is_null(i) { + builder.append_null(); + } else { + let nanos = + (ts_array.value(i) * NANOS_PER_SECOND) % NANOS_PER_DAY; + let normalized = if nanos < 0 { + nanos + NANOS_PER_DAY + } else { + nanos + }; + builder.append_value(normalized); + } + } + } + _ => return exec_err!("Unsupported timestamp type for to_time"), + } + Ok(ColumnarValue::Array(Arc::new(builder.finish()))) + } + } } #[cfg(test)] diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index 7f8d93d11c66b..dcc83625c21f5 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -3380,9 +3380,77 @@ select to_time('not_a_time'); query error Error parsing '25:00:00' as time select to_time('25:00:00'); +# Out of range minutes +query error Error parsing '12:60:00' as time +select to_time('12:60:00'); + +# Out of range seconds (61 is invalid, 60 is allowed as leap second) +query error Error parsing '12:30:61' as time +select to_time('12:30:61'); + query error does not support zero arguments select to_time(); +# HH:MM default parsing (no seconds) + +query D +select to_time('14:30'); +---- +14:30:00 + +query D +select to_time('09:05'); +---- +09:05:00 + +query D +select to_time('00:00'); +---- +00:00:00 + +# Timestamp input - extract time portion + +query D +select to_time(to_timestamp('2024-01-15 14:30:45')); +---- +14:30:45 + +query D +select to_time('2024-03-20 09:15:30'::timestamp); +---- +09:15:30 + +query D +select to_time('2024-06-15 23:59:59.123456789'::timestamp); +---- +23:59:59.123456789 + +query D +select to_time('2024-01-01 00:00:00'::timestamp); +---- +00:00:00 + +# Timestamp with timezone + +query D +select to_time(to_timestamp('2024-01-15T14:30:45+00:00')); +---- +14:30:45 + +# Null timestamp + +query D +select to_time(null::timestamp); +---- +NULL + +# Return type check with timestamp input + +query T +select arrow_typeof(to_time(to_timestamp('2024-01-15 12:30:45'))); +---- +Time64(ns) + ########## ## to_char tests ########## From d429fa32822335e99d28ab84d5a297b17dc28460 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Tue, 30 Dec 2025 00:08:48 +0530 Subject: [PATCH 3/3] update docs --- docs/source/user-guide/sql/scalar_functions.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index f8fcc549e2f9a..a0b129efc7331 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -2822,8 +2822,9 @@ FROM ( ### `to_time` Converts a value to a time (`HH:MM:SS.nnnnnnnnn`). -Supports strings as input. +Supports strings and timestamps as input. Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. +Timestamps will have the time portion extracted. Returns the corresponding time. Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight. @@ -2834,7 +2835,7 @@ to_time('12:30:45', '%H:%M:%S') #### Arguments -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression**: String or Timestamp expression to operate on. Can be a constant, column, or function, and any combination of operators. - **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. @@ -2854,6 +2855,12 @@ to_time('12:30:45', '%H:%M:%S') +--------------------------------------------+ | 12:30:45 | +--------------------------------------------+ +> select to_time('2024-01-15 14:30:45'::timestamp); ++--------------------------------------------------+ +| to_time(Utf8("2024-01-15 14:30:45")) | ++--------------------------------------------------+ +| 14:30:45 | ++--------------------------------------------------+ ``` Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)