diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index eecb63d3be65..298ee9364efe 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -64,6 +64,7 @@ cargo run --example csv_sql - [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) - [`advanced_udaf.rs`](examples/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF) - [`simple_udfw.rs`](examples/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF) +- [`to_timestamp.rs`](examples/to_timestamp.rs): Examples of using the to_timestamp functions - [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF) ## Distributed diff --git a/datafusion-examples/examples/dataframe_to_timestamp.rs b/datafusion-examples/examples/to_timestamp.rs similarity index 70% rename from datafusion-examples/examples/dataframe_to_timestamp.rs rename to datafusion-examples/examples/to_timestamp.rs index 8caa9245596b..a07dbaefb75b 100644 --- a/datafusion-examples/examples/dataframe_to_timestamp.rs +++ b/datafusion-examples/examples/to_timestamp.rs @@ -24,7 +24,8 @@ use datafusion::error::Result; use datafusion::prelude::*; use datafusion_common::assert_contains; -/// This example demonstrates how to use the to_timestamp function in the DataFrame API as well as via sql. +/// This example demonstrates how to use the to_timestamp series +/// of functions in the DataFrame API as well as via sql. #[tokio::main] async fn main() -> Result<()> { // define a schema. @@ -61,7 +62,8 @@ async fn main() -> Result<()> { // use to_timestamp function to convert col 'a' to timestamp type using the default parsing let df = df.with_column("a", to_timestamp(vec![col("a")]))?; - // use to_timestamp_seconds function to convert col 'b' to timestamp(Seconds) type using a list of chrono formats to try + // use to_timestamp_seconds function to convert col 'b' to timestamp(Seconds) type using a list + // of chrono formats (https://docs.rs/chrono/latest/chrono/format/strftime/index.html) to try let df = df.with_column( "b", to_timestamp_seconds(vec![ @@ -90,14 +92,31 @@ async fn main() -> Result<()> { df.show().await?; // use sql to convert a static string to a timestamp using a list of chrono formats to try - let df = ctx.sql("select to_timestamp('01-14-2023 01:01:30+05:30', '%+', '%d-%m-%Y %H/%M/%S', '%m-%d-%Y %H:%M:%S%#z')").await?; + // note that one of the formats is invalid ('%q') but since DataFusion will try all the + // formats until it encounters one that parses the timestamp expression successfully + // no error will be returned + let df = ctx.sql("select to_timestamp_micros('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z')").await?; + + // print the results + df.show().await?; + + // casting a string to TIMESTAMP will also work for RFC3339 timestamps + let df = ctx + .sql("select to_timestamp_millis(TIMESTAMP '2022-08-03T14:38:50Z')") + .await?; + + // print the results + df.show().await?; + + // unix timestamps (in seconds) are also supported + let df = ctx.sql("select to_timestamp(1926632005)").await?; // print the results df.show().await?; // use sql to convert a static string to a timestamp using a non-matching chrono format to try let result = ctx - .sql("select to_timestamp('01-14-2023 01/01/30', '%d-%m-%Y %H:%M:%S')") + .sql("select to_timestamp_nanos('01-14-2023 01/01/30', '%d-%m-%Y %H:%M:%S')") .await? .collect() .await; @@ -105,5 +124,15 @@ async fn main() -> Result<()> { let expected = "Error parsing timestamp from '01-14-2023 01/01/30' using format '%d-%m-%Y %H:%M:%S': input contains invalid characters"; assert_contains!(result.unwrap_err().to_string(), expected); + // note that using arrays for the chrono formats is not supported + let result = ctx + .sql("SELECT to_timestamp('2022-08-03T14:38:50+05:30', make_array('%s', '%q', '%d-%m-%Y %H:%M:%S%#z', '%+'))") + .await? + .collect() + .await; + + let expected = "to_timestamp function unsupported data type at index 1: List"; + assert_contains!(result.unwrap_err().to_string(), expected); + Ok(()) } diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index c3def3f89b5b..6c526e3ada75 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1311,8 +1311,8 @@ regexp_replace(str, regexp, replacement, flags) - [date_part](#date_part) - [datepart](#datepart) - [extract](#extract) -- [to_timestamp](#to_timestamp) - [today](#today) +- [to_timestamp](#to_timestamp) - [to_timestamp_millis](#to_timestamp_millis) - [to_timestamp_micros](#to_timestamp_micros) - [to_timestamp_seconds](#to_timestamp_seconds) @@ -1526,6 +1526,27 @@ to_timestamp(expression[, ..., format_n]) [chrono format]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html +#### Example + +``` +❯ select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------+ +| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------+ +❯ select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------+ +| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++--------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_millis` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). @@ -1546,6 +1567,27 @@ to_timestamp_millis(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +#### Example + +``` +❯ select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123 | ++------------------------------------------------------------------+ +❯ select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_micros` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). @@ -1566,6 +1608,27 @@ to_timestamp_micros(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +#### Example + +``` +❯ select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456 | ++------------------------------------------------------------------+ +❯ select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_nanos` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). @@ -1586,6 +1649,27 @@ to_timestamp_nanos(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +#### Example + +``` +❯ select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------------+ +❯ select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `to_timestamp_seconds` Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). @@ -1606,6 +1690,27 @@ to_timestamp_seconds(expression[, ..., format_n]) they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. +#### Example + +``` +❯ select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); ++-------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-------------------------------------------------------------------+ +| 2023-01-31T14:26:56 | ++-------------------------------------------------------------------+ +❯ select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++----------------------------------------------------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++----------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00 | ++----------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here] + +[here]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs + ### `from_unixtime` Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`).