From 35c761b4ecd41b97fb5d2749ab6173e46bd02dfd Mon Sep 17 00:00:00 2001 From: Marko Grujic Date: Fri, 5 Jan 2024 10:23:00 +0100 Subject: [PATCH] Extend string parsing support for Date32 to encompass the timestamp format --- arrow-cast/src/cast.rs | 12 +++++++----- arrow-cast/src/parse.rs | 17 ++++++++++++++--- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 92b9071a6754..bd35096e0645 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -7482,9 +7482,9 @@ mod tests { let a = StringArray::from(vec![ "2000-01-01", // valid date with leading 0s + "2000-01-01T12:00:00", // valid datetime, will throw away the time part "2000-2-2", // valid date without leading 0s "2000-00-00", // invalid month and day - "2000-01-01T12:00:00", // date + time is invalid "2000", // just a year is invalid ]); let array = Arc::new(a) as ArrayRef; @@ -7500,17 +7500,19 @@ mod tests { assert!(c.is_valid(0)); // "2000-01-01" assert_eq!(date_value, c.value(0)); + assert!(c.is_valid(1)); // "2000-01-01T12:00:00" + assert_eq!(date_value, c.value(1)); + let date_value = since( NaiveDate::from_ymd_opt(2000, 2, 2).unwrap(), from_ymd(1970, 1, 1).unwrap(), ) .num_days() as i32; - assert!(c.is_valid(1)); // "2000-2-2" - assert_eq!(date_value, c.value(1)); + assert!(c.is_valid(2)); // "2000-2-2" + assert_eq!(date_value, c.value(2)); // test invalid inputs - assert!(!c.is_valid(2)); // "2000-00-00" - assert!(!c.is_valid(3)); // "2000-01-01T12:00:00" + assert!(!c.is_valid(3)); // "2000-00-00" assert!(!c.is_valid(4)); // "2000" } diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index 3d2e47ed95a4..50e9fda672f6 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -546,8 +546,11 @@ const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented a fn parse_date(string: &str) -> Option<NaiveDate> { if string.len() > 10 { - return None; - } + // Try to parse as datetime and return just the date 
part + return string_to_datetime(&Utc, string) + .map(|dt| dt.date_naive()) + .ok(); + }; let mut digits = [0; 10]; let mut mask = 0; @@ -1488,10 +1491,13 @@ mod tests { "2020-9-08", "2020-12-1", "1690-2-5", + "2020-09-08 01:02:03", ]; for case in cases { let v = date32_to_datetime(Date32Type::parse(case).unwrap()).unwrap(); - let expected: NaiveDate = case.parse().unwrap(); + let expected = NaiveDate::parse_from_str(case, "%Y-%m-%d") + .or(NaiveDate::parse_from_str(case, "%Y-%m-%d %H:%M:%S")) + .unwrap(); assert_eq!(v.date(), expected); } @@ -1503,6 +1509,11 @@ mod tests { "2020-09-08-03", "2020--04-03", "2020--", + "2020-09-08 01", + "2020-09-08 01:02", + "2020-09-08 01-02-03", + "2020-9-8 01:02:03", + "2020-09-08 1:2:3", ]; for case in err_cases { assert_eq!(Date32Type::parse(case), None);