From 6468bd723d7b1b431a0218de6b11d5b30379ce9b Mon Sep 17 00:00:00 2001 From: Vitalie Spinu Date: Mon, 2 Oct 2017 19:16:02 +0200 Subject: [PATCH] [Fix #556] Fix incorrect scoring of `y` format in `dmy` order --- NEWS.md | 1 + R/guess.r | 14 +++++++------- tests/testthat/test-parsers.R | 7 +++++++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index aa6f5c83..f6396be4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ Version 1.6.0.9000 ### BUG FIXES +* [#556](https://github.com/tidyverse/lubridate/issues/556) Fix incorrect scoring of `y` format when it's the last in format order (as in `mdy`). * [#559](https://github.com/tidyverse/lubridate/issues/559) Parsing of alpha-months in English locales now drops correctly to low level C parsing. Thus, parsing with multiple orders containing `m` and `b` formats now works correctly. * [#570](https://github.com/tidyverse/lubridate/issues/570), [#574](https://github.com/tidyverse/lubridate/issues/574) Fix broken `date()` when called with missing argument. * [#567](https://github.com/tidyverse/lubridate/issues/567) Fix year update and rounding for leap years. diff --git a/R/guess.r b/R/guess.r index a2036128..a57a5e04 100644 --- a/R/guess.r +++ b/R/guess.r @@ -250,10 +250,10 @@ guess_formats <- function(x, orders, locale = Sys.getlocale("LC_TIME"), .select_formats <- function(trained, drop = FALSE) { nms <- names(trained) - n_fmts <- + score <- nchar(gsub("[^%]", "", nms)) + ## longer formats have priority - grepl("%Y", nms, fixed = T)*1.5 + ## Y has priority over y - grepl("%y[^%]", nms)*1.6 + ## y has priority over Y, but only when followed by non % + grepl("%Y", nms, fixed = T)*1.5 + + grepl("%y(?!%)", nms, perl = T)*1.6 + ## y has priority over Y, but only when not followed by % grepl("%[Bb]", nms)*.31 + ## B/b format has priority over %Om ## C parser formats have higher priority grepl("%Om", nms)*.3 + @@ -263,14 +263,14 @@ guess_formats <- function(x, orders, locale = Sys.getlocale("LC_TIME"), ## ties are broken by `trained` n0 <- trained != 0 if (drop) { - n_fmts <- n_fmts[n0] + score <- score[n0] trained <- trained[n0] } else { - n_fmts[!n0] <- -100 + score[!n0] <- -100 } - ## names(trained[which.max(n_fmts)]) - names(trained)[order(n_fmts, trained, decreasing = T)] + ## print(rbind(trained, score)) + names(trained)[order(score, trained, decreasing = T)] } ## These are formats that are effectively matched by c parser. But we must get diff --git a/tests/testthat/test-parsers.R b/tests/testthat/test-parsers.R index 6e216526..58401928 100644 --- a/tests/testthat/test-parsers.R +++ b/tests/testthat/test-parsers.R @@ -404,6 +404,13 @@ test_that("parse_date_time handles multiple partial month formats correctly", { }) +test_that("parse_date_time gives higher priority to y than to Y format", { + expect_equal(parse_date_time(c("apr.12.50","apr.2.2016"), orders = "mdy"), + ymd(c("2050-04-12 UTC", "2016-04-02 UTC"), tz = "UTC")) + expect_equal(parse_date_time(c("50.apr.12","2016.apr.2"), orders = "ymd"), + ymd(c("2050-04-12 UTC", "2016-04-02 UTC"), tz = "UTC")) +}) + test_that("C parser correctly handles month formats", { expect_equal(ymd_hms("2010-Jan-02 23:59:59"), as.POSIXct("2010-01-02 23:59:59", tz = "UTC")) expect_equal(ymd_hms("2010-January-02 23:59:59"), as.POSIXct("2010-01-02 23:59:59", tz = "UTC"))