From 6468bd723d7b1b431a0218de6b11d5b30379ce9b Mon Sep 17 00:00:00 2001
From: Vitalie Spinu <spinuvit@gmail.com>
Date: Mon, 2 Oct 2017 19:16:02 +0200
Subject: [PATCH] [Fix #556] Fix incorrect scoring of `y` format in `dmy` order

---
 NEWS.md                       |  1 +
 R/guess.r                     | 14 +++++++-------
 tests/testthat/test-parsers.R |  7 +++++++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index aa6f5c83..f6396be4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -23,6 +23,7 @@ Version 1.6.0.9000
 
 ### BUG FIXES
 
+* [#556](https://github.com/tidyverse/lubridate/issues/556) Fix incorrect scoring of the `y` format when it is the last format in the order (as in `mdy`).
 * [#559](https://github.com/tidyverse/lubridate/issues/559) Parsing of alpha-months in English locales now drops correctly to low level C parsing. Thus, parsing with multiple orders containing `m` and `b` formats now works correctly.
 * [#570](https://github.com/tidyverse/lubridate/issues/570), [#574](https://github.com/tidyverse/lubridate/issues/574) Fix broken `date()` when called with missing argument.
 * [#567](https://github.com/tidyverse/lubridate/issues/567) Fix year update and rounding for leap years.
diff --git a/R/guess.r b/R/guess.r
index a2036128..a57a5e04 100644
--- a/R/guess.r
+++ b/R/guess.r
@@ -250,10 +250,10 @@ guess_formats <- function(x, orders, locale = Sys.getlocale("LC_TIME"),
 .select_formats <- function(trained, drop = FALSE) {
   nms <- names(trained)
 
-  n_fmts <-
+  score <-
     nchar(gsub("[^%]", "", nms)) + ## longer formats have priority
-    grepl("%Y", nms, fixed = T)*1.5 + ## Y has priority over y
-    grepl("%y[^%]", nms)*1.6 + ## y has priority over Y, but only when followed by non %
+    grepl("%Y", nms, fixed = T)*1.5 +
+    grepl("%y(?!%)", nms, perl = T)*1.6 + ## y has priority over Y, but only when not followed by %
     grepl("%[Bb]", nms)*.31 + ## B/b format has priority over %Om
     ## C parser formats have higher priority
     grepl("%Om", nms)*.3 +
@@ -263,14 +263,14 @@ guess_formats <- function(x, orders, locale = Sys.getlocale("LC_TIME"),
   ## ties are broken by `trained`
   n0 <- trained != 0
   if (drop) {
-    n_fmts <- n_fmts[n0]
+    score <- score[n0]
     trained <- trained[n0]
   } else {
-    n_fmts[!n0] <- -100
+    score[!n0] <- -100
   }
 
-  ## names(trained[which.max(n_fmts)])
-  names(trained)[order(n_fmts, trained, decreasing = T)]
+  ## print(rbind(trained, score))
+  names(trained)[order(score, trained, decreasing = T)]
 }
 
 ## These are formats that are effectively matched by c parser. But we must get
diff --git a/tests/testthat/test-parsers.R b/tests/testthat/test-parsers.R
index 6e216526..58401928 100644
--- a/tests/testthat/test-parsers.R
+++ b/tests/testthat/test-parsers.R
@@ -404,6 +404,13 @@ test_that("parse_date_time handles multiple partial month formats correctly", {
 
 })
 
+test_that("parse_date_time gives higher priority to y than to Y format", {
+  expect_equal(parse_date_time(c("apr.12.50","apr.2.2016"), orders = "mdy"),
+               ymd(c("2050-04-12 UTC", "2016-04-02 UTC"), tz = "UTC"))
+  expect_equal(parse_date_time(c("50.apr.12","2016.apr.2"), orders = "ymd"),
+               ymd(c("2050-04-12 UTC", "2016-04-02 UTC"), tz = "UTC"))
+})
+
 test_that("C parser correctly handles month formats", {
   expect_equal(ymd_hms("2010-Jan-02 23:59:59"), as.POSIXct("2010-01-02 23:59:59", tz = "UTC"))
   expect_equal(ymd_hms("2010-January-02 23:59:59"), as.POSIXct("2010-01-02 23:59:59", tz = "UTC"))