From a148e7025d2e3492b5f2a4c3ca2a354b3911eaec Mon Sep 17 00:00:00 2001 From: oweno-tfwm Date: Sun, 24 Sep 2023 12:25:00 +0100 Subject: [PATCH] 1) when no public arrival time available fall back to WTT time then pass time 2) when looking at non-public services include passes marked as 'no pickup/dropoff available' from a GTFS perspective --- R/atoc_export.R | 9 ++-- R/atoc_import.R | 87 +++++++++++++++++++++++++---------- tests/testthat/test_aa_unit.R | 55 +++++++++++++++++----- 3 files changed, 110 insertions(+), 41 deletions(-) diff --git a/R/atoc_export.R b/R/atoc_export.R index d8391b8..1ca9195 100644 --- a/R/atoc_export.R +++ b/R/atoc_export.R @@ -652,14 +652,15 @@ clean_activities2 <- function(x, public_only = TRUE) { x$pickup_type[is.na(x$pickup_type)] <- 0 x$drop_off_type[is.na(x$drop_off_type)] <- 0 } - x <- x[, c("pickup_type", "drop_off_type")] } else #set all of the stops on a route to be valid for passenger boarding / alighting from a GTFS perspective + # (unless they are 'pass' which have no 'activity' as they woosh past - just like deadlines.) { - x$pickup_type <- 0 - x$drop_off_type <- 0 - x <- x[, c("pickup_type", "drop_off_type", "activity")] + x$pickup_type <- ifelse( is.na(x$activity), 1, 0 ) + x$drop_off_type <- ifelse( is.na(x$activity), 1, 0 ) } + x <- x[, c("pickup_type", "drop_off_type")] + return(x) } diff --git a/R/atoc_import.R b/R/atoc_import.R index e7df250..38fd6dd 100644 --- a/R/atoc_import.R +++ b/R/atoc_import.R @@ -303,34 +303,68 @@ strip_whitespace <- function(dt) { } -#does in place-modification of input data.table + process_times <- function(dt, working_timetable) { - #fill in the missing seconds - substituting H for 30 seconds. - if (working_timetable) - { - if ("Scheduled Arrival Time" %in% colnames(dt)) { - set(dt, j = "Arrival Time", value = gsub("^(\\d{4}) $","\\100",gsub("^(\\d{4})H$", "\\130", dt[["Scheduled Arrival Time"]]))) - } - if ("Scheduled Departure Time" %in% colnames(dt)) { - set(dt, j = "Departure Time", value = gsub("^(\\d{4}) $","\\100",gsub("^(\\d{4})H$", "\\130", dt[["Scheduled Departure Time"]]))) - } - } - else + dt = processOneTime(dt, working_timetable, "Arrival Time", "Scheduled Arrival Time", "Public Arrival Time") + dt = processOneTime(dt, working_timetable, "Departure Time", "Scheduled Departure Time", "Public Departure Time") + + return(dt) +} + + +#does in place-modification of input data.table +#select the public arrive/depart times if they exist, otherwise select the wtt arrive/depart times if they exist, otherwise select the pass time +#and at the same time fill in the missing seconds values (and 30 seconds if 'H' is indicated) +processOneTime <- function(dt, working_timetable, targetField, sourceFieldWtt, sourceField) +{ + hasPass = "Scheduled Pass" %in% colnames(dt) + + if (sourceFieldWtt %in% colnames(dt)) { - if ("Public Arrival Time" %in% colnames(dt)) { - set(dt, j = "Arrival Time", value = gsub("^(\\d{4})$", "\\100", dt[["Public Arrival Time"]])) + if (working_timetable) + { + if(hasPass) + { + set(dt, j = targetField, value = gsub("^(\\d{4}) $","\\100",gsub("^(\\d{4})H$", "\\130", + data.table::fifelse( " "==dt[[sourceFieldWtt]], + dt[["Scheduled Pass"]], + dt[[sourceFieldWtt]]))) + ) + } + else + { + set(dt, j = targetField, value = gsub("^(\\d{4}) $","\\100",gsub("^(\\d{4})H$", "\\130", dt[[sourceFieldWtt]]))) + } } - - if ("Public Departure Time" %in% colnames(dt)) { - set(dt, j = "Departure Time", value = gsub("^(\\d{4})$", "\\100", dt[["Public Departure Time"]])) + else + { + if(hasPass) + { + set(dt, j = targetField, value = data.table::fifelse( "0000"==dt[[sourceField]], + gsub("^(\\d{4}) $","\\100",gsub("^(\\d{4})H$", "\\130", + data.table::fifelse( " "==dt[[sourceFieldWtt]], + dt[["Scheduled Pass"]], + dt[[sourceFieldWtt]]))), + gsub("^(\\d{4})$", "\\100", dt[[sourceField]])) + ) + } + else + { + #If there is no Public Arrival time this field will default to 0000. (we will use WTT instead) + set(dt, j = targetField, value = data.table::fifelse( "0000"==dt[[sourceField]], + gsub("^(\\d{4}) $","\\100",gsub("^(\\d{4})H$", "\\130", dt[[sourceFieldWtt]])), + gsub("^(\\d{4})$", "\\100", dt[[sourceField]])) + ) + } } } - return(dt) + return (dt) } + # Process Activity Codes process_activity <- function(dt, public_only) { @@ -373,8 +407,11 @@ process_activity <- function(dt, public_only) { activity = gsub(",+", ",", activity) set(dt, j="Activity", value = gsub("\\s+|^,|,$", "", activity)) - #remove rows with no activity we're interested in - dt <- dt[ ""!=dt$Activity ] + #remove rows with no activity we're interested in (there is no activity at 'pass' locations) + if(public_only) + { + dt <- dt[ ""!=dt$Activity ] + } return(dt) } @@ -506,9 +543,9 @@ importMCA <- function(file, # Add the rowid LO$rowID <- rowIds[types == "LO"] - LO[, c("Scheduled Arrival Time","Public Arrival Time") := ""] + LO[, c("Scheduled Arrival Time","Public Arrival Time", "Scheduled Pass") := ""] LO <- LO[, c("rowID", "Location", "Activity", "Scheduled Arrival Time", "Scheduled Departure Time", - "Public Arrival Time", "Public Departure Time" )] + "Public Arrival Time", "Public Departure Time", "Scheduled Pass" )] # Intermediate Station @@ -534,7 +571,7 @@ importMCA <- function(file, LI$rowID <- rowIds[types == "LI"] LI <- LI[, c("rowID", "Location", "Activity", "Scheduled Arrival Time", "Scheduled Departure Time", - "Public Arrival Time", "Public Departure Time" )] + "Public Arrival Time", "Public Departure Time", "Scheduled Pass" )] # Terminating Station @@ -556,9 +593,9 @@ importMCA <- function(file, # Add the rowid LT$rowID <- rowIds[types == "LT"] - LT[, c("Scheduled Departure Time","Public Departure Time") := ""] + LT[, c("Scheduled Departure Time","Public Departure Time", "Scheduled Pass") := ""] LT <- LT[, c("rowID", "Location", "Activity", "Scheduled Arrival Time", "Scheduled Departure Time", - "Public Arrival Time", "Public Departure Time" )] + "Public Arrival Time", "Public Departure Time", "Scheduled Pass" )] # TIPLOC Insert diff --git a/tests/testthat/test_aa_unit.R b/tests/testthat/test_aa_unit.R index e002e6b..8bd114b 100644 --- a/tests/testthat/test_aa_unit.R +++ b/tests/testthat/test_aa_unit.R @@ -84,19 +84,49 @@ test_that("test splitBitmask performance", { +test_that("test process_times", { + testData = data.table( + `Scheduled Arrival Time` =c("", " ", "0000 ", "1234H", "5678 "), + `Scheduled Departure Time`=c("", " ", "0106 ", "2156H", "8765H"), + `Public Arrival Time` =c("", " ", "0135", "tjkl", "0000"), + `Public Departure Time` =c("", " ", "1234", "tgbi", "0000")) + OK = TRUE + { + res = process_times( testData, FALSE ) -test_that("test process_times", { + res = res[,c("Arrival Time","Departure Time")] - testData = data.table( - `Scheduled Arrival Time` =c("", " ", "0000 ", "1234H"), - `Scheduled Departure Time`=c("", " ", "0106 ", "2156H"), - `Public Arrival Time` =c("", " ", "0135", "tjkl" ), - `Public Departure Time` =c("", " ", "1234", "tgbi" )) + expectedResult = data.table( + `Arrival Time` =c("", " ", "013500", "tjkl", "567800"), + `Departure Time` =c("", " ", "123400", "tgbi", "876530")) - OK = TRUE + printDifferencesDf(expectedResult, res) + OK = OK & identical(expectedResult, res) + } + { + res = process_times( testData, TRUE ) + + res = res[,c("Arrival Time","Departure Time")] + + expectedResult = data.table( + `Arrival Time` =c("", " ", "000000", "123430", "567800" ), + `Departure Time` =c("", " ", "010600", "215630", "876530" )) + + printDifferencesDf(expectedResult, res) + OK = OK & identical(expectedResult, res) + } + + + testData = data.table( + `Scheduled Arrival Time` =c("", " ", " ", "1234H", " "), + `Scheduled Departure Time`=c("", " ", "0106 ", "2156H", " "), + `Public Arrival Time` =c("", " ", "0135", "tjkl", "0000"), + `Public Departure Time` =c("", " ", "1234", "tgbi", "0000"), + `Scheduled Pass` =c("", "1234 ", "0001 ", "1234H", "5678 ") + ) { res = process_times( testData, FALSE ) @@ -104,8 +134,8 @@ test_that("test process_times", { res = res[,c("Arrival Time","Departure Time")] expectedResult = data.table( - `Arrival Time` =c("", " ", "013500", "tjkl" ), - `Departure Time` =c("", " ", "123400", "tgbi" )) + `Arrival Time` =c("", " ", "013500", "tjkl", "567800"), + `Departure Time` =c("", " ", "123400", "tgbi", "567800")) printDifferencesDf(expectedResult, res) OK = OK & identical(expectedResult, res) @@ -116,13 +146,14 @@ test_that("test process_times", { res = res[,c("Arrival Time","Departure Time")] expectedResult = data.table( - `Arrival Time` =c("", " ", "000000", "123430" ), - `Departure Time` =c("", " ", "010600", "215630" )) + `Arrival Time` =c("", "123400", "000100", "123430", "567800" ), + `Departure Time` =c("", "123400", "010600", "215630", "567800" )) printDifferencesDf(expectedResult, res) OK = OK & identical(expectedResult, res) } + expect_true( OK ) }) @@ -665,7 +696,7 @@ test_that("test process_activity:1", { res = process_activity( testData, FALSE ) - expectedResult = data.table( Activity=c("TB,T,D,U,R,TF", "ab,cd,ef,gh,ij,kl", "a,d,ef,ij,kl","cd,ef,gh,ij" )) + expectedResult = data.table( Activity=c("", "TB,T,D,U,R,TF", "ab,cd,ef,gh,ij,kl", "a,d,ef,ij,kl","cd,ef,gh,ij" )) OK = OK & identical(expectedResult,res) }