diff --git a/R/entitylist_detail.R b/R/entitylist_detail.R index b272b320..78a770ee 100644 --- a/R/entitylist_detail.R +++ b/R/entitylist_detail.R @@ -10,8 +10,6 @@ #' This function is supported from ODK Central v2022.3 and will warn if the #' given odkc_version is lower. #' -#' `r lifecycle::badge("maturing")` -#' #' @template param-pid #' @template param-did #' @template param-url diff --git a/R/entitylist_download.R b/R/entitylist_download.R index 18261cfc..f1b4da61 100644 --- a/R/entitylist_download.R +++ b/R/entitylist_download.R @@ -2,8 +2,9 @@ #' #' `r lifecycle::badge("maturing")` #' -#' The downloaded file is named "entities.csv". The download location defaults -#' to the current workdir, but can be modified to a folder name. +#' The downloaded CSV file is named after the entity list name. +#' The download location defaults to the current workdir, but can be modified +#' to a different folder path which will be created if it doesn't exist. #' #' An Entity List is a named collection of Entities that have the same #' properties. @@ -19,36 +20,63 @@ #' If any Property for an given Entity is blank (e.g. it was not captured by #' that Form or was left blank), that field of the CSV is blank. #' -#' The `$filter` querystring parameter can be used to filter on system-level -#' properties, similar to how filtering in the OData Dataset (Entity List) -#' Service works. +#' The ODK Central `$filter` querystring parameter can be used to filter on +#' system-level properties, similar to how filtering in the OData Dataset +#' (Entity List) Service works. +#' Of the [OData filter specs](https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part1-protocol.html#_Toc31358948) +#' ODK Central implements a [growing set of features +#' ](https://docs.getodk.org/central-api-odata-endpoints/#data-document). +#' `ruODK` provides the parameter `filter` (str) which, if set, will be passed +#' on to the ODK Central endpoint as is. 
#' -#' This endpoint supports `ETag` header, which can be used to avoid downloading -#' the same content more than once. When an API consumer calls this endpoint, -#' the endpoint returns a value in the `ETag` header. -#' If you pass that value in the `If-None-Match` header of a subsequent request, +#' The ODK Central endpoint supports the [`ETag` header +#' ](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag), which can +#' be used to avoid downloading the same content more than once. +#' When an API consumer calls this endpoint, the endpoint returns a value in +#' the `ETag` header. +#' If you pass that value in the [`If-None-Match` header +#' ](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match) +#' of a subsequent request, #' then if the Entity List has not been changed since the previous request, #' you will receive 304 Not Modified response; otherwise you'll get the new #' data. -#' -#' `r lifecycle::badge("maturing")` +#' `ruODK` provides the parameter `etag` which can be set from the output of +#' a previous call to `entitylist_download()`. `ruODK` strips the `W/\"` and +#' `\"` from the returned etag and expects the stripped etag as parameter. #' #' @template param-pid #' @template param-did #' @template param-url #' @template param-auth #' @param local_dir The local folder to save the downloaded files to, -#' default: \code{here::here}. -#' @param overwrite Whether to overwrite previously downloaded zip files, +#' default: \code{here::here}. +#' If the folder does not exist it will be created. +#' @param etag (str) The etag value from a previous call to +#' `entitylist_download()`. The value must be stripped of the `W/\"` and `\"`, +#' which is the format of the etag returned by `entitylist_download()`. +#' If provided, only new entities will be returned. +#' If the same `local_dir` is chosen and `overwrite` is set to `TRUE`, +#' the downloaded CSV will also be overwritten, losing the Entities downloaded +#' earlier.
+#' Default: NULL (no filtering, all entities returned). +#' @param filter (str) A valid filter string. +#' Default: NULL (no filtering, all entities returned). +#' @param overwrite Whether to overwrite previously downloaded file, #' default: FALSE #' @template param-retries #' @template param-odkcv #' @template param-orders #' @template param-tz #' @template param-verbose -#' @return The path to the downloaded CSV. +#' @return A list of five items: +#' - entities (tbl_df) The Entity List as tibble +#' - http_status (int) The HTTP status code of the response. +#' 200 if OK, 304 if a given etag finds no new entities created. +#' - etag (str) The ETag to use in subsequent calls to `entitylist_download()` +#' - downloaded_to (fs_path) The path to the downloaded CSV file +#' - downloaded_on (POSIXct) The time of download in the local timezone # nolint start -#' @seealso \url{ https://docs.getodk.org/central-api-dataset-management/#datasets} +#' @seealso \url{https://docs.getodk.org/central-api-dataset-management/#datasets} # nolint end #' @family entity-management #' @export @@ -59,26 +87,47 @@ #' #' ds <- entitylist_list(pid = get_default_pid()) #' ds1 <- entitylist_download(pid = get_default_pid(), did = ds$name[1]) +#' # ds1$entities +#' # ds1$etag +#' # ds1$downloaded_to +#' # ds1$downloaded_on +#' +#' ds2 <- entitylist_download( +#' pid = get_default_pid(), +#' did = ds$name[1], +#' etag = ds1$etag +#' ) +#' # ds2$http_status == 304 +#' +#' newest_entity_date <- as.Date(max(ds1$entities$`__createdAt`)) +#' ds3 <- entitylist_download( +#' pid = get_default_pid(), +#' did = ds$name[1], +#' filter = glue::glue("__createdAt le {newest_entity_date}") +#' ) #' } entitylist_download <- function(pid = get_default_pid(), - did = NULL, - url = get_default_url(), - un = get_default_un(), - pw = get_default_pw(), - local_dir = here::here(), - overwrite = TRUE, - retries = get_retries(), - odkc_version = get_default_odkc_version(), - orders = c( - "YmdHMS", - "YmdHMSz", - "Ymd
HMS", - "Ymd HMSz", - "Ymd", - "ymd" - ), - tz = get_default_tz(), - verbose = get_ru_verbose()) { + did = NULL, + url = get_default_url(), + un = get_default_un(), + pw = get_default_pw(), + local_dir = here::here(), + filter = NULL, + etag = NULL, + overwrite = TRUE, + retries = get_retries(), + odkc_version = get_default_odkc_version(), + orders = c( + "YmdHMS", + "YmdHMSz", + "Ymd HMS", + "Ymd HMSz", + "Ymd", + "ymd" + ), + tz = get_default_tz(), + verbose = get_ru_verbose()) { + # Gatecheck params yell_if_missing(url, un, pw, pid = pid) if (is.null(did)) { @@ -87,12 +136,20 @@ entitylist_download <- function(pid = get_default_pid(), ) } + # Gatecheck ODKC version if (odkc_version |> semver_lt("2022.3")) { ru_msg_warn("entitylist_download is supported from v2022.3") } - pth <- fs::path(local_dir, "entities.csv") + # Download file destination directory + if (!fs::dir_exists(local_dir)) { + fs::dir_create(local_dir) + } + + # Downloaded file path + pth <- fs::path(local_dir, glue::glue("{did}.csv")) + # Emit message if (fs::file_exists(pth)) { if (overwrite == TRUE) { "Overwriting previous entity list: \"{pth}\"" %>% @@ -102,8 +159,6 @@ entitylist_download <- function(pid = get_default_pid(), "Keeping previous entity list: \"{pth}\"" %>% glue::glue() %>% ru_msg_success(verbose = verbose) - - return(pth) } } else { "Downloading entity list \"{did}\" to {pth}" %>% @@ -111,27 +166,48 @@ entitylist_download <- function(pid = get_default_pid(), ru_msg_success(verbose = verbose) } + # Headers: accept CSV, set ETag if given + headers <- c(Accept = "text/csv; charset=utf-8") + if (!is.null(etag)) { + if (odkc_version |> semver_lt("2023.3")) { + ru_msg_warn("entitylist_download ETag is supported from v2023.3") + } + headers <- c(headers, c("If-None-Match" = etag)) + } + + # Query: filter, passed raw because httr::modify_url() percent-encodes values + query <- NULL + if (!is.null(filter)) { + query <- list("$filter" = filter) + } - httr::RETRY( + res <- httr::RETRY( "GET", - httr::modify_url(url, -
path = glue::glue( - "v1/projects/{pid}/datasets/", - "{URLencode(did, reserved = TRUE)}/entities.csv" - ) - ), - httr::add_headers( - "Accept" = "text/csv" + httr::modify_url( + url, + path = glue::glue( + "v1/projects/{pid}/datasets/", + "{utils::URLencode(did, reserved = TRUE)}/entities.csv" + ), + query = query ), + httr::add_headers(.headers = headers), httr::authenticate(un, pw), httr::write_disk(pth, overwrite = overwrite), times = retries - ) |> - yell_if_error(url, un, pw) |> - httr::content(encoding = "utf-8") + ) + if (res$status_code != 304) yell_if_error(res, url, un, pw) # HTTP 304 (unchanged) is expected, other errors still yell - pth + list( + entities = httr::content(res, encoding = "utf-8"), + etag = res$headers$etag |> + stringr::str_remove_all(stringr::fixed("W/\"")) |> + stringr::str_remove_all(stringr::fixed("\"")), + http_status = res$status_code, + downloaded_to = pth, + downloaded_on = isodt_to_local(res$date, orders = orders, tz = tz) + ) } -# usethis::use_test("entitylist_download") # nolint \ No newline at end of file +# usethis::use_test("entitylist_download") # nolint diff --git a/R/entitylist_list.R b/R/entitylist_list.R index f0dcbf67..66dc833a 100644 --- a/R/entitylist_list.R +++ b/R/entitylist_list.R @@ -17,8 +17,6 @@ #' This function is supported from ODK Central v2022.3 and will warn if the #' given odkc_version is lower.
#' -#' `r lifecycle::badge("maturing")` -#' #' @template param-pid #' @template param-url #' @template param-auth diff --git a/R/submission_export.R b/R/submission_export.R index 8c3ca36e..038a2d2e 100644 --- a/R/submission_export.R +++ b/R/submission_export.R @@ -127,6 +127,10 @@ submission_export <- function(local_dir = here::here(), "{URLencode(fid, reserved = TRUE)}/submissions{url_ext}" ) + if (!fs::dir_exists(local_dir)) { + fs::dir_create(local_dir) + } + pth <- fs::path( local_dir, glue::glue("{URLencode(fid, reserved = TRUE)}{file_ext}") diff --git a/data-raw/make_release.R b/data-raw/make_release.R index 1c108ac5..59ef39f9 100644 --- a/data-raw/make_release.R +++ b/data-raw/make_release.R @@ -48,7 +48,7 @@ spelling::spell_check_files("README.Rmd", lang = "en-AU") spelling::update_wordlist() codemetar::write_codemeta("../ruODK", write_minimeta = TRUE) if (fs::file_info("README.md")$modification_time < - fs::file_info("README.Rmd")$modification_time) { + fs::file_info("README.Rmd")$modification_time) { rmarkdown::render("README.Rmd", encoding = "UTF-8", clean = TRUE) if (fs::file_exists("README.html")) fs::file_delete("README.html") } diff --git a/man/entitylist_detail.Rd b/man/entitylist_detail.Rd index 7a74df1e..b863bb6c 100644 --- a/man/entitylist_detail.Rd +++ b/man/entitylist_detail.Rd @@ -90,8 +90,6 @@ This will make it available to clients as an automatically-updating CSV. This function is supported from ODK Central v2022.3 and will warn if the given odkc_version is lower. 
- -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} } \examples{ \dontrun{ diff --git a/man/entitylist_download.Rd b/man/entitylist_download.Rd index 5345befb..75b43fa9 100644 --- a/man/entitylist_download.Rd +++ b/man/entitylist_download.Rd @@ -11,6 +11,8 @@ entitylist_download( un = get_default_un(), pw = get_default_pw(), local_dir = here::here(), + filter = NULL, + etag = NULL, overwrite = TRUE, retries = get_retries(), odkc_version = get_default_odkc_version(), @@ -49,9 +51,22 @@ Set default \code{pw} through \code{ru_setup(pw="...")}. See \code{vignette("Setup", package = "ruODK")}.} \item{local_dir}{The local folder to save the downloaded files to, -default: \code{here::here}.} - -\item{overwrite}{Whether to overwrite previously downloaded zip files, +default: \code{here::here}. +If the folder does not exist it will be created.} + +\item{filter}{(str) A valid filter string. +Default: NULL (no filtering, all entities returned).} + +\item{etag}{(str) The etag value from a previous call to +\code{entitylist_download()}. The value must be stripped of the \verb{W/\\"} and \verb{\\"}, +which is the format of the etag returned by \code{entitylist_download()}. +If provided, only new entities will be returned. +If the same \code{local_dir} is chosen and \code{overwrite} is set to \code{TRUE}, +the downloaded CSV will also be overwritten, losing the Entities downloaded +earlier. +Default: NULL (no filtering, all entities returned).} + +\item{overwrite}{Whether to overwrite previously downloaded file, default: FALSE} \item{retries}{The number of attempts to retrieve a web resource. @@ -89,14 +104,23 @@ Read \code{vignette("setup", package = "ruODK")} to learn how \code{ruODK}'s verbosity can be set globally or per function.} } \value{ -The path to the downloaded CSV.
+A list of five items: +\itemize{ +\item entities (tbl_df) The Entity List as tibble +\item http_status (int) The HTTP status code of the response. +200 if OK, 304 if a given etag finds no new entities created. +\item etag (str) The ETag to use in subsequent calls to \code{entitylist_download()} +\item downloaded_to (fs_path) The path to the downloaded CSV file +\item downloaded_on (POSIXct) The time of download in the local timezone +} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} } \details{ -The downloaded file is named "entities.csv". The download location defaults -to the current workdir, but can be modified to a folder name. +The downloaded CSV file is named after the entity list name. +The download location defaults to the current workdir, but can be modified +to a different folder path which will be created if it doesn't exist. An Entity List is a named collection of Entities that have the same properties. @@ -112,19 +136,26 @@ Dataset (Entity List )/Entity Properties themselves. If any Property for an given Entity is blank (e.g. it was not captured by that Form or was left blank), that field of the CSV is blank. -The \verb{$filter} querystring parameter can be used to filter on system-level -properties, similar to how filtering in the OData Dataset (Entity List) -Service works. - -This endpoint supports \code{ETag} header, which can be used to avoid downloading -the same content more than once. When an API consumer calls this endpoint, -the endpoint returns a value in the \code{ETag} header. -If you pass that value in the \code{If-None-Match} header of a subsequent request, +The ODK Central \verb{$filter} querystring parameter can be used to filter on +system-level properties, similar to how filtering in the OData Dataset +(Entity List) Service works.
+Of the \href{https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part1-protocol.html#_Toc31358948}{OData filter specs} +ODK Central implements a \href{https://docs.getodk.org/central-api-odata-endpoints/#data-document}{growing set of features }. +\code{ruODK} provides the parameter \code{filter} (str) which, if set, will be passed +on to the ODK Central endpoint as is. + +The ODK Central endpoint supports the \href{https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag}{\code{ETag} header }, which can +be used to avoid downloading the same content more than once. +When an API consumer calls this endpoint, the endpoint returns a value in +the \code{ETag} header. +If you pass that value in the \href{https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match}{\code{If-None-Match} header } +of a subsequent request, then if the Entity List has not been changed since the previous request, you will receive 304 Not Modified response; otherwise you'll get the new data. - -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} +\code{ruODK} provides the parameter \code{etag} which can be set from the output of +a previous call to \code{entitylist_download()}. \code{ruODK} strips the \verb{W/\\"} and +\verb{\\"} from the returned etag and expects the stripped etag as parameter. } \examples{ \dontrun{ @@ -133,10 +164,28 @@ data. 
ds <- entitylist_list(pid = get_default_pid()) ds1 <- entitylist_download(pid = get_default_pid(), did = ds$name[1]) +# ds1$entities +# ds1$etag +# ds1$downloaded_to +# ds1$downloaded_on + +ds2 <- entitylist_download( + pid = get_default_pid(), + did = ds$name[1], + etag = ds1$etag +) +# ds2$http_status == 304 + +newest_entity_date <- as.Date(max(ds1$entities$`__createdAt`)) +ds3 <- entitylist_download( + pid = get_default_pid(), + did = ds$name[1], + filter = glue::glue("__createdAt le {newest_entity_date}") +) } } \seealso{ -\url{ https://docs.getodk.org/central-api-dataset-management/#datasets} +\url{https://docs.getodk.org/central-api-dataset-management/#datasets} Other entity-management: \code{\link{entitylist_detail}()}, diff --git a/man/entitylist_list.Rd b/man/entitylist_list.Rd index 17681b24..560d5a10 100644 --- a/man/entitylist_list.Rd +++ b/man/entitylist_list.Rd @@ -94,8 +94,6 @@ its relation to Entities better than the term Dataset. This function is supported from ODK Central v2022.3 and will warn if the given odkc_version is lower. 
- -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} } \examples{ \dontrun{ diff --git a/tests/testthat/test-entitylist_download.R b/tests/testthat/test-entitylist_download.R index c8eb65e4..e73c2a53 100644 --- a/tests/testthat/test-entitylist_download.R +++ b/tests/testthat/test-entitylist_download.R @@ -3,7 +3,7 @@ test_that("entitylist_download works", { message = "Test server not configured" ) - skip_on_ci() + # skip_on_ci() tempd <- tempdir() fs::dir_ls(tempd) %>% fs::file_delete() @@ -16,29 +16,119 @@ test_that("entitylist_download works", { odkc_version = get_test_odkc_version() ) - ds <- entitylist_list() ds1 <- entitylist_download(did = ds$name[1], local_dir = tempd) - # The file was downloaded - testthat::expect_true( - fs::file_exists(ds1) - ) + # Format: + # list( + # entities = httr::content(res, encoding = "utf-8"), + # etag = res$headers$etag, + # downloaded_to = pth, + # downloaded_on = isodt_to_local(res$date, orders = orders, tz = tz) + # ) + + # The Entity List is also returned as a tibble + testthat::expect_s3_class(ds1$entities, "tbl_df") + + # An ETag was returned + testthat::expect_is(ds1$etag, "character") + + # The CSV file was downloaded + testthat::expect_true(fs::file_exists(ds1$downloaded_to)) - # Download to same location, do not overwrite: emit message, return file path - testthat::expect_message( - ds2 <- entitylist_download( + # The timestamp is included + testthat::expect_s3_class(ds1$downloaded_on, "POSIXct") + + # Download to same location, do not overwrite: error + # Error: Path exists and overwrite is FALSE + testthat::expect_error( + entitylist_download( did = ds$name[1], local_dir = tempd, overwrite = FALSE ) ) + ds2 <- entitylist_download( + did = ds$name[1], + local_dir = tempd, + overwrite = TRUE + ) + # The returned file path is the same as from the first download - testthat::expect_equal(ds1, ds2) + 
testthat::expect_equal(ds1$downloaded_to, ds2$downloaded_to) +}) + + +test_that("entitylist_download etag works", { + skip_if(Sys.getenv("ODKC_TEST_URL") == "", + message = "Test server not configured" + ) + + # skip_on_ci() + + tempd <- tempdir() + fs::dir_ls(tempd) %>% fs::file_delete() + + ru_setup( + pid = get_test_pid(), + url = get_test_url(), + un = get_test_un(), + pw = get_test_pw(), + odkc_version = get_test_odkc_version() + ) + + ds <- entitylist_list() + + ds1 <- entitylist_download(did = ds$name[1], local_dir = tempd) + + # Download only entities added since last download (ds1) = None + ds2 <- entitylist_download( + did = ds$name[1], + local_dir = tempd, + overwrite = TRUE, + etag = ds1$etag + ) + + testthat::expect_equal(ds2$http_status, 304) + testthat::expect_equal(ds2$entities, NULL) }) +test_that("entitylist_download filter works", { + skip_if(Sys.getenv("ODKC_TEST_URL") == "", + message = "Test server not configured" + ) + + # skip_on_ci() + + tempd <- tempdir() + fs::dir_ls(tempd) %>% fs::file_delete() + + ru_setup( + pid = get_test_pid(), + url = get_test_url(), + un = get_test_un(), + pw = get_test_pw(), + odkc_version = get_test_odkc_version() + ) + + ds <- entitylist_list() + + ds1 <- entitylist_download(did = ds$name[1], local_dir = tempd) + + newest_entity_date <- as.Date(max(ds1$entities$`__createdAt`)) + + # Should return all entities (created before or on date of latest entity) + # Currently returns HTTP 501 not implemented + # ds2 <- entitylist_download( + # did = ds$name[1], + # filter=glue::glue("__createdAt le {newest_entity_date}") + # ) + + # testthat::expect_equal(ds2$http_status, 200) + # testthat::expect_true(nrow(ds2$entities)) +}) test_that("entitylist_download warns if did is missing", { testthat::expect_error( @@ -46,5 +136,28 @@ test_that("entitylist_download warns if did is missing", { ) }) +test_that("entitylist_download warns if odkc_version too low", { + tempd <- tempdir() + fs::dir_ls(tempd) %>% fs::file_delete() + + 
ru_setup( + pid = get_test_pid(), + url = get_test_url(), + un = get_test_un(), + pw = get_test_pw(), + odkc_version = get_test_odkc_version() + ) + + ds <- entitylist_list() + + testthat::expect_warning( + entitylist_download( + did = ds$name[1], + local_dir = tempd, + odkc_version = "1.5.3" + ) + ) +}) + # usethis::use_r("entitylist_download") # nolint diff --git a/tic.R b/tic.R deleted file mode 100644 index f48abca6..00000000 --- a/tic.R +++ /dev/null @@ -1,47 +0,0 @@ -# installs dependencies, runs R CMD check, runs covr::codecov() -do_package_checks( - error_on="error", - args = c( - "--no-manual", - "--as-cran", - "--no-vignettes", - "--no-build-vignettes", - "--no-multiarch" - ) -) - -# failed: ‘terra’, ‘sf’, ‘raster’, ‘leafpop’, ‘leaflet’, ‘satellite’, ‘leafem’ -if(ci_get_env("matrix.config.os") == "macOS-latest"){ - get_stage("install") %>% - add_step(step_install_cran("proj4")) %>% - add_step(step_install_github("r-spatial/sf", dependencies = TRUE, force = TRUE)) %>% - add_step(step_install_cran("raster")) %>% - add_step(step_install_cran("leaflet")) %>% - add_step(step_install_cran("leafpop")) %>% - add_step(step_install_github("r-spatial/leafem", dependencies = TRUE)) %>% - add_step(step_install_cran("terra")) -} - -if(ci_get_env("matrix.config.os") == "ubuntu-20.04"){ - get_stage("install") %>% - # add_step(step_install_github(c("tidyverse/readr"))) %>% - # https://stackoverflow.com/q/61875754/2813717 - install proj4 - add_step(step_install_cran("proj4")) %>% - # sf install fixed by cpp11 - add_step(step_install_github("r-lib/cpp11", dependencies = TRUE)) %>% - add_step(step_install_github("r-spatial/sf", dependencies = TRUE, force = TRUE)) %>% - add_step(step_install_github("r-spatial/mapview", dependencies = TRUE)) %>% - add_step(step_install_github("r-spatial/leafem", dependencies = TRUE)) %>% - # add_step(step_install_cran("listviewer")) %>% - # libicui8n not found: fixed by stringi forced install - add_step(step_install_github("gagolews/stringi", 
dependencies = TRUE, force = TRUE)) -} - -# # rOpenSci build their own docs, see build at -# # https://dev.ropensci.org/job/ruODK/lastBuild/console -# -# if (ci_on_ghactions() && ci_has_env("BUILD_PKGDOWN")) { -# # creates pkgdown site and pushes to gh-pages branch -# # only for the runner with the "BUILD_PKGDOWN" env var set -# do_pkgdown() -# }