Skip to content

Commit

Permalink
Merge pull request #579 from adokter/download_vpts_aloft
Browse files Browse the repository at this point in the history
list_vpts_aloft()
  • Loading branch information
iskandari authored May 25, 2023
2 parents a390c38 + dce67e4 commit 5261641
Show file tree
Hide file tree
Showing 11 changed files with 536 additions and 14 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: bioRad
Title: Biological Analysis and Visualization of Weather Radar Data
Version: 0.7.0.9581
Version: 0.7.0.9579
Description: Extract, visualize and summarize aerial movements of birds and
insects from weather radar data. See <doi:10.1111/ecog.04028>
for a software paper describing package and methodologies.
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ export(is.vp)
export(is.vpfile)
export(is.vpi)
export(is.vpts)
export(list_vpts_aloft)
export(map)
export(nexrad_to_odim)
export(noy)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ bioRad 0.7 includes a major backend overhaul that deprecates the use of Docker.
* simplify installation, including automatic installation of rhdf5 from bioconductor (#464)
* new sep argument in read_vpts() (#536)
* allow odim files with missing source attribute. Extraction of radar identifier from what/source attribute in read_pvolfiles updated to function as read_vpfiles(), i.e. using the NOD identifier in the source attribute, if missing try RAD, if also missing try WMO, if nothing found use "unknown" (2f6935c).
* new function `list_vpts_aloft()` produces a list of aloft archive urls for time series of vertical profiles (`vpts`). This list of urls can then be used to bulk download this data using any number of external tools. #553


# bioRad 0.6.1
Expand Down
187 changes: 187 additions & 0 deletions R/list_vpts_aloft.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#' List aloft urls for time series of vertical profiles (`vpts`) of radar
#' stations
#'
#' @param date_min Character, the first date to return urls for. In the shape of
#' YYYY-MM-DD.
#' @param date_max Character, the last date to return urls for. In the shape of
#' YYYY-MM-DD.
#' @param radars Character vector, radar stations to return urls for.
#' @param format Character, the format of archive urls to return, either csv or
#' hdf5. Currently only csv urls are supported.
#' @param source Character, either `baltrad` or `ecog-04003`
#' @param show_warnings Logical, whether to print warnings for dates or radar
#' stations for which no data was found.
#'
#' @return A character vector of aloft urls
#' @export
#'
#' @examples
#' list_vpts_aloft(radars = "bejab")
list_vpts_aloft <- function(date_min = NULL,
                            date_max = NULL,
                            radars = NULL,
                            format = "csv", # also hdf5
                            source = "baltrad", # also ecog-04003
                            show_warnings = TRUE) {
  # Check if aws.s3 is installed
  # NOTE added because aws.s3 is scheduled to be moved to Suggests

  rlang::check_installed("aws.s3",
    reason = "to connect to the aloft bucket on Amazon Web Services"
  )

  # check arguments against vocabulary --------------------------------------
  # Check source
  valid_sources <- c("baltrad", "ecog-04003")
  assertthat::assert_that(
    source %in% valid_sources,
    msg = glue::glue(
      "`source` must be one of: {valid_sources_collapse}.",
      valid_sources_collapse = glue::glue_collapse(
        glue::backtick(valid_sources), sep = ", "
      )
    )
  )

  # Check format
  valid_formats <- c("csv", "hdf5")
  assertthat::assert_that(
    format %in% valid_formats,
    msg = glue::glue(
      "`format` must be one of: {valid_formats_collapse}.",
      valid_formats_collapse = glue::glue_collapse(
        glue::backtick(valid_formats), sep = ", "
      )
    )
  )

  # check radars: validate the requested odim codes against the radar
  # vocabulary published by the aloftdata.eu project
  aloft_radars_url <-
    paste(
      sep = "/",
      "https://raw.githubusercontent.com",
      "enram",
      "aloftdata.eu",
      "main",
      "_data",
      "OPERA_RADARS_DB.json"
    )
  valid_radars <- readr::read_lines(aloft_radars_url) %>%
    extract_string(pattern = '(?<="odimcode": ")[a-z]{5}', perl = TRUE)

  # FIX: collapse the vector of unknown radars into a single message string;
  # glue() on a vector would otherwise yield a multi-element message
  assertthat::assert_that(
    all(radars %in% valid_radars),
    msg = glue::glue("Can't find radar(s): {missing_radars_collapse}",
      missing_radars_collapse = glue::glue_collapse(
        glue::backtick(radars[!radars %in% valid_radars]), sep = ", "
      )
    )
  )

  # create file list --------------------------------------------------------
  ## handle dates -----------------------------------------------------------

  # handle missing dates
  if (rlang::is_empty(date_min)) {
    # if date_min is missing, set it to a date predating any radar observations
    date_min <- "1900-01-01"
  }
  if (rlang::is_empty(date_max)) {
    date_max <- "9999-12-31"
  }

  # Convert to dates
  start_date <- as.Date(date_min, tz = NULL)
  end_date <- as.Date(date_max, tz = NULL)

  ## set static urls --------------------------------------------------------
  # Set base URL
  base_url <- "https://aloft.s3-eu-west-1.amazonaws.com"

  # format csv --------------------------------------------------------------
  if (format == "csv") {
    # Aloft CSV data are available in daily and monthly files
    # This function uses the zipped monthly files, which are faster to download
    months <- format(seq(start_date, end_date, by = "months"), "%Y%m")

    found_vpts_aloft <-
      aws.s3::get_bucket_df(
        bucket = "s3://aloft",
        prefix = glue::glue("{source}/monthly"),
        region = "eu-west-1",
        max = Inf
      ) %>%
      dplyr::mutate(
        # keys look like "<source>/monthly/<radar>/<yyyy>/<file>"; the radar
        # code is therefore the third path element
        radar = vapply(.data$Key, FUN = function(radar_key) {
          strsplit(radar_key, "/", fixed = TRUE)[[1]][3]
        }, FUN.VALUE = character(1)),
        date = extract_string(.data$Key, "[0-9]{6}")
      ) %>%
      dplyr::filter(
        .data$radar %in% radars,
        date %in% months
      )

  # format hdf5 -------------------------------------------------------------
  } else {
    # FIX: previously this branch was empty, so the code below failed with a
    # cryptic "object 'found_vpts_aloft' not found" error. Fail fast with an
    # informative message instead until hdf5 support is implemented.
    # TODO: create file paths of form
    # https://aloft.s3-eu-west-1.amazonaws.com/baltrad/hdf5/bejab/2023/05/02/bejab_vp_20230502T000000Z_0x9.h5
    stop(
      "Only `format = \"csv\"` is currently supported.",
      call. = FALSE
    )
  }

  # format found data -------------------------------------------------------
  found_radars <-
    dplyr::distinct(found_vpts_aloft, .data$radar) %>%
    dplyr::pull("radar")

  data_urls <-
    glue::glue("{base_url}/{keys}",
      keys = dplyr::pull(found_vpts_aloft, "Key"),
      base_url = base_url
    )

  # warnings ----------------------------------------------------------------
  ## warn if no data found --------------------------------------------------
  if (rlang::is_empty(data_urls) && show_warnings) {
    warning(
      glue::glue("No data found for radars between {date_min} - {date_max}")
    )
    # stop here, no need to warn for radars and dates individually
    return(data_urls)
  }
  ## warn missing radar stations --------------------------------------------
  # Provide a warning if data couldn't be retrieved for all requested radar
  # stations

  # FIX: use %in% membership instead of elementwise `==`, which recycles and
  # gives wrong results (or warnings) when lengths or ordering differ
  all_radars_found <- all(radars %in% found_radars)
  if (!all_radars_found && show_warnings) {
    warning(
      glue::glue(
        "Found no data for radars: {missing_radars_collapse}",
        missing_radars_collapse =
          glue::glue_collapse(
            glue::backtick(radars[!radars %in% found_radars]),
            sep = ", "
          )
      )
    )
  }

  ## warn missing dates -----------------------------------------------------
  # Warn if fewer dates were found than requested
  if (!all(months %in% found_vpts_aloft$date) && show_warnings) {
    warning(
      glue::glue(
        "Not every date has radar data, ",
        "radars found for {first_date_found} to {last_date_found}",
        first_date_found = format(lubridate::ym(min(
          found_vpts_aloft$date
        )), "%Y-%m"),
        last_date_found = format(lubridate::ym(max(
          found_vpts_aloft$date
        )), "%Y-%m")
      )
    )
  }

  # output vector of urls ---------------------------------------------------
  return(data_urls)
}
19 changes: 19 additions & 0 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,22 @@ skip_if_no_mistnet <- function(){
}
testthat::skip("No MistNet")
}


#' Extract regex matches from a character vector, analogous to
#' stringr::str_extract
#'
#' @param string Input vector. A character vector.
#' @param pattern Regex pattern to look for
#' @param ... passed on to `regexpr()`
#'
#' @return A character vector containing only the matched substrings, so it
#'   may be shorter than `string` when some elements have no match
#' @keywords internal
extract_string <- function(string, pattern, ...) {
  # Locate the first match of the pattern within each element
  match_positions <- regexpr(pattern = pattern, text = string, ...)
  # Return the matched substrings; non-matching elements are dropped
  regmatches(string, m = match_positions)
}
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ reference:
- example_vp
- plot.vp
- as.data.frame.vp
- list_vpts_aloft
- title: "Combining vertical profiles into time series"
desc: "Functions to combine vertical profiles (vp) into time series (vpts) and read, inspect and plot these."
contents:
Expand Down
23 changes: 23 additions & 0 deletions man/extract_string.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions man/list_vpts_aloft.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5261641

Please sign in to comment.