Skip to content

Commit

Permalink
Merge 289ff87 into 2dba87e
Browse files Browse the repository at this point in the history
  • Loading branch information
mhallal1 authored Nov 10, 2022
2 parents 2dba87e + 289ff87 commit 0e02c7a
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 31 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

* Updated examples to use `scda.2022`.
* Added R session information into a link in the footer of `teal` applications.
* Added data hashing step using `rlang` instead of `digest` package to calculate the hash (which has been moved from `teal.data` and `teal.slice`). There is now an explicit hashing check in the reproducible code output

# teal 0.12.0

Expand Down
39 changes: 14 additions & 25 deletions R/get_rcode.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ get_rcode <- function(datasets = NULL,
)
str_install <- paste(c(get_rcode_str_install(), ""), collapse = "\n")
str_libs <- paste(get_rcode_libraries(), "\n")
str_code <- get_datasets_code(datanames, datasets)

hashes <- calculate_hashes(datanames, datasets)
str_code <- c(get_datasets_code(datanames, datasets, hashes), teal.slice::get_filter_expr(datasets, datanames))
} else {
str_header <- get_rcode_header(title = title, description = description)
str_install <- character(0)
Expand Down Expand Up @@ -141,30 +143,21 @@ get_rcode <- function(datasets = NULL,
#' - hash of loaded objects
#' - filter panel code
#' @keywords internal
get_datasets_code <- function(datanames, datasets) {
get_datasets_code <- function(datanames, datasets, hashes) {
str_code <- datasets$get_code(datanames)
if (length(str_code) == 0 || (length(str_code) == 1 && str_code == "")) {
str_code <- paste0(c(
"#################################################################",
"# ___ ____ ____ ___ ____ ____ ____ ____ ____ ____ _ _ _ ____ #",
"# |__] |__/ |___ |__] |__/ | | | |___ [__ [__ | |\\ | | __ #",
"# | | \\ |___ | | \\ |__| |___ |___ ___] ___] | | \\| |__] #",
"# _ ____ ____ _ _ ___ ___ _ _ #",
"# | [__ |___ |\\/| |__] | \\_/ #",
"# | ___] |___ | | | | | #",
"#################################################################\n"
), collapse = "\n")
str_code <- "message('Preprocessing is empty')"
} else if (length(str_code) > 0) {
str_code <- paste0(str_code, "\n\n")
}
if (!datasets$get_check()) {
check_note_string <- paste0(
c(
"## NOTE: Reproducibility of data import and preprocessing was not",
"## explicitly checked (argument \"check = FALSE\" is set).",
"## The app developer has the choice to check the reproducibility",
"## and might have omitted this step for some reason. Please reach",
"## out to the app developer for details.\n"
"warning(paste(\"Reproducibility of data import and preprocessing was not\",",
" \"explicitly checked (argument 'check = FALSE' is set).\",",
" \"The app developer has the choice to check the reproducibility\",",
" \"and might have omitted this step for some reason. Please reach\",",
" \"out to the app developer for details.\n\"))"
),
collapse = "\n"
)
Expand All @@ -177,9 +170,9 @@ get_datasets_code <- function(datanames, datasets) {
datanames,
function(dataname) {
sprintf(
"# %s MD5 hash at the time of analysis: %s",
dataname,
datasets$get_filtered_dataset(dataname)$get_hash()
"stopifnot(%s == %s)",
deparse1(bquote(rlang::hash(.(as.name(dataname))))),
deparse1(hashes[[dataname]])
)
},
character(1)
Expand All @@ -189,11 +182,7 @@ get_datasets_code <- function(datanames, datasets) {
"\n\n"
)

str_filter <- teal.slice::get_filter_expr(datasets, datanames)
if (str_filter != "") {
str_filter <- paste0(str_filter, "\n\n")
}
c(str_code, str_hash, str_filter)
c(str_code, str_hash)
}

## Module ----
Expand Down
22 changes: 21 additions & 1 deletion R/module_nested_tabs.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ srv_nested_tabs.teal_modules <- function(id, datasets, modules, reporter) {
"module { deparse1(modules$label) }."
)
)

modules_reactive <- sapply(names(modules$children), USE.NAMES = TRUE, function(id) {
srv_nested_tabs(id = id, datasets = datasets, modules = modules$children[[id]], reporter = reporter)
})
Expand Down Expand Up @@ -255,13 +256,32 @@ srv_nested_tabs.teal_module <- function(id, datasets, modules, reporter) {
}
)

hashes <- calculate_hashes(datanames, datasets)
metadata <- lapply(datanames, datasets$get_metadata)
names(metadata) <- datanames

new_tdata(
data,
reactive(get_datasets_code(datanames, datasets)),
reactive(c(get_datasets_code(datanames, datasets, hashes), teal.slice::get_filter_expr(datasets, datanames))),
datasets$get_join_keys(),
metadata
)
}

#' Get the hash of a dataset
#'
#' @param datanames (`character`) names of datasets
#' @param datasets (`FilteredData`) object holding the data
#'
#' @return A list of hashes per dataset
#' @keywords internal
#'
calculate_hashes <- function(datanames, datasets) {
sapply(
datanames,
simplify = FALSE,
function(x) {
rlang::hash(datasets$get_data(x, filtered = FALSE))
}
)
}
20 changes: 20 additions & 0 deletions man/calculate_hashes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/get_datasets_code.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions tests/testthat/test-get_rcode.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,16 @@ testthat::test_that("get_datasets_code returns code only for specified datanames
)
)

hashes <- calculate_hashes(datasets$datanames(), datasets)
testthat::expect_true(
!grepl(
"mtcars",
paste(get_datasets_code(datasets = datasets, dataname = "IRIS"), collapse = "\n"),
paste(get_datasets_code(datasets = datasets, dataname = "IRIS", hashes = hashes), collapse = "\n"),
ignore.case = TRUE
) &&
grepl(
"iris",
paste(get_datasets_code(datasets = datasets, dataname = "IRIS"), collapse = "\n"),
paste(get_datasets_code(datasets = datasets, dataname = "IRIS", hashes = hashes), collapse = "\n"),
ignore.case = TRUE
)
)
Expand Down
73 changes: 71 additions & 2 deletions tests/testthat/test-module_nested_tabs.R
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,15 @@ testthat::test_that(".datasets_to_data returns tdata object", {

# code
testthat::expect_equal(
isolate(get_code(data)[1]),
"d1 <- data.frame(id = 1:5, pk = c(2, 3, 2, 1, 4), val = 1:5)\nd2 <- data.frame(id = 1:5, value = 1:5)\n\n"
isolate(get_code(data)),
c(
"d1 <- data.frame(id = 1:5, pk = c(2, 3, 2, 1, 4), val = 1:5)\nd2 <- data.frame(id = 1:5, value = 1:5)\n\n",
paste0(
"stopifnot(rlang::hash(d1) == \"f6f90d2c133ca4abdeb2f7a7d85b731e\")\n",
"stopifnot(rlang::hash(d2) == \"6e30be195b7d914a1311672c3ebf4e4f\") \n\n"
),
""
)
)

# metadata
Expand Down Expand Up @@ -343,3 +350,65 @@ testthat::test_that(".datasets_to_data returns parent datasets for CDISC data",
data <- .datasets_to_data(module, datasets)
testthat::expect_setequal(isolate(names(data)), c("ADSL", "ADAE"))
})

testthat::test_that("calculate_hashes takes a FilteredData and vector of datanames as input", {
adsl <- data.frame(STUDYID = 1, USUBJID = 1)
adae <- data.frame(STUDYID = 1, USUBJID = 1, ASTDTM = 1, AETERM = 1, AESEQ = 1)
adtte <- data.frame(STUDYID = 1, USUBJID = 1, PARAMCD = 1)

datasets <- teal.slice::init_filtered_data(
teal.data::cdisc_data(
teal.data::cdisc_dataset("ADSL", adsl),
teal.data::cdisc_dataset("ADAE", adae),
teal.data::cdisc_dataset("ADTTE", adtte)
)
)

testthat::expect_error(calculate_hashes(datanames = c("ADSL", "ADAE", "ADTTE"), datasets = datasets), NA)
})

testthat::test_that("calculate_hashes returns a named list", {
adsl <- data.frame(STUDYID = 1, USUBJID = 1)
adae <- data.frame(STUDYID = 1, USUBJID = 1, ASTDTM = 1, AETERM = 1, AESEQ = 1)
adtte <- data.frame(STUDYID = 1, USUBJID = 1, PARAMCD = 1)

datasets <- teal.slice::init_filtered_data(
teal.data::cdisc_data(
teal.data::cdisc_dataset("ADSL", adsl),
teal.data::cdisc_dataset("ADAE", adae),
teal.data::cdisc_dataset("ADTTE", adtte)
)
)

hashes <- calculate_hashes(datanames = c("ADSL", "ADAE", "ADTTE"), datasets = datasets)
testthat::expect_identical(
hashes,
list(
"ADSL" = "e89f5271357822c78dd5cfddb60c0a95",
"ADAE" = "f71b576ecfd23075f7285841327515e0",
"ADTTE" = "c68c01c86b946a3dfe05150da040aa2a"
)
)
testthat::expect_is(hashes, "list")
testthat::expect_named(hashes)
})

testthat::test_that("calculate_hashes returns the hash of the non Filtered dataset", {
datasets <- teal.slice::init_filtered_data(
teal.data::teal_data(
teal.data::dataset("iris", iris)
)
)

fs <- list(
iris = list(
Sepal.Length = list(c(5.1, 6.4)),
Species = c("setosa", "versicolor")
)
)
datasets$set_filter_state(state = fs)

hashes <- calculate_hashes(datanames = c("iris"), datasets = datasets)
testthat::expect_identical(hashes, list("iris" = "34844aba7bde36f5a34f6d8e39803508"))
testthat::expect_false(hashes == rlang::hash(isolate(datasets$get_data("iris", filtered = TRUE))))
})

0 comments on commit 0e02c7a

Please sign in to comment.