diff --git a/.buildlibrary b/.buildlibrary index 08794d08e..c6c187108 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '608716' +ValidationKey: '785560' AutocreateReadme: yes allowLinterWarnings: no AddInReadme: tutorial.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5d2e4ca94..7a47c4148 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: mixed-line-ending - repo: https://github.com/lorenzwalthert/precommit - rev: v0.3.2.9019 + rev: v0.3.2.9021 hooks: - id: parsable-R - id: deps-in-desc diff --git a/CITATION.cff b/CITATION.cff index 58f7a0a38..7db1b9125 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,8 +2,8 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'piamInterfaces: Project specific interfaces to REMIND / MAgPIE' -version: 0.3.1 -date-released: '2023-10-06' +version: 0.4.0 +date-released: '2023-10-09' abstract: Project specific interfaces to REMIND / MAgPIE. authors: - family-names: Benke diff --git a/DESCRIPTION b/DESCRIPTION index 2387b7f43..e9f6e14f7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Type: Package Package: piamInterfaces Title: Project specific interfaces to REMIND / MAgPIE -Version: 0.3.1 -Date: 2023-10-06 +Version: 0.4.0 +Date: 2023-10-09 Authors@R: c( person("Falk", "Benke", , "benke@pik-potsdam.de", role = c("aut", "cre")), person("Oliver", "Richters", role = "aut") diff --git a/NAMESPACE b/NAMESPACE index 7e98ea3ac..695aab89e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,10 +31,12 @@ importFrom(data.table,fwrite) importFrom(data.table,setnames) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) +importFrom(dplyr,case_when) importFrom(dplyr,count) importFrom(dplyr,desc) importFrom(dplyr,distinct) importFrom(dplyr,filter) +importFrom(dplyr,first) importFrom(dplyr,group_by) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) @@ -50,7 +52,6 @@ importFrom(dplyr,summarise) importFrom(dplyr,summarize) importFrom(dplyr,ungroup) importFrom(gms,chooseFromList) -importFrom(gms,getLine) importFrom(grDevices,dev.off) importFrom(grDevices,pdf) importFrom(magclass,as.magpie) @@ -65,6 +66,7 @@ importFrom(quitte,as.quitte) importFrom(quitte,getModels) importFrom(quitte,getRegs) importFrom(quitte,getScenarios) +importFrom(quitte,quitteSort) importFrom(quitte,read.quitte) importFrom(quitte,write.IAMCxlsx) importFrom(quitte,write.mif) diff --git a/R/fixOnRef.R b/R/fixOnRef.R index 9cb49c48c..8d7b04496 100644 --- a/R/fixOnRef.R +++ b/R/fixOnRef.R @@ -2,94 +2,127 @@ #' #' @md #' @author Oliver Richters -#' @param mif data or path to mif file of single scenario -#' @param mifRef data or path to mif file of reference scenario -#' @param startyear first time step for each mif and mifRef are expected to differ -#' @param ret "boolean", "fixed" or "fails", depending on what you want to get -#' @param failfile csv file to which failing check are written to -#' @importFrom dplyr group_by summarise ungroup left_join mutate arrange %>% +#' @param data quitte object or mif file +#' @param refscen scenario name of reference scenario, or file or quitte object with reference data +#' @param startyear first time step for which scenarios and reference scenario are expected to differ +#' @param ret "boolean": just return TRUE/FALSE if check was successful +#' "fails": data frame with mismatches between scenario and reference data +#' "fixed": quitte object with data correctly fixed on reference data +#' @param failfile csv file to which mismatches are written to +#' @importFrom dplyr case_when first group_by summarise ungroup left_join mutate arrange %>% #' filter select desc n -#' @importFrom gms getLine -#' @importFrom quitte as.quitte +#' @importFrom quitte as.quitte quitteSort #' @importFrom tidyr pivot_wider #' @importFrom utils write.csv -#' +#' @return see parameter 'ret' #' @export - -fixOnRef <- function(mif, mifRef, startyear, ret = "boolean", failfile = NULL) { - scenario <- variable <- period <- value <- ref <- reldiff <- group <- NULL - mif <- droplevels(as.quitte(mif, na.rm = TRUE)) - mifRef <- droplevels(as.quitte(mifRef, na.rm = TRUE)) +fixOnRef <- function(data, refscen, startyear, ret = "boolean", failfile = NULL) { + scenario <- variable <- period <- value <- ref <- reldiff <- NULL + data <- droplevels(as.quitte(data, na.rm = TRUE)) startyear <- suppressWarnings(as.numeric(startyear)) + # check whether refscen is just the scenario name, or rather data + if (is.character(refscen) && ! file.exists(refscen) && all(refscen %in% levels(data$scenario))) { + refdata <- droplevels(filter(data, scenario == refscen)) + } else { + refdata <- droplevels(as.quitte(refscen, na.rm = TRUE)) + } + refscen <- levels(refdata$scenario) stopifnot( - `'mif' must contain data from one scenario only` = length(levels(mif$scenario)) == 1, - `'mifRef' must contain data from one scenario only` = length(levels(mifRef$scenario)) == 1, + `'refscen' must be a single scenario only` = length(refscen) == 1, `'startyear' must be a single numeric value` = (length(startyear) == 1 && ! is.na(startyear)), `'ret' must be 'boolean', 'fails' or 'fixed'` = (length(ret) == 1 && ret %in% c("boolean", "fails", "fixed")) ) - title <- levels(mif$scenario) - titleRef <- levels(mifRef$scenario) - - if (identical(levels(mif$scenario), levels(mifRef$scenario))) { - levels(mifRef$scenario) <- paste0(levels(mifRef$scenario), "_ref") + returnhelper <- function(ret, boolean, fails, fixed) { + messages <- c(boolean = "Returning a boolean.", + fails = "Returning failing data.", + fixed = "Returning fixed data.") + message(messages[[ret]]) + if (ret == "boolean") return(boolean) + if (ret == "fails") return(fails) + if (ret == "fixed") return(fixed) } - falsepositives <- grep("Moving Avg$", levels(mif$variable), value = TRUE) - - message("Comparing ", title, " with reference run ", titleRef, " for t < ", startyear) - - if (startyear <= min(mif$period)) { - message("No data before startyear found, so no fixing happened") - return(if (ret == "fails") NULL else if (ret == "fixed") mif else TRUE) + message("Comparing with reference run ", refscen, " for t < ", startyear) + if (startyear <= min(data$period)) { + message("No data before startyear found, so no fixing happened.") + return(returnhelper(ret, boolean = TRUE, fails = NULL, fixed = data)) } - comp <- rbind(mutate(mif, scenario = "value"), mutate(mifRef, scenario = "ref")) %>% + + # define false-positives that necessarily differ + falsepositives <- c(grep("Moving Avg$", levels(data$variable), value = TRUE), + grep("Interest Rate (t+1)/(t-1)", levels(data$variable), value = TRUE, fixed = TRUE) + ) + # prepare reference data to left_join it to data + refcomp <- refdata %>% + select(-scenario) %>% + rename(ref = value) %>% + droplevels() + # left_join data with reference run and select everything with bigger differences + comp <- data %>% + left_join(refcomp, by = c("model", "region", "variable", "unit", "period")) %>% filter(! variable %in% falsepositives, period < startyear) %>% - arrange(variable) %>% - pivot_wider(names_from = scenario) %>% mutate(reldiff = abs(value - ref) / pmax(1E-14, abs(value), abs(ref), na.rm = TRUE)) %>% filter(abs(reldiff) > 1E-14) %>% - mutate(scenario = factor(title)) %>% droplevels() if (nrow(comp) == 0) { - message("# Run is perfectly fixed on reference run!") - return(if (ret == "fails") NULL else if (ret == "fixed") mif else TRUE) - } - mismatches <- comp %>% - mutate(model = NULL, scenario = NULL, variable = factor(removePlus(variable))) %>% - arrange(variable) %>% - summarise(period = paste(sort(unique(period)), collapse = ","), - reldiff = max(reldiff), - .by = variable) %>% - mutate(group = factor(gsub("(\\|.*?)\\|.*$", "\\1", variable))) %>% - # mutate(group = factor(gsub("\\|.*", "", variable))) %>% # to group more coarsely - summarise(variable = if (length(unique(variable)) == 1) unique(variable) else unique(group), - variables = n(), - period = paste(sort(unique(strsplit(period, ",")[[1]])), collapse = ", "), - reldiff = max(reldiff), - .by = group) %>% - mutate(reldiff = niceround(reldiff), group = variable, variable = NULL) %>% - droplevels() - - showrows <- 250 - rlang::with_options(width = 160, print(mismatches, n = showrows)) - if (showrows < nrow(mismatches)) { - message("Further ", (nrow(mismatches) - showrows), " variable groups differ.") + message("\n### All runs are perfectly fixed on reference run!") + return(returnhelper(ret, boolean = TRUE, fails = NULL, fixed = data)) } + # print human-readbable summary + .printRefDiff(data, comp) + # save mismatches to file, if requested if (! is.null(failfile) && nrow(comp) > 0) { message("Find failing variables in '", failfile, "'.") write.csv(comp, failfile, quote = FALSE, row.names = FALSE) } - if (ret %in% c("fails", "boolean")) { - return(if (ret == "boolean") FALSE else comp) + # fix correctly on ref + fixeddata <- data %>% + left_join(refcomp, by = c("model", "region", "variable", "unit", "period")) %>% + mutate(value = case_when( + period >= startyear ~ value, + variable %in% falsepositives ~ value, + .default = ref + )) %>% + select(-ref) %>% + quitteSort() + return(returnhelper(ret, boolean = FALSE, fails = comp, fixed = fixeddata)) +} + +.printRefDiff <- function(data, comp) { + model <- scenario <- variable <- period <- reldiff <- group <- NULL + for (m in levels(data$model)) { + for (s in levels(data$scenario)) { + mismatches <- comp %>% + filter(model == m, scenario == s) %>% + select(-model, -scenario) + if (nrow(mismatches) == 0) { + message("\n### Everything fine for model=", m, " and scenario=", s) + } else { + groupdepth <- 3 + groupgrep <- paste(c(rep("(\\|.*?)", groupdepth - 1), "\\|.*$"), collapse = "") + mismatches <- mismatches %>% + mutate(variable = factor(removePlus(variable))) %>% + arrange(variable) %>% + summarise(period = paste(sort(unique(period)), collapse = ","), + reldiff = max(reldiff), + .by = variable) %>% + mutate(group = factor(gsub(groupgrep, "\\1", variable))) %>% + summarise(variable = if (length(unique(variable)) == 1) unique(variable) else unique(group), + variables = n(), + period = paste(sort(unique(strsplit(period, ",")[[1]])), collapse = ", "), + reldiff = max(reldiff), + .by = group) %>% + mutate(reldiff = niceround(reldiff), group = variable, variable = NULL) %>% + droplevels() + message("\n### Incorrect fixing for these variable groups for model=", m, " and scenario=", s) + showrows <- 250 + rlang::with_options(width = 160, print(mismatches, n = showrows)) + if (showrows < nrow(mismatches)) { + message("Further ", (nrow(mismatches) - showrows), " variable groups differ.") + } + } + } } - message("Returning corrected data for ", title, ".") - di <- rbind( - filter(mif, period >= startyear | ! variable %in% levels(mifRef$variable) | variable %in% falsepositives), - filter(mifRef, period < startyear, variable %in% setdiff(levels(mif$variable), falsepositives)) - ) %>% - mutate(scenario = factor(title)) %>% - quitteSort() - return(di) } diff --git a/README.md b/README.md index e5e8396fa..143cfffda 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Project specific interfaces to REMIND / MAgPIE -R package **piamInterfaces**, version **0.3.1** +R package **piamInterfaces**, version **0.4.0** [![CRAN status](https://www.r-pkg.org/badges/version/piamInterfaces)](https://cran.r-project.org/package=piamInterfaces) [![R build status](https://github.com/pik-piam/piamInterfaces/workflows/check/badge.svg)](https://github.com/pik-piam/piamInterfaces/actions) [![codecov](https://codecov.io/gh/pik-piam/piamInterfaces/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/piamInterfaces) [![r-universe](https://pik-piam.r-universe.dev/badges/piamInterfaces)](https://pik-piam.r-universe.dev/builds) @@ -64,7 +64,7 @@ In case of questions / problems please contact Falk Benke To cite package **piamInterfaces** in publications use: -Benke F, Richters O (2023). _piamInterfaces: Project specific interfaces to REMIND / MAgPIE_. R package version 0.3.1, . +Benke F, Richters O (2023). _piamInterfaces: Project specific interfaces to REMIND / MAgPIE_. R package version 0.4.0, . A BibTeX entry for LaTeX users is @@ -73,7 +73,7 @@ A BibTeX entry for LaTeX users is title = {piamInterfaces: Project specific interfaces to REMIND / MAgPIE}, author = {Falk Benke and Oliver Richters}, year = {2023}, - note = {R package version 0.3.1}, + note = {R package version 0.4.0}, url = {https://github.com/pik-piam/piamInterfaces}, } ``` diff --git a/man/fixOnRef.Rd b/man/fixOnRef.Rd index c43c0021c..de5029e5e 100644 --- a/man/fixOnRef.Rd +++ b/man/fixOnRef.Rd @@ -4,18 +4,23 @@ \alias{fixOnRef} \title{Checks for a run if it is correctly fixed on the reference run for t < startyear} \usage{ -fixOnRef(mif, mifRef, startyear, ret = "boolean", failfile = NULL) +fixOnRef(data, refscen, startyear, ret = "boolean", failfile = NULL) } \arguments{ -\item{mif}{data or path to mif file of single scenario} +\item{data}{quitte object or mif file} -\item{mifRef}{data or path to mif file of reference scenario} +\item{refscen}{scenario name of reference scenario, or file or quitte object with reference data} -\item{startyear}{first time step for each mif and mifRef are expected to differ} +\item{startyear}{first time step for which scenarios and reference scenario are expected to differ} -\item{ret}{"boolean", "fixed" or "fails", depending on what you want to get} +\item{ret}{"boolean": just return TRUE/FALSE if check was successful +"fails": data frame with mismatches between scenario and reference data +"fixed": quitte object with data correctly fixed on reference data} -\item{failfile}{csv file to which failing check are written to} +\item{failfile}{csv file to which mismatches are written to} +} +\value{ +see parameter 'ret' } \description{ Checks for a run if it is correctly fixed on the reference run for t < startyear diff --git a/tests/testthat/test-fixOnRef.R b/tests/testthat/test-fixOnRef.R index 28d742421..04b4c89a7 100644 --- a/tests/testthat/test-fixOnRef.R +++ b/tests/testthat/test-fixOnRef.R @@ -1,10 +1,14 @@ test_that("fixOnRef works", { qe <- quitte::quitte_example_dataAR6 d <- droplevels(dplyr::filter(qe, - scenario == levels(scenario)[[1]], - model == levels(model)[[1]])) + scenario == first(scenario), + model == first(model))) expect_true(fixOnRef(d, d, startyear = 2020, ret = "boolean")) + # somehow, only MESSAGEix results are correct in the dataset above + expect_true(fixOnRef(filter(qe, model == "MESSAGEix"), "Current Policies", startyear = 2020, ret = "boolean")) + qefixed <- fixOnRef(qe, "Current Policies", startyear = 2020, ret = "fixed") + expect_true(fixOnRef(qefixed, "Current Policies", startyear = 2020, ret = "boolean")) expect_identical(fixOnRef(d, d, startyear = 2020, ret = "fixed"), d) expect_true(is.null(fixOnRef(d, d, startyear = 2020, ret = "fails"))) # generate object with wrong data in 2020 @@ -21,7 +25,8 @@ test_that("fixOnRef works", { expect_identical(dwrong, fixOnRef(dwrong, d, startyear = 2010, ret = "fixed")) expect_true(fixOnRef(dwrong, d, startyear = min(dwrong$period), ret = "boolean")) # make sure only valid settings are accepted + expect_no_error(fixOnRef(qe, filter(qe, scenario == first(scenario)), startyear = 2005, ret = "boolean")) expect_error(fixOnRef(dwrong, d, startyear = 2020, ret = "whatever"), "ret") expect_error(fixOnRef(dwrong, d, startyear = "whenever"), "startyear") - expect_error(fixOnRef(qe, qe, startyear = 2020), "mif") + expect_error(fixOnRef(qe, qe, startyear = 2020), "refscen") })