improve fixOnRef to let it support multiple models and scenarios

pik-piam · Oct 9, 2023 · 7383e33 · 7383e33
1 parent 851ea51
commit 7383e33
Show file tree

Hide file tree

Showing 9 changed files with 129 additions and 84 deletions.
diff --git a/.buildlibrary b/.buildlibrary
@@ -1,4 +1,4 @@
-ValidationKey: '608716'
+ValidationKey: '785560'
 AutocreateReadme: yes
 allowLinterWarnings: no
 AddInReadme: tutorial.md

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
     -   id: mixed-line-ending
 
 -   repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.3.2.9019
+    rev: v0.3.2.9021
     hooks:
     -   id: parsable-R
     -   id: deps-in-desc

diff --git a/CITATION.cff b/CITATION.cff
@@ -2,8 +2,8 @@ cff-version: 1.2.0
 message: If you use this software, please cite it using the metadata from this file.
 type: software
 title: 'piamInterfaces: Project specific interfaces to REMIND / MAgPIE'
-version: 0.3.1
-date-released: '2023-10-06'
+version: 0.4.0
+date-released: '2023-10-09'
 abstract: Project specific interfaces to REMIND / MAgPIE.
 authors:
 - family-names: Benke

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Type: Package
 Package: piamInterfaces
 Title: Project specific interfaces to REMIND / MAgPIE
-Version: 0.3.1
-Date: 2023-10-06
+Version: 0.4.0
+Date: 2023-10-09
 Authors@R: c(
     person("Falk", "Benke", , "[email protected]", role = c("aut", "cre")),
     person("Oliver", "Richters", role = "aut")

diff --git a/NAMESPACE b/NAMESPACE
@@ -31,10 +31,12 @@ importFrom(data.table,fwrite)
 importFrom(data.table,setnames)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,arrange)
+importFrom(dplyr,case_when)
 importFrom(dplyr,count)
 importFrom(dplyr,desc)
 importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
+importFrom(dplyr,first)
 importFrom(dplyr,group_by)
 importFrom(dplyr,inner_join)
 importFrom(dplyr,left_join)
@@ -50,7 +52,6 @@ importFrom(dplyr,summarise)
 importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
 importFrom(gms,chooseFromList)
-importFrom(gms,getLine)
 importFrom(grDevices,dev.off)
 importFrom(grDevices,pdf)
 importFrom(magclass,as.magpie)
@@ -65,6 +66,7 @@ importFrom(quitte,as.quitte)
 importFrom(quitte,getModels)
 importFrom(quitte,getRegs)
 importFrom(quitte,getScenarios)
+importFrom(quitte,quitteSort)
 importFrom(quitte,read.quitte)
 importFrom(quitte,write.IAMCxlsx)
 importFrom(quitte,write.mif)

diff --git a/R/fixOnRef.R b/R/fixOnRef.R
@@ -2,94 +2,127 @@
 #'
 #' @md
 #' @author Oliver Richters
-#' @param mif data or path to mif file of single scenario
-#' @param mifRef data or path to mif file of reference scenario
-#' @param startyear first time step for each mif and mifRef are expected to differ
-#' @param ret "boolean", "fixed" or "fails", depending on what you want to get
-#' @param failfile csv file to which failing check are written to
-#' @importFrom dplyr group_by summarise ungroup left_join mutate arrange %>%
+#' @param data quitte object or mif file
+#' @param refscen scenario name of reference scenario, or file or quitte object with reference data
+#' @param startyear first time step for which scenarios and reference scenario are expected to differ
+#' @param ret "boolean": just return TRUE/FALSE if check was successful
+#'            "fails": data frame with mismatches between scenario and reference data
+#'            "fixed": quitte object with data correctly fixed on reference data
+#' @param failfile csv file to which mismatches are written to
+#' @importFrom dplyr case_when first group_by summarise ungroup left_join mutate arrange %>%
 #'             filter select desc n
-#' @importFrom gms getLine
-#' @importFrom quitte as.quitte
+#' @importFrom quitte as.quitte quitteSort
 #' @importFrom tidyr pivot_wider
 #' @importFrom utils write.csv
-#'
+#' @return see parameter 'ret'
 #' @export
 
-
-fixOnRef <- function(mif, mifRef, startyear, ret = "boolean", failfile = NULL) {
-  scenario <- variable <- period <- value <- ref <- reldiff <- group <- NULL
-  mif <- droplevels(as.quitte(mif, na.rm = TRUE))
-  mifRef <- droplevels(as.quitte(mifRef, na.rm = TRUE))
+fixOnRef <- function(data, refscen, startyear, ret = "boolean", failfile = NULL) {
+  scenario <- variable <- period <- value <- ref <- reldiff <- NULL
+  data <- droplevels(as.quitte(data, na.rm = TRUE))
   startyear <- suppressWarnings(as.numeric(startyear))
+  # check whether refscen is just the scenario name, or rather data
+  if (is.character(refscen) && ! file.exists(refscen) && all(refscen %in% levels(data$scenario))) {
+    refdata <- droplevels(filter(data, scenario == refscen))
+  } else {
+    refdata <- droplevels(as.quitte(refscen, na.rm = TRUE))
+  }
+  refscen <- levels(refdata$scenario)
   stopifnot(
-    `'mif' must contain data from one scenario only` = length(levels(mif$scenario)) == 1,
-    `'mifRef' must contain data from one scenario only` = length(levels(mifRef$scenario)) == 1,
+    `'refscen' must be a single scenario only` = length(refscen) == 1,
     `'startyear' must be a single numeric value` = (length(startyear) == 1 && ! is.na(startyear)),
     `'ret' must be 'boolean', 'fails' or 'fixed'` = (length(ret) == 1 && ret %in% c("boolean", "fails", "fixed"))
   )
 
-  title <- levels(mif$scenario)
-  titleRef <- levels(mifRef$scenario)
-
-  if (identical(levels(mif$scenario), levels(mifRef$scenario))) {
-    levels(mifRef$scenario) <- paste0(levels(mifRef$scenario), "_ref")
+  returnhelper <- function(ret, boolean, fails, fixed) {
+    messages <- c(boolean = "Returning a boolean.",
+                  fails   = "Returning failing data.",
+                  fixed   = "Returning fixed data.")
+    message(messages[[ret]])
+    if (ret == "boolean") return(boolean)
+    if (ret == "fails") return(fails)
+    if (ret == "fixed") return(fixed)
   }
 
-  falsepositives <- grep("Moving Avg$", levels(mif$variable), value = TRUE)
-
-  message("Comparing ", title, " with reference run ", titleRef, " for t < ", startyear)
-
-  if (startyear <= min(mif$period)) {
-    message("No data before startyear found, so no fixing happened")
-    return(if (ret == "fails") NULL else if (ret == "fixed") mif else TRUE)
+  message("Comparing with reference run ", refscen, " for t < ", startyear)
+  if (startyear <= min(data$period)) {
+    message("No data before startyear found, so no fixing happened.")
+    return(returnhelper(ret, boolean = TRUE, fails = NULL, fixed = data))
   }
-  comp <- rbind(mutate(mif, scenario = "value"), mutate(mifRef, scenario = "ref")) %>%
+
+  # define false-positives that necessarily differ
+  falsepositives <- c(grep("Moving Avg$", levels(data$variable), value = TRUE),
+                      grep("Interest Rate (t+1)/(t-1)", levels(data$variable), value = TRUE, fixed = TRUE)
+                     )
+  # prepare reference data to left_join it to data
+  refcomp <- refdata %>%
+    select(-scenario) %>%
+    rename(ref = value) %>%
+    droplevels()
+  # left_join data with reference run and select everything with bigger differences
+  comp <- data %>%
+    left_join(refcomp, by = c("model", "region", "variable", "unit", "period")) %>%
     filter(! variable %in% falsepositives, period < startyear) %>%
-    arrange(variable) %>%
-    pivot_wider(names_from = scenario) %>%
     mutate(reldiff = abs(value - ref) / pmax(1E-14, abs(value), abs(ref), na.rm = TRUE)) %>%
     filter(abs(reldiff) > 1E-14) %>%
-    mutate(scenario = factor(title)) %>%
     droplevels()
   if (nrow(comp) == 0) {
-    message("# Run is perfectly fixed on reference run!")
-    return(if (ret == "fails") NULL else if (ret == "fixed") mif else TRUE)
-  }
-  mismatches <- comp %>%
-    mutate(model = NULL, scenario = NULL, variable = factor(removePlus(variable))) %>%
-    arrange(variable) %>%
-    summarise(period = paste(sort(unique(period)), collapse = ","),
-              reldiff = max(reldiff),
-              .by = variable) %>%
-    mutate(group = factor(gsub("(\\|.*?)\\|.*$", "\\1", variable))) %>%
-    # mutate(group = factor(gsub("\\|.*", "", variable))) %>% # to group more coarsely
-    summarise(variable = if (length(unique(variable)) == 1) unique(variable) else unique(group),
-              variables = n(),
-              period = paste(sort(unique(strsplit(period, ",")[[1]])), collapse = ", "),
-              reldiff = max(reldiff),
-              .by = group) %>%
-    mutate(reldiff = niceround(reldiff), group = variable, variable = NULL) %>%
-    droplevels()
-
-  showrows <- 250
-  rlang::with_options(width = 160, print(mismatches, n = showrows))
-  if (showrows < nrow(mismatches)) {
-    message("Further ", (nrow(mismatches) - showrows), " variable groups differ.")
+    message("\n### All runs are perfectly fixed on reference run!")
+    return(returnhelper(ret, boolean = TRUE, fails = NULL, fixed = data))
   }
+  # print human-readbable summary
+  .printRefDiff(data, comp)
+  # save mismatches to file, if requested
   if (! is.null(failfile) && nrow(comp) > 0) {
     message("Find failing variables in '", failfile, "'.")
     write.csv(comp, failfile, quote = FALSE, row.names = FALSE)
   }
-  if (ret %in% c("fails", "boolean")) {
-    return(if (ret == "boolean") FALSE else comp)
+  # fix correctly on ref
+  fixeddata <- data %>%
+    left_join(refcomp, by = c("model", "region", "variable", "unit", "period")) %>%
+    mutate(value = case_when(
+      period >= startyear ~ value,
+      variable %in% falsepositives ~ value,
+      .default = ref
+    )) %>%
+    select(-ref) %>%
+    quitteSort()
+  return(returnhelper(ret, boolean = FALSE, fails = comp, fixed = fixeddata))
+}
+
+.printRefDiff <- function(data, comp) {
+  model <- scenario <- variable <- period <- reldiff <- group <- NULL
+  for (m in levels(data$model)) {
+    for (s in levels(data$scenario)) {
+      mismatches <- comp %>%
+        filter(model == m, scenario == s) %>%
+        select(-model, -scenario)
+      if (nrow(mismatches) == 0) {
+        message("\n### Everything fine for model=", m, " and scenario=", s)
+      } else {
+        groupdepth <- 3
+        groupgrep <- paste(c(rep("(\\|.*?)", groupdepth - 1), "\\|.*$"), collapse = "")
+        mismatches <- mismatches %>%
+          mutate(variable = factor(removePlus(variable))) %>%
+          arrange(variable) %>%
+          summarise(period = paste(sort(unique(period)), collapse = ","),
+                    reldiff = max(reldiff),
+                    .by = variable) %>%
+          mutate(group = factor(gsub(groupgrep, "\\1", variable))) %>%
+          summarise(variable = if (length(unique(variable)) == 1) unique(variable) else unique(group),
+                    variables = n(),
+                    period = paste(sort(unique(strsplit(period, ",")[[1]])), collapse = ", "),
+                    reldiff = max(reldiff),
+                    .by = group) %>%
+          mutate(reldiff = niceround(reldiff), group = variable, variable = NULL) %>%
+          droplevels()
+        message("\n### Incorrect fixing for these variable groups for model=", m, " and scenario=", s)
+        showrows <- 250
+        rlang::with_options(width = 160, print(mismatches, n = showrows))
+        if (showrows < nrow(mismatches)) {
+          message("Further ", (nrow(mismatches) - showrows), " variable groups differ.")
+        }
+      }
+    }
   }
-  message("Returning corrected data for ", title, ".")
-  di <- rbind(
-            filter(mif, period >= startyear | ! variable %in% levels(mifRef$variable) | variable %in% falsepositives),
-            filter(mifRef, period < startyear, variable %in% setdiff(levels(mif$variable), falsepositives))
-           ) %>%
-        mutate(scenario = factor(title)) %>%
-        quitteSort()
-  return(di)
 }
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Project specific interfaces to REMIND / MAgPIE
 
-R package **piamInterfaces**, version **0.3.1**
+R package **piamInterfaces**, version **0.4.0**
 
 [![CRAN status](https://www.r-pkg.org/badges/version/piamInterfaces)](https://cran.r-project.org/package=piamInterfaces)  [![R build status](https://github.com/pik-piam/piamInterfaces/workflows/check/badge.svg)](https://github.com/pik-piam/piamInterfaces/actions) [![codecov](https://codecov.io/gh/pik-piam/piamInterfaces/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/piamInterfaces) [![r-universe](https://pik-piam.r-universe.dev/badges/piamInterfaces)](https://pik-piam.r-universe.dev/builds)
 
@@ -64,7 +64,7 @@ In case of questions / problems please contact Falk Benke <[email protected]>
 
 To cite package **piamInterfaces** in publications use:
 
-Benke F, Richters O (2023). _piamInterfaces: Project specific interfaces to REMIND / MAgPIE_. R package version 0.3.1, <https://github.com/pik-piam/piamInterfaces>.
+Benke F, Richters O (2023). _piamInterfaces: Project specific interfaces to REMIND / MAgPIE_. R package version 0.4.0, <URL: https://github.com/pik-piam/piamInterfaces>.
 
 A BibTeX entry for LaTeX users is
 
@@ -73,7 +73,7 @@ A BibTeX entry for LaTeX users is
   title = {piamInterfaces: Project specific interfaces to REMIND / MAgPIE},
   author = {Falk Benke and Oliver Richters},
   year = {2023},
-  note = {R package version 0.3.1},
+  note = {R package version 0.4.0},
   url = {https://github.com/pik-piam/piamInterfaces},
 }
 ```
diff --git a/man/fixOnRef.Rd b/man/fixOnRef.Rd
diff --git a/tests/testthat/test-fixOnRef.R b/tests/testthat/test-fixOnRef.R
@@ -1,10 +1,14 @@
 test_that("fixOnRef works", {
   qe <- quitte::quitte_example_dataAR6
   d <- droplevels(dplyr::filter(qe,
-                                scenario == levels(scenario)[[1]],
-                                model == levels(model)[[1]]))
+                                scenario == first(scenario),
+                                model == first(model)))
 
   expect_true(fixOnRef(d, d, startyear = 2020, ret = "boolean"))
+  # somehow, only MESSAGEix results are correct in the dataset above
+  expect_true(fixOnRef(filter(qe, model == "MESSAGEix"), "Current Policies", startyear = 2020, ret = "boolean"))
+  qefixed <- fixOnRef(qe, "Current Policies", startyear = 2020, ret = "fixed")
+  expect_true(fixOnRef(qefixed, "Current Policies", startyear = 2020, ret = "boolean"))
   expect_identical(fixOnRef(d, d, startyear = 2020, ret = "fixed"), d)
   expect_true(is.null(fixOnRef(d, d, startyear = 2020, ret = "fails")))
   # generate object with wrong data in 2020
@@ -21,7 +25,8 @@ test_that("fixOnRef works", {
   expect_identical(dwrong, fixOnRef(dwrong, d, startyear = 2010, ret = "fixed"))
   expect_true(fixOnRef(dwrong, d, startyear = min(dwrong$period), ret = "boolean"))
   # make sure only valid settings are accepted
+  expect_no_error(fixOnRef(qe, filter(qe, scenario == first(scenario)), startyear = 2005, ret = "boolean"))
   expect_error(fixOnRef(dwrong, d, startyear = 2020, ret = "whatever"), "ret")
   expect_error(fixOnRef(dwrong, d, startyear = "whenever"), "startyear")
-  expect_error(fixOnRef(qe, qe, startyear = 2020), "mif")
+  expect_error(fixOnRef(qe, qe, startyear = 2020), "refscen")
 })