
Commit

new and revised vignettes, print bug fix
dlorenz-usgs committed Dec 3, 2015
1 parent 4afdcc0 commit 51183f5
Showing 64 changed files with 1,302 additions and 78 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,8 +1,8 @@
Package: rloadest
Type: Package
Title: River Load Estimation
Version: 0.4.2
Date: 2015-07-20
Version: 0.4.3
Date: 2015-12-03
Author: Dave Lorenz, Rob Runkel, Laura De Cicco
Maintainer: Dave Lorenz <[email protected]>
Description: Collection of functions to make constituent load estimations
4 changes: 3 additions & 1 deletion NAMESPACE
@@ -1,4 +1,4 @@
# Generated by roxygen2 (4.1.0): do not edit by hand
# Generated by roxygen2 (4.1.1): do not edit by hand

S3method(censoring,Surv)
S3method(coef,loadReg)
@@ -8,6 +8,7 @@ S3method(makepredictcall,center)
S3method(mean,character)
S3method(mean,factor)
S3method(plot,loadReg)
S3method(print,jackStats)
S3method(print,loadReg)
S3method(residuals,loadReg)
S3method(rmse,loadReg)
@@ -16,6 +17,7 @@ export(AICc)
export(c2load)
export(center)
export(dailyAg)
export(jackStats)
export(loadConvFactor)
export(loadReg)
export(loadReport)
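The new NAMESPACE directives are generated by roxygen2 from tags in the new
source files; a sketch of the correspondence (the tags shown are the ones that
appear in jackStats.R and print.jackStats.R below).

#' @export                   # on jackStats() -> export(jackStats)
#' @export                   # on print.jackStats(), together with
#' @method print jackStats   #   this tag -> S3method(print,jackStats)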
2 changes: 1 addition & 1 deletion R/center.R
@@ -9,7 +9,7 @@
#' @note The centering value by default is computed by the method described
#'in Cohn and others (1992).
#' @return The centered value of \code{x}.
#' @seealso \code{\link[USGSwsBase]{quadratic}}, \code{\link{scale}}
#' @seealso \code{\link[smwrBase]{quadratic}}, \code{\link{scale}}
#' @references Cohn, T.A., Caulder, D.L., Gilroy, E.J., Zynjuk, L.D., and
#'Summers, R.M., 1992, The validity of a simple statistical model for
#'estimating fluvial constituent loads---An empirical study involving nutrient
142 changes: 142 additions & 0 deletions R/jackStats.R
@@ -0,0 +1,142 @@
#' Jackknife Statistics
#'
#' Compute selected jackknife statistics for a rating-curve load-estimation model.
#'
#' @param fit an object of class "loadReg"---output from \code{loadReg}. Can also
#'be an object of class "censReg."
#' @param which a character string indicating which model to assess: "load" or
#'"concentration" for an object of class "loadReg," or "censReg" for
#'an object of class "censReg."
#' @return An object of class "jackStats" containing these components:
#'coef, the table of coefficient estimates with their jackknife bias and standard errors\cr
#'coefficients, the jackknifed coefficients\cr
#'pctcens, the percentage of left-censored values\cr
#'The PRESS statistic and individual jackknife differences are also returned
#'when the percentage of censoring is 0.
#' @note The \code{jackStats} function can only be used when the analysis is AMLE.
#'
#'Abdi and Williams (2010) describe the jackknife as referring to two related techniques: the first
#'estimates the parameters, their bias, and their standard errors; the second evaluates the
#'predictive performance of the model. The second technique is the PRESS statistic (Helsel
#'and Hirsch, 2002), which can only be used on uncensored data; it is computed by \code{jackStats}
#'when no data are censored. The first technique can be used to assess the coefficients of the
#'regression---the bias should be small and the jackknife standard errors should not be much
#'different from the standard errors reported for the regression. Efron and Tibshirani (1993)
#'suggest that the bias is small if the relative bias (bias divided by the jackknife standard
#'error) is less than 0.25.
#' @seealso \code{\link{loadReg}}
#' @keywords utilities
#' @references
#' Abdi, H. and Williams, L.J., 2010, Jackknife, in Encyclopedia of Research Design,
#'Salkind, N.J., editor: Thousand Oaks, Calif., SAGE Publications, 1719 p.
#'
#'Efron, B. and Tibshirani, R.J., 1993, An introduction to the bootstrap: Boca Raton,
#'Fla., Chapman and Hall/CRC, 436 p.
#'
#' Helsel, D.R. and Hirsch, R.M., 2002, Statistical methods in water resources:
#'U.S. Geological Survey Techniques of Water-Resources Investigations, book 4,
#'chap. A3, 522 p.
#' @examples
#'# From application 1 in the vignettes
#'data(app1.calib)
#'app1.lr <- loadReg(Phosphorus ~ model(1), data = app1.calib,
#' flow = "FLOW", dates = "DATES", conc.units="mg/L",
#' station="Illinois River at Marseilles, Ill.")
#'jackStats(app1.lr)
#' @export
jackStats <- function(fit, which="load") {
## Compute some stats
which <- match.arg(which, c("load", "concentration", "censReg"))
if(which == "load") {
# Verify AMLE
if(fit$method != "AMLE") {
stop("The analysis method must be AMLE")
}
# initial stuff
NPAR <- fit$lfit$NPAR
NOBS <- fit$lfit$NOBSC
# Get the response, X, and Yeff
Y <- as.lcens(exp(fit$lfit$YLCAL), exp(fit$lfit$YD), fit$lfit$CENSFLAG)
X <- fit$lfit$XLCAL
Yeff <- fit$lfit$YLCAL
## The commented code below computes an effective value for left-censored
## values as the expected value from the prediction. It is retained in case
## the method is ever published; no plans for now.
#Yeff[fit$lfit$CENSFLAG] <- fit$lfit$XLCAL[fit$lfit$CENSFLAG, ,drop=FALSE] %*%
# fit$lfit$PARAML[seq(NPAR)] + fit$lfit$RESID[fit$lfit$CENSFLAG]
#
# Other Info
dist <- "lognormal"
parms <- fit$lfit$PARAML[seq(fit$lfit$NPAR)]
parnames <- colnames(fit$lfit$XLCAL)
} else if(which == "concentration") {
# Verify AMLE
if(fit$method != "AMLE") {
stop("The analysis method must be AMLE")
}
# initial stuff
NPAR <- fit$cfit$NPAR
NOBS <- fit$cfit$NOBSC
# Get the response, X, and Yeff
Y <- as.lcens(exp(fit$cfit$YLCAL), exp(fit$cfit$YD), fit$cfit$CENSFLAG)
X <- fit$cfit$XLCAL
Yeff <- fit$cfit$YLCAL
## See comment above
#Yeff[fit$cfit$CENSFLAG] <- fit$cfit$XLCAL[fit$cfit$CENSFLAG, ,drop=FALSE] %*%
# fit$cfit$PARAML[seq(NPAR)] + fit$cfit$RESID[fit$cfit$CENSFLAG]
#
# Other Info
dist <- "lognormal"
parms <- fit$cfit$PARAML[seq(fit$cfit$NPAR)]
parnames <- colnames(fit$cfit$XLCAL)
} else { # must be censReg
# Verify AMLE
if(fit$method != "AMLE") {
stop("The analysis method must be AMLE")
}
# initial stuff
NPAR <- fit$NPAR
NOBS <- fit$NOBSC
dist <- fit$dist
# Get the response, X, and Yeff
if(dist == "lognormal") {
Y <- as.lcens(exp(fit$YLCAL), exp(fit$YD), fit$CENSFLAG)
} else {
Y <- as.lcens(fit$YLCAL, fit$YD, fit$CENSFLAG)
}
X <- fit$XLCAL
Yeff <- fit$YLCAL
## See comment above
#Yeff[fit$CENSFLAG] <- fit$XLCAL[fit$CENSFLAG, ,drop=FALSE] %*%
# fit$PARAML[seq(NPAR)] + fit$RESID[fit$CENSFLAG]
#
# Other Info
parms <- fit$PARAML[seq(fit$NPAR)]
parnames <- colnames(fit$XLCAL)
}
# Do the jackknife: set up storage for the PRESS residuals and coefficients
pre <- numeric(NOBS)
coeff <- matrix(0, nrow=NOBS, ncol=NPAR)
for(i in seq(NOBS)) {
tmp <- censReg_AMLE.fit(Y[-i], X[-i,], dist)
coeff[i,] <- tmp$PARAML[-(NPAR + 1L)]
pre[i] <- Yeff[i] - coeff[i,, drop=FALSE] %*% t(X[i,,drop=FALSE])
}
# Compute the jackknife bias and variance of coeffs
out <- (NOBS - 1)*(rep(parms, each=NOBS) - coeff)
bias <- -1/NOBS*colSums(out)
var <- 1/(NOBS*(NOBS-1)) * (colSums(out^2) - NOBS * bias^2)
coef <- cbind(est=parms, bias=bias, stderr=sqrt(var))
rownames(coef) <- parnames
retval <- list(coef=coef, coefficients=coeff,
pctcens=pctCens(Y))
# Include press if no censoring
if(retval$pctcens == 0) {
retval$press <- sum(pre^2)
retval$pre <- pre
}
class(retval) <- "jackStats"
return(retval)
}
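The loop and the two lines that follow it implement the textbook jackknife
estimators. Below is a minimal sketch of the same bias, standard-error, and
PRESS computations applied to an ordinary uncensored lm() fit; the data are
simulated for illustration and are not from the package.

# Jackknife bias/standard error and PRESS for an uncensored lm() fit;
# mirrors the formulas in jackStats() but with simulated data.
set.seed(1)
n <- 30
x <- rnorm(n)
y <- 1 + 2*x + rnorm(n)
full <- coef(lm(y ~ x))                      # full-sample estimates
coeff <- t(sapply(seq_len(n), function(i)
  coef(lm(y[-i] ~ x[-i]))))                  # leave-one-out estimates
out <- (n - 1)*(rep(full, each=n) - coeff)   # jackknife pseudo-differences
bias <- -colSums(out)/n                      # jackknife bias
se <- sqrt((colSums(out^2) - n*bias^2)/(n*(n - 1)))
cbind(est=full, bias=bias, stderr=se)        # analogous to the coef table
pre <- y - rowSums(cbind(1, x) * coeff)      # leave-one-out prediction errors
sum(pre^2)                                   # the PRESS statistic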
2 changes: 1 addition & 1 deletion R/plot.loadReg.R
@@ -22,7 +22,7 @@
#' @param span the span to use for the loess smooth. Set to 0 to suppress.
#' @param \dots further arguments passed to or from other methods.
#' @return The object \code{x} is returned invisibly.
#' @note This plotting function uses the core routines in the \code{USGSwsGraphs}
#' @note This plotting function uses the core routines in the \code{smwrGraphs}
#'package. It requires a graphics page that is set up from the functions
#'in that package (\code{setpage} or \code{setPDF}) if \code{set.up} is
#'\code{FALSE}. The graphs that are produced by this function are based
10 changes: 6 additions & 4 deletions R/predConc.R
@@ -190,9 +190,10 @@ predConc <- function(fit, newdata, by="day",
names(retval)[6L:7L] <- Names
} else if(by == "day") {
## One or more obs per day
## KDate are the dates of the days
## KDays are the indexes to days
## Kin are the indexes to the good model inputs
## Kdy are the indexes to the days
## Kdy are the indexes to the days of model inputs
## KinAll are the indexes to all days
##
## Preserve flow for later
Expand All @@ -210,11 +211,12 @@ predConc <- function(fit, newdata, by="day",
Kin <- Kin[is.finite(rowSums(model.inp))]
KDate <- as.Date(as.POSIXlt(newdata[[dates]]))
Kdy <- as.integer(KDate)
KDate <- unique(KDate)
Kdy <- Kdy - Kdy[1L] + 1L # make relative to first day (Index)
if(length(unique(table(Kdy))) > 1L && !allow.incomplete) {
Ktbl <- table(Kdy)
if(length(unique(Ktbl)) > 1L && !allow.incomplete) {
warning("Variable observations per day, either set the allow.incomplete argument to TRUE or use the resampleUVdata function to construct a uniform series")
}
Kdy <- rep(seq(1, length(Ktbl)), Ktbl) # Create the index to days in input
KDate <- unique(KDate)
KinAll <- unique(Kdy)
## Make it daily flow, Flow0 indicates a partial 0 flow
Flow0 <- tapply(Flow, Kdy, min)
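The reworked block above replaces the raw relative-day index with a compact
index rebuilt from a table of observations per day. A small sketch with
hypothetical integer dates (including a gap between days) shows why; it
assumes, as predConc does, that the input is sorted by time.

# Hypothetical dates: two observations on day 1, three on day 3.
Kdy <- c(16770L, 16770L, 16772L, 16772L, 16772L)
Kdy <- Kdy - Kdy[1L] + 1L        # relative to the first day: 1 1 3 3 3
Ktbl <- table(Kdy)               # observations per day: 2, 3
rep(seq(1, length(Ktbl)), Ktbl)  # compact index to days: 1 1 2 2 2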
38 changes: 20 additions & 18 deletions R/predLoad.R
@@ -215,9 +215,10 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
names(retval)[6L:7L] <- Names
} else if(by == "day") {
## One or more obs per day
## KDate are the dates of the days
## KDays are the indexes to days
## Kin are the indexes to the good model inputs
## Kdy are the indexes to the days
## Kdy are the indexes to the days of model inputs
## KinAll are the indexes to all days
##
## Preserve flow for later
Expand All @@ -235,11 +236,12 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
Kin <- Kin[is.finite(rowSums(model.inp))]
KDate <- as.Date(as.POSIXlt(newdata[[dates]]))
Kdy <- as.integer(KDate)
KDate <- unique(KDate)
Kdy <- Kdy - Kdy[1L] + 1L # make relative to first day (Index)
if(length(unique(table(Kdy))) > 1L && !allow.incomplete) {
Ktbl <- table(Kdy)
if(length(unique(Ktbl)) > 1L && !allow.incomplete) {
warning("Variable observations per day, either set the allow.incomplete argument to TRUE or use the resampleUVdata function to construct a uniform series")
}
Kdy <- rep(seq(1, length(Ktbl)), Ktbl) # Create the index to days in input
KDate <- unique(KDate)
KinAll <- unique(Kdy)
## Make it daily flow, Flow0 indicates a partial 0 flow
Flow0 <- tapply(Flow, Kdy, min)
@@ -392,17 +394,17 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
if(any(drop)) { # Any missing values?
OK <- FALSE
retby <- data.frame(Period="period", Ndays=NDays,
    Flux=NA_real_, Std.Err=NA_real_,
    SEP=NA_real_, L95=NA_real_, U95=NA_real_)
} else {
## Check for complete periods
Period <- as.character(newdata[[by]][period[1L]])
if(!is.null(targN <- checkDays[[Period]])) {
if(targN != NDays) {
OK <- FALSE
retby <- data.frame(Period="period", Ndays=NDays,
    Flux=NA_real_, Std.Err=NA_real_,
    SEP=NA_real_, L95=NA_real_, U95=NA_real_)

}
}
@@ -440,16 +442,16 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
if(out.data$IERR > 0) {
warning(" *** Error (",out.data$IERR,") occurred in processing data. ***\n",sep="")
retby <- data.frame(Period="period", Ndays=NA_integer_,
    Flux=NA_real_, Std.Err=NA_real_,
    SEP=NA_real_, L95=NA_real_, U95=NA_real_)
} else {
## OK, we've had a successful run, pack the data into a data frame
retby <- data.frame(Period="period", Ndays=NDays,
    Flux=out.data$load * lcf,
    Std.Err=sqrt(out.data$loadvar) * lcf,
    SEP=out.data$loadsep * lcf,
    L95=out.data$loadlow * lcf,
    U95=out.data$loadup * lcf)
}
}
return(retby)
Expand All @@ -476,14 +478,14 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
"calibration data set streamflow. Load estimates require extrapolation.\n\n",
sep="")
} else if(Qsum[2L, 1L] < Qsum[1L, 1L]) {
cat("WARNING: The minimum estimation data set steamflow exceeds the minimum\n",
cat("WARNING: The minimum estimation data set steamflow exceeds the minimum\n",
"calibration data set streamflow. Load estimates require extrapolation.\n\n",
sep="")
} else {
cat("The maximum estimation data set streamflow does not exceed the maximum\n",
"calibration data set streamflow. No extrapolation is required.\n\n",
sep="")
}
if(!(by %in% c("day", "unit"))) {
cat("\n-------------------------------------------------------------\n",
"Constituent Output File Part IIb: Estimation (Load Estimates)\n",
31 changes: 31 additions & 0 deletions R/print.jackStats.R
@@ -0,0 +1,31 @@
#' Print Results
#'
#' Print the results of a jackknife analysis of a censored regression.
#'
#' @param x an object of class "jackStats"---output from \code{jackStats}.
#' @param digits the number of significant digits to print.
#' @param \dots further arguments passed to or from other methods.
#' @return The object \code{x} is returned invisibly.
#' @note The printed output includes the original estimate,
#'the jackknife bias and standard error, and the relative bias for
#'each parameter in the regression model.
#'
#' @seealso \code{\link{loadReg}}
#' @keywords utilities
#' @export
#' @method print jackStats
print.jackStats <- function(x, digits=4, ...) {
##
if(x$pctcens == 0) {
cat("jackknife estimates:\n\n",
"PRESS: ", signif(x$press, digits), "\nCoefficients:\n", sep="")
} else {
cat("jackknife estimates:\n\nCoefficients:\n", sep="")
}
coef <- x$coef
# compute the relative bias
coef <- cbind(coef, abs(coef[,2L]/coef[,3L]))
colnames(coef) <- c("Estimate", "Bias", "Std. Error", "Rel. Bias")
print(coef, digits=digits)
invisible(x)
}
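A hypothetical follow-up to the example in jackStats.R, applying the Efron
and Tibshirani rule of thumb to the printed table; it assumes app1.lr was fit
as shown in the @examples section above.

js <- jackStats(app1.lr)                       # app1.lr from the example above
relbias <- abs(js$coef[, "bias"]/js$coef[, "stderr"])
names(relbias)[relbias > 0.25]                 # coefficients with notable bias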
24 changes: 21 additions & 3 deletions R/print.loadReg.R
@@ -76,8 +76,15 @@ print.loadReg <- function(x, digits=4, brief=TRUE, load.only=brief, ...) {
" Constituent Output File Part Ia: Calibration (Load Regression)\n",
"----------------------------------------------------------------------\n\n",
sep="")
# CENSFLAG is logical for AMLE, integer for MLE, this protects
Ncen <- sum(x$lfit$CENSFLAG != 0)
# CENSFLAG is logical for AMLE and integer for MLE; comparing to 0 protects
# both cases, but works only for left-censored values.
if(x$method == "AMLE") {
Ncen <- sum(x$lfit$CENSFLAG != 0)
} else {
Centbl <- table(x$lfit$survreg$y[,3L])
Ncen <- x$lfit$NOBSC - Centbl["1"]
names(Centbl) <- c("0"="right", "1"="none", "2"="left", "3"="interval")[names(Centbl)]
}
cat(" Number of Observations: ", x$lfit$NOBSC, "\n",
"Number of Uncensored Observations: ",
x$lfit$NOBSC - Ncen, "\n",
Expand All @@ -88,6 +95,11 @@ print.loadReg <- function(x, digits=4, brief=TRUE, load.only=brief, ...) {
" Period of record: ",
as.character(x$PoR[1L]), " to ", as.character(x$PoR[2L]),
"\n\n", sep="")
if(x$method == "MLE") {
cat("Censoring of data:\n")
print(Centbl)
cat("\n\n")
}
if(!brief || nrow(x$model.eval) > 1) { # Capture best model selection
cat("Model Evaluation Criteria Based on ", x$method, " Results\n",
"-----------------------------------------------\n\n", sep="")
@@ -286,7 +298,13 @@ print.loadReg <- function(x, digits=4, brief=TRUE, load.only=brief, ...) {
" Constituent Output File Part Ia: Calibration (Concentration Regression)\n",
"--------------------------------------------------------------------------\n\n",
sep="")
Ncen <- sum(x$cfit$CENSFLAG)
if(x$method == "AMLE") {
Ncen <- sum(x$cfit$CENSFLAG != 0)
} else {
Centbl <- table(x$cfit$survreg$y[,3L])
Ncen <- x$cfit$NOBSC - Centbl["1"]
names(Centbl) <- c("0"="right", "1"="none", "2"="left", "3"="interval")[names(Centbl)]
}
if(!brief) { # Capture best model selection
cat("Model # ", x$model.no, " selected\n\n", sep="")
}
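The status codes translated in the two MLE branches above come from the third
column of the survival response stored by survreg, which uses the
survival-package interval coding. A small sketch with a made-up status vector
shows how the censoring table is built.

# Hypothetical status vector standing in for x$lfit$survreg$y[, 3L]:
# 0 = right-censored, 1 = observed, 2 = left-censored, 3 = interval-censored.
status <- c(1, 1, 2, 1, 3, 2)
Centbl <- table(status)
names(Centbl) <- c("0"="right", "1"="none", "2"="left", "3"="interval")[names(Centbl)]
Centbl   # none: 3, left: 2, interval: 1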