
Commit

new and revised vignettes, print bug fix
dlorenz-usgs committed Dec 3, 2015
1 parent 4afdcc0 commit 51183f5
Showing 64 changed files with 1,302 additions and 78 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,8 +1,8 @@
Package: rloadest
Type: Package
Title: River Load Estimation
Version: 0.4.2
Date: 2015-07-20
Version: 0.4.3
Date: 2015-12-03
Author: Dave Lorenz, Rob Runkel, Laura De Cicco
Maintainer: Dave Lorenz <[email protected]>
Description: Collection of functions to make constituent load estimations
4 changes: 3 additions & 1 deletion NAMESPACE
@@ -1,4 +1,4 @@
# Generated by roxygen2 (4.1.0): do not edit by hand
# Generated by roxygen2 (4.1.1): do not edit by hand

S3method(censoring,Surv)
S3method(coef,loadReg)
@@ -8,6 +8,7 @@ S3method(makepredictcall,center)
S3method(mean,character)
S3method(mean,factor)
S3method(plot,loadReg)
S3method(print,jackStats)
S3method(print,loadReg)
S3method(residuals,loadReg)
S3method(rmse,loadReg)
@@ -16,6 +17,7 @@ export(AICc)
export(c2load)
export(center)
export(dailyAg)
export(jackStats)
export(loadConvFactor)
export(loadReg)
export(loadReport)
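The new NAMESPACE directives are generated by roxygen2 from tags in the new
source files; a sketch of the correspondence (the tags shown are the ones that
appear in jackStats.R and print.jackStats.R below).

#' @export                   # on jackStats() -> export(jackStats)
#' @export                   # on print.jackStats(), together with
#' @method print jackStats   #   this tag -> S3method(print,jackStats)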
2 changes: 1 addition & 1 deletion R/center.R
@@ -9,7 +9,7 @@
#' @note The centering value by default is computed by the method described
#'in Cohn and others (1992).
#' @return The centered value of \code{x}.
#' @seealso \code{\link[USGSwsBase]{quadratic}}, \code{\link{scale}}
#' @seealso \code{\link[smwrBase]{quadratic}}, \code{\link{scale}}
#' @references Cohn, T.A., Caulder, D.L., Gilroy, E.J., Zynjuk, L.D., and
#'Summers, R.M., 1992, The validity of a simple statistical model for
#'estimating fluvial constituent loads---An empirical study involving nutrient
142 changes: 142 additions & 0 deletions R/jackStats.R
@@ -0,0 +1,142 @@
#' Jackknife Statistics
#'
#' Compute selected jackknife statistics for a rating-curve load-estimation model.
#'
#' @param fit an object of class "loadReg"---output from \code{loadReg}. Can also
#'be an object of class "censReg."
#' @param which a character string indicating which model to assess: "load" or
#'"concentration" for an object of class "loadReg," or "censReg" for
#'an object of class "censReg."
#' @return An object of class "jackStats" containing these components:
#'coef, the table of coefficient estimates with their jackknife bias and standard errors\cr
#'coefficients, the jackknifed coefficients\cr
#'pctcens, the percentage of left-censored values\cr
#'The PRESS statistic and individual jackknife differences are also returned
#'when the percentage of censoring is 0.
#' @note The \code{jackStats} function can only be used when the analysis is AMLE.
#'
#'Abdi and Williams (2010) describe the jackknife as referring to two related techniques: the first
#'estimates the parameters, their bias, and their standard errors; the second evaluates the
#'predictive performance of the model. The second technique is the PRESS statistic (Helsel
#'and Hirsch, 2002), which can only be used on uncensored data; it is computed by \code{jackStats}
#'when no data are censored. The first technique can be used to assess the coefficients of the
#'regression---the bias should be small and the jackknife standard errors should not be much
#'different from the standard errors reported for the regression. Efron and Tibshirani (1993)
#'suggest that the bias is small if the relative bias (bias divided by the jackknife standard
#'error) is less than 0.25.
#' @seealso \code{\link{loadReg}}
#' @keywords utilities
#' @references
#' Abdi, H. and Williams, L.J., 2010, Jackknife, in Encyclopedia of Research Design,
#'Salkind, N.J., editor: Thousand Oaks, Calif., SAGE Publications, 1719 p.
#'
#'Efron, B. and Tibshirani, R.J., 1993, An introduction to the bootstrap: Boca Raton,
#'Fla., Chapman and Hall/CRC, 436 p.
#'
#' Helsel, D.R. and Hirsch, R.M., 2002, Statistical methods in water resources:
#'U.S. Geological Survey Techniques of Water-Resources Investigations, book 4,
#'chap. A3, 522 p.
#' @examples
#'# From application 1 in the vignettes
#'data(app1.calib)
#'app1.lr <- loadReg(Phosphorus ~ model(1), data = app1.calib,
#' flow = "FLOW", dates = "DATES", conc.units="mg/L",
#' station="Illinois River at Marseilles, Ill.")
#'jackStats(app1.lr)
#' @export
jackStats <- function(fit, which="load") {
## Compute some stats
which <- match.arg(which, c("load", "concentration", "censReg"))
if(which == "load") {
# Verify AMLE
if(fit$method != "AMLE") {
stop("The analysis method must be AMLE")
}
# initial stuff
NPAR <- fit$lfit$NPAR
NOBS <- fit$lfit$NOBSC
# Get the response, X, and Yeff
Y <- as.lcens(exp(fit$lfit$YLCAL), exp(fit$lfit$YD), fit$lfit$CENSFLAG)
X <- fit$lfit$XLCAL
Yeff <- fit$lfit$YLCAL
## The commented code below computes an effective value for left-censored
## values as the expected value from the prediction. It is retained in case
## the method is ever published; no plans for now.
#Yeff[fit$lfit$CENSFLAG] <- fit$lfit$XLCAL[fit$lfit$CENSFLAG, ,drop=FALSE] %*%
# fit$lfit$PARAML[seq(NPAR)] + fit$lfit$RESID[fit$lfit$CENSFLAG]
#
# Other Info
dist <- "lognormal"
parms <- fit$lfit$PARAML[seq(fit$lfit$NPAR)]
parnames <- colnames(fit$lfit$XLCAL)
} else if(which == "concentration") {
# Verify AMLE
if(fit$method != "AMLE") {
stop("The analysis method must be AMLE")
}
# initial stuff
NPAR <- fit$cfit$NPAR
NOBS <- fit$cfit$NOBSC
# Get the response, X, and Yeff
Y <- as.lcens(exp(fit$cfit$YLCAL), exp(fit$cfit$YD), fit$cfit$CENSFLAG)
X <- fit$cfit$XLCAL
Yeff <- fit$cfit$YLCAL
## See comment above
#Yeff[fit$cfit$CENSFLAG] <- fit$cfit$XLCAL[fit$cfit$CENSFLAG, ,drop=FALSE] %*%
# fit$cfit$PARAML[seq(NPAR)] + fit$cfit$RESID[fit$cfit$CENSFLAG]
#
# Other Info
dist <- "lognormal"
parms <- fit$cfit$PARAML[seq(fit$cfit$NPAR)]
parnames <- colnames(fit$cfit$XLCAL)
} else { # must be censReg
# Verify AMLE
if(fit$method != "AMLE") {
stop("The analysis method must be AMLE")
}
# initial stuff
NPAR <- fit$NPAR
NOBS <- fit$NOBSC
dist <- fit$dist
# Get the response, X, and Yeff
if(dist == "lognormal") {
Y <- as.lcens(exp(fit$YLCAL), exp(fit$YD), fit$CENSFLAG)
} else {
Y <- as.lcens(fit$YLCAL, fit$YD, fit$CENSFLAG)
}
X <- fit$XLCAL
Yeff <- fit$YLCAL
## See comment above
#Yeff[fit$CENSFLAG] <- fit$XLCAL[fit$CENSFLAG, ,drop=FALSE] %*%
# fit$PARAML[seq(NPAR)] + fit$RESID[fit$CENSFLAG]
#
# Other Info
parms <- fit$PARAML[seq(fit$NPAR)]
parnames <- colnames(fit$XLCAL)
}
# Do the jackknife: set up storage for the PRESS residuals and coefficients
pre <- numeric(NOBS)
coeff <- matrix(0, nrow=NOBS, ncol=NPAR)
for(i in seq(NOBS)) {
tmp <- censReg_AMLE.fit(Y[-i], X[-i,], dist)
coeff[i,] <- tmp$PARAML[-(NPAR + 1L)]
pre[i] <- Yeff[i] - coeff[i,, drop=FALSE] %*% t(X[i,,drop=FALSE])
}
# Compute the jackknife bias and variance of coeffs
out <- (NOBS - 1)*(rep(parms, each=NOBS) - coeff)
bias <- -1/NOBS*colSums(out)
var <- 1/(NOBS*(NOBS-1)) * (colSums(out^2) - NOBS * bias^2)
coef <- cbind(est=parms, bias=bias, stderr=sqrt(var))
rownames(coef) <- parnames
retval <- list(coef=coef, coefficients=coeff,
pctcens=pctCens(Y))
# Include press if no censoring
if(retval$pctcens == 0) {
retval$press <- sum(pre^2)
retval$pre <- pre
}
class(retval) <- "jackStats"
return(retval)
}
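The loop and the two lines that follow it implement the textbook jackknife
estimators. Below is a minimal sketch of the same bias, standard-error, and
PRESS computations applied to an ordinary uncensored lm() fit; the data are
simulated for illustration and are not from the package.

# Jackknife bias/standard error and PRESS for an uncensored lm() fit;
# mirrors the formulas in jackStats() but with simulated data.
set.seed(1)
n <- 30
x <- rnorm(n)
y <- 1 + 2*x + rnorm(n)
full <- coef(lm(y ~ x))                      # full-sample estimates
coeff <- t(sapply(seq_len(n), function(i)
  coef(lm(y[-i] ~ x[-i]))))                  # leave-one-out estimates
out <- (n - 1)*(rep(full, each=n) - coeff)   # jackknife pseudo-differences
bias <- -colSums(out)/n                      # jackknife bias
se <- sqrt((colSums(out^2) - n*bias^2)/(n*(n - 1)))
cbind(est=full, bias=bias, stderr=se)        # analogous to the coef table
pre <- y - rowSums(cbind(1, x) * coeff)      # leave-one-out prediction errors
sum(pre^2)                                   # the PRESS statistic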
2 changes: 1 addition & 1 deletion R/plot.loadReg.R
@@ -22,7 +22,7 @@
#' @param span the span to use for the loess smooth. Set to 0 to suppress.
#' @param \dots further arguments passed to or from other methods.
#' @return The object \code{x} is returned invisibly.
#' @note This plotting function uses the core routines in the \code{USGSwsGraphs}
#' @note This plotting function uses the core routines in the \code{smwrGraphs}
#'package. It requires a graphics page that is set up from the functions
#'in that package (\code{setpage} or \code{setPDF}) if \code{set.up} is
#'\code{FALSE}. The graphs that are produced by this function are based
10 changes: 6 additions & 4 deletions R/predConc.R
@@ -190,9 +190,10 @@ predConc <- function(fit, newdata, by="day",
names(retval)[6L:7L] <- Names
} else if(by == "day") {
## One or more obs per day
## KDate are the dates of the days
## KDays are the indexes to days
## Kin are the indexes to the good model inputs
## Kdy are the indexes to the days
## Kdy are the indexes to the days of model inputs
## KinAll are the indexes to all days
##
## Preserve flow for later
Expand All @@ -210,11 +211,12 @@ predConc <- function(fit, newdata, by="day",
Kin <- Kin[is.finite(rowSums(model.inp))]
KDate <- as.Date(as.POSIXlt(newdata[[dates]]))
Kdy <- as.integer(KDate)
KDate <- unique(KDate)
Kdy <- Kdy - Kdy[1L] + 1L # make relative to first day (Index)
if(length(unique(table(Kdy))) > 1L && !allow.incomplete) {
Ktbl <- table(Kdy)
if(length(unique(Ktbl)) > 1L && !allow.incomplete) {
warning("Variable observations per day, either set the allow.incomplete argument to TRUE or use the resampleUVdata function to construct a uniform series")
}
Kdy <- rep(seq(1, length(Ktbl)), Ktbl) # Create the index to days in input
KDate <- unique(KDate)
KinAll <- unique(Kdy)
## Make it daily flow, Flow0 indicates a partial 0 flow
Flow0 <- tapply(Flow, Kdy, min)
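The reworked block above replaces the raw relative-day index with a compact
index rebuilt from a table of observations per day. A small sketch with
hypothetical integer dates (including a gap between days) shows why; it
assumes, as predConc does, that the input is sorted by time.

# Hypothetical dates: two observations on day 1, three on day 3.
Kdy <- c(16770L, 16770L, 16772L, 16772L, 16772L)
Kdy <- Kdy - Kdy[1L] + 1L        # relative to the first day: 1 1 3 3 3
Ktbl <- table(Kdy)               # observations per day: 2, 3
rep(seq(1, length(Ktbl)), Ktbl)  # compact index to days: 1 1 2 2 2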
38 changes: 20 additions & 18 deletions R/predLoad.R
@@ -215,9 +215,10 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
names(retval)[6L:7L] <- Names
} else if(by == "day") {
## One or more obs per day
## KDate are the dates of the days
## KDays are the indexes to days
## Kin are the indexes to the good model inputs
## Kdy are the indexes to the days
## Kdy are the indexes to the days of model inputs
## KinAll are the indexes to all days
##
## Preserve flow for later
Expand All @@ -235,11 +236,12 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
Kin <- Kin[is.finite(rowSums(model.inp))]
KDate <- as.Date(as.POSIXlt(newdata[[dates]]))
Kdy <- as.integer(KDate)
KDate <- unique(KDate)
Kdy <- Kdy - Kdy[1L] + 1L # make relative to first day (Index)
if(length(unique(table(Kdy))) > 1L && !allow.incomplete) {
Ktbl <- table(Kdy)
if(length(unique(Ktbl)) > 1L && !allow.incomplete) {
warning("Variable observations per day, either set the allow.incomplete argument to TRUE or use the resampleUVdata function to construct a uniform series")
}
Kdy <- rep(seq(1, length(Ktbl)), Ktbl) # Create the index to days in input
KDate <- unique(KDate)
KinAll <- unique(Kdy)
## Make it daily flow, Flow0 indicates a partial 0 flow
Flow0 <- tapply(Flow, Kdy, min)
@@ -392,17 +394,17 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
if(any(drop)) { # Any missing values?
OK <- FALSE
retby <- data.frame(Period="period", Ndays=NDays,
    Flux=NA_real_, Std.Err=NA_real_,
    SEP=NA_real_, L95=NA_real_, U95=NA_real_)
} else {
## Check for complete periods
Period <- as.character(newdata[[by]][period[1L]])
if(!is.null(targN <- checkDays[[Period]])) {
if(targN != NDays) {
OK <- FALSE
retby <- data.frame(Period="period", Ndays=NDays,
    Flux=NA_real_, Std.Err=NA_real_,
    SEP=NA_real_, L95=NA_real_, U95=NA_real_)

}
}
@@ -440,16 +442,16 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
if(out.data$IERR > 0) {
warning(" *** Error (",out.data$IERR,") occurred in processing data. ***\n",sep="")
retby <- data.frame(Period="period", Ndays=NA_integer_,
    Flux=NA_real_, Std.Err=NA_real_,
    SEP=NA_real_, L95=NA_real_, U95=NA_real_)
} else {
## OK, we've had a successful run, pack the data into a data frame
retby <- data.frame(Period="period", Ndays=NDays,
    Flux=out.data$load * lcf,
    Std.Err=sqrt(out.data$loadvar) * lcf,
    SEP=out.data$loadsep * lcf,
    L95=out.data$loadlow * lcf,
    U95=out.data$loadup * lcf)
}
}
return(retby)
Expand All @@ -476,14 +478,14 @@ predLoad <- function(fit, newdata, load.units=fit$load.units, by="total",
"calibration data set streamflow. Load estimates require extrapolation.\n\n",
sep="")
} else if(Qsum[2L, 1L] < Qsum[1L, 1L]) {
cat("WARNING: The minimum estimation data set steamflow exceeds the minimum\n",
cat("WARNING: The minimum estimation data set steamflow exceeds the minimum\n",
"calibration data set streamflow. Load estimates require extrapolation.\n\n",
sep="")
} else {
cat("The maximum estimation data set streamflow does not exceed the maximum\n",
"calibration data set streamflow. No extrapolation is required.\n\n",
sep="")
}
if(!(by %in% c("day", "unit"))) {
cat("\n-------------------------------------------------------------\n",
"Constituent Output File Part IIb: Estimation (Load Estimates)\n",
31 changes: 31 additions & 0 deletions R/print.jackStats.R
@@ -0,0 +1,31 @@
#' Print Results
#'
#' Print the results of a jackknife analysis of a censored regression.
#'
#' @param x an object of class "jackStats"---output from \code{jackStats}.
#' @param digits the number of significant digits to print.
#' @param \dots further arguments passed to or from other methods.
#' @return The object \code{x} is returned invisibly.
#' @note The printed output includes the original estimate,
#'the jackknife bias and standard error, and the relative bias for
#'each parameter in the regression model.
#'
#' @seealso \code{\link{loadReg}}
#' @keywords utilities
#' @export
#' @method print jackStats
print.jackStats <- function(x, digits=4, ...) {
##
if(x$pctcens == 0) {
cat("jackknife estimates:\n\n",
"PRESS: ", signif(x$press, digits), "\nCoefficients:\n", sep="")
} else {
cat("jackknife estimates:\n\nCoefficients:\n", sep="")
}
coef <- x$coef
# compute the relative bias
coef <- cbind(coef, abs(coef[,2L]/coef[,3L]))
colnames(coef) <- c("Estimate", "Bias", "Std. Error", "Rel. Bias")
print(coef, digits=digits)
invisible(x)
}
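A hypothetical follow-up to the example in jackStats.R, applying the Efron
and Tibshirani rule of thumb to the printed table; it assumes app1.lr was fit
as shown in the @examples section above.

js <- jackStats(app1.lr)                       # app1.lr from the example above
relbias <- abs(js$coef[, "bias"]/js$coef[, "stderr"])
names(relbias)[relbias > 0.25]                 # coefficients with notable bias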
24 changes: 21 additions & 3 deletions R/print.loadReg.R
@@ -76,8 +76,15 @@ print.loadReg <- function(x, digits=4, brief=TRUE, load.only=brief, ...) {
" Constituent Output File Part Ia: Calibration (Load Regression)\n",
"----------------------------------------------------------------------\n\n",
sep="")
# CENSFLAG is logical for AMLE, integer for MLE, this protects
Ncen <- sum(x$lfit$CENSFLAG != 0)
# CENSFLAG is logical for AMLE and integer for MLE; comparing to 0 protects
# both cases, but works only for left-censored values.
if(x$method == "AMLE") {
Ncen <- sum(x$lfit$CENSFLAG != 0)
} else {
Centbl <- table(x$lfit$survreg$y[,3L])
Ncen <- x$lfit$NOBSC - Centbl["1"]
names(Centbl) <- c("0"="right", "1"="none", "2"="left", "3"="interval")[names(Centbl)]
}
cat(" Number of Observations: ", x$lfit$NOBSC, "\n",
"Number of Uncensored Observations: ",
x$lfit$NOBSC - Ncen, "\n",
Expand All @@ -88,6 +95,11 @@ print.loadReg <- function(x, digits=4, brief=TRUE, load.only=brief, ...) {
" Period of record: ",
as.character(x$PoR[1L]), " to ", as.character(x$PoR[2L]),
"\n\n", sep="")
if(x$method == "MLE") {
cat("Censoring of data:\n")
print(Centbl)
cat("\n\n")
}
if(!brief || nrow(x$model.eval) > 1) { # Capture best model selection
cat("Model Evaluation Criteria Based on ", x$method, " Results\n",
"-----------------------------------------------\n\n", sep="")
@@ -286,7 +298,13 @@ print.loadReg <- function(x, digits=4, brief=TRUE, load.only=brief, ...) {
" Constituent Output File Part Ia: Calibration (Concentration Regression)\n",
"--------------------------------------------------------------------------\n\n",
sep="")
Ncen <- sum(x$cfit$CENSFLAG)
if(x$method == "AMLE") {
Ncen <- sum(x$cfit$CENSFLAG != 0)
} else {
Centbl <- table(x$cfit$survreg$y[,3L])
Ncen <- x$cfit$NOBSC - Centbl["1"]
names(Centbl) <- c("0"="right", "1"="none", "2"="left", "3"="interval")[names(Centbl)]
}
if(!brief) { # Capture best model selection
cat("Model # ", x$model.no, " selected\n\n", sep="")
}
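The status codes translated in the two MLE branches above come from the third
column of the survival response stored by survreg, which uses the
survival-package interval coding. A small sketch with a made-up status vector
shows how the censoring table is built.

# Hypothetical status vector standing in for x$lfit$survreg$y[, 3L]:
# 0 = right-censored, 1 = observed, 2 = left-censored, 3 = interval-censored.
status <- c(1, 1, 2, 1, 3, 2)
Centbl <- table(status)
names(Centbl) <- c("0"="right", "1"="none", "2"="left", "3"="interval")[names(Centbl)]
Centbl   # none: 3, left: 2, interval: 1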