Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove hmisc dependency #102

Merged
merged 4 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .lintr
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@ linters: linters_with_defaults(
line_length_linter = line_length_linter(120L),
commented_code_linter = NULL
)
exclusions: list(
"./R/Hmisc-utils.R"
)

3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: hubEnsembles
Title: Ensemble methods for combining hub model outputs
Version: 0.1.1
Version: 0.1.2
Authors@R: c(
person(given = "Evan L",
family = "Ray",
Expand Down Expand Up @@ -44,7 +44,6 @@ Imports:
cli,
distfromq (>= 1.0.2),
dplyr,
Hmisc,
hubUtils (>= 0.0.1),
magrittr,
matrixStats,
Expand Down
212 changes: 212 additions & 0 deletions R/Hmisc-utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
# All functions adapted from Hmisc pkg functions
# Compute weighted quantiles
# Source: https://github.com/harrelfe/Hmisc/blob/f80ecfb27e1deb88b3c6cc075c7252e3c577f66b/R/wtd.stats.s#L48
wtd.quantile <- function(x, weights=NULL, probs=c(0, .25, .5, .75, 1),
type=c('quantile','(i-1)/(n-1)','i/(n+1)','i/n'),
normwt=FALSE, na.rm=TRUE)
{
if(! length(weights))
return(stats::quantile(x, probs=probs, na.rm=na.rm))

type <- match.arg(type)
if(any(probs < 0 | probs > 1))
stop("Probabilities must be between 0 and 1 inclusive")

nams <- paste(format(round(probs * 100, if(length(probs) > 1)
2 - log10(diff(range(probs))) else 2)),
"%", sep = "")

i <- is.na(weights) | weights == 0
if(any(i)) {
x <- x[! i]
weights <- weights[! i]
}
if(type == 'quantile') {
w <- wtd.table(x, weights, na.rm=na.rm, normwt=normwt, type='list')
x <- w$x
wts <- w$sum.of.weights
n <- sum(wts)
order <- 1 + (n - 1) * probs
low <- pmax(floor(order), 1)
high <- pmin(low + 1, n)
order <- order %% 1
## Find low and high order statistics
## These are minimum values of x such that the cum. freqs >= c(low,high)
allq <- stats::approx(cumsum(wts), x, xout=c(low,high),
method='constant', f=1, rule=2)$y
k <- length(probs)
quantiles <- (1 - order)*allq[1:k] + order*allq[-(1:k)]
names(quantiles) <- nams
return(quantiles)
}
w <- wtd.Ecdf(x, weights, na.rm=na.rm, type=type, normwt=normwt)
structure(stats::approx(w$ecdf, w$x, xout=probs, rule=2)$y,
names=nams)
}


# Compute weighted empirical distribution function
# Source: https://github.com/harrelfe/Hmisc/blob/f80ecfb27e1deb88b3c6cc075c7252e3c577f66b/R/wtd.stats.s#L92
wtd.Ecdf <- function(x, weights=NULL,
type=c('i/n','(i-1)/(n-1)','i/(n+1)'),
normwt=FALSE, na.rm=TRUE)
{
type <- match.arg(type)
switch(type,
'(i-1)/(n-1)'={a <- b <- -1},
'i/(n+1)' ={a <- 0; b <- 1},
'i/n' ={a <- b <- 0})

if(! length(weights)) {
##.Options$digits <- 7 ## to get good resolution for names(table(x))
oldopt <- options('digits')
options(digits=7)
on.exit(options(oldopt))
cumu <- table(x) ## R does not give names for cumsum
isdate <- testDateTime(x) ## 31aug02
ax <- attributes(x)
ax$names <- NULL
x <- as.numeric(names(cumu))
if(isdate) attributes(x) <- c(attributes(x),ax)
cumu <- cumsum(cumu)
cdf <- (cumu + a)/(cumu[length(cumu)] + b)
if(cdf[1]>0) {
x <- c(x[1], x);
cdf <- c(0,cdf)
}

return(list(x = x, ecdf=cdf))
}

w <- wtd.table(x, weights, normwt=normwt, na.rm=na.rm)
cumu <- cumsum(w$sum.of.weights)
cdf <- (cumu + a)/(cumu[length(cumu)] + b)
list(x = c(if(cdf[1]>0) w$x[1], w$x), ecdf=c(if(cdf[1]>0)0, cdf))
}


# Compute weighted frequency table
# Source: https://github.com/harrelfe/Hmisc/blob/f80ecfb27e1deb88b3c6cc075c7252e3c577f66b/R/wtd.stats.s#L130
wtd.table <- function(x, weights=NULL, type=c('list','table'),
normwt=FALSE, na.rm=TRUE)
{
type <- match.arg(type)
if(! length(weights))
weights <- rep(1, length(x))

isdate <- testDateTime(x) ## 31aug02 + next 2
ax <- attributes(x)
ax$names <- NULL

if(is.character(x)) x <- as.factor(x)
lev <- levels(x)
x <- unclass(x)

if(na.rm) {
s <- ! is.na(x + weights)
x <- x[s, drop=FALSE] ## drop is for factor class
weights <- weights[s]
}

n <- length(x)
if(normwt)
weights <- weights * length(x) / sum(weights)

i <- order(x) # R does not preserve levels here
x <- x[i]; weights <- weights[i]

if(anyDuplicated(x)) { ## diff(x) == 0 faster but doesn't handle Inf
weights <- tapply(weights, x, sum)
if(length(lev)) {
levused <- lev[sort(unique(x))]
if((length(weights) > length(levused)) &&
any(is.na(weights)))
weights <- weights[! is.na(weights)]

if(length(weights) != length(levused))
stop('program logic error')

names(weights) <- levused
}

if(! length(names(weights)))
stop('program logic error')

if(type=='table')
return(weights)

x <- all.is.numeric(names(weights), 'vector')
if(isdate)
attributes(x) <- c(attributes(x),ax)

names(weights) <- NULL
return(list(x=x, sum.of.weights=weights))
}

xx <- x
if(isdate)
attributes(xx) <- c(attributes(xx),ax)

if(type=='list')
list(x=if(length(lev))lev[x]
else xx,
sum.of.weights=weights)
else {
names(weights) <- if(length(lev)) lev[x]
else xx
weights
}
}


# Determine if variable is a date, time, or date/time variable
# Source: https://github.com/harrelfe/Hmisc/blob/f80ecfb27e1deb88b3c6cc075c7252e3c577f66b/R/Misc.s#L1125
testDateTime <- function(x, what=c('either','both','timeVaries'))
{
what <- match.arg(what)
cl <- class(x)
if(!length(cl))
return(FALSE)

dc <- c('Date', 'POSIXt','POSIXct','dates','times','chron')

dtc <- c('POSIXt','POSIXct','chron')

switch(what,
either = any(cl %in% dc),
both = any(cl %in% dtc),
timeVaries = {
if('chron' %in% cl || 'Date' %in% cl) {
## chron or S+ timeDate
y <- as.numeric(x)
length(unique(round(y - floor(y),13))) > 1
}
else length(unique(format(x,'%H%M%S'))) > 1
})
}


# Check if all elements in a character vector are numeric
# Source: https://github.com/harrelfe/Hmisc/blob/f80ecfb27e1deb88b3c6cc075c7252e3c577f66b/R/Misc.s#L241
all.is.numeric <- function(x, what=c('test','vector','nonnum'),
extras=c('.','NA'))
{
what <- match.arg(what)
x <- sub('[[:space:]]+$', '', x)
x <- sub('^[[:space:]]+', '', x)
xs <- x[x %nin% c('', extras)]
if(! length(xs) || all(is.na(x)))
return(switch(what, test = FALSE, vector=x, nonnum=x[0]))
isnon <- suppressWarnings(! is.na(xs) & is.na(as.numeric(xs)))
isnum <- ! any(isnon)
# suppressWarnings below handles extras present in x
switch(what,
test = isnum,
vector = if(isnum) suppressWarnings(as.numeric(x)) else x,
nonnum = xs[isnon])
}


# Find matching (or non-matching) elements
# Source: https://github.com/harrelfe/Hmisc/blob/f80ecfb27e1deb88b3c6cc075c7252e3c577f66b/R/in.operator.s#L1
"%nin%" <- function(x, table) match(x, table, nomatch = 0) == 0
8 changes: 5 additions & 3 deletions R/linear_pool_quantile.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,11 @@ linear_pool_quantile <- function(model_outputs, weights = NULL,
) |>
tidyr::unnest(.data$pred_qs) |>
dplyr::group_by(dplyr::across(dplyr::all_of(task_id_cols))) |>
dplyr::summarize(output_type_id = list(quantile_levels),
value = list(do.call(Hmisc::wtd.quantile, args = agg_args)),
.groups = "drop") |>
dplyr::summarize(
output_type_id = list(quantile_levels),
value = list(do.call("wtd.quantile", args = agg_args)),
.groups = "drop"
) |>
tidyr::unnest(cols = tidyselect::all_of(c("output_type_id", "value"))) |>
dplyr::mutate(model_id = model_id, .before = 1) |>
dplyr::mutate(output_type = "quantile", .before = .data$output_type_id) |>
Expand Down
1 change: 0 additions & 1 deletion tests/testthat/test-linear_pool.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
library(Hmisc)
library(distfromq)
library(matrixStats)
library(dplyr)
Expand Down
Loading