diff --git a/.Rbuildignore b/.Rbuildignore index 5999d52..76a4088 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,4 @@ .travis.yml README.org -build.sh \ No newline at end of file +build.sh +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..28a7ba4 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,57 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-tinytex@v2 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - name: Install tinytex and LaTeX packages + run: | + install.packages("tinytex") + tinytex:::install_yihui_pkgs() + shell: Rscript {0} + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 
fa3b4eb..0000000 --- a/.travis.yml +++ /dev/null @@ -1,24 +0,0 @@ -language: r - -sudo: required - -apt_packages: - - ghostscript - -notifications: - email: - on_success: change - on_failure: change - -# r_packages: -# - ggplot2 -# - directlabels - -# DNAcopy not used but needed for checks. -bioc_packages: - - DNAcopy - -# r_github_packages: -# - tdhock/coseg -# - tdhock/PeakSegDP -# - tdhock/PeakError diff --git a/DESCRIPTION b/DESCRIPTION index 40d3075..e3dfd0b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: penaltyLearning Maintainer: Toby Dylan Hocking Author: Toby Dylan Hocking -Version: 2021.4.21 +Version: 2024.1.25 License: GPL-3 Title: Penalty Learning Description: Implementations of algorithms from diff --git a/NEWS b/NEWS index 612e9ae..e31166e 100644 --- a/NEWS +++ b/NEWS @@ -1,221 +1,139 @@ -TODOs +Changes in version 2024.1.25 -tests/support for flsa and HMM? +- now ok to have length(model.vars)>1 (was an un-informative error 'length = 2' in coercion to 'logical(1)' in recent versions of R). -- option to not stop in labelError if there is the same number of - changes in two models. +Changes in version 2023.8.31 -2021.4.21 +- update un-exported fun arg docs to avoid CRAN NOTE. -Stop with an error for non-finite predictions. +Changes in version 2021.4.21 -2021.1.19 PR#19 +- Stop with an error for non-finite predictions. -reduce repetition in ROChange aum computation. +Changes in version 2019.12.3 -aum.grad has a row with zeros even for problems with no change in -error rates (previously there was no row). +- test/fix modelSelection for non-monotonic sequences of loss values. -2020.5.13 PR#14 +Changes in version 2019.11.19 -Remove dependency on Segmentor3IsBack, which was removed from CRAN in -May 2020. +- labelError is OK with model columns that are missing. -ROC curve reversed if necessary in AUC computation. i.e. ROChange no -longer assumes that small thresholds mean lots of fp/tp. 
+Changes in version 2019.10.10 -no longer need geometry for AUC computation. +- stop with an error for IntervalRegressionCV(., unlogged.outputs). +- new args for IntervalRegressionCV including LAPPLY which defaults to future.apply::future_lapply but can be set to base::lapply for debugging. +- new notConverging data set and test. +- smaller crit before restarting with a larger Lipschitz in IntervalRegressionCV. -ROChange returns aum (Area Under Min{fp,fn}) and aum.grad -(directional derivatives) for predicted values. +Changes in version 2019.5.16 -ROC curve thresholds sorted increasing rather than decreasing. +- non-strict equality in while(crossing point >= previous breakpoint) to avoid zero-length intervals. +- additional tests for modelSelectionFwd. -2019.12.3 +Changes in version 2019.05.15 -test/fix modelSelection for non-monotonic sequences of loss values. +- Use modelSelectionFwd C algo for modelSelectionC R function. +- Fix featureMatrix/labelError/ROChange argument checks, if(logical vector length bigger than 1) was used and is now being checked in R-3.6.0. -2019.11.19 +Changes in version 2019.05.03 -labelError is OK with model columns that are missing. +- modelSelectionFwd and modelSelectionQuadratic. -2019.10.10 +Changes in version 2019.04.18 -stop with an error for IntervalRegressionCV(., unlogged.outputs). +- IntervalRegressionCV: informative reg.type undefined error. -new args for IntervalRegressionCV including LAPPLY which defaults to -future.apply::future_lapply but can be set to base::lapply for -debugging. +Changes in version 2019.02.28 -new notConverging data set and test. +- set last_lambda=0 when popping. -smaller crit before restarting with a larger Lipschitz in -IntervalRegressionCV. +Changes in version 2019.02.27 -2019.5.29 PR#13 +- import rather than Depend data.table -ROChange: only return one threshold=="prediction" row, even if the -predicted threshold=0 happens to be exactly on the border between two -models. 
+Changes in version 2018.10.23 -2019.5.28 PR#12 +- IntervalRegression* stops with an informative error if there are no upper/lower limits. +- Remove Remotes/Travis deps. +- ROChange now works when there are problems with no thresholds, e.g. the FPR/TPR does not change at all when varying the penalty from 0 to Inf. -ROChange: informative error / test when there is more than one -prediction per problem. +Changes in version 2018.09.24 -2019.5.16 +- labelError stops for unrecognized annotations. -non-strict equality in while(crossing point >= previous breakpoint) to -avoid zero-length intervals. +Changes in version 2018.09.04 -additional tests for modelSelectionFwd. +- use future.apply::future_lapply. -2019.05.15 +Changes in version 2017.12.08 -Use modelSelectionFwd C algo for modelSelectionC R function. +- remove vignette to pass CRAN check. -Fix featureMatrix/labelError/ROChange argument checks, if(logical vector length -bigger than 1) was used and is now being checked in R-3.6.0. +Changes in version 2017.11.17 -2019.05.03 +- In vignette, remove cghseg since it has memory problems, use Segmentor instead, with trivial 1 segment model when Segmentor fails. +- Remove cghseg from example(modelSelectionC). +- Don't use fullpage in vignette because that causes a NOTE on CRAN mac. -modelSelectionFwd and modelSelectionQuadratic. +Changes in version 2017.07.12 -2019.04.18 +- try to fix vignette by using cghseg:::segmeanCO instead of Segmentor. -IntervalRegressionCV: informative reg.type undefined error. +Changes in version 2017.07.11 -2019.02.28 +- there is some problem with Segmentor3IsBack on windows, which crashes our vignette re-building in CRAN checks on solaris... not sure why but try to fix via adding tryCatch in vignette. +- Add ... passed from IntervalRegressionCV to IntervalRegressionRegularized. -set last_lambda=0 when popping. +Changes in version 2017.06.14 -2019.02.27 +- labelError bugfix and test case for no predicted changes. 
+- Simplify examples -- avoid running Segmentor since this crashes on new versions of R on windows. -import rather than Depend data.table +Changes in version 2017.05.08 -2018.10.23 +- IntervalRegressionCV uses future instead of foreach. -IntervalRegression* stops with an informative error if there are no -upper/lower limits. +Changes in version 2017.05.05 -Remove Remotes/Travis deps. +- corrections encountered while preparing tutorial, +- - theme_no_space() evaluated at runtime rather than theme_no_space which was evaluated at build time. +- - stop with an error if there are models that have the same number of changes -- this prevents problems for changepoint models, but prevents using the code with L1 regularized models (fused lasso). +- - stop with an error in targetIntervals if the errors column is not numeric. And return an errors column (the minimum number of incorrect labels). -ROChange now works when there are problems with no thresholds, -e.g. the FPR/TPR does not change at all when varying the penalty from -0 to Inf. +Changes in version 2017.04.11 -2018.09.24 +- prepare for CRAN submission: - convert to src/*.cpp files and register routines. - NULL variables to avoid CRAN checks about global variables. - vignette. - many more user-friendly error messages. - coefficients of IntervalRegression models are now returned on the original scale. -labelError stops for unrecognized annotations. +Changes in version 2017.03.24 -2018.09.04 +- IntervalRegression S3 class with plot, print, and predict methods. +- largestContinuousMinimum C implementation. +- more informative error messages when arguments to R functions are not as expected. +- check for bigger/smaller data sets in ROChange and labelError. +- check for errors in C code and return with non-zero status. -use future.apply::future_lapply. 
+Changes in version 2017.01.31 -2017.12.08 +- labelError works when there are more models than labels, and gives an informative error when there are no corresponding models for a given label. -remove vignette to pass CRAN check. +Changes in version 2017.01.21 -2017.11.17 +- tests for peak model and for IntervalRegression functions. -In vignette, remove cghseg since it has memory problems, use Segmentor -instead, with trivial 1 segment model when Segmentor fails. +Changes in version 2017.01.20 -Remove cghseg from example(modelSelectionC). +- IntervalRegression* functions. -Don't use fullpage in vignette because that causes a NOTE on CRAN mac. +Changes in version 2017.01.17 -2017.07.12 +- labelError, targetIntervals, ROChange. -try to fix vignette by using cghseg:::segmeanCO instead of Segmentor. +Changes in version 2017.01.13 -2017.07.11 +- C solver for linear time modelSelection algorithm, interface via modelSelectionC function. +- modelSelectionR function with original quadratic time algorithm in R code. +- modelSelection which takes a data.frame as input instead of vectors, and uses modelSelectionC. -there is some problem with Segmentor3IsBack on windows, which crashes -our vignette re-building in CRAN checks on solaris... not sure why but -try to fix via adding tryCatch in vignette. +Changes in version 2017.01.12 -Add ... passed from IntervalRegressionCV to -IntervalRegressionRegularized. - -2017.06.14 - -labelError bugfix and test case for no predicted changes. - -Simplify examples -- avoid running Segmentor since this crashes on new -versions of R on windows. - -2017.05.08 - -IntervalRegressionCV uses future instead of foreach. - -2017.05.05 - -corrections encountered while preparing tutorial, - -- theme_no_space() evaluated at runtime rather than theme_no_space - which was evaluated at build time. 
- -- stop with an error if there are models that have the same number of - changes -- this prevents problems for changepoint models, but - prevents using the code with L1 regularized models (fused lasso). - -- stop with an error in targetIntervals if the errors column is not - numeric. And return an errors column (the minimum number of - incorrect labels). - -2017.04.11 - -prepare for CRAN submission: -- convert to src/*.cpp files and register routines. -- NULL variables to avoid CRAN checks about global variables. -- vignette. -- many more user-friendly error messages. -- coefficients of IntervalRegression models are - now returned on the original scale. - -2017.03.24 - -IntervalRegression S3 class with plot, print, and predict methods. - -largestContinuousMinimum C implementation. - -more informative error messages when arguments to R functions are not -as expected. - -check for bigger/smaller data sets in ROChange and labelError. - -check for errors in C code and return with non-zero status. - -2017.01.31 - -labelError works when there are more models than labels, and gives an -informative error when there are no corresponding models for a given -label. - -2017.01.21 - -tests for peak model and for IntervalRegression functions. - -2017.01.20 - -IntervalRegression* functions. - -2017.01.17 - -labelError, targetIntervals, ROChange. - -2017.01.13 - -C solver for linear time modelSelection algorithm, interface via -modelSelectionC function. - -modelSelectionR function with original quadratic time algorithm in R -code. - -modelSelection which takes a data.frame as input instead of vectors, -and uses modelSelectionC. - -2017.01.12 - -First version. \ No newline at end of file +- First version. 
\ No newline at end of file diff --git a/R/ROChange.R b/R/ROChange.R index 754f762..49342f3 100644 --- a/R/ROChange.R +++ b/R/ROChange.R @@ -124,6 +124,7 @@ ROChange <- structure(function # ROC curve for changepoints } } first.dt <- err[min.log.lambda==-Inf] + last.dt <- err[max.log.lambda==Inf] total.dt <- first.dt[, list( labels=sum(labels), possible.fp=sum(possible.fp), @@ -153,7 +154,7 @@ ROChange <- structure(function # ROC curve for changepoints min.thresh=c(-Inf, thresh), max.thresh=c(thresh, Inf), fp = cumsum(c(sum(first.dt$fp), fp.tot.diff)), - fn = cumsum(c(sum(first.dt$fn), fn.tot.diff)) + fn = rev(cumsum(rev(c(-fn.tot.diff, sum(last.dt$fn))))) )] ## Compute aum = area under min(fp,fn). fp.fn.totals[, min.fp.fn := pmin(fp, fn)] diff --git a/R/labelError.R b/R/labelError.R index 2e242b0..22dd301 100644 --- a/R/labelError.R +++ b/R/labelError.R @@ -90,9 +90,9 @@ labelError <- structure(function # Compute incorrect labels } if(!( is.character(model.vars) && - 0 < length(model.vars) && - model.vars %in% names(models) && - model.vars %in% names(changes) + 0 < length(model.vars) && + all(model.vars %in% names(models)) && + all(model.vars %in% names(changes)) )){ stop("model.vars should be a column name of both models and changes (ID for model complexity, typically the number of changepoints or segments)") } diff --git a/R/targetIntervals.R b/R/targetIntervals.R index 20b8df8..d997ec3 100644 --- a/R/targetIntervals.R +++ b/R/targetIntervals.R @@ -215,6 +215,8 @@ targetIntervals <- structure(function # Compute target intervals ### IntervalRegression* functions. 
}, ex=function(){ + data.table::setDTthreads(1) + library(penaltyLearning) data(neuroblastomaProcessed, envir=environment()) targets.dt <- targetIntervals( diff --git a/README.org b/README.org index 4519f95..69e16f4 100644 --- a/README.org +++ b/README.org @@ -1,6 +1,7 @@ -Algorithms for supervised learning of penalty functions for change detection +Algorithms for supervised learning of penalty functions for +change-point detection -[[https://travis-ci.org/tdhock/penaltyLearning][https://travis-ci.org/tdhock/penaltyLearning.png?branch=master]] +[[https://github.com/tdhock/penaltyLearning/actions][https://github.com/tdhock/penaltyLearning/workflows/R-CMD-check/badge.svg]] This R package provides a practical implementation of the algorithms described in our ICML'13 paper, [[http://jmlr.org/proceedings/papers/v28/hocking13.html][Learning Sparse Penalties for diff --git a/man/changeLabel.Rd b/man/changeLabel.Rd index 6d6ad06..c50c486 100644 --- a/man/changeLabel.Rd +++ b/man/changeLabel.Rd @@ -6,14 +6,10 @@ min.changes, max.changes, color)} \arguments{ - \item{annotation}{ -} - \item{min.changes}{ -} - \item{max.changes}{ -} - \item{color}{ -} + \item{annotation}{annotation } + \item{min.changes}{min.changes } + \item{max.changes}{max.changes } + \item{color}{color } } diff --git a/man/check_target_pred.Rd b/man/check_target_pred.Rd index c92fe22..96c6b5f 100644 --- a/man/check_target_pred.Rd +++ b/man/check_target_pred.Rd @@ -6,10 +6,8 @@ target matrix or predicted values.} \usage{check_target_pred(target.mat, pred)} \arguments{ - \item{target.mat}{ -} - \item{pred}{ -} + \item{target.mat}{target.mat } + \item{pred}{pred } } \value{number of observations.} diff --git a/man/coef.IntervalRegression.Rd b/man/coef.IntervalRegression.Rd index 4383f39..d4757c2 100644 --- a/man/coef.IntervalRegression.Rd +++ b/man/coef.IntervalRegression.Rd @@ -5,10 +5,8 @@ \usage{\method{coef}{IntervalRegression}(object, ...)} \arguments{ - \item{object}{ -} - \item{\dots}{ -} + 
\item{object}{object } + \item{\dots}{\dots } } \value{numeric matrix [features x regularizations] of learned weights (on diff --git a/man/geom_tallrect.Rd b/man/geom_tallrect.Rd index 91027df..caeb10a 100644 --- a/man/geom_tallrect.Rd +++ b/man/geom_tallrect.Rd @@ -10,22 +10,14 @@ y range, useful for clickSelects background elements.} show.legend = NA, inherit.aes = TRUE)} \arguments{ - \item{mapping}{ -} - \item{data}{ -} - \item{stat}{ -} - \item{position}{ -} - \item{\dots}{ -} - \item{na.rm}{ -} - \item{show.legend}{ -} - \item{inherit.aes}{ -} + \item{mapping}{mapping } + \item{data}{data } + \item{stat}{stat } + \item{position}{position } + \item{\dots}{\dots } + \item{na.rm}{na.rm } + \item{show.legend}{show.legend } + \item{inherit.aes}{inherit.aes } } diff --git a/man/plot.IntervalRegression.Rd b/man/plot.IntervalRegression.Rd index 8a2dd55..b8fe09d 100644 --- a/man/plot.IntervalRegression.Rd +++ b/man/plot.IntervalRegression.Rd @@ -5,10 +5,8 @@ \usage{\method{plot}{IntervalRegression}(x, ...)} \arguments{ - \item{x}{ -} - \item{\dots}{ -} + \item{x}{x } + \item{\dots}{\dots } } \value{a ggplot.} diff --git a/man/predict.IntervalRegression.Rd b/man/predict.IntervalRegression.Rd index 59b6496..368a6a4 100644 --- a/man/predict.IntervalRegression.Rd +++ b/man/predict.IntervalRegression.Rd @@ -5,12 +5,9 @@ \usage{\method{predict}{IntervalRegression}(object, X, ...)} \arguments{ - \item{object}{ -} - \item{X}{ -} - \item{\dots}{ -} + \item{object}{object } + \item{X}{X } + \item{\dots}{\dots } } \value{numeric matrix of predicted log(penalty) values.} diff --git a/man/print.IntervalRegression.Rd b/man/print.IntervalRegression.Rd index 4a05be4..9e7036b 100644 --- a/man/print.IntervalRegression.Rd +++ b/man/print.IntervalRegression.Rd @@ -5,10 +5,8 @@ \usage{\method{print}{IntervalRegression}(x, ...)} \arguments{ - \item{x}{ -} - \item{\dots}{ -} + \item{x}{x } + \item{\dots}{\dots } } diff --git a/man/squared.hinge.Rd b/man/squared.hinge.Rd index 
3b412ab..6c82372 100644 --- a/man/squared.hinge.Rd +++ b/man/squared.hinge.Rd @@ -4,10 +4,8 @@ \description{The squared hinge loss.} \usage{squared.hinge(x, e = 1)} \arguments{ - \item{x}{ -} - \item{e}{ -} + \item{x}{x } + \item{e}{e } } diff --git a/man/targetIntervals.Rd b/man/targetIntervals.Rd index d767530..b020a8e 100644 --- a/man/targetIntervals.Rd +++ b/man/targetIntervals.Rd @@ -27,6 +27,8 @@ IntervalRegression* functions.} \examples{ +data.table::setDTthreads(1) + library(penaltyLearning) data(neuroblastomaProcessed, envir=environment()) targets.dt <- targetIntervals( diff --git a/man/theme_no_space.Rd b/man/theme_no_space.Rd index deaa7c3..d262d5d 100644 --- a/man/theme_no_space.Rd +++ b/man/theme_no_space.Rd @@ -4,8 +4,7 @@ \description{ggplot2 theme element for no space between panels.} \usage{theme_no_space(...)} \arguments{ - \item{\dots}{ -} + \item{\dots}{\dots } } diff --git a/src/Makevars b/src/Makevars deleted file mode 100644 index ec71b7c..0000000 --- a/src/Makevars +++ /dev/null @@ -1 +0,0 @@ -CXX_STD = CXX11 diff --git a/tests/testthat.R b/tests/testthat.R index b0714f2..45f61b1 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,3 +1,2 @@ -if(require(testthat)){ - test_check("penaltyLearning") -} +data.table::setDTthreads(1) +if(require(testthat))test_check("penaltyLearning") diff --git a/tests/testthat/test-ROChange-aum.R b/tests/testthat/test-ROChange-aum.R index 522fe07..6cf3633 100644 --- a/tests/testthat/test-ROChange-aum.R +++ b/tests/testthat/test-ROChange-aum.R @@ -259,3 +259,33 @@ test_that("auc=2 for one error curve with one loop", { expect_equal(L$auc, 2) }) + +d <- function(min.log.lambda, fp, fn){ + data.table(min.log.lambda, fp, fn) +} +profile <- function(..., possible.fp, possible.fn, errors, labels){ + dt <- do.call(rbind, list(...)) + if(missing(possible.fp))possible.fp <- max(dt$fp) + if(missing(possible.fn))possible.fn <- max(dt$fn) + errors <- dt[, fp+fn] + if(missing(labels))labels <- max(errors) + dt[, 
data.table( + min.log.lambda, + max.log.lambda=c(min.log.lambda[-1], Inf), + fp, fn, errors, possible.fp, possible.fn, labels)] +} + +test_that("aum not -Inf", { + err <- profile( + d(-Inf, 0, 10), + d(2, 8/3, 8/3), + d(5, 10, 8/3), + d(7, 10, 25/3), + d(8, 5/3, 25/3), + d(9, 5/3, 8/3), + d(10, 10, 0)) + pred.dt <- data.table(problem=1, pred.log.lambda=0) + p <- data.table(problem=1, err) + roc.list <- penaltyLearning::ROChange(p, pred.dt, problem.vars="problem") + expect_true(all(roc.list$roc$fn >= 0)) +}) diff --git a/tests/testthat/test-labelError.R b/tests/testthat/test-labelError.R index 2165b6f..789ecc7 100644 --- a/tests/testthat/test-labelError.R +++ b/tests/testthat/test-labelError.R @@ -21,10 +21,12 @@ ann.trivial <- rbind( label("0changes", 2, 6)) models <- data.table( prob="five", + algo="opart", complexity=c(-1, -3, -5, -6)) changes <- data.table( prob="five", pos=c(1, 7, 1, 6, 17, 11), + algo="opart", complexity=c(-3, -5, -5, -6, -6, -6)) test_that("labelError throws informative errors", { expect_error({ @@ -71,6 +73,12 @@ test_that("labelError throws informative errors", { change.var="pos", model.vars="complexity") }, "label start must be less than end", fixed=TRUE) + expect_error({ + labelError(models, ann.trivial, changes, problem.vars="prob", + label.vars=c("start","end"), + change.var="pos", + model.vars=c("foo","complexity")) + }, "model.vars should be a column name of both models and changes (ID for model complexity, typically the number of changepoints or segments)", fixed=TRUE) }) trivial.list <- labelError( @@ -78,7 +86,7 @@ trivial.list <- labelError( problem.vars="prob", label.vars=c("start", "end"), change.var="pos", - model.vars="complexity") + model.vars=c("algo","complexity")) test_that("1 TP for complexity=-5, 2 errors for -6", { trivial.list$model.errors[, { expect_equal(complexity, c(-1, -3, -5, -6))