diff --git a/.Rbuildignore b/.Rbuildignore index f89509e2..9d5e7202 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,4 +8,5 @@ ^\.Rproj\.user$ ^docu$ ^test.R$ -^backlog$ \ No newline at end of file +^backlog$ +^CRAN-SUBMISSION$ diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 00000000..4ba6c37c --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 0.1.0 +Date: 2023-07-16 18:48:26 UTC +SHA: 9638974266fcc5f32a51870bccf0d08bde810551 diff --git a/DESCRIPTION b/DESCRIPTION index f0dd27f0..7d92178e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,15 +3,16 @@ Title: Interaction Statistics Version: 0.1.0 Authors@R: person("Michael", "Mayer", , "mayermichael79@gmail.com", role = c("aut", "cre")) -Description: Fast, model-agnostic implementation of Friedman and Popescu's - H statistics of interaction strength <doi:10.1214/07-AOAS148>. These - statistics quantify interaction strength per feature, feature pair, - and feature triple. The package supports multi-output predictions and - can account for case weights. In addition, several variants of the - original statistics are provided. The shape of the interactions can - be explored through partial dependence plots or individual conditional - expectation plots. 'DALEX' explainers, meta learners ('mlr3', 'tidymodels', - 'caret') and most other models work out-of-the-box. +Description: Fast, model-agnostic implementation of different H-statistics + introduced by Jerome H. Friedman and Bogdan E. Popescu (2008) + <doi:10.1214/07-AOAS148>. These statistics quantify interaction + strength per feature, feature pair, and feature triple. The package + supports multi-output predictions and can account for case weights. + In addition, several variants of the original statistics are provided. + The shape of the interactions can be explored through partial + dependence plots or individual conditional expectation plots. 'DALEX' + explainers, meta learners ('mlr3', 'tidymodels', 'caret') and most + other models work out-of-the-box. 
License: GPL (>= 2) Depends: R (>= 3.2.0) diff --git a/R/H2_overall.R b/R/H2_overall.R index 8a32414d..94fefd29 100644 --- a/R/H2_overall.R +++ b/R/H2_overall.R @@ -1,6 +1,6 @@ #' Overall Interaction Strength #' -#' Friedman and Popescu's \eqn{H^2_j} statistics of overall interaction strength per +#' Friedman and Popescu's statistic of overall interaction strength per #' feature, see Details. #' By default, the results are plotted as barplot. Set `plot = FALSE` to get numbers. #' @@ -13,10 +13,10 @@ #' \deqn{ #' F(\mathbf{x}) = F_j(x_j) + F_{\setminus j}(\mathbf{x}_{\setminus j}). #' } -#' Correspondingly, Friedman and Popescu's \eqn{H^2_j} statistic of overall interaction +#' Correspondingly, Friedman and Popescu's statistic of overall interaction #' strength is given by #' \deqn{ -#' H_{j}^2 = \frac{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i) - +#' H_j^2 = \frac{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i) - #' \hat F_j(x_{ij}) - \hat F_{\setminus j}(\mathbf{x}_{i\setminus j}) #' \big]^2}{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i)\big]^2} #' } diff --git a/R/H2_pairwise.R b/R/H2_pairwise.R index 10bd10d7..1ef7368d 100644 --- a/R/H2_pairwise.R +++ b/R/H2_pairwise.R @@ -1,6 +1,6 @@ #' Pairwise Interaction Strength #' -#' Friedman and Popescu's statistics of pairwise interaction strength, see Details. +#' Friedman and Popescu's statistic of pairwise interaction strength, see Details. #' By default, the results are plotted as barplot. Set `plot = FALSE` to get numbers. #' #' @details @@ -11,7 +11,7 @@ #' \deqn{ #' F_{jk}(x_j, x_k) = F_j(x_j)+ F_k(x_k). 
#' } -#' Correspondingly, Friedman and Popescu's \eqn{H_{jk}^2} statistic of pairwise +#' Correspondingly, Friedman and Popescu's statistic of pairwise #' interaction strength is defined as #' \deqn{ #' H_{jk}^2 = \frac{A_{jk}}{\frac{1}{n} \sum_{i = 1}^n\big[\hat F_{jk}(x_{ij}, x_{ik})\big]^2}, diff --git a/R/hstats.R b/R/hstats.R index d1c5f920..b00f2bdf 100644 --- a/R/hstats.R +++ b/R/hstats.R @@ -2,14 +2,14 @@ #' #' @description #' This is the main function of the package. It does the expensive calculations behind -#' the following interaction statistics: +#' the following H-statistics: #' - Total interaction strength \eqn{H^2}, a statistic measuring the proportion of #' prediction variability unexplained by main effects of `v`, see [h2()] for details. -#' - Friedman and Popescu's \eqn{H^2_j} statistic of overall interaction strength per +#' - Friedman and Popescu's statistic \eqn{H^2_j} of overall interaction strength per #' feature, see [h2_overall()] for details. -#' - Friedman and Popescu's \eqn{H^2_{jk}} statistic of pairwise interaction strength, +#' - Friedman and Popescu's statistic \eqn{H^2_{jk}} of pairwise interaction strength, #' see [h2_pairwise()] for details. -#' - Friedman and Popescu's \eqn{H^2_{jkl}} statistic of three-way interaction strength, +#' - Friedman and Popescu's statistic \eqn{H^2_{jkl}} of three-way interaction strength, #' see [h2_threeway()] for details. #' #' Furthermore, it allows to calculate an experimental partial dependence based @@ -283,7 +283,7 @@ hstats.explainer <- function(object, v = colnames(object[["data"]]), #' Print Method #' -#' Print method for object of class "hstats". Shows \eqn{H^2} statistic. +#' Print method for object of class "hstats". Shows \eqn{H^2}. #' #' @param x An object of class "hstats". #' @param ... Further arguments passed from other methods. 
diff --git a/R/pd_importance.R b/R/pd_importance.R index c770aafc..e81042bf 100644 --- a/R/pd_importance.R +++ b/R/pd_importance.R @@ -2,8 +2,8 @@ #' #' Experimental variable importance method based on partial dependence functions. #' While related to Greenwell et al., our suggestion measures not only main effect -#' strength but also interaction effects. It is very closely related to the -#' \eqn{H^2_j} statistics, see Details. By default, the results are plotted as barplot. +#' strength but also interaction effects. It is very closely related to \eqn{H^2_j}, +#' see Details. By default, the results are plotted as barplot. #' Set `plot = FALSE` to get numbers. #' #' @details diff --git a/README.md b/README.md index 2e9e75a3..e1fa1302 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,6 @@ [![CRAN status](http://www.r-pkg.org/badges/version/hstats)](https://cran.r-project.org/package=hstats) [![R-CMD-check](https://github.com/mayer79/hstats/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/mayer79/hstats/actions) [![Codecov test coverage](https://codecov.io/gh/mayer79/hstats/branch/main/graph/badge.svg)](https://app.codecov.io/gh/mayer79/hstats?branch=main) -[![Lifecycle: maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#experimental) [![](https://cranlogs.r-pkg.org/badges/hstats)](https://cran.r-project.org/package=hstats) [![](https://cranlogs.r-pkg.org/badges/grand-total/hstats?color=orange)](https://cran.r-project.org/package=hstats) @@ -16,7 +15,7 @@ **What makes a ML model black-box? It's the interactions!** -The first step in understanding interactions is to measure their strength. This is exactly what Friedman and Popescu's H statistics [1] do: +The first step in understanding interactions is to measure their strength. This is exactly what Friedman and Popescu's H-statistics [1] do: | Statistic | Short description | How to read its value? 
| |-------------|------------------------------------------|-------------------------------------------------------------------------------------------------------| @@ -32,8 +31,8 @@ The core functions `hstats()`, `partial_dep()`, and `ice()` can directly be appl ## Limitations -1. H statistics are based on partial dependence estimates and are thus as good or bad as these. One of their problems is that the model is applied to unseen/impossible feature combinations. In extreme cases, H statistics intended to be in the range between 0 and 1 can become larger than 1. Accumulated local effects (ALE) [8] mend above problem of partial dependence estimates. They, however, depend on the notion of "closeness", which is highly non-trivial in higher dimension and for discrete features. -2. Due to their computational complexity, H statistics are usually evaluated on relatively small subsets of the training (or validation/test) data. Consequently, the estimates are typically not very robust. To get more robust results, increase the default `n_max = 300` of `hstats()`. +1. H-statistics are based on partial dependence estimates and are thus as good or bad as these. One of their problems is that the model is applied to unseen/impossible feature combinations. In extreme cases, H-statistics intended to be in the range between 0 and 1 can become larger than 1. Accumulated local effects (ALE) [8] mend above problem of partial dependence estimates. They, however, depend on the notion of "closeness", which is highly non-trivial in higher dimension and for discrete features. +2. Due to their computational complexity, H-statistics are usually evaluated on relatively small subsets of the training (or validation/test) data. Consequently, the estimates are typically not very robust. To get more robust results, increase the default `n_max = 300` of `hstats()`. 
## Landscape @@ -93,7 +92,7 @@ fit <- xgb.train( ### Interaction statistics -Let's calculate different H statistics via `hstats()`: +Let's calculate different H-statistics via `hstats()`: ```r # 3 seconds on simple laptop - a random forest will take 1-2 minutes @@ -122,7 +121,7 @@ plot(s) # Or summary(s) for numeric output **Remarks** 1. Pairwise statistics $H^2_{jk}$ are calculated only for the features with strong overall interactions $H^2_j$. -2. H statistics need to repeatedly calculate predictions on up to $n^2$ rows. That is why {hstats} samples 300 rows by default. To get more robust results, increase this value at the price of slower run time. +2. H-statistics need to repeatedly calculate predictions on up to $n^2$ rows. That is why {hstats} samples 300 rows by default. To get more robust results, increase this value at the price of slower run time. 3. Pairwise statistics $H^2_{jk}$ measures interaction strength relative to the combined effect of the two features. This does not necessarily show which interactions are strongest in absolute numbers. To do so, we can study unnormalized statistics: ```r @@ -261,7 +260,7 @@ $$ F(\boldsymbol x) = F_j(x_j) + F_{\setminus j}(\boldsymbol x_{\setminus j}). $$ -Correspondingly, Friedman and Popescu's $H^2_j$ statistic of overall interaction strength is given by +Correspondingly, Friedman and Popescu's statistic of overall interaction strength is given by $$ H_{j}^2 = \frac{\frac{1}{n} \sum_{i = 1}^n\big[F(\boldsymbol x_i) - \hat F_j(x_{ij}) - \hat F_{\setminus j}(\boldsymbol x_{i\setminus j})\big]^2}{\frac{1}{n} \sum_{i = 1}^n\big[F(\boldsymbol x_i)\big]^2}. @@ -285,7 +284,7 @@ $$ F_{jk}(x_j, x_k) = F_j(x_j) + F_k(x_k). 
$$ -Correspondingly, Friedman and Popescu's $H_{jk}^2$ statistic of pairwise interaction strength is defined as +Correspondingly, Friedman and Popescu's statistic of pairwise interaction strength is defined as $$ H_{jk}^2 = \frac{A_{jk}}{\frac{1}{n} \sum_{i = 1}^n\big[\hat F_{jk}(x_{ij}, x_{ik})\big]^2} @@ -371,7 +370,7 @@ In [5], $1 - H^2$ is called *additivity index*. A similar measure using accumula #### Workflow -Calculation of all $H_j^2$ statistics requires $O(n^2 p)$ predictions, while calculating of all pairwise $H_{jk}$ requires $O(n^2 p^2$ predictions. Therefore, we suggest to reduce the workflow in two important ways: +Calculation of all $H_j^2$ requires $O(n^2 p)$ predictions, while calculation of all pairwise $H_{jk}$ requires $O(n^2 p^2)$ predictions. Therefore, we suggest to reduce the workflow in two important ways: 1. Evaluate the statistics only on a subset of the data, e.g., on $n' = 300$ observations. 2. Calculate $H_j^2$ for all features. Then, select a small number $m = O(\sqrt{p})$ of features with highest $H^2_j$ and do pairwise calculations only on this subset. diff --git a/cran-comments.md b/cran-comments.md index 6e8e7760..8a885b2f 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,3 +1,10 @@ +# Resubmission + +- Fixing an indirect URL in the README +- Sticking to "authors (year) <doi:...>" reference in DESCRIPTION. + +# Original message + Hello CRAN team Trying to submit a new package that calculates Friedman and Popescu's H statistics in many variants. @@ -10,9 +17,6 @@ Michael ## Local checks seem ok -❯ checking for future file timestamps ... NOTE - unable to verify current time - ❯ checking HTML version of manual ... 
NOTE Skipping checking HTML validation: no command 'tidy' found @@ -21,7 +25,6 @@ Michael New submission Possibly misspelled words in DESCRIPTION: - Popescu's (6:66) explainers (13:32) ## Winbuilder seems ok diff --git a/docu/document.log b/docu/document.log index 4a3073bc..9c7387d6 100644 --- a/docu/document.log +++ b/docu/document.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.24 (MiKTeX 22.3) (preloaded format=pdflatex 2022.5.15) 16 JUL 2023 20:32 +This is pdfTeX, Version 3.141592653-2.6-1.40.24 (MiKTeX 22.3) (preloaded format=pdflatex 2022.5.15) 17 JUL 2023 21:26 entering extended mode restricted \write18 enabled. %&-line parsing enabled. @@ -49,10 +49,21 @@ LaTeX Font Info: External font `cmex10' loaded for size (Font) <7> on input line 10. LaTeX Font Info: External font `cmex10' loaded for size (Font) <5> on input line 10. - [1 -{C:/Users/Michael/AppData/Local/MiKTeX/2.9/fonts/map/pdftex/pdftex.map}] [2] [3 -] [4] (document.bbl) [5] (document.aux) ) +Overfull \hbox (1.00066pt too wide) in paragraph at lines 28--30 +\OT1/cmr/m/n/10 Correspondingly, Fried-man and Popescu's statis-tic of over-all + in-ter-ac-tion strength + [] + +[1 + +{C:/Users/Michael/AppData/Local/MiKTeX/2.9/fonts/map/pdftex/pdftex.map}] +Overfull \hbox (8.00066pt too wide) in paragraph at lines 48--50 +\OT1/cmr/m/n/10 Correspondingly, Fried-man and Popescu's statis-tic of pair-wis +e in-ter-ac-tion strength + [] + +[2] [3] [4] (document.bbl) [5] (document.aux) ) Here is how much of TeX's memory you used: 459 strings out of 478608 8568 string characters out of 2850693 @@ -60,22 +71,22 @@ Here is how much of TeX's memory you used: 18687 multiletter control sequences out of 15000+600000 472077 words of font info for 37 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 34i,6n,38p,423b,184s stack positions out of 10000i,1000n,20000p,200000b,80000s - -Output written on document.pdf (5 pages, 174976 bytes). 
+ 34i,6n,38p,423b,182s stack positions out of 10000i,1000n,20000p,200000b,80000s + + +Output written on document.pdf (5 pages, 174950 bytes). PDF statistics: 88 PDF objects out of 1000 (max. 8388607) 0 named destinations out of 1000 (max. 500000) diff --git a/docu/document.synctex.gz b/docu/document.synctex.gz index dd7c22ba..1355d599 100644 Binary files a/docu/document.synctex.gz and b/docu/document.synctex.gz differ diff --git a/docu/document.tex b/docu/document.tex index 45403d22..952e50a6 100644 --- a/docu/document.tex +++ b/docu/document.tex @@ -26,7 +26,7 @@ \subsection{Overall interaction strength} $$ F(\mathbf{x}) = F_j(x_j) + F_{\setminus j}(\mathbf{x}_{\setminus j}). $$ -Correspondingly, Friedman and Popescu's $H^2_j$ statistic of overall interaction strength is given by +Correspondingly, Friedman and Popescu's statistic of overall interaction strength is given by $$ H_{j}^2 = \frac{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i) - \hat F_j(x_{ij}) - \hat F_{\setminus j}(\mathbf{x}_{i\setminus j})\big]^2}{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i)\big]^2}. $$ @@ -46,7 +46,7 @@ \subsection{Pairwise interaction strength} $$ F_{jk}(x_j, x_k) = F_j(x_j)+ F_k(x_k). $$ -Correspondingly, Friedman and Popescu's $H_{jk}^2$ statistic of pairwise interaction strength is defined as +Correspondingly, Friedman and Popescu's statistic of pairwise interaction strength is defined as $$ H_{jk}^2 = \frac{A_{jk}}{\frac{1}{n} \sum_{i = 1}^n\big[\hat F_{jk}(x_{ij}, x_{ik})\big]^2} $$ @@ -106,7 +106,7 @@ \subsection{Total interaction strength of all variables together} In \cite{zolkowski2023}, $1 - H^2$ is called {\em additivity index}. A similar measure using accumulated local effects is discussed in \cite{molnar2020}. \subsection{Workflow} -Calculation of all $H_j^2$ statistics requires $O(n^2p)$ predictions, while calculating of all pairwise $H_{jk}$ requires $O(n^2 p^2)$ predictions. 
Therefore, we suggest to reduce the workflow in two important ways: +Calculation of all $H_j^2$ requires $O(n^2p)$ predictions, while calculating of all pairwise $H_{jk}$ requires $O(n^2 p^2)$ predictions. Therefore, we suggest to reduce the workflow in two important ways: \begin{itemize} \item Evaluate the statistics only on a subset of the data, e.g., on $n' = 300$ observations. \item Calculate $H_j^2$ for all features. Then, select a small number $m = O(\sqrt{p})$ of features with highest $H^2_j$ and do pairwise calculations only on this subset. @@ -130,9 +130,9 @@ \section{Variable importance} \section{Limitation} \begin{enumerate} -\item H statistics are based on partial dependence estimates and are thus as good or bad as these. One of their problems is that the model is applied to unseen/impossible feature combinations. In extreme cases, H statistics intended to be in the range between 0 and 1 can become larger than 1. +\item H-statistics are based on partial dependence estimates and are thus as good or bad as these. One of their problems is that the model is applied to unseen/impossible feature combinations. In extreme cases, H-statistics intended to be in the range between 0 and 1 can become larger than 1. Accumulated local effects (ALE) \cite{apley2016} mend above problem of partial dependence estimates. They, however, depend on the notion of ``closeness'', which is highly non-trivial in higher dimension and for discrete features. -\item Due to their computational complexity, H statistics are usually evaluated on relatively small subsets of the training (or validation/test) data. Consequently, the estimates are typically not very robust. To get more robust results, increase the sample size. +\item Due to their computational complexity, H-statistics are usually evaluated on relatively small subsets of the training (or validation/test) data. Consequently, the estimates are typically not very robust. To get more robust results, increase the sample size. 
\end{enumerate} \bibliographystyle{ieeetr} diff --git a/man/H2_overall.Rd b/man/H2_overall.Rd index b8901160..118a1ceb 100644 --- a/man/H2_overall.Rd +++ b/man/H2_overall.Rd @@ -48,7 +48,7 @@ A matrix of statistics (one row per variable, one column per prediction dimensio or a "ggplot" object (if \code{plot = TRUE}). } \description{ -Friedman and Popescu's \eqn{H^2_j} statistics of overall interaction strength per +Friedman and Popescu's statistic of overall interaction strength per feature, see Details. By default, the results are plotted as barplot. Set \code{plot = FALSE} to get numbers. } @@ -61,10 +61,10 @@ dependence \eqn{F_j} on \eqn{x_j} and the (centered) partial dependence \deqn{ F(\mathbf{x}) = F_j(x_j) + F_{\setminus j}(\mathbf{x}_{\setminus j}). } -Correspondingly, Friedman and Popescu's \eqn{H^2_j} statistic of overall interaction +Correspondingly, Friedman and Popescu's statistic of overall interaction strength is given by \deqn{ - H_{j}^2 = \frac{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i) - + H_j^2 = \frac{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i) - \hat F_j(x_{ij}) - \hat F_{\setminus j}(\mathbf{x}_{i\setminus j}) \big]^2}{\frac{1}{n} \sum_{i = 1}^n\big[F(\mathbf{x}_i)\big]^2} } diff --git a/man/H2_pairwise.Rd b/man/H2_pairwise.Rd index 29181011..afecdcfa 100644 --- a/man/H2_pairwise.Rd +++ b/man/H2_pairwise.Rd @@ -49,7 +49,7 @@ or a "ggplot" object (if \code{plot = TRUE}). If no pairwise statistics have been calculated, the function returns \code{NULL}. } \description{ -Friedman and Popescu's statistics of pairwise interaction strength, see Details. +Friedman and Popescu's statistic of pairwise interaction strength, see Details. By default, the results are plotted as barplot. Set \code{plot = FALSE} to get numbers. } \details{ @@ -60,7 +60,7 @@ dependencies \eqn{F_j} and \eqn{F_k}, i.e., \deqn{ F_{jk}(x_j, x_k) = F_j(x_j)+ F_k(x_k). 
} -Correspondingly, Friedman and Popescu's \eqn{H_{jk}^2} statistic of pairwise +Correspondingly, Friedman and Popescu's statistic of pairwise interaction strength is defined as \deqn{ H_{jk}^2 = \frac{A_{jk}}{\frac{1}{n} \sum_{i = 1}^n\big[\hat F_{jk}(x_{ij}, x_{ik})\big]^2}, diff --git a/man/hstats.Rd b/man/hstats.Rd index 74160465..3ed1da63 100644 --- a/man/hstats.Rd +++ b/man/hstats.Rd @@ -126,15 +126,15 @@ partial dependence functions \eqn{F_{jkl}}. } \description{ This is the main function of the package. It does the expensive calculations behind -the following interaction statistics: +the following H-statistics: \itemize{ \item Total interaction strength \eqn{H^2}, a statistic measuring the proportion of prediction variability unexplained by main effects of \code{v}, see \code{\link[=h2]{h2()}} for details. -\item Friedman and Popescu's \eqn{H^2_j} statistic of overall interaction strength per +\item Friedman and Popescu's statistic \eqn{H^2_j} of overall interaction strength per feature, see \code{\link[=h2_overall]{h2_overall()}} for details. -\item Friedman and Popescu's \eqn{H^2_{jk}} statistic of pairwise interaction strength, +\item Friedman and Popescu's statistic \eqn{H^2_{jk}} of pairwise interaction strength, see \code{\link[=h2_pairwise]{h2_pairwise()}} for details. -\item Friedman and Popescu's \eqn{H^2_{jkl}} statistic of three-way interaction strength, +\item Friedman and Popescu's statistic \eqn{H^2_{jkl}} of three-way interaction strength, see \code{\link[=h2_threeway]{h2_threeway()}} for details. } diff --git a/man/pd_importance.Rd b/man/pd_importance.Rd index 59006ea3..47613648 100644 --- a/man/pd_importance.Rd +++ b/man/pd_importance.Rd @@ -50,8 +50,8 @@ or a "ggplot" object (if \code{plot = TRUE}). \description{ Experimental variable importance method based on partial dependence functions. While related to Greenwell et al., our suggestion measures not only main effect -strength but also interaction effects. 
It is very closely related to the -\eqn{H^2_j} statistics, see Details. By default, the results are plotted as barplot. +strength but also interaction effects. It is very closely related to \eqn{H^2_j}, +see Details. By default, the results are plotted as barplot. Set \code{plot = FALSE} to get numbers. } \details{ diff --git a/man/print.hstats.Rd b/man/print.hstats.Rd index 40f42ec1..0020ed45 100644 --- a/man/print.hstats.Rd +++ b/man/print.hstats.Rd @@ -15,7 +15,7 @@ Invisibly, the input is returned. } \description{ -Print method for object of class "hstats". Shows \eqn{H^2} statistic. +Print method for object of class "hstats". Shows \eqn{H^2}. } \seealso{ See \code{\link[=hstats]{hstats()}} for examples. diff --git a/packaging.R b/packaging.R index 84524fee..3929b07f 100644 --- a/packaging.R +++ b/packaging.R @@ -16,8 +16,8 @@ use_description( fields = list( Title = "Interaction Statistics", Version = "0.1.0", - Description = "Fast, model-agnostic implementation of Friedman and Popescu's - H statistics of interaction strength <doi:10.1214/07-AOAS148>. + Description = "Fast, model-agnostic implementation of different H-statistics + introduced by Jerome H. Friedman and Bogdan E. Popescu (2008) <doi:10.1214/07-AOAS148>. These statistics quantify interaction strength per feature, feature pair, and feature triple. The package supports multi-output predictions and can account for case weights.