stan-dev · avehtari · Apr 10, 2024 · Apr 10, 2024 · Apr 10, 2024 · Apr 10, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -80,7 +80,8 @@ Suggests:
     doParallel,
     future,
     future.callr,
-    doFuture
+    doFuture,
+    progressr
 LinkingTo: Rcpp, RcppArmadillo
 Additional_repositories:
     https://mc-stan.org/r-packages/

diff --git a/R/cv_varsel.R b/R/cv_varsel.R
diff --git a/R/divergence_minimizers.R b/R/divergence_minimizers.R
@@ -91,18 +91,25 @@ divmin <- function(
     if (!requireNamespace("iterators", quietly = TRUE)) {
       stop("Please install the 'iterators' package.")
     }
+    if (verbose_divmin && use_progressr()) {
+      progressor_obj <- progressr::progressor(length(formulas))
+    } else {
+      progressor_obj <- NULL
+    }
     dot_args <- list(...)
     `%do_projpred%` <- foreach::`%dopar%`
     outdmin <- foreach::foreach(
       formula_s = formulas,
       projpred_var_s = iterators::iter(projpred_var, by = "column"),
       projpred_formula_no_random_s = projpred_formulas_no_random,
-      .export = c("sdivmin", "projpred_random", "dot_args"),
+      .packages = c("projpred"),
+      .export = c("sdivmin", "projpred_random", "dot_args", "progressor_obj"),
       .noexport = c(
         "object", "p_sel", "search_path", "p_ref", "refmodel", "formulas",
         "projpred_var", "projpred_ws_aug", "projpred_formulas_no_random"
       )
     ) %do_projpred% {
+      if (!is.null(progressor_obj)) progressor_obj()
       mssgs_warns_capt <- capt_mssgs_warns(
         soutdmin <- do.call(
           sdivmin,
@@ -649,19 +656,26 @@ divmin_augdat <- function(
     if (!requireNamespace("iterators", quietly = TRUE)) {
       stop("Please install the 'iterators' package.")
     }
+    if (verbose_divmin && use_progressr()) {
+      progressor_obj <- progressr::progressor(ncol(projpred_ws_aug))
+    } else {
+      progressor_obj <- NULL
+    }
     dot_args <- list(...)
     `%do_projpred%` <- foreach::`%dopar%`
     outdmin <- foreach::foreach(
       projpred_w_aug_s = iterators::iter(projpred_ws_aug, by = "column"),
+      .packages = c("projpred"),
       .export = c(
         "sdivmin", "formula", "data", "family", "projpred_formula_no_random",
-        "projpred_random", "dot_args"
+        "projpred_random", "dot_args", "progressor_obj"
       ),
       .noexport = c(
         "object", "p_sel", "search_path", "p_ref", "refmodel", "projpred_var",
         "projpred_ws_aug", "linkobjs"
       )
     ) %do_projpred% {
+      if (!is.null(progressor_obj)) progressor_obj()
       mssgs_warns_capt <- capt_mssgs_warns(
         soutdmin <- do.call(
           sdivmin,

diff --git a/R/glmfun.R b/R/glmfun.R
@@ -16,7 +16,7 @@ standardization <- function(x, center = TRUE, scale = TRUE, weights = NULL) {
     mx <- rep(0, ncol(x))
   }
   if (scale) {
-    sx <- apply(x, 2, weighted.sd, w)
+    sx <- apply(x, 2, .weighted_sd, w)
   } else {
     sx <- rep(1, ncol(x))
   }

diff --git a/R/methods.R b/R/methods.R
@@ -722,7 +722,7 @@ plot.vsel <- function(
   # Parse input:
   object <- x
   validate_vsel_object_stats(object, stats, resp_oscale = resp_oscale)
-  baseline <- validate_baseline(object$refmodel, baseline, deltas)
+  baseline <- validate_baseline(object, baseline, deltas)
   if (!is.null(ranking_repel) && !requireNamespace("ggrepel", quietly = TRUE)) {
     warning("Package 'ggrepel' is needed for a non-`NULL` argument ",
             "`ranking_repel`, but could not be found. Setting `ranking_repel` ",
@@ -1065,11 +1065,11 @@ plot.vsel <- function(
     #                        direction = 1)
     ###
   }
-  if (all(stats %in% c("rmse", "auc"))) {
+  if (all(stats %in% c("auc"))) {
     ci_type <- "bootstrap "
   } else if (all(stats %in% c("gmpd"))) {
     ci_type <- "exponentiated normal-approximation "
-  } else if (all(!stats %in% c("rmse", "auc", "gmpd"))) {
+  } else if (all(!stats %in% c("auc", "gmpd"))) {
     ci_type <- "normal-approximation "
   } else {
     ci_type <- ""
@@ -1158,23 +1158,23 @@ plot.vsel <- function(
 #'   are again all observations because the test set is the same as the training
 #'   set). Available statistics are:
 #'   * `"elpd"`: expected log (pointwise) predictive density (for a new
-#'   dataset). Estimated by the sum of the observation-specific log predictive
-#'   density values (with each of these predictive density values being
-#'   a---possibly weighted---average across the parameter draws).
-#'   * `"mlpd"`: mean log predictive density, that is, `"elpd"` divided by the
-#'   number of observations.
+#'   dataset) (ELPD). Estimated by the sum of the observation-specific log
+#'   predictive density values (with each of these predictive density values
+#'   being a---possibly weighted---average across the parameter draws).
+#'   * `"mlpd"`: mean log predictive density (MLPD), that is, the ELPD divided
+#'   by the number of observations.
 #'   * `"gmpd"`: geometric mean predictive density (GMPD), that is, [exp()] of
-#'   `"mlpd"`. The GMPD is especially helpful for discrete response families
+#'   the MLPD. The GMPD is especially helpful for discrete response families
 #'   (because there, the GMPD is bounded by zero and one). For the corresponding
 #'   standard error, the delta method is used. The corresponding confidence
 #'   interval type is "exponentiated normal approximation" because the
 #'   confidence interval bounds are the exponentiated confidence interval bounds
-#'   of the `"mlpd"`.
+#'   of the MLPD.
 #'   * `"mse"`: mean squared error (only available in the situations mentioned
 #'   in section "Details" below).
 #'   * `"rmse"`: root mean squared error (only available in the situations
 #'   mentioned in section "Details" below). For the corresponding standard error
-#'   and lower and upper confidence interval bounds, bootstrapping is used.
+#'   and lower and upper confidence interval bounds, the delta method is used.
 #'   * `"acc"` (or its alias, `"pctcorr"`): classification accuracy (only
 #'   available in the situations mentioned in section "Details" below). By
 #'   "classification accuracy", we mean the proportion of correctly classified
@@ -1283,7 +1283,7 @@ summary.vsel <- function(
     ...
 ) {
   validate_vsel_object_stats(object, stats, resp_oscale = resp_oscale)
-  baseline <- validate_baseline(object$refmodel, baseline, deltas)
+  baseline <- validate_baseline(object, baseline, deltas)
 
   # Initialize output:
   out <- c(

diff --git a/R/misc.R b/R/misc.R
@@ -1,8 +1,8 @@
 .onAttach <- function(...) {
   ver <- utils::packageVersion("projpred")
   msg <- paste0("This is projpred version ", ver, ".")
-  msg <- paste0(msg, " ", "NOTE: In projpred 2.7.0, the default search method ",
-                "was set to \"forward\" (for all kinds of models).")
+  msg <- paste0(msg, "\n", "NOTE: In projpred 2.7.0, the default search ",
+                "method was set to \"forward\" (for all kinds of models).")
   packageStartupMessage(msg)
 }
 
@@ -14,7 +14,7 @@ nms_y_wobs_test <- function(wobs_nm = "wobs") {
   c("y", "y_oscale", wobs_nm)
 }
 
-weighted.sd <- function(x, w, na.rm = FALSE) {
+.weighted_sd <- function(x, w, na.rm = FALSE) {
   if (na.rm) {
     ind <- !is.na(w) & !is.na(x)
     n <- sum(ind)
@@ -63,10 +63,10 @@ ilinkfun_raw <- function(x, link_nm) {
   return(basic_ilink(x))
 }
 
-auc <- function(x) {
+.auc <- function(x) {
   resp <- x[, 1]
   pred <- x[, 2]
-  wcv <- x[, 3]
+  wobs <- x[, 3]
 
   # Make it explicit that `x` should not be used anymore (due to the possibility
   # of `NA`s, but also due to the re-ordering):
@@ -77,9 +77,9 @@ auc <- function(x) {
 
   resp <- resp[ord]
   pred <- pred[ord]
-  wcv <- wcv[ord]
+  wobs <- wobs[ord]
 
-  w0 <- w1 <- wcv
+  w0 <- w1 <- wobs
   # CAUTION: The following check also ensures that `resp` does not have `NA`s:
   stopifnot(all(resp %in% c(0, 1)))
   w0[resp == 1] <- 0 # for calculating the false positive rate (fpr)
@@ -152,8 +152,8 @@ validate_vsel_object_stats <- function(object, stats, resp_oscale = TRUE) {
   }
   resp_oscale <- object$refmodel$family$for_latent && resp_oscale
 
-  trad_stats <- c("elpd", "mlpd", "gmpd", "mse", "rmse", "acc", "pctcorr",
-                  "auc")
+  trad_stats <- c("elpd", "mlpd", "gmpd", "mse", "rmse", "R2",
+                  "acc", "pctcorr", "auc")
   trad_stats_binom_only <- c("acc", "pctcorr", "auc")
   augdat_stats <- c("elpd", "mlpd", "gmpd", "acc", "pctcorr")
   resp_oscale_stats_fac <- augdat_stats
@@ -196,17 +196,35 @@ validate_vsel_object_stats <- function(object, stats, resp_oscale = TRUE) {
   return(invisible(TRUE))
 }
 
-validate_baseline <- function(refmodel, baseline, deltas) {
+# Validate argument `baseline` against the object it is going to be used with.
+#
+# @param vsel_obj The object passed on by `plot.vsel()` or `summary.vsel()`;
+#   its elements `refmodel`, `cv_method`, and `nloo` are read here.
+# @param baseline A single character string, either `"ref"` or `"best"`.
+# @param deltas A single logical value; `TRUE` means that differences (or
+#   ratios) vs. the baseline are requested.
+#
+# @return The (unchanged) value of `baseline`. An error is thrown if
+#   `baseline` is invalid or incompatible with `vsel_obj` or `deltas`.
+validate_baseline <- function(vsel_obj, baseline, deltas) {
   stopifnot(!is.null(baseline))
   if (!(baseline %in% c("ref", "best"))) {
     stop("Argument 'baseline' must be either 'ref' or 'best'.")
   }
-  if (baseline == "ref" && deltas == TRUE && inherits(refmodel, "datafit")) {
+  if (baseline == "ref" && deltas == TRUE &&
+      inherits(vsel_obj$refmodel, "datafit")) {
     # no reference model (or the results missing for some other reason),
     # so cannot compute differences (or ratios) vs. the reference model
     stop("Cannot use deltas = TRUE and baseline = 'ref' when there is no ",
          "reference model.")
   }
+  # `identical()` (instead of `==`) keeps this condition a single `TRUE` or
+  # `FALSE` even if `cv_method` is `NULL` (where `==` would give a zero-length
+  # result and hence an `NA` condition), so the check is then simply skipped:
+  if (baseline == "best" && identical(vsel_obj$cv_method, "LOO") &&
+      vsel_obj$nloo < vsel_obj$refmodel$nobs) {
+    stop("Cannot use `baseline = \"best\"` in case of subsampled LOO-CV.")
+  }
   return(baseline)
 }
 
@@ -705,3 +710,18 @@ element_unq <- function(list_obj, nm) {
   }
   return(el_unq)
 }
+
+# Decide whether progress should be reported via the 'progressr' package.
+#
+# The global option `projpred.use_progressr` is consulted first; if it is
+# unset, progress reporting is requested in interactive sessions only. A
+# request is honored only if the suggested package 'progressr' is installed,
+# so a `TRUE` result guarantees that `progressr::progressor()` can be called.
+#
+# @return A single logical value (`TRUE` or `FALSE`, never `NA`).
+use_progressr <- function() {
+  requested <- getOption("projpred.use_progressr", interactive())
+  # `isTRUE()` maps anything but a single `TRUE` (e.g., `NA` or a string) to
+  # `FALSE`; the namespace is only loaded if progress was actually requested:
+  isTRUE(requested) && requireNamespace("progressr", quietly = TRUE)
+}