Skip to content

Commit

Permalink
Merge pull request #426 from fweber144/warn323
Browse files Browse the repository at this point in the history
Warning for issue #323
  • Loading branch information
fweber144 authored Jun 27, 2023
2 parents f8ec8fe + 8e05c19 commit 8ee79dd
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 8 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ If you read this from a place other than <https://mc-stan.org/projpred/news/inde
* If an L1 search selects an interaction term before all involved lower-order interaction terms (including main-effect terms) have been selected, the predictor ranking is now automatically modified so that the lower-order interaction terms come before this interaction term. A corresponding warning is thrown, which may be deactivated by setting the global option `projpred.warn_L1_interactions` to `FALSE`. Previously, beginning with version 2.5.0, only a warning was thrown and this only if an L1 search selected an interaction term before all involved *main-effect* terms had been selected. (GitHub: #420)
* Added a progress bar for `project()` (when using the built-in divergence minimizers). For this, `project()` has gained a new argument `verbose` which can also be controlled via the global option `projpred.verbose_project`. By default, the new progress bar is activated. (GitHub: #421)
* Added a new argument `parallel` to `cv_varsel()`. With `parallel = TRUE`, costly parts of **projpred**'s cross-validation (CV) can be run in parallel. See the documentation of that new argument (and section "Note" of `cv_varsel()`'s documentation) for details. (GitHub: #422)
* Throw a warning for issue #323 (for multilevel Gaussian models, the projection onto the full model can be instable). (GitHub: #426)

## Bug fixes

Expand Down
6 changes: 4 additions & 2 deletions R/cv_varsel.R
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,12 @@ cv_varsel.refmodel <- function(
}

refmodel <- object
nterms_all <- count_terms_in_formula(refmodel$formula) - 1L
# Parse arguments which also exist in varsel():
args <- parse_args_varsel(
refmodel = refmodel, method = method, refit_prj = refit_prj,
nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms
nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms,
nterms_all = nterms_all
)
method <- args$method
refit_prj <- args$refit_prj
Expand Down Expand Up @@ -260,7 +262,7 @@ cv_varsel.refmodel <- function(
y_wobs_test,
nobs_test = nrow(y_wobs_test),
summaries = sel_cv$summaries,
nterms_all = count_terms_in_formula(refmodel$formula) - 1L,
nterms_all,
nterms_max,
method,
cv_method,
Expand Down
12 changes: 12 additions & 0 deletions R/project.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,18 @@ project <- function(object, nterms = NULL, solution_terms = NULL,
nclusters <- 1
}

nterms_max <- max(nterms)
nterms_all <- count_terms_in_formula(refmodel$formula) - 1L
if (nterms_max == nterms_all &&
formula_contains_group_terms(refmodel$formula) &&
(refmodel$family$family == "gaussian" || refmodel$family$for_latent)) {
warning(
"In case of the Gaussian family (also in case of the latent projection) ",
"and multilevel terms, the projection onto the full model can be ",
"instable and even lead to an error, see GitHub issue #323."
)
}

## get the clustering or thinning
if (refit_prj) {
p_ref <- get_refdist(refmodel, ndraws = ndraws, nclusters = nclusters)
Expand Down
21 changes: 18 additions & 3 deletions R/varsel.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,11 +213,13 @@ varsel.refmodel <- function(object, d_test = NULL, method = NULL,
}

refmodel <- object
nterms_all <- count_terms_in_formula(refmodel$formula) - 1L

# Parse arguments:
args <- parse_args_varsel(
refmodel = refmodel, method = method, refit_prj = refit_prj,
nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms
nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms,
nterms_all = nterms_all
)
method <- args$method
refit_prj <- args$refit_prj
Expand Down Expand Up @@ -379,7 +381,7 @@ varsel.refmodel <- function(object, d_test = NULL, method = NULL,
y_wobs_test,
nobs_test,
summaries = nlist(sub, ref),
nterms_all = count_terms_in_formula(refmodel$formula) - 1L,
nterms_all,
nterms_max,
method,
cv_method = NULL,
Expand Down Expand Up @@ -425,7 +427,7 @@ select <- function(method, p_sel, refmodel, nterms_max, penalty, verbose, opt,
# them in with the default values. The purpose of this function is to avoid
# repeating the same code both in varsel() and cv_varsel().
parse_args_varsel <- function(refmodel, method, refit_prj, nterms_max,
nclusters, search_terms) {
nclusters, search_terms, nterms_all) {
search_terms_was_null <- is.null(search_terms)
if (search_terms_was_null) {
search_terms <- split_formula(refmodel$formula,
Expand Down Expand Up @@ -486,5 +488,18 @@ parse_args_varsel <- function(refmodel, method, refit_prj, nterms_max,
}
nterms_max <- min(max_nv_possible, nterms_max)

if (nterms_max == nterms_all && has_group_features &&
(refmodel$family$family == "gaussian" || refmodel$family$for_latent)) {
warning(
"In case of the Gaussian family (also in case of the latent projection) ",
"and multilevel terms, the projection onto the full model can be ",
"instable and even lead to an error, see GitHub issue #323. If you ",
"experience this and may refrain from the projection onto the full ",
"model, set `nterms_max` to the number of predictor terms in the full ",
"model minus 1 (possibly accounting for submodel sizes skipped by ",
"custom `search_terms`)."
)
}

return(nlist(method, refit_prj, nterms_max, nclusters, search_terms))
}
8 changes: 5 additions & 3 deletions tests/testthat/test_varsel.R
Original file line number Diff line number Diff line change
Expand Up @@ -1105,9 +1105,11 @@ test_that("invalid `method` fails", {

test_that("invalid `cv_method` fails", {
for (tstsetup in names(refmods)) {
expect_error(cv_varsel(refmods[[tstsetup]], cv_method = "k-fold"),
"^Unknown `cv_method`\\.$",
info = tstsetup)
expect_error(
suppressWarnings(cv_varsel(refmods[[tstsetup]], cv_method = "k-fold")),
"^Unknown `cv_method`\\.$",
info = tstsetup
)
}
})

Expand Down

0 comments on commit 8ee79dd

Please sign in to comment.