From be7d3d7d07313151241dcb3f8e5a48f89d9e8c05 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Mon, 19 Jun 2023 09:21:55 +0200 Subject: [PATCH 1/4] Add a warning for issue #323 to `varsel()` and `cv_varsel()`. --- R/cv_varsel.R | 6 ++++-- R/varsel.R | 21 ++++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index c265d3c09..424fb578c 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -173,10 +173,12 @@ cv_varsel.refmodel <- function( } refmodel <- object + nterms_all <- count_terms_in_formula(refmodel$formula) - 1L # Parse arguments which also exist in varsel(): args <- parse_args_varsel( refmodel = refmodel, method = method, refit_prj = refit_prj, - nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms + nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms, + nterms_all = nterms_all ) method <- args$method refit_prj <- args$refit_prj @@ -260,7 +262,7 @@ cv_varsel.refmodel <- function( y_wobs_test, nobs_test = nrow(y_wobs_test), summaries = sel_cv$summaries, - nterms_all = count_terms_in_formula(refmodel$formula) - 1L, + nterms_all, nterms_max, method, cv_method, diff --git a/R/varsel.R b/R/varsel.R index 946c1841a..6d7541d9c 100644 --- a/R/varsel.R +++ b/R/varsel.R @@ -213,11 +213,13 @@ varsel.refmodel <- function(object, d_test = NULL, method = NULL, } refmodel <- object + nterms_all <- count_terms_in_formula(refmodel$formula) - 1L # Parse arguments: args <- parse_args_varsel( refmodel = refmodel, method = method, refit_prj = refit_prj, - nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms + nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms, + nterms_all = nterms_all ) method <- args$method refit_prj <- args$refit_prj @@ -379,7 +381,7 @@ varsel.refmodel <- function(object, d_test = NULL, method = NULL, y_wobs_test, nobs_test, summaries = nlist(sub, ref), - nterms_all = count_terms_in_formula(refmodel$formula) - 1L, + nterms_all, nterms_max, method, cv_method = NULL, @@ -425,7 +427,7 @@ select <- function(method, p_sel, refmodel, nterms_max, penalty, verbose, opt, # them in with the default values. The purpose of this function is to avoid # repeating the same code both in varsel() and cv_varsel(). parse_args_varsel <- function(refmodel, method, refit_prj, nterms_max, - nclusters, search_terms) { + nclusters, search_terms, nterms_all) { search_terms_was_null <- is.null(search_terms) if (search_terms_was_null) { search_terms <- split_formula(refmodel$formula, @@ -486,5 +488,18 @@ parse_args_varsel <- function(refmodel, method, refit_prj, nterms_max, } nterms_max <- min(max_nv_possible, nterms_max) + if (nterms_max == nterms_all && has_group_features && + (refmodel$family$family == "gaussian" || refmodel$family$for_latent)) { + warning( + "In case of the Gaussian family (also in case of the latent projection) ", + "and multilevel terms, the projection onto the full model can be ", + "instable and even lead to an error, see GitHub issue #323. If you ", + "experience this and may refrain from the projection onto the full ", + "model, set `nterms_max` to the number of predictor terms in the full ", + "model minus 1 (possibly accounting for submodel sizes skipped by ", + "custom `search_terms`)." + ) + } + return(nlist(method, refit_prj, nterms_max, nclusters, search_terms)) } From 3974b443761ef9def3d4914b64e48cf6b0f447a6 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Mon, 26 Jun 2023 21:06:47 +0200 Subject: [PATCH 2/4] Add a warning for issue #323 to `project()`. --- R/project.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/R/project.R b/R/project.R index ee8505433..99f65283e 100644 --- a/R/project.R +++ b/R/project.R @@ -252,6 +252,18 @@ project <- function(object, nterms = NULL, solution_terms = NULL, nclusters <- 1 } + nterms_max <- max(nterms) + nterms_all <- count_terms_in_formula(refmodel$formula) - 1L + if (nterms_max == nterms_all && + formula_contains_group_terms(refmodel$formula) && + (refmodel$family$family == "gaussian" || refmodel$family$for_latent)) { + warning( + "In case of the Gaussian family (also in case of the latent projection) ", + "and multilevel terms, the projection onto the full model can be ", + "instable and even lead to an error, see GitHub issue #323." + ) + } + ## get the clustering or thinning if (refit_prj) { p_ref <- get_refdist(refmodel, ndraws = ndraws, nclusters = nclusters) From 50fc3dbe80b738e1dafe42fc9515db0613a41b73 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Mon, 26 Jun 2023 21:52:26 +0200 Subject: [PATCH 3/4] Adapt the unit tests. --- tests/testthat/test_varsel.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_varsel.R b/tests/testthat/test_varsel.R index c0adc5401..659eb61f7 100644 --- a/tests/testthat/test_varsel.R +++ b/tests/testthat/test_varsel.R @@ -1105,9 +1105,11 @@ test_that("invalid `method` fails", { test_that("invalid `cv_method` fails", { for (tstsetup in names(refmods)) { - expect_error(cv_varsel(refmods[[tstsetup]], cv_method = "k-fold"), - "^Unknown `cv_method`\\.$", - info = tstsetup) + expect_error( + suppressWarnings(cv_varsel(refmods[[tstsetup]], cv_method = "k-fold")), + "^Unknown `cv_method`\\.$", + info = tstsetup + ) } }) From 8e05c193ce8a0b934015a75555045f221e6e21fc Mon Sep 17 00:00:00 2001 From: fweber144 Date: Tue, 27 Jun 2023 06:12:52 +0200 Subject: [PATCH 4/4] Add a `NEWS.md` entry. --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index fa3da5ee2..30e57ec95 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ If you read this from a place other than