From be7d3d7d07313151241dcb3f8e5a48f89d9e8c05 Mon Sep 17 00:00:00 2001 From: fweber144 Date: Mon, 19 Jun 2023 09:21:55 +0200 Subject: [PATCH] Add a warning for issue #323 to `varsel()` and `cv_varsel()`. --- R/cv_varsel.R | 6 ++++-- R/varsel.R | 21 ++++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/R/cv_varsel.R b/R/cv_varsel.R index c265d3c09..424fb578c 100644 --- a/R/cv_varsel.R +++ b/R/cv_varsel.R @@ -173,10 +173,12 @@ cv_varsel.refmodel <- function( } refmodel <- object + nterms_all <- count_terms_in_formula(refmodel$formula) - 1L # Parse arguments which also exist in varsel(): args <- parse_args_varsel( refmodel = refmodel, method = method, refit_prj = refit_prj, - nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms + nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms, + nterms_all = nterms_all ) method <- args$method refit_prj <- args$refit_prj @@ -260,7 +262,7 @@ cv_varsel.refmodel <- function( y_wobs_test, nobs_test = nrow(y_wobs_test), summaries = sel_cv$summaries, - nterms_all = count_terms_in_formula(refmodel$formula) - 1L, + nterms_all, nterms_max, method, cv_method, diff --git a/R/varsel.R b/R/varsel.R index 946c1841a..6d7541d9c 100644 --- a/R/varsel.R +++ b/R/varsel.R @@ -213,11 +213,13 @@ varsel.refmodel <- function(object, d_test = NULL, method = NULL, } refmodel <- object + nterms_all <- count_terms_in_formula(refmodel$formula) - 1L # Parse arguments: args <- parse_args_varsel( refmodel = refmodel, method = method, refit_prj = refit_prj, - nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms + nterms_max = nterms_max, nclusters = nclusters, search_terms = search_terms, + nterms_all = nterms_all ) method <- args$method refit_prj <- args$refit_prj @@ -379,7 +381,7 @@ varsel.refmodel <- function(object, d_test = NULL, method = NULL, y_wobs_test, nobs_test, summaries = nlist(sub, ref), - nterms_all = count_terms_in_formula(refmodel$formula) - 1L, + nterms_all, nterms_max, method, cv_method = NULL, @@ -425,7 +427,7 @@ select <- function(method, p_sel, refmodel, nterms_max, penalty, verbose, opt, # them in with the default values. The purpose of this function is to avoid # repeating the same code both in varsel() and cv_varsel(). parse_args_varsel <- function(refmodel, method, refit_prj, nterms_max, - nclusters, search_terms) { + nclusters, search_terms, nterms_all) { search_terms_was_null <- is.null(search_terms) if (search_terms_was_null) { search_terms <- split_formula(refmodel$formula, @@ -486,5 +488,18 @@ parse_args_varsel <- function(refmodel, method, refit_prj, nterms_max, } nterms_max <- min(max_nv_possible, nterms_max) + if (nterms_max == nterms_all && has_group_features && + (refmodel$family$family == "gaussian" || refmodel$family$for_latent)) { + warning( + "In case of the Gaussian family (also in case of the latent projection) ", + "and multilevel terms, the projection onto the full model can be ", + "instable and even lead to an error, see GitHub issue #323. If you ", + "experience this and may refrain from the projection onto the full ", + "model, set `nterms_max` to the number of predictor terms in the full ", + "model minus 1 (possibly accounting for submodel sizes skipped by ", + "custom `search_terms`)." + ) + } + return(nlist(method, refit_prj, nterms_max, nclusters, search_terms)) }