From 8cc4e4120bb1d86ca3d73ed6265cf83e5dcce1de Mon Sep 17 00:00:00 2001 From: bszh Date: Thu, 25 Jul 2024 22:03:19 +0200 Subject: [PATCH] Branch after init (#11) * fix: get_private in callbacks * chore: add catboost and mlr3oml to dependencies * refactor: move classif class to new file * feat: build graph after init * fix: set_validate * refactor: build graph only if emptys * test: fix glmnet test * test: fix early stopping * refactor: package * tests: disable parallel tests * docs: fix * tests: fix * tests: remove logger callback --------- Co-authored-by: be-marc --- .Rbuildignore | 2 +- DESCRIPTION | 50 +- NAMESPACE | 16 +- R/LearnerAutoBranch.R | 442 ------------------ R/LearnerClassifAuto.R | 442 ++++++++++++++++++ R/LearnerClassifAutoSVM.R | 40 ++ R/LearnerClassifAutoXgboost.R | 46 ++ R/LearnerRegrAuto.R | 41 ++ R/autoplot.R | 151 ------ R/mlr_callbacks.R | 84 ---- R/reexports.R | 5 - R/zzz.R | 15 +- man-roxygen/param_id.R | 2 + ...nerAutoBranch.Rd => LearnerClassifAuto.Rd} | 43 +- ...AutoBranch.Rd => LearnerClassifAutoSVM.Rd} | 28 +- man/LearnerClassifAutoXgboost.Rd | 69 +++ man/LearnerRegrAuto.Rd | 80 ++++ man/autoplot.LearnerClassifAutoBranch.Rd | 50 -- man/reexports.Rd | 17 - tests/testthat/setup.R | 20 + tests/testthat/test_LearnerAutoBranch.R | 360 -------------- tests/testthat/test_LearnerClassifAuto.R | 412 ++++++++++++++++ tests/testthat/test_LearnerClassifSVM.R | 25 + tests/testthat/test_LearnerClassifXgboost.R | 25 + 24 files changed, 1270 insertions(+), 1195 deletions(-) delete mode 100644 R/LearnerAutoBranch.R create mode 100644 R/LearnerClassifAuto.R create mode 100644 R/LearnerClassifAutoSVM.R create mode 100644 R/LearnerClassifAutoXgboost.R create mode 100644 R/LearnerRegrAuto.R delete mode 100644 R/autoplot.R delete mode 100644 R/mlr_callbacks.R delete mode 100644 R/reexports.R create mode 100644 man-roxygen/param_id.R rename man/{LearnerAutoBranch.Rd => LearnerClassifAuto.Rd} (67%) rename man/{LearnerClassifAutoBranch.Rd => 
LearnerClassifAutoSVM.Rd} (77%) create mode 100644 man/LearnerClassifAutoXgboost.Rd create mode 100644 man/LearnerRegrAuto.Rd delete mode 100644 man/autoplot.LearnerClassifAutoBranch.Rd delete mode 100644 man/reexports.Rd create mode 100644 tests/testthat/setup.R delete mode 100644 tests/testthat/test_LearnerAutoBranch.R create mode 100644 tests/testthat/test_LearnerClassifAuto.R create mode 100644 tests/testthat/test_LearnerClassifSVM.R create mode 100644 tests/testthat/test_LearnerClassifXgboost.R diff --git a/.Rbuildignore b/.Rbuildignore index 604ca50..6232234 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -21,4 +21,4 @@ ^CRAN-SUBMISSION$ ^man/figures$ .vscode -^kaggle/$ +^kaggle$ diff --git a/DESCRIPTION b/DESCRIPTION index 50af8e5..4ab70c0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,56 +12,54 @@ URL: https://github.com/mlr-org/mlr3automl BugReports: https://github.com/mlr-org/mlr3automl/issues Depends: mlr3 (>= 0.20.0), - mlr3learners (>= 0.7.0), mlr3extralearners, + mlr3learners (>= 0.7.0), mlr3tuning (>= 1.0.0), R (>= 3.1.0), rush Imports: - bbotk, checkmate, data.table, - e1071, - future, - ggplot2, - glmnet, - kknn, lhs, - lightgbm, - MASS, mlr3mbo, mlr3misc (>= 0.15.1), mlr3pipelines, mlr3tuningspaces, - nnet, - paradox (>= 1.0.0), + paradox (>= 1.0.1), R6, - ranger, - rpart, - xgboost, - utils, - stats, - ggparty, - partykit, + utils Suggests: + catboost, + e1071, + glmnet, + kknn, + lightgbm, + lgr, + MASS, mlr3viz, - testthat (>= 3.0.0) + nnet, + ranger, + rpart, + testthat (>= 3.0.0), + xgboost Remotes: + catboost/catboost/catboost/R-package, mlr-org/mlr3@learner_size, - mlr-org/mlr3mbo@adbo, - mlr-org/mlr3learners@mlr3automl, mlr-org/mlr3extralearners@mlr3automl, + mlr-org/mlr3learners@mlr3automl, + mlr-org/mlr3mbo@adbo, mlr-org/mlr3pipelines Config/testthat/edition: 3 +Config/testthat/parallel: false Encoding: UTF-8 NeedsCompilation: no Roxygen: list(markdown = TRUE, r6 = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Collate: 'aaa.R' - 
'LearnerAutoBranch.R' - 'autoplot.R' + 'LearnerClassifAuto.R' + 'LearnerClassifAutoSVM.R' + 'LearnerClassifAutoXgboost.R' + 'LearnerRegrAuto.R' 'helper.R' - 'mlr_callbacks.R' - 'reexports.R' 'zzz.R' diff --git a/NAMESPACE b/NAMESPACE index e633e58..6fc0f11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,16 +1,15 @@ # Generated by roxygen2: do not edit by hand -S3method(autoplot,LearnerClassifAutoBranch) -export(LearnerAutoBranch) -export(LearnerClassifAutoBranch) -export(autoplot) -export(fortify) +export(LearnerClassifAuto) +export(LearnerClassifAutoSVM) +export(LearnerClassifAutoXgboost) +export(LearnerRegrAuto) import(R6) -import(bbotk) import(checkmate) import(data.table) -import(ggplot2) +import(lhs) import(mlr3) +import(mlr3extralearners) import(mlr3learners) import(mlr3mbo) import(mlr3misc) @@ -18,5 +17,4 @@ import(mlr3pipelines) import(mlr3tuning) import(mlr3tuningspaces) import(paradox) -importFrom(ggplot2,autoplot) -importFrom(ggplot2,fortify) +importFrom(rush,rush_config) diff --git a/R/LearnerAutoBranch.R b/R/LearnerAutoBranch.R deleted file mode 100644 index 36eb4e8..0000000 --- a/R/LearnerAutoBranch.R +++ /dev/null @@ -1,442 +0,0 @@ -#' @title Auto Learner -#' -#' @description -#' Abstract base class for Auto like learner. -#' -#' @param id (`character(1)`)\cr -#' Identifier for the new instance. -#' @param task_type (`character(1)`)\cr -#' Type of task, e.g. `"regr"` or `"classif"`. -#' Must be an element of [mlr_reflections$task_types$type][mlr_reflections]. -#' @param param_set ([ParamSet])\cr -#' Parameter set. -#' @param graph ([mlr3pipelines::Graph]). -#' Graph. -#' @param tuning_space (list of lists of [paradox::TuneToken])\cr -#' List of tuning spaces. -#' -#' @export -LearnerAutoBranch = R6Class("LearnerAutoBranch", - inherit = Learner, - public = list( - - #' @field graph ([mlr3pipelines::Graph]). - graph = NULL, - - #' @field tuning_space (`list()`). - tuning_space = NULL, - - #' @field instance ([TuningInstanceRushSingleCrit]). 
- instance = NULL, - - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function(id, task_type, param_set, graph, tuning_space) { - self$graph = assert_graph(graph) - self$tuning_space = assert_list(tuning_space) - - # packages - packages = unique(c("mlr3tuning", "mlr3learners", "mlr3pipelines", "mlr3mbo", "mlr3automl", graph$packages)) - - super$initialize( - id = id, - task_type = task_type, - param_set = param_set, - packages = packages, - feature_types = mlr_reflections$task_feature_types, - predict_types = names(mlr_reflections$learner_predict_types[[task_type]]), - properties = mlr_reflections$learner_properties[[task_type]], - ) - } - ), - - private = list( - - .train = function(task) { - pv = self$param_set$values - learner_ids = pv$learner_ids - graph = self$graph - - lg$debug("Training '%s' on task '%s'", self$id, task$id) - - # initialize mbo tuner - tuner = tnr("adbo") - - # remove learner based on memory limit - lg$debug("Starting to select from %i learners: %s", length(learner_ids), paste0(learner_ids, collapse = ",")) - - memory_usage = map_dbl(learner_ids, function(learner_id) { - graph$pipeops[[learner_id]]$learner$estimate_memory_usage(task)/1e6 - }) - learner_ids = learner_ids[memory_usage < pv$max_memory] - lg$debug("Checking learners for memory limit of %i MB. 
Keeping %i learner(s): %s", pv$max_memory, length(learner_ids), paste0(learner_ids, collapse = ",")) - - # set number of threads - lg$debug("Setting number of threads per learner to %i", pv$max_nthread) - walk(learner_ids, function(learner_id) { - set_threads(graph$pipeops[[learner_id]]$learner, pv$max_nthread) - }) - - # reduce number of workers on large data sets - if (task$nrow * task$ncol > pv$large_data_size) { - lg$debug("Task size larger than %i rows", pv$large_data_size) - - learner_ids = intersect(learner_ids, pv$large_data_learner_ids) - lg$debug("Keeping %i learner(s): %s", length(learner_ids), paste0(learner_ids, collapse = ",")) - - lg$debug("Increasing number of threads per learner to %i", pv$large_data_nthread) - walk(learner_ids, function(learner_id) { - set_threads(graph$pipeops[[learner_id]]$learner, pv$large_data_nthread) - }) - n_workers = rush_config()$n_workers - n = max(1, floor(n_workers / pv$large_data_nthread)) - tuner$param_set$set_values(n_workers = n) - lg$debug("Reducing number of workers to %i", n) - } - - # small data resampling - resampling = if (task$nrow < pv$small_data_size) { - lg$debug("Task has less than %i rows", pv$small_data_size) - lg$debug("Using small data set resampling with %i iterations", pv$small_data_resampling$iters) - pv$small_data_resampling - } else { - pv$resampling - } - - # cardinality - cardinality = map_int(task$col_info$levels, length) - if (any(cardinality > pv$max_cardinality)) { - lg$debug("Reducing number of factor levels to %i", pv$max_cardinality) - - # collapse factors - pipeop_ids = names(graph$pipeops) - pipeop_ids = pipeop_ids[grep("collapse", pipeop_ids)] - walk(pipeop_ids, function(pipeop_id) { - graph$pipeops[[pipeop_id]]$param_set$values$target_level_count = pv$max_cardinality - }) - } - - if (any(cardinality > pv$extra_trees_max_cardinality) && "extra_trees" %in% learner_ids) { - lg$debug("Reducing number of factor levels to %i for extra trees", pv$extra_trees_max_cardinality) - 
graph$pipeops$extra_trees_collapse$param_set$values$target_level_count = pv$extra_trees_max_cardinality - } - - # initialize graph learner - graph_learner = as_learner(graph) - graph_learner$id = "graph_learner" - graph_learner$predict_type = pv$measure$predict_type - graph_learner$fallback = lrn("classif.featureless", predict_type = pv$measure$predict_type) - graph_learner$encapsulate = c(train = "callr", predict = "callr") - graph_learner$timeout = c(train = pv$learner_timeout, predict = pv$learner_timeout) - set_validate(graph_learner, "test", ids = intersect(learner_ids, c("xgboost", "catboost", "lightgbm"))) - - # set early stopping - if ("xgboost" %in% learner_ids) { - graph_learner$param_set$values$xgboost.callbacks = list(cb_timeout_xgboost(pv$learner_timeout * 0.8)) - graph_learner$param_set$values$xgboost.eval_metric = pv$xgboost_eval_metric - } - if ("catboost" %in% learner_ids) { - graph_learner$param_set$values$catboost.eval_metric = pv$catboost_eval_metric - } - if ("lightgbm" %in% learner_ids) { - graph_learner$param_set$values$lightgbm.callbacks = list(cb_timeout_lightgbm(pv$learner_timeout * 0.8)) - graph_learner$param_set$values$lightgbm.eval = pv$lightgbm_eval_metric - } - - - # initialize search space - tuning_space = unlist(unname(self$tuning_space[learner_ids[learner_ids %in% names(self$tuning_space)]]), recursive = FALSE) - graph_scratch = graph_learner$clone(deep = TRUE) - graph_scratch$param_set$set_values(.values = tuning_space) - graph_scratch$param_set$set_values(branch.selection = to_tune(learner_ids)) - search_space = graph_scratch$param_set$search_space() - walk(learner_ids, function(learner_id) { - param_ids = search_space$ids() - param_ids = grep(paste0("^", learner_id), param_ids, value = TRUE) - walk(param_ids, function(param_id) { - # skip internal tuning parameter - if (param_id %in% c("xgboost.nrounds", "catboost.iterations", "lightgbm.num_iterations")) return() - search_space$add_dep( - id = param_id, - on = 
"branch.selection", - cond = CondEqual$new(learner_id) - ) - }) - }) - - # initial design - lhs_xdt = generate_lhs_design(pv$lhs_size, self$task_type, learner_ids[learner_ids %in% names(self$tuning_space)], self$tuning_space) - default_xdt = generate_default_design(self$task_type, learner_ids, task, self$tuning_space) - initial_xdt = rbindlist(list(lhs_xdt, default_xdt), use.names = TRUE, fill = TRUE) - setorderv(initial_xdt, "branch.selection") - tuner$param_set$set_values(initial_design = initial_xdt) - - # initialize auto tuner - self$instance = ti_async( - task = task, - learner = graph_learner, - resampling = resampling, - measure = pv$measure, - terminator = pv$terminator, - search_space = search_space, - callbacks = c(pv$callbacks, clbk("mlr3automl.branch_nrounds")), - store_benchmark_result = pv$store_benchmark_result - ) - - # tune - lg$debug("Learner '%s' starts tuning phase", self$id) - tuner$optimize(self$instance) - - # fit final model - lg$debug("Learner '%s' fits final model", self$id) - set_validate(graph_learner, NULL, ids = intersect(learner_ids, c("xgboost", "catboost", "lightgbm"))) - graph_learner$param_set$set_values(.values = self$instance$result_learner_param_vals) - graph_learner$timeout = c(train = Inf, predict = Inf) - graph_learner$train(task) - - list(graph_learner = graph_learner, instance = self$instance) - }, - - .predict = function(task) { - lg$debug("Predicting with '%s' on task '%s'", self$id, task$id) - self$model$graph_learner$predict(task) - } - ) -) - -#' @title Classification Auto Learner -#' -#' @description -#' Classification Auto learner. -#' -#' @param id (`character(1)`)\cr -#' Identifier for the new instance. -#' -#' @export -LearnerClassifAutoBranch = R6Class("LearnerClassifAutoBranch", - inherit = LearnerAutoBranch, - public = list( - - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function(id = "classif.automl") { - param_set = ps( - # learner - learner_ids = p_uty(), - learner_timeout = p_int(lower = 1L, default = 900L), - xgboost_eval_metric = p_uty(), - catboost_eval_metric = p_uty(), - lightgbm_eval_metric = p_uty(), - # system - max_nthread = p_int(lower = 1L, default = 1L), - max_memory = p_int(lower = 1L, default = 32000L), - # large data - large_data_size = p_int(lower = 1L, default = 1e6), - large_data_learner_ids = p_uty(), - large_data_nthread = p_int(lower = 1L, default = 4L), - # small data - small_data_size = p_int(lower = 1L, default = 5000L), - small_data_resampling = p_uty(), - max_cardinality = p_int(lower = 1L, default = 100L), - extra_trees_max_cardinality = p_int(lower = 1L, default = 40L), - # tuner - resampling = p_uty(), - terminator = p_uty(), - measure = p_uty(), - lhs_size = p_int(lower = 1L, default = 4L), - callbacks = p_uty(), - store_benchmark_result = p_lgl(default = FALSE)) - - learner_ids = c("glmnet", "kknn", "lda", "nnet", "ranger", "svm", "xgboost", "catboost", "extra_trees", "lightgbm") - param_set$set_values( - learner_ids = learner_ids, - learner_timeout = 900L, - max_nthread = 1L, - max_memory = 32000L, - large_data_size = 1e6L, - large_data_learner_ids = c("lda", "ranger", "xgboost", "catboost", "extra_trees", "lightgbm"), - large_data_nthread = 4L, - small_data_size = 5000L, - small_data_resampling = rsmp("cv", folds = 10L), - max_cardinality = 100L, - extra_trees_max_cardinality = 40L, - resampling = rsmp("cv", folds = 3L), - terminator = trm("run_time", secs = 14400L), - measure = msr("classif.ce"), - lhs_size = 4L, - store_benchmark_result = FALSE) - - # glmnet - branch_glmnet = - po("removeconstants", id = "glmnet_removeconstants") %>>% - po("imputehist", id = "glmnet_imputehist") %>>% - po("imputeoor", id = "glmnet_imputeoor") %>>% - po("fixfactors", id = "glmnet_fixfactors") %>>% - po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = 
"glmnet_imputesample") %>>% - po("collapsefactors", target_level_count = 100, id = "glmnet_collapse") %>>% - po("encode", method = "one-hot", id = "glmnet_encode") %>>% - po("removeconstants", id = "glmnet_post_removeconstants") %>>% - lrn("classif.glmnet", id = "glmnet") - - # kknn - branch_kknn = po("removeconstants", id = "kknn_removeconstants") %>>% - po("imputehist", id = "kknn_imputehist") %>>% - po("imputeoor", id = "kknn_imputeoor") %>>% - po("fixfactors", id = "kknn_fixfactors") %>>% - po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "kknn_imputesample") %>>% - po("collapsefactors", target_level_count = 100, id = "kknn_collapse") %>>% - po("removeconstants", id = "kknn_post_removeconstants") %>>% - lrn("classif.kknn", id = "kknn") - - # lda - branch_lda = po("removeconstants", id = "lda_removeconstants") %>>% - po("imputehist", id = "lda_imputehist") %>>% - po("imputeoor", id = "lda_imputeoor") %>>% - po("fixfactors", id = "lda_fixfactors") %>>% - po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "lda_imputesample") %>>% - po("collapsefactors", target_level_count = 100, id = "lda_collapse") %>>% - po("removeconstants", id = "lda_post_removeconstants") %>>% - lrn("classif.lda", id = "lda") - - # nnet - branch_nnet = po("removeconstants", id = "nnet_removeconstants") %>>% - po("imputehist", id = "nnet_imputehist") %>>% - po("imputeoor", id = "nnet_imputeoor") %>>% - po("fixfactors", id = "nnet_fixfactors") %>>% - po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "nnet_imputesample") %>>% - po("collapsefactors", target_level_count = 100, id = "nnet_collapse") %>>% - po("removeconstants", id = "nnet_post_removeconstants") %>>% - lrn("classif.nnet", id = "nnet") - - # ranger - branch_ranger = po("removeconstants", id = "ranger_removeconstants") %>>% - po("imputeoor", id = "ranger_imputeoor") %>>% - po("fixfactors", id = "ranger_fixfactors") %>>% - po("imputesample", 
affect_columns = selector_type(c("factor", "ordered")), id = "ranger_imputesample") %>>% - po("collapsefactors", target_level_count = 100, id = "ranger_collapse") %>>% - po("removeconstants", id = "ranger_post_removeconstants") %>>% - lrn("classif.ranger", id = "ranger", num.trees = 2000) # use upper bound of search space for memory estimation - - # svm - branch_svm = po("removeconstants", id = "svm_removeconstants") %>>% - po("imputehist", id = "svm_imputehist") %>>% - po("imputeoor", id = "svm_imputeoor") %>>% - po("fixfactors", id = "svm_fixfactors") %>>% - po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "svm_imputesample") %>>% - po("collapsefactors", target_level_count = 100, id = "svm_collapse") %>>% - po("encode", method = "one-hot", id = "smv_encode") %>>% - po("removeconstants", id = "svm_post_removeconstants") %>>% - lrn("classif.svm", id = "svm", type = "C-classification") - - # xgboost - branch_xgboost = po("removeconstants", id = "xgboost_removeconstants") %>>% - po("imputeoor", id = "xgboost_imputeoor") %>>% - po("fixfactors", id = "xgboost_fixfactors") %>>% - po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "xgboost_imputesample") %>>% - po("encodeimpact", id = "xgboost_encode") %>>% - po("removeconstants", id = "xgboost_post_removeconstants") %>>% - lrn("classif.xgboost", id = "xgboost", nrounds = 5000, early_stopping_rounds = 10) - - # catboost - branch_catboost = po("colapply", applicator = as.numeric, affect_columns = selector_type("integer")) %>>% - lrn("classif.catboost", id = "catboost", iterations = 500, early_stopping_rounds = 10, use_best_model = TRUE) - - # extra trees - branch_extra_trees = po("removeconstants", id = "extra_trees_removeconstants") %>>% - po("imputeoor", id = "extra_trees_imputeoor") %>>% - po("fixfactors", id = "extra_trees_fixfactors") %>>% - po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "extra_trees_imputesample") %>>% - 
po("collapsefactors", target_level_count = 40, id = "extra_trees_collapse") %>>% - po("removeconstants", id = "extra_trees_post_removeconstants") %>>% - lrn("classif.ranger", id = "extra_trees", splitrule = "extratrees", num.trees = 100, replace = FALSE, sample.fraction = 1) - - # lightgbm - branch_lightgbm = lrn("classif.lightgbm", id = "lightgbm", num_iterations = 5000, early_stopping_rounds = 10) - - # branch graph - graph = po("branch", options = learner_ids) %>>% - gunion(list( - branch_glmnet, - branch_kknn, - branch_lda, - branch_nnet, - branch_ranger, - branch_svm, - branch_xgboost, - branch_catboost, - branch_extra_trees, - branch_lightgbm)) %>>% - po("unbranch", options = learner_ids) - - super$initialize( - id = id, - task_type = "classif", - param_set = param_set, - graph = graph, - tuning_space = tuning_space) - } - ) -) - -tuning_space = list( - glmnet = list( - glmnet.s = to_tune(1e-4, 1e4, logscale = TRUE), - glmnet.alpha = to_tune(0, 1) - ), - - kknn = list( - kknn.k = to_tune(1, 50, logscale = TRUE), - kknn.distance = to_tune(1, 5), - kknn.kernel = to_tune(c("rectangular", "optimal", "epanechnikov", "biweight", "triweight", "cos", "inv", "gaussian", "rank")) - ), - - nnet = list( - nnet.maxit = to_tune(1e1, 1e3, logscale = TRUE), - nnet.decay = to_tune(1e-4, 1e-1, logscale = TRUE), - nnet.size = to_tune(2, 50, logscale = TRUE) - ), - - ranger = list( - ranger.mtry.ratio = to_tune(0, 1), - ranger.replace = to_tune(), - ranger.sample.fraction = to_tune(1e-1, 1), - ranger.num.trees = to_tune(500, 2000) - ), - - svm = list( - svm.cost = to_tune(1e-4, 1e4, logscale = TRUE), - svm.kernel = to_tune(c("polynomial", "radial", "sigmoid", "linear")), - svm.degree = to_tune(2, 5), - svm.gamma = to_tune(1e-4, 1e4, logscale = TRUE) - ), - - xgboost = list( - xgboost.eta = to_tune(1e-4, 1, logscale = TRUE), - xgboost.max_depth = to_tune(1, 20), - xgboost.colsample_bytree = to_tune(1e-1, 1), - xgboost.colsample_bylevel = to_tune(1e-1, 1), - xgboost.lambda = 
to_tune(1e-3, 1e3, logscale = TRUE), - xgboost.alpha = to_tune(1e-3, 1e3, logscale = TRUE), - xgboost.subsample = to_tune(1e-1, 1), - xgboost.nrounds = to_tune(1, 5000, internal = TRUE) - ), - - catboost = list( - catboost.depth = to_tune(5, 8), - catboost.learning_rate = to_tune(5e-3, 0.2, logscale = TRUE), - catboost.l2_leaf_reg = to_tune(1, 5), - catboost.iterations = to_tune(1, 500, internal = TRUE) - ), - - - lightgbm = list( - lightgbm.learning_rate = to_tune(5e-3, 0.2, logscale = TRUE), - lightgbm.feature_fraction = to_tune(0.75, 1), - lightgbm.min_data_in_leaf = to_tune(2, 60), - lightgbm.num_leaves = to_tune(16, 96), - lightgbm.num_iterations = to_tune(1, 5000, internal = TRUE) - ) -) - -#' @include aaa.R -learners[["classif.automl_branch"]] = LearnerClassifAutoBranch diff --git a/R/LearnerClassifAuto.R b/R/LearnerClassifAuto.R new file mode 100644 index 0000000..b3b126e --- /dev/null +++ b/R/LearnerClassifAuto.R @@ -0,0 +1,442 @@ +#' @title Classification Auto Learner +#' +#' @description +#' Classification auto learner. +#' +#' @template param_id +#' +#' @export +LearnerClassifAuto = R6Class("LearnerClassifAuto", + inherit = Learner, + public = list( + + #' @field graph ([mlr3pipelines::Graph]). + graph = NULL, + + #' @field tuning_space (`list()`). + tuning_space = NULL, + + #' @field instance ([mlr3tuning::TuningInstanceAsyncSingleCrit]). + instance = NULL, + + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function(id = "classif.auto") { + param_set = ps( + # learner + learner_ids = p_uty(default = c("glmnet", "kknn", "lda", "nnet", "ranger", "svm", "xgboost", "catboost", "extra_trees", "lightgbm"), + custom_check = function(x) { + if (all(x %in% c("lda", "extra_trees"))) { + return("Learner 'lda' and 'extra_trees' must be combined with other learners") + } + check_subset(x, c("glmnet", "kknn", "lda", "nnet", "ranger", "svm", "xgboost", "catboost", "extra_trees", "lightgbm")) + }), + learner_timeout = p_int(lower = 1L, default = 900L), + xgboost_eval_metric = p_uty(), + catboost_eval_metric = p_uty(), + lightgbm_eval_metric = p_uty(), + # system + max_nthread = p_int(lower = 1L, default = 1L), + max_memory = p_int(lower = 1L, default = 32000L), + # large data + large_data_size = p_int(lower = 1L, default = 1e6), + large_data_learner_ids = p_uty(), + large_data_nthread = p_int(lower = 1L, default = 4L), + # small data + small_data_size = p_int(lower = 1L, default = 5000L), + small_data_resampling = p_uty(), + max_cardinality = p_int(lower = 1L, default = 100L), + extra_trees_max_cardinality = p_int(lower = 1L, default = 40L), + # tuner + resampling = p_uty(), + terminator = p_uty(), + measure = p_uty(), + lhs_size = p_int(lower = 1L, default = 4L), + callbacks = p_uty(), + store_benchmark_result = p_lgl(default = FALSE)) + + param_set$set_values( + learner_ids = c("glmnet", "kknn", "lda", "nnet", "ranger", "svm", "xgboost", "catboost", "extra_trees", "lightgbm"), + learner_timeout = 900L, + max_nthread = 1L, + max_memory = 32000L, + large_data_size = 1e6L, + large_data_learner_ids = c("lda", "ranger", "xgboost", "catboost", "extra_trees", "lightgbm"), + large_data_nthread = 4L, + small_data_size = 5000L, + small_data_resampling = rsmp("cv", folds = 10L), + max_cardinality = 100L, + extra_trees_max_cardinality = 40L, + resampling = rsmp("cv", folds = 3L), + terminator = trm("run_time", secs = 14400L), + measure = msr("classif.ce"), + lhs_size = 4L, + 
store_benchmark_result = FALSE) + + super$initialize( + id = id, + task_type = "classif", + param_set = param_set, + packages = c("mlr3tuning", "mlr3learners", "mlr3pipelines", "mlr3mbo", "mlr3automl", "xgboost", "catboost", "lightgbm", "ranger", "nnet", "kknn", "glmnet", "MASS", "e1071"), + feature_types = c("logical", "integer", "numeric", "character", "factor"), + predict_types = c("response", "prob"), + properties = c("missings", "weights", "twoclass", "multiclass"), + ) + } + ), + + private = list( + .train = function(task) { + pv = self$param_set$values + learner_ids = pv$learner_ids + self$graph = build_graph(learner_ids) + self$tuning_space = tuning_space[learner_ids] + + lg$debug("Training '%s' on task '%s'", self$id, task$id) + + # initialize mbo tuner + tuner = tnr("adbo") + + # remove learner based on memory limit + lg$debug("Starting to select from %i learners: %s", length(learner_ids), paste0(learner_ids, collapse = ",")) + + if (!is.null(pv$max_memory)) { + memory_usage = map_dbl(learner_ids, function(learner_id) { + self$graph$pipeops[[learner_id]]$learner$estimate_memory_usage(task) / 1e6 + }) + learner_ids = learner_ids[memory_usage < pv$max_memory] + lg$debug("Checking learners for memory limit of %i MB. 
Keeping %i learner(s): %s", pv$max_memory, length(learner_ids), paste0(learner_ids, collapse = ",")) + } + + # set number of threads + if (!is.null(pv$max_nthread)) { + lg$debug("Setting number of threads per learner to %i", pv$max_nthread) + walk(learner_ids, function(learner_id) { + set_threads(self$graph$pipeops[[learner_id]]$learner, pv$max_nthread) + }) + } + + # reduce number of workers on large data sets + if (!is.null(pv$large_data_size) && task$nrow * task$ncol > pv$large_data_size) { + lg$debug("Task size larger than %i rows", pv$large_data_size) + + learner_ids = intersect(learner_ids, pv$large_data_learner_ids) + self$tuning_space = tuning_space[learner_ids] + lg$debug("Keeping %i learner(s): %s", length(learner_ids), paste0(learner_ids, collapse = ",")) + + lg$debug("Increasing number of threads per learner to %i", pv$large_data_nthread) + walk(learner_ids, function(learner_id) { + set_threads(self$graph$pipeops[[learner_id]]$learner, pv$large_data_nthread) + }) + n_workers = rush_config()$n_workers + n = max(1, floor(n_workers / pv$large_data_nthread)) + tuner$param_set$set_values(n_workers = n) + lg$debug("Reducing number of workers to %i", n) + } + + # small data resampling + resampling = if (!is.null(pv$small_data_size) && task$nrow < pv$small_data_size) { + lg$debug("Task has less than %i rows", pv$small_data_size) + lg$debug("Using small data set resampling with %i iterations", pv$small_data_resampling$iters) + pv$small_data_resampling + } else { + pv$resampling + } + + # cardinality + cardinality = map_int(task$col_info$levels, length) + if (!is.null(pv$max_cardinality) && any(cardinality > pv$max_cardinality)) { + lg$debug("Reducing number of factor levels to %i", pv$max_cardinality) + + # collapse factors + pipeop_ids = names(self$graph$pipeops) + pipeop_ids = pipeop_ids[grep("collapse", pipeop_ids)] + walk(pipeop_ids, function(pipeop_id) { + self$graph$pipeops[[pipeop_id]]$param_set$values$target_level_count = pv$max_cardinality + }) + } + + 
if ("extra_trees" %in% learner_ids && any(cardinality > pv$extra_trees_max_cardinality)) { + lg$debug("Reducing number of factor levels to %i for extra trees", pv$extra_trees_max_cardinality) + self$graph$pipeops$extra_trees_collapse$param_set$values$target_level_count = pv$extra_trees_max_cardinality + } + + # initialize graph learner + graph_learner = as_learner(self$graph) + graph_learner$id = "graph_learner" + graph_learner$predict_type = pv$measure$predict_type + graph_learner$fallback = lrn("classif.featureless", predict_type = pv$measure$predict_type) + graph_learner$encapsulate = c(train = "callr", predict = "callr") + graph_learner$timeout = c(train = pv$learner_timeout, predict = pv$learner_timeout) + + learners_with_validation = intersect(learner_ids, c("xgboost", "catboost", "lightgbm")) + if (length(learners_with_validation)) { + set_validate(graph_learner, "test", ids = learners_with_validation) + } + + # set early stopping + if ("xgboost" %in% learner_ids) { + graph_learner$param_set$values$xgboost.callbacks = list(cb_timeout_xgboost(pv$learner_timeout * 0.8)) + graph_learner$param_set$values$xgboost.eval_metric = pv$xgboost_eval_metric + } + if ("catboost" %in% learner_ids) { + graph_learner$param_set$values$catboost.eval_metric = pv$catboost_eval_metric + } + if ("lightgbm" %in% learner_ids) { + graph_learner$param_set$values$lightgbm.callbacks = list(cb_timeout_lightgbm(pv$learner_timeout * 0.8)) + graph_learner$param_set$values$lightgbm.eval = pv$lightgbm_eval_metric + } + + # initialize search space + tuning_space = unlist(unname(self$tuning_space), recursive = FALSE) + graph_scratch = graph_learner$clone(deep = TRUE) + graph_scratch$param_set$set_values(.values = tuning_space) + graph_scratch$param_set$set_values(branch.selection = to_tune(learner_ids)) + search_space = graph_scratch$param_set$search_space() + walk(learner_ids, function(learner_id) { + param_ids = search_space$ids() + param_ids = grep(paste0("^", learner_id), param_ids, value = 
TRUE) + walk(param_ids, function(param_id) { + # skip internal tuning parameter + if (param_id %in% c("xgboost.nrounds", "catboost.iterations", "lightgbm.num_iterations")) return() + search_space$add_dep( + id = param_id, + on = "branch.selection", + cond = CondEqual$new(learner_id) + ) + }) + }) + + # initial design + lhs_xdt = generate_lhs_design(pv$lhs_size, self$task_type, setdiff(learner_ids, c("lda", "extra_trees")), self$tuning_space) + default_xdt = generate_default_design(self$task_type, learner_ids, task, self$tuning_space) + initial_xdt = rbindlist(list(lhs_xdt, default_xdt), use.names = TRUE, fill = TRUE) + setorderv(initial_xdt, "branch.selection") + tuner$param_set$set_values(initial_design = initial_xdt) + + # initialize auto tuner + self$instance = ti_async( + task = task, + learner = graph_learner, + resampling = resampling, + measures = pv$measure, + terminator = pv$terminator, + search_space = search_space, + callbacks = pv$callbacks, + store_benchmark_result = pv$store_benchmark_result + ) + + # tune + lg$debug("Learner '%s' starts tuning phase", self$id) + tuner$optimize(self$instance) + + # fit final model + lg$debug("Learner '%s' fits final model", self$id) + if (length(learners_with_validation)) { + set_validate(graph_learner, NULL, ids = intersect(learner_ids, c("xgboost", "catboost", "lightgbm"))) + } + graph_learner$param_set$set_values(.values = self$instance$result_learner_param_vals, .insert = FALSE) + graph_learner$timeout = c(train = Inf, predict = Inf) + graph_learner$train(task) + + list(graph_learner = graph_learner, instance = self$instance) + }, + + .predict = function(task) { + lg$debug("Predicting with '%s' on task '%s'", self$id, task$id) + self$model$graph_learner$predict(task) + } + ) +) + +#' @include aaa.R +learners[["classif.auto"]] = LearnerClassifAuto + +build_graph = function(learner_ids) { + branches = list() + # glmnet + if ("glmnet" %in% learner_ids) { + branch_glmnet = po("removeconstants", id = 
"glmnet_removeconstants") %>>% + po("imputehist", id = "glmnet_imputehist") %>>% + po("imputeoor", id = "glmnet_imputeoor") %>>% + po("fixfactors", id = "glmnet_fixfactors") %>>% + po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "glmnet_imputesample") %>>% + po("collapsefactors", target_level_count = 100, id = "glmnet_collapse") %>>% + po("encode", method = "one-hot", id = "glmnet_encode") %>>% + po("removeconstants", id = "glmnet_post_removeconstants") %>>% + lrn("classif.glmnet", id = "glmnet") + branches = c(branches, branch_glmnet) + } + + # kknn + if ("kknn" %in% learner_ids) { + branch_kknn = po("removeconstants", id = "kknn_removeconstants") %>>% + po("imputehist", id = "kknn_imputehist") %>>% + po("imputeoor", id = "kknn_imputeoor") %>>% + po("fixfactors", id = "kknn_fixfactors") %>>% + po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "kknn_imputesample") %>>% + po("collapsefactors", target_level_count = 100, id = "kknn_collapse") %>>% + po("removeconstants", id = "kknn_post_removeconstants") %>>% + lrn("classif.kknn", id = "kknn") + branches = c(branches, branch_kknn) + } + + # lda + if ("lda" %in% learner_ids) { + branch_lda = po("removeconstants", id = "lda_removeconstants") %>>% + po("imputehist", id = "lda_imputehist") %>>% + po("imputeoor", id = "lda_imputeoor") %>>% + po("fixfactors", id = "lda_fixfactors") %>>% + po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "lda_imputesample") %>>% + po("collapsefactors", target_level_count = 100, id = "lda_collapse") %>>% + po("removeconstants", id = "lda_post_removeconstants") %>>% + lrn("classif.lda", id = "lda") + branches = c(branches, branch_lda) + } + + # nnet + if ("nnet" %in% learner_ids) { + branch_nnet = po("removeconstants", id = "nnet_removeconstants") %>>% + po("imputehist", id = "nnet_imputehist") %>>% + po("imputeoor", id = "nnet_imputeoor") %>>% + po("fixfactors", id = "nnet_fixfactors") %>>% + 
po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "nnet_imputesample") %>>% + po("collapsefactors", target_level_count = 100, id = "nnet_collapse") %>>% + po("removeconstants", id = "nnet_post_removeconstants") %>>% + lrn("classif.nnet", id = "nnet") + branches = c(branches, branch_nnet) + } + + # ranger + if ("ranger" %in% learner_ids) { + branch_ranger = po("removeconstants", id = "ranger_removeconstants") %>>% + po("imputeoor", id = "ranger_imputeoor") %>>% + po("fixfactors", id = "ranger_fixfactors") %>>% + po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "ranger_imputesample") %>>% + po("collapsefactors", target_level_count = 100, id = "ranger_collapse") %>>% + po("removeconstants", id = "ranger_post_removeconstants") %>>% + # use upper bound of search space for memory estimation + lrn("classif.ranger", id = "ranger", num.trees = 2000) + branches = c(branches, branch_ranger) + } + + # svm + if ("svm" %in% learner_ids) { + branch_svm = po("removeconstants", id = "svm_removeconstants") %>>% + po("imputehist", id = "svm_imputehist") %>>% + po("imputeoor", id = "svm_imputeoor") %>>% + po("fixfactors", id = "svm_fixfactors") %>>% + po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "svm_imputesample") %>>% + po("collapsefactors", target_level_count = 100, id = "svm_collapse") %>>% + po("encode", method = "one-hot", id = "svm_encode") %>>% + po("removeconstants", id = "svm_post_removeconstants") %>>% + lrn("classif.svm", id = "svm", type = "C-classification") + branches = c(branches, branch_svm) + } + + # xgboost + if ("xgboost" %in% learner_ids) { + branch_xgboost = po("removeconstants", id = "xgboost_removeconstants") %>>% + po("imputeoor", id = "xgboost_imputeoor") %>>% + po("fixfactors", id = "xgboost_fixfactors") %>>% + po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "xgboost_imputesample") %>>% + po("encodeimpact", id = "xgboost_encode") 
%>>% + po("removeconstants", id = "xgboost_post_removeconstants") %>>% + lrn("classif.xgboost", id = "xgboost", nrounds = 5000, early_stopping_rounds = 10) + branches = c(branches, branch_xgboost) + } + + # catboost + if ("catboost" %in% learner_ids) { + branch_catboost = po("colapply", id = "catboost_colapply", applicator = as.numeric, affect_columns = selector_type("integer")) %>>% + lrn("classif.catboost", id = "catboost", iterations = 500, early_stopping_rounds = 10, use_best_model = TRUE) + branches = c(branches, branch_catboost) + } + + # extra trees + if ("extra_trees" %in% learner_ids) { + branch_extra_trees = po("removeconstants", id = "extra_trees_removeconstants") %>>% + po("imputeoor", id = "extra_trees_imputeoor") %>>% + po("fixfactors", id = "extra_trees_fixfactors") %>>% + po("imputesample", affect_columns = selector_type(c("factor", "ordered")), id = "extra_trees_imputesample") %>>% + po("collapsefactors", target_level_count = 40, id = "extra_trees_collapse") %>>% + po("removeconstants", id = "extra_trees_post_removeconstants") %>>% + lrn("classif.ranger", id = "extra_trees", splitrule = "extratrees", num.trees = 100, replace = FALSE, sample.fraction = 1) + branches = c(branches, branch_extra_trees) + } + + # lightgbm + if ("lightgbm" %in% learner_ids) { + branch_lightgbm = lrn("classif.lightgbm", id = "lightgbm", num_iterations = 5000, early_stopping_rounds = 10) + branches = c(branches, branch_lightgbm) + } + + # branch graph + po("branch", options = learner_ids) %>>% + gunion(branches) %>>% + po("unbranch", options = learner_ids) +} + +tuning_space = list( + glmnet = list( + glmnet.s = to_tune(1e-4, 1e4, logscale = TRUE), + glmnet.alpha = to_tune(0, 1) + ), + + kknn = list( + kknn.k = to_tune(1, 50, logscale = TRUE), + kknn.distance = to_tune(1, 5), + kknn.kernel = to_tune(c("rectangular", "optimal", "epanechnikov", "biweight", "triweight", "cos", "inv", "gaussian", "rank")) + ), + + lda = list(), + + extra_trees = list(), + + nnet = list( + 
nnet.maxit = to_tune(1e1, 1e3, logscale = TRUE), + nnet.decay = to_tune(1e-4, 1e-1, logscale = TRUE), + nnet.size = to_tune(2, 50, logscale = TRUE) + ), + + ranger = list( + ranger.mtry.ratio = to_tune(0, 1), + ranger.replace = to_tune(), + ranger.sample.fraction = to_tune(1e-1, 1), + ranger.num.trees = to_tune(500, 2000) + ), + + svm = list( + svm.cost = to_tune(1e-4, 1e4, logscale = TRUE), + svm.kernel = to_tune(c("polynomial", "radial", "sigmoid", "linear")), + svm.degree = to_tune(2, 5), + svm.gamma = to_tune(1e-4, 1e4, logscale = TRUE) + ), + + xgboost = list( + xgboost.eta = to_tune(1e-4, 1, logscale = TRUE), + xgboost.max_depth = to_tune(1, 20), + xgboost.colsample_bytree = to_tune(1e-1, 1), + xgboost.colsample_bylevel = to_tune(1e-1, 1), + xgboost.lambda = to_tune(1e-3, 1e3, logscale = TRUE), + xgboost.alpha = to_tune(1e-3, 1e3, logscale = TRUE), + xgboost.subsample = to_tune(1e-1, 1), + xgboost.nrounds = to_tune(1, 5000, internal = TRUE) + ), + + catboost = list( + catboost.depth = to_tune(5, 8), + catboost.learning_rate = to_tune(5e-3, 0.2, logscale = TRUE), + catboost.l2_leaf_reg = to_tune(1, 5), + catboost.iterations = to_tune(1, 500, internal = TRUE) + ), + + + lightgbm = list( + lightgbm.learning_rate = to_tune(5e-3, 0.2, logscale = TRUE), + lightgbm.feature_fraction = to_tune(0.75, 1), + lightgbm.min_data_in_leaf = to_tune(2, 60), + lightgbm.num_leaves = to_tune(16, 96), + lightgbm.num_iterations = to_tune(1, 5000, internal = TRUE) + ) +) diff --git a/R/LearnerClassifAutoSVM.R b/R/LearnerClassifAutoSVM.R new file mode 100644 index 0000000..fd112b1 --- /dev/null +++ b/R/LearnerClassifAutoSVM.R @@ -0,0 +1,40 @@ +#' @title Classification SVM Auto Learner +#' +#' @description +#' Classification auto learner. +#' +#' @template param_id +#' +#' @export +LearnerClassifAutoSVM = R6Class("LearnerClassifAutoSVM", + inherit = LearnerClassifAuto, + public = list( + + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function(id = "classif.auto_svm") { + super$initialize(id = id) + + # reduce parameter set to the relevant parameters + private$.param_set = private$.param_set$subset( + c("learner_ids", + "learner_timeout", + "small_data_size", + "small_data_resampling", + "max_cardinality", + "resampling", + "terminator", + "measure", + "lhs_size", + "callbacks", + "store_benchmark_result") + ) + + self$param_set$set_values(learner_ids = "svm") + self$packages = c("mlr3tuning", "mlr3learners", "mlr3pipelines", "mlr3mbo", "mlr3automl", "e1071") + } + ) +) + +#' @include aaa.R +learners[["classif.auto_svm"]] = LearnerClassifAutoSVM diff --git a/R/LearnerClassifAutoXgboost.R b/R/LearnerClassifAutoXgboost.R new file mode 100644 index 0000000..e4087f3 --- /dev/null +++ b/R/LearnerClassifAutoXgboost.R @@ -0,0 +1,46 @@ +#' @title Classification XGBoost Auto Learner +#' +#' @description +#' Classification auto learner. +#' +#' @template param_id +#' +#' @export +LearnerClassifAutoXgboost = R6Class("LearnerClassifAutoXgboost", + inherit = LearnerClassifAuto, + public = list( + + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function(id = "classif.auto_xgboost") { + + super$initialize(id = id) + + # reduce parameter set to the relevant parameters + private$.param_set = private$.param_set$subset( + c("learner_ids", + "learner_timeout", + "xgboost_eval_metric", + "max_nthread", + "max_memory", + "large_data_size", + "large_data_learner_ids", + "large_data_nthread", + "small_data_size", + "small_data_resampling", + "max_cardinality", + "resampling", + "terminator", + "measure", + "lhs_size", + "callbacks", + "store_benchmark_result")) + + self$param_set$set_values(learner_ids = "xgboost") + self$packages = c("mlr3tuning", "mlr3learners", "mlr3pipelines", "mlr3mbo", "mlr3automl", "xgboost") + } + ) +) + +#' @include aaa.R +learners[["classif.auto_xgboost"]] = LearnerClassifAutoXgboost diff --git a/R/LearnerRegrAuto.R b/R/LearnerRegrAuto.R new file mode 100644 index 0000000..bee6e86 --- /dev/null +++ b/R/LearnerRegrAuto.R @@ -0,0 +1,41 @@ +#' @title Regression Auto Learner +#' +#' @description +#' Regression auto learner. +#' +#' @param id (`character(1)`)\cr +#' Identifier for the new instance. +#' +#' @export +LearnerRegrAuto = R6Class("LearnerRegrAuto", + inherit = Learner, + public = list( + + #' @field graph ([mlr3pipelines::Graph]). + graph = NULL, + + #' @field tuning_space (`list()`). + tuning_space = NULL, + + #' @field instance ([mlr3tuning::TuningInstanceAsyncSingleCrit]). + instance = NULL, + + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function(id = "regr.auto") { + + } + ), + private = list( + .train = function(task) { + + }, + + .predict = function(task) { + + } + ) +) + +#' @include aaa.R +learners[["regr.auto"]] = LearnerRegrAuto diff --git a/R/autoplot.R b/R/autoplot.R deleted file mode 100644 index b4998be..0000000 --- a/R/autoplot.R +++ /dev/null @@ -1,151 +0,0 @@ -#' @title Plots for Auto Learners -#' -#' @param object ([mlr3automl::LearnerClassifAutoBranch]).
-#' @param type (character(1)):\cr -#' Type of the plot. -#' @param add_arrow (`logical(1)`)\cr -#' If `TRUE` (default), add arrows to the PCA plot. -#' @param cols_x (`character()`)\cr -#' Column names of x values. -#' By default, all untransformed x values from the search space are plotted. -#' Transformed hyperparameters are prefixed with `x_domain_`. -#' @param trafo (`logical(1)`)\cr -#' If `FALSE` (default), the untransformed x values are plotted. -#' If `TRUE`, the transformed x values are plotted. -#' @param batch (`integer()`)\cr -#' The batch number(s) to limit the plot to. -#' The default is all batches. -#' @param theme ([ggplot2::theme()])\cr -#' The [ggplot2::theme_minimal()] is applied by default to all plots. -#' @param ... (ignored). -#' -#' @return [ggplot2::ggplot()]. -#' -#' @export -autoplot.LearnerClassifAutoBranch = function(object, type = "marginal", add_arrow = TRUE, cols_x = NULL, trafo = FALSE, batch = NULL, theme = theme_minimal(), ...) { # nolint - assert_flag(trafo) - - require_namespaces("mlr3viz") - - object = object$model$tuning_instance - - if (is.null(cols_x)) { - cols_x = if (trafo) { - paste0("x_domain_", object$archive$cols_x) - } else { - object$archive$cols_x - } - } - - if (type %in% c("pca", "hyperparameter")) { - cols_y = object$archive$cols_y - data = fortify(object) - if (is.null(batch)) batch = seq_len(object$archive$n_batch) - assert_subset(batch, seq_len(object$archive$n_batch)) - data = data[list(batch), , on = "batch_nr"] - - switch(type, - # show all settings in 2D with dimensionality reduction - "pca" = { - if (length(cols_x) < 3) { - stop("Need at least 3 parameters.") - } - # remove non numeric columns - char_cols = names(data)[sapply(data, is.character)] - cols_x = cols_x[!cols_x %in% char_cols] - data = data[, c(..cols_x, ..cols_y)] - # replace NA with default - for (col in colnames(data)) { - if (any(is.na(data[, ..col]))) { - data[is.na(get(col)), (col) := defaults[[col]]] - } - } - # remove zero variance 
columns - zero_cols = names(data)[sapply(data, function(x) length(unique(x)) == 1)] - cols_x = setdiff(cols_x, zero_cols) - - # dimensionality reduction - data_dim = prcomp(data[, ..cols_x], scale. = TRUE) - data_dim = as.data.table(data_dim$x) - - plot = ggplot(data_dim, - mapping = aes(x = data_dim$PC1, - y = data_dim$PC2)) + - geom_point( - mapping = aes(fill = data[[cols_y]]), - data = data_dim, - shape = 21, - size = 3, - alpha = 0.5) + - geom_point( - data = data_dim[1, ], - mapping = aes(x = data_dim$PC1, - y = data_dim$PC2), - shape = 21, - colour = "green", - alpha = 1, - size = 5) + - geom_point( - data = data_dim[nrow(data_dim), ], - mapping = aes(x = data_dim$PC1, - y = data_dim$PC2), - shape = 21, - colour = "red", - alpha = 1, - size = 5) + - labs( - x = "First Principal Component", - y = "Second Principal Component", - fill = cols_y - ) + - scale_fill_viridis_c() + - guides(fill = guide_colorbar(barwidth = 0.5, barheight = 10)) + - theme - if (add_arrow) { - plot = plot + - geom_segment( - aes(xend = c(tail(data_dim$PC1, n = -1), NA), - yend = c(tail(data_dim$PC2, n = -1), NA)), - arrow = arrow(length = unit(0.2, "cm"))) - } - return(plot) - }, - "hyperparameter" = { - data = data[, c(..cols_x, ..cols_y)] - task = TaskRegr$new(id = "viz", backend = data, target = cols_y) - lrn = lrn("regr.rpart", keep_model = TRUE) - lrn = as_learner(pipeline_robustify(task, lrn) %>>% po("learner", lrn)) - lrn$train(task) - tree = lrn$graph_model$pipeops$regr.rpart$learner_model - - plot = ggparty::ggparty(partykit::as.party(tree$model)) + - ggparty::geom_edge() + - ggparty::geom_edge_label() + - ggparty::geom_node_splitvar() + - ggparty::geom_node_plot( - gglist = list( - geom_violin(aes(x = "", y = .data[[cols_y]])), - xlab(cols_y), - scale_fill_viridis_d(end = 0.8), - theme), - ids = "terminal", - shared_axis_labels = TRUE) + - ggparty::geom_node_label( - mapping = aes(label = paste0("n=", .data[["nodesize"]])), - nudge_y = 0.03, - ids = "terminal") - return(plot) 
- }, - - stopf("Unknown plot type '%s'", type) - ) - } else { - branch = object$result$branch.selection - cols_x = cols_x[grepl(branch, cols_x)] - if (length(cols_x) == 0) { - cols_x = object$archive$cols_x - } - autoplot(object = object, type = type, cols_x = cols_x, ...) - } -} - diff --git a/R/mlr_callbacks.R b/R/mlr_callbacks.R deleted file mode 100644 index f8dfe57..0000000 --- a/R/mlr_callbacks.R +++ /dev/null @@ -1,84 +0,0 @@ - -load_callback_branch_nrounds = function() { - callback_async_tuning("mlr3automl.branch_nrounds", - label = "Boosting Rounds Callback", - - on_eval_after_xs = function(callback, context) { - context$instance$objective$store_models = TRUE - }, - - on_eval_after_resample = function(callback, context) { - states = get_private(context$resample_result)$.data$learner_states(get_private(context$resample_result)$.view) - - callback$state$max_nrounds = max(map_dbl(states, function(state) { - if (!inherits(state$model$xgboost, "NO_OP")) { - state$model$xgboost$model$best_iteration %??% NA_real_ - } else if (!inherits(state$model$catboost, "NO_OP")) { - state$model$catboost$model$tree_count %??% NA_real_ - } else if (!inherits(state$model$lightgbm, "NO_OP")) { - state$model$lightgbm$model$best_iter %??% NA_real_ - } else { - NA_real_ - } - }), na.rm = TRUE) - }, - - on_eval_before_archive = function(callback, context) { - context$aggregated_performance = c(context$aggregated_performance, list(max_nrounds = callback$state$max_nrounds)) - context$resample_result$discard(models = TRUE) - }, - - on_result = function(callback, context) { - if (context$result$learner_param_vals[[1]]$branch.selection == "xgboost") { - context$result$learner_param_vals[[1]]$xgboost.early_stopping_rounds = NULL - context$result$learner_param_vals[[1]]$xgboost.callbacks = list(cb_timeout_xgboost(timeout = Inf)) - context$result$learner_param_vals[[1]]$xgboost.nrounds = max(context$instance$archive$best()$max_nrounds, 1) - 
context$result$learner_param_vals[[1]]$xgboost.holdout_task = NULL - } else if (context$result$learner_param_vals[[1]]$branch.selection == "catboost") { - context$result$learner_param_vals[[1]]$catboost.early_stopping_rounds = NULL - context$result$learner_param_vals[[1]]$catboost.iterations = max(context$instance$archive$best()$max_nrounds, 1) - context$result$learner_param_vals[[1]]$catboost.holdout_task = NULL - context$result$learner_param_vals[[1]]$catboost.eval_metric = NULL - } else if (context$result$learner_param_vals[[1]]$branch.selection == "lightgbm") { - context$result$learner_param_vals[[1]]$lightgbm.early_stopping_rounds = NULL - context$result$learner_param_vals[[1]]$lightgbm.num_iterations = max(context$instance$archive$best()$max_nrounds, 1) - context$result$learner_param_vals[[1]]$lightgbm.holdout_task = NULL - context$result$learner_param_vals[[1]]$lightgbm.callbacks = NULL - } - } - ) -} - -load_callback_nrounds = function() { - callback_async_tuning("mlr3automl.nrounds", - label = "Boosting Rounds Callback", - - on_eval_after_xs = function(callback, context) { - context$objective_tuning$store_models = TRUE - }, - - on_eval_after_resample = function(callback, context) { - states = get_private(context$resample_result)$.data$learner_states(get_private(context$resample_result)$.view) - - callback$state$max_nrounds = max(map_dbl(states, function(state) { - if (inherits(state$model$xgboost, "NO_OP") || is.null(state$model$xgboost$model$best_iteration)) { - NA_real_ - } else { - state$model$xgboost$model$best_iteration - } - })) - }, - - on_eval_before_archive = function(callback, context) { - context$aggregated_performance = c(context$aggregated_performance, list(max_nrounds = callback$state$max_nrounds)) - context$resample_result$discard(models = TRUE) - }, - - on_result = function(callback, context) { - context$result$learner_param_vals[[1]]$xgboost.early_stopping_rounds = NULL - context$result$learner_param_vals[[1]]$xgboost.callbacks = 
list(cb_timeout_xgboost(timeout = Inf)) - context$result$learner_param_vals[[1]]$xgboost.nrounds = context$instance$archive$best()$max_nrounds - context$result$learner_param_vals[[1]]$xgboost.holdout_task = NULL - } - ) -} diff --git a/R/reexports.R b/R/reexports.R deleted file mode 100644 index d183e6c..0000000 --- a/R/reexports.R +++ /dev/null @@ -1,5 +0,0 @@ -#' @export -ggplot2::autoplot - -#' @export -ggplot2::fortify diff --git a/R/zzz.R b/R/zzz.R index e1e0ab8..4cd3dd7 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,8 +1,8 @@ -#' @import bbotk #' @import checkmate #' @import data.table #' @import mlr3 #' @import mlr3learners +#' @import mlr3extralearners #' @import mlr3mbo #' @import mlr3misc #' @import mlr3pipelines @@ -10,14 +10,18 @@ #' @import mlr3tuningspaces #' @import paradox #' @import R6 -#' @import ggplot2 +#' @importFrom rush rush_config +#' @import lhs + "_PACKAGE" +utils::globalVariables("start_time") + + #' @include aaa.R register_mlr3 = function() { x = utils::getFromNamespace("mlr_learners", ns = "mlr3") - iwalk(learners, function(obj, nm) x$add(nm, obj)) } @@ -25,11 +29,6 @@ register_mlr3 = function() { # nocov start register_namespace_callback(pkgname, "mlr3", register_mlr3) - # callbacks - x = utils::getFromNamespace("mlr_callbacks", ns = "mlr3misc") - x$add("mlr3automl.branch_nrounds", load_callback_branch_nrounds) - x$add("mlr3automl.nrounds", load_callback_nrounds) - # setup logger lg = lgr::get_logger(pkgname) assign("lg", lg, envir = parent.env(environment())) diff --git a/man-roxygen/param_id.R b/man-roxygen/param_id.R new file mode 100644 index 0000000..1f50f0e --- /dev/null +++ b/man-roxygen/param_id.R @@ -0,0 +1,2 @@ +#' @param id (`character(1)`)\cr +#' Identifier for the new instance. 
diff --git a/man/LearnerAutoBranch.Rd b/man/LearnerClassifAuto.Rd similarity index 67% rename from man/LearnerAutoBranch.Rd rename to man/LearnerClassifAuto.Rd index cbb9573..5ef6b62 100644 --- a/man/LearnerAutoBranch.Rd +++ b/man/LearnerClassifAuto.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LearnerAutoBranch.R -\name{LearnerAutoBranch} -\alias{LearnerAutoBranch} -\title{Auto Learner} +% Please edit documentation in R/LearnerClassifAuto.R +\name{LearnerClassifAuto} +\alias{LearnerClassifAuto} +\title{Classification Auto Learner} \description{ -Abstract base class for Auto like learner. +Classification auto learner. } \section{Super class}{ -\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{LearnerAutoBranch} +\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{LearnerClassifAuto} } \section{Public fields}{ \if{html}{\out{
}} @@ -16,15 +16,15 @@ Abstract base class for Auto like learner. \item{\code{tuning_space}}{(\code{list()}).} -\item{\code{instance}}{(\link{TuningInstanceRushSingleCrit}).} +\item{\code{instance}}{(\link[mlr3tuning:TuningInstanceAsyncSingleCrit]{mlr3tuning::TuningInstanceAsyncSingleCrit}).} } \if{html}{\out{
}} } \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-LearnerAutoBranch-new}{\code{LearnerAutoBranch$new()}} -\item \href{#method-LearnerAutoBranch-clone}{\code{LearnerAutoBranch$clone()}} +\item \href{#method-LearnerClassifAuto-new}{\code{LearnerClassifAuto$new()}} +\item \href{#method-LearnerClassifAuto-clone}{\code{LearnerClassifAuto$clone()}} } } \if{html}{\out{ @@ -43,12 +43,12 @@ Abstract base class for Auto like learner. }} \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-LearnerAutoBranch-new}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifAuto-new}{}}} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{LearnerAutoBranch$new(id, task_type, param_set, graph, tuning_space)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{LearnerClassifAuto$new(id = "classif.auto")}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -56,30 +56,17 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \describe{ \item{\code{id}}{(\code{character(1)})\cr Identifier for the new instance.} - -\item{\code{task_type}}{(\code{character(1)})\cr -Type of task, e.g. \code{"regr"} or \code{"classif"}. -Must be an element of \link[=mlr_reflections]{mlr_reflections$task_types$type}.} - -\item{\code{param_set}}{(\link{ParamSet})\cr -Parameter set.} - -\item{\code{graph}}{(\link[mlr3pipelines:Graph]{mlr3pipelines::Graph}). -Graph.} - -\item{\code{tuning_space}}{(list of lists of \link[paradox:to_tune]{paradox::TuneToken})\cr -List of tuning spaces.} } \if{html}{\out{}} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-LearnerAutoBranch-clone}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifAuto-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{LearnerAutoBranch$clone(deep = FALSE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{LearnerClassifAuto$clone(deep = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ diff --git a/man/LearnerClassifAutoBranch.Rd b/man/LearnerClassifAutoSVM.Rd similarity index 77% rename from man/LearnerClassifAutoBranch.Rd rename to man/LearnerClassifAutoSVM.Rd index 5c27214..da4aefd 100644 --- a/man/LearnerClassifAutoBranch.Rd +++ b/man/LearnerClassifAutoSVM.Rd @@ -1,19 +1,19 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/LearnerAutoBranch.R -\name{LearnerClassifAutoBranch} -\alias{LearnerClassifAutoBranch} -\title{Classification Auto Learner} +% Please edit documentation in R/LearnerClassifAutoSVM.R +\name{LearnerClassifAutoSVM} +\alias{LearnerClassifAutoSVM} +\title{Classification SVM Auto Learner} \description{ -Classification Auto learner. +Classification auto learner. } \section{Super classes}{ -\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3automl:LearnerAutoBranch]{mlr3automl::LearnerAutoBranch}} -> \code{LearnerClassifAutoBranch} +\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3automl:LearnerClassifAuto]{mlr3automl::LearnerClassifAuto}} -> \code{LearnerClassifAutoSVM} } \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-LearnerClassifAutoBranch-new}{\code{LearnerClassifAutoBranch$new()}} -\item \href{#method-LearnerClassifAutoBranch-clone}{\code{LearnerClassifAutoBranch$clone()}} +\item \href{#method-LearnerClassifAutoSVM-new}{\code{LearnerClassifAutoSVM$new()}} +\item \href{#method-LearnerClassifAutoSVM-clone}{\code{LearnerClassifAutoSVM$clone()}} } } \if{html}{\out{ @@ -32,12 +32,12 @@ Classification Auto learner. }} \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-LearnerClassifAutoBranch-new}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifAutoSVM-new}{}}} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{LearnerClassifAutoBranch$new(id = "classif.automl")}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{LearnerClassifAutoSVM$new(id = "classif.auto_svm")}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -50,12 +50,12 @@ Identifier for the new instance.} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-LearnerClassifAutoBranch-clone}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifAutoSVM-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{LearnerClassifAutoBranch$clone(deep = FALSE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{LearnerClassifAutoSVM$clone(deep = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ diff --git a/man/LearnerClassifAutoXgboost.Rd b/man/LearnerClassifAutoXgboost.Rd new file mode 100644 index 0000000..729d90e --- /dev/null +++ b/man/LearnerClassifAutoXgboost.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LearnerClassifAutoXgboost.R +\name{LearnerClassifAutoXgboost} +\alias{LearnerClassifAutoXgboost} +\title{Classification XGBoost Auto Learner} +\description{ +Classification auto learner. +} +\section{Super classes}{ +\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3automl:LearnerClassifAuto]{mlr3automl::LearnerClassifAuto}} -> \code{LearnerClassifAutoXgboost} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-LearnerClassifAutoXgboost-new}{\code{LearnerClassifAutoXgboost$new()}} +\item \href{#method-LearnerClassifAutoXgboost-clone}{\code{LearnerClassifAutoXgboost$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifAutoXgboost-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerClassifAutoXgboost$new(id = "classif.auto_xgboost")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)})\cr +Identifier for the new instance.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerClassifAutoXgboost-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerClassifAutoXgboost$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/LearnerRegrAuto.Rd b/man/LearnerRegrAuto.Rd new file mode 100644 index 0000000..9a21d18 --- /dev/null +++ b/man/LearnerRegrAuto.Rd @@ -0,0 +1,80 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LearnerRegrAuto.R +\name{LearnerRegrAuto} +\alias{LearnerRegrAuto} +\title{Regression Auto Learner} +\description{ +Regression auto learner. +} +\section{Super class}{ +\code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{LearnerRegrAuto} +} +\section{Public fields}{ +\if{html}{\out{
}} +\describe{ +\item{\code{graph}}{(\link[mlr3pipelines:Graph]{mlr3pipelines::Graph}).} + +\item{\code{tuning_space}}{(\code{list()}).} + +\item{\code{instance}}{(\link[mlr3tuning:TuningInstanceAsyncSingleCrit]{mlr3tuning::TuningInstanceAsyncSingleCrit}).} +} +\if{html}{\out{
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-LearnerRegrAuto-new}{\code{LearnerRegrAuto$new()}} +\item \href{#method-LearnerRegrAuto-clone}{\code{LearnerRegrAuto$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerRegrAuto-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerRegrAuto$new(id = "classif.auto")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)})\cr +Identifier for the new instance.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-LearnerRegrAuto-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{LearnerRegrAuto$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/autoplot.LearnerClassifAutoBranch.Rd b/man/autoplot.LearnerClassifAutoBranch.Rd deleted file mode 100644 index 61396bc..0000000 --- a/man/autoplot.LearnerClassifAutoBranch.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/autoplot.R -\name{autoplot.LearnerClassifAutoBranch} -\alias{autoplot.LearnerClassifAutoBranch} -\title{Plots for Auto Learners} -\usage{ -\method{autoplot}{LearnerClassifAutoBranch}( - object, - type = "marginal", - add_arrow = TRUE, - cols_x = NULL, - trafo = FALSE, - batch = NULL, - theme = theme_minimal(), - ... -) -} -\arguments{ -\item{object}{(\link{LearnerClassifAutoBranch}).} - -\item{type}{(character(1)):\cr -Type of the plot.} - -\item{add_arrow}{(\code{logical(1)})\cr -If \code{TRUE} (default), add arrows to the PCA plot.} - -\item{cols_x}{(\code{character()})\cr -Column names of x values. -By default, all untransformed x values from the search space are plotted. -Transformed hyperparameters are prefixed with \code{x_domain_}.} - -\item{trafo}{(\code{logical(1)})\cr -If \code{FALSE} (default), the untransformed x values are plotted. -If \code{TRUE}, the transformed x values are plotted.} - -\item{batch}{(\code{integer()})\cr -The batch number(s) to limit the plot to. -The default is all batches.} - -\item{theme}{(\code{\link[ggplot2:theme]{ggplot2::theme()}})\cr -The \code{\link[ggplot2:ggtheme]{ggplot2::theme_minimal()}} is applied by default to all plots.} - -\item{...}{(ignored).} -} -\value{ -\code{\link[ggplot2:ggplot]{ggplot2::ggplot()}}. 
-} -\description{ -Plots for Auto Learners -} diff --git a/man/reexports.Rd b/man/reexports.Rd deleted file mode 100644 index b4aac63..0000000 --- a/man/reexports.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reexports.R -\docType{import} -\name{reexports} -\alias{reexports} -\alias{autoplot} -\alias{fortify} -\title{Objects exported from other packages} -\keyword{internal} -\description{ -These objects are imported from other packages. Follow the links -below to see their documentation. - -\describe{ - \item{ggplot2}{\code{\link[ggplot2]{autoplot}}, \code{\link[ggplot2]{fortify}}} -}} - diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R new file mode 100644 index 0000000..ae51699 --- /dev/null +++ b/tests/testthat/setup.R @@ -0,0 +1,20 @@ +old_opts = options( + warnPartialMatchArgs = TRUE, + warnPartialMatchAttr = TRUE, + warnPartialMatchDollar = TRUE +) + +# https://github.com/HenrikBengtsson/Wishlist-for-R/issues/88 +old_opts = lapply(old_opts, function(x) if (is.null(x)) FALSE else x) + +lg_mlr3 = lgr::get_logger("mlr3") +lg_bbotk = lgr::get_logger("bbotk") +lg_rush = lgr::get_logger("rush") + +old_threshold_mlr3 = lg_mlr3$threshold +old_threshold_bbotk = lg_bbotk$threshold +old_threshold_rush = lg_rush$threshold + +lg_mlr3$set_threshold(0) +lg_bbotk$set_threshold(0) +lg_rush$set_threshold(0) diff --git a/tests/testthat/test_LearnerAutoBranch.R b/tests/testthat/test_LearnerAutoBranch.R deleted file mode 100644 index 3968b57..0000000 --- a/tests/testthat/test_LearnerAutoBranch.R +++ /dev/null @@ -1,360 +0,0 @@ -test_that("initial design is generated", { - learner_ids = c("glmnet", "kknn", "lda", "nnet", "ranger", "svm", "xgboost", "catboost", "extra_trees") - xdt = generate_default_design( - task_type = "classif", - learner_ids, - task = tsk("sonar"), - tuning_space) - n_hp = sum(map_dbl(tuning_space, length)) - expect_data_table(xdt, nrows = length(learner_ids), ncols = n_hp + 1) -}) 
- -test_that("lhs design is generated", { - learner_ids = c("glmnet", "kknn", "lda", "nnet", "ranger", "svm", "xgboost", "catboost", "extra_trees") - xdt = generate_lhs_design(10, "classif", learner_ids, tuning_space) - n_hp = sum(map_dbl(tuning_space, length)) - expect_data_table(xdt, nrows = 80, ncols = n_hp + 1) -}) - -test_that("glmnet works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "glmnet", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "glmnet") -}) - -test_that("kknn works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "kknn", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "kknn") -}) - -test_that("lda works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "lda", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "lda") -}) - -test_that("nnet works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "nnet", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - 
expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "nnet") -}) - -test_that("ranger works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "ranger", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "ranger") -}) - -test_that("svm works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "svm", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "svm") -}) - -test_that("xgboost works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "xgboost", - xgboost_eval_metric = "mlogloss", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "xgboost") - expect_numeric(learner$model$instance$archive$data$max_nrounds, lower = 1) -}) - -test_that("catboost works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "catboost", - catboost_eval_metric = "MultiClass", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), 
"LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "catboost") - expect_numeric(learner$model$instance$archive$data$max_nrounds, lower = 1) -}) - -test_that("extra_trees works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "extra_trees", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "extra_trees") -}) - -test_that("lightgbm works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = "lightgbm", - lightgbm_eval_metric = "multi_logloss", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 6) - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$result$branch.selection, "lightgbm") -}) - -test_that("xgboost, catboost and lightgbm work", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - learner_ids = c("xgboost", "catboost", "lightgbm"), - catboost_eval_metric = "MultiClass", - lightgbm_eval_metric = "multi_logloss", - xgboost_eval_metric = "mlogloss", - small_data_size = 100, - lhs_size = 1, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 20), - callbacks = clbk("mlr3tuning.async_save_logs") - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") -}) - -test_that("all learner work", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - small_data_size = 100, - measure = 
msr("classif.ce"), - terminator = trm("evals", n_evals = 20), - lhs_size = 1 - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_class(learner$model$instance, "TuningInstanceAsyncSingleCrit") - expect_prediction(learner$predict(task)) -}) - -test_that("memory limit works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("spam") - learner = lrn("classif.automl_branch", - max_memory = 50, - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 20), - resampling = rsmp("holdout"), - lhs_size = 1 - ) - - learner$train(task) -}) - -test_that("small data set switch works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - small_data_size = 1000, - small_data_resampling = rsmp("cv", folds = 2), - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 1), - lhs_size = 1, - store_benchmark_result = TRUE - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_equal(learner$model$instance$archive$benchmark_result$resamplings$resampling[[1]]$iters, 2) -}) - -test_that("large data set switch works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - large_data_size = 100, - large_data_nthread = 4, - large_data_learner_ids = "ranger", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 1), - lhs_size = 1, - store_benchmark_result = TRUE - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - expect_set_equal(learner$model$instance$archive$data$branch.selection, "ranger") -}) - -test_that("max_cardinality works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - 
small_data_size = 100, - max_cardinality = 2, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 1), - lhs_size = 1 - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") -}) - -test_that("max_cardinality works for extra trees", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - small_data_size = 100, - max_cardinality = 3, - extra_trees_max_cardinality = 2, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 1), - lhs_size = 1 - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") -}) - -test_that("logger callback works", { - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("penguins") - learner = lrn("classif.automl_branch", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 20), - lhs_size = 1, - callbacks = clbk("mlr3tuning.async_save_logs") - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") - - expect_list(learner$instance$archive$data$log) - expect_list(learner$instance$archive$data$log[[1]], len = 3) -}) - -test_that("integer columns work", { - library(mlr3oml) - rush_plan(n_workers = 2) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("oml", data_id = 1464) - learner = lrn("classif.automl_branch", - learner_ids = "catboost", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 20), - lhs_size = 1 - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") -}) - -test_that("constant columns work", { - library(mlr3oml) - rush_plan(n_workers = 2, lgr_thresholds = c(mlr3 = "info")) - lgr::get_logger("mlr3automl")$set_threshold("debug") - - task = tsk("oml", data_id = 41143) - learner = lrn("classif.automl_branch", - learner_ids = "catboost", - small_data_size = 100, - measure = msr("classif.ce"), - terminator = 
trm("evals", n_evals = 20), - lhs_size = 1 - ) - - expect_class(learner$train(task), "LearnerClassifAutoBranch") -}) diff --git a/tests/testthat/test_LearnerClassifAuto.R b/tests/testthat/test_LearnerClassifAuto.R new file mode 100644 index 0000000..7eebf33 --- /dev/null +++ b/tests/testthat/test_LearnerClassifAuto.R @@ -0,0 +1,412 @@ +test_that("initial design is generated", { + skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost")) + + learner_ids = c("glmnet", "kknn", "lda", "nnet", "ranger", "svm", "xgboost", "catboost", "extra_trees") + xdt = generate_default_design( + task_type = "classif", + learner_ids, + task = tsk("sonar"), + tuning_space) + expect_data_table(xdt, nrows = length(learner_ids)) +}) + +test_that("lhs design is generated", { + skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost")) + + learner_ids = c("glmnet", "kknn", "nnet", "ranger", "svm", "xgboost", "catboost") + xdt = generate_lhs_design(10, "classif", learner_ids, tuning_space) + expect_data_table(xdt, nrows = 70) +}) + +test_that("LearnerClassifAuto is initialized", { + learner = lrn("classif.auto", + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 10)) + + expect_null(learner$graph) + expect_null(learner$tuning_space) +}) + +test_that("glmnet works", { + rush_plan(n_workers = 2) + skip_if_not_installed("glmnet") + + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = "glmnet", + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$graph$param_set$values$branch.selection, "glmnet") + expect_equal(learner$model$instance$result$branch.selection, "glmnet") +}) + +test_that("kknn works", { + rush_plan(n_workers = 2) + skip_if_not_installed("kknn") + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids 
= "kknn", + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$graph$param_set$values$branch.selection, "kknn") + expect_equal(learner$model$instance$result$branch.selection, "kknn") +}) + +test_that("only lda fails", { + rush_plan(n_workers = 2) + + task = tsk("penguins") + expect_error(lrn("classif.auto", + learner_ids = "lda", + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ), "must be combined with other learners") +}) + +test_that("lda and glmnet works", { + rush_plan(n_workers = 2) + skip_if_not_installed("glmnet") + + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = c("lda", "glmnet"), + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_subset(learner$graph$param_set$values$branch.selection, c("glmnet", "lda")) + expect_subset(learner$model$instance$result$branch.selection, c("glmnet", "lda")) +}) + +test_that("nnet works", { + rush_plan(n_workers = 2) + skip_if_not_installed("nnet") + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = "nnet", + resampling = rsmp("holdout"), + small_data_size = 1, + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$result$branch.selection, "nnet") +}) + +test_that("ranger works", { + rush_plan(n_workers = 2) + skip_if_not_installed("ranger") + + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = "ranger", + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + 
expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$result$branch.selection, "ranger") +}) + +test_that("svm works", { + rush_plan(n_workers = 2) + skip_if_not_installed("e1071") + + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = "svm", + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$result$branch.selection, "svm") +}) + +test_that("xgboost works", { + skip_if_not_installed("xgboost") + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = "xgboost", + small_data_size = 1, + xgboost_eval_metric = "mlogloss", + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$result$branch.selection, "xgboost") +}) + +test_that("catboost works", { + skip_if_not_installed("catboost") + rush_plan(n_workers = 2) + + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = "catboost", + small_data_size = 1, + catboost_eval_metric = "MultiClass", + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$result$branch.selection, "catboost") +}) + +test_that("only extra_trees fails", { + rush_plan(n_workers = 2) + + task = tsk("penguins") + expect_error(lrn("classif.auto", + learner_ids = "extra_trees", + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ), "must be combined with other learners") +}) + +test_that("extra_trees and glmnet works", { + skip_if_not_installed("glmnet") + rush_plan(n_workers = 2) + + task = tsk("penguins") + 
learner = lrn("classif.auto", + learner_ids = c("extra_trees", "glmnet"), + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$result$branch.selection, "extra_trees") +}) + +test_that("lightgbm works", { + skip_if_not_installed("lightgbm") + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = "lightgbm", + lightgbm_eval_metric = "multi_logloss", + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$result$branch.selection, "lightgbm") +}) + +test_that("xgboost, catboost and lightgbm work", { + skip_if_not_installed(c("xgboost", "catboost", "lightgbm")) + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + learner_ids = c("xgboost", "catboost", "lightgbm"), + catboost_eval_metric = "MultiClass", + lightgbm_eval_metric = "multi_logloss", + xgboost_eval_metric = "mlogloss", + resampling = rsmp("holdout"), + lhs_size = 1, + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 20), + callbacks = clbk("mlr3tuning.async_save_logs") + ) + + expect_class(learner$train(task), "LearnerClassifAuto") +}) + +test_that("all learner work", { + skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost", "MASS", "lightgbm")) + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + small_data_size = 100, + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 20), + lhs_size = 1 + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_class(learner$model$instance, "TuningInstanceAsyncSingleCrit") + expect_prediction(learner$predict(task)) +}) + +# test_that("memory limit works", { +# 
skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost", "MASS", "lightgbm")) +# rush_plan(n_workers = 2) + +# task = tsk("spam") +# learner = lrn("classif.auto", +# max_memory = 50, +# small_data_size = 100, +# measure = msr("classif.ce"), +# terminator = trm("evals", n_evals = 20), +# resampling = rsmp("holdout"), +# lhs_size = 1 +# ) + +# learner$train(task) +# }) + +test_that("small data set switch works", { + skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost", "MASS", "lightgbm")) + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + small_data_size = 1000, + small_data_resampling = rsmp("cv", folds = 2), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 1), + lhs_size = 1, + store_benchmark_result = TRUE + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_equal(learner$model$instance$archive$benchmark_result$resamplings$resampling[[1]]$iters, 2) +}) + +test_that("large data set switch works", { + skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost", "MASS", "lightgbm")) + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + large_data_size = 100, + large_data_nthread = 4, + large_data_learner_ids = "ranger", + small_data_size = 100, + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 1), + lhs_size = 1, + store_benchmark_result = TRUE + ) + + expect_class(learner$train(task), "LearnerClassifAuto") + expect_set_equal(learner$model$instance$archive$data$branch.selection, "ranger") +}) + +test_that("max_cardinality works", { + skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost", "MASS", "lightgbm")) + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + small_data_size = 1, + resampling = rsmp("holdout"), + max_cardinality = 2, + measure = msr("classif.ce"), 
+ terminator = trm("evals", n_evals = 1), + lhs_size = 1 + ) + + expect_class(learner$train(task), "LearnerClassifAuto") +}) + +test_that("max_cardinality works for extra trees", { + skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost", "MASS", "lightgbm")) + rush_plan(n_workers = 2) + + task = tsk("penguins") + learner = lrn("classif.auto", + small_data_size = 1, + resampling = rsmp("holdout"), + max_cardinality = 3, + extra_trees_max_cardinality = 2, + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 1), + lhs_size = 1 + ) + + expect_class(learner$train(task), "LearnerClassifAuto") +}) + +# test_that("logger callback works", { +# skip_if_not_installed(c("glmnet", "kknn", "nnet", "ranger", "e1071", "xgboost", "catboost", "MASS", "lightgbm")) +# rush_plan(n_workers = 2) + +# task = tsk("penguins") +# learner = lrn("classif.auto", +# small_data_size = 1, +# resampling = rsmp("holdout"), +# measure = msr("classif.ce"), +# terminator = trm("evals", n_evals = 10), +# lhs_size = 1, +# callbacks = clbk("mlr3tuning.async_save_logs") +# ) + +# expect_class(learner$train(task), "LearnerClassifAuto") +# expect_list(learner$instance$archive$data$log) +# expect_list(learner$instance$archive$data$log[[1]], len = 1) +# }) + +# test_that("integer columns work", { +# library(mlr3oml) +# rush_plan(n_workers = 2) + + +# task = tsk("oml", data_id = 1464) +# learner = lrn("classif.auto", +# learner_ids = "catboost", +# small_data_size = 100, +# measure = msr("classif.ce"), +# terminator = trm("evals", n_evals = 20), +# lhs_size = 1 +# ) + +# expect_class(learner$train(task), "LearnerClassifAuto") +# }) + +# test_that("constant columns work", { +# library(mlr3oml) +# rush_plan(n_workers = 2, lgr_thresholds = c(mlr3 = "info")) + + +# task = tsk("oml", data_id = 41143) +# learner = lrn("classif.auto", +# learner_ids = "catboost", +# small_data_size = 100, +# measure = msr("classif.ce"), +# terminator = trm("evals", n_evals = 20), +# 
lhs_size = 1 +# ) + +# expect_class(learner$train(task), "LearnerClassifAuto") +# }) diff --git a/tests/testthat/test_LearnerClassifSVM.R b/tests/testthat/test_LearnerClassifSVM.R new file mode 100644 index 0000000..9a3274e --- /dev/null +++ b/tests/testthat/test_LearnerClassifSVM.R @@ -0,0 +1,25 @@ +test_that("LearnerClassifAutoSVM is initialized", { + learner = lrn("classif.auto_svm", + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 10)) + + expect_null(learner$graph) + expect_null(learner$tuning_space) +}) + +test_that("LearnerClassifAutoSVM is trained", { + rush_plan(n_workers = 2) + lgr::get_logger("mlr3automl")$set_threshold("debug") + + task = tsk("penguins") + learner = lrn("classif.auto_svm", + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 6) + ) + + expect_class(learner$train(task), "LearnerClassifAutoSVM") + expect_equal(learner$graph$param_set$values$branch.selection, "svm") + expect_equal(learner$model$instance$result$branch.selection, "svm") +}) diff --git a/tests/testthat/test_LearnerClassifXgboost.R b/tests/testthat/test_LearnerClassifXgboost.R new file mode 100644 index 0000000..f5561f2 --- /dev/null +++ b/tests/testthat/test_LearnerClassifXgboost.R @@ -0,0 +1,25 @@ +test_that("LearnerClassifAutoXgboost is initialized", { + learner = lrn("classif.auto_xgboost", + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 10)) + + expect_null(learner$graph) + expect_null(learner$tuning_space) +}) + +test_that("LearnerClassifAutoXgboost is trained", { + rush_plan(n_workers = 2) + lgr::get_logger("mlr3automl")$set_threshold("debug") + + task = tsk("penguins") + learner = lrn("classif.auto_xgboost", + small_data_size = 1, + resampling = rsmp("holdout"), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 3) + ) + + expect_class(learner$train(task), "LearnerClassifAutoXgboost") + 
expect_equal(learner$graph$param_set$values$branch.selection, "xgboost") + expect_equal(learner$model$instance$result$branch.selection, "xgboost") +})