From b24579ff11834fc4f49508a8ad703dd2b27dd4eb Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Thu, 29 Aug 2024 14:34:03 +0200 Subject: [PATCH 01/25] init PipeOps Tomek and Nearmiss --- R/PipeOpNearmiss.R | 127 +++++++++++++++++++++++++++++++++++++++++++++ R/PipeOpTomek.R | 123 +++++++++++++++++++++++++++++++++++++++++++ R/bibentries.R | 20 +++++++ 3 files changed, 270 insertions(+) create mode 100644 R/PipeOpNearmiss.R create mode 100644 R/PipeOpTomek.R diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R new file mode 100644 index 000000000..2032fd0fa --- /dev/null +++ b/R/PipeOpNearmiss.R @@ -0,0 +1,127 @@ +#' @title Nearmiss Down-Sampling +#' +#' @usage NULL +#' @name mlr_pipeops_nearmiss +#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Generates a more balanced data set by ... +#' +#' The algorithm down-samples ... +#' +#' It can only be applied to tasks with numeric (or integer) features with no missing values. +#' The algorithm treats integer features as numeric features. To not change feature types, these are then rounded back to integer. +#' +#' See [`themis::nearmiss`] for details. +#' +#' @section Construction: +#' ``` +#' PipeOpNearmiss$new(id = "nearmiss", param_vals = list()) +#' ``` +#' +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"nearmiss"`. +#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. +#' +#' The output during training is the input [`Task`][mlr3::Task] with +#' The output during prediction is the unchanged input. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. 
+#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as +#' * `k` :: `integer(1)`\cr +#' Number of nearest neighbors used for generating new values from the minority class. Default is `5`. +#' * `under_ratio` :: `numeric(1)`\cr +#' Ratio of the minority to majority class. Default is `1`. For details, see [`themis::nearmiss`]. +#' +#' @section Fields: +#' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @references +#' `r format_bib("zhang2003")` +#' +#' @family PipeOps +#' @template seealso_pipeopslist +#' @include PipeOpTaskPreproc.R +#' @export +#' @examples +#' \dontshow{ if (requireNamespace("themis")) \{ } +#' library("mlr3") +#' +#' # Create example task +#' data = data.frame( +#' target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), +#' feature = rnorm(200) +#' ) +#' task = TaskClassif$new(id = "example", backend = data, target = "target") +#' task$head() +#' table(task$data()$target) +#' +#' # Generate synthetic data for minority class +#' pop = po("nearmiss") +#' nearmiss_result = pop$train(list(task))[[1]]$data() +#' table(nearmiss_result$target) +#' \dontshow{ \} } +PipeOpNearmiss = R6Class("PipeOpNearmiss", + inherit = PipeOpTaskPreproc, + public = list( + initialize = function(id = "nearmiss", param_vals = list()) { + ps = ps( + k = p_int(lower = 1, default = 5, tags = c("train", "nearmiss")), + over_ratio = p_dbl(lower = 0, default = 1, tags = c("train", "nearmiss")) + ) + super$initialize(id, param_set = ps, param_vals = param_vals, packages = "themis", can_subset_cols = FALSE, + task_type = "TaskClassif", tags = "imbalanced data") + } + ), + private = list( + + .train_task = function(task) { + cols = task$feature_names + + # Return task unchanged, if no feature columns exist + if (!length(cols)) { + return(task) + } + # PipeOp does not know how to 
handle non-feature columns + unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) + if (length(unsupported_cols)) { + stopf("Nearmiss cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", + paste(unsupported_cols, collapse = "', '")) + } + # Only numeric and integer features allowed + if (!all(task$feature_types$type %in% c("numeric", "integer"))) { + stop("Nearmiss does only accept numeric and integer features. Use PipeOpSelect to select the appropriate features.") + } + + # Down-sample Data + dt = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) + + # Return task unchanged if no synthetic data was generated + if (nrow(dt) == task$nrow) { + return(task) + } + + # Filter snc to only contain the generated synthetic data + # dt <- dt[seq(task$nrow + 1L, nrow(snc))] + # Might need a better solution here, since we are reducing the number of rows + + # Convert originally integer columns back to integer as SMOTENC treats them as numeric + int_cols = task$feature_names[task$feature_types$type == "integer"] + dt[, (int_cols) := lapply(.SD, function(x) as.integer(round(x))), .SDcols = int_cols] + + task$rbind(dt) + } + ) +) + +mlr_pipeops$add("nearmiss", PipeOpNearmiss) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R new file mode 100644 index 000000000..5d30e5489 --- /dev/null +++ b/R/PipeOpTomek.R @@ -0,0 +1,123 @@ +#' @title Tomek Down-Sampling +#' +#' @usage NULL +#' @name mlr_pipeops_tomek +#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Generates a cleaner data set by removing all majority-minority Tomek links. +#' +#' The algorithm down-samples the data by removing all pairs of observations that form a Tomek link, +#' i.e. a pair of observations that are nearest neighbors and belong to different classes. 
+#' It can only be applied to tasks with numeric (or integer) features with no missing values. +#' The algorithm treats integer features as numeric features. To not change feature types, these are then rounded back to integer. +#' +#' ???? +#' Open question: Takes multiclass?, Is not balancing, but cleaning since results are not more balanced (would be the case if only removing majority cases) +#' +#' See [`themis::tomek`] for details. +#' +#' @section Construction: +#' ``` +#' PipeOpTOmek$new(id = "tomek", param_vals = list()) +#' ``` +#' +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"tomek"`. +#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. +#' +#' The output during training is the input [`Task`][mlr3::Task] with removed rows for pairs of observations that form a Tomek link. +#' The output during prediction is the unchanged input. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. +#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`]. +#' +#' @section Fields: +#' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. 
+#' +#' @references +#' `r format_bib("tomek1976")` +#' +#' @family PipeOps +#' @template seealso_pipeopslist +#' @include PipeOpTaskPreproc.R +#' @export +#' @examples +#' \dontshow{ if (requireNamespace("themis")) \{ } +#' library("mlr3") +#' +#' # Create example task +#' data = data.frame( +#' target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), +#' feature = rnorm(200) +#' ) +#' task = TaskClassif$new(id = "example", backend = data, target = "target") +#' task$head() +#' table(task$data()$target) +#' +#' # Down-sample data +#' pop = po("tomek") +#' tomek_result = pop$train(list(task))[[1]]$data() +#' nrow(tomek_result) +#' table(tomek_result$target) +#' \dontshow{ \} } +PipeOpTomek = R6Class("PipeOpTomek", + inherit = PipeOpTaskPreproc, + public = list( + initialize = function(id = "tomek", param_vals = list()) { + super$initialize(id, param_set = ps(), param_vals = param_vals, packages = "themis", can_subset_cols = FALSE, + task_type = "TaskClassif", tags = "imbalanced data") + } + ), + private = list( + + .train_task = function(task) { + cols = task$feature_names + + # Return task unchanged, if no feature columns exist + if (!length(cols)) { + return(task) + } + # PipeOp does not know how to handle non-feature columns + unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) + if (length(unsupported_cols)) { + stopf("Tomek cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", + paste(unsupported_cols, collapse = "', '")) + } + # Only numeric and integer features allowed + if (!all(task$feature_types$type %in% c("numeric", "integer"))) { + stop("Tomek does only accept numeric and integer features. 
Use PipeOpSelect to select the appropriate features.") + } + + # Down-sample Data + dt = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) + + # Return task unchanged if no data was excluded + if (nrow(dt) == task$nrow) { + return(task) + } + + # Filter snc to only contain the generated synthetic data + # snc <- snc[seq(task$nrow + 1L, nrow(snc))] + # need a better solution here, since we are reducing the number of rows + + # Convert originally integer columns back to integer as SMOTENC treats them as numeric + int_cols = task$feature_names[task$feature_types$type == "integer"] + dt[, (int_cols) := lapply(.SD, function(x) as.integer(round(x))), .SDcols = int_cols] + + task$rbind(dt) + } + ) +) + +mlr_pipeops$add("tomek", PipeOpTomek) diff --git a/R/bibentries.R b/R/bibentries.R index de55741d5..d59a38b67 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -52,5 +52,25 @@ bibentries = c( author = "Yujun Wu and Dennis D Boos and Leonard A Stefanski", title = "Controlling Variable Selection by the Addition of Pseudovariables", journal = "Journal of the American Statistical Association" + ), + + zhang2003 = bibentry("inproceedings", + year = "2003", + author = "Zhang, J. and Mani, I.", + title = "KNN Approach to Unbalanced Data Distributions: A Case Study Involving Information Extraction", + booktitle = "Proceedings of Workshop on Learning from Imbalanced Datasets (ICML)", + ), + + tomek1976 = bibentry("article", + doi = "10.1109/TSMC.1976.4309452", + author = "I. 
Tomek", + year = "1976", + title = "Two Modifications of CNN", + journal = "IEEE Transactions on Systems, Man and Cybernetics", + volume = "6", + number = "11", + pages = "769--772", + publisher = "IEEE" ) + ) From 86fc40ab223de33db144ab364915fa1616ada5f8 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 13:57:01 +0200 Subject: [PATCH 02/25] working PipeOpTomek --- R/PipeOpTomek.R | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index 5d30e5489..998faac4a 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -9,11 +9,8 @@ #' #' The algorithm down-samples the data by removing all pairs of observations that form a Tomek link, #' i.e. a pair of observations that are nearest neighbors and belong to different classes. -#' It can only be applied to tasks with numeric (or integer) features with no missing values. -#' The algorithm treats integer features as numeric features. To not change feature types, these are then rounded back to integer. -#' -#' ???? -#' Open question: Takes multiclass?, Is not balancing, but cleaning since results are not more balanced (would be the case if only removing majority cases) +#' It can only be applied to tasks with numeric or integer features with no missing values. +#' Supports multiclass classification. #' #' See [`themis::tomek`] for details. 
#' @@ -89,33 +86,26 @@ PipeOpTomek = R6Class("PipeOpTomek", return(task) } # PipeOp does not know how to handle non-feature columns - unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) - if (length(unsupported_cols)) { - stopf("Tomek cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", - paste(unsupported_cols, collapse = "', '")) - } + # unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) + # if (length(unsupported_cols)) { + # stopf("Tomek cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", + # paste(unsupported_cols, collapse = "', '")) + # } + # do we want this? We could handle it, question just is whether it's useful to still raise an error + # since we are effectively ignoring stratify & co. + # Only numeric and integer features allowed if (!all(task$feature_types$type %in% c("numeric", "integer"))) { stop("Tomek does only accept numeric and integer features. 
Use PipeOpSelect to select the appropriate features.") } - # Down-sample Data + # Down-sample data dt = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) - # Return task unchanged if no data was excluded - if (nrow(dt) == task$nrow) { - return(task) - } - - # Filter snc to only contain the generated synthetic data - # snc <- snc[seq(task$nrow + 1L, nrow(snc))] - # need a better solution here, since we are reducing the number of rows - - # Convert originally integer columns back to integer as SMOTENC treats them as numeric - int_cols = task$feature_names[task$feature_types$type == "integer"] - dt[, (int_cols) := lapply(.SD, function(x) as.integer(round(x))), .SDcols = int_cols] - - task$rbind(dt) + keep = as.integer(row.names(dt)) + # more robust, more computationally complex alternative: + # keep = as.integer(row.names(fintersect(task$data(), dt))) + task$filter(keep) } ) ) From 7aae18cf0a28297b33056e0ae2a9dc14fdf7bcff Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 14:08:30 +0200 Subject: [PATCH 03/25] PipeOpTomek tests --- tests/testthat/test_pipeop_tomek.R | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/testthat/test_pipeop_tomek.R diff --git a/tests/testthat/test_pipeop_tomek.R b/tests/testthat/test_pipeop_tomek.R new file mode 100644 index 000000000..e8a21d684 --- /dev/null +++ b/tests/testthat/test_pipeop_tomek.R @@ -0,0 +1,35 @@ +context("PipeOpTomek") + +test_that("PipeOpTomek - basic properties", { + skip_if_not_installed("themis") + + task = mlr_tasks$get("iris") + + expect_datapreproc_pipeop_class(PipeOpTomek, task = task, predict_like_train = FALSE) + +}) + +test_that("PipeOpTomek - train works as intended", { + skip_if_not_installed("themis") + + op = PipeOpTomek$new() + task = mlr_tasks$get("iris") + + # Compare to themis::tomek + train_out = op$train(list(task))[[1]]$data() + smotenc_out = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) + 
+ expect_equal(train_out, smotenc_out) + + # Empty task is returned unchanged + task$select(character(0)) + expect_equal( + op$train(list(task))[[1L]], + task + ) + + # PipeOp does not accept tasks with wrong feature types + task = tsk("breast_cancer") + expect_error(op$train(list(task))) + +}) From ef58815799b8cbe030646fb91a6f4294a205b57a Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 14:28:40 +0200 Subject: [PATCH 04/25] typo --- tests/testthat/test_pipeop_tomek.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_pipeop_tomek.R b/tests/testthat/test_pipeop_tomek.R index e8a21d684..934d937ef 100644 --- a/tests/testthat/test_pipeop_tomek.R +++ b/tests/testthat/test_pipeop_tomek.R @@ -4,7 +4,6 @@ test_that("PipeOpTomek - basic properties", { skip_if_not_installed("themis") task = mlr_tasks$get("iris") - expect_datapreproc_pipeop_class(PipeOpTomek, task = task, predict_like_train = FALSE) }) @@ -17,9 +16,9 @@ test_that("PipeOpTomek - train works as intended", { # Compare to themis::tomek train_out = op$train(list(task))[[1]]$data() - smotenc_out = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) + tomek_out = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) - expect_equal(train_out, smotenc_out) + expect_equal(train_out, tomek_out) # Empty task is returned unchanged task$select(character(0)) @@ -29,7 +28,7 @@ test_that("PipeOpTomek - train works as intended", { ) # PipeOp does not accept tasks with wrong feature types - task = tsk("breast_cancer") + task = tsk("german_credit") expect_error(op$train(list(task))) }) From 2bf7004f57695ea9dc4b7529a4a8d0f2e94b0f84 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 14:30:04 +0200 Subject: [PATCH 05/25] Added PipeOpNearmiss tests --- tests/testthat/test_pipeop_nearmiss.R | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 tests/testthat/test_pipeop_nearmiss.R diff 
--git a/tests/testthat/test_pipeop_nearmiss.R b/tests/testthat/test_pipeop_nearmiss.R new file mode 100644 index 000000000..8b5d47881 --- /dev/null +++ b/tests/testthat/test_pipeop_nearmiss.R @@ -0,0 +1,34 @@ +context("PipeOpNearmiss") + +test_that("PipeOpNearmiss - basic properties", { + skip_if_not_installed("themis") + + task = mlr_tasks$get("wine") + expect_datapreproc_pipeop_class(PipeOpNearmiss, task = task, predict_like_train = FALSE) + +}) + +test_that("PipeOpNearmiss - train works as intended", { + skip_if_not_installed("themis") + + op = PipeOpNearmiss$new() + task = mlr_tasks$get("wine") + + # Compare to themis::nearmiss + train_out = op$train(list(task))[[1]]$data() + nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names)) + + expect_equal(train_out, nearmiss_out) + + # Empty task is returned unchanged + task$select(character(0)) + expect_equal( + op$train(list(task))[[1L]], + task + ) + + # PipeOp does not accept tasks with wrong feature types + task = tsk("german_credit") + expect_error(op$train(list(task))) + +}) From 33b125d5870083d4e65961ef39589d331895d6dd Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 14:47:49 +0200 Subject: [PATCH 06/25] added tests --- tests/testthat/test_pipeop_nearmiss.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/testthat/test_pipeop_nearmiss.R b/tests/testthat/test_pipeop_nearmiss.R index 8b5d47881..ae44a1d06 100644 --- a/tests/testthat/test_pipeop_nearmiss.R +++ b/tests/testthat/test_pipeop_nearmiss.R @@ -20,6 +20,21 @@ test_that("PipeOpNearmiss - train works as intended", { expect_equal(train_out, nearmiss_out) + # Compare to themis::nearmiss with changed params + op$param_set$set_values(k = 8, under_ratio = 0.5) + train_out = op$train(list(task))[[1]]$data() + nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names, + k = 8, under_ratio = 0.5)) + + expect_equal(train_out, nearmiss_out) + + 
op$param_set$set_values(k = 8, under_ratio = 1.5) + train_out = op$train(list(task))[[1]]$data() + nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names, + k = 8, under_ratio = 1.5)) + + expect_equal(train_out, nearmiss_out) + # Empty task is returned unchanged task$select(character(0)) expect_equal( From 995f1179a1872f968d069b88a0d58729657004a8 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 15:36:30 +0200 Subject: [PATCH 07/25] docs: small additions --- R/PipeOpTomek.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index 998faac4a..ac34da2d9 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -60,7 +60,7 @@ #' ) #' task = TaskClassif$new(id = "example", backend = data, target = "target") #' task$head() -#' table(task$data()$target) +#' table(task$data(cols = "target")) #' #' # Down-sample data #' pop = po("tomek") From 2311c65191f0cbbdb78da2bc8a5c5aadcf7a6926 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 18:07:41 +0200 Subject: [PATCH 08/25] modified params in test --- tests/testthat/test_pipeop_nearmiss.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_pipeop_nearmiss.R b/tests/testthat/test_pipeop_nearmiss.R index ae44a1d06..a364f497d 100644 --- a/tests/testthat/test_pipeop_nearmiss.R +++ b/tests/testthat/test_pipeop_nearmiss.R @@ -21,17 +21,17 @@ test_that("PipeOpNearmiss - train works as intended", { expect_equal(train_out, nearmiss_out) # Compare to themis::nearmiss with changed params - op$param_set$set_values(k = 8, under_ratio = 0.5) + op$param_set$set_values(k = 8, under_ratio = 0.9) train_out = op$train(list(task))[[1]]$data() nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names, - k = 8, under_ratio = 0.5)) + k = 8, under_ratio = 0.9)) expect_equal(train_out, nearmiss_out) - op$param_set$set_values(k = 8, under_ratio = 1.5) + 
op$param_set$set_values(k = 8, under_ratio = 1.1) train_out = op$train(list(task))[[1]]$data() nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names, - k = 8, under_ratio = 1.5)) + k = 8, under_ratio = 1.1)) expect_equal(train_out, nearmiss_out) From 953d14e769be538f1a3a408b93939c3901414008 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 18:08:20 +0200 Subject: [PATCH 09/25] Working PipeOpNearmiss --- R/PipeOpNearmiss.R | 57 ++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R index 2032fd0fa..afabbf456 100644 --- a/R/PipeOpNearmiss.R +++ b/R/PipeOpNearmiss.R @@ -5,13 +5,11 @@ #' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @description -#' Generates a more balanced data set by ... -#' -#' The algorithm down-samples ... -#' -#' It can only be applied to tasks with numeric (or integer) features with no missing values. -#' The algorithm treats integer features as numeric features. To not change feature types, these are then rounded back to integer. +#' Generates a more balanced data set by down-sampling the instances of non-minority classes using the NEARMISS algorithm. #' +#' The algorithm down-samples by selecting instances from the non-minority classes that have the smallest mean distance +#' to their `k` nearest neighbors of different classes. +#' This can only be applied to [classification tasks][mlr3::TaskClassif] with numeric or integer features that have no missing values. #' See [`themis::nearmiss`] for details. #' #' @section Construction: @@ -27,7 +25,7 @@ #' @section Input and Output Channels: #' Input and output channels are inherited from [`PipeOpTaskPreproc`]. #' -#' The output during training is the input [`Task`][mlr3::Task] with +#' The output during training is the input [`Task`][mlr3::Task] with the rows removed from the non-minority classes. 
#' The output during prediction is the unchanged input. #' #' @section State: @@ -36,9 +34,11 @@ #' @section Parameters: #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as #' * `k` :: `integer(1)`\cr -#' Number of nearest neighbors used for generating new values from the minority class. Default is `5`. +#' Number of nearest neighbors used for calculating the mean distances. Default is `5`. #' * `under_ratio` :: `numeric(1)`\cr -#' Ratio of the minority to majority class. Default is `1`. For details, see [`themis::nearmiss`]. +#' Ratio of the minority-to-majority frequencies. This specifies the ratio to which the number of instances +#' in the non-minority classes get down-sampled to, relative to the number of instances of the minority class. +#' Default is `1`. For details, see [`themis::nearmiss`]. #' #' @section Fields: #' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -64,11 +64,12 @@ #' ) #' task = TaskClassif$new(id = "example", backend = data, target = "target") #' task$head() -#' table(task$data()$target) +#' table(task$data(cols = "target")) #' #' # Generate synthetic data for minority class #' pop = po("nearmiss") #' nearmiss_result = pop$train(list(task))[[1]]$data() +#' nrow(nearmiss_result$target) #' table(nearmiss_result$target) #' \dontshow{ \} } PipeOpNearmiss = R6Class("PipeOpNearmiss", @@ -77,7 +78,7 @@ PipeOpNearmiss = R6Class("PipeOpNearmiss", initialize = function(id = "nearmiss", param_vals = list()) { ps = ps( k = p_int(lower = 1, default = 5, tags = c("train", "nearmiss")), - over_ratio = p_dbl(lower = 0, default = 1, tags = c("train", "nearmiss")) + under_ratio = p_dbl(lower = 0, default = 1, tags = c("train", "nearmiss")) ) super$initialize(id, param_set = ps, param_vals = param_vals, packages = "themis", can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data") @@ -93,33 +94,25 @@ PipeOpNearmiss = R6Class("PipeOpNearmiss", return(task) } # PipeOp does not know how to 
handle non-feature columns - unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) - if (length(unsupported_cols)) { - stopf("Nearmiss cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", - paste(unsupported_cols, collapse = "', '")) - } + # unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) + # if (length(unsupported_cols)) { + # stopf("Nearmiss cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", + # paste(unsupported_cols, collapse = "', '")) + # } + # Only numeric and integer features allowed if (!all(task$feature_types$type %in% c("numeric", "integer"))) { stop("Nearmiss does only accept numeric and integer features. Use PipeOpSelect to select the appropriate features.") } - # Down-sample Data - dt = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) - - # Return task unchanged if no synthetic data was generated - if (nrow(dt) == task$nrow) { - return(task) - } - - # Filter snc to only contain the generated synthetic data - # dt <- dt[seq(task$nrow + 1L, nrow(snc))] - # Might need a better solution here, since we are reducing the number of rows - - # Convert originally integer columns back to integer as SMOTENC treats them as numeric - int_cols = task$feature_names[task$feature_types$type == "integer"] - dt[, (int_cols) := lapply(.SD, function(x) as.integer(round(x))), .SDcols = int_cols] + # Down-sample data + dt = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names, + .args = self$param_set$get_values(tags = "nearmiss"))) - task$rbind(dt) + keep = as.integer(row.names(dt)) + # more robust, more computationally complex alternative: + # keep = as.integer(row.names(fintersect(task$data(), dt))) + task$filter(keep) } ) ) From 03032841ee3accb13df261047611e45614a455a0 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 18:08:43 +0200 
Subject: [PATCH 10/25] Docs changes in PipeOpTomek --- R/PipeOpTomek.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index ac34da2d9..97804fcbc 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -9,7 +9,7 @@ #' #' The algorithm down-samples the data by removing all pairs of observations that form a Tomek link, #' i.e. a pair of observations that are nearest neighbors and belong to different classes. -#' It can only be applied to tasks with numeric or integer features with no missing values. +#' It can only be applied to [classification tasks][mlr3::TaskClassif] with numeric or integer features that have no missing values. #' Supports multiclass classification. #' #' See [`themis::tomek`] for details. From f8dd8817e1aa0d6ad86c2356faecf547eadbd3f8 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 18:27:51 +0200 Subject: [PATCH 11/25] remove dev comments --- R/PipeOpNearmiss.R | 11 ++--------- R/PipeOpTomek.R | 11 ----------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R index afabbf456..eac1c04b1 100644 --- a/R/PipeOpNearmiss.R +++ b/R/PipeOpNearmiss.R @@ -10,6 +10,8 @@ #' The algorithm down-samples by selecting instances from the non-minority classes that have the smallest mean distance #' to their `k` nearest neighbors of different classes. #' This can only be applied to [classification tasks][mlr3::TaskClassif] with numeric or integer features that have no missing values. +#' Supports multiclass classification. +#' #' See [`themis::nearmiss`] for details. 
#' #' @section Construction: @@ -93,13 +95,6 @@ PipeOpNearmiss = R6Class("PipeOpNearmiss", if (!length(cols)) { return(task) } - # PipeOp does not know how to handle non-feature columns - # unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) - # if (length(unsupported_cols)) { - # stopf("Nearmiss cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", - # paste(unsupported_cols, collapse = "', '")) - # } - # Only numeric and integer features allowed if (!all(task$feature_types$type %in% c("numeric", "integer"))) { stop("Nearmiss does only accept numeric and integer features. Use PipeOpSelect to select the appropriate features.") @@ -110,8 +105,6 @@ PipeOpNearmiss = R6Class("PipeOpNearmiss", .args = self$param_set$get_values(tags = "nearmiss"))) keep = as.integer(row.names(dt)) - # more robust, more computationally complex alternative: - # keep = as.integer(row.names(fintersect(task$data(), dt))) task$filter(keep) } ) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index 97804fcbc..ffb5cc635 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -85,15 +85,6 @@ PipeOpTomek = R6Class("PipeOpTomek", if (!length(cols)) { return(task) } - # PipeOp does not know how to handle non-feature columns - # unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) - # if (length(unsupported_cols)) { - # stopf("Tomek cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", - # paste(unsupported_cols, collapse = "', '")) - # } - # do we want this? We could handle it, question just is whether it's useful to still raise an error - # since we are effectively ignoring stratify & co. - # Only numeric and integer features allowed if (!all(task$feature_types$type %in% c("numeric", "integer"))) { stop("Tomek does only accept numeric and integer features. 
Use PipeOpSelect to select the appropriate features.") @@ -103,8 +94,6 @@ PipeOpTomek = R6Class("PipeOpTomek", dt = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) keep = as.integer(row.names(dt)) - # more robust, more computationally complex alternative: - # keep = as.integer(row.names(fintersect(task$data(), dt))) task$filter(keep) } ) From b4d2b29750fafc020115e79ac51e7119987554b7 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 18:29:59 +0200 Subject: [PATCH 12/25] docs: document() --- DESCRIPTION | 2 + NAMESPACE | 2 + man/PipeOp.Rd | 2 + man/PipeOpEnsemble.Rd | 2 + man/PipeOpImpute.Rd | 2 + man/PipeOpTargetTrafo.Rd | 2 + man/PipeOpTaskPreproc.Rd | 2 + man/PipeOpTaskPreprocSimple.Rd | 2 + man/mlr_pipeops.Rd | 2 + man/mlr_pipeops_boxcox.Rd | 2 + man/mlr_pipeops_branch.Rd | 2 + man/mlr_pipeops_chunk.Rd | 2 + man/mlr_pipeops_classbalancing.Rd | 2 + man/mlr_pipeops_classifavg.Rd | 2 + man/mlr_pipeops_classweights.Rd | 2 + man/mlr_pipeops_colapply.Rd | 2 + man/mlr_pipeops_collapsefactors.Rd | 2 + man/mlr_pipeops_colroles.Rd | 2 + man/mlr_pipeops_copy.Rd | 2 + man/mlr_pipeops_datefeatures.Rd | 2 + man/mlr_pipeops_encode.Rd | 2 + man/mlr_pipeops_encodeimpact.Rd | 2 + man/mlr_pipeops_encodelmer.Rd | 2 + man/mlr_pipeops_featureunion.Rd | 2 + man/mlr_pipeops_filter.Rd | 2 + man/mlr_pipeops_fixfactors.Rd | 2 + man/mlr_pipeops_histbin.Rd | 2 + man/mlr_pipeops_ica.Rd | 2 + man/mlr_pipeops_imputeconstant.Rd | 2 + man/mlr_pipeops_imputehist.Rd | 2 + man/mlr_pipeops_imputelearner.Rd | 2 + man/mlr_pipeops_imputemean.Rd | 2 + man/mlr_pipeops_imputemedian.Rd | 2 + man/mlr_pipeops_imputemode.Rd | 2 + man/mlr_pipeops_imputeoor.Rd | 2 + man/mlr_pipeops_imputesample.Rd | 2 + man/mlr_pipeops_kernelpca.Rd | 2 + man/mlr_pipeops_learner.Rd | 2 + man/mlr_pipeops_missind.Rd | 2 + man/mlr_pipeops_modelmatrix.Rd | 2 + man/mlr_pipeops_multiplicityexply.Rd | 2 + man/mlr_pipeops_multiplicityimply.Rd | 2 + man/mlr_pipeops_mutate.Rd | 2 + 
man/mlr_pipeops_nearmiss.Rd | 172 +++++++++++++++++++++++ man/mlr_pipeops_nmf.Rd | 2 + man/mlr_pipeops_nop.Rd | 2 + man/mlr_pipeops_ovrsplit.Rd | 2 + man/mlr_pipeops_ovrunite.Rd | 2 + man/mlr_pipeops_pca.Rd | 2 + man/mlr_pipeops_proxy.Rd | 2 + man/mlr_pipeops_quantilebin.Rd | 2 + man/mlr_pipeops_randomprojection.Rd | 2 + man/mlr_pipeops_randomresponse.Rd | 2 + man/mlr_pipeops_regravg.Rd | 2 + man/mlr_pipeops_removeconstants.Rd | 2 + man/mlr_pipeops_renamecolumns.Rd | 2 + man/mlr_pipeops_replicate.Rd | 2 + man/mlr_pipeops_rowapply.Rd | 2 + man/mlr_pipeops_scale.Rd | 2 + man/mlr_pipeops_scalemaxabs.Rd | 2 + man/mlr_pipeops_scalerange.Rd | 2 + man/mlr_pipeops_select.Rd | 2 + man/mlr_pipeops_smote.Rd | 2 + man/mlr_pipeops_spatialsign.Rd | 2 + man/mlr_pipeops_subsample.Rd | 2 + man/mlr_pipeops_targetinvert.Rd | 2 + man/mlr_pipeops_targetmutate.Rd | 2 + man/mlr_pipeops_targettrafoscalerange.Rd | 2 + man/mlr_pipeops_textvectorizer.Rd | 2 + man/mlr_pipeops_threshold.Rd | 2 + man/mlr_pipeops_tomek.Rd | 165 ++++++++++++++++++++++ man/mlr_pipeops_tunethreshold.Rd | 2 + man/mlr_pipeops_unbranch.Rd | 2 + man/mlr_pipeops_updatetarget.Rd | 2 + man/mlr_pipeops_vtreat.Rd | 2 + man/mlr_pipeops_yeojohnson.Rd | 2 + 76 files changed, 485 insertions(+) create mode 100644 man/mlr_pipeops_nearmiss.Rd create mode 100644 man/mlr_pipeops_tomek.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 85253382c..593b76ccb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -145,6 +145,7 @@ Collate: 'PipeOpMutate.R' 'PipeOpNMF.R' 'PipeOpNOP.R' + 'PipeOpNearmiss.R' 'PipeOpOVR.R' 'PipeOpPCA.R' 'PipeOpProxy.R' @@ -164,6 +165,7 @@ Collate: 'PipeOpSubsample.R' 'PipeOpTextVectorizer.R' 'PipeOpThreshold.R' + 'PipeOpTomek.R' 'PipeOpTrafo.R' 'PipeOpTuneThreshold.R' 'PipeOpUnbranch.R' diff --git a/NAMESPACE b/NAMESPACE index 6d8c22381..8dec616e1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,6 +81,7 @@ export(PipeOpMultiplicityImply) export(PipeOpMutate) export(PipeOpNMF) export(PipeOpNOP) +export(PipeOpNearmiss) 
export(PipeOpOVRSplit) export(PipeOpOVRUnite) export(PipeOpPCA) @@ -108,6 +109,7 @@ export(PipeOpTaskPreproc) export(PipeOpTaskPreprocSimple) export(PipeOpTextVectorizer) export(PipeOpThreshold) +export(PipeOpTomek) export(PipeOpTuneThreshold) export(PipeOpUnbranch) export(PipeOpVtreat) diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 3458719fd..5f677e2fa 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -307,6 +307,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -333,6 +334,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index 46bc5918b..81c667fc2 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -139,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -165,6 +166,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index 52203632d..43591e376 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -172,6 +172,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, 
\code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -198,6 +199,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 8a534ec18..fb750b5be 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -180,6 +180,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -206,6 +207,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 69f92477c..78626b29d 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -235,6 +235,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -261,6 +262,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, 
\code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index d836e75a5..ffdbd0af7 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -172,6 +172,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -198,6 +199,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index e2a7d1e1a..2db99376b 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -109,6 +109,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -135,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index 064a069ca..1a7650842 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -123,6 +123,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, 
\code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -149,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index a83b502a1..f3a768528 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -141,6 +141,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -167,6 +168,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd index 4b98bbd2a..4e42bc7e6 100644 --- a/man/mlr_pipeops_chunk.Rd +++ b/man/mlr_pipeops_chunk.Rd @@ -120,6 +120,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -146,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git 
a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd index 19dcd067e..a1384b8de 100644 --- a/man/mlr_pipeops_classbalancing.Rd +++ b/man/mlr_pipeops_classbalancing.Rd @@ -161,6 +161,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -187,6 +188,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index 160ba73ab..c05fed339 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -137,6 +137,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -163,6 +164,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_classweights.Rd b/man/mlr_pipeops_classweights.Rd index 7493a3a6b..fba6e61d9 100644 --- a/man/mlr_pipeops_classweights.Rd +++ b/man/mlr_pipeops_classweights.Rd @@ -140,6 +140,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, 
\code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -166,6 +167,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd index bf8065f8e..04d50cd94 100644 --- a/man/mlr_pipeops_colapply.Rd +++ b/man/mlr_pipeops_colapply.Rd @@ -150,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -176,6 +177,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd index 91798c99d..1c4c0c7c4 100644 --- a/man/mlr_pipeops_collapsefactors.Rd +++ b/man/mlr_pipeops_collapsefactors.Rd @@ -117,6 +117,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -143,6 +144,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git 
a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index a0e742faa..829213c17 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -109,6 +109,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -135,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd index a4160342d..6f4255c3c 100644 --- a/man/mlr_pipeops_copy.Rd +++ b/man/mlr_pipeops_copy.Rd @@ -139,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -165,6 +166,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd index 5028544b2..c0a20d11e 100644 --- a/man/mlr_pipeops_datefeatures.Rd +++ b/man/mlr_pipeops_datefeatures.Rd @@ -156,6 +156,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, 
\code{\link{mlr_pipeops_ovrsplit}}, @@ -182,6 +183,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd index 71a13f26f..e17da47ba 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -152,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -178,6 +179,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd index 7a435e1f2..870248449 100644 --- a/man/mlr_pipeops_encodeimpact.Rd +++ b/man/mlr_pipeops_encodeimpact.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -160,6 +161,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd 
index ad391725f..93b177e07 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -149,6 +149,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -175,6 +176,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd index a509b87eb..91ced44d5 100644 --- a/man/mlr_pipeops_featureunion.Rd +++ b/man/mlr_pipeops_featureunion.Rd @@ -154,6 +154,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -180,6 +181,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index a37d1328a..1bc0784dc 100644 --- a/man/mlr_pipeops_filter.Rd +++ b/man/mlr_pipeops_filter.Rd @@ -185,6 +185,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -211,6 +212,7 @@ Other 
PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd index 6a4ac569c..4f638e174 100644 --- a/man/mlr_pipeops_fixfactors.Rd +++ b/man/mlr_pipeops_fixfactors.Rd @@ -109,6 +109,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -135,6 +136,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index ce133cd8b..4a70ed677 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -121,6 +121,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -147,6 +148,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index d6e93d163..ad4746274 100644 --- a/man/mlr_pipeops_ica.Rd +++ 
b/man/mlr_pipeops_ica.Rd @@ -149,6 +149,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -175,6 +176,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd index a6ab5d027..9fe74e916 100644 --- a/man/mlr_pipeops_imputeconstant.Rd +++ b/man/mlr_pipeops_imputeconstant.Rd @@ -123,6 +123,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -149,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd index d71500f0b..313dd484d 100644 --- a/man/mlr_pipeops_imputehist.Rd +++ b/man/mlr_pipeops_imputehist.Rd @@ -115,6 +115,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -141,6 +142,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, 
\code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd index 4819be20f..f832211b3 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -160,6 +160,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -186,6 +187,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd index 64dd29a38..f3438b10d 100644 --- a/man/mlr_pipeops_imputemean.Rd +++ b/man/mlr_pipeops_imputemean.Rd @@ -108,6 +108,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -134,6 +135,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd index 1f4286c64..f1ed64387 100644 --- a/man/mlr_pipeops_imputemedian.Rd +++ 
b/man/mlr_pipeops_imputemedian.Rd @@ -108,6 +108,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -134,6 +135,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd index 9cbcdba06..a0f0ddc64 100644 --- a/man/mlr_pipeops_imputemode.Rd +++ b/man/mlr_pipeops_imputemode.Rd @@ -115,6 +115,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -141,6 +142,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd index 499d7fb60..fe5d1083d 100644 --- a/man/mlr_pipeops_imputeoor.Rd +++ b/man/mlr_pipeops_imputeoor.Rd @@ -156,6 +156,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -182,6 +183,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, 
\code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd index d9f4d8f75..b803af004 100644 --- a/man/mlr_pipeops_imputesample.Rd +++ b/man/mlr_pipeops_imputesample.Rd @@ -110,6 +110,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -136,6 +137,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index a9bddd763..79b864684 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ b/man/mlr_pipeops_kernelpca.Rd @@ -124,6 +124,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -150,6 +151,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index 43c259806..953e365dc 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -155,6 +155,7 
@@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -181,6 +182,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd index b9f8d51da..608598735 100644 --- a/man/mlr_pipeops_missind.Rd +++ b/man/mlr_pipeops_missind.Rd @@ -138,6 +138,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -164,6 +165,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_modelmatrix.Rd b/man/mlr_pipeops_modelmatrix.Rd index 1e1b00c2e..965cade03 100644 --- a/man/mlr_pipeops_modelmatrix.Rd +++ b/man/mlr_pipeops_modelmatrix.Rd @@ -114,6 +114,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -140,6 +141,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, 
\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd index e4c67c232..b0e8a2507 100644 --- a/man/mlr_pipeops_multiplicityexply.Rd +++ b/man/mlr_pipeops_multiplicityexply.Rd @@ -120,6 +120,7 @@ Other PipeOps: \code{\link{mlr_pipeops_modelmatrix}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -146,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd index c07f85bab..1ebe96d2a 100644 --- a/man/mlr_pipeops_multiplicityimply.Rd +++ b/man/mlr_pipeops_multiplicityimply.Rd @@ -126,6 +126,7 @@ Other PipeOps: \code{\link{mlr_pipeops_modelmatrix}}, \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -152,6 +153,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd index 8da58522f..bc408e0e6 100644 --- a/man/mlr_pipeops_mutate.Rd +++ b/man/mlr_pipeops_mutate.Rd @@ -131,6 +131,7 @@ 
Other PipeOps: \code{\link{mlr_pipeops_modelmatrix}}, \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -157,6 +158,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_nearmiss.Rd b/man/mlr_pipeops_nearmiss.Rd new file mode 100644 index 000000000..70da0ebc2 --- /dev/null +++ b/man/mlr_pipeops_nearmiss.Rd @@ -0,0 +1,172 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpNearmiss.R +\name{mlr_pipeops_nearmiss} +\alias{mlr_pipeops_nearmiss} +\alias{PipeOpNearmiss} +\title{Nearmiss Down-Sampling} +\format{ +\code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Generates a more balanced data set by down-sampling the instances of non-minority classes using the NEARMISS algorithm. + +The algorithm down-samples by selecting instances from the non-minority classes that have the smallest mean distance +to their \code{k} nearest neighbors of different classes. +This can only be applied to \link[mlr3:TaskClassif]{classification tasks} with numeric or integer features that have no missing values. +Supports multiclass classification. + +See \code{\link[themis:nearmiss]{themis::nearmiss}} for details. +} +\section{Construction}{ + + +\if{html}{\out{
}}\preformatted{PipeOpNearmiss$new(id = "nearmiss", param_vals = list()) +}\if{html}{\out{
}} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"nearmiss"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. + +The output during training is the input \code{\link[mlr3:Task]{Task}} with rows of the non-minority classes removed. +The output during prediction is the unchanged input. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}. +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as +\itemize{ +\item \code{k} :: \code{integer(1)}\cr +Number of nearest neighbors used for calculating the mean distances. Default is \code{5}. +\item \code{under_ratio} :: \code{numeric(1)}\cr +Ratio of the minority-to-majority frequencies. This specifies the ratio to which the number of instances +in the non-minority classes is down-sampled, relative to the number of instances of the minority class. +Default is \code{1}. For details, see \code{\link[themis:nearmiss]{themis::nearmiss}}. +} +} + +\section{Fields}{ + +Only fields inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. 
+} + +\examples{ +\dontshow{ if (requireNamespace("themis")) \{ } +library("mlr3") + +# Create example task +data = data.frame( + target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), + feature = rnorm(200) +) +task = TaskClassif$new(id = "example", backend = data, target = "target") +task$head() +table(task$data(cols = "target")) + +# Down-sample data +pop = po("nearmiss") +nearmiss_result = pop$train(list(task))[[1]]$data() +nrow(nearmiss_result) +table(nearmiss_result$target) +\dontshow{ \} } +} +\references{ +Zhang, J., Mani, I. (2003). +\dQuote{KNN Approach to Unbalanced Data Distributions: A Case Study Involving Information Extraction.} +In \emph{Proceedings of Workshop on Learning from Imbalanced Datasets (ICML)}. +} +\seealso{ +https://mlr-org.com/pipeops.html + +Other PipeOps: +\code{\link{PipeOp}}, +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, 
+\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index f651ec210..d37566519 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -168,6 +168,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, 
+\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, \code{\link{mlr_pipeops_ovrunite}}, @@ -193,6 +194,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd index fd6fd2ea4..077e0ffc6 100644 --- a/man/mlr_pipeops_nop.Rd +++ b/man/mlr_pipeops_nop.Rd @@ -117,6 +117,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_ovrsplit}}, \code{\link{mlr_pipeops_ovrunite}}, @@ -142,6 +143,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index 76c661fde..22f9e2d62 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrunite}}, @@ -159,6 +160,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, 
\code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index f01cba41e..777795fd8 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -129,6 +129,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -154,6 +155,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd index 18f5eb086..79917659e 100644 --- a/man/mlr_pipeops_pca.Rd +++ b/man/mlr_pipeops_pca.Rd @@ -126,6 +126,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -151,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index a5ef51112..03f8fe195 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -140,6 +140,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, 
\code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -165,6 +166,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd index 6e5a85a24..496ff4442 100644 --- a/man/mlr_pipeops_quantilebin.Rd +++ b/man/mlr_pipeops_quantilebin.Rd @@ -114,6 +114,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -139,6 +140,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd index 2323caf66..cff567111 100644 --- a/man/mlr_pipeops_randomprojection.Rd +++ b/man/mlr_pipeops_randomprojection.Rd @@ -126,6 +126,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -151,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git 
a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd index c497d3ad2..7f228e551 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -143,6 +143,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -168,6 +169,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index a97bde700..ff22879c6 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -129,6 +129,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -154,6 +155,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd index ef3d43d75..36bef86ae 100644 --- a/man/mlr_pipeops_removeconstants.Rd +++ b/man/mlr_pipeops_removeconstants.Rd @@ -119,6 +119,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, 
\code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -144,6 +145,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd index 20947f1be..086adb087 100644 --- a/man/mlr_pipeops_renamecolumns.Rd +++ b/man/mlr_pipeops_renamecolumns.Rd @@ -118,6 +118,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -143,6 +144,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd index 71949f16c..ad0e468c3 100644 --- a/man/mlr_pipeops_replicate.Rd +++ b/man/mlr_pipeops_replicate.Rd @@ -111,6 +111,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -136,6 +137,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git 
a/man/mlr_pipeops_rowapply.Rd b/man/mlr_pipeops_rowapply.Rd index cc15306ab..9d1a19f9c 100644 --- a/man/mlr_pipeops_rowapply.Rd +++ b/man/mlr_pipeops_rowapply.Rd @@ -117,6 +117,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -142,6 +143,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd index 9c8a3a316..28b6dab28 100644 --- a/man/mlr_pipeops_scale.Rd +++ b/man/mlr_pipeops_scale.Rd @@ -133,6 +133,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -158,6 +159,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd index 46c5c4c45..11b55295a 100644 --- a/man/mlr_pipeops_scalemaxabs.Rd +++ b/man/mlr_pipeops_scalemaxabs.Rd @@ -108,6 +108,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, 
\code{\link{mlr_pipeops_ovrsplit}}, @@ -133,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd index 678e54b0d..86ec337f5 100644 --- a/man/mlr_pipeops_scalerange.Rd +++ b/man/mlr_pipeops_scalerange.Rd @@ -113,6 +113,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -138,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd index 353e280b0..cca906d6a 100644 --- a/man/mlr_pipeops_select.Rd +++ b/man/mlr_pipeops_select.Rd @@ -129,6 +129,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -154,6 +155,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index 
ccbd9c6cd..c2619caee 100644 --- a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -132,6 +132,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -157,6 +158,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd index 9fdb650d6..488fc3593 100644 --- a/man/mlr_pipeops_spatialsign.Rd +++ b/man/mlr_pipeops_spatialsign.Rd @@ -108,6 +108,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -133,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd index c89142226..dfbf706ca 100644 --- a/man/mlr_pipeops_subsample.Rd +++ b/man/mlr_pipeops_subsample.Rd @@ -123,6 +123,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -148,6 +149,7 @@ Other PipeOps: 
\code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index fe5073375..e1ecc3ff0 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -108,6 +108,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -133,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index dd7982fdf..a04aaee68 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -156,6 +156,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -181,6 +182,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index e651099eb..8dbed8ea9 
100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -122,6 +122,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -147,6 +148,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targetmutate}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index 57ab20d9a..ddf04e8b6 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -222,6 +222,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -247,6 +248,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targetmutate}}, \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index 44f63dc31..540152484 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -121,6 +121,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -146,6 +147,7 @@ Other 
PipeOps: \code{\link{mlr_pipeops_targetmutate}}, \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd new file mode 100644 index 000000000..4a052d68f --- /dev/null +++ b/man/mlr_pipeops_tomek.Rd @@ -0,0 +1,165 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpTomek.R +\name{mlr_pipeops_tomek} +\alias{mlr_pipeops_tomek} +\alias{PipeOpTomek} +\title{Tomek Down-Sampling} +\format{ +\code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Generates a cleaner data set by removing all majority-minority Tomek links. + +The algorithm down-samples the data by removing all pairs of observations that form a Tomek link, +i.e. a pair of observations that are nearest neighbors and belong to different classes. +It can only be applied to \link[mlr3:TaskClassif]{classification tasks} with numeric or integer features that have no missing values. +Supports multiclass classification. + +See \code{\link[themis:tomek]{themis::tomek}} for details. +} +\section{Construction}{ + + +\if{html}{\out{
}}\preformatted{PipeOpTomek$new(id = "tomek", param_vals = list()) +}\if{html}{\out{
}} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"tomek"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. + +The output during training is the input \code{\link[mlr3:Task]{Task}} with removed rows for pairs of observations that form a Tomek link. +The output during prediction is the unchanged input. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}. +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}. +} + +\section{Fields}{ + +Only fields inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\examples{ +\dontshow{ if (requireNamespace("themis")) \{ } +library("mlr3") + +# Create example task +data = data.frame( + target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), + feature = rnorm(200) +) +task = TaskClassif$new(id = "example", backend = data, target = "target") +task$head() +table(task$data(cols = "target")) + +# Down-sample data +pop = po("tomek") +tomek_result = pop$train(list(task))[[1]]$data() +nrow(tomek_result) +table(tomek_result$target) +\dontshow{ \} } +} +\references{ +Tomek I (1976). +\dQuote{Two Modifications of CNN.} +\emph{IEEE Transactions on Systems, Man and Cybernetics}, \bold{6}(11), 769--772. +\doi{10.1109/TSMC.1976.4309452}. 
+} +\seealso{ +https://mlr-org.com/pipeops.html + +Other PipeOps: +\code{\link{PipeOp}}, +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, 
+\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index 34fa82948..6bc2af29b 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -151,6 +151,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -177,6 +178,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd index 6d17dfeb3..995a14746 100644 --- a/man/mlr_pipeops_unbranch.Rd +++ b/man/mlr_pipeops_unbranch.Rd @@ -120,6 +120,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, 
\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -146,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_updatetarget}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index 2774382f7..2cb3bde02 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -135,6 +135,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -161,6 +162,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_vtreat}}, diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index 28d5f205a..2dd7fb1e5 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -188,6 +188,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -214,6 +215,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, 
\code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 89123d332..68626ebda 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -125,6 +125,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -151,6 +152,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, From 12945db709d2af8dc224a71e4989e7aa73f25aa7 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 18:35:55 +0200 Subject: [PATCH 13/25] added themis to suggests --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 593b76ccb..66174ddab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -88,7 +88,8 @@ Suggests: vtreat, future, htmlwidgets, - ranger + ranger, + themis ByteCompile: true Encoding: UTF-8 Config/testthat/edition: 3 From 25f6420d75d874e7ef0933dc3edea6de21701c21 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 31 Aug 2024 18:36:26 +0200 Subject: [PATCH 14/25] docs: simplified examples --- R/PipeOpNearmiss.R | 6 +----- R/PipeOpTomek.R | 6 +----- man/mlr_pipeops_nearmiss.Rd | 6 +----- man/mlr_pipeops_tomek.Rd | 6 +----- 4 files changed, 4 insertions(+), 20 deletions(-) diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R index eac1c04b1..547bab762 100644 --- a/R/PipeOpNearmiss.R +++ b/R/PipeOpNearmiss.R @@ -60,11 +60,7 @@ #' library("mlr3") #' #' # Create example task -#' data = data.frame( -#' target = factor(sample(c("c1", 
"c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), -#' feature = rnorm(200) -#' ) -#' task = TaskClassif$new(id = "example", backend = data, target = "target") +#' task = tsk("wine") #' task$head() #' table(task$data(cols = "target")) #' diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index ffb5cc635..bbdfe04f9 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -54,11 +54,7 @@ #' library("mlr3") #' #' # Create example task -#' data = data.frame( -#' target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), -#' feature = rnorm(200) -#' ) -#' task = TaskClassif$new(id = "example", backend = data, target = "target") +#' task = tsk("iris") #' task$head() #' table(task$data(cols = "target")) #' diff --git a/man/mlr_pipeops_nearmiss.Rd b/man/mlr_pipeops_nearmiss.Rd index 70da0ebc2..947cc1cbc 100644 --- a/man/mlr_pipeops_nearmiss.Rd +++ b/man/mlr_pipeops_nearmiss.Rd @@ -71,11 +71,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} library("mlr3") # Create example task -data = data.frame( - target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), - feature = rnorm(200) -) -task = TaskClassif$new(id = "example", backend = data, target = "target") +task = tsk("wine") task$head() table(task$data(cols = "target")) diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd index 4a052d68f..18143287f 100644 --- a/man/mlr_pipeops_tomek.Rd +++ b/man/mlr_pipeops_tomek.Rd @@ -63,11 +63,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} library("mlr3") # Create example task -data = data.frame( - target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), - feature = rnorm(200) -) -task = TaskClassif$new(id = "example", backend = data, target = "target") +task = tsk("iris") task$head() table(task$data(cols = "target")) From 714f1a1607a75867c50b1dc7a67815b92335e2af Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: 
Sat, 31 Aug 2024 18:56:29 +0200 Subject: [PATCH 15/25] docs: corrections in examples --- R/PipeOpNearmiss.R | 8 ++++---- R/PipeOpTomek.R | 4 ++-- man/mlr_pipeops_nearmiss.Rd | 8 ++++---- man/mlr_pipeops_tomek.Rd | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R index 547bab762..9e1441481 100644 --- a/R/PipeOpNearmiss.R +++ b/R/PipeOpNearmiss.R @@ -62,13 +62,13 @@ #' # Create example task #' task = tsk("wine") #' task$head() -#' table(task$data(cols = "target")) +#' table(task$data(cols = "type")) #' -#' # Generate synthetic data for minority class +#' # Down-sample and balance data #' pop = po("nearmiss") #' nearmiss_result = pop$train(list(task))[[1]]$data() -#' nrow(nearmiss_result$target) -#' table(nearmiss_result$target) +#' nrow(nearmiss_result) +#' table(nearmiss_result$type) #' \dontshow{ \} } PipeOpNearmiss = R6Class("PipeOpNearmiss", inherit = PipeOpTaskPreproc, diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index bbdfe04f9..76a59a8a9 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -56,13 +56,13 @@ #' # Create example task #' task = tsk("iris") #' task$head() -#' table(task$data(cols = "target")) +#' table(task$data(cols = "type")) #' #' # Down-sample data #' pop = po("tomek") #' tomek_result = pop$train(list(task))[[1]]$data() #' nrow(tomek_result) -#' table(tomek_result$target) +#' table(tomek_result$type) #' \dontshow{ \} } PipeOpTomek = R6Class("PipeOpTomek", inherit = PipeOpTaskPreproc, diff --git a/man/mlr_pipeops_nearmiss.Rd b/man/mlr_pipeops_nearmiss.Rd index 947cc1cbc..e52a4f30d 100644 --- a/man/mlr_pipeops_nearmiss.Rd +++ b/man/mlr_pipeops_nearmiss.Rd @@ -73,13 +73,13 @@ library("mlr3") # Create example task task = tsk("wine") task$head() -table(task$data(cols = "target")) +table(task$data(cols = "type")) -# Generate synthetic data for minority class +# Down-sample and balance data pop = po("nearmiss") nearmiss_result = pop$train(list(task))[[1]]$data() 
-nrow(nearmiss_result$target) -table(nearmiss_result$target) +nrow(nearmiss_result) +table(nearmiss_result$type) \dontshow{ \} } } \references{ diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd index 18143287f..43922ac03 100644 --- a/man/mlr_pipeops_tomek.Rd +++ b/man/mlr_pipeops_tomek.Rd @@ -65,13 +65,13 @@ library("mlr3") # Create example task task = tsk("iris") task$head() -table(task$data(cols = "target")) +table(task$data(cols = "type")) # Down-sample data pop = po("tomek") tomek_result = pop$train(list(task))[[1]]$data() nrow(tomek_result) -table(tomek_result$target) +table(tomek_result$type) \dontshow{ \} } } \references{ From 0e704a2d7a077dc7a8fbea709c5d52a168839bbd Mon Sep 17 00:00:00 2001 From: "Keno M." <118814423+advieser@users.noreply.github.com> Date: Sat, 31 Aug 2024 19:13:11 +0200 Subject: [PATCH 16/25] Correcting corrections in examples --- R/PipeOpTomek.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index 76a59a8a9..0dffafa90 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -56,13 +56,13 @@ #' # Create example task #' task = tsk("iris") #' task$head() -#' table(task$data(cols = "type")) +#' table(task$data(cols = "Species")) #' #' # Down-sample data #' pop = po("tomek") #' tomek_result = pop$train(list(task))[[1]]$data() #' nrow(tomek_result) -#' table(tomek_result$type) +#' table(tomek_result$Species) #' \dontshow{ \} } PipeOpTomek = R6Class("PipeOpTomek", inherit = PipeOpTaskPreproc, From 488600a46251a3ee5a93d9a05aef667ac44fd709 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 1 Sep 2024 12:13:02 +0200 Subject: [PATCH 17/25] document() --- man/mlr_pipeops_tomek.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd index 43922ac03..b06360cbe 100644 --- a/man/mlr_pipeops_tomek.Rd +++ b/man/mlr_pipeops_tomek.Rd @@ -65,13 +65,13 @@ library("mlr3") # Create example task task = tsk("iris") 
task$head() -table(task$data(cols = "type")) +table(task$data(cols = "Species")) # Down-sample data pop = po("tomek") tomek_result = pop$train(list(task))[[1]]$data() nrow(tomek_result) -table(tomek_result$type) +table(tomek_result$Species) \dontshow{ \} } } \references{ From 460c397e63a6a3dbdafb39e0b41a9f1fc3fab811 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 21 Sep 2024 18:16:50 +0200 Subject: [PATCH 18/25] code review changes --- R/PipeOpNearmiss.R | 22 +++++++++++----------- R/PipeOpTomek.R | 22 +++++++++++----------- man/mlr_pipeops_nearmiss.Rd | 5 +++-- man/mlr_pipeops_tomek.Rd | 5 +++-- tests/testthat/test_pipeop_nearmiss.R | 18 ++++++++++++++---- tests/testthat/test_pipeop_tomek.R | 17 +++++++++++++---- 6 files changed, 55 insertions(+), 34 deletions(-) diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R index 9e1441481..aa88516cf 100644 --- a/R/PipeOpNearmiss.R +++ b/R/PipeOpNearmiss.R @@ -9,8 +9,9 @@ #' #' The algorithm down-samples by selecting instances from the non-minority classes that have the smallest mean distance #' to their `k` nearest neighbors of different classes. -#' This can only be applied to [classification tasks][mlr3::TaskClassif] with numeric or integer features that have no missing values. -#' Supports multiclass classification. +#' For this only numeric and integer features are taken into account. These must have no missing values. +#' +#' This can only be applied to [classification tasks][mlr3::TaskClassif]. Multiclass classification is supported. #' #' See [`themis::nearmiss`] for details. 
#' @@ -85,22 +86,21 @@ PipeOpNearmiss = R6Class("PipeOpNearmiss", private = list( .train_task = function(task) { - cols = task$feature_names - # Return task unchanged, if no feature columns exist - if (!length(cols)) { + if (!length(task$feature_names)) { return(task) } - # Only numeric and integer features allowed - if (!all(task$feature_types$type %in% c("numeric", "integer"))) { - stop("Nearmiss does only accept numeric and integer features. Use PipeOpSelect to select the appropriate features.") + # At least one numeric or integer feature required + if (!any(task$feature_types$type %in% c("numeric", "integer"))) { + stop("Nearmiss needs at least one numeric or integer feature to work.") } - + # Subset columns to only include integer/numeric features and the target + cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names) # Down-sample data - dt = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names, + dt = setDT(invoke(themis::nearmiss, df = task$data(cols = cols), var = task$target_names, .args = self$param_set$get_values(tags = "nearmiss"))) - keep = as.integer(row.names(dt)) + keep = task$row_ids[as.integer(row.names(dt))] task$filter(keep) } ) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index 0dffafa90..30f08a679 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -9,8 +9,9 @@ #' #' The algorithm down-samples the data by removing all pairs of observations that form a Tomek link, #' i.e. a pair of observations that are nearest neighbors and belong to different classes. -#' It can only be applied to [classification tasks][mlr3::TaskClassif] with numeric or integer features that have no missing values. -#' Supports multiclass classification. +#' For this only numeric and integer features are taken into account. These must have no missing values. +#' +#' This can only be applied to [classification tasks][mlr3::TaskClassif]. Multiclass classification is supported. #' #' See [`themis::tomek`] for details. 
#' @@ -75,21 +76,20 @@ PipeOpTomek = R6Class("PipeOpTomek", private = list( .train_task = function(task) { - cols = task$feature_names - # Return task unchanged, if no feature columns exist - if (!length(cols)) { + if (!length(task$feature_names)) { return(task) } - # Only numeric and integer features allowed - if (!all(task$feature_types$type %in% c("numeric", "integer"))) { - stop("Tomek does only accept numeric and integer features. Use PipeOpSelect to select the appropriate features.") + # At least one numeric or integer feature required + if (!any(task$feature_types$type %in% c("numeric", "integer"))) { + stop("Tomek needs at least one numeric or integer feature to work.") } - + # Subset columns to only include integer/numeric features and the target + cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names) # Down-sample data - dt = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) + dt = setDT(invoke(themis::tomek, df = task$data(cols = cols), var = task$target_names)) - keep = as.integer(row.names(dt)) + keep = task$row_ids[as.integer(row.names(dt))] task$filter(keep) } ) diff --git a/man/mlr_pipeops_nearmiss.Rd b/man/mlr_pipeops_nearmiss.Rd index e52a4f30d..4d9f33104 100644 --- a/man/mlr_pipeops_nearmiss.Rd +++ b/man/mlr_pipeops_nearmiss.Rd @@ -12,8 +12,9 @@ Generates a more balanced data set by down-sampling the instances of non-minorit The algorithm down-samples by selecting instances from the non-minority classes that have the smallest mean distance to their \code{k} nearest neighbors of different classes. -This can only be applied to \link[mlr3:TaskClassif]{classification tasks} with numeric or integer features that have no missing values. -Supports multiclass classification. +For this only numeric and integer features are taken into account. These must have no missing values. + +This can only be applied to \link[mlr3:TaskClassif]{classification tasks}. Multiclass classification is supported. 
See \code{\link[themis:nearmiss]{themis::nearmiss}} for details. } diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd index b06360cbe..54132c2f7 100644 --- a/man/mlr_pipeops_tomek.Rd +++ b/man/mlr_pipeops_tomek.Rd @@ -12,8 +12,9 @@ Generates a cleaner data set by removing all majority-minority Tomek links. The algorithm down-samples the data by removing all pairs of observations that form a Tomek link, i.e. a pair of observations that are nearest neighbors and belong to different classes. -It can only be applied to \link[mlr3:TaskClassif]{classification tasks} with numeric or integer features that have no missing values. -Supports multiclass classification. +For this only numeric and integer features are taken into account. These must have no missing values. + +This can only be applied to \link[mlr3:TaskClassif]{classification tasks}. Multiclass classification is supported. See \code{\link[themis:tomek]{themis::tomek}} for details. } diff --git a/tests/testthat/test_pipeop_nearmiss.R b/tests/testthat/test_pipeop_nearmiss.R index a364f497d..0291d0cd5 100644 --- a/tests/testthat/test_pipeop_nearmiss.R +++ b/tests/testthat/test_pipeop_nearmiss.R @@ -12,15 +12,25 @@ test_that("PipeOpNearmiss - train works as intended", { skip_if_not_installed("themis") op = PipeOpNearmiss$new() - task = mlr_tasks$get("wine") - # Compare to themis::nearmiss + # Compare to themis::nearmiss for task with only numeric/integer features + task = mlr_tasks$get("wine") train_out = op$train(list(task))[[1]]$data() nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names)) expect_equal(train_out, nearmiss_out) + # Compare to themis::nearmiss for task with other features types (which should be ignored) + task = mlr_tasks$get("german_credit") + train_out = op$train(list(task))[[1]]$data() + dt = task$data(cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names)) + dt_out = setDT(invoke(themis::nearmiss, df = dt, var = 
task$target_names)) + nearmiss_out = task$data()[dt_out, on = colnames(dt_out)] + + expect_equal(train_out, nearmiss_out) + # Compare to themis::nearmiss with changed params + task = mlr_tasks$get("wine") op$param_set$set_values(k = 8, under_ratio = 0.9) train_out = op$train(list(task))[[1]]$data() nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names, @@ -42,8 +52,8 @@ test_that("PipeOpNearmiss - train works as intended", { task ) - # PipeOp does not accept tasks with wrong feature types - task = tsk("german_credit") + # PipeOp does not accept tasks that have no integer or numeric feature + task = tsk("breast_cancer") expect_error(op$train(list(task))) }) diff --git a/tests/testthat/test_pipeop_tomek.R b/tests/testthat/test_pipeop_tomek.R index 934d937ef..3e91798e8 100644 --- a/tests/testthat/test_pipeop_tomek.R +++ b/tests/testthat/test_pipeop_tomek.R @@ -12,14 +12,23 @@ test_that("PipeOpTomek - train works as intended", { skip_if_not_installed("themis") op = PipeOpTomek$new() - task = mlr_tasks$get("iris") - # Compare to themis::tomek + # Compare to themis::tomek for task with only numeric features + task = mlr_tasks$get("iris") train_out = op$train(list(task))[[1]]$data() tomek_out = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) expect_equal(train_out, tomek_out) + # Compare to themis::tomek for task with other features types (which should be ignored) + task = mlr_tasks$get("german_credit") + train_out = op$train(list(task))[[1]]$data() + dt = task$data(cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names)) + dt_out = setDT(invoke(themis::tomek, df = dt, var = task$target_names)) + tomek_out = task$data()[dt_out, on = colnames(dt_out)] + + expect_equal(train_out, tomek_out) + # Empty task is returned unchanged task$select(character(0)) expect_equal( @@ -27,8 +36,8 @@ test_that("PipeOpTomek - train works as intended", { task ) - # PipeOp does not accept tasks with wrong 
feature types - task = tsk("german_credit") + # PipeOp does not accept tasks that have no integer or numeric feature + task = tsk("breast_cancer") expect_error(op$train(list(task))) }) From b4ca9732612504e1a3aacff469cdfd8ad600afa6 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 21 Sep 2024 18:19:24 +0200 Subject: [PATCH 19/25] Updated NEWS.md --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 602322265..c2c24c6b1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # mlr3pipelines 0.6.0-9000 -* New PipeOp `PipeOpRowApply` / `po("rowapply")` +* New PipeOp: `PipeOpRowApply` / `po("rowapply")` +* New down-sampling PipeOps for imbalanced data: `PipeOpTomek` / `po("tomek")` and `PipeOpNearmiss` / `po("nearmiss")` # mlr3pipelines 0.6.0 From 0d96c5a1cc3ff6d5281884a7b3be3ef490f90169 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 21 Sep 2024 19:43:46 +0200 Subject: [PATCH 20/25] get in data.table --- R/PipeOpNearmiss.R | 2 +- R/PipeOpTomek.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R index aa88516cf..a73367fcd 100644 --- a/R/PipeOpNearmiss.R +++ b/R/PipeOpNearmiss.R @@ -95,7 +95,7 @@ PipeOpNearmiss = R6Class("PipeOpNearmiss", stop("Nearmiss needs at least one numeric or integer feature to work.") } # Subset columns to only include integer/numeric features and the target - cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names) + cols = c(task$feature_types[get("type") %in% c("integer", "numeric"), get("id")], task$target_names) # Down-sample data dt = setDT(invoke(themis::nearmiss, df = task$data(cols = cols), var = task$target_names, .args = self$param_set$get_values(tags = "nearmiss"))) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index 30f08a679..992af8208 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -85,7 +85,7 @@ PipeOpTomek = R6Class("PipeOpTomek", stop("Tomek needs at least one numeric or 
integer feature to work.") } # Subset columns to only include integer/numeric features and the target - cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names) + cols = c(task$feature_types[get("type") %in% c("integer", "numeric"), get("id")], task$target_names) # Down-sample data dt = setDT(invoke(themis::tomek, df = task$data(cols = cols), var = task$target_names)) From e23a812046c516374c956632d28f799ce5c28b4b Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 24 Sep 2024 11:12:52 +0200 Subject: [PATCH 21/25] static type checker var defs --- R/PipeOpNearmiss.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/PipeOpNearmiss.R b/R/PipeOpNearmiss.R index a73367fcd..cade2dea5 100644 --- a/R/PipeOpNearmiss.R +++ b/R/PipeOpNearmiss.R @@ -95,7 +95,8 @@ PipeOpNearmiss = R6Class("PipeOpNearmiss", stop("Nearmiss needs at least one numeric or integer feature to work.") } # Subset columns to only include integer/numeric features and the target - cols = c(task$feature_types[get("type") %in% c("integer", "numeric"), get("id")], task$target_names) + type = id = NULL + cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names) # Down-sample data dt = setDT(invoke(themis::nearmiss, df = task$data(cols = cols), var = task$target_names, .args = self$param_set$get_values(tags = "nearmiss"))) From 59f35c1e05ea1fa67d4b4aa16629e3ecf29758e0 Mon Sep 17 00:00:00 2001 From: mb706 Date: Tue, 24 Sep 2024 11:13:58 +0200 Subject: [PATCH 22/25] static type checker var defs II --- R/PipeOpTomek.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/PipeOpTomek.R b/R/PipeOpTomek.R index 992af8208..b14f8c921 100644 --- a/R/PipeOpTomek.R +++ b/R/PipeOpTomek.R @@ -85,7 +85,8 @@ PipeOpTomek = R6Class("PipeOpTomek", stop("Tomek needs at least one numeric or integer feature to work.") } # Subset columns to only include integer/numeric features and the target - cols = c(task$feature_types[get("type") %in% 
c("integer", "numeric"), get("id")], task$target_names) + type = id = NULL + cols = c(task$feature_types[type %in% c("integer", "numeric"), id], task$target_names) # Down-sample data dt = setDT(invoke(themis::tomek, df = task$data(cols = cols), var = task$target_names)) From 48e0de3910a5e67ba46fb6565ca91449b5b584de Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 24 Sep 2024 21:06:28 +0200 Subject: [PATCH 23/25] test for uncommon row_ids --- tests/testthat/test_pipeop_nearmiss.R | 7 +++++++ tests/testthat/test_pipeop_tomek.R | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/tests/testthat/test_pipeop_nearmiss.R b/tests/testthat/test_pipeop_nearmiss.R index 0291d0cd5..79ce1f7fc 100644 --- a/tests/testthat/test_pipeop_nearmiss.R +++ b/tests/testthat/test_pipeop_nearmiss.R @@ -20,6 +20,13 @@ test_that("PipeOpNearmiss - train works as intended", { expect_equal(train_out, nearmiss_out) + # Compare to themis::nearmiss for task with uncommon row_ids + task$filter(51:150) + train_out = op$train(list(task))[[1]]$data() + nearmiss_out = setDT(invoke(themis::nearmiss, df = task$data(), var = task$target_names)) + + expect_equal(train_out, nearmiss_out) + # Compare to themis::nearmiss for task with other features types (which should be ignored) task = mlr_tasks$get("german_credit") train_out = op$train(list(task))[[1]]$data() diff --git a/tests/testthat/test_pipeop_tomek.R b/tests/testthat/test_pipeop_tomek.R index 3e91798e8..edb39887b 100644 --- a/tests/testthat/test_pipeop_tomek.R +++ b/tests/testthat/test_pipeop_tomek.R @@ -20,6 +20,13 @@ test_that("PipeOpTomek - train works as intended", { expect_equal(train_out, tomek_out) + # Compare to themis::tomek for task with uncommon row_ids + task$filter(51:150) + train_out = op$train(list(task))[[1]]$data() + tomek_out = setDT(invoke(themis::tomek, df = task$data(), var = task$target_names)) + + expect_equal(train_out, tomek_out) + # Compare to themis::tomek for task with other features types (which should be 
ignored) task = mlr_tasks$get("german_credit") train_out = op$train(list(task))[[1]]$data() From 92ec13313fe9e81a0016233ee7a5a2cdbe7f1c54 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 24 Sep 2024 21:10:25 +0200 Subject: [PATCH 24/25] document --- R/bibentries.R | 2 +- man/mlr_pipeops_adas.Rd | 3 +++ man/mlr_pipeops_blsmote.Rd | 5 +++++ man/mlr_pipeops_nearmiss.Rd | 3 +++ man/mlr_pipeops_smotenc.Rd | 4 ++++ man/mlr_pipeops_tomek.Rd | 3 +++ 6 files changed, 19 insertions(+), 1 deletion(-) diff --git a/R/bibentries.R b/R/bibentries.R index 7d4669f17..87ab522c5 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -71,7 +71,7 @@ bibentries = c( number = "11", pages = "769--772", publisher = "IEEE" - ) + ), he_2008 = bibentry("InProceedings", author = "Haibo He and Yang Bai and Garcia, Edwardo A. and Shutao Li", diff --git a/man/mlr_pipeops_adas.Rd b/man/mlr_pipeops_adas.Rd index e5ccdee96..8663baff4 100644 --- a/man/mlr_pipeops_adas.Rd +++ b/man/mlr_pipeops_adas.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -153,6 +154,7 @@ Other PipeOps: \code{\link{mlr_pipeops_scalerange}}, \code{\link{mlr_pipeops_select}}, \code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_smotenc}}, \code{\link{mlr_pipeops_spatialsign}}, \code{\link{mlr_pipeops_subsample}}, \code{\link{mlr_pipeops_targetinvert}}, @@ -160,6 +162,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_blsmote.Rd b/man/mlr_pipeops_blsmote.Rd index 34046abab..0ad16183d 100644 --- 
a/man/mlr_pipeops_blsmote.Rd +++ b/man/mlr_pipeops_blsmote.Rd @@ -54,6 +54,8 @@ Default is \code{0}. See \code{\link[smotefamily:BLSMOTE]{BLSMOTE()}}. \item \code{method} :: \code{character(1)} \cr The type of Borderline-SMOTE algorithm to use. Default is \code{"type1"}. See \code{\link[smotefamily:BLSMOTE]{BLSMOTE()}}. +\item \code{quiet} :: \code{logical(1)} \cr +Whether to suppress printing status during training. Initialized to \code{TRUE}. } } @@ -137,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -156,6 +159,7 @@ Other PipeOps: \code{\link{mlr_pipeops_scalerange}}, \code{\link{mlr_pipeops_select}}, \code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_smotenc}}, \code{\link{mlr_pipeops_spatialsign}}, \code{\link{mlr_pipeops_subsample}}, \code{\link{mlr_pipeops_targetinvert}}, @@ -163,6 +167,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_nearmiss.Rd b/man/mlr_pipeops_nearmiss.Rd index 4d9f33104..ce5a1908b 100644 --- a/man/mlr_pipeops_nearmiss.Rd +++ b/man/mlr_pipeops_nearmiss.Rd @@ -99,6 +99,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, @@ -152,6 +154,7 @@ Other PipeOps: \code{\link{mlr_pipeops_scalerange}}, \code{\link{mlr_pipeops_select}}, \code{\link{mlr_pipeops_smote}}, 
+\code{\link{mlr_pipeops_smotenc}}, \code{\link{mlr_pipeops_spatialsign}}, \code{\link{mlr_pipeops_subsample}}, \code{\link{mlr_pipeops_targetinvert}}, diff --git a/man/mlr_pipeops_smotenc.Rd b/man/mlr_pipeops_smotenc.Rd index eece14954..b7b321973 100644 --- a/man/mlr_pipeops_smotenc.Rd +++ b/man/mlr_pipeops_smotenc.Rd @@ -107,6 +107,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, @@ -141,6 +143,7 @@ Other PipeOps: \code{\link{mlr_pipeops_multiplicityexply}}, \code{\link{mlr_pipeops_multiplicityimply}}, \code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nearmiss}}, \code{\link{mlr_pipeops_nmf}}, \code{\link{mlr_pipeops_nop}}, \code{\link{mlr_pipeops_ovrsplit}}, @@ -167,6 +170,7 @@ Other PipeOps: \code{\link{mlr_pipeops_targettrafoscalerange}}, \code{\link{mlr_pipeops_textvectorizer}}, \code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tomek}}, \code{\link{mlr_pipeops_tunethreshold}}, \code{\link{mlr_pipeops_unbranch}}, \code{\link{mlr_pipeops_updatetarget}}, diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd index 54132c2f7..490f8e929 100644 --- a/man/mlr_pipeops_tomek.Rd +++ b/man/mlr_pipeops_tomek.Rd @@ -92,6 +92,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, @@ -146,6 +148,7 @@ Other PipeOps: \code{\link{mlr_pipeops_scalerange}}, \code{\link{mlr_pipeops_select}}, \code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_smotenc}}, \code{\link{mlr_pipeops_spatialsign}}, \code{\link{mlr_pipeops_subsample}}, \code{\link{mlr_pipeops_targetinvert}}, From 
22f80a223d07f2ee6b943e9c1d3a40f7e9ecbdad Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 24 Sep 2024 21:38:36 +0200 Subject: [PATCH 25/25] update version --- DESCRIPTION | 2 +- NEWS.md | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ef9949baa..6c16d5644 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mlr3pipelines Title: Preprocessing Operators and Pipelines for 'mlr3' -Version: 0.6.0-9000 +Version: 0.7.0-9000 Authors@R: c(person(given = "Martin", family = "Binder", diff --git a/NEWS.md b/NEWS.md index ebf960f3f..df8eb2d34 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,10 @@ -# mlr3pipelines 0.6.0-9000 +# mlr3pipelines 0.7.0-9000 -* New PipeOp: `PipeOpRowApply` / `po("rowapply")` * New down-sampling PipeOps for inbalanced data: `PipeOpTomek` / `po("tomek")` and `PipeOpNearmiss` / `po("nearmiss")` + +# mlr3pipelines 0.7.0 + +* New PipeOp: `PipeOpRowApply` / `po("rowapply")` * New PipeOps for handling inbalanced data: `PipeOpADAS` / `po("adas")`, `PipeOpBLSmote` / `po("blsmote")` and `PipeOpSmoteNC` / `po("smotenc")` # mlr3pipelines 0.6.0