From e27a46b765df91dcd9cb815249dbfb75adb57b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20Greni=C3=A9?= Date: Sat, 2 Dec 2023 09:19:25 +0100 Subject: [PATCH] Add benchmark scripts and template --- inst/benchmark_scripts/.batchtools.slurm.tmpl | 51 +++++++ inst/benchmark_scripts/benchmark.R | 126 ++++++++++++++++++ .../fd_benchmark_alternatives.R | 19 +++ .../benchmark_fcts/fd_benchmark_multicore.R | 24 ++++ .../benchmark_fcts/make_data.R | 19 +++ .../benchmark_fd_multicore.R | 57 ++++++++ .../benchmark_fric_comparison.R | 76 +++++++++++ 7 files changed, 372 insertions(+) create mode 100644 inst/benchmark_scripts/.batchtools.slurm.tmpl create mode 100644 inst/benchmark_scripts/benchmark.R create mode 100644 inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R create mode 100644 inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R create mode 100644 inst/benchmark_scripts/benchmark_fcts/make_data.R create mode 100644 inst/benchmark_scripts/benchmark_fd_multicore.R create mode 100644 inst/benchmark_scripts/benchmark_fric_comparison.R diff --git a/inst/benchmark_scripts/.batchtools.slurm.tmpl b/inst/benchmark_scripts/.batchtools.slurm.tmpl new file mode 100644 index 0000000..c6af0d7 --- /dev/null +++ b/inst/benchmark_scripts/.batchtools.slurm.tmpl @@ -0,0 +1,51 @@ +#!/bin/bash + +## Job Resource Interface Definition +## +## ntasks [integer(1)]: Number of required tasks, +## Set larger than 1 if you want to further parallelize +## with MPI within your job. +## ncpus [integer(1)]: Number of required cpus per task, +## Set larger than 1 if you want to further parallelize +## with multicore/parallel within each task. +## walltime [integer(1)]: Walltime for this job, in seconds. +## Must be at least 60 seconds for Slurm to work properly. +## memory [integer(1)]: Memory in megabytes for each cpu. +## Must be at least 100 (when I tried lower values my +## jobs did not start at all). +## +## Default resources can be set in your .batchtools.conf.R by defining the variable +## 'default.resources' as a named list. + +<% +# relative paths are not handled well by Slurm +log.file = fs::path_expand(log.file) +-%> + + +#SBATCH --job-name=<%= resources$job.name %> +#SBATCH --output=<%= resources$output %> +#SBATCH --error=<%= log.file %> +#SBATCH --time=<%= resources$walltime %> +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=<%= resources$ncpus %> +#SBATCH --mem-per-cpu=<%= resources$memory %> +#SBATCH --mail-user=<%= resources$email %> +#SBATCH --mail-type=ALL +<%= if (!is.null(resources$partition)) sprintf(paste0("#SBATCH --partition='", resources$partition, "'")) %> +<%= if (array.jobs) sprintf("#SBATCH --array=1-%i", nrow(jobs)) else "" %> + +## Initialize work environment like +module load foss/2020b R/4.0.4-2 +cd fundiversity + +## Export value of DEBUGME environemnt var to slave +export DEBUGME=<%= Sys.getenv("DEBUGME") %> + +<%= sprintf("export OMP_NUM_THREADS=%i", resources$omp.threads) -%> +<%= sprintf("export OPENBLAS_NUM_THREADS=%i", resources$blas.threads) -%> +<%= sprintf("export MKL_NUM_THREADS=%i", resources$blas.threads) -%> + +## Run R: +## we merge R output with stdout from SLURM, which gets then logged via --output option +Rscript -e 'batchtools::doJobCollection("<%= uri %>")' diff --git a/inst/benchmark_scripts/benchmark.R b/inst/benchmark_scripts/benchmark.R new file mode 100644 index 0000000..e64e7ce --- /dev/null +++ b/inst/benchmark_scripts/benchmark.R @@ -0,0 +1,126 @@ +# Script to make benchmark across different functions of similar packages + +# Options ---------------------------------------------------------------------- +options(fundiversity.memoise = FALSE) # Deactivate memoisation + +# Package & Functions ---------------------------------------------------------- +pkgload::load_all() # Need last version of fundiversity +source( + here::here("inst", "benchmark_scripts", "benchmark_fcts", + "fd_benchmark_multicore.R") +) +source( + here::here("inst", "benchmark_scripts", "benchmark_fcts", + "fd_benchmark_alternatives.R") +) +source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R")) + + +# FD indices expressions ------------------------------------------------------- + +dbfd_indices <- rlang::exprs( + FD_dbFD_fdiv = FD::dbFD( + given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = TRUE, + calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE + ), + FD_dbFD_feve = FD::dbFD( + given_data[["traits"]], given_data[["site_sp"]], calc.FDiv = FALSE, + calc.FGR = FALSE, calc.FRic = FALSE, calc.CWM = FALSE + ), + FD_dbFD_fric = FD::dbFD( + given_data[["traits"]], given_data[["site_sp"]], calc.FRic = TRUE, + calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE + ), + FD_dbFD_raoq = FD::dbFD( + given_data[["traits"]], given_data[["site_sp"]], calc.FRic = FALSE, + calc.CWM = FALSE, calc.FGR = FALSE, calc.FDiv = FALSE + ) +) + +fd_indices <- list( + "fdis" = rlang::exprs( + fundiversity_fd_fdis_unparallel = + { + fd_fdis(given_data[["traits"]], given_data[["site_sp"]]) + }, + BAT_dispersion = BAT::dispersion( + given_data[["site_sp"]], given_data[["traits"]] + ), + FD_fdisp = FD::fdisp( + dist(given_data[["traits"]]), given_data[["site_sp"]] + ), + mFD_alpha_fd = mFD::alpha.fd.multidim( + given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdis", + scaling = FALSE, verbose = FALSE + ) + ), + "fdiv" = rlang::exprs( + fundiversity_fd_fdiv_unparallel = + { + fd_fdiv(given_data[["traits"]], given_data[["site_sp"]]) + }, + mFD_alpha_fd = mFD::alpha.fd.multidim( + given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fdiv", + scaling = FALSE, verbose = FALSE + ) + ), + "feve" = rlang::exprs( + fundiversity_fd_feve_unparallel = + { + fd_feve(given_data[["traits"]], given_data[["site_sp"]]) + }, + mFD_alpha_fd = mFD::alpha.fd.multidim( + given_data[["traits"]], given_data[["site_sp"]], ind_vect = "feve", + scaling = FALSE, verbose = FALSE + ) + ), + "raoq" = rlang::exprs( + fundiversity_fd_raoq_unparallel = + { + fd_raoq(given_data[["traits"]], given_data[["site_sp"]]) + }, + adiv_qe = adiv::QE( + given_data[["site_sp"]], dist(given_data[["traits"]]) + ), + BAT_rao = BAT::rao( + given_data[["site_sp"]], distance = given_data[["traits"]] + ), + hillR_hill_func = hillR::hill_func( + given_data[["site_sp"]], given_data[["traits"]], fdis = FALSE + ), + mFD_alpha_fd_hill = mFD::alpha.fd.hill( + given_data[["site_sp"]], dist(given_data[["traits"]]), q = 2, + tau = "max" + ) + ) +) + +# Benchmarks ------------------------------------------------------------------- +bench_sites = c(5e1 , 1e2, 5e2) +bench_traits = c(2, 4, 10) +bench_species = c(2e2, 5e2, 1e3) + +n_iterations = 10 + +## Regular indices benchmark +# Parallelize across indices +future::plan( + future.batchtools::batchtools_slurm, + template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"), + label = "funbench", + resources = list( + job.name = "funbench", + walltime = 5760, + memory = "150G", + ncpus = 1, + output = "/work/%u/%j-%x.log", + email = Sys.getenv("USEREMAIL") + ), + finalize = FALSE +) + +fd_indices %>% + furrr::future_imap( + ~ fd_benchmark_alternatives(.x) %>% + saveRDS(glue::glue("bench_{index}_alternatives.rds", index = .y)) + ) diff --git a/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R b/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R new file mode 100644 index 0000000..9e3c114 --- /dev/null +++ b/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_alternatives.R @@ -0,0 +1,19 @@ +fd_benchmark_alternatives <- function(list_alternatives, seed = 20210906) { + + bench::press( + n_sites = bench_sites, + n_traits = bench_traits, + n_species = bench_species, + { + set.seed(seed) + given_data = make_data(n_species, n_traits, n_sites) + bench::mark( + exprs = list_alternatives, + iterations = n_iterations, + check = FALSE, + memory = FALSE + ) + } + ) + +} diff --git a/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R b/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R new file mode 100644 index 0000000..aa78919 --- /dev/null +++ b/inst/benchmark_scripts/benchmark_fcts/fd_benchmark_multicore.R @@ -0,0 +1,24 @@ +fd_benchmark_multicore <- function(fd_index) { + + fd_fct <- rlang::as_function(fd_index) + + bench::press( + n_sites = bench_sites, + n_traits = bench_traits, + n_species = bench_species, + { + set.seed(20210906) + given_data = make_data(n_species, n_traits, n_sites) + bench::mark( + iterations = n_iterations, + fundiversity_multicore = + { + fd_fct(given_data[["traits"]], given_data[["site_sp"]]) + }, + check = FALSE, + memory = FALSE + ) + } + ) + +} diff --git a/inst/benchmark_scripts/benchmark_fcts/make_data.R b/inst/benchmark_scripts/benchmark_fcts/make_data.R new file mode 100644 index 0000000..1b4dc76 --- /dev/null +++ b/inst/benchmark_scripts/benchmark_fcts/make_data.R @@ -0,0 +1,19 @@ +make_data = function(n_species = 10, n_traits = 5, n_sites = 20) { + traits = matrix(runif(n_species * n_traits), + ncol = n_traits, nrow = n_species, + dimnames = list(species = paste0("sp", seq(n_species)), + traits = paste0("t", seq(n_traits))) + ) + + site_sp = matrix( + as.numeric(runif(n_species * n_sites) >= 0.5), + nrow = n_sites, + ncol = n_species, + dimnames = list( + sites = paste0("s", seq(n_sites)), + species = paste0("sp", seq(n_species)) + ) + ) + + list(traits = traits, site_sp = site_sp) +} diff --git a/inst/benchmark_scripts/benchmark_fd_multicore.R b/inst/benchmark_scripts/benchmark_fd_multicore.R new file mode 100644 index 0000000..658fedc --- /dev/null +++ b/inst/benchmark_scripts/benchmark_fd_multicore.R @@ -0,0 +1,57 @@ +# Script to make benchmark across different functions of similar packages + +# Options ---------------------------------------------------------------------- +options(fundiversity.memoise = FALSE) # Deactivate memoisation + +# Package & Functions ---------------------------------------------------------- +pkgload::load_all() # Need last version of fundiversity +library("magrittr") +source( + here::here("inst", "benchmark_scripts", "benchmark_fcts", + "fd_benchmark_multicore.R") +) +source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R")) + +# Benchmarks Parameters -------------------------------------------------------- + +# Data Parameters +bench_sites = c(5e1 , 1e2, 5e2) +bench_traits = c(2, 4) +bench_species = c(2e2, 5e2, 1e3) + +# Number of Repeats per function +n_iterations = 20 + +# Number of Cores over which to parallelize +n_cores = c(1, seq(2, 6, by = 2)) + + +# Running the benchmark -------------------------------------------------------- +all_funs = alist( + fdis = fd_fdis, fdiv = fd_fdiv, feve = fd_feve, fric = fd_fric +) %>% + purrr::cross2(n_cores) %>% + purrr::map(setNames, c("fundiv_function", "n_core")) %>% + purrr::map( + function(x) { + + fundiv_function = x$fundiv_function + n_core = x$n_core + + future::plan( + list( + future::tweak( + future::multisession, workers = n_core + ) + ) + ) + + fd_benchmark_multicore(get(fundiv_function)) %>% + tidyr::unnest(c(time, gc)) %>% + select(-expression) %>% + mutate(fd_fct = as.character(fundiv_function), n_core = n_core) + + } + ) + +saveRDS(all_funs, "all_multicore_bench.Rds") diff --git a/inst/benchmark_scripts/benchmark_fric_comparison.R b/inst/benchmark_scripts/benchmark_fric_comparison.R new file mode 100644 index 0000000..aef2ebb --- /dev/null +++ b/inst/benchmark_scripts/benchmark_fric_comparison.R @@ -0,0 +1,76 @@ +# Script to make benchmark across different functions of similar packages + +# Options ---------------------------------------------------------------------- +options(fundiversity.memoise = FALSE) # Deactivate memoisation + +# Package & Functions ---------------------------------------------------------- +pkgload::load_all() # Need last version of fundiversity +source( + here::here("inst", "benchmark_scripts", "benchmark_fcts", + "fd_benchmark_alternatives.R") +) +source(here::here("inst", "benchmark_scripts", "benchmark_fcts", "make_data.R")) + + +# FD indices expressions ------------------------------------------------------- + +fric_exprs <- list( + "fric" = rlang::exprs( + fundiversity_fd_fric_unparallel = + { + fd_fric(given_data[["traits"]], given_data[["site_sp"]]) + }, + BAT_alpha_tree = BAT::alpha( + given_data[["site_sp"]], given_data[["traits"]] + ), + BAT_alpha_hull = BAT::hull.alpha( + BAT::hull.build(given_data[["site_sp"]], given_data[["traits"]]) + ), + mFD_alpha_fd = mFD::alpha.fd.multidim( + given_data[["traits"]], given_data[["site_sp"]], ind_vect = "fric", + scaling = FALSE, verbose = FALSE + ) + ), + "fric_intersect" = rlang::exprs( + fundiversity_fd_fric_intersect_unparallel = + { + fd_fric_intersect(given_data[["traits"]], given_data[["site_sp"]]) + }, + betapart_functional_beta = betapart::functional.beta.pair( + given_data[["site_sp"]], given_data[["traits"]] + ), + hillR_funct_pairwise = hillR::hill_func_parti_pairwise( + given_data[["site_sp"]], given_data[["traits"]] + ) + ) +) + +# Functional Richness Benchmark ------------------------------------------------ + +bench_sites = c(5e1 , 1e2) +bench_traits = c(2, 4) +bench_species = c(2e2, 5e2) + +n_iterations = 30 + +## Regular indices benchmark +# Parallelize across indices +future::plan( + future.batchtools::batchtools_slurm, + template = here::here("inst", "benchmark_scripts", ".batchtools.slurm.tmpl"), + label = "funbench_fric", + resources = list( + job.name = "funbench_fric", + walltime = 7200, + memory = "150G", + ncpus = 1, + output = "/work/%u/%x-%j.log", + email = Sys.getenv("USEREMAIL") + ) +) + +fric_exprs["fric_intersect"] %>% + furrr::future_imap( + ~ fd_benchmark_alternatives(.x, seed=20210915) %>% + saveRDS(glue::glue("fric_bench_{index}_alternatives.rds", index = .y)) + )