diff --git a/.github/workflows/r-cmd-check.yml b/.github/workflows/r-cmd-check.yml index e4004004..a20dce45 100644 --- a/.github/workflows/r-cmd-check.yml +++ b/.github/workflows/r-cmd-check.yml @@ -54,11 +54,25 @@ jobs: with: r-version: ${{ matrix.config.r }} + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - uses: r-lib/actions/setup-r-dependencies@v2 with: extra-packages: any::rcmdcheck needs: check + - name: Create Python virtual environments + run: | + python3 -m venv ~/.virtualenvs/r-hebo + ~/.virtualenvs/r-hebo/bin/pip install --upgrade pip --quiet + ~/.virtualenvs/r-hebo/bin/pip install torch --index-url https://download.pytorch.org/whl/cpu --quiet + ~/.virtualenvs/r-hebo/bin/pip install hebo --quiet + python3 -m venv ~/.virtualenvs/r-smac + ~/.virtualenvs/r-smac/bin/pip install --upgrade pip --quiet + ~/.virtualenvs/r-smac/bin/pip install smac ConfigSpace --quiet + - uses: mxschmitt/action-tmate@v3 if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} with: diff --git a/DESCRIPTION b/DESCRIPTION index 64099ba8..52470565 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,8 @@ Authors@R: c( person("Bernd", "Bischl", , "bernd_bischl@gmx.net", role = "aut", comment = c(ORCID = "0000-0001-6002-6980")), person("Martin", "Binder", , "martin.binder@mail.com", role = "aut"), - person("Olaf", "Mersmann", , "olafm@statistik.tu-dortmund.de", role = "ctb") + person("Olaf", "Mersmann", , "olafm@statistik.tu-dortmund.de", role = "ctb"), + person("Alexander", "Winterstetter", , "alexanderwinterstetter@gmail.de", role = "ctb") ) Description: Features highly configurable search spaces via the 'paradox' package and optimizes every user-defined objective function. 
The @@ -35,6 +36,7 @@ Imports: R6 Suggests: adagio, + callr, emoa, GenSA, irace (>= 4.0.0), @@ -44,9 +46,13 @@ Suggests: processx, progressr, redux, + reticulate, RhpcBLASctl, rush (>= 1.0.0), - testthat (>= 3.0.0) + testthat (>= 3.0.0), + withr +Remotes: + mlr-org/paradox Config/testthat/edition: 3 Config/testthat/parallel: false Encoding: UTF-8 @@ -91,10 +97,12 @@ Collate: 'OptimizerBatchFocusSearch.R' 'OptimizerBatchGenSA.R' 'OptimizerBatchGridSearch.R' + 'OptimizerBatchHEBO.R' 'OptimizerBatchIrace.R' 'OptimizerBatchLocalSearch.R' 'OptimizerBatchNLoptr.R' 'OptimizerBatchRandomSearch.R' + 'OptimizerBatchSmac3.R' 'Progressor.R' 'mlr_terminators.R' 'Terminator.R' @@ -114,6 +122,7 @@ Collate: 'bibentries.R' 'conditions.R' 'helper.R' + 'helper_hebo.R' 'local_search.R' 'mlr_callbacks.R' 'mlr_test_functions.R' diff --git a/NAMESPACE b/NAMESPACE index 62c5b169..994dba1e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,10 +48,12 @@ export(OptimizerBatchDesignPoints) export(OptimizerBatchFocusSearch) export(OptimizerBatchGenSA) export(OptimizerBatchGridSearch) +export(OptimizerBatchHEBO) export(OptimizerBatchIrace) export(OptimizerBatchLocalSearch) export(OptimizerBatchNLoptr) export(OptimizerBatchRandomSearch) +export(OptimizerBatchSmac3) export(Terminator) export(TerminatorClockTime) export(TerminatorCombo) diff --git a/R/OptimizerAsync.R b/R/OptimizerAsync.R index f170e146..f29f0596 100644 --- a/R/OptimizerAsync.R +++ b/R/OptimizerAsync.R @@ -20,13 +20,13 @@ #' There are three types of workers: #' #' - "mirai": Workers are started with \CRANpkg{mirai} on local or remote machines. -#' See `$start_workers()` in [Rush] for more details. +#' See `$start_workers()` in [rush::Rush] for more details. #' [mirai::daemons()] must be created before starting the optimization. #' - "processx": Workers are started as local processes with \CRANpkg{processx}. -#' See `$start_local_workers()` in [Rush] for more details. +#' See `$start_local_workers()` in [rush::Rush] for more details. 
#' - "script": Workers are started by the user with a custom script. -#' See `$create_worker_script()` in [Rush] for more details. +#' See `$create_worker_script()` in [rush::Rush] for more details. #' #' The workers are started when the `$optimize()` method is called. #' The main process waits until at least one worker is running. diff --git a/R/OptimizerBatchHEBO.R b/R/OptimizerBatchHEBO.R new file mode 100644 index 00000000..fc67dc73 --- /dev/null +++ b/R/OptimizerBatchHEBO.R @@ -0,0 +1,156 @@ +#' @title Heteroscedastic evolutionary bayesian optimization +#' +#' @export +#' + +OptimizerBatchHEBO = R6Class( + "OptimizerBatchHEBO", + inherit = OptimizerBatch, + public = list( + initialize = function() { + param_set = ps( + # optimizer interface + n_suggestions = p_int(lower = 1L, init = 1L), # remove as HEBO only handles single crit + n_init = p_int(lower = 1L), + seed = p_int(lower = 0L), + + # surrogate model + surrogate = p_fct(levels = c("gp", "rf"), init = "gp"), + rf_n_estimators = p_int(lower = 1L, depends = quote(surrogate == "rf")), + gp_lr = p_dbl(lower = 0, depends = quote(surrogate == "gp")), + gp_num_epochs = p_int(lower = 1L, depends = quote(surrogate == "gp")), + gp_noise_free = p_lgl(depends = quote(surrogate == "gp")), + gp_noise_lb = p_dbl(lower = 0, depends = quote(surrogate == "gp")), + gp_pred_likeli = p_lgl(depends = quote(surrogate == "gp")), + + # acquisition function + acq_function = p_fct(levels = c("mace", "lcb"), init = "mace"), + + # evolutionary search for acquisition optimization + es = p_fct( + levels = c( + "ga", + "brkga", + "de", + "nelder-mead", + "pattern-search", + "cmaes", + "pso", + "nsga2", + "rnsga2", + "nsga3", + "unsga3", + "rnsga3", + "moead", + "ctaea" + ), + init = "nsga2" + ) + ) + + super$initialize( + id = "hebo", + param_set = param_set, + param_classes = c("ParamDbl", "ParamInt", "ParamFct", "ParamLgl"), + properties = c("single-crit"), + packages = "reticulate", + label = "Heteroscedastic Evolutionary Bayesian 
Optimization", + man = "bbotk::mlr_optimizers_hebo" + ) + } + ), + + private = list( + .optimize = function(inst) { + assert_python_packages(c("hebo")) + hebo = reticulate::import("hebo") + + pv = self$param_set$values + + search_space = inst$search_space + space = paramset_to_hebo_space(search_space) + optimizer_args = list( + space = space, + rand_sample = as.integer(pv$n_init %??% (1L + search_space$length)), + es = pv$es %??% "nsga2" + ) + + # add nn; evtl boosting + # add unit tests see SMAC3 + if (!is.null(pv$surrogate)) { + if (pv$surrogate == "rf") { + optimizer_args$model_name = "rf" + optimizer_args$model_config = list( + n_estimators = as.integer(pv$rf_n_estimators %??% 20L) + ) + } else if (pv$surrogate == "gp") { + optimizer_args$model_name = "gp" + optimizer_args$model_config = list( + lr = pv$gp_lr %??% 0.01, + num_epochs = as.integer(pv$gp_num_epochs %??% 100L), + verbose = pv$gp_verbose %??% FALSE, + print_every = as.integer(pv$gp_print_every %??% 10L), + noise_free = pv$gp_noise_free %??% FALSE, + noise_lb = pv$gp_noise_lb %??% 8e-4, + pred_likeli = pv$gp_pred_likeli %??% FALSE + ) + } + } + + if (!is.null(pv$acq_function)) { + acq_mod = hebo$acquisitions$acq + optimizer_args$acq_cls = switch( + pv$acq_function, + "lcb" = acq_mod$LCB, + "mace" = acq_mod$MACE, + stopf( + "Unsupported HEBO acquisition function '%s'. 
Supported values are 'mace' and 'lcb'.", + pv$acq_function + ) + ) + } + + if (!is.null(pv$seed)) { + optimizer_args$scramble_seed = as.integer(pv$seed) + } + + hebo_optimizer = invoke(hebo$optimizers$hebo$HEBO, .args = optimizer_args) + + repeat { + if (inst$is_terminated) { + break + } + + # TODO: the tryCatch still needs to be fleshed out, see the following comment + # hebo$optimizers$hebo$HEBO()$quasi_sample --- the default HEBO sampler may be needed as a fallback when suggest fails; useful when the surrogate model (SM) breaks down; wrap it into the tryCatch around the suggest call for that case + # ISSUE 1 - HEBO suggest does not expose metadata the way SMAC3 does + # trial_info exposes parameters as a pandas DataFrame + trial_info = tryCatch( + hebo_optimizer$suggest(n_suggestions = as.integer(pv$n_suggestions %??% 1L)), + # TODO: find out how HEBO uses suggest to return multiple configs versus a single config; there are several ways to do this, and for MBO it is interesting how HEBO handles it + error = function(e) NULL + ) + if (is.null(trial_info)) { + break + } + + # convert pandas DataFrame to R data.frame-like object + trial_info_dt = setDT(reticulate::py_to_r(trial_info)) + + # evaluation of that config + res = inst$eval_batch(trial_info_dt) + + # evaluated target columns are turned into a numeric matrix + y = as.matrix(res[, inst$archive$cols_y, with = FALSE]) + + # signs are flipped + # TODO: check whether HEBO can also do multi-objective optimization, i.e. return several y columns; this also matters for whether the optimizer is single-crit only or can be multi-crit as well + y = y * matrix(inst$objective_multiplicator, nrow = nrow(y), ncol = ncol(y), byrow = TRUE) + + hebo_optimizer$observe(trial_info, y) + } + } + ) +) + +mlr_optimizers$add("hebo", OptimizerBatchHEBO) diff --git a/R/OptimizerBatchSmac3.R b/R/OptimizerBatchSmac3.R new file mode 100644 index 00000000..349baaf1 --- /dev/null +++ b/R/OptimizerBatchSmac3.R @@ -0,0 +1,458 @@ +#' @title Sequential Model-Based Algorithm Configuration (SMAC3) +#' +#' 
@include Optimizer.R +#' @name mlr_optimizers_smac +#' +#' @description +#' Calls SMAC3 from Python via the \CRANpkg{reticulate} package. +#' +#' @note +#' All parameters of the search space must have default values. +#' +#' @section Parameters: +#' \describe{ +#' +#' \item{`n_init`}{`integer(1)`\cr +#' Number of initial configurations to evaluate before starting the optimization. +#' Defaults to `10` times the number of hyperparameters.} +#' +#' \item{`facade`}{`character(1)`\cr +#' Facade to use: +#' `"smac4bb"` (Black-Box Facade, uses Gaussian Process), +#' `"smac4hb"` (Hyperparameter Optimization Facade, uses Random Forest), +#' `"smac4ac"` (Algorithm Configuration Facade), +#' `"smac4mf"` (Multi-Fidelity Facade, uses Hyperband), +#' `"smac4rs"` (Random Facade / ROAR, no surrogate model). +#' Default is `"smac4bb"`.} +#' +#' \item{`output_directory`}{`character(1)`\cr +#' Directory to store the output of SMAC3. +#' Default is a temporary directory.} +#' +#' \item{`deterministic`}{`logical(1)`\cr +#' Whether the objective function is deterministic. +#' If `FALSE`, SMAC may re-evaluate configurations with different seeds. +#' Default is `TRUE`.} +#' +#' \item{`crash_cost`}{`numeric(1)`\cr +#' Cost assigned to crashed or failed trials.} +#' +#' \item{`seed`}{`integer(1)`\cr +#' Seed for the random number generator in SMAC. +#' Default is a random seed.} +#' +#' \item{`surrogate`}{`character(1)`\cr +#' Surrogate model to use. +#' `"rf"` (Random Forest) or `"gp"` (Gaussian Process). +#' Default is the facade's default (GP for `"smac4bb"`, RF for `"smac4hb"` and `"smac4ac"`).} +#' +#' \item{`rf.n_trees`}{`integer(1)`\cr +#' Number of trees in the Random Forest surrogate. +#' Only used when `surrogate = "rf"`. +#' Default is `10`.} +#' +#' \item{`rf.ratio_features`}{`numeric(1)`\cr +#' Ratio of features used per tree in the Random Forest. +#' Only used when `surrogate = "rf"`. 
+#' Default is `1.0`.} +#' +#' \item{`rf.min_samples_split`}{`integer(1)`\cr +#' Minimum number of samples to split a node in the Random Forest. +#' Only used when `surrogate = "rf"`. +#' Default is `2`.} +#' +#' \item{`rf.min_samples_leaf`}{`integer(1)`\cr +#' Minimum number of samples per leaf in the Random Forest. +#' Only used when `surrogate = "rf"`. +#' Default is `1`.} +#' +#' \item{`rf.max_depth`}{`integer(1)`\cr +#' Maximum depth of trees in the Random Forest. +#' Only used when `surrogate = "rf"`. +#' Default is `1048576`.} +#' +#' \item{`gp.n_restarts`}{`integer(1)`\cr +#' Number of restarts for Gaussian Process hyperparameter optimization. +#' Only used when `surrogate = "gp"`. +#' Default is `10`.} +#' +#' \item{`acq_function`}{`character(1)`\cr +#' Acquisition function to use. +#' `"ei"` (Expected Improvement), `"lcb"` (Lower Confidence Bound), +#' `"pi"` (Probability of Improvement), or `"ts"` (Thompson Sampling). +#' Default is the facade's default.} +#' +#' \item{`acq_function.xi`}{`numeric(1)`\cr +#' Exploration-exploitation trade-off parameter for Expected Improvement and +#' Probability of Improvement. +#' Only used when `acq_function` is `"ei"` or `"pi"`. +#' Default is `0.0`.} +#' +#' \item{`acq_function.beta`}{`numeric(1)`\cr +#' Exploration-exploitation trade-off parameter for Lower Confidence Bound. +#' Only used when `acq_function = "lcb"`. +#' Default is `1.0`.} +#' +#' \item{`initial_design`}{`character(1)`\cr +#' Initial design strategy. +#' `"sobol"`, `"random"`, `"lhc"` (Latin Hypercube), `"factorial"`, or `"default"`. +#' Default is the facade's default.} +#' +#' \item{`max_config_calls`}{`integer(1)`\cr +#' Maximum number of evaluations per configuration. +#' Values larger than `1` are useful for stochastic objectives. +#' Default is `1`.} +#' +#' \item{`random_design`}{`character(1)`\cr +#' Strategy for interleaving random configurations during optimization. +#' `"probability"` or `"modulus"`. 
+#' Default is the facade's default.} +#' +#' \item{`random_design.probability`}{`numeric(1)`\cr +#' Probability of sampling a random configuration instead of using the surrogate model. +#' Only used when `random_design = "probability"`.} +#' +#' \item{`random_design.modulus`}{`numeric(1)`\cr +#' Every `modulus`-th configuration is drawn randomly. +#' Only used when `random_design = "modulus"`.} +#' +#' \item{`eta`}{`integer(1)`\cr +#' Halving factor for Successive Halving / Hyperband. +#' Only used when `facade = "smac4mf"`. +#' Default is `3`.} +#' +#' \item{`min_budget`}{`numeric(1)`\cr +#' Minimum budget for multi-fidelity optimization (e.g., epochs, subset fraction). +#' Only used when `facade = "smac4mf"`.} +#' +#' \item{`max_budget`}{`numeric(1)`\cr +#' Maximum budget for multi-fidelity optimization. +#' Only used when `facade = "smac4mf"`.} +#' +#' } +#' +#' @template section_progress_bars +#' +#' @source +#' `r format_bib("lindauer_2022")` +#' +#' @export +#' @examples +#' \dontrun{ +#' # define the objective function +#' fun = function(xs) { +#' list(y = -(xs[[1]] - 2)^2 - (xs[[2]] + 3)^2 + 10) +#' } +#' +#' # set domain (all parameters must have defaults for ConfigSpace) +#' domain = ps( +#' x1 = p_dbl(-10, 10, default = 0), +#' x2 = p_dbl(-5, 5, default = 0) +#' ) +#' +#' # set codomain +#' codomain = ps( +#' y = p_dbl(tags = "maximize") +#' ) +#' +#' # create objective +#' objective = ObjectiveRFun$new( +#' fun = fun, +#' domain = domain, +#' codomain = codomain, +#' properties = "deterministic" +#' ) +#' +#' # initialize instance +#' instance = oi( +#' objective = objective, +#' terminator = trm("evals", n_evals = 20) +#' ) +#' +#' # load optimizer +#' optimizer = opt("smac") +#' +#' # trigger optimization +#' optimizer$optimize(instance) +#' +#' # all evaluated configurations +#' instance$archive +#' +#' # best performing configuration +#' instance$result +#' } +OptimizerBatchSmac3 = R6Class( + "OptimizerBatchSmac3", + inherit = OptimizerBatch, + 
public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + param_set = ps( + # scenario + n_init = p_int(lower = 1L), + facade = p_fct(levels = c("smac4bb", "smac4hb", "smac4ac", "smac4mf", "smac4rs"), init = "smac4bb"), + output_directory = p_uty(init = tempdir()), + deterministic = p_lgl(init = TRUE), + crash_cost = p_dbl(), + seed = p_int(lower = 0L), + + # surrogate model + surrogate = p_fct(levels = c("rf", "gp")), + rf.n_trees = p_int(lower = 1L, depends = quote(surrogate == "rf")), + rf.ratio_features = p_dbl(lower = 0, upper = 1, depends = quote(surrogate == "rf")), + rf.min_samples_split = p_int(lower = 2L, depends = quote(surrogate == "rf")), + rf.min_samples_leaf = p_int(lower = 1L, depends = quote(surrogate == "rf")), + rf.max_depth = p_int(lower = 1L, depends = quote(surrogate == "rf")), + gp.n_restarts = p_int(lower = 0L, depends = quote(surrogate == "gp")), + + # acquisition function + acq_function = p_fct(levels = c("ei", "lcb", "pi", "ts")), + acq_function.xi = p_dbl(lower = 0, depends = quote(acq_function %in% c("ei", "pi"))), + acq_function.beta = p_dbl(lower = 0, depends = quote(acq_function == "lcb")), + + # initial design + initial_design = p_fct(levels = c("sobol", "random", "lhc", "factorial", "default")), + + # intensifier + max_config_calls = p_int(lower = 1L), + + # random design + random_design = p_fct(levels = c("probability", "modulus")), + random_design.probability = p_dbl(lower = 0, upper = 1, depends = quote(random_design == "probability")), + random_design.modulus = p_dbl(lower = 1, depends = quote(random_design == "modulus")), + + # multi-fidelity + eta = p_int(lower = 2L, depends = quote(facade == "smac4mf")), + min_budget = p_dbl(lower = 0, depends = quote(facade == "smac4mf")), + max_budget = p_dbl(lower = 0, depends = quote(facade == "smac4mf")) + ) + + super$initialize( + id = "smac", + param_set = param_set, + param_classes = c("ParamDbl", "ParamInt", 
"ParamFct", "ParamLgl"), + properties = c("dependencies", "single-crit"), + packages = "reticulate", + label = "Sequential Model-Based Algorithm Configuration", + man = "bbotk::mlr_optimizers_smac" + ) + } + ), + + private = list( + .optimize = function(inst) { + assert_python_packages(c("smac", "ConfigSpace")) + smac = reticulate::import("smac") + + pv = self$param_set$values + search_space = inst$search_space + + # convert paradox search space to ConfigSpace + cs = paramset_to_configspace(search_space) + + terminator = inst$terminator + if (inherits(terminator, "TerminatorEvals")) { + n_trials = terminator$param_set$values$n_evals + } else { + # use a large number for other terminators + n_trials = .Machine$integer.max + } + + n_init = pv$n_init %??% (10L * search_space$length) + + # build scenario + scenario_args = list( + configspace = cs, + deterministic = pv$deterministic, + n_trials = as.integer(n_trials), + seed = as.integer(pv$seed %??% sample.int(.Machine$integer.max, 1L)), + output_directory = pv$output_directory + ) + + if (!is.null(pv$crash_cost)) { + scenario_args$crash_cost = pv$crash_cost + } + + if (!is.null(pv$min_budget)) { + scenario_args$min_budget = pv$min_budget + } + + if (!is.null(pv$max_budget)) { + scenario_args$max_budget = pv$max_budget + } + + scenario = invoke(smac$Scenario, .args = scenario_args) + + # map facade + facade_class = switch( + pv$facade, + "smac4bb" = smac$BlackBoxFacade, + "smac4hb" = smac$HyperparameterOptimizationFacade, + "smac4ac" = smac$AlgorithmConfigurationFacade, + "smac4mf" = smac$MultiFidelityFacade, + "smac4rs" = smac$RandomFacade + ) + + # build surrogate model + model = NULL + if (!is.null(pv$surrogate)) { + if (pv$surrogate == "rf") { + model = smac$model$random_forest$RandomForest( + configspace = cs, + n_trees = as.integer(pv$rf.n_trees %??% 10L), + ratio_features = pv$rf.ratio_features %??% 1.0, + min_samples_split = as.integer(pv$rf.min_samples_split %??% 2L), + min_samples_leaf = 
as.integer(pv$rf.min_samples_leaf %??% 1L), + max_depth = as.integer(pv$rf.max_depth %??% 1048576L), + seed = as.integer(scenario_args$seed) + ) + } else if (pv$surrogate == "gp") { + kernel = facade_class$get_kernel(scenario) + model = smac$model$gaussian_process$GaussianProcess( + configspace = cs, + kernel = kernel, + n_restarts = as.integer(pv$gp.n_restarts %??% 10L), + seed = as.integer(scenario_args$seed) + ) + } + } + + # build acquisition function + acq_function = NULL + if (!is.null(pv$acq_function)) { + acq_mod = smac$acquisition[["function"]] + acq_function = switch( + pv$acq_function, + "ei" = acq_mod$EI(xi = pv$acq_function.xi %??% 0.0), + "lcb" = acq_mod$LCB(beta = pv$acq_function.beta %??% 1.0), + "pi" = acq_mod$PI(xi = pv$acq_function.xi %??% 0.0), + "ts" = acq_mod$TS() + ) + } + + # build initial design + initial_design_obj = NULL + if (!is.null(pv$initial_design)) { + initial_design_obj = switch( + pv$initial_design, + "sobol" = smac$initial_design$SobolInitialDesign(scenario, n_configs = as.integer(n_init)), + "random" = smac$initial_design$RandomInitialDesign(scenario, n_configs = as.integer(n_init)), + "lhc" = smac$initial_design$LatinHypercubeInitialDesign(scenario, n_configs = as.integer(n_init)), + "factorial" = smac$initial_design$FactorialInitialDesign(scenario, n_configs = as.integer(n_init)), + "default" = smac$initial_design$DefaultInitialDesign(scenario) + ) + } else if (pv$facade %in% c("smac4bb", "smac4hb", "smac4mf")) { + initial_design_obj = facade_class$get_initial_design( + scenario, + n_configs = as.integer(n_init) + ) + } else { + # AlgorithmConfigurationFacade and RandomFacade use their own default initial designs + initial_design_obj = facade_class$get_initial_design(scenario) + } + + # build random design + random_design = NULL + if (!is.null(pv$random_design)) { + random_design = switch( + pv$random_design, + "probability" = smac$random_design$ProbabilityRandomDesign( + probability = pv$random_design.probability %??% 
0.08447 + ), + "modulus" = smac$random_design$ModulusRandomDesign( + modulus = pv$random_design.modulus %??% 2.0 + ) + ) + } + + # build intensifier + if (pv$facade == "smac4mf") { + # MultiFidelityFacade uses Hyperband which takes eta, not max_config_calls + intensifier_args = list(scenario) + if (!is.null(pv$eta)) { + intensifier_args$eta = as.integer(pv$eta) + } + } else { + intensifier_args = list( + scenario, + max_config_calls = as.integer(pv$max_config_calls %??% 1L) + ) + } + + intensifier = invoke(facade_class$get_intensifier, .args = intensifier_args) + + # create smac optimizer + # use a dummy target function since we use the ask-tell interface + # SMAC validates the function signature, so we create a Python function directly + # MultiFidelityFacade requires a 'budget' argument in the target function + if (pv$facade == "smac4mf") { + reticulate::py_run_string("def _dummy_target_fn(config, seed, budget): return 0.0") + } else { + reticulate::py_run_string("def _dummy_target_fn(config, seed): return 0.0") + } + dummy_fn = reticulate::py$`_dummy_target_fn` + + facade_args = list( + scenario = scenario, + target_function = dummy_fn, + intensifier = intensifier, + initial_design = initial_design_obj, + overwrite = TRUE + ) + + if (!is.null(model)) { + facade_args$model = model + } + + if (!is.null(acq_function)) { + facade_args$acquisition_function = acq_function + } + + if (!is.null(random_design)) { + facade_args$random_design = random_design + } + + smac_optimizer = invoke(facade_class, .args = facade_args) + + # import TrialValue for reporting results + TrialValue = smac$runhistory$dataclasses$TrialValue + + repeat { + # ask for next configuration + trial_info = tryCatch( + smac_optimizer$ask(), + error = function(e) NULL + ) + + if (is.null(trial_info)) { + break + } + + # extract configuration as named list + config = trial_info$config + config_dict = reticulate::py_to_r(config$get_dictionary()) + + # inactive parameters are not in the config dictionary 
+ # create data.table with all parameters (inactive ones are NA) + all_params = search_space$ids() + xdt = setDT(lapply(set_names(all_params), function(p) config_dict[[p]] %??% NA)) + + # fix logical parameters (ConfigSpace uses strings "TRUE"/"FALSE") + lgl_params = search_space$ids(class = "ParamLgl") + if (length(lgl_params)) { + xdt[, (lgl_params) := lapply(.SD, as.logical), .SDcols = lgl_params] + } + + res = inst$eval_batch(xdt) + cost = res[[inst$archive$cols_y]] * inst$objective_multiplicator + + # tell smac the result + trial_value = TrialValue(cost = cost, time = 0.0) + smac_optimizer$tell(trial_info, trial_value) + } + } + ) +) + +mlr_optimizers$add("smac", OptimizerBatchSmac3) diff --git a/R/assertions.R b/R/assertions.R index 7e2f2c81..c8bb9df8 100644 --- a/R/assertions.R +++ b/R/assertions.R @@ -209,3 +209,26 @@ assert_archive_batch = function(archive, null_ok = FALSE) { } assert_r6(archive, "ArchiveBatch") } + +#' @title Assert Python Packages +#' +#' @description +#' Assert that the given Python packages are available. +#' +#' @param packages (`character()`)\cr +#' Python packages to check. +#' @param python_version (`character(1)`)\cr +#' Python version to use. If `NULL`, the default Python version is used. +#' +#' @return (`character()`)\cr +#' Invisibly returns the input `packages` vector if all requested Python packages are available; otherwise throws an error listing the missing packages. 
+assert_python_packages = function(packages, python_version = NULL) { + if (!reticulate::py_available()) { + reticulate::py_require(packages, python_version = python_version) + } + available = map_lgl(packages, reticulate::py_module_available) + if (any(!available)) { + stopf("Package %s not available.", as_short_string(packages[!available])) + } + invisible(packages) +} diff --git a/R/bibentries.R b/R/bibentries.R index e1f09d23..3dc596fb 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -63,6 +63,17 @@ bibentries = c( eprint = "1903.04703", archivePrefix = "arXiv", primaryClass = "cs.LG" + ), + + lindauer_2022 = bibentry("article", + title = "{SMAC3}: A Versatile Bayesian Optimization Package for Hyperparameter Optimization", + author = "Marius Lindauer and Katharina Eggensperger and Matthias Feurer and Andre Biedenkapp and Difan Deng and Carolin Benjamins and Tim Ruhkopf and Rene Sass and Frank Hutter", + year = "2022", + journal = "Journal of Machine Learning Research", + volume = "23", + number = "54", + pages = "1--9", + url = "http://jmlr.org/papers/v23/21-0888.html" ) ) # nolint end diff --git a/R/helper_hebo.R b/R/helper_hebo.R new file mode 100644 index 00000000..b8c1035d --- /dev/null +++ b/R/helper_hebo.R @@ -0,0 +1,21 @@ +paramset_to_hebo_space = function(search_space) { + assert_python_packages(c("hebo")) + hebo = reticulate::import("hebo") + param = search_space$params + defs = lapply(seq_len(nrow(param)), function(i) { + id = param$id[[i]] + cls = param$cls[[i]] + if (cls == "ParamDbl") { + list(name = id, type = "num", lb = as.numeric(param$lower[[i]]), ub = as.numeric(param$upper[[i]])) + } else if (cls == "ParamInt") { + list(name = id, type = "int", lb = as.integer(param$lower[[i]]), ub = as.integer(param$upper[[i]])) + } else if (cls == "ParamLgl") { + list(name = id, type = "bool") + } else if (cls == "ParamFct") { + list(name = id, type = "cat", categories = as.list(param$levels[[i]])) + } else { + stop(sprintf("Unsupported parameter class: 
%s", cls)) + } + }) + hebo$design_space$design_space$DesignSpace()$parse(defs) +} diff --git a/attic/suggest_function_HEBO.py b/attic/suggest_function_HEBO.py new file mode 100644 index 00000000..73607b1c --- /dev/null +++ b/attic/suggest_function_HEBO.py @@ -0,0 +1,101 @@ +def suggest(self, n_suggestions=1, fix_input=None): + # --- MACE-specific: batch suggestions are only supported with MACE --- + # HEBO only supports parallel suggestions when using MACE acquisition. + if self.acq_cls != MACE and n_suggestions != 1: + raise RuntimeError("Parallel optimization is supported only for MACE acquisition") + + # Cold-start phase: before enough observations are available, return Sobol / quasi-random points. + if self.X.shape[0] < self.rand_sample: + sample = self.quasi_sample(n_suggestions, fix_input) + return sample + else: + # Convert observed configurations to HEBO's internal numeric/categorical representation. + X, Xe = self.space.transform(self.X) + + try: + # Normalize + power-transform objectives for more stable surrogate fitting. + # Use Yeo-Johnson if values can be <= 0, otherwise Box-Cox first. + if self.y.min() <= 0: + y = torch.FloatTensor(power_transform(self.y / self.y.std(), method="yeo-johnson")) + else: + y = torch.FloatTensor(power_transform(self.y / self.y.std(), method="box-cox")) + if y.std() < 0.5: + y = torch.FloatTensor(power_transform(self.y / self.y.std(), method="yeo-johnson")) + + # Abort transformed path if output has too little variation. + if y.std() < 0.5: + raise RuntimeError("Power transformation failed") + + # Build and fit surrogate model on transformed observations. + model = get_model( + self.model_name, + self.space.num_numeric, + self.space.num_categorical, + 1, + **self.model_config + ) + model.fit(X, Xe, y) + except: + # Fallback path: fit model on raw y if transformation fails. 
+ y = torch.FloatTensor(self.y).clone() + model = get_model(self.model_name, self.space.num_numeric, self.space.num_categorical, 1, **self.model_config) + model.fit(X, Xe, y) + + # Get current incumbent (best observed point under optional fixed-context constraints). + best_id = self.get_best_id(fix_input) + best_x = self.X.iloc[[best_id]] + best_y = y.min() + + # Predict at incumbent and use predicted mean as reference in acquisition. + py_best, ps2_best = model.predict(*self.space.transform(best_x)) + py_best = py_best.detach().numpy().squeeze() + ps_best = ps2_best.sqrt().detach().numpy().squeeze() + + # Compute kappa (exploration strength) with iteration-dependent scaling. + iter = max(1, self.X.shape[0] // n_suggestions) + upsi = 0.5 + delta = 0.01 + # kappa = np.sqrt(upsi * 2 * np.log(iter ** (2.0 + self.X.shape[1] / 2.0) * 3 * np.pi**2 / (3 * delta))) + kappa = np.sqrt(upsi * 2 * ((2.0 + self.X.shape[1] / 2.0) * np.log(iter) + np.log(3 * np.pi**2 / (3 * delta)))) + + # Optimize acquisition with evolutionary search to obtain candidate recommendations. + acq = self.acq_cls(model, best_y=py_best, kappa=kappa) # LCB < py_best + mu = Mean(model) + sig = Sigma(model, linear_a=-1.0) + opt = EvolutionOpt(self.space, acq, pop=100, iters=100, verbose=False, es=self.es) + rec = opt.optimize(initial_suggest=best_x, fix_input=fix_input).drop_duplicates() + rec = rec[self.check_unique(rec)] + + # Ensure enough unique candidates; if needed, backfill with quasi-random points. + cnt = 0 + while rec.shape[0] < n_suggestions: + rand_rec = self.quasi_sample(n_suggestions - rec.shape[0], fix_input) + rand_rec = rand_rec[self.check_unique(rand_rec)] + rec = pd.concat([rec, rand_rec], axis=0, ignore_index=True) + cnt += 1 + if cnt > 3: + # Sometimes the design space is so small that duplicates are unavoidable. 
+ break + if rec.shape[0] < n_suggestions: + rand_rec = self.quasi_sample(n_suggestions - rec.shape[0], fix_input) + rec = pd.concat([rec, rand_rec], axis=0, ignore_index=True) + + # --- Batch-selection heuristic (practically relevant when using MACE in parallel mode) --- + # Select final batch; for larger batches force one uncertain and one best-predicted point. + select_id = np.random.choice(rec.shape[0], n_suggestions, replace=False).tolist() + x_guess = [] + with torch.no_grad(): + py_all = mu(*self.space.transform(rec)).squeeze().numpy() + ps_all = -1 * sig(*self.space.transform(rec)).squeeze().numpy() + best_pred_id = np.argmin(py_all) + best_unce_id = np.argmax(ps_all) + # These replacements are only triggered for n_suggestions > 2 (batch mode). + # In this HEBO implementation, batch mode is gated by the MACE check above. + if best_unce_id not in select_id and n_suggestions > 2: + select_id[0] = best_unce_id + if best_pred_id not in select_id and n_suggestions > 2: + select_id[1] = best_pred_id + rec_selected = rec.iloc[select_id].copy() + + # Return proposed configuration(s) as a pandas DataFrame. + return rec_selected diff --git a/man/ArchiveBatch.Rd b/man/ArchiveBatch.Rd index a80c85ec..f9b3067e 100644 --- a/man/ArchiveBatch.Rd +++ b/man/ArchiveBatch.Rd @@ -71,7 +71,8 @@ as.data.table(instance$archive) Contains all performed \link{Objective} function calls.} \item{\code{data_extra}}{(named \code{list})\cr -Data created by specific \code{\link{Optimizer}}s that does not relate to any individual function evaluation and can therefore not be held in \verb{$data}. +Data created by specific \code{\link{Optimizer}}s that does not relate to any individual function evaluation +and can therefore not be held in \verb{$data}. 
Every optimizer should create and refer to its own entry in this list, named by its \code{class()}.} } \if{html}{\out{}} diff --git a/man/CallbackAsync.Rd b/man/CallbackAsync.Rd index 0bb9ea67..905c8ae7 100644 --- a/man/CallbackAsync.Rd +++ b/man/CallbackAsync.Rd @@ -7,7 +7,8 @@ Specialized \link[mlr3misc:Callback]{mlr3misc::Callback} for asynchronous optimization. Callbacks allow to customize the behavior of processes in bbotk. The \code{\link[=callback_async]{callback_async()}} function creates a \link{CallbackAsync}. -Predefined callbacks are stored in the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_callbacks} and can be retrieved with \code{\link[=clbk]{clbk()}}. +Predefined callbacks are stored in the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_callbacks} +and can be retrieved with \code{\link[=clbk]{clbk()}}. For more information on optimization callbacks see \code{\link[=callback_async]{callback_async()}}. } \seealso{ diff --git a/man/CallbackBatch.Rd b/man/CallbackBatch.Rd index 90721439..2496f385 100644 --- a/man/CallbackBatch.Rd +++ b/man/CallbackBatch.Rd @@ -7,7 +7,8 @@ Specialized \link[mlr3misc:Callback]{mlr3misc::Callback} for batch optimization. Callbacks allow to customize the behavior of processes in bbotk. The \code{\link[=callback_batch]{callback_batch()}} function creates a \link{CallbackBatch}. -Predefined callbacks are stored in the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_callbacks} and can be retrieved with \code{\link[=clbk]{clbk()}}. +Predefined callbacks are stored in the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_callbacks} +and can be retrieved with \code{\link[=clbk]{clbk()}}. For more information on optimization callbacks see \code{\link[=callback_batch]{callback_batch()}}. } \examples{ diff --git a/man/Codomain.Rd b/man/Codomain.Rd index 9468cc43..be3b0cfd 100644 --- a/man/Codomain.Rd +++ b/man/Codomain.Rd @@ -7,7 +7,8 @@ A \link[paradox:ParamSet]{paradox::ParamSet} defining the codomain of a function. 
The parameter set must contain at least one target parameter tagged with \code{"minimize"}, \code{"maximize"}, or \code{"learn"}. -The codomain may contain extra parameters which are ignored when calling the \link{Archive} methods \verb{$best()}, \verb{$nds_selection()} and \verb{$cols_y}. +The codomain may contain extra parameters which are ignored when calling the \link{Archive} methods +\verb{$best()}, \verb{$nds_selection()} and \verb{$cols_y}. This class is usually constructed internally from a \link[paradox:ParamSet]{paradox::ParamSet} when \link{Objective} is initialized. } \examples{ diff --git a/man/Objective.Rd b/man/Objective.Rd index f24a00ce..17e9b0de 100644 --- a/man/Objective.Rd +++ b/man/Objective.Rd @@ -7,7 +7,8 @@ The \code{Objective} class describes a black-box objective function that maps an arbitrary domain to a numerical codomain. } \details{ -\code{Objective} objects can have the following properties: \code{"noisy"}, \code{"deterministic"}, \code{"single-crit"} and \code{"multi-crit"}. +\code{Objective} objects can have the following properties: \code{"noisy"}, \code{"deterministic"}, \code{"single-crit"}, +and \code{"multi-crit"}. } \seealso{ \link{ObjectiveRFun}, \link{ObjectiveRFunMany}, \link{ObjectiveRFunDt} diff --git a/man/ObjectiveRFunMany.Rd b/man/ObjectiveRFunMany.Rd index 0a163378..b3941ee9 100644 --- a/man/ObjectiveRFunMany.Rd +++ b/man/ObjectiveRFunMany.Rd @@ -97,8 +97,11 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \if{html}{\out{
}} \describe{ \item{\code{fun}}{(\code{function})\cr -R function that encodes objective and expects a list of lists that contains multiple x values, e.g. \code{list(list(x1 = 1, x2 = 2), list(x1 = 3, x2 = 4))}. -The function must return a \code{\link[data.table:data.table]{data.table::data.table()}} that contains one y-column for single-criteria functions and multiple y-columns for multi-criteria functions, e.g. \code{data.table(y = 1:2)} or \code{data.table(y1 = 1:2, y2 = 3:4)}.} +R function that encodes objective and expects a list of lists that contains multiple x values, +e.g. \code{list(list(x1 = 1, x2 = 2), list(x1 = 3, x2 = 4))}. +The function must return a \code{\link[data.table:data.table]{data.table::data.table()}} that contains one y-column for +single-criteria functions and multiple y-columns for multi-criteria functions, +e.g. \code{data.table(y = 1:2)} or \code{data.table(y1 = 1:2, y2 = 3:4)}.} \item{\code{domain}}{(\link[paradox:ParamSet]{paradox::ParamSet})\cr Specifies domain of function. @@ -145,7 +148,9 @@ A list of lists that contains multiple x values, e.g. \code{list(list(x1 = 1, x2 \if{html}{\out{
}} } \subsection{Returns}{ -\code{\link[data.table:data.table]{data.table::data.table()}} that contains one y-column for single-criteria functions and multiple y-columns for multi-criteria functions, e.g. \code{data.table(y = 1:2)} or \code{data.table(y1 = 1:2, y2 = 3:4)}. +\code{\link[data.table:data.table]{data.table::data.table()}} that contains one y-column for single-criteria functions and +multiple y-columns for multi-criteria functions, +e.g. \code{data.table(y = 1:2)} or \code{data.table(y1 = 1:2, y2 = 3:4)}. It may also contain additional columns that will be stored in the archive if called through the \link{OptimInstance}. These extra columns are referred to as \emph{extras}. } diff --git a/man/OptimInstance.Rd b/man/OptimInstance.Rd index ca9268d2..afba4bb5 100644 --- a/man/OptimInstance.Rd +++ b/man/OptimInstance.Rd @@ -10,7 +10,8 @@ Inherits from \link{EvalInstance} and adds optimization-specific functionality. \details{ \code{OptimInstance} is an abstract base class that implements the base functionality each instance must provide. The \link{Optimizer} writes the final result to the \code{.result} field by using the \verb{$assign_result()} method. -\code{.result} stores a \link[data.table:data.table]{data.table::data.table} consisting of x values in the \emph{search space}, (transformed) x values in the \emph{domain space} and y values in the \emph{codomain space} of the \link{Objective}. +\code{.result} stores a \link[data.table:data.table]{data.table::data.table} consisting of x values in the \emph{search space}, +(transformed) x values in the \emph{domain space} and y values in the \emph{codomain space} of the \link{Objective}. The user can access the results with active bindings (see below). 
} \seealso{ diff --git a/man/OptimInstanceAsyncMultiCrit.Rd b/man/OptimInstanceAsyncMultiCrit.Rd index ccdb65e0..6157939c 100644 --- a/man/OptimInstanceAsyncMultiCrit.Rd +++ b/man/OptimInstanceAsyncMultiCrit.Rd @@ -132,7 +132,8 @@ If a rush instance is supplied, the tuning runs without batches.} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-OptimInstanceAsyncMultiCrit-assign_result}{}}} \subsection{Method \code{assign_result()}}{ -The \link{OptimizerAsync} writes the best found points and estimated performance values here (probably the Pareto set / front). +The \link{OptimizerAsync} writes the best found points and estimated performance values here +(probably the Pareto set / front). For internal use. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{OptimInstanceAsyncMultiCrit$assign_result(xdt, ydt, extra = NULL, ...)}\if{html}{\out{
}} diff --git a/man/Optimizer.Rd b/man/Optimizer.Rd index ba290734..e4f0e82d 100644 --- a/man/Optimizer.Rd +++ b/man/Optimizer.Rd @@ -9,7 +9,8 @@ The \code{Optimizer} implements the optimization algorithm. \details{ \code{Optimizer} is an abstract base class that implements the base functionality each optimizer must provide. A \code{Optimizer} object describes the optimization strategy. -A \code{Optimizer} object must write its result to the \verb{$assign_result()} method of the \link{OptimInstance} at the end in order to store the best point and its estimated performance vector. +A \code{Optimizer} object must write its result to the \verb{$assign_result()} method of the \link{OptimInstance} at the end +in order to store the best point and its estimated performance vector. } \section{Progress Bars}{ @@ -46,7 +47,8 @@ String in the format \verb{[pkg]::[topic]} pointing to a manual page for this ob The referenced help package can be opened via method \verb{$help()}.} \item{\code{param_classes}}{(\code{character()})\cr -Supported parameter classes that the optimizer can optimize, as given in the \code{\link[paradox:ParamSet]{paradox::ParamSet}} \verb{$class} field.} +Supported parameter classes that the optimizer can optimize, +as given in the \code{\link[paradox:ParamSet]{paradox::ParamSet}} \verb{$class} field.} \item{\code{properties}}{(\code{character()})\cr Set of properties of the optimizer. @@ -54,7 +56,8 @@ Must be a subset of \code{\link[=bbotk_reflections]{bbotk_reflections$optimizer_ \item{\code{packages}}{(\code{character()})\cr Set of required packages. 
-A warning is signaled by the constructor if at least one of the packages is not installed, but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.} +A warning is signaled by the constructor if at least one of the packages is not installed, +but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.} } \if{html}{\out{}} } @@ -95,7 +98,8 @@ Identifier for the new instance.} Set of control parameters.} \item{\code{param_classes}}{(\code{character()})\cr -Supported parameter classes that the optimizer can optimize, as given in the \code{\link[paradox:ParamSet]{paradox::ParamSet}} \verb{$class} field.} +Supported parameter classes that the optimizer can optimize, +as given in the \code{\link[paradox:ParamSet]{paradox::ParamSet}} \verb{$class} field.} \item{\code{properties}}{(\code{character()})\cr Set of properties of the optimizer. @@ -103,7 +107,8 @@ Must be a subset of \code{\link[=bbotk_reflections]{bbotk_reflections$optimizer_ \item{\code{packages}}{(\code{character()})\cr Set of required packages. -A warning is signaled by the constructor if at least one of the packages is not installed, but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.} +A warning is signaled by the constructor if at least one of the packages is not installed, +but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.} \item{\code{label}}{(\code{character(1)})\cr Label for this object. diff --git a/man/OptimizerAsync.Rd b/man/OptimizerAsync.Rd index 413827e3..b4a6e4c7 100644 --- a/man/OptimizerAsync.Rd +++ b/man/OptimizerAsync.Rd @@ -50,7 +50,8 @@ The debug mode is enabled by setting \code{options(bbotk.debug = TRUE)}. \section{Tiny Logging}{ The tiny logging mode is enabled by setting the option \code{bbotk.tiny_logging} to \code{TRUE}. 
-In the tiny logging mode, the evaluated points are printed in a compact format and the currently best performing point is shown. +In the tiny logging mode, the evaluated points are printed in a compact format and the currently best +performing point is shown. Deactivated depending parameters are not printed. } diff --git a/man/OptimizerBatch.Rd b/man/OptimizerBatch.Rd index d27ad382..7e78583a 100644 --- a/man/OptimizerBatch.Rd +++ b/man/OptimizerBatch.Rd @@ -6,7 +6,8 @@ \description{ Abstract \code{OptimizerBatch} class that implements the base functionality each \code{OptimizerBatch} subclass must provide. A \code{OptimizerBatch} object describes the optimization strategy. -A \code{OptimizerBatch} object must write its result to the \verb{$assign_result()} method of the \link{OptimInstance} at the end in order to store the best point and its estimated performance vector. +A \code{OptimizerBatch} object must write its result to the \verb{$assign_result()} method of the \link{OptimInstance} at the end +in order to store the best point and its estimated performance vector. } \section{Progress Bars}{ @@ -44,7 +45,8 @@ combined with a \link{Terminator}. Simply wrap the function in \if{latex}{\out{\hypertarget{method-OptimizerBatch-optimize}{}}} \subsection{Method \code{optimize()}}{ Performs the optimization and writes optimization result into \link{OptimInstanceBatch}. -The optimization result is returned but the complete optimization path is stored in \link{ArchiveBatch} of \link{OptimInstanceBatch}. +The optimization result is returned but the complete optimization path is stored in \link{ArchiveBatch} +of \link{OptimInstanceBatch}. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{OptimizerBatch$optimize(inst)}\if{html}{\out{
}} } diff --git a/man/OptimizerBatchHEBO.Rd b/man/OptimizerBatchHEBO.Rd new file mode 100644 index 00000000..e51b0f61 --- /dev/null +++ b/man/OptimizerBatchHEBO.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/OptimizerBatchHEBO.R +\name{OptimizerBatchHEBO} +\alias{OptimizerBatchHEBO} +\title{Heteroscedastic Evolutionary Bayesian Optimization} +\description{ +Heteroscedastic evolutionary Bayesian optimization + +Heteroscedastic evolutionary Bayesian optimization +} +\section{Super classes}{ +\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{\link[bbotk:OptimizerBatch]{bbotk::OptimizerBatch}} -> \code{OptimizerBatchHEBO} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-OptimizerBatchHEBO-new}{\code{OptimizerBatchHEBO$new()}} +\item \href{#method-OptimizerBatchHEBO-clone}{\code{OptimizerBatchHEBO$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-OptimizerBatchHEBO-new}{}}} +\subsection{Method \code{new()}}{ +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{OptimizerBatchHEBO$new()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-OptimizerBatchHEBO-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{OptimizerBatchHEBO$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/Terminator.Rd b/man/Terminator.Rd index 9e2118f8..99df3545 100644 --- a/man/Terminator.Rd +++ b/man/Terminator.Rd @@ -11,17 +11,24 @@ Termination of optimization works as follows: \itemize{ \item Evaluations in a instance are performed in batches. \item Before each batch evaluation, the \link{Terminator} is checked, and if it is positive, we stop. -\item The optimization algorithm itself might decide not to produce any more points, or even might decide to do a smaller batch in its last evaluation. +\item The optimization algorithm itself might decide not to produce any more points, +or even might decide to do a smaller batch in its last evaluation. } Therefore the following note seems in order: -While it is definitely possible to execute a fine-grained control for termination, and for many optimization algorithms we can specify exactly when to stop, it might happen that too few or even too many evaluations are performed, especially if multiple points are evaluated in a single batch (c.f. batch size parameter of many optimization algorithms). -So it is advised to check the size of the returned archive, in particular if you are benchmarking multiple optimization algorithms. +While it is definitely possible to execute a fine-grained control for termination, +and for many optimization algorithms we can specify exactly when to stop, +it might happen that too few or even too many evaluations are performed, +especially if multiple points are evaluated in a single batch +(c.f. batch size parameter of many optimization algorithms). +So it is advised to check the size of the returned archive, +in particular if you are benchmarking multiple optimization algorithms. } \section{Technical details}{ \code{Terminator} subclasses can overwrite \code{.status()} to support progress bars via the package \CRANpkg{progressr}. 
-The method must return the maximum number of steps (\code{max_steps}) and the currently achieved number of steps (\code{current_steps}) as a named integer vector. +The method must return the maximum number of steps (\code{max_steps}) and the currently achieved number of steps +(\code{current_steps}) as a named integer vector. } \seealso{ diff --git a/man/assert_python_packages.Rd b/man/assert_python_packages.Rd new file mode 100644 index 00000000..a9375ef2 --- /dev/null +++ b/man/assert_python_packages.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assertions.R +\name{assert_python_packages} +\alias{assert_python_packages} +\title{Assert Python Packages} +\usage{ +assert_python_packages(packages, python_version = NULL) +} +\arguments{ +\item{packages}{(\code{character()})\cr +Python packages to check.} + +\item{python_version}{(\code{character(1)})\cr +Python version to use. If \code{NULL}, the default Python version is used.} +} +\value{ +(\code{character()})\cr +Invisibly returns the input \code{packages} vector if all requested Python packages are available; otherwise throws an error listing the missing packages. +} +\description{ +Assert that the given Python packages are available. +} diff --git a/man/bb_optimize.Rd b/man/bb_optimize.Rd index dd9d532e..1b3b3209 100644 --- a/man/bb_optimize.Rd +++ b/man/bb_optimize.Rd @@ -58,7 +58,8 @@ domain.} Upper bounds on the parameters.} \item{maximize}{(\code{logical()})\cr -Logical vector used to create the codomain e.g. c(TRUE, FALSE) -> ps(y1 = p_dbl(tags = "maximize"), y2 = pd_dbl(tags = "minimize")). +Logical vector used to create the codomain e.g. +c(TRUE, FALSE) -> ps(y1 = p_dbl(tags = "maximize"), y2 = p_dbl(tags = "minimize")).
If named, names are used to create the codomain.} \item{\code{search_space}}{(\link[paradox:ParamSet]{paradox::ParamSet}).} diff --git a/man/branin.Rd b/man/branin.Rd index 2fc1d791..7d637dd9 100644 --- a/man/branin.Rd +++ b/man/branin.Rd @@ -27,7 +27,8 @@ branin_wu(x1, x2, fidelity) \code{numeric()} } \description{ -Classic 2-D Branin function with noise \code{branin(x1, x2, noise)} and Branin function with fidelity parameter \code{branin_wu(x1, x2, fidelity)}. +Classic 2-D Branin function with noise \code{branin(x1, x2, noise)} and Branin function with fidelity parameter +\code{branin_wu(x1, x2, fidelity)}. } \examples{ branin(x1 = 12, x2 = 2, noise = 0.05) diff --git a/man/local_search.Rd b/man/local_search.Rd index dfbc15f0..ec0828da 100644 --- a/man/local_search.Rd +++ b/man/local_search.Rd @@ -14,8 +14,10 @@ local_search( \arguments{ \item{objective}{(\verb{function(xdt)})\cr Objective to optimize. -The first arg (name 'xdt' is not enforced) will be a data.table with (scalar) columns corresponding exactly the search space, in the same order. -The function should must return numeric vector of exactly the same length as the number of rows in the dt, containing the objective values.} +The first arg (name 'xdt' is not enforced) will be a data.table with (scalar) columns +corresponding exactly to the search space, in the same order. +The function must return a numeric vector of exactly the same length as the number of rows +in the dt, containing the objective values.} \item{search_space}{(\link[paradox:ParamSet]{paradox::ParamSet})\cr Search space for decision variables. @@ -41,7 +43,9 @@ The objective value of the best point. } \description{ Runs a local search on the objective function. -Somewhat similar to what is used in \href{https://github.com/automl/SMAC3/blob/main/smac/acquisition/maximizer/local_search.py}{SMAC} for acquisition function optimization of mixed type search spaces with hierarchical dependencies.
+Somewhat similar to what is used in +\href{https://github.com/automl/SMAC3/blob/main/smac/acquisition/maximizer/local_search.py}{SMAC} +for acquisition function optimization of mixed type search spaces with hierarchical dependencies. The function always minimizes. If the objective is to be maximized, we handle it by multiplying with "obj_mult" (which will be -1). diff --git a/man/mlr_optimizers.Rd b/man/mlr_optimizers.Rd index cb1ae0d1..b3d918ca 100644 --- a/man/mlr_optimizers.Rd +++ b/man/mlr_optimizers.Rd @@ -25,7 +25,8 @@ See \link[mlr3misc:Dictionary]{mlr3misc::Dictionary}. \itemize{ \item \code{as.data.table(dict, ..., objects = FALSE)}\cr \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} -> \code{\link[data.table:data.table]{data.table::data.table()}}\cr -Returns a \code{\link[data.table:data.table]{data.table::data.table()}} with fields "key", "label", "param_classes", "properties" and "packages" as columns. +Returns a \code{\link[data.table:data.table]{data.table::data.table()}} with fields "key", "label", "param_classes", "properties", +and "packages" as columns. If \code{objects} is set to \code{TRUE}, the constructed objects are returned in the list column named \code{object}. } } diff --git a/man/mlr_optimizers_async_grid_search.Rd b/man/mlr_optimizers_async_grid_search.Rd index 766373fb..5f625a4b 100644 --- a/man/mlr_optimizers_async_grid_search.Rd +++ b/man/mlr_optimizers_async_grid_search.Rd @@ -12,7 +12,8 @@ Bergstra J, Bengio Y (2012). } \description{ \code{OptimizerAsyncGridSearch} class that implements a grid search. -The grid is constructed as a Cartesian product over discretized values per parameter, see \code{\link[paradox:generate_design_grid]{paradox::generate_design_grid()}}. +The grid is constructed as a Cartesian product over discretized values per parameter, +see \code{\link[paradox:generate_design_grid]{paradox::generate_design_grid()}}. The points of the grid are evaluated in a random order. 
} \section{Dictionary}{ diff --git a/man/mlr_optimizers_chain.Rd b/man/mlr_optimizers_chain.Rd index 830659cd..af193ef4 100644 --- a/man/mlr_optimizers_chain.Rd +++ b/man/mlr_optimizers_chain.Rd @@ -14,7 +14,8 @@ the additional \link{Terminator}s guard each individual \link{OptimizerBatch}. The optimization process works as follows: The first \link{OptimizerBatch} is run on the \link{OptimInstanceBatch} relying on a \link{TerminatorCombo} of the original \link{Terminator} of the \link{OptimInstanceBatch} and the (optional) additional \link{Terminator} as passed during construction. -Once this \link{TerminatorCombo} indicates termination (usually via the additional \link{Terminator}), the second \link{OptimizerBatch} is run. +Once this \link{TerminatorCombo} indicates termination (usually via the additional \link{Terminator}), +the second \link{OptimizerBatch} is run. This continues for all optimizers unless the original \link{Terminator} of the \link{OptimInstanceBatch} indicates termination. \link{OptimizerBatchChain} can also be used for random restarts of the same diff --git a/man/mlr_optimizers_cmaes.Rd b/man/mlr_optimizers_cmaes.Rd index 333b4a14..f0961e4c 100644 --- a/man/mlr_optimizers_cmaes.Rd +++ b/man/mlr_optimizers_cmaes.Rd @@ -5,7 +5,7 @@ \alias{OptimizerBatchCmaes} \title{Optimization via Covariance Matrix Adaptation Evolution Strategy} \description{ -\code{OptimizerBatchCmaes} class that implements CMA-ES. Calls \code{\link[adagio:cmaes]{adagio::pureCMAES()}} +\code{OptimizerBatchCmaes} class that implements CMA-ES. Calls \code{\link[adagio:pureCMAES]{adagio::pureCMAES()}} from package \CRANpkg{adagio}. The algorithm is typically applied to search space dimensions between three and fifty. Lower search space dimensions might crash. @@ -32,7 +32,7 @@ If set to \code{"custom"}, the start values can be passed via the \code{start} p Custom start values. 
Only applicable if \code{start_values} parameter is set to \code{"custom"}.} } -For the meaning of the control parameters, see \code{\link[adagio:cmaes]{adagio::pureCMAES()}}. Note +For the meaning of the control parameters, see \code{\link[adagio:pureCMAES]{adagio::pureCMAES()}}. Note that we have removed all control parameters which refer to the termination of the algorithm and where our terminators allow to obtain the same behavior. } diff --git a/man/mlr_optimizers_focus_search.Rd b/man/mlr_optimizers_focus_search.Rd index 52c6f97d..810e2b66 100644 --- a/man/mlr_optimizers_focus_search.Rd +++ b/man/mlr_optimizers_focus_search.Rd @@ -8,7 +8,9 @@ \code{OptimizerBatchFocusSearch} class that implements a Focus Search. Focus Search starts with evaluating \code{n_points} drawn uniformly at random. -For 1 to \code{maxit} batches, \code{n_points} are then drawn uniformly at random and if the best value of a batch outperforms the previous best value over all batches evaluated so far, the search space is shrinked around this new best point prior to the next batch being sampled and evaluated. +For 1 to \code{maxit} batches, \code{n_points} are then drawn uniformly at random and if the best value of a batch +outperforms the previous best value over all batches evaluated so far, +the search space is shrunk around this new best point prior to the next batch being sampled and evaluated. For details on the shrinking, see \link{shrink_ps}. diff --git a/man/mlr_optimizers_irace.Rd b/man/mlr_optimizers_irace.Rd index 727e97dd..e4ca03a5 100644 --- a/man/mlr_optimizers_irace.Rd +++ b/man/mlr_optimizers_irace.Rd @@ -46,7 +46,8 @@ Default is NA.} \item{\code{maxTime}}{\code{integer(1)}\cr Maximum total execution time for the executions of targetRunner. targetRunner must return two values: cost and time. -This value and the one returned by targetRunner must use the same units (seconds, minutes, iterations, evaluations, ...).
+This value and the one returned by targetRunner must use the same units +(seconds, minutes, iterations, evaluations, ...). Default is 0.} \item{\code{budgetEstimation}}{\code{numeric(1)}\cr Fraction (smaller than 1) of the budget used to estimate the mean computation time of a configuration. diff --git a/man/mlr_optimizers_nloptr.Rd b/man/mlr_optimizers_nloptr.Rd index 58e22ae4..fe96f299 100644 --- a/man/mlr_optimizers_nloptr.Rd +++ b/man/mlr_optimizers_nloptr.Rd @@ -29,7 +29,8 @@ Custom start values can be passed via the \code{x0} parameter.} \item{\code{approximate_eval_grad_f}}{\code{logical(1)}\cr Should gradients be numerically approximated via finite differences (\link[nloptr:nl.grad]{nloptr::nl.grad}). Only required for certain algorithms. -Note that function evaluations required for the numerical gradient approximation will be logged as usual and are not treated differently than regular function evaluations by, e.g., \link{Terminator}s.} +Note that function evaluations required for the numerical gradient approximation will be logged as usual +and are not treated differently than regular function evaluations by, e.g., \link{Terminator}s.} } For the meaning of other control parameters, see \code{\link[nloptr:nloptr]{nloptr::nloptr()}} and \code{\link[nloptr:nloptr.print.options]{nloptr::nloptr.print.options()}}. diff --git a/man/mlr_optimizers_smac.Rd b/man/mlr_optimizers_smac.Rd new file mode 100644 index 00000000..5bd7d031 --- /dev/null +++ b/man/mlr_optimizers_smac.Rd @@ -0,0 +1,243 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/OptimizerBatchSmac3.R +\name{mlr_optimizers_smac} +\alias{mlr_optimizers_smac} +\alias{OptimizerBatchSmac3} +\title{Sequential Model-Based Algorithm Configuration (SMAC3)} +\source{ +Lindauer M, Eggensperger K, Feurer M, Biedenkapp A, Deng D, Benjamins C, Ruhkopf T, Sass R, Hutter F (2022). 
+\dQuote{SMAC3: A Versatile Bayesian Optimization Package for Hyperparameter Optimization.} +\emph{Journal of Machine Learning Research}, \bold{23}(54), 1--9. +\url{http://jmlr.org/papers/v23/21-0888.html}. +} +\description{ +Calls SMAC3 from Python via the \CRANpkg{reticulate} package. +} +\note{ +All parameters of the search space must have default values. +} +\section{Parameters}{ + +\describe{ + +\item{\code{n_init}}{\code{integer(1)}\cr +Number of initial configurations to evaluate before starting the optimization. +Defaults to \code{10} times the number of hyperparameters.} + +\item{\code{facade}}{\code{character(1)}\cr +Facade to use: +\code{"smac4bb"} (Black-Box Facade, uses Gaussian Process), +\code{"smac4hb"} (Hyperparameter Optimization Facade, uses Random Forest), +\code{"smac4ac"} (Algorithm Configuration Facade), +\code{"smac4mf"} (Multi-Fidelity Facade, uses Hyperband), +\code{"smac4rs"} (Random Facade / ROAR, no surrogate model). +Default is \code{"smac4bb"}.} + +\item{\code{output_directory}}{\code{character(1)}\cr +Directory to store the output of SMAC3. +Default is a temporary directory.} + +\item{\code{deterministic}}{\code{logical(1)}\cr +Whether the objective function is deterministic. +If \code{FALSE}, SMAC may re-evaluate configurations with different seeds. +Default is \code{TRUE}.} + +\item{\code{crash_cost}}{\code{numeric(1)}\cr +Cost assigned to crashed or failed trials.} + +\item{\code{seed}}{\code{integer(1)}\cr +Seed for the random number generator in SMAC. +Default is a random seed.} + +\item{\code{surrogate}}{\code{character(1)}\cr +Surrogate model to use. +\code{"rf"} (Random Forest) or \code{"gp"} (Gaussian Process). +Default is the facade's default (GP for \code{"smac4bb"}, RF for \code{"smac4hb"} and \code{"smac4ac"}).} + +\item{\code{rf.n_trees}}{\code{integer(1)}\cr +Number of trees in the Random Forest surrogate. +Only used when \code{surrogate = "rf"}. 
+Default is \code{10}.} + +\item{\code{rf.ratio_features}}{\code{numeric(1)}\cr +Ratio of features used per tree in the Random Forest. +Only used when \code{surrogate = "rf"}. +Default is \code{1.0}.} + +\item{\code{rf.min_samples_split}}{\code{integer(1)}\cr +Minimum number of samples to split a node in the Random Forest. +Only used when \code{surrogate = "rf"}. +Default is \code{2}.} + +\item{\code{rf.min_samples_leaf}}{\code{integer(1)}\cr +Minimum number of samples per leaf in the Random Forest. +Only used when \code{surrogate = "rf"}. +Default is \code{1}.} + +\item{\code{rf.max_depth}}{\code{integer(1)}\cr +Maximum depth of trees in the Random Forest. +Only used when \code{surrogate = "rf"}. +Default is \code{1048576}.} + +\item{\code{gp.n_restarts}}{\code{integer(1)}\cr +Number of restarts for Gaussian Process hyperparameter optimization. +Only used when \code{surrogate = "gp"}. +Default is \code{10}.} + +\item{\code{acq_function}}{\code{character(1)}\cr +Acquisition function to use. +\code{"ei"} (Expected Improvement), \code{"lcb"} (Lower Confidence Bound), +\code{"pi"} (Probability of Improvement), or \code{"ts"} (Thompson Sampling). +Default is the facade's default.} + +\item{\code{acq_function.xi}}{\code{numeric(1)}\cr +Exploration-exploitation trade-off parameter for Expected Improvement and +Probability of Improvement. +Only used when \code{acq_function} is \code{"ei"} or \code{"pi"}. +Default is \code{0.0}.} + +\item{\code{acq_function.beta}}{\code{numeric(1)}\cr +Exploration-exploitation trade-off parameter for Lower Confidence Bound. +Only used when \code{acq_function = "lcb"}. +Default is \code{1.0}.} + +\item{\code{initial_design}}{\code{character(1)}\cr +Initial design strategy. +\code{"sobol"}, \code{"random"}, \code{"lhc"} (Latin Hypercube), \code{"factorial"}, or \code{"default"}. +Default is the facade's default.} + +\item{\code{max_config_calls}}{\code{integer(1)}\cr +Maximum number of evaluations per configuration. 
+Values larger than \code{1} are useful for stochastic objectives. +Default is \code{1}.} + +\item{\code{random_design}}{\code{character(1)}\cr +Strategy for interleaving random configurations during optimization. +\code{"probability"} or \code{"modulus"}. +Default is the facade's default.} + +\item{\code{random_design.probability}}{\code{numeric(1)}\cr +Probability of sampling a random configuration instead of using the surrogate model. +Only used when \code{random_design = "probability"}.} + +\item{\code{random_design.modulus}}{\code{numeric(1)}\cr +Every \code{modulus}-th configuration is drawn randomly. +Only used when \code{random_design = "modulus"}.} + +\item{\code{eta}}{\code{integer(1)}\cr +Halving factor for Successive Halving / Hyperband. +Only used when \code{facade = "smac4mf"}. +Default is \code{3}.} + +\item{\code{min_budget}}{\code{numeric(1)}\cr +Minimum budget for multi-fidelity optimization (e.g., epochs, subset fraction). +Only used when \code{facade = "smac4mf"}.} + +\item{\code{max_budget}}{\code{numeric(1)}\cr +Maximum budget for multi-fidelity optimization. +Only used when \code{facade = "smac4mf"}.} + +} +} + +\section{Progress Bars}{ + +\verb{$optimize()} supports progress bars via the package \CRANpkg{progressr} +combined with a \link{Terminator}. Simply wrap the function in +\code{progressr::with_progress()} to enable them. We recommend to use package +\CRANpkg{progress} as backend; enable with \code{progressr::handlers("progress")}. 
+} + +\examples{ +\dontrun{ +# define the objective function +fun = function(xs) { + list(y = -(xs[[1]] - 2)^2 - (xs[[2]] + 3)^2 + 10) +} + +# set domain (all parameters must have defaults for ConfigSpace) +domain = ps( + x1 = p_dbl(-10, 10, default = 0), + x2 = p_dbl(-5, 5, default = 0) +) + +# set codomain +codomain = ps( + y = p_dbl(tags = "maximize") +) + +# create objective +objective = ObjectiveRFun$new( + fun = fun, + domain = domain, + codomain = codomain, + properties = "deterministic" +) + +# initialize instance +instance = oi( + objective = objective, + terminator = trm("evals", n_evals = 20) +) + +# load optimizer +optimizer = opt("smac") + +# trigger optimization +optimizer$optimize(instance) + +# all evaluated configurations +instance$archive + +# best performing configuration +instance$result +} +} +\section{Super classes}{ +\code{\link[bbotk:Optimizer]{bbotk::Optimizer}} -> \code{\link[bbotk:OptimizerBatch]{bbotk::OptimizerBatch}} -> \code{OptimizerBatchSmac3} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-OptimizerBatchSmac3-new}{\code{OptimizerBatchSmac3$new()}} +\item \href{#method-OptimizerBatchSmac3-clone}{\code{OptimizerBatchSmac3$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-OptimizerBatchSmac3-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{OptimizerBatchSmac3$new()}\if{html}{\out{
}} +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-OptimizerBatchSmac3-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{OptimizerBatchSmac3$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/mlr_terminators_none.Rd b/man/mlr_terminators_none.Rd index 73c8b43a..01f7406b 100644 --- a/man/mlr_terminators_none.Rd +++ b/man/mlr_terminators_none.Rd @@ -5,7 +5,8 @@ \alias{TerminatorNone} \title{None Terminator} \description{ -Mainly useful for optimization algorithms where the stopping is inherently controlled by the algorithm itself (e.g. \link{OptimizerBatchGridSearch}). +Mainly useful for optimization algorithms where the stopping is inherently controlled by the algorithm itself +(e.g. \link{OptimizerBatchGridSearch}). } \section{Dictionary}{ diff --git a/man/mlr_terminators_perf_reached.Rd b/man/mlr_terminators_perf_reached.Rd index afaaa289..2fc9ac5c 100644 --- a/man/mlr_terminators_perf_reached.Rd +++ b/man/mlr_terminators_perf_reached.Rd @@ -24,7 +24,8 @@ trm("perf_reached") \item{\code{level}}{\code{numeric(1)}\cr Performance level that needs to be reached. Default is 0. -Terminates if the performance exceeds (respective measure has to be maximized) or falls below (respective measure has to be minimized) this value.} +Terminates if the performance exceeds (respective measure has to be maximized) or falls below +(respective measure has to be minimized) this value.} } } diff --git a/man/mlr_terminators_stagnation_hypervolume.Rd b/man/mlr_terminators_stagnation_hypervolume.Rd index 8df2b7c6..d04ef4ac 100644 --- a/man/mlr_terminators_stagnation_hypervolume.Rd +++ b/man/mlr_terminators_stagnation_hypervolume.Rd @@ -5,7 +5,8 @@ \alias{TerminatorStagnationHypervolume} \title{Stagnation Hypervolume Terminator} \description{ -Class to terminate the optimization after the hypervolume stagnates, i.e. does not improve more than \code{threshold} over the last \code{iters} iterations. +Class to terminate the optimization after the hypervolume stagnates, +i.e. does not improve more than \code{threshold} over the last \code{iters} iterations. 
} \section{Dictionary}{ diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 610cba97..be66c3fa 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -89,3 +89,4 @@ reference: - terminated_error - bbotk_conditions - choose_search_space + - assert_python_packages diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 18e7349f..57a4392a 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -248,4 +248,8 @@ check_test_results = function(testres) { expect_true(testres[[i]], info = names(testres)[i], label = names(testres)[i]) } } + +if (requireNamespace("rush", quietly = TRUE)) { + source(system.file("testthat", "helper.R", package = "rush")) +} # nolint end: object_usage_linter. diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index fe0cde39..2cdd64ea 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -4,12 +4,22 @@ old_opts = options( warnPartialMatchDollar = TRUE ) -lapply(list.files(system.file("testthat", package = "rush"), pattern = "^helper.*\\.[rR]$", full.names = TRUE), source) - # https://github.com/HenrikBengtsson/Wishlist-for-R/issues/88 old_opts = lapply(old_opts, function(x) if (is.null(x)) FALSE else x) -lg = lgr::get_logger("mlr3") +lg_bbotk = lgr::get_logger("mlr3/bbotk") +lg_rush = lgr::get_logger("mlr3/rush") + +old_threshold_bbotk = lg_bbotk$threshold +old_threshold_rush = lg_rush$threshold -old_threshold = lg$threshold -lg$set_threshold(0) +lg_bbotk$set_threshold(0) +lg_rush$set_threshold(0) + +# PyTorch's torchinductor JIT compiler leaves behind a temp directory that +# causes R CMD check to flag a NOTE about detritus. Clean it up after all tests. 
+withr::defer( + # nolint + unlink(file.path(dirname(tempdir()), "torchinductor_runner"), recursive = TRUE), + envir = teardown_env() +) diff --git a/tests/testthat/teardown.R b/tests/testthat/teardown.R index 7e2bf1fc..d2aeb045 100644 --- a/tests/testthat/teardown.R +++ b/tests/testthat/teardown.R @@ -1,2 +1,9 @@ options(old_opts) -lg$set_threshold(old_threshold) +lg_bbotk$set_threshold(old_threshold_bbotk) +lg_rush$set_threshold(old_threshold_rush) + +if (requireNamespace("reticulate", quietly = TRUE)) { + python_temp_dir = dirname(reticulate::py_run_string("import tempfile; x=tempfile.NamedTemporaryFile().name", local = TRUE)$x) + detritus = list.files(python_temp_dir, pattern = "^uv-setuptools-[0-9A-Fa-f]{16}\\.lock$", full.names = TRUE) + if (length(detritus)) unlink(detritus) +} diff --git a/tests/testthat/test_OptimizerBatchHEBO.R b/tests/testthat/test_OptimizerBatchHEBO.R new file mode 100644 index 00000000..14ff8e77 --- /dev/null +++ b/tests/testthat/test_OptimizerBatchHEBO.R @@ -0,0 +1,281 @@ +skip_if_not_installed("reticulate") +skip_if_not_installed("callr") + +test_that("OptimizerBatchHEBO", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps(x = p_dbl(lower = -1, upper = 1)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + optimizer = opt("hebo") + assert_class(optimizer, "OptimizerBatchHEBO") + optimizer$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + x_opt = instance$result_x_domain + y_opt = instance$result_y + assert_list(x_opt, len = 1L) + stopifnot(identical(names(x_opt), "x")) + 
assert_numeric(y_opt, len = 1L) + stopifnot(identical(names(y_opt), "y")) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with 2d search space", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1), + x2 = p_dbl(lower = -1, upper = 1) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 8L) + ) + opt("hebo")$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 8L) + stopifnot(instance$archive$n_evals == 8L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with mixed parameter types", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1), + x2 = p_int(lower = 1L, upper = 10L), + x3 = p_fct(levels = c("a", "b", "c")), + x4 = p_lgl() + ) + fun = function(xs) { + y = xs$x1^2 + xs$x2 / 10 + if (xs$x3 == "a") { + y = y + 1 + } + if (xs$x4) { + y = y + 0.5 + } + list(y = y) + } + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("hebo", n_init = 5L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with RF surrogate", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + 
library(bbotk) + library(paradox) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1), + x2 = p_dbl(lower = -1, upper = 1) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 8L) + ) + opt("hebo", surrogate = "rf", rf_n_estimators = 10L, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 8L) + stopifnot(instance$archive$n_evals == 8L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with GP surrogate", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps(x = p_dbl(lower = -1, upper = 1)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 6L) + ) + opt("hebo", surrogate = "gp", gp_lr = 0.05, gp_num_epochs = 50L, gp_noise_free = FALSE, n_init = 2L)$optimize( + instance + ) + assert_data_table(instance$archive$data, min.rows = 6L) + stopifnot(instance$archive$n_evals == 6L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with LCB acquisition function", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps(x = p_dbl(lower = -1, upper = 1)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = 
trm("evals", n_evals = 6L) + ) + opt("hebo", acq_function = "lcb", n_suggestions = 1L, n_init = 2L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 6L) + stopifnot(instance$archive$n_evals == 6L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with batch suggestions", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1), + x2 = p_dbl(lower = -1, upper = 1) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 9L) + ) + opt("hebo", n_suggestions = 2L, n_init = 3L)$optimize(instance) + # batches of 2 crossing the terminator boundary may yield 9 or 10 evals + assert_data_table(instance$archive$data, min.rows = 9L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with explicit seed", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps(x = p_dbl(lower = -1, upper = 1)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + opt("hebo", seed = 42L, n_init = 2L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with custom n_init", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + 
library(checkmate) + library(bbotk) + library(paradox) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1), + x2 = p_dbl(lower = -1, upper = 1) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 9L) + ) + opt("hebo", n_init = 6L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 9L) + stopifnot(instance$archive$n_evals == 9L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO with alternative evolutionary strategy", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1), + x2 = p_dbl(lower = -1, upper = 1) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 8L) + ) + opt("hebo", es = "ga", n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 8L) + stopifnot(instance$archive$n_evals == 8L) + TRUE + })) +}) + +test_that("OptimizerBatchHEBO maximization", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-hebo")) + library(checkmate) + library(bbotk) + library(paradox) + search_space = ps( + x1 = p_dbl(lower = -5, upper = 5), + x2 = p_dbl(lower = -5, upper = 5) + ) + fun = function(xs) list(y = -(xs$x1^2 + xs$x2^2)) + codomain = ps(y = p_dbl(tags = "maximize")) + objective = ObjectiveRFun$new( + fun = fun, + domain = search_space, + codomain = codomain, + properties = "single-crit" + ) + instance = 
OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 8L) + ) + opt("hebo", n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 8L) + stopifnot(instance$archive$n_evals == 8L) + TRUE + })) +}) diff --git a/tests/testthat/test_OptimizerBatchSmac3.R b/tests/testthat/test_OptimizerBatchSmac3.R new file mode 100644 index 00000000..0864743e --- /dev/null +++ b/tests/testthat/test_OptimizerBatchSmac3.R @@ -0,0 +1,376 @@ +skip_if_not_installed("reticulate") +skip_if_not_installed("callr") + +test_that("OptimizerBatchSmac3", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + optimizer = opt("smac") + assert_class(optimizer, "OptimizerBatchSmac3") + optimizer$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + x_opt = instance$result_x_domain + y_opt = instance$result_y + assert_list(x_opt, len = 1L) + stopifnot(identical(names(x_opt), "x")) + assert_numeric(y_opt, len = 1L) + stopifnot(identical(names(y_opt), "y")) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with 2d search space", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1, default = 0), + x2 = p_dbl(lower = -1, upper = 1, default = 0) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + 
objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", n_init = 5L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with mixed parameter types", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1, default = 0), + x2 = p_int(lower = 1L, upper = 10L, default = 5L), + x3 = p_fct(levels = c("a", "b", "c"), default = "a"), + x4 = p_lgl(default = TRUE) + ) + fun = function(xs) { + y = xs$x1^2 + xs$x2 / 10 + if (xs$x3 == "a") { + y = y + 1 + } + if (xs$x4) { + y = y + 0.5 + } + list(y = y) + } + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", n_init = 5L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with dependencies", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps( + x1 = p_fct(levels = c("a", "b"), default = "a"), + x2 = p_dbl(lower = -1, upper = 1, default = 0, depends = x1 == "a") + ) + fun = function(xs) { + y = if (xs$x1 == "a") xs$x2^2 else 1 + list(y = y) + } + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + 
objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", n_init = 5L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with HyperparameterOptimizationFacade", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + opt("smac", facade = "smac4hb", n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with AlgorithmConfigurationFacade", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + opt("smac", facade = "smac4ac", n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with RandomFacade", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + 
library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + opt("smac", facade = "smac4rs", n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with custom surrogate model", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1, default = 0), + x2 = p_dbl(lower = -1, upper = 1, default = 0) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", facade = "smac4hb", surrogate = "rf", rf.n_trees = 5L, n_init = 5L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with GP surrogate", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = 
trm("evals", n_evals = 5L) + ) + opt("smac", surrogate = "gp", gp.n_restarts = 5L, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with custom acquisition function", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + opt("smac", acq_function = "ei", acq_function.xi = 0.01, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with LCB acquisition function", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + opt("smac", acq_function = "lcb", acq_function.beta = 2.0, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with custom initial design", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = 
reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps( + x1 = p_dbl(lower = -1, upper = 1, default = 0), + x2 = p_dbl(lower = -1, upper = 1, default = 0) + ) + fun = function(xs) list(y = sum(as.numeric(xs)^2)) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + for (design in c("sobol", "random", "lhc", "default")) { + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", initial_design = design, n_init = 5L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + } + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with explicit seed", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 5L) + ) + opt("smac", seed = 42L, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 5L) + stopifnot(instance$archive$n_evals == 5L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with deterministic FALSE and max_config_calls", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + 
instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", deterministic = FALSE, max_config_calls = 2L, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with random design probability", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", random_design = "probability", random_design.probability = 0.2, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + TRUE + })) +}) + +test_that("OptimizerBatchSmac3 with MultiFidelityFacade", { + expect_true(callr::r(function() { + Sys.setenv(RETICULATE_PYTHON = reticulate::virtualenv_python("r-smac")) + library(bbotk) + library(paradox) + library(checkmate) + search_space = ps(x = p_dbl(lower = -1, upper = 1, default = 0)) + fun = function(xs) list(y = as.numeric(xs)^2) + objective = ObjectiveRFun$new(fun = fun, domain = search_space, properties = "single-crit") + instance = OptimInstanceBatchSingleCrit$new( + objective = objective, + search_space = search_space, + terminator = trm("evals", n_evals = 10L) + ) + opt("smac", facade = "smac4mf", eta = 3L, min_budget = 1, max_budget = 10, n_init = 3L)$optimize(instance) + assert_data_table(instance$archive$data, min.rows = 10L) + stopifnot(instance$archive$n_evals == 10L) + TRUE + 
})) +})