diff --git a/NEWS.md b/NEWS.md index 84e259bb..a954735a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,12 +4,18 @@ ## Major breaking changes + * Recovery (`--recover-file`) has been completely re-implemented. This + version of irace cannot recover files generated by previous versions and + vice versa. + ## New features and improvements * `psRace()` gains a `psrace_logFile` argument to avoid overwriting `scenario$logFile`. ## Fixes + * Fix #76: Recovery (`--recover-file`) is working again with a completely new implementation. + * Fixed documentation of `psRace()`. * Fix `psRace()` not saving `psrace_log` in `scenario$logFile`. diff --git a/R/aaa.R b/R/aaa.R index 06805958..3297a941 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -22,3 +22,7 @@ update_package_version <- function() update_package_version() # We define this tentatively to avoid: undefined exports: irace_version irace_version <- "unknown" + +.irace_tolerance <- sqrt(.Machine$double.eps) +.irace_minimum_saving_time <- 60 # seconds + diff --git a/R/irace.R b/R/irace.R index e63ae876..e85872f3 100644 --- a/R/irace.R +++ b/R/irace.R @@ -1,6 +1,3 @@ -# Sets irace variables from a recovery file. It is executed in the -# parent environment which must be irace(). -# # FIXME: Restoring occurs after reading the command-line/scenario file. At # least for the irace command-line parameters (scenario), it should occur # before. We would need to: @@ -16,31 +13,23 @@ # # A work-around is to modify the recovery file (you can load it in R, # modify scenario then save it again). -recoverFromFile <- function(filename) +recoverFromFile <- function(filename, scenario = list()) { - # substitute() is needed to evaluate filename here. - eval.parent(substitute({ - iraceResults <- read_logfile(filename) - if (iraceResults$irace_version != irace::irace_version) - irace.error("Recovery file '", filename, "' was generated by a version of irace (", - iraceResults$irace_version, ") different from this version of irace (", - irace::irace_version, ").") - # These variables are not state, but they are used directly by irace. - for (name in c("allConfigurations")) - assign(name, iraceResults[[name]]) - # Restore part of scenario but not all. - for (name in .irace.params.recover) - scenario[[name]] <- iraceResults$scenario[[name]] - # We call checkScenario again to fix any inconsistencies in the recovered - # data. - # FIXME: Do not call checkScenario earlier and instead do the minimum to check recoveryFile. - scenario <- checkScenario(scenario) - race_state <- iraceResults$state$clone() - race_state$recover() - firstRace <- FALSE - options(.race.debug.level = scenario$debugLevel) - options(.irace.debug.level = scenario$debugLevel) - })) + iraceResults <- read_logfile(filename) + if (iraceResults$irace_version != irace::irace_version) + irace.error("Recovery file '", filename, "' was generated by a version of irace (", + iraceResults$irace_version, ") different from this version of irace (", + irace::irace_version, ").") + + # Restore part of scenario but not all. + for (name in .irace.params.recover) + scenario[[name]] <- iraceResults$scenario[[name]] + # We call checkScenario() again to fix any inconsistencies in the recovered data. + # FIXME: Do not call checkScenario earlier and instead do the minimum to check recoveryFile. 
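# Usage sketch for the re-implemented recovery described in the NEWS entry above,
# assuming the documented irace entry points; the file names are hypothetical and
# recoveryFile must point to a logFile produced by this same version of irace
# (older recovery files are rejected by the version check below). The same effect
# is available from the command line via --recover-file.
library(irace)
scenario <- readScenario("scenario.txt")
scenario$recoveryFile <- "irace-crashed.Rdata"   # logFile written by the interrupted run
scenario$logFile <- "irace-resumed.Rdata"        # write the resumed run somewhere else
confs <- irace(scenario = scenario)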
+ scenario <- checkScenario(scenario) + race_state <- iraceResults$state$clone() + race_state$initialize(scenario, recover = TRUE) + race_state } ## @@ -363,12 +352,11 @@ generateInstances <- function(race_state, scenario, n, update = FALSE) do_experiments <- function(race_state, configurations, ninstances, scenario, iteration) { instances <- seq_len(ninstances) - output <- race_wrapper(race_state = race_state, + output <- race_wrapper_helper(race_state = race_state, configurations = configurations, instance_idx = instances, bounds = rep(scenario$boundMax, nrow(configurations)), - which_exps = seq_nrow(configurations), - scenario = scenario) + is_exe = rep_len(TRUE, nrow(configurations) * ninstances), scenario = scenario) Results <- race_state$update_experiment_log(output, instances = instances, configurations_id = configurations[[".ID."]], @@ -644,6 +632,14 @@ irace_common <- function(scenario, simple, output.width = 9999L) irace_run <- function(scenario) { + # Recover state from file? + if (is.null.or.empty(scenario$recoveryFile)) { + race_state <- RaceState$new(scenario) + } else { + irace.note ("Recovering from file: '", scenario$recoveryFile,"'\n") + race_state <- recoverFromFile(scenario$recoveryFile, scenario = scenario) + } + quiet <- scenario$quiet catInfo <- if (quiet) do_nothing else function(..., verbose = TRUE) { irace.note (..., "\n") @@ -673,101 +669,94 @@ irace_run <- function(scenario) if (scenario$postselection && scenario$maxTime == 0 && floor(remainingBudget / max(scenario$blockSize, scenario$eachTest)) > 1L) psRace(iraceResults, max_experiments = remainingBudget, iteration_elites = TRUE) else - race_state$elite_configurations + elite_configurations } debugLevel <- scenario$debugLevel + # Set options controlling debug level. + # FIXME: This should be the other way around, the options set the debugLevel. + options(.race.debug.level = debugLevel) + options(.irace.debug.level = debugLevel) - # Recover state from file? - if (!is.null.or.empty(scenario$recoveryFile)) { - irace.note ("Resuming from file: '", scenario$recoveryFile,"'\n") - recoverFromFile(scenario$recoveryFile) - race_state$start_parallel(scenario) - on.exit(race_state$stop_parallel(), add = TRUE) - } else { # Do not recover - firstRace <- TRUE - race_state <- RaceState$new(scenario) - # Set options controlling debug level. - # FIXME: This should be the other way around, the options set the debugLevel. - options(.race.debug.level = debugLevel) - options(.irace.debug.level = debugLevel) - # Create a data frame of all configurations ever generated. - allConfigurations <- allConfigurationsInit(scenario) - irace.assert(is.integer(allConfigurations[[".ID."]])) - nbUserConfigurations <- nrow(allConfigurations) + firstRace <- TRUE + # Create a data frame of all configurations ever generated. 
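# Sketch of the new is_exe convention used by race_wrapper_helper() and
# do_experiments(): the old which_exps argument held integer positions of the
# experiments to execute, while is_exe is a logical mask with one entry per
# configuration/instance pair. Toy values for illustration only.
n_conf <- 3L; n_inst <- 2L
which_exps <- c(1L, 3L, 5L)                          # old-style indices
is_exe <- seq_len(n_conf * n_inst) %in% which_exps   # new-style logical mask
is_exe  # TRUE FALSE TRUE FALSE TRUE FALSE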
+ allConfigurations <- allConfigurationsInit(scenario) + irace.assert(is.integer(allConfigurations[[".ID."]])) + nbUserConfigurations <- nrow(allConfigurations) - # To save the logs - iraceResults <- list( - scenario = scenario, - irace_version = irace_version, - iterationElites = c(), - allElites = list(), - experiments = matrix(nrow = 0L, ncol = 0L)) - blockSize <- scenario$blockSize - model <- NULL - nbConfigurations <- 0L - elite_configurations <- data.frame(stringsAsFactors=FALSE) - - nbIterations <- if (scenario$nbIterations == 0) - computeNbIterations(scenario$parameters$nbVariable) - else scenario$nbIterations - nbIterations <- floor(nbIterations) + # To save the logs + iraceResults <- list( + scenario = scenario, + irace_version = irace_version, + iterationElites = c(), + allElites = list(), + experiments = matrix(nrow = 0L, ncol = 0L)) + model <- NULL + nbConfigurations <- 0L + elite_configurations <- data.frame(stringsAsFactors=FALSE) + + nbIterations <- if (scenario$nbIterations == 0) + computeNbIterations(scenario$parameters$nbVariable) + else scenario$nbIterations + nbIterations <- floor(nbIterations) + + minSurvival <- if (scenario$minNbSurvival == 0) + computeTerminationOfRace(scenario$parameters$nbVariable) + else scenario$minNbSurvival + minSurvival <- floor(minSurvival) + # FIXME: Do this initialization within race_state. + race_state$minSurvival <- minSurvival + + # Generate initial instance + seed list + generateInstances(race_state, scenario, + n = if (scenario$maxExperiments != 0) ceiling(scenario$maxExperiments / minSurvival) + else max(scenario$firstTest, length(scenario$instances))) + indexIteration <- 1L + timeUsed <- 0 + boundEstimate <- NA + + race_state$start_parallel(scenario) + on.exit(race_state$stop_parallel(), add = TRUE) - minSurvival <- if (scenario$minNbSurvival == 0) - computeTerminationOfRace(scenario$parameters$nbVariable) - else scenario$minNbSurvival - minSurvival <- floor(minSurvival) - - # Generate initial instance + seed list - generateInstances(race_state, scenario, - n = if (scenario$maxExperiments != 0) ceiling(scenario$maxExperiments / minSurvival) - else max(scenario$firstTest, length(scenario$instances))) - indexIteration <- 1L - timeUsed <- 0 - boundEstimate <- NA - - race_state$start_parallel(scenario) - on.exit(race_state$stop_parallel(), add = TRUE) + if (scenario$maxTime == 0) { + if (is.na(scenario$minExperiments)) { + remainingBudget <- scenario$maxExperiments + } else { + remainingBudget <- max(scenario$minExperiments, + computeMinimumBudget(scenario, minSurvival, nbIterations, + race_state$elitist_new_instances)) + } + } else { ## Estimate time when maxTime is defined. + ## IMPORTANT: This is firstTest because these configurations will be + ## considered elite later, thus preserved up to firstTest, which is + ## fine. If a larger number of instances is used, it would prevent + ## discarding these configurations. + # Get the number of instances to be used. 
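# round_to_next_multiple() is an internal helper; a sketch of its assumed
# semantics (round x up to the nearest multiple of m), used right below so the
# number of estimation instances is a whole number of blocks:
round_up_to_multiple <- function(x, m) as.integer(m * ceiling(x / m))
round_up_to_multiple(5L, 4L)  # 8: firstTest = 5 with blockSize = 4 uses 8 instances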
+ ninstances <- round_to_next_multiple(scenario$firstTest, scenario$blockSize) + estimationTime <- ceiling(scenario$maxTime * scenario$budgetEstimation) + irace.note("Estimating execution time using ", 100 * scenario$budgetEstimation, + "% of ", scenario$maxTime, " = ", estimationTime, "\n") - if (scenario$maxTime == 0) { - if (is.na(scenario$minExperiments)) { - remainingBudget <- scenario$maxExperiments - } else { - remainingBudget <- max(scenario$minExperiments, - computeMinimumBudget(scenario, minSurvival, nbIterations, - race_state$elitist_new_instances)) - } - } else { ## Estimate time when maxTime is defined. - ## IMPORTANT: This is firstTest because these configurations will be - ## considered elite later, thus preserved up to firstTest, which is - ## fine. If a larger number of instances is used, it would prevent - ## discarding these configurations. - # Get the number of instances to be used. - ninstances <- round_to_next_multiple(scenario$firstTest, scenario$blockSize) - estimationTime <- ceiling(scenario$maxTime * scenario$budgetEstimation) - irace.note("Estimating execution time using ", 100 * scenario$budgetEstimation, - "% of ", scenario$maxTime, " = ", estimationTime, "\n") - - # Estimate the number of configurations to be used - nconfigurations <- max(2L, floor(scenario$parallel / ninstances)) - next_configuration <- 1L - nruns <- nconfigurations * ninstances - boundEstimate <- if (is.null(scenario$boundMax)) 1.0 else scenario$boundMax - if (estimationTime < boundEstimate * nruns) { - boundEstimate <- max(ceiling_digits(estimationTime / nruns, scenario$boundDigits), scenario$minMeasurableTime) - if (!is.null(scenario$boundMax)) { + # Estimate the number of configurations to be used + nconfigurations <- max(2L, floor(scenario$parallel / ninstances)) + next_configuration <- 1L + nruns <- nconfigurations * ninstances + boundEstimate <- if (is.null(scenario$boundMax)) 1.0 else scenario$boundMax + if (estimationTime < boundEstimate * nruns) { + boundEstimate <- max(ceiling_digits(estimationTime / nruns, scenario$boundDigits), scenario$minMeasurableTime) + if (!is.null(scenario$boundMax)) { irace.warning("boundMax = ", scenario$boundMax, " is too large, using ", boundEstimate, " instead.\n") # FIXME: We should not modify the scenario scenario$boundMax <- boundEstimate - } } - - repeat { - # Sample new configurations if needed + } + + repeat { + # Sample new configurations if needed if (nrow(allConfigurations) < nconfigurations) { newConfigurations <- sampleSobol(scenario$parameters, - nconfigurations - nrow(allConfigurations), - repair = scenario$repairConfiguration) + nconfigurations - nrow(allConfigurations), + repair = scenario$repairConfiguration) set(newConfigurations, j = ".ID.", value = max(0L, vlast(allConfigurations[[".ID."]])) + seq_nrow(newConfigurations)) setcolorder(newConfigurations, ".ID.", before=1L) @@ -778,94 +767,93 @@ irace_run <- function(scenario) # We may have generated less than the number requested if there were duplicates. nconfigurations <- nrow(allConfigurations) } - # Estimate the mean execution time. - # FIXME: Shouldn't we pass the bounds? - experiments <- do_experiments(race_state, - configurations = allConfigurations[next_configuration:nconfigurations, ], - ninstances = ninstances, scenario = scenario, - # These experiments are assigned iteration 0. - iteration = 0L) - # FIXME: Here we should check if everything timed out and increase the bound dynamically. 
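# Sketch of how the per-run time bound is re-estimated from the measured runs
# in this estimation loop. ceiling_digits() is an internal helper; its semantics
# (round up at a given number of decimal digits) are assumed here and the
# numbers are made up.
ceiling_digits_sketch <- function(x, digits) ceiling(x * 10^digits) / 10^digits
time_used <- 37.3; n_runs <- 24L; bound_digits <- 0L; min_measurable <- 0.01
max(ceiling_digits_sketch(time_used / n_runs, bound_digits), min_measurable)  # 2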
- iraceResults$experiments <- merge_matrix(iraceResults$experiments, experiments) - rownames(iraceResults$experiments) <- seq_nrow(iraceResults$experiments) - # For the used time, we count the time reported in all configurations - # including rejected ones. - timeUsed <- sum(race_state$experiment_log[["time"]], na.rm = TRUE) - # User should return time zero for rejected_ids. - boundEstimate <- timeUsed / nrow(race_state$experiment_log) - boundEstimate <- max(ceiling_digits(boundEstimate, scenario$boundDigits), scenario$minMeasurableTime) - next_configuration <- nconfigurations + 1L - - # Calculate how many new configurations: - # 1. We do not want to overrun estimationTime - new_conf <- floor(((estimationTime - timeUsed) / boundEstimate) / ninstances) - # 2. But there is no point in executing more configurations than those - # that we can execute in parallel. - new_conf <- min(new_conf, max(1L, floor(scenario$parallel / ninstances))) - if (timeUsed >= estimationTime || new_conf == 0L || nconfigurations == 1024L) - break - else - nconfigurations <- min(1024L, nconfigurations + new_conf) - } # end of repeat - - if (length(race_state$rejected_ids)) - irace.note ("Immediately rejected configurations: ", - paste0(race_state$rejected_ids, collapse = ", ") , "\n") + # Estimate the mean execution time. + # FIXME: Shouldn't we pass the bounds? + experiments <- do_experiments(race_state, + configurations = allConfigurations[next_configuration:nconfigurations, ], + ninstances = ninstances, scenario = scenario, + # These experiments are assigned iteration 0. + iteration = 0L) + # FIXME: Here we should check if everything timed out and increase the bound dynamically. + iraceResults$experiments <- merge_matrix(iraceResults$experiments, experiments) + rownames(iraceResults$experiments) <- seq_nrow(iraceResults$experiments) + # For the used time, we count the time reported in all configurations + # including rejected ones. + timeUsed <- sum(race_state$experiment_log[["time"]], na.rm = TRUE) + # User should return time zero for rejected_ids. + boundEstimate <- timeUsed / nrow(race_state$experiment_log) + boundEstimate <- max(ceiling_digits(boundEstimate, scenario$boundDigits), scenario$minMeasurableTime) + next_configuration <- nconfigurations + 1L - # Update budget - remainingBudget <- round((scenario$maxTime - timeUsed) / boundEstimate) - elite_configurations <- allConfigurations[allConfigurations[[".ID."]] %not_in% race_state$rejected_ids, , drop = FALSE] - irace.assert(is.integer(elite_configurations[[".ID."]])) - # Without elitist, the racing does not re-use the results computed during - # the estimation. This means that the time used during estimation needs - # to be spent again during racing, thus leaving less time for racing. We - # want to avoid having less time for racing, and this is an - # implementation detail, thus we assume that the time was not actually - # wasted. - if (!scenario$elitist) timeUsed <- 0 - irace.note("Estimated execution time is ", boundEstimate, " based on ", - next_configuration - 1L, " configurations and ", - ninstances," instances. 
Used time: ", timeUsed, - ", remaining time: ", (scenario$maxTime - timeUsed), - ", remaining budget (experiments): ", remainingBudget, "\n") - if (!is.null(scenario$boundMax) && 2 * boundEstimate < scenario$boundMax) { - irace.warning("boundMax=", scenario$boundMax, " is much larger than estimated execution time, using ", - 2 * boundEstimate, " instead.\n") - scenario$boundMax <- 2 * boundEstimate - } - } # end of time estimation - - # Compute the total initial budget, that is, the maximum number of - # experiments that we can perform. - currentBudget <- if (scenario$nbExperimentsPerIteration == 0L) - computeComputationalBudget(remainingBudget, indexIteration, - nbIterations) - else scenario$nbExperimentsPerIteration - - # Check that the budget is enough. For the time estimation case we reduce - # the number of iterations. - warn_msg <- NULL - while (!checkMinimumBudget(scenario, remainingBudget, minSurvival, nbIterations, - boundEstimate, timeUsed, race_state$elitist_new_instances)) - { - if (is.null(warn_msg)) - warn_msg <- - paste0("With the current settings and estimated time per run (", - boundEstimate, - ") irace will not have enough budget to execute the minimum", - " number of iterations (", nbIterations, "). ", - "Execution will continue by assuming that the estimated time", - " is too high and reducing the minimum number of iterations,", - " however, if the estimation was correct or too low,", - " results might not be better than random sampling.\n") - nbIterations <- nbIterations - 1L - scenario$nbConfigurations <- if (scenario$nbConfigurations > 0L) - min(minSurvival * 2L, scenario$nbConfigurations) - else minSurvival * 2L - } - if (!is.null(warn_msg)) irace.warning(warn_msg) + # Calculate how many new configurations: + # 1. We do not want to overrun estimationTime + new_conf <- floor(((estimationTime - timeUsed) / boundEstimate) / ninstances) + # 2. But there is no point in executing more configurations than those + # that we can execute in parallel. + new_conf <- min(new_conf, max(1L, floor(scenario$parallel / ninstances))) + if (timeUsed >= estimationTime || new_conf == 0L || nconfigurations == 1024L) + break + else + nconfigurations <- min(1024L, nconfigurations + new_conf) + } # end of repeat + + if (length(race_state$rejected_ids)) + irace.note ("Immediately rejected configurations: ", + paste0(race_state$rejected_ids, collapse = ", ") , "\n") - } #end of do not recover + # Update budget + remainingBudget <- round((scenario$maxTime - timeUsed) / boundEstimate) + elite_configurations <- allConfigurations[allConfigurations[[".ID."]] %not_in% race_state$rejected_ids, , drop = FALSE] + irace.assert(is.integer(elite_configurations[[".ID."]])) + # Without elitist, the racing does not re-use the results computed during + # the estimation. This means that the time used during estimation needs + # to be spent again during racing, thus leaving less time for racing. We + # want to avoid having less time for racing, and this is an + # implementation detail, thus we assume that the time was not actually + # wasted. + if (!scenario$elitist) timeUsed <- 0 + irace.note("Estimated execution time is ", boundEstimate, " based on ", + next_configuration - 1L, " configurations and ", + ninstances," instances. 
Used time: ", timeUsed, + ", remaining time: ", (scenario$maxTime - timeUsed), + ", remaining budget (experiments): ", remainingBudget, "\n") + if (!is.null(scenario$boundMax) && 2 * boundEstimate < scenario$boundMax) { + irace.warning("boundMax=", scenario$boundMax, " is much larger than estimated execution time, using ", + 2 * boundEstimate, " instead.\n") + scenario$boundMax <- 2 * boundEstimate + } + } # end of time estimation + + # Compute the total initial budget, that is, the maximum number of + # experiments that we can perform. + currentBudget <- if (scenario$nbExperimentsPerIteration == 0L) + computeComputationalBudget(remainingBudget, indexIteration, + nbIterations) + else scenario$nbExperimentsPerIteration + + # Check that the budget is enough. For the time estimation case we reduce + # the number of iterations. + warn_msg <- NULL + while (!checkMinimumBudget(scenario, remainingBudget, minSurvival, nbIterations, + boundEstimate, timeUsed, race_state$elitist_new_instances)) + { + if (is.null(warn_msg)) + warn_msg <- + paste0("With the current settings and estimated time per run (", + boundEstimate, + ") irace will not have enough budget to execute the minimum", + " number of iterations (", nbIterations, "). ", + "Execution will continue by assuming that the estimated time", + " is too high and reducing the minimum number of iterations,", + " however, if the estimation was correct or too low,", + " results might not be better than random sampling.\n") + nbIterations <- nbIterations - 1L + scenario$nbConfigurations <- + if (scenario$nbConfigurations > 0L) + min(minSurvival * 2L, scenario$nbConfigurations) + else minSurvival * 2L + } + if (!is.null(warn_msg)) irace.warning(warn_msg) catInfo("Initialization\n", if (scenario$elitist) @@ -894,20 +882,11 @@ irace_run <- function(scenario) paste0("# boundMax: ", scenario$boundMax, "\n"), verbose = FALSE) + blockSize <- scenario$blockSize repeat { - # Save recovery info. - race_state$save_recovery(currentBudget = currentBudget, - elite_configurations = elite_configurations, - experimentsUsedSoFar = nrow(race_state$experiment_log), - indexIteration = indexIteration, - minSurvival = minSurvival, - model = model, - nbConfigurations = nbConfigurations, - nbIterations = nbIterations, - remainingBudget = remainingBudget, - timeUsed = timeUsed, - boundEstimate = boundEstimate) + # FIXME: We could directly use race_state$timeUsed everywhere. + race_state$timeUsed <- timeUsed ## Save to the log file. iraceResults$allConfigurations <- allConfigurations iraceResults$state <- race_state @@ -1173,10 +1152,17 @@ irace_run <- function(scenario) minSurvival = minSurvival, elite.data = elite_data, elitist_new_instances = if (firstRace) 0L - else race_state$elitist_new_instances) + else race_state$elitist_new_instances) # We add indexIteration as an additional column. set(raceResults$experiment_log, j = "iteration", value = indexIteration) - race_state$experiment_log <- rbind(race_state$experiment_log, raceResults$experiment_log) + # FIXME: There is a chance that the process stops after we remove + # race_experiment_log in elitist_race(), but before we update + # race_state$experiment_log here. Doing the two steps in a different order + # would be more robust but would need a smarter recovery routine that + # checks for duplicates. + withr::local_options(warn=2) + race_state$experiment_log <- rbindlist(list(race_state$experiment_log, raceResults$experiment_log), + use.names=TRUE) # Merge new results. 
iraceResults$experiments <- merge_matrix(iraceResults$experiments, raceResults$experiments) diff --git a/R/irace_summarise.R b/R/irace_summarise.R index 0a9fa9ba..02aeb629 100644 --- a/R/irace_summarise.R +++ b/R/irace_summarise.R @@ -37,6 +37,7 @@ irace_summarise <- function(iraceResults) if (is.null(version)) version <- iraceResults$irace.version + # Here to support older versions of irace. time_targetrunner <- iraceResults$state$recovery_info$timeUsed if (is.null(time_targetrunner)) time_targetrunner <- iraceResults$state$timeUsed diff --git a/R/psRace.R b/R/psRace.R index b6fdc202..14c20c66 100644 --- a/R/psRace.R +++ b/R/psRace.R @@ -225,8 +225,7 @@ psRace <- function(iraceResults, max_experiments, conf_ids = NULL, iteration_eli elitist_new_instances = 0L) elite_configurations <- extractElites(raceResults$configurations, - nbElites = race_state$recovery_info$minSurvival, - debugLevel = scenario$debugLevel) + nbElites = race_state$minSurvival, debugLevel = scenario$debugLevel) irace.note("Elite configurations (first number is the configuration ID;", " listed from best to worst according to the ", test.type.order.str(scenario$testType), "):\n") @@ -246,7 +245,6 @@ psRace <- function(iraceResults, max_experiments, conf_ids = NULL, iteration_eli iraceResults$experiments <- merge_matrix(iraceResults$experiments, raceResults$experiments) iraceResults$iterationElites[indexIteration] <- elite_configurations[[".ID."]][1L] iraceResults$allElites[[indexIteration]] <- elite_configurations[[".ID."]] - race_state$elite_configurations <- elite_configurations iraceResults$scenario <- scenario iraceResults$state <- race_state # FIXME: This log should contain only information of what was done in the diff --git a/R/race-wrapper.R b/R/race-wrapper.R index 4b3eb4ff..5d8f4108 100644 --- a/R/race-wrapper.R +++ b/R/race-wrapper.R @@ -88,7 +88,8 @@ target_error <- function(err_msg, output, scenario, target_runner_call, "\n", .irace_msg_prefix, advice_txt) } -check_output_target_evaluator <- function (output, scenario, target_runner_call = NULL, bound = NULL) +check_output_target_evaluator <- function (output, scenario, target_runner_time, + target_runner_call, bound) { if (!is.list(output)) { output <- list() @@ -104,15 +105,21 @@ check_output_target_evaluator <- function (output, scenario, target_runner_call } else if (is_na_nowarn(output$cost)) { err_msg <- "The output of targetEvaluator is not numeric!" } - if (scenario$batchmode != 0 && scenario$maxTime > 0) { - if (is.null (output$time)) { - err_msg <- "When batchmode != 0 and maxTime > 0, the output of targetEvaluator must be two numbers 'cost time'!" + if (scenario$maxTime > 0 || scenario$capping) { + if (scenario$batchmode != 0) { + if (is.null (output$time)) + err_msg <- "When batchmode != 0 and maxTime > 0, the output of targetEvaluator must be two numbers 'cost time'!" + # With scenario$capping == TRUE, we may have pre-executed targetRunner + # (which_elite_exe) that already have recorded the time, so when we + # reach this point, we may not have 'time'. + } else if (!scenario$capping && is.null(target_runner_time) && is.null(output$time)) { + err_msg <- "Either targetRunner or targetEvaluator must return 'time' !" } } - if (is.null(output$time)) { - output$time <- NA_real_ - } else { - if (is_na_nowarn(output$time)) { + if (!is.null(output$time)) { + if (!is.null(target_runner_time)) { + err_msg <- "Both targetRunner and targetEvaluator cannot return 'time' !" 
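# Sketch of the contract enforced by the checks around this point: when both a
# targetRunner and a targetEvaluator are defined, at most one of them may report
# 'time'. Hypothetical example where the runner measures time and the evaluator
# only supplies the cost; run_solver() and read_cost() are placeholders.
target_runner <- function(experiment, scenario) {
  elapsed <- system.time(run_solver(experiment))[["elapsed"]]
  list(time = elapsed)                            # no cost here
}
target_evaluator <- function(experiment, num_configurations, all_conf_id,
                             scenario, target_runner_call) {
  list(cost = read_cost(experiment))              # no time here
}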
+ } else if (is_na_nowarn(output$time)) { err_msg <- "The time returned by targetEvaluator is not numeric!" } else if (is.infinite(output$time)) { err_msg <- "The time returned by targetEvaluator is not finite!" @@ -125,13 +132,17 @@ check_output_target_evaluator <- function (output, scenario, target_runner_call err_msg <- paste0("The time returned by targetEvaluator (", output$time, ") does not respect the given bound of ", bound, "!") } } + } else { + output$time <- target_runner_time } } - if (!is.null(err_msg)) { + if (is.null(err_msg)) { + output$error <- NULL + } else { target_error (err_msg, output, scenario, - target_runner_call = target_runner_call, - target_evaluator_call = output$call) + target_runner_call = target_runner_call, + target_evaluator_call = output$call) } output } @@ -239,27 +250,25 @@ check_output_target_runner <- function(output, scenario, bound = NULL) err_msg <- output$error if (is.null(err_msg)) { - if (!is.null (output$cost)) { - if (is_na_or_empty(output$cost)) { - err_msg <- "The cost returned by targetRunner is not numeric!" - } + if (is.null(output$cost)) { + output$cost <- NULL # make sure to delete it. + } else if (is_na_or_empty(output$cost)) { + err_msg <- "The cost returned by targetRunner is not numeric!" } if (is.null(output$time)) { - output$time <- NA_real_ + output$time <- NULL # make sure to delete it. + } else if (is.na(output$time)) { + err_msg <- paste0("The time returned by targetRunner is not numeric!") + } else if (is.infinite(output$time)) { + err_msg <- paste0("The time returned by targetRunner is not finite!") + } else if (output$time <= 0) { + err_msg <- paste0("The value of time (", output$time, ") returned by targetRunner must be strictly positive!") } else { - if (is.na(output$time)) { - err_msg <- paste0("The time returned by targetRunner is not numeric!") - } else if (is.infinite(output$time)) { - err_msg <- paste0("The time returned by targetRunner is not finite!") - } else if (output$time <= 0) { - err_msg <- paste0("The value of time (", output$time, ") returned by targetRunner must be strictly positive!") - } else { - # Fix time. - output$time <- max(output$time, scenario$minMeasurableTime) - if (!is.null(bound) && !is.na(bound) && bound > 0 && bound + scenario$minMeasurableTime < output$time) { - err_msg <- paste0("The time returned by targetRunner (", output$time, ") does not respect the given bound of ", bound, "!") - } + # Fix time. + output$time <- max(output$time, scenario$minMeasurableTime) + if (!is.null(bound) && !is.na(bound) && bound > 0 && bound + scenario$minMeasurableTime < output$time) { + err_msg <- paste0("The time returned by targetRunner (", output$time, ") does not respect the given bound of ", bound, "!") } } if (is.null(err_msg)) { @@ -268,15 +277,15 @@ check_output_target_runner <- function(output, scenario, bound = NULL) # unless using batchmode, in that case targetRunner returns neither the # time nor the cost. if (scenario$batchmode != 0) { - if (!is.na(output$time) || !is.null(output$cost)) { + if (!is.null(output$time) || !is.null(output$cost)) { err_msg <- "When batchmode != 0, the output of targetRunner must not contain a cost nor a time!" } - } else if (scenario$maxTime > 0 && is.na(output$time)) { + } else if (scenario$maxTime > 0 && is.null(output$time)) { err_msg <- "The output of targetRunner must be one number 'time'!" } else if (!is.null(output$cost)) { err_msg <- "The output of targetRunner must be empty or just one number 'time'!" 
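# Rough sketch of the targetRunner output shapes accepted by these checks; the
# values are made up and the exact combination depends on maxTime,
# maxExperiments, batchmode and whether a targetEvaluator is defined.
list(cost = 12.3)               # tuning by maxExperiments, no targetEvaluator
list(cost = 12.3, time = 4.56)  # tuning by maxTime, no targetEvaluator
list(time = 4.56)               # a targetEvaluator supplies the cost separately
list()                          # batchmode != 0: neither cost nor time from targetRunner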
} - } else if (scenario$maxTime > 0 && (is.null(output$cost) || is.na(output$time))) { + } else if (scenario$maxTime > 0 && (is.null(output$cost) || is.null(output$time))) { err_msg <- "The output of targetRunner must be two numbers 'cost time'!" } else if (scenario$maxExperiments > 0 && is.null(output$cost)) { err_msg <- "The output of targetRunner must be one number 'cost'!" @@ -284,7 +293,9 @@ check_output_target_runner <- function(output, scenario, bound = NULL) } } - if (!is.null(err_msg)) { + if (is.null(err_msg)) { + output$error <- NULL + } else { target_error (err_msg, output, scenario, target_runner_call = output$call) } output @@ -602,6 +613,8 @@ execute_evaluator <- function(target_evaluator, experiments, scenario, target_ou ## FIXME: We do not need the configurations_id argument: irace.assert(isTRUE(all.equal(configurations_id, unique(sapply(experiments, getElement, "id_configuration"))))) + configurations_id <- unique(unlist(lapply(experiments, getElement, "id_configuration"), + recursive = FALSE, use.names = FALSE)) nconfs <- length(configurations_id) # Evaluate configurations sequentially. for (k in seq_along(experiments)) { @@ -610,12 +623,13 @@ execute_evaluator <- function(target_evaluator, experiments, scenario, target_ou output <- target_evaluator(experiment = experiment, num_configurations = nconfs, all_conf_id = configurations_id, scenario = scenario, target_runner_call = target_runner_call) - output <- check_output_target_evaluator(output, scenario, target_runner_call = target_runner_call, bound = experiment$bound) + output <- check_output_target_evaluator(output, scenario, target_runner_time = target_output[[k]]$time, + target_runner_call = target_runner_call, bound = experiment$bound) target_output[[k]]$cost <- output$cost + # targetEvaluator may return time, for example for batchmode != 0. + target_output[[k]]$time <- output$time if (is.null(target_output[[k]]$call)) target_output[[k]]$call <- output$call - if (is.null(target_output[[k]]$time) || !is.null.or.na(output$time)) - target_output[[k]]$time <- output$time } target_output } diff --git a/R/race.R b/R/race.R index bb2c7897..ff9728fa 100644 --- a/R/race.R +++ b/R/race.R @@ -59,17 +59,10 @@ createExperimentList <- function(configurations, parameters, rep(configurations, times = n_instances)), MoreArgs = NULL) } -## Executes a list of configurations in a particular instance -## configurations: description having the id of the configuration -## instance.idx: index of the instance,seed pair in race_state$instances_log -## bounds: execution bounds (NULL if not needed). -## which_exps: Which experiments really need to be executed. 
-race_wrapper <- function(race_state, configurations, instance_idx, bounds, - which_exps, - # FIXME: race_state already contains scenario but it is not update with scenario$instances - scenario) + +race_wrapper_helper <- function(race_state, configurations, instance_idx, bounds, + is_exe, scenario) { - # Experiment list to execute experiments <- createExperimentList(configurations, parameters = scenario$parameters, instances = scenario$instances, @@ -77,17 +70,59 @@ race_wrapper <- function(race_state, configurations, instance_idx, bounds, seeds = race_state$instances_log[["seed"]][instance_idx], bounds = bounds) - target_output <- vector("list", length(experiments)) - # Execute commands - if (length(which_exps)) - target_output[which_exps] <- execute_experiments(race_state, experiments[which_exps], scenario) + irace.assert(length(is_exe) == length(experiments)) + instance_idx <- rep(instance_idx, each = nrow(configurations)) + if (race_state$recovery_mode) { + # With targetEvaluator or if everything is executed, we recover everything. + if (!is.null(scenario$targetEvaluator) || all(is_exe)) { + configuration_id <- unlist_element(experiments, "id_configuration") + target_output <- race_state$recover_output(instance_idx, configuration_id) + } else { + irace.assert(any(is_exe)) + configuration_id <- unlist_element(experiments[is_exe], "id_configuration") + target_output <- race_state$recover_output(instance_idx[is_exe], configuration_id) + } + } else { # !recovery_mode + # We cannot let targetRunner or targetEvaluator modify our random seed, so we save it. + withr::local_preserve_seed() + target_output <- vector("list", length(experiments)) + # Execute experiments for which is_exe is TRUE: + if (any(is_exe)) + target_output[is_exe] <- execute_experiments(race_state, experiments[is_exe], scenario) - # targetEvaluator may be NULL. If so, target_output must contain the right - # output already. Otherwise, targetEvaluator always re-evaluates. - if (!is.null(scenario$targetEvaluator)) - target_output <- execute_evaluator(race_state$target_evaluator, experiments, scenario, target_output, - configurations_id = configurations[[".ID."]]) - + # If targetEvaluator is NULL, then target_output must contain the right + # output already. Otherwise, targetEvaluator considers all experiments. + if (!is.null(scenario$targetEvaluator)) { + target_output <- execute_evaluator(race_state$target_evaluator, experiments, scenario, target_output, + configurations_id = configurations[[".ID."]]) + } else if (any(!is_exe)) { + experiments <- experiments[is_exe] + instance_idx <- instance_idx[is_exe] + } + withr::local_options(warn=2) + target_output <- rbindlist(target_output, fill=TRUE, use.names=TRUE) + set(target_output, j = setdiff(colnames(target_output), c("cost", "time")), value = NULL) + if ("time" %notin% colnames(target_output)) + set(target_output, j = "time", value = NA) + set(target_output, j = "configuration", value = unlist_element(experiments, "id_configuration")) + set(target_output, j = "instance", value = instance_idx) + if (!is.null(bounds)) + set(target_output, j = "bound", value = unlist_element(experiments, "bound")) + } + target_output +} + +## Executes a list of configurations in a particular instance +## configurations: description having the id of the configuration +## instance.idx: index of the instance,seed pair in race_state$instances_log +## bounds: execution bounds (NULL if not needed). +## is_exe: Boolean vector that determines which experiments need to executed. 
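# Sketch of the lookup that recovery_mode relies on in race_wrapper_helper():
# previously logged results are matched back to the pending experiments by
# (instance, configuration) and returned instead of re-executing anything.
# Toy data for illustration only.
library(data.table)
logged <- data.table(instance = c(1L, 1L, 2L), configuration = c(7L, 9L, 7L),
                     cost = c(0.3, 0.5, 0.4), time = c(1.2, 2.0, 1.1))
pending <- data.table(instance = c(1L, 2L), configuration = c(7L, 7L))
logged[pending, on = .(instance, configuration)]  # replayed rows, no execution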
+race_wrapper <- function(race_state, configurations, instance_idx, bounds, + is_exe, scenario) +{ + target_output <- race_wrapper_helper(race_state, configurations, instance_idx, + bounds, is_exe, scenario) + race_state$update_race_experiment_log(target_output, scenario) target_output } @@ -542,8 +577,8 @@ generateTimeMatrix <- function(elite_ids, experiment_log) experiment_log <- experiment_log[(configuration %in% elite_ids) & !is.na(time), c("configuration", "instance", "time", "bound")] experiment_log[, time := pmin.int(time, bound)] - time_matrix <- dcast(experiment_log[ , -"bound"], instance ~ configuration, - value.var = "time") + experiment_log[, bound := NULL] + time_matrix <- dcast(experiment_log, instance ~ configuration, value.var = "time") setcolorder(time_matrix, neworder = as.character(elite_ids)) as.matrix(time_matrix, rownames = "instance") } @@ -574,7 +609,6 @@ elitist_race <- function(race_state, maxExp, elitist <- scenario$elitist capping <- scenario$capping n_configurations <- nrow(configurations) - experiment_log <- data.table(instance=integer(0), configuration=integer(0), time=numeric(0), bound=numeric(0)) alive <- rep_len(TRUE, n_configurations) is_rejected <- logical(n_configurations) @@ -679,25 +713,23 @@ elitist_race <- function(race_state, maxExp, # which_exe values are within 1:nbConfigurations, whereas experiments # indices are within 1:length(which_alive). The following line converts # from one to the other. - which_exps = seq_along(which_elites), scenario = scenario) + is_exe = rep_len(TRUE, n_elite), scenario = scenario) # Extract results: # FIXME: check what would happen in case of having the target evaluator # MANUEL: Note how similar is this to what we do in do.experiments(), # perhaps we can create a function that takes output and experiment_log # and returns experiment_log. # LESLIE: Yes you are right, Ill do it once we figure out the rest! - vcost <- unlist(lapply(output, "[[", "cost")) - irace.assert(length(vcost) == n_elite) - vcost <- applyPAR(vcost, boundMax = scenario$boundMax, boundPar = scenario$boundPar) - Results[k, seq_len(n_elite)] <- vcost - vtimes <- unlist(lapply(output, "[[", "time")) + irace.assert(length(output[["cost"]]) == n_elite) + Results[k, seq_len(n_elite)] <- applyPAR(output[["cost"]], boundMax = scenario$boundMax, boundPar = scenario$boundPar) + vtimes <- output[["time"]] irace.assert(length(vtimes) == n_elite) - experimentsTime[k, which_elites] <- vtimes # capping is enabled - experiment_log <- update_experiment_log(experiment_log, - instance=race_instances[k], - configuration = configurations[[".ID."]][which_elites], - time = vtimes, bound = scenario$boundMax) - experiments_used <- experiments_used + n_elite + irace.assert(!anyNA(vtimes)) + experimentsTime[k, which_elites] <- output[["time"]] # capping is enabled + irace.assert(all.equal(configurations[[".ID."]][which_elites], output[["configuration"]])) + irace.assert(all.equal(output[["bound"]], rep(scenario$boundMax, n_elite))) + irace.assert(all.equal(unique(output[["instance"]]), race_instances[k])) + experiments_used <- experiments_used + n_elite # We remove elite configurations that are rejected given that # is not possible to calculate the bounds. @@ -853,22 +885,16 @@ elitist_race <- function(race_state, maxExp, instance_idx = race_instances[current_task], # FIXME: What if we already have a bound for this instance? bounds = rep(scenario$boundMax, length(which_elite_exe)), - # MANUEL: How does this work for target-evaluator? 
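# Sketch of what generateTimeMatrix() above produces: the long experiment log is
# cast into one row per instance and one column per configuration, holding the
# bound-capped running times. Toy data for illustration only.
library(data.table)
log_dt <- data.table(instance = c(1L, 1L, 2L, 2L),
                     configuration = c(5L, 8L, 5L, 8L),
                     time = c(1.0, 2.5, 0.7, 3.0))
dcast(log_dt, instance ~ configuration, value.var = "time")
# one row per instance; columns "5" and "8" hold the times of those configurations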
- # We are telling race_wrapper that only some elites are alive! - which_exps = seq_along(which_elite_exe), - scenario = scenario) + is_exe = rep_len(TRUE, length(which_elite_exe)), scenario = scenario) # Extract results - vcost <- unlist(lapply(output, "[[", "cost")) - irace.assert(length(vcost) == length(which_elite_exe)) - vcost <- applyPAR(vcost, boundMax = scenario$boundMax, boundPar = scenario$boundPar) - Results[current_task, which_elite_exe] <- vcost - vtimes <- unlist(lapply(output, "[[", "time")) + irace.assert(length(output[["cost"]]) == length(which_elite_exe)) + Results[current_task, which_elite_exe] <- applyPAR(output[["cost"]], boundMax = scenario$boundMax, boundPar = scenario$boundPar) + vtimes <- output[["time"]] irace.assert(length(vtimes) == length(which_elite_exe)) + irace.assert(!anyNA(vtimes)) + irace.assert(all.equal(configurations[which_elite_exe, ".ID."], output[["configuration"]])) experimentsTime[current_task, which_elite_exe] <- vtimes - experiment_log <- update_experiment_log(experiment_log, - instance = race_instances[current_task], - configuration = configurations[which_elite_exe, ".ID."], - time = vtimes, bound = scenario$boundMax) + irace.assert(all.equal(unique(output[["instance"]]), race_instances[current_task])) experiments_used <- experiments_used + length(which_elite_exe) # We remove elite configurations that are rejected given that @@ -920,7 +946,6 @@ elitist_race <- function(race_state, maxExp, next } } - all_bounds <- final_execution_bound(experimentsTime, elites = which(is_elite > 0L), current_task, which_alive, @@ -936,50 +961,44 @@ elitist_race <- function(race_state, maxExp, output <- race_wrapper(race_state, configurations = configurations[which_alive, , drop = FALSE], instance_idx = race_instances[current_task], bounds = final_bounds, - which_exps = which(which_alive %in% which_exe), + is_exe = which_alive %in% which_exe, scenario = scenario) - # Extract results - vcost <- unlist(lapply(output, "[[", "cost")) # Set max execution bound to timed out executions which have execution # times smaller than boundMax and implement parX if required. + vcost <- output[["cost"]] + # Output is not indexed in the same way as configurations. + which_has_time <- which(which_alive %in% which_exe) if (capping) { vcost <- applyPAR(vcost, boundMax = scenario$boundMax, boundPar = scenario$boundPar) if (scenario$boundAsTimeout) { - timeout_bounds <- if (is.null(scenario$targetEvaluator)) - final_bounds[which(which_alive %in% which_exe)] + timeout_bounds <- if (is.null(scenario$targetEvaluator)) final_bounds[which_has_time] else final_bounds + irace.assert(all.equal(output[["bound"]], timeout_bounds)) # We do not want to change Inf or -Inf because those represent rejection. vcost[is.finite(vcost) & (vcost >= timeout_bounds) & (vcost < scenario$boundMax)] <- scenario$boundMax } - } - ## Currently, targetEvaluator always re-evaluates, which implies that the - ## value may change without counting as an evaluation. We do this to allow online normalization. - which_exps <- if (is.null(scenario$targetEvaluator)) which_exe else which_alive - irace.assert(length(vcost) == length(which_exps)) - Results[current_task, which_exps] <- vcost - # Output is not indexed in the same way as configurations. 
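# Sketch of the penalty applied by applyPAR() in this function; the exact
# semantics are assumed here (a PAR-style penalty where costs at or above
# boundMax are replaced by boundPar * boundMax) and the numbers are made up.
applyPAR_sketch <- function(cost, boundMax, boundPar) {
  cost[is.finite(cost) & cost >= boundMax] <- boundMax * boundPar
  cost
}
applyPAR_sketch(c(3, 80, 12), boundMax = 80, boundPar = 10)  # 3 800 12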
- which_exps <- which(which_alive %in% which_exe) - irace.assert(length(which_exps) == length(which_exe)) - vtimes <- unlist(lapply(output[which_exps], "[[", "time")) - final_bounds <- final_bounds[which_exps] - irace.assert(length(vtimes) == length(which_exps)) - if (capping) { + # If targetEvaluator was used, we do not update the times because no + # evaluation actually happened, only the cost values possibly changed. + vtimes <- if (is.null(scenario$targetEvaluator)) output[["time"]] else output[["time"]][which_has_time] + irace.assert(length(which_has_time) == length(which_exe)) + irace.assert(length(vtimes) == length(which_has_time)) # Correct higher execution times. - experimentsTime[current_task, which_exps] <- pmin(vtimes, final_bounds) + irace.assert(all.equal(if (is.null(scenario$targetEvaluator)) output[["bound"]] else output[["bound"]][which_has_time], final_bounds[which_has_time])) + experimentsTime[current_task, which_has_time] <- pmin(vtimes, final_bounds[which_has_time]) } - experiment_log <- update_experiment_log(experiment_log, - instance = race_instances[current_task], - configuration = configurations[which_exe, ".ID."], - # FIXME: Do not store a column bounds if bounds are NULL. - time = vtimes, bound = if (is.null(final_bounds)) NA_real_ else final_bounds) + ## Currently, targetEvaluator always re-evaluates, which implies that the + ## value may change without counting as an evaluation. We do this to allow online normalization. + which_has_cost <- if (is.null(scenario$targetEvaluator)) which_exe else which_alive + irace.assert(all.equal(configurations[[".ID."]][which_has_cost], output[["configuration"]])) + irace.assert(length(output[["cost"]]) == length(which_has_cost)) + Results[current_task, which_has_cost] <- vcost - irace.assert(anyDuplicated(experiment_log[, c("instance", "configuration")]) == 0L, - eval_after = { - print(experiment_log) - print(mget(ls())) - }) + # With !is.null(scenario$targetEvaluator) we will have duplicated (instance, configuration) in output. + irace.assert(all.equal(output[["bound"]], if (is.null(scenario$targetEvaluator)) final_bounds[which_has_time] + else final_bounds)) + irace.assert(all.equal(unique(output[["instance"]]), race_instances[current_task])) experiments_used <- experiments_used + length(which_exe) # We update the elites that have been executed. is_elite <- update_is_elite(is_elite, which_elite_exe) @@ -1015,7 +1034,7 @@ elitist_race <- function(race_state, maxExp, # The second condition can be false if we eliminated via immediate # rejection. The third condition ensures that we see the block before capping. if (capping && sum(alive) > minSurvival && (current_task %% blockSize) == 0L) { - irace.assert(!any(is_elite > 0) == (current_task >= elite_safe)) + irace.assert(!any(is_elite > 0L) == (current_task >= elite_safe)) cap.alive <- dom_elim(Results[seq_len(current_task), , drop = FALSE], # Get current elite configurations. elites = which(is_elite > 0L), @@ -1105,8 +1124,8 @@ elitist_race <- function(race_state, maxExp, if (elitist) { # Compute number of statistical tests without eliminations. 
- irace.assert(!any(is_elite > 0) == (current_task >= elite_safe)) - if (!any(is_elite > 0) + irace.assert(!any(is_elite > 0L) == (current_task >= elite_safe)) + if (!any(is_elite > 0L) && current_task > first.test && (current_task %% each.test) == 0L) { if (length(which_alive) == length(prev_alive)) { no_elimination <- no_elimination + 1L @@ -1167,16 +1186,24 @@ elitist_race <- function(race_state, maxExp, irace.note ("Memory used in race():\n") race_state$print_mem_used() } + withr::local_options(warn=2) + local_experiment_log <- race_state$reset_race_experiment_log() + # nrow(Results) may be smaller, equal or larger than current_task. - irace.assert(nrow(experiment_log) == experiments_used) + if (is.null(scenario$targetEvaluator)) { + # With targetEvaluator, we may have the recorded a new cost value but not + # counted it as an experiment used if targetRunner was not called. + # If this assert fails, use debug-level 3 to trigger the expensive check above. + irace.assert(anyDuplicated(local_experiment_log[, c("instance", "configuration")]) == 0L, + eval_after = { + print(local_experiment_log) + print(mget(ls())) + }) + irace.assert(nrow(local_experiment_log) == experiments_used) + } list(experiments = Results, - experiment_log = experiment_log, + experiment_log = local_experiment_log, experimentsUsed = experiments_used, configurations = configurations) } - -update_experiment_log <- function(experiment_log, instance, configuration, time, bound) - rbind(experiment_log, - data.table(instance = instance, configuration = configuration, time = time, bound = bound)) - diff --git a/R/race_state.R b/R/race_state.R index b45e2ef5..0afcc183 100644 --- a/R/race_state.R +++ b/R/race_state.R @@ -5,22 +5,25 @@ RaceState <- R6Class("RaceState", lock_class = TRUE, completed = "Incomplete", elapsed = 0L, elapsed_recovered = 0L, - elite_configurations = NULL, elitist_new_instances = 0L, experiment_log = NULL, instances_log = NULL, - model = NULL, + minSurvival = NULL, next_instance = -1L, + race_experiment_log = NULL, recovery_info = NULL, + recovery_mode = FALSE, rejected_ids = NULL, rng = NULL, seed = NULL, session_info = NULL, target_evaluator = NULL, target_runner = NULL, + timeUsed = 0, + time_last_save = 0, timer = NULL, # Methods. - initialize = function(scenario, new = TRUE) { + initialize = function(scenario, new = TRUE, recover = FALSE) { self$timer <- Timer$new() self$target_runner <- if (is.function(scenario$targetRunner)) bytecompile(scenario$targetRunner) @@ -34,55 +37,115 @@ RaceState <- R6Class("RaceState", lock_class = TRUE, else target_evaluator_default } - if (is.null(self$experiment_log)) { - self$experiment_log <- data.table(iteration=integer(0), instance=integer(0), - configuration=integer(0), time=numeric(0), bound=numeric(0)) - } - + irace.assert(new || !recover) if (new) { - seed <- scenario$seed - if (is.na(seed)) - seed <- trunc(runif(1, 1, .Machine$integer.max)) - set_random_seed(seed) - self$seed <- seed - self$rng <- get_random_seed() self$elitist_new_instances <- round_to_next_multiple(scenario$elitistNewInstances, scenario$blockSize) - } else { + # We cannot recover if we did not get to initialize self$rng. + if (recover && !is.null(self$rng)) { + restore_random_seed(self$rng) + self$recovery_mode <- TRUE + set(self$experiment_log, j = "iteration", value = NULL) + self$recovery_info <- rbindlist(c(list(self$experiment_log), self$race_experiment_log), use.names = TRUE) + # Reinitialize some state. 
+ self$completed = "Incomplete" + self$elapsed = 0L + self$elapsed_recovered = 0L + self$experiment_log = NULL + self$next_instance = -1L + self$race_experiment_log = NULL + self$rejected_ids = NULL + self$timeUsed = 0 + self$time_last_save = 0 + # Just in case anything is still running. + self$stop_parallel() + } else { + seed <- scenario$seed + if (is.na(seed)) + seed <- trunc(runif(1, 1, .Machine$integer.max)) + set_random_seed(seed) + self$seed <- seed + self$rng <- get_random_seed() + } + } else { # !new self$elapsed_recovered <- self$elapsed restore_random_seed(self$rng) } + + if (is.null(self$experiment_log)) { + self$experiment_log <- data.table(iteration=integer(0), instance=integer(0), + configuration=integer(0), cost = numeric(0), time = numeric(0), + bound = if (is.null(scenario$boundMax)) NULL else numeric(0)) + } + if (scenario$debugLevel >= 3L) { irace.note("RNGkind: ", paste0(self$rng$rng_kind, collapse = " "), "\n", "# .Random.seed: ", paste0(self$rng$random_seed, collapse = ", "), "\n") } - # We do this here it is available even if we crash. + # We do this here, so it is available even if we crash. self$session_info <- sessionInfo() invisible(self) }, update_experiment_log = function(output, instances, configurations_id, scenario, iteration) { + irace.assert(all.equal(unique(output[["configuration"]]), configurations_id)) + irace.assert(all.equal(rep(instances, each = length(configurations_id)), + output$instance)) # Extract results - costs <- unlist(lapply(output, "[[", "cost")) - times <- unlist(lapply(output, "[[", "time")) - if (scenario$capping) - costs <- applyPAR(costs, boundMax = scenario$boundMax, boundPar = scenario$boundPar) + set(output, j = "iteration", value = iteration) + if (!is.null(scenario$boundMax)) + set(output, j = "bound", value = scenario$boundMax) + self$experiment_log <- rbindlist(list(self$experiment_log, output), use.names=TRUE) - self$experiment_log <- rbind(self$experiment_log, - data.table(iteration = iteration, instance = rep(instances, each = length(configurations_id)), - configuration = rep(configurations_id, times = length(instances)), - time = times, bound = if (is.null(scenario$boundMax)) NA else scenario$boundMax)) + cost <- output[["cost"]] + if (scenario$capping) + cost <- applyPAR(cost, boundMax = scenario$boundMax, boundPar = scenario$boundPar) - matrix(costs, nrow = length(instances), ncol = length(configurations_id), + matrix(cost, nrow = length(instances), ncol = length(configurations_id), byrow = TRUE, dimnames = list(instances, as.character(configurations_id))) }, - - save_recovery = function(elite_configurations, model, ...) { - self$time_elapsed() - self$rng <- get_random_seed() - self$elite_configurations <- elite_configurations - self$model <- model - self$recovery_info <- list(...) + + update_race_experiment_log = function(experiment_log, scenario) { + self$race_experiment_log <- c(self$race_experiment_log, list(experiment_log)) + now <- self$timer$wallclock() + # Do not save to disk too frequently. 
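# Sketch of the throttling implemented just below, assuming the new
# .irace_minimum_saving_time constant from aaa.R (60 seconds): recovery state
# is written to the log file at most once per minute of wall-clock time.
now <- 125                              # hypothetical wall-clock seconds
time_last_save <- 70                    # next save was allowed from t = 70
if (now > time_last_save) {
  # ... save_irace_logfile(...) would run here ...
  time_last_save <- now + 60            # block further saves for another minute
}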
+ if (now > self$time_last_save) { + irace.note("Saving recovery info.\n") + iraceResults <- list( + scenario = scenario, + irace_version = irace_version, + state = self) + save_irace_logfile(iraceResults, logfile = scenario$logFile) + self$time_last_save <- now + .irace_minimum_saving_time + } + invisible() + }, + + reset_race_experiment_log = function() { + res <- rbindlist(self$race_experiment_log, use.names=TRUE) + self$race_experiment_log <- NULL + res + }, + + recover_output = function(instance_idx, configuration_id) { + search <- data.table(instance = instance_idx, configuration = configuration_id) + res <- self$recovery_info[search, on = .(instance,configuration), mult="first", nomatch=NULL, which=TRUE] + irace.assert(length(res) == 0L || length(res) == nrow(search)) + if (length(res) == 0L) { + irace.note("Cannot find the following in recovery info:") + print(search[!self$recovery_info, on = .(instance,configuration)]) + irace.error("Recovery terminated.") + } + # Get the rows. + output <- self$recovery_info[res] + # Delete those rows. + self$recovery_info <- self$recovery_info[-res] + if (nrow(self$recovery_info) == 0L) { + irace.note("Recovery completed.\n") + self$recovery_mode <- FALSE + self$recovery_info <- NULL + } + output }, update_rejected = function(rejected_ids, configurations) { @@ -91,19 +154,6 @@ RaceState <- R6Class("RaceState", lock_class = TRUE, configurations[configurations[[".ID."]] %in% rejected_ids, , drop = FALSE] }, - recover = function(scenario) { - self$initialize(scenario, new = FALSE) - restore_random_seed(self$rng) - envir <- parent.frame() - # FIXME: This is a bit annoying, it would be better to keep these within RaceState all the time. - for (name in setdiff(names(formals(self$save_recovery)), "...")) - assign(name, self[[name]], envir = envir) - for (name in names(self$recovery_info)) - assign(name, self$recovery_info[[name]], envir = envir) - self$stop_parallel() - invisible(self) - }, - time_elapsed = function() { self$elapsed <- self$timer$elapsed() + self$elapsed_recovered self$elapsed diff --git a/R/utils.R b/R/utils.R index 1df4f730..17711902 100644 --- a/R/utils.R +++ b/R/utils.R @@ -576,8 +576,6 @@ has_testing_data <- function(iraceResults) do_nothing <- function(...) invisible() -.irace_tolerance <- sqrt(.Machine$double.eps) - seq_nrow <- function(x) seq_len(nrow(x)) clamp <- function(x, lower, upper) pmax.int(lower, pmin.int(x, upper)) @@ -606,3 +604,7 @@ vlast <- function(x) # between machines. runif_integer <- function(size) sample.int(min(2147483647L, .Machine$integer.max), size = size, replace = TRUE) + +unlist_element <- function(x, element) + unlist(lapply(x, "[[", element, exact=TRUE), recursive=FALSE, use.names=FALSE) + diff --git a/inst/exdata/sann.rda b/inst/exdata/sann.rda index e9838b8b..0d8acdab 100644 Binary files a/inst/exdata/sann.rda and b/inst/exdata/sann.rda differ diff --git a/tests/testthat/helper-common.R b/tests/testthat/helper-common.R index ad53b142..e1699435 100644 --- a/tests/testthat/helper-common.R +++ b/tests/testthat/helper-common.R @@ -1,7 +1,7 @@ # This file is loaded automatically by testthat. 
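# Usage sketch for the unlist_element() helper added to R/utils.R above: it pulls
# one named field out of a list of per-experiment result lists as a flat vector.
out <- list(list(cost = 0.1, time = 2), list(cost = 0.4, time = 3))
unlist(lapply(out, "[[", "cost", exact = TRUE), recursive = FALSE, use.names = FALSE)
# 0.1 0.4 -- equivalent to unlist_element(out, "cost")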
generate_set_seed <- function() { - seed <- sample(2^30, 1) + seed <- sample.int(min(2147483647L, .Machine$integer.max), size = 1L, replace = TRUE) cat("Seed: ", seed, "\n") set.seed(seed) } @@ -117,6 +117,7 @@ target_runner_capping_xy <- function(experiment, scenario) x <- configuration[["x"]] y <- configuration[["y"]] + value <- switch(instance, ackley = f_ackley(x, y), goldestein = f_goldestein_price(x, y), @@ -127,8 +128,14 @@ target_runner_capping_xy <- function(experiment, scenario) list(cost = value, time=min(value + 0.1, bound), call = toString(experiment)) } -irace_capping_xy <- function(...) +irace_capping_xy <- function(..., targetRunner = force(target_runner_capping_xy)) { + # Silence Error in `save(iraceResults, file = logfile, version = 3L)`: (converted from warning) 'package:irace' may not be available when loading + # See https://github.com/r-lib/testthat/issues/2044 + if (!is.null(attr(environment(targetRunner), "name", exact=TRUE))) { + environment(targetRunner) <- globalenv() + } + args <- list(...) parameters_table <- ' x "" r (0, 1.00) @@ -138,7 +145,7 @@ irace_capping_xy <- function(...) parameters <- readParameters(text = parameters_table) logFile <- withr::local_tempfile(fileext=".Rdata") scenario <- list(instances = c("ackley", "goldestein", "matyas", "himmelblau"), - targetRunner = target_runner_capping_xy, + targetRunner = targetRunner, capping = TRUE, boundMax = 80, testType = "t-test", @@ -148,10 +155,25 @@ irace_capping_xy <- function(...) scenario <- modifyList(scenario, args) scenario <- checkScenario (scenario) - irace:::checkTargetFiles(scenario = scenario) + expect_true(irace:::checkTargetFiles(scenario = scenario)) confs <- irace(scenario = scenario) best_conf <- getFinalElites(scenario$logFile, n = 1L, drop.metadata = TRUE) expect_identical(removeConfigurationsMetaData(confs[1L, , drop = FALSE]), best_conf) } + +# Useful for testing recovery. +get_target_runner_error <- function(target_runner, limit) +{ + counter <- force(limit) + targetRunner <- force(target_runner) + + function(experiment, scenario) { + counter <<- counter - 1L + if (counter <= 0L) + return(list(cost=NA)) + targetRunner(experiment, scenario) + } +} + diff --git a/tests/testthat/test-blocksize.R b/tests/testthat/test-blocksize.R index cbb587b6..8554a1c9 100644 --- a/tests/testthat/test-blocksize.R +++ b/tests/testthat/test-blocksize.R @@ -1,6 +1,12 @@ withr::with_output_sink("test-blocksize.Rout", { -cap_irace <- function(...) +cap_irace <- function(..., targetRunner = force(target_runner_capping_xy)) { + # Silence Error in `save(iraceResults, file = logfile, version = 3L)`: (converted from warning) 'package:irace' may not be available when loading + # See https://github.com/r-lib/testthat/issues/2044 + if (!is.null(attr(environment(targetRunner), "name", exact=TRUE))) { + environment(targetRunner) <- globalenv() + } + args <- list(...) parameters_table <- ' x "" r (0, 1.00) @@ -10,7 +16,7 @@ cap_irace <- function(...) 
   logFile <- withr::local_tempfile(fileext=".Rdata")
   scenario <- list(instances = c("ackley", "goldestein", "matyas", "himmelblau"),
-                   targetRunner = target_runner_capping_xy,
+                   targetRunner = targetRunner,
                    capping = TRUE,
                    blockSize = 4,
                    boundMax = 80,
diff --git a/tests/testthat/test-maxTime.R b/tests/testthat/test-maxTime.R
index fd5f524a..e486ab6d 100644
--- a/tests/testthat/test-maxTime.R
+++ b/tests/testthat/test-maxTime.R
@@ -5,31 +5,27 @@ target_runner <- function(experiment, scenario)
   configuration <- experiment$configuration
   tmax <- configuration[["tmax"]]
   temp <- configuration[["temp"]]
-  stopifnot(is.numeric(tmax))
-  stopifnot(is.numeric(temp))
   time <- max(1, abs(rnorm(1, mean=(tmax+temp)/10)))
-  list(cost = time, time = time, call = toString(experiment))
+  list(cost = time, time = time)
 }

 time_irace <- function(...)
 {
   args <- list(...)
-  weights <- rnorm(200, mean = 0.9, sd = 0.02)
-  test_weights <- rnorm(2, mean = 0.9, sd = 0.02)
   parameters <- readParameters(text = '
   tmax "" i (1, 50)
   temp "" r (0, 10)
   dummy "" c ("dummy")
   ')
   scenario <- list(targetRunner = target_runner,
-                   instances = weights,
-                   testInstances = test_weights,
+                   instances = 1:10,
+                   testInstances = 11:20,
                    seed = 1234567,
                    parameters = parameters)
   scenario <- modifyList(scenario, args)
   scenario <- checkScenario (scenario)
-  irace:::checkTargetFiles(scenario = scenario)
+  expect_true(irace:::checkTargetFiles(scenario = scenario))
   confs <- irace(scenario = scenario)
   final_ids <- sort(as.character(confs$.ID.[1:scenario$testNbElites]))
diff --git a/tests/testthat/test-recovery.R b/tests/testthat/test-recovery.R
new file mode 100644
index 00000000..6a61d8c4
--- /dev/null
+++ b/tests/testthat/test-recovery.R
@@ -0,0 +1,102 @@
+withr::with_output_sink("test-recovery.Rout", {
+
+test_that("recovery works", {
+
+target_runner_xy <- function(experiment, scenario)
+{
+  configuration <- experiment$configuration
+  instance <- experiment$instance
+
+  x <- configuration[["x"]]
+  y <- configuration[["y"]]
+  value <- switch(instance,
+                  ackley = f_ackley(x, y),
+                  goldestein = f_goldestein_price(x, y),
+                  matyas = f_matyas(x, y),
+                  himmelblau = f_himmelblau(x, y))
+  list(cost = value)
+}
+  parameters_table <- '
+  x "" r (0, 1.00)
+  y "" r (0, 1.00)
+  '
+
+  parameters <- readParameters(text = parameters_table)
+  logFile <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+
+  seed <- sample.int(min(2147483647L, .Machine$integer.max), size = 1, replace = TRUE)
+
+  scenario <- list(
+    instances = c("ackley", "goldestein", "matyas", "himmelblau"),
+    parameters = parameters,
+    targetRunner = target_runner_xy,
+    logFile = logFile,
+    seed = seed,
+    maxExperiments = 500L)
+
+  confs <- irace(scenario = scenario)
+
+  scenario$targetRunner <- get_target_runner_error(target_runner_xy, 200L)
+  parent.env(environment(scenario$targetRunner)) <- globalenv()
+  # Otherwise, the tests are too fast relative to the minimum interval between saves of recovery info.
+  with_mocked_bindings({
+    expect_error(irace(scenario = scenario), "== irace == The cost returned by targetRunner is not numeric")
+  },
+  .irace_minimum_saving_time = 0
+  )
+
+
+  logFile_new <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+  scenario <- modifyList(scenario, list(
+    targetRunner = target_runner_xy,
+    recoveryFile = logFile,
+    seed = NA,
+    logFile = logFile_new))
+  recover_confs <- irace(scenario = scenario)
+  expect_identical(confs, recover_confs)
+})
+
+test_that("recovery maxTime", {
+  target_runner <- function(experiment, scenario) {
+    configuration <- experiment$configuration
+    tmax <- configuration[["tmax"]]
+    temp <- configuration[["temp"]]
+    time <- max(1, abs(rnorm(1, mean=(tmax+temp)/10)))
+    list(cost = time, time = time)
+  }
+  parameters <- readParameters(text = '
+  tmax "" i (1, 50)
+  temp "" r (0, 10)
+  ')
+  logFile <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+  seed <- 1234567
+
+  scenario <- list(targetRunner = target_runner,
+                   instances = 1:10,
+                   seed = seed,
+                   maxTime = 500,
+                   logFile = logFile,
+                   parameters = parameters)
+
+  confs <- irace(scenario = scenario)
+
+  scenario$targetRunner <- get_target_runner_error(target_runner, 200L)
+  parent.env(environment(scenario$targetRunner)) <- globalenv()
+  # Otherwise, the tests are too fast relative to the minimum interval between saves of recovery info.
+  with_mocked_bindings({
+    expect_error(irace(scenario = scenario), "== irace == The cost returned by targetRunner is not numeric")
+  },
+  .irace_minimum_saving_time = 0
+  )
+
+  logFile_new <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+  scenario <- modifyList(scenario, list(
+    targetRunner = target_runner,
+    recoveryFile = logFile,
+    seed = NA,
+    logFile = logFile_new))
+  recover_confs <- irace(scenario = scenario)
+  expect_identical(confs, recover_confs)
+})
+
+}) # withr::with_output_sink()
diff --git a/tests/testthat/test-targeteval.R b/tests/testthat/test-targeteval.R
index 76936a94..876f36f4 100644
--- a/tests/testthat/test-targeteval.R
+++ b/tests/testthat/test-targeteval.R
@@ -1,27 +1,99 @@
 withr::with_output_sink("test-targeteval.Rout", {
-test_that("target.evaluator", {
+  target_evaluator <- function(experiment, num_configurations, all_conf_id,
+                               scenario, target_runner_call) {
+    withr::local_seed(experiment$seed)
+    list(cost = runif(1), call = deparse1(experiment))
+  }

-target_runner <- function(experiment, scenario)
-  list(call = toString(experiment))
-
-target_evaluator <- function(experiment, num_configurations, all_conf_id,
-                             scenario, target_runner_call)
-  list(cost = runif(1), call = toString(experiment))
-
-parameters <- readParameters(text = '
+  parameters <- readParameters(text = '
 algorithm "--" c (as,mmas,eas,ras,acs)
 ')
-  generate_set_seed()
-  scenario <- checkScenario(list(
+test_that("target_evaluator", {
+
+  target_runner <- function(experiment, scenario) {
+    list(call = deparse1(experiment))
+  }
+  seed <- sample.int(min(2147483647L, .Machine$integer.max), size = 1L, replace = TRUE)
+  instances <- 1:10
+  logFile <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+
+  scenario <- checkScenario(list(
+    targetRunner = target_runner, targetEvaluator = target_evaluator,
+    maxExperiments = 200, instances = instances,
+    logFile = logFile,
+    seed = seed,
+    parameters = parameters))
+
+  expect_true(irace:::checkTargetFiles(scenario = scenario))
+
+  scenario <- checkScenario(list(
+    targetRunner = target_runner, targetEvaluator = target_evaluator,
+    maxExperiments = 200, instances = instances,
+    logFile = logFile,
+    seed = seed,
+    parameters = parameters))
+
+  confs <- irace(scenario = scenario)
+  expect_gt(nrow(confs), 0L)
+
+  scenario$targetRunner <- get_target_runner_error(target_runner, 50L)
+  parent.env(environment(scenario$targetRunner)) <- globalenv()
+  # Otherwise, the tests are too fast relative to the minimum interval between saves of recovery info.
+  with_mocked_bindings({
+    expect_error(irace(scenario = scenario), "== irace == The cost returned by targetRunner is not numeric")
+  },
+  .irace_minimum_saving_time = 0
+  )
+
+  logFile_new <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+  scenario <- modifyList(scenario, list(
+    targetRunner = target_runner,
+    recoveryFile = logFile,
+    seed = NA,
+    logFile = logFile_new))
+  recover_confs <- irace(scenario = scenario)
+  expect_identical(confs, recover_confs)
+})
+
+test_that("target_evaluator maxTime", {
+
+  target_runner <- function(experiment, scenario) {
+    withr::local_seed(experiment$seed)
+    list(time = min(experiment$bound, as.integer(1 + 10*runif(1))))
+  }
+
+  seed <- sample.int(min(2147483647L, .Machine$integer.max), size = 1L, replace = TRUE)
+  instances <- 1:10
+  logFile <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+  scenario <- checkScenario(list(
     targetRunner = target_runner, targetEvaluator = target_evaluator,
-    maxExperiments = 200, instances = runif(100),
+    maxTime = 2000, boundMax = 10, instances = instances,
+    logFile = logFile, seed = seed,
     parameters = parameters))
-  irace:::checkTargetFiles(scenario = scenario)
-  confs <- irace(scenario = scenario)
-  expect_gt(nrow(confs), 0L)
-})
+  expect_true(scenario$capping)
+  confs <- irace(scenario = scenario)
+  expect_gt(nrow(confs), 0L)
+  scenario$targetRunner <- get_target_runner_error(target_runner, 100L)
+  parent.env(environment(scenario$targetRunner)) <- globalenv()
+  # Otherwise, the tests are too fast relative to the minimum interval between saves of recovery info.
+  with_mocked_bindings({
+    expect_error(irace(scenario = scenario), "== irace == The cost returned by targetRunner is not numeric")
+  },
+  .irace_minimum_saving_time = 0
+  )
+
+  logFile_new <- withr::local_tempfile(pattern = "irace", fileext = ".Rdata")
+  scenario <- modifyList(scenario, list(
+    targetRunner = target_runner,
+    recoveryFile = logFile,
+    seed = NA,
+    logFile = logFile_new))
+  recover_confs <- irace(scenario = scenario)
+  expect_identical(confs, recover_confs)
+
+})
 }) # withr::with_output_sink()
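The new tests above exercise the recovery round-trip end to end: run irace with a logFile, interrupt the run part-way through, then rerun with recoveryFile pointing at that logFile. Purely as an illustration (not something introduced by this patch), the same pattern at the user level might look like the sketch below; `my_target_runner`, `my_instances` and the file names are hypothetical placeholders, and the interval between recovery saves is controlled by `.irace_minimum_saving_time`.

    library(irace)

    parameters <- readParameters(text = '
    x "" r (0, 1)
    y "" r (0, 1)
    ')

    scenario <- list(
      targetRunner   = my_target_runner,  # hypothetical target runner
      instances      = my_instances,      # hypothetical instance list
      parameters     = parameters,
      logFile        = "irace.Rdata",
      maxExperiments = 500L)

    # First attempt; suppose it is interrupted part-way through.  While it
    # runs, irace periodically saves recovery info into logFile.
    try(irace(scenario = scenario))

    # Resume: point recoveryFile at the old logFile and write the resumed run
    # to a fresh logFile.  The recovery file must have been written by the
    # same version of irace.
    scenario <- modifyList(scenario, list(
      recoveryFile = "irace.Rdata",
      logFile      = "irace-resumed.Rdata",
      seed         = NA))
    confs <- irace(scenario = scenario)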