diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 73ae84bd76b..0ebbe89480c 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -944,47 +944,26 @@ scheduler_tmpl_t::set_initial_schedule( inputs_[input_idx].order_by_timestamp = true; } } - // Pick the starting inputs by sorting by relative time from each workload's - // base_timestamp, which our queue does for us. We want the rest of the - // inputs in the queue in any case so it is simplest to insert all and - // remove the first N rather than sorting the first N separately. - for (int i = 0; i < static_cast(inputs_.size()); ++i) { - add_to_ready_queue(&inputs_[i]); - } - for (int i = 0; i < static_cast(outputs_.size()); ++i) { - if (i < static_cast(inputs_.size())) { - input_info_t *queue_next; + // We'll pick the starting inputs below by sorting by relative time from + // each workload's base_timestamp, which our queue does for us. + } + // We need to honor output bindings and possibly time ordering, which our queue + // does for us. We want the rest of the inputs in the queue in any case so it is + // simplest to insert all and remove the first N. + for (int i = 0; i < static_cast(inputs_.size()); ++i) { + add_to_ready_queue(&inputs_[i]); + } + for (int i = 0; i < static_cast(outputs_.size()); ++i) { + input_info_t *queue_next; #ifndef NDEBUG - sched_type_t::stream_status_t status = + sched_type_t::stream_status_t status = #endif - pop_from_ready_queue(i, queue_next); - assert(status == STATUS_OK); // No blocked inputs yet. - if (queue_next == nullptr) - set_cur_input(i, INVALID_INPUT_ORDINAL); - else - set_cur_input(i, queue_next->index); - } else - set_cur_input(i, INVALID_INPUT_ORDINAL); - } - } else { - // Just take the 1st #outputs of schedulable (i.e., not "unscheduled") - // inputs (even if all from the same workload). - input_ordinal_t input = 0; - for (int i = 0; i < static_cast(outputs_.size()); ++i) { - while (input < static_cast(inputs_.size()) && - inputs_[input].unscheduled) { - add_to_ready_queue(&inputs_[input]); - ++input; - } - if (input < static_cast(inputs_.size())) - set_cur_input(i, input); - else - set_cur_input(i, INVALID_INPUT_ORDINAL); - ++input; - } - for (int i = input; i < static_cast(inputs_.size()); ++i) { - add_to_ready_queue(&inputs_[i]); - } + pop_from_ready_queue(i, queue_next); + assert(status == STATUS_OK || status == STATUS_IDLE); + if (queue_next == nullptr) + set_cur_input(i, INVALID_INPUT_ORDINAL); + else + set_cur_input(i, queue_next->index); } } return STATUS_SUCCESS; diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 2ec36c8f5ad..5611b664180 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -217,6 +217,12 @@ template class scheduler_tmpl_t { : output_binding(output_binding) { } + /** Convenience constructor for placing one thread on a set of cores. */ + input_thread_info_t(memref_tid_t tid, std::set output_binding) + : tids(1, tid) + , output_binding(output_binding) + { + } /** Size of the struct for binary-compatible additions. */ size_t struct_size = sizeof(input_thread_info_t); /** diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 056e34d7652..b49103559e5 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -1357,9 +1357,10 @@ test_synthetic_with_priorities() } static void -test_synthetic_with_bindings() +test_synthetic_with_bindings_time(bool time_deps) { - std::cerr << "\n----------------\nTesting synthetic with bindings\n"; + std::cerr << "\n----------------\nTesting synthetic with bindings (deps=" << time_deps + << ")\n"; static constexpr int NUM_WORKLOADS = 3; static constexpr int NUM_INPUTS_PER_WORKLOAD = 3; static constexpr int NUM_OUTPUTS = 5; @@ -1401,11 +1402,14 @@ test_synthetic_with_bindings() } sched_inputs.back().thread_modifiers.emplace_back(cores); } - - scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, - scheduler_t::DEPENDENCY_TIMESTAMPS, - scheduler_t::SCHEDULER_DEFAULTS, - /*verbosity=*/3); + scheduler_t::scheduler_options_t sched_ops( + scheduler_t::MAP_TO_ANY_OUTPUT, + // We expect the same output with time deps. We include it as a regression + // test for i#6874 which caused threads to start out on cores not on their + // binding lists, which fails the schedule string checks below. + time_deps ? scheduler_t::DEPENDENCY_TIMESTAMPS : scheduler_t::DEPENDENCY_IGNORE, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/3); sched_ops.quantum_duration = 3; scheduler_t scheduler; if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != @@ -1424,6 +1428,59 @@ test_synthetic_with_bindings() assert(sched_as_string[4] == ".BB.BA.AA.B.BB.AA.A.BB.B._____"); } +static void +test_synthetic_with_bindings_more_out() +{ + std::cerr << "\n----------------\nTesting synthetic with bindings and #out>#in\n"; + static constexpr int NUM_INPUTS = 3; + static constexpr int NUM_OUTPUTS = 4; + static constexpr int NUM_INSTRS = 9; + static constexpr memref_tid_t TID_BASE = 100; + std::vector sched_inputs; + for (int input_idx = 0; input_idx < NUM_INPUTS; input_idx++) { + std::vector readers; + memref_tid_t tid = TID_BASE + input_idx; + std::vector inputs; + inputs.push_back(make_thread(tid)); + inputs.push_back(make_pid(1)); + inputs.push_back(make_timestamp(10 + input_idx)); + for (int instr_idx = 0; instr_idx < NUM_INSTRS; instr_idx++) { + inputs.push_back(make_instr(42 + instr_idx * 4)); + } + inputs.push_back(make_exit(tid)); + readers.emplace_back(std::unique_ptr(new mock_reader_t(inputs)), + std::unique_ptr(new mock_reader_t()), tid); + sched_inputs.emplace_back(std::move(readers)); + // Bind the 1st 2 inputs to the same core to ensure the 3rd + // input gets scheduled even after an initially-unscheduled input. + if (input_idx < 2) { + std::set cores; + cores.insert(0); + scheduler_t::input_thread_info_t info(tid, cores); + sched_inputs.back().thread_modifiers.emplace_back(info); + } + } + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_IGNORE, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/3); + sched_ops.quantum_duration = 3; + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) + assert(false); + std::vector sched_as_string = + run_lockstep_simulation(scheduler, NUM_OUTPUTS, TID_BASE); + for (int i = 0; i < NUM_OUTPUTS; i++) { + std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; + } + // We have {A,B} on 0 and C anywhere. + assert(sched_as_string[0] == ".AAA.BBBAAABBBAAA.BBB."); + assert(sched_as_string[1] == ".CCCCCCCCC.___________"); + assert(sched_as_string[2] == "______________________"); + assert(sched_as_string[3] == "______________________"); +} + static void test_synthetic_with_bindings_weighted() { @@ -1495,6 +1552,15 @@ test_synthetic_with_bindings_weighted() assert(sched_as_string[4] == ".BB.BB.BB.BB.B._______________"); } +static void +test_synthetic_with_bindings() +{ + test_synthetic_with_bindings_time(/*time_deps=*/true); + test_synthetic_with_bindings_time(/*time_deps=*/false); + test_synthetic_with_bindings_more_out(); + test_synthetic_with_bindings_weighted(); +} + static void test_synthetic_with_syscalls_multiple() { @@ -5033,7 +5099,6 @@ test_main(int argc, const char *argv[]) test_synthetic_with_timestamps(); test_synthetic_with_priorities(); test_synthetic_with_bindings(); - test_synthetic_with_bindings_weighted(); test_synthetic_with_syscalls(); test_synthetic_multi_threaded(argv[1]); test_speculation();