From 278fc510a2adb237cd8983dd2ec112e83e6a7d12 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Tue, 12 Dec 2023 12:39:45 -0500 Subject: [PATCH] i#5843 scheduler: Accumulate quanta across runs (#6502) Changes the quanta accounting to match the real kernel by accumulating it across executions if a prior execution was terminated early due to a voluntary context switch. Adds new testing, and updates old tests with the behavior change. Scheduler unit test string changes were carefully vetted. E.g., for test_synthetic_with_syscalls_multiple(): the output strings changed because H's quantum accumulates and it hits a preempt in the middle of its second HH sequence, which decrements B's quantum, causing B to become available sooner. Issue: #5843 --- clients/drcachesim/scheduler/scheduler.cpp | 40 ++++++++++++----- clients/drcachesim/scheduler/scheduler.h | 3 +- .../drcachesim/tests/scheduler_unit_tests.cpp | 45 ++++++++++++++----- 3 files changed, 63 insertions(+), 25 deletions(-) diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 344e0e313a1..393d42a40a9 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -1724,8 +1724,7 @@ scheduler_tmpl_t::set_cur_input(output_ordinal_t output, if (prev_input == input) return STATUS_OK; std::lock_guard lock(*inputs_[input].lock); - inputs_[input].instrs_in_quantum = 0; - inputs_[input].start_time_in_quantum = outputs_[output].cur_time; + inputs_[input].prev_time_in_quantum = outputs_[output].cur_time; if (options_.schedule_record_ostream != nullptr) { uint64_t instr_ord = inputs_[input].reader->get_instruction_ordinal(); if (!inputs_[input].recorded_in_schedule && instr_ord == 1) { @@ -2104,8 +2103,8 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, auto lock = std::unique_lock(*input->lock); // Since we do not ask for a start time, we have to check for the first record from // each input and set the time here. - if (input->start_time_in_quantum == 0) - input->start_time_in_quantum = cur_time; + if (input->prev_time_in_quantum == 0) + input->prev_time_in_quantum = cur_time; if (!outputs_[output].speculation_stack.empty()) { outputs_[output].prev_speculate_pc = outputs_[output].speculate_pc; error_string_ = outputs_[output].speculator.next_record( @@ -2170,7 +2169,9 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, input->index, input->reader->get_instruction_ordinal()); VDO(this, 5, print_record(record);); bool need_new_input = false; + bool preempt = false; double block_time_factor = 0.; + uint64_t prev_time_in_quantum = 0; if (options_.mapping == MAP_AS_PREVIOUSLY) { assert(outputs_[output].record_index >= 0); if (outputs_[output].record_index >= @@ -2275,28 +2276,32 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, VPRINT(this, 4, "next_record[%d]: input %d hit end of instr quantum\n", output, input->index); + preempt = !need_new_input; need_new_input = true; + input->instrs_in_quantum = 0; } } else if (options_.quantum_unit == QUANTUM_TIME) { - if (cur_time == 0 || cur_time < input->start_time_in_quantum) { + if (cur_time == 0 || cur_time < input->prev_time_in_quantum) { VPRINT(this, 1, "next_record[%d]: invalid time %" PRIu64 " vs start %" PRIu64 "\n", - output, cur_time, input->start_time_in_quantum); + output, cur_time, input->prev_time_in_quantum); return sched_type_t::STATUS_INVALID; } - if (cur_time - input->start_time_in_quantum >= - options_.quantum_duration && + input->time_spent_in_quantum += cur_time - input->prev_time_in_quantum; + prev_time_in_quantum = input->prev_time_in_quantum; + input->prev_time_in_quantum = cur_time; + if (input->time_spent_in_quantum >= options_.quantum_duration && // We only switch on instruction boundaries. We could possibly switch // in between (e.g., scatter/gather long sequence of reads/writes) by // setting input->switching_pre_instruction. record_type_is_instr(record)) { VPRINT(this, 4, - "next_record[%d]: hit end of time quantum after %" PRIu64 - " (%" PRIu64 " - %" PRIu64 ")\n", - output, cur_time - input->start_time_in_quantum, cur_time, - input->start_time_in_quantum); + "next_record[%d]: hit end of time quantum after %" PRIu64 "\n", + output, input->time_spent_in_quantum); + preempt = !need_new_input; need_new_input = true; + input->time_spent_in_quantum = 0; } } } @@ -2335,6 +2340,15 @@ scheduler_tmpl_t::next_record(output_ordinal_t output, lock.lock(); VPRINT(this, 5, "next_record_mid[%d]: switching from %d to %d\n", output, prev_input, outputs_[output].cur_input); + if (!preempt) { + if (options_.quantum_unit == QUANTUM_INSTRUCTIONS && + record_type_is_instr(record)) { + --inputs_[prev_input].instrs_in_quantum; + } else if (options_.quantum_unit == QUANTUM_TIME) { + inputs_[prev_input].time_spent_in_quantum -= + (cur_time - prev_time_in_quantum); + } + } if (res == sched_type_t::STATUS_WAIT) return res; input = &inputs_[outputs_[output].cur_input]; @@ -2392,6 +2406,8 @@ scheduler_tmpl_t::unread_last_record(output_ordinal_t ou record = outinfo.last_record; input = &inputs_[outinfo.cur_input]; std::lock_guard lock(*input->lock); + VPRINT(this, 4, "next_record[%d]: unreading last record, from %d\n", output, + input->index); input->queue.push_back(outinfo.last_record); if (options_.quantum_unit == QUANTUM_INSTRUCTIONS && record_type_is_instr(record)) --input->instrs_in_quantum; diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 490afeecc6b..ef218a9d350 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -1015,7 +1015,8 @@ template class scheduler_tmpl_t { // Used to switch before we've read the next instruction. bool switching_pre_instruction = false; // Used for time-based quanta. - uint64_t start_time_in_quantum = 0; + uint64_t prev_time_in_quantum = 0; + uint64_t time_spent_in_quantum = 0; // These fields model waiting at a blocking syscall. double block_time_factor = 0.; uint64_t blocked_start_time = 0; // For QUANTUM_TIME only. diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 445785772a1..d41d2119798 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -898,22 +898,38 @@ test_synthetic() static constexpr int NUM_OUTPUTS = 2; static constexpr int NUM_INSTRS = 9; static constexpr int QUANTUM_DURATION = 3; + static constexpr double BLOCK_SCALE = 0.1; static constexpr memref_tid_t TID_BASE = 100; std::vector inputs[NUM_INPUTS]; for (int i = 0; i < NUM_INPUTS; i++) { memref_tid_t tid = TID_BASE + i; inputs[i].push_back(make_thread(tid)); inputs[i].push_back(make_pid(1)); - for (int j = 0; j < NUM_INSTRS; j++) + inputs[i].push_back(make_version(TRACE_ENTRY_VERSION)); + inputs[i].push_back(make_timestamp(10)); // All the same time priority. + for (int j = 0; j < NUM_INSTRS; j++) { inputs[i].push_back(make_instr(42 + j * 4)); + // Test accumulation of usage across voluntary switches. + if ((i == 0 || i == 1) && j == 1) { + inputs[i].push_back(make_timestamp(20)); + inputs[i].push_back(make_marker(TRACE_MARKER_TYPE_SYSCALL, 42)); + inputs[i].push_back( + make_marker(TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0)); + inputs[i].push_back(make_timestamp(120)); + } + } inputs[i].push_back(make_exit(tid)); } // Hardcoding here for the 2 outputs and 7 inputs. // We expect 3 letter sequences (our quantum) alternating every-other as each - // core alternates; with an odd number the 2nd core finishes early. - // The dots are thread exits. - static const char *const CORE0_SCHED_STRING = "AAACCCEEEGGGBBBDDDFFFAAA.CCC.EEE.GGG."; - static const char *const CORE1_SCHED_STRING = "BBBDDDFFFAAACCCEEEGGGBBB.DDD.FFF.____"; + // core alternates. The dots are markers and thread exits. + // A and B have a voluntary switch after their 1st 2 letters, but we expect + // the usage to persist to their next scheduling which should only have + // a single letter. + static const char *const CORE0_SCHED_STRING = + "..AA......CCC..EEE..GGGEEEABGGGDDD.AAABBBAAA.___"; + static const char *const CORE1_SCHED_STRING = + "..BB......DDD..FFFCCCDDDFFFCCC.EEE.FFF.GGG.BBB."; { // Test instruction quanta. std::vector sched_inputs; @@ -929,6 +945,8 @@ test_synthetic() scheduler_t::SCHEDULER_DEFAULTS, /*verbosity=*/3); sched_ops.quantum_duration = QUANTUM_DURATION; + // We do not want to block for very long. + sched_ops.block_time_scale = BLOCK_SCALE; scheduler_t scheduler; if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != scheduler_t::STATUS_SUCCESS) @@ -957,6 +975,8 @@ test_synthetic() /*verbosity=*/3); sched_ops.quantum_unit = scheduler_t::QUANTUM_TIME; sched_ops.quantum_duration = QUANTUM_DURATION; + // QUANTUM_INSTRUCTIONS divides by the threshold so to match we multiply. + sched_ops.block_time_scale = sched_ops.blocking_switch_threshold * BLOCK_SCALE; scheduler_t scheduler; if (scheduler.init(sched_inputs, NUM_OUTPUTS, sched_ops) != scheduler_t::STATUS_SUCCESS) @@ -1530,9 +1550,9 @@ test_synthetic_with_syscalls_multiple() // with the "." in run_lockstep_simulation(). The omitted "." markers also // explains why the two strings are different lengths. assert(sched_as_string[0] == - "BHHHFFFJJJJJJJBEEHHHIIIFFFAAAHHHBAAAGGGAAABGGG__B___B___B"); + "BHHHFFFJJJJJJJBEEHHHIIIBIIIEEDDDBAAAEEGGGBDDD___B___B___B___B"); assert(sched_as_string[1] == - "EECCCIIICCCJJFFFCCCBIIIEEDDDGGGDDDEEDDD____EB__________________________"); + "EECCCIIICCCJJFFFCCCFFFAAAHHHGGGDDDAAAGGGE__________________________"); } static void @@ -1900,9 +1920,10 @@ test_synthetic_with_syscalls_idle() // The timestamps provide the ABCD ordering, but A's blocking syscall after its // 2nd instr makes it delayed for 3 full queue cycles of BCD BCD: A's duration // of 2 is decremented after the 1st (to 1) and 2nd (to 0) and A is finally - // schedulable after the 3rd. + // schedulable after the 3rd, when it just gets 1 instruction in before its + // (accumulated) count equals the quantum. assert(sched_as_string[0] == - "..AA......BB.B..CC.C..DD.DBBBCCCDDDBBBCCCDDDAAABBB.CCC.DDD.AAAAAAA."); + "..AA......BB.B..CC.C..DD.DBBBCCCDDDBBBCCCDDDABBB.CCC.DDD.AAAAAAAAA."); } static void @@ -3285,7 +3306,6 @@ test_inactive() // Ensure cpu0 now picks up the input that was on cpu1. // This is also the record we un-read earlier. check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); - check_next(stream0, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); // End of quantum. check_next(stream0, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_INSTR); // Make cpu0 inactive and cpu1 active. @@ -3296,6 +3316,7 @@ test_inactive() assert(status == scheduler_t::STATUS_OK); // Now cpu1 should finish things. check_next(stream1, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); + check_next(stream1, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_INSTR); check_next(stream1, scheduler_t::STATUS_OK, TID_B, TRACE_TYPE_THREAD_EXIT); check_next(stream1, scheduler_t::STATUS_OK, TID_A, TRACE_TYPE_THREAD_EXIT); check_next(stream1, scheduler_t::STATUS_EOF); @@ -3326,8 +3347,8 @@ test_inactive() for (int i = 0; i < NUM_OUTPUTS; i++) { std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; } - assert(sched_as_string[0] == "..AABBA._"); - assert(sched_as_string[1] == "..B---B."); + assert(sched_as_string[0] == "..AABA.__"); + assert(sched_as_string[1] == "..B--BB."); } #endif // HAS_ZIP }