diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 9cffd946533..166c5f32055 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -588,9 +588,11 @@ typedef enum { * core has no available inputs to run (all inputs are on other cores or are * blocked waiting for kernel resources). A new marker is emitted each * time the tool analysis framework requests a new record from the scheduler and - * is given a wait status. There are no units of time here but each repetition + * is given an idle status. There are no units of time here but each repetition * is roughly the time where a regular record could have been read and passed - * along. + * along. This idle marker indicates that a core actually had no work to do, + * as opposed to #TRACE_MARKER_TYPE_CORE_WAIT which is an artifact of an + * imposed re-created schedule. */ TRACE_MARKER_TYPE_CORE_IDLE, diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index a4109158902..e9494f44a80 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -1683,6 +1683,11 @@ scheduler_tmpl_t::pick_next_input_as_previously( // XXX i#5843: We may want to provide a kernel-mediated wait // feature so a multi-threaded simulator doesn't have to do a // spinning poll loop. + // XXX i#5843: For replaying a schedule as it was traced with + // MAP_TO_RECORDED_OUTPUT there may have been true idle periods during + // tracing where some other process than the traced workload was + // scheduled on a core. If we could identify those, we should return + // STATUS_IDLE rather than STATUS_WAIT. VPRINT(this, 3, "next_record[%d]: waiting for input %d instr #%" PRId64 "\n", output, index, segment.start_instruction); // Give up this input and go into a wait state. diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index e9f999117bb..bf5d1808921 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -110,12 +110,12 @@ template class scheduler_tmpl_t { * For dynamic scheduling with cross-stream dependencies, the scheduler may pause * a stream if it gets ahead of another stream it should have a dependence on. * This value is also used for schedules following the recorded timestamps - * (#DEPENDENCY_TIMESTAMPS) to avoid one stream getting ahead of another. For - * replaying a schedule as it was traced with #MAP_TO_RECORDED_OUTPUT this can - * indicate an idle period on a core where the traced workload was not currently - * scheduled, but generally #STATUS_WAIT should be treated as artificial. + * (#DEPENDENCY_TIMESTAMPS) to avoid one stream getting ahead of another. + * #STATUS_WAIT should be treated as artificial, an artifact of enforcing a + * recorded schedule on concurrent differently-timed output streams. * Simulators are suggested to not advance simulated time for #STATUS_WAIT while - * they should advance time for #STATUS_IDLE. + * they should advance time for #STATUS_IDLE as the latter indicates a true + * lack of work. */ STATUS_WAIT, STATUS_INVALID, /**< Error condition. */ diff --git a/clients/drcachesim/tests/scheduler_launcher.cpp b/clients/drcachesim/tests/scheduler_launcher.cpp index 814a0b703fa..b0cac30df6e 100644 --- a/clients/drcachesim/tests/scheduler_launcher.cpp +++ b/clients/drcachesim/tests/scheduler_launcher.cpp @@ -152,11 +152,15 @@ void simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &scheduler, std::string &thread_sequence) { + // XXX: Could we share some code with the schedule_stats analysis tool? + // Some features are now duplicated in both. + static constexpr char THREAD_LETTER_INITIAL_START = 'A'; + static constexpr char THREAD_LETTER_SUBSEQUENT_START = 'a'; + static constexpr char WAIT_SYMBOL = '-'; + static constexpr char IDLE_SYMBOL = '_'; memref_t record; uint64_t micros = op_sched_time.get_value() ? get_current_microseconds() : 0; uint64_t cur_segment_instrs = 0; - // XXX: Should we remove this tool as the schedule_stats analysis tool has - // superceded it? We're basically duplicating new features in both. bool prev_was_wait = false, prev_was_idle = false; // Measure cpu usage by counting each next_record() as one cycle. uint64_t cycles_total = 0, cycles_busy = 0; @@ -168,9 +172,14 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch if (op_sched_time.get_value()) micros = get_current_microseconds(); ++cycles_total; + // Cache and reset here to ensure we reset on early return paths. + bool was_wait = prev_was_wait; + bool was_idle = prev_was_idle; + prev_was_wait = false; + prev_was_idle = false; if (status == scheduler_t::STATUS_WAIT) { - if (!prev_was_wait || cur_segment_instrs == op_print_every.get_value()) - thread_sequence += '-'; + if (!was_wait || cur_segment_instrs == op_print_every.get_value()) + thread_sequence += WAIT_SYMBOL; ++cur_segment_instrs; if (cur_segment_instrs == op_print_every.get_value()) cur_segment_instrs = 0; @@ -178,8 +187,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch std::this_thread::yield(); continue; } else if (status == scheduler_t::STATUS_IDLE) { - if (!prev_was_idle || cur_segment_instrs == op_print_every.get_value()) - thread_sequence += '_'; + if (!was_idle || cur_segment_instrs == op_print_every.get_value()) + thread_sequence += IDLE_SYMBOL; ++cur_segment_instrs; if (cur_segment_instrs == op_print_every.get_value()) cur_segment_instrs = 0; @@ -215,9 +224,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch scheduler_t::input_ordinal_t input = stream->get_input_stream_ordinal(); if (input != prev_input) { // We convert to letters which only works well for <=26 inputs. - if (!thread_sequence.empty()) - thread_sequence += ','; - thread_sequence += 'A' + static_cast(input % 26); + thread_sequence += + THREAD_LETTER_INITIAL_START + static_cast(input % 26); cur_segment_instrs = 0; if (op_verbose.get_value() >= 2) { std::ostringstream line; @@ -248,7 +256,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch if (type_is_instr(record.instr.type)) { ++cur_segment_instrs; if (cur_segment_instrs == op_print_every.get_value()) { - thread_sequence += 'A' + static_cast(input % 26); + thread_sequence += + THREAD_LETTER_SUBSEQUENT_START + static_cast(input % 26); cur_segment_instrs = 0; } } @@ -272,8 +281,10 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch float usage = 0; if (cycles_total > 0) usage = 100.f * cycles_busy / static_cast(cycles_total); - std::cerr << "Core #" << std::setw(2) << ordinal << " usage: " << std::setw(9) - << usage << "%\n"; + std::ostringstream line; + line << "Core #" << std::setw(2) << ordinal << " usage: " << std::setw(9) << usage + << "%\n"; + std::cerr << line.str(); } } // namespace diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp index fdc250fb4b8..67bfe69b0e5 100644 --- a/clients/drcachesim/tools/schedule_stats.cpp +++ b/clients/drcachesim/tools/schedule_stats.cpp @@ -160,13 +160,18 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref line << "\n"; std::cerr << line.str(); } + // Cache and reset here to ensure we reset on early return paths. + bool was_wait = shard->prev_was_wait; + bool was_idle = shard->prev_was_idle; + shard->prev_was_wait = false; + shard->prev_was_idle = false; if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) { ++shard->counters.waits; - if (!shard->prev_was_wait) { + shard->prev_was_wait = true; + if (!was_wait) { shard->thread_sequence += WAIT_SYMBOL; shard->cur_segment_instrs = 0; - shard->prev_was_wait = true; } else { ++shard->cur_segment_instrs; if (shard->cur_segment_instrs == knob_print_every_) { @@ -178,10 +183,10 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } else if (memref.marker.type == TRACE_TYPE_MARKER && memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_IDLE) { ++shard->counters.idles; - if (!shard->prev_was_idle) { + shard->prev_was_idle = true; + if (!was_idle) { shard->thread_sequence += IDLE_SYMBOL; shard->cur_segment_instrs = 0; - shard->prev_was_idle = true; } else { ++shard->cur_segment_instrs; if (shard->cur_segment_instrs == knob_print_every_) { @@ -251,8 +256,6 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref } } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT) shard->saw_exit = true; - shard->prev_was_wait = false; - shard->prev_was_idle = false; return true; }