diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h
index 9cffd946533..166c5f32055 100644
--- a/clients/drcachesim/common/trace_entry.h
+++ b/clients/drcachesim/common/trace_entry.h
@@ -588,9 +588,11 @@ typedef enum {
      * core has no available inputs to run (all inputs are on other cores or are
      * blocked waiting for kernel resources).  A new marker is emitted each
      * time the tool analysis framework requests a new record from the scheduler and
-     * is given a wait status.  There are no units of time here but each repetition
+     * is given an idle status.  There are no units of time here but each repetition
      * is roughly the time where a regular record could have been read and passed
-     * along.
+     * along.  This idle marker indicates that a core actually had no work to do,
+     * as opposed to #TRACE_MARKER_TYPE_CORE_WAIT which is an artifact of an
+     * imposed re-created schedule.
      */
     TRACE_MARKER_TYPE_CORE_IDLE,
 
diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp
index a4109158902..e9494f44a80 100644
--- a/clients/drcachesim/scheduler/scheduler.cpp
+++ b/clients/drcachesim/scheduler/scheduler.cpp
@@ -1683,6 +1683,11 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input_as_previously(
             // XXX i#5843: We may want to provide a kernel-mediated wait
             // feature so a multi-threaded simulator doesn't have to do a
             // spinning poll loop.
+            // XXX i#5843: For replaying a schedule as it was traced with
+            // MAP_TO_RECORDED_OUTPUT there may have been true idle periods during
+            // tracing where some other process than the traced workload was
+            // scheduled on a core.  If we could identify those, we should return
+            // STATUS_IDLE rather than STATUS_WAIT.
             VPRINT(this, 3, "next_record[%d]: waiting for input %d instr #%" PRId64 "\n",
                    output, index, segment.start_instruction);
             // Give up this input and go into a wait state.
diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
index e9f999117bb..bf5d1808921 100644
--- a/clients/drcachesim/scheduler/scheduler.h
+++ b/clients/drcachesim/scheduler/scheduler.h
@@ -110,12 +110,12 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
          * For dynamic scheduling with cross-stream dependencies, the scheduler may pause
          * a stream if it gets ahead of another stream it should have a dependence on.
          * This value is also used for schedules following the recorded timestamps
-         * (#DEPENDENCY_TIMESTAMPS) to avoid one stream getting ahead of another.  For
-         * replaying a schedule as it was traced with #MAP_TO_RECORDED_OUTPUT this can
-         * indicate an idle period on a core where the traced workload was not currently
-         * scheduled, but generally #STATUS_WAIT should be treated as artificial.
+         * (#DEPENDENCY_TIMESTAMPS) to avoid one stream getting ahead of another.
+         * #STATUS_WAIT should be treated as artificial, an artifact of enforcing a
+         * recorded schedule on concurrent differently-timed output streams.
          * Simulators are suggested to not advance simulated time for #STATUS_WAIT while
-         * they should advance time for #STATUS_IDLE.
+         * they should advance time for #STATUS_IDLE as the latter indicates a true
+         * lack of work.
          */
         STATUS_WAIT,
         STATUS_INVALID,         /**< Error condition. */
diff --git a/clients/drcachesim/tests/scheduler_launcher.cpp b/clients/drcachesim/tests/scheduler_launcher.cpp
index 814a0b703fa..b0cac30df6e 100644
--- a/clients/drcachesim/tests/scheduler_launcher.cpp
+++ b/clients/drcachesim/tests/scheduler_launcher.cpp
@@ -152,11 +152,15 @@ void
 simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &scheduler,
               std::string &thread_sequence)
 {
+    // XXX: Could we share some code with the schedule_stats analysis tool?
+    // Some features are now duplicated in both.
+    static constexpr char THREAD_LETTER_INITIAL_START = 'A';
+    static constexpr char THREAD_LETTER_SUBSEQUENT_START = 'a';
+    static constexpr char WAIT_SYMBOL = '-';
+    static constexpr char IDLE_SYMBOL = '_';
     memref_t record;
     uint64_t micros = op_sched_time.get_value() ? get_current_microseconds() : 0;
     uint64_t cur_segment_instrs = 0;
-    // XXX: Should we remove this tool as the schedule_stats analysis tool has
-    // superceded it?  We're basically duplicating new features in both.
     bool prev_was_wait = false, prev_was_idle = false;
     // Measure cpu usage by counting each next_record() as one cycle.
     uint64_t cycles_total = 0, cycles_busy = 0;
@@ -168,9 +172,14 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
         if (op_sched_time.get_value())
             micros = get_current_microseconds();
         ++cycles_total;
+        // Cache and reset here to ensure we reset on early return paths.
+        bool was_wait = prev_was_wait;
+        bool was_idle = prev_was_idle;
+        prev_was_wait = false;
+        prev_was_idle = false;
         if (status == scheduler_t::STATUS_WAIT) {
-            if (!prev_was_wait || cur_segment_instrs == op_print_every.get_value())
-                thread_sequence += '-';
+            if (!was_wait || cur_segment_instrs == op_print_every.get_value())
+                thread_sequence += WAIT_SYMBOL;
             ++cur_segment_instrs;
             if (cur_segment_instrs == op_print_every.get_value())
                 cur_segment_instrs = 0;
@@ -178,8 +187,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
             std::this_thread::yield();
             continue;
         } else if (status == scheduler_t::STATUS_IDLE) {
-            if (!prev_was_idle || cur_segment_instrs == op_print_every.get_value())
-                thread_sequence += '_';
+            if (!was_idle || cur_segment_instrs == op_print_every.get_value())
+                thread_sequence += IDLE_SYMBOL;
             ++cur_segment_instrs;
             if (cur_segment_instrs == op_print_every.get_value())
                 cur_segment_instrs = 0;
@@ -215,9 +224,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
         scheduler_t::input_ordinal_t input = stream->get_input_stream_ordinal();
         if (input != prev_input) {
             // We convert to letters which only works well for <=26 inputs.
-            if (!thread_sequence.empty())
-                thread_sequence += ',';
-            thread_sequence += 'A' + static_cast<char>(input % 26);
+            thread_sequence +=
+                THREAD_LETTER_INITIAL_START + static_cast<char>(input % 26);
             cur_segment_instrs = 0;
             if (op_verbose.get_value() >= 2) {
                 std::ostringstream line;
@@ -248,7 +256,8 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
         if (type_is_instr(record.instr.type)) {
             ++cur_segment_instrs;
             if (cur_segment_instrs == op_print_every.get_value()) {
-                thread_sequence += 'A' + static_cast<char>(input % 26);
+                thread_sequence +=
+                    THREAD_LETTER_SUBSEQUENT_START + static_cast<char>(input % 26);
                 cur_segment_instrs = 0;
             }
         }
@@ -272,8 +281,10 @@ simulate_core(int ordinal, scheduler_t::stream_t *stream, const scheduler_t &sch
     float usage = 0;
     if (cycles_total > 0)
         usage = 100.f * cycles_busy / static_cast<float>(cycles_total);
-    std::cerr << "Core #" << std::setw(2) << ordinal << " usage: " << std::setw(9)
-              << usage << "%\n";
+    std::ostringstream line;
+    line << "Core #" << std::setw(2) << ordinal << " usage: " << std::setw(9) << usage
+         << "%\n";
+    std::cerr << line.str();
 }
 
 } // namespace
diff --git a/clients/drcachesim/tools/schedule_stats.cpp b/clients/drcachesim/tools/schedule_stats.cpp
index fdc250fb4b8..67bfe69b0e5 100644
--- a/clients/drcachesim/tools/schedule_stats.cpp
+++ b/clients/drcachesim/tools/schedule_stats.cpp
@@ -160,13 +160,18 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref
         line << "\n";
         std::cerr << line.str();
     }
+    // Cache and reset here to ensure we reset on early return paths.
+    bool was_wait = shard->prev_was_wait;
+    bool was_idle = shard->prev_was_idle;
+    shard->prev_was_wait = false;
+    shard->prev_was_idle = false;
     if (memref.marker.type == TRACE_TYPE_MARKER &&
         memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_WAIT) {
         ++shard->counters.waits;
-        if (!shard->prev_was_wait) {
+        shard->prev_was_wait = true;
+        if (!was_wait) {
             shard->thread_sequence += WAIT_SYMBOL;
             shard->cur_segment_instrs = 0;
-            shard->prev_was_wait = true;
         } else {
             ++shard->cur_segment_instrs;
             if (shard->cur_segment_instrs == knob_print_every_) {
@@ -178,10 +183,10 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref
     } else if (memref.marker.type == TRACE_TYPE_MARKER &&
                memref.marker.marker_type == TRACE_MARKER_TYPE_CORE_IDLE) {
         ++shard->counters.idles;
-        if (!shard->prev_was_idle) {
+        shard->prev_was_idle = true;
+        if (!was_idle) {
             shard->thread_sequence += IDLE_SYMBOL;
             shard->cur_segment_instrs = 0;
-            shard->prev_was_idle = true;
         } else {
             ++shard->cur_segment_instrs;
             if (shard->cur_segment_instrs == knob_print_every_) {
@@ -251,8 +256,6 @@ schedule_stats_t::parallel_shard_memref(void *shard_data, const memref_t &memref
         }
     } else if (memref.exit.type == TRACE_TYPE_THREAD_EXIT)
         shard->saw_exit = true;
-    shard->prev_was_wait = false;
-    shard->prev_was_idle = false;
     return true;
 }