i#5843 scheduler: Add direct thread switch marker (#6404)
Adds a new marker type TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH for use
with custom kernel scheduling features where one thread directly
switches to another on the same cpu.

Refactors raw2trace marker processing code to allow a subclass to insert
the new marker.

Makes the raw2trace blocking syscall code virtual to allow a subclass to
label custom syscalls as blocking.

Given that the changes are exercised by separate code, it is not simple to
write a test of the raw2trace refactoring and the new virtual methods here.
For the marker: tests that use the marker will be forthcoming in
scheduler_unit_tests.

Issue: #5843
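
Purely as illustration (not part of this commit): a subclass using the two new
virtual hooks might look roughly like the sketch below. The SYS_CUSTOM_SWITCH
number and the get_switch_target_tid() helper are hypothetical placeholders;
the overridden signatures match the raw2trace.h declarations added in this
commit, assuming they are accessible to subclasses as the commit message
indicates.

#include "raw2trace.h"

// Hypothetical subclass that labels a custom direct-switch syscall as
// maybe-blocking and emits TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH for it.
class custom_switch_raw2trace_t : public raw2trace_t {
public:
    using raw2trace_t::raw2trace_t; // Reuse the base constructors.

protected:
    static constexpr uintptr_t SYS_CUSTOM_SWITCH = 600; // Hypothetical number.

    bool
    is_maybe_blocking_syscall(uintptr_t number) override
    {
        // Treat the custom switch syscall as potentially blocking, in
        // addition to the base-class defaults.
        return number == SYS_CUSTOM_SWITCH ||
            raw2trace_t::is_maybe_blocking_syscall(number);
    }

    bool
    process_marker_additionally(raw2trace_thread_data_t *tdata,
                                trace_marker_type_t marker_type,
                                uintptr_t marker_val, byte *&buf,
                                OUT bool *flush_decode_cache) override
    {
        // Keep the default handling (rseq, filter endpoint, maybe-blocking).
        if (!raw2trace_t::process_marker_additionally(tdata, marker_type, marker_val,
                                                      buf, flush_decode_cache))
            return false;
        if (marker_type == TRACE_MARKER_TYPE_SYSCALL &&
            marker_val == SYS_CUSTOM_SWITCH) {
            // Emit the new marker; its value is the target thread's id.
            buf += trace_metadata_writer_t::write_marker(
                buf, TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH,
                get_switch_target_tid(tdata)); // Hypothetical helper.
        }
        return true;
    }

private:
    uintptr_t
    get_switch_target_tid(raw2trace_thread_data_t *tdata)
    {
        // Placeholder: a real subclass would recover the target thread id
        // from its own raw-trace data.
        return 0;
    }
};
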
derekbruening authored Nov 2, 2023
1 parent 8bdf01f commit 11c57d1
Showing 4 changed files with 95 additions and 62 deletions.
8 changes: 8 additions & 0 deletions clients/drcachesim/common/trace_entry.h
@@ -560,6 +560,14 @@ typedef enum {
*/
TRACE_MARKER_TYPE_SYSCALL_FAILED,

/**
* This marker is emitted prior to a system call that causes an immediate switch to
* another thread on the same core (with the current thread entering an unscheduled
* state), bypassing the kernel scheduler's normal dynamic switch code based on run
* queues. The marker value holds the thread id of the target thread.
*/
TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH,

// ...
// These values are reserved for future built-in marker types.
// ...
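For context on the new marker's semantics, here is a hedged, hypothetical
sketch of how an analysis tool could consume it. Only the memref marker fields
and the marker type itself come from this commit; the shard bookkeeping is
invented for illustration.

#include "memref.h"
#include "trace_entry.h"

// Per-shard state: the thread we expect the kernel to switch to directly.
struct shard_state_t {
    memref_tid_t pending_switch_target = 0; // 0 == no pending direct switch.
};

static void
observe_marker(shard_state_t *shard, const memref_t &memref)
{
    if (memref.marker.type != TRACE_TYPE_MARKER)
        return;
    if (memref.marker.marker_type == TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH) {
        // The marker precedes the switching syscall; its value is the id of
        // the thread the kernel switches to directly, bypassing run queues.
        shard->pending_switch_target =
            static_cast<memref_tid_t>(memref.marker.marker_value);
    }
}
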
4 changes: 4 additions & 0 deletions clients/drcachesim/tools/view.cpp
@@ -410,6 +410,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
case TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL:
std::cerr << "<marker: maybe-blocking system call>\n";
break;
case TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH:
std::cerr << "<marker: direct switch to thread " << memref.marker.marker_value
<< ">\n";
break;
case TRACE_MARKER_TYPE_WINDOW_ID:
// Handled above.
break;
129 changes: 70 additions & 59 deletions clients/drcachesim/tracer/raw2trace.cpp
@@ -585,62 +585,12 @@ raw2trace_t::process_offline_entry(raw2trace_thread_data_t *tdata,
uintptr_t marker_val = 0;
if (!get_marker_value(tdata, &in_entry, &marker_val))
return false;
buf += trace_metadata_writer_t::write_marker(
buf, (trace_marker_type_t)in_entry->extended.valueB, marker_val);
if (in_entry->extended.valueB == TRACE_MARKER_TYPE_KERNEL_EVENT) {
log(4, "Signal/exception between bbs\n");
// An rseq side exit may next hit a signal which is then the
// boundary of the rseq region.
if (tdata->rseq_past_end_) {
if (!adjust_and_emit_rseq_buffer(tdata, marker_val))
return false;
}
} else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_RSEQ_ABORT) {
log(4, "Rseq abort %d\n", tdata->rseq_past_end_);
if (!adjust_and_emit_rseq_buffer(tdata, marker_val, marker_val))
return false;
} else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_RSEQ_ENTRY) {
if (tdata->rseq_want_rollback_) {
if (tdata->rseq_buffering_enabled_) {
// Our rollback schemes do the minimal rollback: for a side
// exit, taking the last branch. This means we don't need the
// prior iterations in the buffer.
log(4, "Rseq was already buffered: assuming loop; emitting\n");
if (!adjust_and_emit_rseq_buffer(tdata, marker_val))
return false;
}
log(4,
"--- Reached rseq entry (end=0x%zx): buffering all output ---\n",
marker_val);
if (!tdata->rseq_ever_saw_entry_)
tdata->rseq_ever_saw_entry_ = true;
tdata->rseq_buffering_enabled_ = true;
tdata->rseq_end_pc_ = marker_val;
}
} else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_FILTER_ENDPOINT) {
log(2, "Reached filter endpoint\n");

// The file type needs to be updated during the switch to correctly
// process the entries that follow after. This does not affect the
// written-out type.
int file_type = get_file_type(tdata);
// We do not remove OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP here
// because that still stands true for this trace.
file_type &= ~(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_IFILTERED |
OFFLINE_FILE_TYPE_DFILTERED);
set_file_type(tdata, (offline_file_type_t)file_type);

// For the full trace, the cache contains block-level info unlike the
// filtered trace which contains instr-level info. Since we cannot use
// the decode cache entries after the transition, we need to flush the
// cache here.
*flush_decode_cache = true;
} else if (in_entry->extended.valueB == TRACE_MARKER_TYPE_SYSCALL &&
is_maybe_blocking_syscall(marker_val)) {
log(2, "Maybe-blocking syscall %zu\n", marker_val);
buf += trace_metadata_writer_t::write_marker(
buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0);
}
trace_marker_type_t marker_type =
static_cast<trace_marker_type_t>(in_entry->extended.valueB);
buf += trace_metadata_writer_t::write_marker(buf, marker_type, marker_val);
if (!process_marker_additionally(tdata, marker_type, marker_val, buf,
flush_decode_cache))
return false;
// If there is currently a delayed branch that has not been emitted yet,
// delay most markers since intra-block markers can cause issues with
// tools that do not expect markers amid records for a single instruction
@@ -649,14 +599,13 @@
// OFFLINE_TYPE_TIMESTAMP which is handled at a higher level in
// process_next_thread_buffer() so there is no need to have a separate
// check for it here.
if (in_entry->extended.valueB != TRACE_MARKER_TYPE_CPU_ID) {
if (marker_type != TRACE_MARKER_TYPE_CPU_ID) {
if (delayed_branches_exist(tdata)) {
return write_delayed_branches(tdata, buf_base,
reinterpret_cast<trace_entry_t *>(buf));
}
}
log(3, "Appended marker type %u value " PIFX "\n",
(trace_marker_type_t)in_entry->extended.valueB,
log(3, "Appended marker type %u value " PIFX "\n", marker_type,
(uintptr_t)in_entry->extended.valueA);
} else {
std::stringstream ss;
@@ -718,6 +667,68 @@ raw2trace_t::process_offline_entry(raw2trace_thread_data_t *tdata,
return true;
}

bool
raw2trace_t::process_marker_additionally(raw2trace_thread_data_t *tdata,
trace_marker_type_t marker_type,
uintptr_t marker_val, byte *&buf,
OUT bool *flush_decode_cache)
{
if (marker_type == TRACE_MARKER_TYPE_KERNEL_EVENT) {
log(4, "Signal/exception between bbs\n");
// An rseq side exit may next hit a signal which is then the
// boundary of the rseq region.
if (tdata->rseq_past_end_) {
if (!adjust_and_emit_rseq_buffer(tdata, marker_val))
return false;
}
} else if (marker_type == TRACE_MARKER_TYPE_RSEQ_ABORT) {
log(4, "Rseq abort %d\n", tdata->rseq_past_end_);
if (!adjust_and_emit_rseq_buffer(tdata, marker_val, marker_val))
return false;
} else if (marker_type == TRACE_MARKER_TYPE_RSEQ_ENTRY) {
if (tdata->rseq_want_rollback_) {
if (tdata->rseq_buffering_enabled_) {
// Our rollback schemes do the minimal rollback: for a side
// exit, taking the last branch. This means we don't need the
// prior iterations in the buffer.
log(4, "Rseq was already buffered: assuming loop; emitting\n");
if (!adjust_and_emit_rseq_buffer(tdata, marker_val))
return false;
}
log(4, "--- Reached rseq entry (end=0x%zx): buffering all output ---\n",
marker_val);
if (!tdata->rseq_ever_saw_entry_)
tdata->rseq_ever_saw_entry_ = true;
tdata->rseq_buffering_enabled_ = true;
tdata->rseq_end_pc_ = marker_val;
}
} else if (marker_type == TRACE_MARKER_TYPE_FILTER_ENDPOINT) {
log(2, "Reached filter endpoint\n");

// The file type needs to be updated during the switch to correctly
// process the entries that follow after. This does not affect the
// written-out type.
int file_type = get_file_type(tdata);
// We do not remove OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP here
// because that still stands true for this trace.
file_type &= ~(OFFLINE_FILE_TYPE_FILTERED | OFFLINE_FILE_TYPE_IFILTERED |
OFFLINE_FILE_TYPE_DFILTERED);
set_file_type(tdata, (offline_file_type_t)file_type);

// For the full trace, the cache contains block-level info unlike the
// filtered trace which contains instr-level info. Since we cannot use
// the decode cache entries after the transition, we need to flush the
// cache here.
*flush_decode_cache = true;
} else if (marker_type == TRACE_MARKER_TYPE_SYSCALL &&
is_maybe_blocking_syscall(marker_val)) {
log(2, "Maybe-blocking syscall %zu\n", marker_val);
buf += trace_metadata_writer_t::write_marker(
buf, TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, 0);
}
return true;
}

bool
raw2trace_t::read_header(raw2trace_thread_data_t *tdata, OUT trace_header_t *header)
{
16 changes: 13 additions & 3 deletions clients/drcachesim/tracer/raw2trace.h
@@ -1132,6 +1132,15 @@ class raw2trace_t {
thread_id_t tid, OUT bool *end_of_record,
OUT bool *last_bb_handled, OUT bool *flush_decode_cache);

/**
* Performs any additional actions for the marker "marker_type" with value
* "marker_val", beyond writing out a marker record. New records can be written to
* "buf". Returns whether successful.
*/
virtual bool
process_marker_additionally(raw2trace_thread_data_t *tdata,
trace_marker_type_t marker_type, uintptr_t marker_val,
byte *&buf, OUT bool *flush_decode_cache);
/**
* Read the header of a thread, by calling get_next_entry() successively to
* populate the header values. The timestamp field is populated only
@@ -1266,6 +1275,10 @@ class raw2trace_t {
modmap_ptr_ = modmap;
}

/** Returns whether this system call number *might* block. */
virtual bool
is_maybe_blocking_syscall(uintptr_t number);

const module_mapper_t *modmap_ptr_ = nullptr;

uint64 count_elided_ = 0;
@@ -1492,9 +1505,6 @@ class raw2trace_t {
bool
should_omit_syscall(raw2trace_thread_data_t *tdata);

bool
is_maybe_blocking_syscall(uintptr_t number);

int worker_count_;
std::vector<std::vector<raw2trace_thread_data_t *>> worker_tasks_;

