Skip to content

Commit

Permalink
i#5505 kernel tracing: Delay syscall_idx marker after PT trace (#6935)
Browse files Browse the repository at this point in the history
Delays outputting the TRACE_MARKER_TYPE_SYSCALL_IDX marker to the
post-syscall callback (instead of the pre-syscall callback where it is
output currently) where we also output the PT trace. Some drmemtrace
derivations may interleave user-space and PT trace data, which means we
want the PT trace to be read before the syscall_idx marker. If a signal
interrupts the syscall, then the syscall_idx marker will be written to
trace before the PT trace: we want to avoid this.

Relaxes invariant checks related to the syscall_num marker before the
syscall trace start marker for PT instr-only traces. In the case
described above, there will be no syscall_num marker immediately before
the syscall trace start. Added a comment noting the complexities of
handling this case, which exist even without the ordering constraint
described above.

Verified on an Intel-PT hardware that the related tests pass:

```
The following tests passed:
	code_api|tool.drcacheoff.kernel.simple_SUDO
	code_api|tool.drcacheoff.kernel.opcode-mix_SUDO
	code_api|tool.drcacheoff.kernel.syscall-mix_SUDO
	code_api|tool.drcacheoff.kernel.invariant-checker_SUDO

The following tests passed:
	code_api|client.drpttracer_SUDO-test
```

Issue: #5505
  • Loading branch information
abhinav92003 authored Aug 24, 2024
1 parent db323a4 commit 8541566
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 40 deletions.
37 changes: 35 additions & 2 deletions clients/drcachesim/tests/invariant_checker_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3074,6 +3074,9 @@ check_kernel_syscall_trace(void)
static constexpr addr_t BASE_ADDR = 0x123450;
static constexpr uintptr_t FILE_TYPE = OFFLINE_FILE_TYPE_ENCODINGS |
OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY;
static constexpr uintptr_t FILE_TYPE_FULL_SYSCALL_TRACE =
OFFLINE_FILE_TYPE_ENCODINGS | OFFLINE_FILE_TYPE_SYSCALL_NUMBERS |
OFFLINE_FILE_TYPE_KERNEL_SYSCALLS;
bool res = true;
{
std::vector<memref_with_IR_t> memref_setup = {
Expand Down Expand Up @@ -3161,7 +3164,12 @@ check_kernel_syscall_trace(void)
}
{
std::vector<memref_with_IR_t> memref_setup = {
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr },
// OFFLINE_FILE_TYPE_KERNEL_SYSCALLS enables some extra invariant checks over
// OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, which is why we use
// FILE_TYPE_FULL_SYSCALL_TRACE here. This is fine because this trace does
// not have any load or store instructions.
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE_FULL_SYSCALL_TRACE),
nullptr },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr },
{ gen_instr(TID_A), sys },
Expand Down Expand Up @@ -3223,7 +3231,12 @@ check_kernel_syscall_trace(void)
}
{
std::vector<memref_with_IR_t> memref_setup = {
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr },
// OFFLINE_FILE_TYPE_KERNEL_SYSCALLS enables some extra invariant checks over
// OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, which is why we use
// FILE_TYPE_FULL_SYSCALL_TRACE here. This is fine because this trace does
// not have any load or store instructions.
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE_FULL_SYSCALL_TRACE),
nullptr },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr },
{ gen_instr(TID_A), sys },
Expand All @@ -3243,6 +3256,26 @@ check_kernel_syscall_trace(void)
"Failed to catch missing prior sysnum marker"))
res = false;
}
{
std::vector<memref_with_IR_t> memref_setup = {
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64), nullptr },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_PAGE_SIZE, 4096), nullptr },
{ gen_instr(TID_A), sys },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL, 42), nullptr },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_CPU_ID, 11), nullptr },
// Missing prior sysnum marker does not raise an error for
// OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY, unlike
// OFFLINE_FILE_TYPE_KERNEL_SYSCALLS.
{ gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_START, 42), nullptr },
{ gen_instr(TID_A), move },
{ gen_marker(TID_A, TRACE_MARKER_TYPE_SYSCALL_TRACE_END, 42), nullptr },
{ gen_exit(TID_A), nullptr }
};
auto memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR);
if (!run_checker(memrefs, false))
res = false;
}
{
std::vector<memref_with_IR_t> memref_setup = {
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, FILE_TYPE), nullptr },
Expand Down
12 changes: 11 additions & 1 deletion clients/drcachesim/tools/invariant_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,17 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem
"Found kernel syscall trace without corresponding file type");
report_if_false(shard, !shard->between_kernel_syscall_trace_markers_,
"Nested kernel syscall traces are not expected");
if (!TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALL_TRACE_TEMPLATES,

// PT kernel syscall traces are inserted at the TRACE_MARKER_TYPE_SYSCALL_IDX
// marker. The marker is deliberately added to the trace in the post-syscall
// callback to ensure it is emitted together with the actual PT trace and not
// before. If a signal interrupts the syscall, the
// TRACE_MARKER_TYPE_SYSCALL_IDX marker may separate away from the
// TRACE_MARKER_TYPE_SYSCALL marker.
// TODO i#5505: This case needs more thought. Do we pause PT tracing on entry
// to the signal handler? Do we discard the PT trace collected until then?
if (!TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALL_TRACE_TEMPLATES |
OFFLINE_FILE_TYPE_KERNEL_SYSCALL_INSTR_ONLY,
shard->file_type_)) {
report_if_false(shard, prev_was_syscall_marker_saved,
"System call trace found without prior syscall marker");
Expand Down
111 changes: 74 additions & 37 deletions clients/drcachesim/tracer/tracer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1496,6 +1496,40 @@ exit_record_syscall()
hashtable_delete(&syscall2args);
}

#ifdef BUILD_PT_TRACER
static bool
stop_cur_syscall_pt_trace(void *drcontext, per_thread_t *data)
{
int cur_recording_sysnum = data->syscall_pt_trace.get_cur_recording_sysnum();
ASSERT(cur_recording_sysnum != INVALID_SYSNUM,
"Routine expected to be called only when PT tracing is active.");
/* Write a marker to the userspace raw trace that denotes where raw2trace
* should decode and insert the PT trace for the system call being
* recorded currently. Some drmemtrace derivations may interleave the PT
* trace raw data with the drmemtrace user-space raw trace data (instead of
* outputting the PT trace data to separate files like we do here). In such
* cases, we want to ensure that the TRACE_MARKER_TYPE_SYSCALL_IDX does not
* get output before the actual PT trace data, so we output the marker when
* we stop and write the PT trace (instead of when we start the PT trace).
* Note that the order below does not matter because the actual buffer
* flush happens later.
*/
trace_marker_type_t marker_type = TRACE_MARKER_TYPE_SYSCALL_IDX;
uintptr_t marker_val = data->syscall_pt_trace.get_traced_syscall_idx();
BUF_PTR(data->seg_base) +=
instru->append_marker(BUF_PTR(data->seg_base), marker_type, marker_val);
if (!data->syscall_pt_trace.stop_syscall_pt_trace()) {
NOTIFY(0,
"ERROR: Failed to stop PT tracing for syscall %d of thread "
"T%d.\n",
cur_recording_sysnum, dr_get_thread_id(drcontext));
ASSERT(false, "Failed to stop syscall PT trace");
return false;
}
return true;
}
#endif

static bool
event_pre_syscall(void *drcontext, int sysnum)
{
Expand Down Expand Up @@ -1567,24 +1601,21 @@ event_pre_syscall(void *drcontext, int sysnum)
#ifdef BUILD_PT_TRACER
if (op_offline.get_value() && op_enable_kernel_tracing.get_value()) {
if (data->syscall_pt_trace.get_cur_recording_sysnum() != INVALID_SYSNUM) {
ASSERT(false, "last tracing isn't stopped");
if (!data->syscall_pt_trace.stop_syscall_pt_trace()) {
ASSERT(false, "failed to stop syscall pt trace");
NOTIFY(0,
"ERROR: Tracing for last syscall %d wasn't stopped when we reached "
"the next one in T%d.\n",
data->syscall_pt_trace.get_cur_recording_sysnum(),
dr_get_thread_id(drcontext));
ASSERT(false,
"Last syscall tracing wasn't stopped when we reached the next one");
// In the release build, in case we somehow did not stop the PT tracing, we
// try to stop it and continue.
if (!stop_cur_syscall_pt_trace(drcontext, data))
return false;
}
}

if (!syscall_pt_trace_t::is_syscall_pt_trace_enabled(sysnum)) {
return true;
}

/* Write a marker to userspace raw trace. */
trace_marker_type_t marker_type = TRACE_MARKER_TYPE_SYSCALL_IDX;
uintptr_t marker_val = data->syscall_pt_trace.get_traced_syscall_idx();
BUF_PTR(data->seg_base) +=
instru->append_marker(BUF_PTR(data->seg_base), marker_type, marker_val);

if (!data->syscall_pt_trace.start_syscall_pt_trace(sysnum)) {
if (syscall_pt_trace_t::is_syscall_pt_trace_enabled(sysnum) &&
!data->syscall_pt_trace.start_syscall_pt_trace(sysnum)) {
ASSERT(false, "failed to start syscall pt trace");
return false;
}
Expand Down Expand Up @@ -1638,24 +1669,32 @@ event_post_syscall(void *drcontext, int sysnum)
}

#ifdef BUILD_PT_TRACER
if (!op_offline.get_value() || !op_enable_kernel_tracing.get_value())
return;
if (!is_in_tracing_mode(tracing_mode.load(std::memory_order_acquire)))
return;
if (!syscall_pt_trace_t::is_syscall_pt_trace_enabled(sysnum))
return;

if (data->syscall_pt_trace.get_cur_recording_sysnum() == INVALID_SYSNUM) {
ASSERT(false, "last syscall is not traced");
return;
if (op_offline.get_value() && op_enable_kernel_tracing.get_value()) {
// We intentionally do not check is_in_tracing_mode here because
// we may be in a non-tracing mode now but may have enabled PT
// tracing in the pre-syscall event when we were in tracing mode.
// We still want to stop the PT tracing the syscall in this case.
// We also intentionally do not check is_syscall_pt_trace_enabled
// for sysnum so that we may catch a case where we were not able
// to see the post-syscall event for the syscall for which we
// started PT tracing (maybe there was a signal that interrupted
// that syscall); we still have a debug-build assert below for
// this case.
int cur_recording_sysnum = data->syscall_pt_trace.get_cur_recording_sysnum();
if (cur_recording_sysnum != INVALID_SYSNUM) {
ASSERT(cur_recording_sysnum == sysnum,
"Last tracing isn't for the expected sysnum");
ASSERT(syscall_pt_trace_t::is_syscall_pt_trace_enabled(cur_recording_sysnum),
"Did not expect syscall tracing to be enabled for this syscall");
// Ignore return value and try to continue in release build.
stop_cur_syscall_pt_trace(drcontext, data);
} else {
// No syscall trace is being recorded. This may be because syscall tracing
// is not enabled for sysnum, or that we were not in tracing mode at the
// pre-syscall event for sysnum.
}
}

ASSERT(data->syscall_pt_trace.get_cur_recording_sysnum() == sysnum,
"last tracing isn't for the expected sysnum");
if (!data->syscall_pt_trace.stop_syscall_pt_trace()) {
ASSERT(false, "failed to stop syscall pt trace");
return;
}
#endif
}

Expand Down Expand Up @@ -1820,13 +1859,11 @@ event_thread_exit(void *drcontext)
int cur_recording_sysnum = data->syscall_pt_trace.get_cur_recording_sysnum();
if (cur_recording_sysnum != INVALID_SYSNUM) {
NOTIFY(0,
"ERROR: The last recorded syscall %d of thread T%d wasn't be "
"stopped.\n",
"ERROR: PT tracing for the last syscall %d of thread T%d was "
"found active at detach.\n",
cur_recording_sysnum, dr_get_thread_id(drcontext));
ASSERT(cur_recording_sysnum, "syscall recording is not stopped");
if (!data->syscall_pt_trace.stop_syscall_pt_trace()) {
ASSERT(false, "failed to stop syscall pt trace");
}
// Ignore return value and try to continue in release build.
stop_cur_syscall_pt_trace(drcontext, data);
}
}
#endif
Expand Down

0 comments on commit 8541566

Please sign in to comment.