Skip to content

Commit

Permalink
i#5505 kernel tracing: Add syscall instr encodings
Browse files Browse the repository at this point in the history
Adds encodings for kernel system call instructions to the trace in raw2trace.
Kernel system call traces are decoded using libipt which also provides the
instruction encodings. We write these encodings to a new buffer which is re-used
for all dynamic instances of that instr.

Adds a new drcachesim option to ignore failures in decoding instructions. This
is currently required because the kernel traces have opcodes that are not yet
recognized. Such decode failures are not critical because their impact is
limited (not counting unsupported instrs in the opcode_mix tool, or not showing
unsupported instrs in the view tool), unlike for user-space instrs, where missing
decoder support may lead to drreg issues etc. Uses the new option in the
opcode_mix and view tools.

Adds support in the syscall_mix tool to also report, for each system call, the
count of its kernel traces.

Issue: #5505
  • Loading branch information
abhinav92003 committed Nov 28, 2023
1 parent 2a632a9 commit 626ffab
Show file tree
Hide file tree
Showing 16 changed files with 188 additions and 41 deletions.
6 changes: 4 additions & 2 deletions clients/drcachesim/analyzer_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,15 +410,17 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator
return nullptr;
}
return opcode_mix_tool_create(module_file_path, op_verbose.get_value(),
op_alt_module_dir.get_value());
op_alt_module_dir.get_value(),
op_ignore_decode_failure.get_value());
} else if (simulator_type == SYSCALL_MIX) {
return syscall_mix_tool_create(op_verbose.get_value());
} else if (simulator_type == VIEW) {
std::string module_file_path = get_module_file_path();
// The module file is optional so we don't check for emptiness.
return view_tool_create(module_file_path, op_skip_refs.get_value(),
op_sim_refs.get_value(), op_view_syntax.get_value(),
op_verbose.get_value(), op_alt_module_dir.get_value());
op_verbose.get_value(), op_alt_module_dir.get_value(),
op_ignore_decode_failure.get_value());
} else if (simulator_type == FUNC_VIEW) {
std::string funclist_file_path = get_aux_file_path(
op_funclist_file.get_value(), DRMEMTRACE_FUNCTION_LIST_FILENAME);
Expand Down
8 changes: 8 additions & 0 deletions clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,14 @@ droption_t<std::string> op_alt_module_dir(
"analysis tools, or in the raw modules file for post-prcoessing of offline "
"raw trace files. This directory takes precedence over the recorded path.");

droption_t<bool> op_ignore_decode_failure(
DROPTION_SCOPE_FRONTEND, "ignore_decode_failure", false,
"Whether instruction decode failures should be ignored",
"Specifies whether failures to decode the instruction encodings should be ignored by "
"the analysis tools. This is useful especially when decoding kernel traces collected "
"by Intel-PT where decode failures may not indicate a critical issue and can be "
"moved past.");

droption_t<bytesize_t> op_chunk_instr_count(
DROPTION_SCOPE_FRONTEND, "chunk_instr_count", bytesize_t(10 * 1000 * 1000U),
// We do not support tiny chunks. We do not support disabling chunks with a 0
Expand Down
1 change: 1 addition & 0 deletions clients/drcachesim/common/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ extern dynamorio::droption::droption_t<std::string> op_infile;
extern dynamorio::droption::droption_t<std::string> op_indir;
extern dynamorio::droption::droption_t<std::string> op_module_file;
extern dynamorio::droption::droption_t<std::string> op_alt_module_dir;
extern dynamorio::droption::droption_t<bool> op_ignore_decode_failure;
extern dynamorio::droption::droption_t<dynamorio::droption::bytesize_t>
op_chunk_instr_count;
extern dynamorio::droption::droption_t<bool> op_instr_encodings;
Expand Down
12 changes: 11 additions & 1 deletion clients/drcachesim/drpt2trace/ir2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
return IR2TRACE_CONV_ERROR_INVALID_PARAMETER;
}
instr_t *instr = instrlist_first(drir.get_ilist());
bool prev_was_repstr = false;
while (instr != NULL) {
trace_entry_t entry = {};

Expand All @@ -87,6 +88,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
*/
entry.type = TRACE_TYPE_INSTR;
if (instr_opcode_valid(instr)) {
bool cur_is_repstr = false;
if (instr_is_call_direct(instr)) {
entry.type = TRACE_TYPE_INSTR_DIRECT_CALL;
} else if (instr_is_call_indirect(instr)) {
Expand All @@ -103,7 +105,15 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
} else if (instr_get_opcode(instr) == OP_sysenter) {
entry.type = TRACE_TYPE_INSTR_SYSENTER;
} else if (instr_is_rep_string_op(instr)) {
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
cur_is_repstr = true;
if (prev_was_repstr) {
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
} else {
prev_was_repstr = true;
}
}
if (!cur_is_repstr) {
prev_was_repstr = false;
}
} else {
VPRINT(1, "Trying to convert an invalid instruction.\n");
Expand Down
7 changes: 7 additions & 0 deletions clients/drcachesim/tests/offline-kernel-opcode_mix.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Hello, world!
Opcode mix tool results:
.*: total executed instructions
.*:.*
.*
.*: .*syscall
.*
27 changes: 18 additions & 9 deletions clients/drcachesim/tools/opcode_mix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,22 @@ namespace dynamorio {
namespace drmemtrace {

const std::string opcode_mix_t::TOOL_NAME = "Opcode mix tool";
constexpr int kInvalidOpcode = -1;

analysis_tool_t *
opcode_mix_tool_create(const std::string &module_file_path, unsigned int verbose,
const std::string &alt_module_dir)
const std::string &alt_module_dir, bool ignore_decode_failure)
{
return new opcode_mix_t(module_file_path, verbose, alt_module_dir);
return new opcode_mix_t(module_file_path, verbose, alt_module_dir,
ignore_decode_failure);
}

opcode_mix_t::opcode_mix_t(const std::string &module_file_path, unsigned int verbose,
const std::string &alt_module_dir)
const std::string &alt_module_dir, bool ignore_decode_failure)
: module_file_path_(module_file_path)
, knob_verbose_(verbose)
, knob_alt_module_dir_(alt_module_dir)
, knob_ignore_decode_failure_(ignore_decode_failure)
{
}

Expand Down Expand Up @@ -219,12 +222,16 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
app_pc next_pc =
decode_from_copy(dcontext_.dcontext, decode_pc, trace_pc, &instr);
if (next_pc == NULL || !instr_valid(&instr)) {
instr_free(dcontext_.dcontext, &instr);
shard->error =
"Failed to decode instruction " + to_hex_string(memref.instr.addr);
return false;
if (!knob_ignore_decode_failure_) {
instr_free(dcontext_.dcontext, &instr);
shard->error =
"Failed to decode instruction " + to_hex_string(memref.instr.addr);
return false;
}
opcode = kInvalidOpcode;
} else {
opcode = instr_get_opcode(&instr);
}
opcode = instr_get_opcode(&instr);
shard->worker->opcode_cache[trace_pc] = opcode;
instr_free(dcontext_.dcontext, &instr);
}
Expand Down Expand Up @@ -276,7 +283,9 @@ opcode_mix_t::print_results()
std::sort(sorted.begin(), sorted.end(), cmp_val);
for (const auto &keyvals : sorted) {
std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9)
<< decode_opcode_name(keyvals.first) << "\n";
<< (keyvals.first == kInvalidOpcode ? "<INVALID>"
: decode_opcode_name(keyvals.first))
<< "\n";
}
return true;
}
Expand Down
4 changes: 3 additions & 1 deletion clients/drcachesim/tools/opcode_mix.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ class opcode_mix_t : public analysis_tool_t {
// XXX: Once we update our toolchains to guarantee C++17 support we could use
// std::optional here.
opcode_mix_t(const std::string &module_file_path, unsigned int verbose,
const std::string &alt_module_dir = "");
const std::string &alt_module_dir = "",
bool ignore_decode_failure = false);
virtual ~opcode_mix_t();
std::string
initialize() override;
Expand Down Expand Up @@ -141,6 +142,7 @@ class opcode_mix_t : public analysis_tool_t {
std::mutex shard_map_mutex_;
unsigned int knob_verbose_;
std::string knob_alt_module_dir_;
bool knob_ignore_decode_failure_;
static const std::string TOOL_NAME;
// For serial operation.
worker_data_t serial_worker_;
Expand Down
3 changes: 2 additions & 1 deletion clients/drcachesim/tools/opcode_mix_create.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ namespace drmemtrace {
*/
analysis_tool_t *
opcode_mix_tool_create(const std::string &module_file_path, unsigned int verbose = 0,
const std::string &alt_module_dir = "");
const std::string &alt_module_dir = "",
bool ignore_decode_failure = false);

} // namespace drmemtrace
} // namespace dynamorio
Expand Down
41 changes: 33 additions & 8 deletions clients/drcachesim/tools/syscall_mix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,21 @@ bool
syscall_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
{
shard_data_t *shard = reinterpret_cast<shard_data_t *>(shard_data);
if (memref.marker.type != TRACE_TYPE_MARKER ||
memref.marker.marker_type != TRACE_MARKER_TYPE_SYSCALL)
return true;
int syscall_num = static_cast<int>(memref.marker.marker_value);
if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL) {
int syscall_num = static_cast<int>(memref.marker.marker_value);
#ifdef X64
assert(static_cast<uintptr_t>(syscall_num) == memref.marker.marker_value);
#endif
++shard->syscall_counts[syscall_num];
} else if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START) {
int syscall_num = static_cast<int>(memref.marker.marker_value);
#ifdef X64
assert(static_cast<uintptr_t>(syscall_num) == memref.marker.marker_value);
assert(static_cast<uintptr_t>(syscall_num) == memref.marker.marker_value);
#endif
++shard->syscall_counts[syscall_num];
++shard->syscall_trace_counts[syscall_num];
}
return true;
}

Expand All @@ -142,7 +149,9 @@ syscall_mix_t::process_memref(const memref_t &memref)
// Comparator used to sort (syscall_num, count) pairs for printing.
// Orders by count, largest first; entries with equal counts are ordered by
// syscall number, largest first, so that the output order is deterministic.
//
// std::sort requires a strict weak ordering: the syscall number may only be
// consulted when the counts are equal. Falling through to the syscall-number
// comparison when l.second < r.second would let both cmp(a, b) and cmp(b, a)
// return true, which is undefined behavior for std::sort.
static bool
cmp_second_val(const std::pair<int, int64_t> &l, const std::pair<int, int64_t> &r)
{
    if (l.second != r.second)
        return l.second > r.second;
    return l.first > r.first;
}

bool
Expand All @@ -156,10 +165,13 @@ syscall_mix_t::print_results()
for (const auto &keyvals : shard.second->syscall_counts) {
total.syscall_counts[keyvals.first] += keyvals.second;
}
for (const auto &keyvals : shard.second->syscall_trace_counts) {
total.syscall_trace_counts[keyvals.first] += keyvals.second;
}
}
}
std::cerr << TOOL_NAME << " results:\n";
std::cerr << std::setw(15) << "count"
std::cerr << std::setw(15) << "syscall count"
<< " : " << std::setw(9) << "syscall_num\n";
std::vector<std::pair<int, int64_t>> sorted(total.syscall_counts.begin(),
total.syscall_counts.end());
Expand All @@ -170,6 +182,19 @@ syscall_mix_t::print_results()
std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9)
<< keyvals.first << "\n";
}
if (!total.syscall_trace_counts.empty()) {
std::cerr << std::setw(15) << "syscall trace count"
<< " : " << std::setw(9) << "syscall_num\n";
std::vector<std::pair<int, int64_t>> sorted_trace(
total.syscall_trace_counts.begin(), total.syscall_trace_counts.end());
std::sort(sorted_trace.begin(), sorted_trace.end(), cmp_second_val);
for (const auto &keyvals : sorted_trace) {
// XXX: It would be nicer to print the system call name string instead
// of its number.
std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9)
<< keyvals.first << "\n";
}
}
return true;
}

Expand Down
1 change: 1 addition & 0 deletions clients/drcachesim/tools/syscall_mix.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class syscall_mix_t : public analysis_tool_t {
protected:
// Per-shard tallies accumulated by parallel_shard_memref() and summed across
// shards in print_results().
struct shard_data_t {
// Number of TRACE_MARKER_TYPE_SYSCALL markers seen, keyed by the syscall
// number carried in the marker value.
std::unordered_map<int, int64_t> syscall_counts;
// Number of TRACE_MARKER_TYPE_SYSCALL_TRACE_START markers seen, keyed by the
// syscall number carried in the marker value.
std::unordered_map<int, int64_t> syscall_trace_counts;
// NOTE(review): presumably holds an error message for this shard; its use is
// not visible in this hunk -- confirm against the rest of the file.
std::string error;
};

Expand Down
20 changes: 14 additions & 6 deletions clients/drcachesim/tools/view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ const std::string view_t::TOOL_NAME = "View tool";
analysis_tool_t *
view_tool_create(const std::string &module_file_path, uint64_t skip_refs,
uint64_t sim_refs, const std::string &syntax, unsigned int verbose,
const std::string &alt_module_dir)
const std::string &alt_module_dir, bool ignore_decode_failure)
{
return new view_t(module_file_path, skip_refs, sim_refs, syntax, verbose,
alt_module_dir);
alt_module_dir, ignore_decode_failure);
}

view_t::view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs,
const std::string &syntax, unsigned int verbose,
const std::string &alt_module_dir)
const std::string &alt_module_dir, bool ignore_decode_failure)
: module_file_path_(module_file_path)
, knob_verbose_(verbose)
, trace_version_(-1)
Expand All @@ -88,6 +88,7 @@ view_t::view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t
, filetype_(-1)
, timestamp_(0)
, has_modules_(true)
, knob_ignore_decode_failure_(ignore_decode_failure)
{
}

Expand Down Expand Up @@ -553,10 +554,17 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
/*show_bytes=*/true, buf, BUFFER_SIZE_ELEMENTS(buf),
/*printed=*/nullptr);
if (next_pc == nullptr) {
error_string_ = "Failed to disassemble " + to_hex_string(memref.instr.addr);
return false;
if (!knob_ignore_decode_failure_) {
error_string_ =
"Failed to disassemble " + to_hex_string(memref.instr.addr);
return false;
}
// We still print the limited bytes output and a string saying
// "<INVALID>".
disasm = buf;
} else {
disasm = buf;
}
disasm = buf;
disasm_cache_.insert({ orig_pc, disasm });
}
// Add branch decoration, which varies and so can't be cached purely by PC.
Expand Down
3 changes: 2 additions & 1 deletion clients/drcachesim/tools/view.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class view_t : public analysis_tool_t {
// std::optional here.
view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs,
const std::string &syntax, unsigned int verbose,
const std::string &alt_module_dir = "");
const std::string &alt_module_dir = "", bool ignore_decode_failure = false);
std::string
initialize_stream(memtrace_stream_t *serial_stream) override;
bool
Expand Down Expand Up @@ -161,6 +161,7 @@ class view_t : public analysis_tool_t {
int64_t filetype_record_ord_ = -1;
bool has_modules_;
memtrace_stream_t *serial_stream_ = nullptr;
bool knob_ignore_decode_failure_;

private:
static constexpr int RECORD_COLUMN_WIDTH = 12;
Expand Down
3 changes: 2 additions & 1 deletion clients/drcachesim/tools/view_create.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ namespace drmemtrace {
analysis_tool_t *
view_tool_create(const std::string &module_file_path, uint64_t skip_refs,
uint64_t sim_refs, const std::string &syntax, unsigned int verbose = 0,
const std::string &alt_module_dir = "");
const std::string &alt_module_dir = "",
bool ignore_decode_failure = false);

} // namespace drmemtrace
} // namespace dynamorio
Expand Down
Loading

0 comments on commit 626ffab

Please sign in to comment.