Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#5505 kernel tracing: Add syscall instr encodings #6479

Merged
merged 17 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions clients/drcachesim/common/trace_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,12 +531,14 @@ typedef enum {
TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL,

/**
* Indicates a point in the trace where a syscall's kernel trace starts.
* Indicates a point in the trace where a syscall's kernel trace starts. The value
* of the marker is set to the syscall number.
*/
TRACE_MARKER_TYPE_SYSCALL_TRACE_START,

/**
* Indicates a point in the trace where a syscall's trace end.
* Indicates a point in the trace where a syscall's trace ends. The value of the
* marker is set to the syscall number.
*/
TRACE_MARKER_TYPE_SYSCALL_TRACE_END,

Expand Down
73 changes: 72 additions & 1 deletion clients/drcachesim/drpt2trace/drir.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
#include "dr_api.h"
#include "utils.h"

#include <cstring>
#include <memory>
#include <unordered_map>

namespace dynamorio {
namespace drmemtrace {

Expand All @@ -62,8 +66,10 @@ class drir_t {
}
}

// Appends the given instr to the internal ilist, and records (replaces if
// one already exists) the given encoding for the orig_pc.
void
append(instr_t *instr)
append(instr_t *instr, app_pc orig_pc, int instr_length, uint8_t *encoding)
{
ASSERT(drcontext_ != nullptr, "drir_t: invalid drcontext_");
ASSERT(ilist_ != nullptr, "drir_t: invalid ilist_");
Expand All @@ -72,23 +78,88 @@ class drir_t {
return;
}
instrlist_append(ilist_, instr);
record_encoding(orig_pc, instr_length, encoding);
}

// Returns the opaque pointer to the dcontext_t used to construct this
// object. The stored pointer is handed back as-is; this is the same context
// that clear_ilist() passes to instrlist_clear().
void *
get_drcontext()
{
return drcontext_;
}

// Returns the instrlist_t of instrs accumulated so far. This is the same
// list that append() adds to and that clear_ilist() empties, so its contents
// change as those methods are called.
instrlist_t *
get_ilist()
{
return ilist_;
}

// Clears the instrs accumulated in the ilist. Note that this does
// not clear the encodings accumulated: the orig_pc-to-encoding map and the
// encoding buffers are left untouched, so addresses previously returned by
// get_decode_pc() are unaffected by this call.
void
clear_ilist()
{
instrlist_clear(drcontext_, ilist_);
}

// Returns the address of the stored copy of the encoding recorded for the
// given orig_pc, or nullptr if no encoding has been recorded for it.
// Encodings are persisted across clear_ilist() calls, so we will
// return the same decode_pc for the same orig_pc unless a new encoding
// is added for the same orig_pc.
app_pc
get_decode_pc(app_pc orig_pc)
{
    // Look the key up once and reuse the iterator, instead of hashing orig_pc
    // a second time through operator[].
    auto it = decode_pc_.find(orig_pc);
    if (it == decode_pc_.end()) {
        return nullptr;
    }
    // The mapped pair is (address of stored encoding, encoding length).
    return it->second.first;
}

private:
// Opaque pointer to the dcontext_t used to construct this object; returned by
// get_drcontext() and passed to instrlist_clear() in clear_ilist().
void *drcontext_;
// The list that append() adds instrs to and that clear_ilist() empties.
instrlist_t *ilist_;
// Size, in bytes, of each buffer held in instr_encodings_.
// NOTE(review): being a preprocessor macro, this name is not scoped to the
// class; no matching #undef is visible in this part of the file.
#define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024)
// For each original app pc key, this stores a pair value: the first
// element is the address where the encoding is stored for the instruction
// at that app pc, the second element is the length of the encoding.
std::unordered_map<app_pc, std::pair<app_pc, int>> decode_pc_;
// A vector of buffers of size SYSCALL_PT_ENCODING_BUF_SIZE. Each buffer
// stores some encoded instructions back-to-back. Note that each element
// in the buffer is a single byte, so one instr's encoding occupies possibly
// multiple consecutive elements.
// We allocate new memory to store kernel instruction encodings in
// increments of SYSCALL_PT_ENCODING_BUF_SIZE. We do not treat this like a
// cache and clear previously stored encodings because we want to ensure
// decode_pc uniqueness to callers of get_decode_pc.
std::vector<std::unique_ptr<uint8_t[]>> instr_encodings_;
// Next available offset (in bytes) into instr_encodings_.back(). It is reset
// to 0 whenever a new buffer is appended to instr_encodings_.
size_t next_encoding_offset_ = 0;
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved

// Stores a private copy of the instr_len bytes at encoding and maps orig_pc to
// that copy, replacing any earlier mapping for orig_pc. Nothing is stored when
// the identical encoding is already recorded for orig_pc, so the address
// returned by get_decode_pc() stays the same for unchanged code.
// Invalid input (a null encoding, a non-positive length, or a length larger
// than one SYSCALL_PT_ENCODING_BUF_SIZE buffer) is ignored, because comparing
// or copying it would read or write out of bounds.
void
record_encoding(app_pc orig_pc, int instr_len, uint8_t *encoding)
{
    if (encoding == nullptr || instr_len <= 0 ||
        instr_len > SYSCALL_PT_ENCODING_BUF_SIZE) {
        return;
    }
    // Safe to convert now that instr_len is known to be positive; this avoids
    // mixing signed and unsigned values in the size arithmetic below.
    const size_t encoding_size = static_cast<size_t>(instr_len);

    auto it = decode_pc_.find(orig_pc);
    // We record the encoding only if we don't already have the same encoding for
    // the given orig_pc. We confirm that the instruction encoding has not
    // changed (length and bytes), just in case the kernel is doing JIT.
    if (it != decode_pc_.end() && it->second.second == instr_len &&
        memcmp(it->second.first, encoding, encoding_size) == 0) {
        return;
    }

    // Start a new buffer if there is none yet or the current one lacks room.
    // Earlier buffers are kept so that previously recorded encodings stay at
    // the same address.
    if (instr_encodings_.empty() ||
        next_encoding_offset_ + encoding_size >= SYSCALL_PT_ENCODING_BUF_SIZE) {
        // Give the allocation an owner before it reaches the vector, so it is
        // released rather than leaked if growing the vector throws.
        instr_encodings_.push_back(
            std::unique_ptr<uint8_t[]>(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]));
        next_encoding_offset_ = 0;
    }

    app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_];
    memcpy(encode_pc, encoding, encoding_size);
    decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len);
    next_encoding_offset_ += encoding_size;
}
};

} // namespace drmemtrace
Expand Down
7 changes: 4 additions & 3 deletions clients/drcachesim/drpt2trace/drpt2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@ main(int argc, const char *argv[])

uint8_t *pt_data = pt_raw_buffer.data();
size_t pt_data_size = pt_raw_buffer.size();
pt2ir_convert_status_t status = ptconverter->convert(pt_data, pt_data_size, drir);
pt2ir_convert_status_t status =
ptconverter->convert(pt_data, pt_data_size, &drir);
if (status != PT2IR_CONV_SUCCESS) {
std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR."
<< "[error status: " << status << "]" << std::endl;
Expand Down Expand Up @@ -521,7 +522,7 @@ main(int argc, const char *argv[])

/* Convert the PT Data to DR IR. */
pt2ir_convert_status_t status =
ptconverter->convert(pt_data, pt_data_size, drir);
ptconverter->convert(pt_data, pt_data_size, &drir);
if (status != PT2IR_CONV_SUCCESS) {
std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR."
<< "[error status: " << status << "]" << std::endl;
Expand All @@ -542,7 +543,7 @@ main(int argc, const char *argv[])
/* Convert the DR IR to trace entries. */
std::vector<trace_entry_t> entries;
ir2trace_convert_status_t ir2trace_convert_status =
ir2trace_t::convert(drir, entries);
ir2trace_t::convert(&drir, entries);
if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) {
std::cerr << CLIENT_NAME << ": failed to convert DR IR to trace entries."
<< "[error status: " << ir2trace_convert_status << "]" << std::endl;
Expand Down
23 changes: 16 additions & 7 deletions clients/drcachesim/drpt2trace/ir2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,19 @@ namespace drmemtrace {
#define ERRMSG_HEADER "[drpt2ir] "

ir2trace_convert_status_t
ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
ir2trace_t::convert(DR_PARAM_IN drir_t *drir,
DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
DR_PARAM_IN int verbosity)
{
if (drir.get_ilist() == NULL) {
if (drir == nullptr || drir->get_ilist() == NULL) {
return IR2TRACE_CONV_ERROR_INVALID_PARAMETER;
}
instr_t *instr = instrlist_first(drir.get_ilist());
instr_t *instr = instrlist_first(drir->get_ilist());
bool prev_was_repstr = false;
while (instr != NULL) {
trace_entry_t entry = {};
entry.size = instr_length(GLOBAL_DCONTEXT, instr);
entry.addr = reinterpret_cast<uintptr_t>(instr_get_app_pc(instr));

if (!trace.empty() && trace.back().type == TRACE_TYPE_INSTR_CONDITIONAL_JUMP) {
if (instr_get_prev(instr) == nullptr ||
Expand All @@ -87,6 +90,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
*/
entry.type = TRACE_TYPE_INSTR;
if (instr_opcode_valid(instr)) {
bool cur_is_repstr = false;
if (instr_is_call_direct(instr)) {
entry.type = TRACE_TYPE_INSTR_DIRECT_CALL;
} else if (instr_is_call_indirect(instr)) {
Expand All @@ -103,15 +107,20 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
} else if (instr_get_opcode(instr) == OP_sysenter) {
entry.type = TRACE_TYPE_INSTR_SYSENTER;
} else if (instr_is_rep_string_op(instr)) {
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
cur_is_repstr = true;
if (prev_was_repstr) {
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
} else {
prev_was_repstr = true;
}
}
if (!cur_is_repstr) {
prev_was_repstr = false;
}
} else {
VPRINT(1, "Trying to convert an invalid instruction.\n");
}

entry.size = instr_length(GLOBAL_DCONTEXT, instr);
entry.addr = (uintptr_t)instr_get_app_pc(instr);

trace.push_back(entry);

instr = instr_get_next(instr);
Expand Down
2 changes: 1 addition & 1 deletion clients/drcachesim/drpt2trace/ir2trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class ir2trace_t {
* error code.
*/
static ir2trace_convert_status_t
convert(DR_PARAM_IN drir_t &drir, DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
DR_PARAM_IN int verbosity = 0);
};

Expand Down
26 changes: 8 additions & 18 deletions clients/drcachesim/drpt2trace/pt2ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,13 +257,13 @@ pt2ir_t::init(DR_PARAM_IN pt2ir_config_t &pt2ir_config, DR_PARAM_IN int verbosit

pt2ir_convert_status_t
pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size,
DR_PARAM_INOUT drir_t &drir)
DR_PARAM_INOUT drir_t *drir)
{
if (!pt2ir_initialized_) {
return PT2IR_CONV_ERROR_NOT_INITIALIZED;
}

if (pt_data == nullptr || pt_data_size <= 0) {
if (pt_data == nullptr || pt_data_size <= 0 || drir == nullptr) {
return PT2IR_CONV_ERROR_INVALID_INPUT;
}

Expand Down Expand Up @@ -379,24 +379,14 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_
}

/* Use drdecode to decode insn(pt_insn) to instr_t. */
instr_t *instr = instr_create(drir.get_drcontext());
instr_init(drir.get_drcontext(), instr);
instr_t *instr = instr_create(drir->get_drcontext());
instr_init(drir->get_drcontext(), instr);
instr_set_isa_mode(instr,
insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64);
bool instr_valid = false;
if (decode(drir.get_drcontext(), insn.raw, instr) != nullptr)
instr_valid = true;
instr_set_translation(instr, (app_pc)insn.ip);
instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size);
if (!instr_valid) {
/* The decode() function will not correctly identify the raw bits for
* invalid instruction. So we need to set the raw bits of instr manually.
*/
instr_free_raw_bits(drir.get_drcontext(), instr);
instr_set_raw_bits(instr, insn.raw, insn.size);
instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size);
app_pc instr_ip = reinterpret_cast<app_pc>(insn.ip);
if (decode_from_copy(drir->get_drcontext(), insn.raw, instr_ip, instr) ==
nullptr) {
#ifdef DEBUG

/* Print the invalid instruction's PC and raw bytes in DEBUG builds. */
if (verbosity_ >= 1) {
fprintf(stderr,
Expand All @@ -409,7 +399,7 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_
}
#endif
}
drir.append(instr);
drir->append(instr, instr_ip, insn.size, insn.raw);
}
}
return PT2IR_CONV_SUCCESS;
Expand Down
2 changes: 1 addition & 1 deletion clients/drcachesim/drpt2trace/pt2ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ class pt2ir_t {
*/
pt2ir_convert_status_t
convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size,
DR_PARAM_INOUT drir_t &drir);
DR_PARAM_INOUT drir_t *drir);

private:
/* Diagnose converting errors and output diagnostic results.
Expand Down
14 changes: 0 additions & 14 deletions clients/drcachesim/drpt2trace/test_simple.expect

This file was deleted.

14 changes: 14 additions & 0 deletions clients/drcachesim/drpt2trace/test_simple.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
TAG 0x0000000000000000
\+0 L3 .* mov \$0x00000001 -> %eax
\+5 L3 .* mov \$0x00000001 -> %edi
\+10 L3 .* \$0x0000000000402000 -> %rsi
.*
\+20 L3 .* mov \$0x0000000e -> %edx
\+25 L3 .* syscall -> %rcx %r11
\+27 L3 .* mov \$0x0000003c -> %eax
\+32 L3 .* mov \$0x00000000 -> %edi
\+37 L3 .* syscall -> %rcx %r11
END 0x0000000000000000
.*
Number of Instructions: 8
Number of Trace Entries: 8
3 changes: 1 addition & 2 deletions clients/drcachesim/reader/reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,7 @@ reader_t::process_input_entry()
version_ = cur_ref_.marker.marker_value;
else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
filetype_ = cur_ref_.marker.marker_value;
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_) &&
!TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, filetype_)) {
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) {
expect_no_encodings_ = false;
}
} else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE)
Expand Down
6 changes: 6 additions & 0 deletions clients/drcachesim/tests/offline-kernel-opcode-mix.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Hello, world!
Opcode mix tool results:
.*: total executed instructions
.*
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
.*: .*clac
.*
6 changes: 6 additions & 0 deletions clients/drcachesim/tests/offline-kernel-syscall-mix.templatex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Hello, world!
Syscall mix tool results:
syscall count : syscall_num
.*
syscall trace count : syscall_num
.*
2 changes: 1 addition & 1 deletion clients/drcachesim/tests/offline-syscall-mix.templatex
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Hello, world!
Syscall mix tool results:
count : syscall_num
syscall count : syscall_num
( *[1-9][0-9]* : *[0-9]*.*)+
2 changes: 1 addition & 1 deletion clients/drcachesim/tests/syscall-mix.templatex
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Hello, world!
---- <application exited with code 0> ----
Syscall mix tool results:
count : syscall_num
syscall count : syscall_num
( *[1-9][0-9]* : *[0-9]*.*)+
Loading
Loading