diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 166c5f32055..794472ed1a2 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -531,12 +531,14 @@ typedef enum { TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, /** - * Indicates a point in the trace where a syscall's kernel trace starts. + * Indicates a point in the trace where a syscall's kernel trace starts. The value + * of the marker is set to the syscall number. */ TRACE_MARKER_TYPE_SYSCALL_TRACE_START, /** - * Indicates a point in the trace where a syscall's trace end. + * Indicates a point in the trace where a syscall's trace ends. The value of the + * marker is set to the syscall number. */ TRACE_MARKER_TYPE_SYSCALL_TRACE_END, diff --git a/clients/drcachesim/drpt2trace/drir.h b/clients/drcachesim/drpt2trace/drir.h index eb4b133f20b..a8f177a423c 100644 --- a/clients/drcachesim/drpt2trace/drir.h +++ b/clients/drcachesim/drpt2trace/drir.h @@ -41,6 +41,10 @@ #include "dr_api.h" #include "utils.h" +#include +#include +#include + namespace dynamorio { namespace drmemtrace { @@ -62,8 +66,10 @@ class drir_t { } } + // Appends the given instr to the internal ilist, and records (replaces if + // one already exists) the given encoding for the orig_pc. void - append(instr_t *instr) + append(instr_t *instr, app_pc orig_pc, int instr_length, uint8_t *encoding) { ASSERT(drcontext_ != nullptr, "drir_t: invalid drcontext_"); ASSERT(ilist_ != nullptr, "drir_t: invalid ilist_"); @@ -72,23 +78,88 @@ class drir_t { return; } instrlist_append(ilist_, instr); + record_encoding(orig_pc, instr_length, encoding); } + // Returns the opaque pointer to the dcontext_t used to construct this + // object. void * get_drcontext() { return drcontext_; } + // Returns the instrlist_t of instrs accumulated so far. instrlist_t * get_ilist() { return ilist_; } + // Clears the instrs accumulated in the ilist. 
Note that this does + // not clear the encodings accumulated. + void + clear_ilist() + { + instrlist_clear(drcontext_, ilist_); + } + + // Returns the address of the encoding recorded for the given orig_pc. + // Encodings are persisted across clear_ilist() calls, so we will + // return the same decode_pc for the same orig_pc unless a new encoding + // is added for the same orig_pc. + app_pc + get_decode_pc(app_pc orig_pc) + { + // Returns nullptr when no encoding was recorded for orig_pc. A single find() + // avoids the second hash lookup that a following operator[] would perform. + auto it = decode_pc_.find(orig_pc); + return it == decode_pc_.end() ? nullptr : it->second.first; + } + private: void *drcontext_; instrlist_t *ilist_; +#define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024) + // For each original app pc key, this stores a pair value: the first + // element is the address where the encoding is stored for the instruction + // at that app pc, the second element is the length of the encoding. + std::unordered_map<app_pc, std::pair<app_pc, int>> decode_pc_; + // A vector of buffers of size SYSCALL_PT_ENCODING_BUF_SIZE. Each buffer + // stores some encoded instructions back-to-back. Note that each element + // in the buffer is a single byte, so one instr's encoding occupies possibly + // multiple consecutive elements. + // We allocate new memory to store kernel instruction encodings in + // increments of SYSCALL_PT_ENCODING_BUF_SIZE. We do not treat this like a + // cache and clear previously stored encodings because we want to ensure + // decode_pc uniqueness to callers of get_decode_pc. + std::vector<std::unique_ptr<uint8_t[]>> instr_encodings_; + // Next available offset into instr_encodings_.back(). + size_t next_encoding_offset_ = 0; + + void + record_encoding(app_pc orig_pc, int instr_len, uint8_t *encoding) + { + auto it = decode_pc_.find(orig_pc); + // We record the encoding only if we don't already have the same encoding for + // the given orig_pc. + if (it != decode_pc_.end() && + // We confirm that the instruction encoding has not changed. Just in case + // the kernel is doing JIT. 
+ it->second.second == instr_len && + memcmp(it->second.first, encoding, it->second.second) == 0) { + return; + } + if (instr_encodings_.empty() || + next_encoding_offset_ + instr_len >= SYSCALL_PT_ENCODING_BUF_SIZE) { + instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); + next_encoding_offset_ = 0; + } + app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_]; + memcpy(encode_pc, encoding, instr_len); + decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len); + next_encoding_offset_ += instr_len; + } }; } // namespace drmemtrace diff --git a/clients/drcachesim/drpt2trace/drpt2trace.cpp b/clients/drcachesim/drpt2trace/drpt2trace.cpp index e1961c3e5fa..654740376d1 100644 --- a/clients/drcachesim/drpt2trace/drpt2trace.cpp +++ b/clients/drcachesim/drpt2trace/drpt2trace.cpp @@ -461,7 +461,8 @@ main(int argc, const char *argv[]) uint8_t *pt_data = pt_raw_buffer.data(); size_t pt_data_size = pt_raw_buffer.size(); - pt2ir_convert_status_t status = ptconverter->convert(pt_data, pt_data_size, drir); + pt2ir_convert_status_t status = + ptconverter->convert(pt_data, pt_data_size, &drir); if (status != PT2IR_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR." << "[error status: " << status << "]" << std::endl; @@ -521,7 +522,7 @@ main(int argc, const char *argv[]) /* Convert the PT Data to DR IR. */ pt2ir_convert_status_t status = - ptconverter->convert(pt_data, pt_data_size, drir); + ptconverter->convert(pt_data, pt_data_size, &drir); if (status != PT2IR_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR." << "[error status: " << status << "]" << std::endl; @@ -542,7 +543,7 @@ main(int argc, const char *argv[]) /* Convert the DR IR to trace entries. 
*/ std::vector entries; ir2trace_convert_status_t ir2trace_convert_status = - ir2trace_t::convert(drir, entries); + ir2trace_t::convert(&drir, entries); if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert DR IR to trace entries." << "[error status: " << ir2trace_convert_status << "]" << std::endl; diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp index 62e36621343..57e27e931d1 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.cpp +++ b/clients/drcachesim/drpt2trace/ir2trace.cpp @@ -55,16 +55,19 @@ namespace drmemtrace { #define ERRMSG_HEADER "[drpt2ir] " ir2trace_convert_status_t -ir2trace_t::convert(DR_PARAM_IN drir_t &drir, +ir2trace_t::convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector &trace, DR_PARAM_IN int verbosity) { - if (drir.get_ilist() == NULL) { + if (drir == nullptr || drir->get_ilist() == NULL) { return IR2TRACE_CONV_ERROR_INVALID_PARAMETER; } - instr_t *instr = instrlist_first(drir.get_ilist()); + instr_t *instr = instrlist_first(drir->get_ilist()); + bool prev_was_repstr = false; while (instr != NULL) { trace_entry_t entry = {}; + entry.size = instr_length(GLOBAL_DCONTEXT, instr); + entry.addr = reinterpret_cast(instr_get_app_pc(instr)); if (!trace.empty() && trace.back().type == TRACE_TYPE_INSTR_CONDITIONAL_JUMP) { if (instr_get_prev(instr) == nullptr || @@ -87,6 +90,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir, */ entry.type = TRACE_TYPE_INSTR; if (instr_opcode_valid(instr)) { + bool cur_is_repstr = false; if (instr_is_call_direct(instr)) { entry.type = TRACE_TYPE_INSTR_DIRECT_CALL; } else if (instr_is_call_indirect(instr)) { @@ -103,15 +107,20 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir, } else if (instr_get_opcode(instr) == OP_sysenter) { entry.type = TRACE_TYPE_INSTR_SYSENTER; } else if (instr_is_rep_string_op(instr)) { - entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH; + cur_is_repstr = true; + if (prev_was_repstr) { + 
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH; + } else { + prev_was_repstr = true; + } + } + if (!cur_is_repstr) { + prev_was_repstr = false; } } else { VPRINT(1, "Trying to convert an invalid instruction.\n"); } - entry.size = instr_length(GLOBAL_DCONTEXT, instr); - entry.addr = (uintptr_t)instr_get_app_pc(instr); - trace.push_back(entry); instr = instr_get_next(instr); diff --git a/clients/drcachesim/drpt2trace/ir2trace.h b/clients/drcachesim/drpt2trace/ir2trace.h index 18458dc8e3c..13444881f22 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.h +++ b/clients/drcachesim/drpt2trace/ir2trace.h @@ -92,7 +92,7 @@ class ir2trace_t { * error code. */ static ir2trace_convert_status_t - convert(DR_PARAM_IN drir_t &drir, DR_PARAM_INOUT std::vector &trace, + convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector &trace, DR_PARAM_IN int verbosity = 0); }; diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp index 4a5ad88f293..13ea7221daf 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.cpp +++ b/clients/drcachesim/drpt2trace/pt2ir.cpp @@ -257,13 +257,13 @@ pt2ir_t::init(DR_PARAM_IN pt2ir_config_t &pt2ir_config, DR_PARAM_IN int verbosit pt2ir_convert_status_t pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size, - DR_PARAM_INOUT drir_t &drir) + DR_PARAM_INOUT drir_t *drir) { if (!pt2ir_initialized_) { return PT2IR_CONV_ERROR_NOT_INITIALIZED; } - if (pt_data == nullptr || pt_data_size <= 0) { + if (pt_data == nullptr || pt_data_size <= 0 || drir == nullptr) { return PT2IR_CONV_ERROR_INVALID_INPUT; } @@ -379,24 +379,14 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } /* Use drdecode to decode insn(pt_insn) to instr_t. 
*/ - instr_t *instr = instr_create(drir.get_drcontext()); - instr_init(drir.get_drcontext(), instr); + instr_t *instr = instr_create(drir->get_drcontext()); + instr_init(drir->get_drcontext(), instr); instr_set_isa_mode(instr, insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64); - bool instr_valid = false; - if (decode(drir.get_drcontext(), insn.raw, instr) != nullptr) - instr_valid = true; - instr_set_translation(instr, (app_pc)insn.ip); - instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size); - if (!instr_valid) { - /* The decode() function will not correctly identify the raw bits for - * invalid instruction. So we need to set the raw bits of instr manually. - */ - instr_free_raw_bits(drir.get_drcontext(), instr); - instr_set_raw_bits(instr, insn.raw, insn.size); - instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size); + app_pc instr_ip = reinterpret_cast(insn.ip); + if (decode_from_copy(drir->get_drcontext(), insn.raw, instr_ip, instr) == + nullptr) { #ifdef DEBUG - /* Print the invalid instruction‘s PC and raw bytes in DEBUG builds. */ if (verbosity_ >= 1) { fprintf(stderr, @@ -409,7 +399,7 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } #endif } - drir.append(instr); + drir->append(instr, instr_ip, insn.size, insn.raw); } } return PT2IR_CONV_SUCCESS; diff --git a/clients/drcachesim/drpt2trace/pt2ir.h b/clients/drcachesim/drpt2trace/pt2ir.h index a1e2f49f01e..02ec9a0a4f6 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.h +++ b/clients/drcachesim/drpt2trace/pt2ir.h @@ -365,7 +365,7 @@ class pt2ir_t { */ pt2ir_convert_status_t convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size, - DR_PARAM_INOUT drir_t &drir); + DR_PARAM_INOUT drir_t *drir); private: /* Diagnose converting errors and output diagnostic results. 
diff --git a/clients/drcachesim/drpt2trace/test_simple.expect b/clients/drcachesim/drpt2trace/test_simple.expect deleted file mode 100644 index c55837c430b..00000000000 --- a/clients/drcachesim/drpt2trace/test_simple.expect +++ /dev/null @@ -1,14 +0,0 @@ -TAG 0x0000000000000000 - +0 L2 b8 01 00 00 00 mov $0x00000001 -> %eax - +5 L2 bf 01 00 00 00 mov $0x00000001 -> %edi - +10 L2 48 be 00 20 40 00 00 mov $0x0000000000402000 -> %rsi - 00 00 00 - +20 L2 ba 0e 00 00 00 mov $0x0000000e -> %edx - +25 L2 0f 05 syscall -> %rcx %r11 - +27 L2 b8 3c 00 00 00 mov $0x0000003c -> %eax - +32 L2 bf 00 00 00 00 mov $0x00000000 -> %edi - +37 L2 0f 05 syscall -> %rcx %r11 -END 0x0000000000000000 - -Number of Instructions: 8 -Number of Trace Entries: 8 diff --git a/clients/drcachesim/drpt2trace/test_simple.templatex b/clients/drcachesim/drpt2trace/test_simple.templatex new file mode 100644 index 00000000000..7dc05ea81df --- /dev/null +++ b/clients/drcachesim/drpt2trace/test_simple.templatex @@ -0,0 +1,14 @@ +TAG 0x0000000000000000 + \+0 L3 .* mov \$0x00000001 -> %eax + \+5 L3 .* mov \$0x00000001 -> %edi + \+10 L3 .* \$0x0000000000402000 -> %rsi + .* + \+20 L3 .* mov \$0x0000000e -> %edx + \+25 L3 .* syscall -> %rcx %r11 + \+27 L3 .* mov \$0x0000003c -> %eax + \+32 L3 .* mov \$0x00000000 -> %edi + \+37 L3 .* syscall -> %rcx %r11 +END 0x0000000000000000 +.* +Number of Instructions: 8 +Number of Trace Entries: 8 diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index 1cd157ab44a..783d2a44bb8 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -321,8 +321,7 @@ reader_t::process_input_entry() version_ = cur_ref_.marker.marker_value; else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { filetype_ = cur_ref_.marker.marker_value; - if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_) && - !TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, filetype_)) { + if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, 
filetype_)) { expect_no_encodings_ = false; } } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE) diff --git a/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex new file mode 100644 index 00000000000..ce75a56588d --- /dev/null +++ b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex @@ -0,0 +1,6 @@ +Hello, world! +Opcode mix tool results: +.*: total executed instructions +.* +.*: .*clac +.* diff --git a/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex new file mode 100644 index 00000000000..8582336ef38 --- /dev/null +++ b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex @@ -0,0 +1,6 @@ +Hello, world! +Syscall mix tool results: + syscall count : syscall_num +.* + syscall trace count : syscall_num +.* diff --git a/clients/drcachesim/tests/offline-syscall-mix.templatex b/clients/drcachesim/tests/offline-syscall-mix.templatex index 231b6bb93d2..a02a9ca3e1e 100644 --- a/clients/drcachesim/tests/offline-syscall-mix.templatex +++ b/clients/drcachesim/tests/offline-syscall-mix.templatex @@ -1,4 +1,4 @@ Hello, world! Syscall mix tool results: - count : syscall_num + syscall count : syscall_num ( *[1-9][0-9]* : *[0-9]*.*)+ diff --git a/clients/drcachesim/tests/syscall-mix.templatex b/clients/drcachesim/tests/syscall-mix.templatex index 59ae08aad9e..e58b2ec7da2 100644 --- a/clients/drcachesim/tests/syscall-mix.templatex +++ b/clients/drcachesim/tests/syscall-mix.templatex @@ -1,5 +1,5 @@ Hello, world! 
---- ---- Syscall mix tool results: - count : syscall_num + syscall count : syscall_num ( *[1-9][0-9]* : *[0-9]*.*)+ diff --git a/clients/drcachesim/tools/syscall_mix.cpp b/clients/drcachesim/tools/syscall_mix.cpp index 2885e37e364..a18c6f1e5ed 100644 --- a/clients/drcachesim/tools/syscall_mix.cpp +++ b/clients/drcachesim/tools/syscall_mix.cpp @@ -111,14 +111,21 @@ bool syscall_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { shard_data_t *shard = reinterpret_cast(shard_data); - if (memref.marker.type != TRACE_TYPE_MARKER || - memref.marker.marker_type != TRACE_MARKER_TYPE_SYSCALL) - return true; - int syscall_num = static_cast(memref.marker.marker_value); + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL) { + int syscall_num = static_cast(memref.marker.marker_value); #ifdef X64 - assert(static_cast(syscall_num) == memref.marker.marker_value); + assert(static_cast(syscall_num) == memref.marker.marker_value); #endif - ++shard->syscall_counts[syscall_num]; + ++shard->syscall_counts[syscall_num]; + } else if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START) { + int syscall_num = static_cast(memref.marker.marker_value); +#ifdef X64 + assert(static_cast(syscall_num) == memref.marker.marker_value); +#endif + ++shard->syscall_trace_counts[syscall_num]; + } return true; } @@ -142,6 +149,8 @@ syscall_mix_t::process_memref(const memref_t &memref) static bool cmp_second_val(const std::pair &l, const std::pair &r) { + if (l.second == r.second) + return l.first > r.first; return l.second > r.second; } @@ -156,10 +165,13 @@ syscall_mix_t::print_results() for (const auto &keyvals : shard.second->syscall_counts) { total.syscall_counts[keyvals.first] += keyvals.second; } + for (const auto &keyvals : shard.second->syscall_trace_counts) { + total.syscall_trace_counts[keyvals.first] += keyvals.second; + } } } std::cerr << TOOL_NAME << " 
results:\n"; - std::cerr << std::setw(15) << "count" + std::cerr << std::setw(15) << "syscall count" << " : " << std::setw(9) << "syscall_num\n"; std::vector> sorted(total.syscall_counts.begin(), total.syscall_counts.end()); @@ -170,6 +182,19 @@ syscall_mix_t::print_results() std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) << keyvals.first << "\n"; } + if (!total.syscall_trace_counts.empty()) { + std::cerr << std::setw(20) << "syscall trace count" + << " : " << std::setw(9) << "syscall_num\n"; + std::vector> sorted_trace( + total.syscall_trace_counts.begin(), total.syscall_trace_counts.end()); + std::sort(sorted_trace.begin(), sorted_trace.end(), cmp_second_val); + for (const auto &keyvals : sorted_trace) { + // XXX: It would be nicer to print the system call name string instead + // of its number. + std::cerr << std::setw(20) << keyvals.second << " : " << std::setw(9) + << keyvals.first << "\n"; + } + } return true; } diff --git a/clients/drcachesim/tools/syscall_mix.h b/clients/drcachesim/tools/syscall_mix.h index 7dc42a1a3c4..04cfb449f81 100644 --- a/clients/drcachesim/tools/syscall_mix.h +++ b/clients/drcachesim/tools/syscall_mix.h @@ -71,6 +71,7 @@ class syscall_mix_t : public analysis_tool_t { protected: struct shard_data_t { std::unordered_map syscall_counts; + std::unordered_map syscall_trace_counts; std::string error; }; diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 2c77e04d682..3a07e944671 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -418,10 +418,12 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) // Handled above. 
break; case TRACE_MARKER_TYPE_SYSCALL_TRACE_START: - std::cerr << "\n"; + std::cerr << "\n"; break; case TRACE_MARKER_TYPE_SYSCALL_TRACE_END: - std::cerr << "\n"; + std::cerr << "\n"; break; case TRACE_MARKER_TYPE_BRANCH_TARGET: // These are not expected to be visible (since the reader adds them diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index c6dd6439d93..f38c9e079b2 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1008,9 +1008,12 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } /* Convert the PT Data to DR IR. */ - drir_t drir(GLOBAL_DCONTEXT); - pt2ir_convert_status_t pt2ir_convert_status = - tdata->pt2ir.convert(pt_data->data.get(), pt_data_size, drir); + if (tdata->pt_decode_state_ == nullptr) { + tdata->pt_decode_state_ = std::unique_ptr(new drir_t(GLOBAL_DCONTEXT)); + } + tdata->pt_decode_state_->clear_ilist(); + pt2ir_convert_status_t pt2ir_convert_status = tdata->pt2ir.convert( + pt_data->data.get(), pt_data_size, tdata->pt_decode_state_.get()); if (pt2ir_convert_status != PT2IR_CONV_SUCCESS) { tdata->error = "Failed to convert PT raw trace to DR IR [error status: " + std::to_string(pt2ir_convert_status) + "]"; @@ -1018,13 +1021,15 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } /* Convert the DR IR to trace entries. 
*/ + addr_t sysnum = + pt_data->header[dynamorio::drmemtrace::PDB_HEADER_SYSNUM_IDX].sysnum.sysnum; std::vector<trace_entry_t> entries; trace_entry_t start_entry = { .type = TRACE_TYPE_MARKER, .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_START, - .addr = 0 }; + .addr = sysnum }; entries.push_back(start_entry); ir2trace_convert_status_t ir2trace_convert_status = - ir2trace_t::convert(drir, entries); + ir2trace_t::convert(tdata->pt_decode_state_.get(), entries); if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) { tdata->error = "Failed to convert DR IR to trace entries [error status: " + std::to_string(ir2trace_convert_status) + "]"; @@ -1032,7 +1037,7 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } trace_entry_t end_entry = { .type = TRACE_TYPE_MARKER, .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_END, - .addr = 0 }; + .addr = sysnum }; entries.push_back(end_entry); if (entries.size() == 2) { tdata->error = "No trace entries generated from PT data"; @@ -1040,17 +1045,40 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } accumulate_to_statistic(tdata, RAW2TRACE_STAT_SYSCALL_TRACES_DECODED, 1); + app_pc saved_decode_pc = nullptr; // Defined value for the marker-only first flush. + trace_entry_t entries_with_encodings[WRITE_BUFFER_SIZE]; + trace_entry_t *buf = entries_with_encodings; for (const auto &entry : entries) { - if (type_is_instr(static_cast<trace_type_t>(entry.type))) + if (type_is_instr(static_cast<trace_type_t>(entry.type))) { + if (buf != entries_with_encodings) { + if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) { + return false; + } + buf = entries_with_encodings; + } accumulate_to_statistic(tdata, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, 1); + // The per-thread drir_t object (pt_decode_state_) keeps instr encoding + // state across system calls. So different dynamic instances of the same + // instruction in system calls will have the same decode_pc. 
+ saved_decode_pc = tdata->pt_decode_state_->get_decode_pc( + reinterpret_cast(entry.addr)); + if (saved_decode_pc == nullptr) { + tdata->error = + "Unknown pc after ir2trace: did ir2trace insert new instr?"; + return false; + } + if (!append_encoding(tdata, saved_decode_pc, entry.size, buf, + entries_with_encodings)) + return false; + } + *buf = entry; + ++buf; } - - if (!tdata->out_file->write(reinterpret_cast(entries.data()), - sizeof(trace_entry_t) * entries.size())) { - tdata->error = "Failed to write to output file"; - return false; + if (buf != entries_with_encodings) { + if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) { + return false; + } } - return true; } #endif diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index dfdbd333cad..6c451242284 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1080,6 +1080,7 @@ class raw2trace_t { std::vector rseq_decode_pcs_; #ifdef BUILD_PT_POST_PROCESSOR + std::unique_ptr pt_decode_state_ = nullptr; std::istream *kthread_file; bool pt_metadata_processed = false; pt2ir_t pt2ir; diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 1d60cb6d480..1ecdc74f66d 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4554,7 +4554,7 @@ if (BUILD_CLIENTS) if (proc_supports_pt) if (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) get_target_path_for_execution(drpt2trace_path drpt2trace "${location_suffix}") - macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args) + macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args sim_atops) set(testname_full "tool.drcacheoff.kernel.${testname}_SUDO") torunonly_ci(${testname_full} ${exetgt} drcachesim "offline-kernel-${testname}.c" # for templatex basename @@ -4571,13 +4571,18 @@ if (BUILD_CLIENTS) set(${testname_full}_precmd "foreach@${cmd_pfx}${CMAKE_COMMAND}@-E@remove_directory@${testname_full}.*.dir") 
set(${testname_full}_postcmd - "firstglob@${cmd_pfx}${drcachesim_path}@-simulator_type@basic_counts@-indir@${testname_full}.*.dir${sim_atops}") + "firstglob@${cmd_pfx}${drcachesim_path}@-indir@${testname_full}.*.dir${sim_atops}") endmacro () # We use '-raw_compress none' because when snappy or lz4 is used for raw traces, # the check that complains about malloc use in the client is disabled by invoking # dr_allow_unsafe_static_behavior. We want to perform this check on the kernel # tracing flow. - torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "") + torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@basic_counts") + torunonly_drcacheoff_kernel(opcode-mix ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@opcode_mix") + torunonly_drcacheoff_kernel(syscall-mix ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@syscall_mix") endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (proc_supports_pt) @@ -4704,7 +4709,7 @@ if (BUILD_CLIENTS) "-sb_sysroot" "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw") torunonly_api(tool.drpt2trace.sideband drpt2trace - "../../clients/drcachesim/drpt2trace/test_simple.expect" + "../../clients/drcachesim/drpt2trace/test_simple.templatex" "" "${drpt2trace_sideband_args}" ON OFF) set(drpt2trace_elf_args ${drpt2trace_commong} "-mode" "ELF" @@ -4712,7 +4717,7 @@ if (BUILD_CLIENTS) "-elf" "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw/hello") torunonly_api(tool.drpt2trace.elf drpt2trace - "../../clients/drcachesim/drpt2trace/test_simple.expect" + "../../clients/drcachesim/drpt2trace/test_simple.templatex" "" "${drpt2trace_elf_args}" ON OFF) endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (BUILD_CLIENTS)