From 626ffabe70787bca971de30c39390d619cb354a5 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Tue, 28 Nov 2023 13:07:04 -0500 Subject: [PATCH 01/16] i#5505 kernel tracing: Add syscall instr encodings Adds encodings for kernel system call instructions to the trace in raw2trace. Kernel system call traces are decoded using libipt which also provides the instruction encodings. We write these encodings to a new buffer which is re-used for all dynamic instances of that instr. Adds a new drcachesim option to ignore failures in decoding instructions. This is currently required because the kernel traces have opcodes that are not yet recognized. Such decode failures are not critical because their impact is limited (not counting unsupported instrs in the opcode_mix tool, or not showing unsupported instrs in the view tool) unlike user-space instrs where missing decoder support may lead to drreg issues etc. Uses the new option in opcode_mix and the view tool. Adds support in the syscall_mix tool to report the counts of each system call's trace also. Issue: #5505 --- clients/drcachesim/analyzer_multi.cpp | 6 +- clients/drcachesim/common/options.cpp | 8 +++ clients/drcachesim/common/options.h | 1 + clients/drcachesim/drpt2trace/ir2trace.cpp | 12 +++- .../tests/offline-kernel-opcode_mix.templatex | 7 ++ clients/drcachesim/tools/opcode_mix.cpp | 27 ++++--- clients/drcachesim/tools/opcode_mix.h | 4 +- clients/drcachesim/tools/opcode_mix_create.h | 3 +- clients/drcachesim/tools/syscall_mix.cpp | 41 ++++++++--- clients/drcachesim/tools/syscall_mix.h | 1 + clients/drcachesim/tools/view.cpp | 20 ++++-- clients/drcachesim/tools/view.h | 3 +- clients/drcachesim/tools/view_create.h | 3 +- clients/drcachesim/tracer/raw2trace.cpp | 71 ++++++++++++++++--- clients/drcachesim/tracer/raw2trace.h | 13 ++++ suite/tests/CMakeLists.txt | 9 ++- 16 files changed, 188 insertions(+), 41 deletions(-) create mode 100644 clients/drcachesim/tests/offline-kernel-opcode_mix.templatex diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index 11ba9ef3b23..1bb4277938a 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -410,7 +410,8 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator return nullptr; } return opcode_mix_tool_create(module_file_path, op_verbose.get_value(), - op_alt_module_dir.get_value()); + op_alt_module_dir.get_value(), + op_ignore_decode_failure.get_value()); } else if (simulator_type == SYSCALL_MIX) { return syscall_mix_tool_create(op_verbose.get_value()); } else if (simulator_type == VIEW) { @@ -418,7 +419,8 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator // The module file is optional so we don't check for emptiness. return view_tool_create(module_file_path, op_skip_refs.get_value(), op_sim_refs.get_value(), op_view_syntax.get_value(), - op_verbose.get_value(), op_alt_module_dir.get_value()); + op_verbose.get_value(), op_alt_module_dir.get_value(), + op_ignore_decode_failure.get_value()); } else if (simulator_type == FUNC_VIEW) { std::string funclist_file_path = get_aux_file_path( op_funclist_file.get_value(), DRMEMTRACE_FUNCTION_LIST_FILENAME); diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 53fceb2c03d..2aa6db8b56b 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -117,6 +117,14 @@ droption_t op_alt_module_dir( "analysis tools, or in the raw modules file for post-prcoessing of offline " "raw trace files. This directory takes precedence over the recorded path."); +droption_t op_ignore_decode_failure( + DROPTION_SCOPE_FRONTEND, "ignore_decode_failure", false, + "Whether instruction decode failures should be ignored", + "Specifies whether failures to decode the instruction encodings should be ignored by " + "the analysis tools. This is useful especially when decoding kernel traces collected " + "by Intel-PT where decode failures may not indicate a critical issue and can be " + "moved past."); + droption_t op_chunk_instr_count( DROPTION_SCOPE_FRONTEND, "chunk_instr_count", bytesize_t(10 * 1000 * 1000U), // We do not support tiny chunks. We do not support disabling chunks with a 0 diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 8deff88ea74..4817ad03c8d 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -92,6 +92,7 @@ extern dynamorio::droption::droption_t op_infile; extern dynamorio::droption::droption_t op_indir; extern dynamorio::droption::droption_t op_module_file; extern dynamorio::droption::droption_t op_alt_module_dir; +extern dynamorio::droption::droption_t op_ignore_decode_failure; extern dynamorio::droption::droption_t op_chunk_instr_count; extern dynamorio::droption::droption_t op_instr_encodings; diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp index 62e36621343..1f709eb5fb9 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.cpp +++ b/clients/drcachesim/drpt2trace/ir2trace.cpp @@ -63,6 +63,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir, return IR2TRACE_CONV_ERROR_INVALID_PARAMETER; } instr_t *instr = instrlist_first(drir.get_ilist()); + bool prev_was_repstr = false; while (instr != NULL) { trace_entry_t entry = {}; @@ -87,6 +88,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir, */ entry.type = TRACE_TYPE_INSTR; if (instr_opcode_valid(instr)) { + bool cur_is_repstr = false; if (instr_is_call_direct(instr)) { entry.type = TRACE_TYPE_INSTR_DIRECT_CALL; } else if (instr_is_call_indirect(instr)) { @@ -103,7 +105,15 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir, } else if (instr_get_opcode(instr) == OP_sysenter) { entry.type = TRACE_TYPE_INSTR_SYSENTER; } else if (instr_is_rep_string_op(instr)) { - entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH; + cur_is_repstr = true; + if (prev_was_repstr) { + entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH; + } else { + prev_was_repstr = true; + } + } + if (!cur_is_repstr) { + prev_was_repstr = false; } } else { VPRINT(1, "Trying to convert an invalid instruction.\n"); diff --git a/clients/drcachesim/tests/offline-kernel-opcode_mix.templatex b/clients/drcachesim/tests/offline-kernel-opcode_mix.templatex new file mode 100644 index 00000000000..c8949559467 --- /dev/null +++ b/clients/drcachesim/tests/offline-kernel-opcode_mix.templatex @@ -0,0 +1,7 @@ +Hello, world! +Opcode mix tool results: +.*: total executed instructions +.*:.* +.* +.*: .*syscall +.* diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 5389829c5ac..94fe322f4d4 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -64,19 +64,22 @@ namespace dynamorio { namespace drmemtrace { const std::string opcode_mix_t::TOOL_NAME = "Opcode mix tool"; +constexpr int kInvalidOpcode = -1; analysis_tool_t * opcode_mix_tool_create(const std::string &module_file_path, unsigned int verbose, - const std::string &alt_module_dir) + const std::string &alt_module_dir, bool ignore_decode_failure) { - return new opcode_mix_t(module_file_path, verbose, alt_module_dir); + return new opcode_mix_t(module_file_path, verbose, alt_module_dir, + ignore_decode_failure); } opcode_mix_t::opcode_mix_t(const std::string &module_file_path, unsigned int verbose, - const std::string &alt_module_dir) + const std::string &alt_module_dir, bool ignore_decode_failure) : module_file_path_(module_file_path) , knob_verbose_(verbose) , knob_alt_module_dir_(alt_module_dir) + , knob_ignore_decode_failure_(ignore_decode_failure) { } @@ -219,12 +222,16 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) app_pc next_pc = decode_from_copy(dcontext_.dcontext, decode_pc, trace_pc, &instr); if (next_pc == NULL || !instr_valid(&instr)) { - instr_free(dcontext_.dcontext, &instr); - shard->error = - "Failed to decode instruction " + to_hex_string(memref.instr.addr); - return false; + if (!knob_ignore_decode_failure_) { + instr_free(dcontext_.dcontext, &instr); + shard->error = + "Failed to decode instruction " + to_hex_string(memref.instr.addr); + return false; + } + opcode = kInvalidOpcode; + } else { + opcode = instr_get_opcode(&instr); } - opcode = instr_get_opcode(&instr); shard->worker->opcode_cache[trace_pc] = opcode; instr_free(dcontext_.dcontext, &instr); } @@ -276,7 +283,9 @@ opcode_mix_t::print_results() std::sort(sorted.begin(), sorted.end(), cmp_val); for (const auto &keyvals : sorted) { std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) - << decode_opcode_name(keyvals.first) << "\n"; + << (keyvals.first == kInvalidOpcode ? "" + : decode_opcode_name(keyvals.first)) + << "\n"; } return true; } diff --git a/clients/drcachesim/tools/opcode_mix.h b/clients/drcachesim/tools/opcode_mix.h index 062de8e6a5b..2d1d1989166 100644 --- a/clients/drcachesim/tools/opcode_mix.h +++ b/clients/drcachesim/tools/opcode_mix.h @@ -58,7 +58,8 @@ class opcode_mix_t : public analysis_tool_t { // XXX: Once we update our toolchains to guarantee C++17 support we could use // std::optional here. opcode_mix_t(const std::string &module_file_path, unsigned int verbose, - const std::string &alt_module_dir = ""); + const std::string &alt_module_dir = "", + bool ignore_decode_failure = false); virtual ~opcode_mix_t(); std::string initialize() override; @@ -141,6 +142,7 @@ class opcode_mix_t : public analysis_tool_t { std::mutex shard_map_mutex_; unsigned int knob_verbose_; std::string knob_alt_module_dir_; + bool knob_ignore_decode_failure_; static const std::string TOOL_NAME; // For serial operation. worker_data_t serial_worker_; diff --git a/clients/drcachesim/tools/opcode_mix_create.h b/clients/drcachesim/tools/opcode_mix_create.h index d398d295b1c..9bc38741f8a 100644 --- a/clients/drcachesim/tools/opcode_mix_create.h +++ b/clients/drcachesim/tools/opcode_mix_create.h @@ -54,7 +54,8 @@ namespace drmemtrace { */ analysis_tool_t * opcode_mix_tool_create(const std::string &module_file_path, unsigned int verbose = 0, - const std::string &alt_module_dir = ""); + const std::string &alt_module_dir = "", + bool ignore_decode_failure = false); } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/syscall_mix.cpp b/clients/drcachesim/tools/syscall_mix.cpp index 2885e37e364..d2b5c90a309 100644 --- a/clients/drcachesim/tools/syscall_mix.cpp +++ b/clients/drcachesim/tools/syscall_mix.cpp @@ -111,14 +111,21 @@ bool syscall_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { shard_data_t *shard = reinterpret_cast(shard_data); - if (memref.marker.type != TRACE_TYPE_MARKER || - memref.marker.marker_type != TRACE_MARKER_TYPE_SYSCALL) - return true; - int syscall_num = static_cast(memref.marker.marker_value); + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL) { + int syscall_num = static_cast(memref.marker.marker_value); +#ifdef X64 + assert(static_cast(syscall_num) == memref.marker.marker_value); +#endif + ++shard->syscall_counts[syscall_num]; + } else if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START) { + int syscall_num = static_cast(memref.marker.marker_value); #ifdef X64 - assert(static_cast(syscall_num) == memref.marker.marker_value); + assert(static_cast(syscall_num) == memref.marker.marker_value); #endif - ++shard->syscall_counts[syscall_num]; + ++shard->syscall_trace_counts[syscall_num]; + } return true; } @@ -142,7 +149,9 @@ syscall_mix_t::process_memref(const memref_t &memref) static bool cmp_second_val(const std::pair &l, const std::pair &r) { - return l.second > r.second; + if (l.second > r.second) + return true; + return l.first > r.first; } bool @@ -156,10 +165,13 @@ syscall_mix_t::print_results() for (const auto &keyvals : shard.second->syscall_counts) { total.syscall_counts[keyvals.first] += keyvals.second; } + for (const auto &keyvals : shard.second->syscall_trace_counts) { + total.syscall_trace_counts[keyvals.first] += keyvals.second; + } } } std::cerr << TOOL_NAME << " results:\n"; - std::cerr << std::setw(15) << "count" + std::cerr << std::setw(15) << "syscall count" << " : " << std::setw(9) << "syscall_num\n"; std::vector> sorted(total.syscall_counts.begin(), total.syscall_counts.end()); @@ -170,6 +182,19 @@ syscall_mix_t::print_results() std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) << keyvals.first << "\n"; } + if (!total.syscall_trace_counts.empty()) { + std::cerr << std::setw(15) << "syscall trace count" + << " : " << std::setw(9) << "syscall_num\n"; + std::vector> sorted_trace( + total.syscall_trace_counts.begin(), total.syscall_trace_counts.end()); + std::sort(sorted_trace.begin(), sorted_trace.end(), cmp_second_val); + for (const auto &keyvals : sorted_trace) { + // XXX: It would be nicer to print the system call name string instead + // of its number. + std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) + << keyvals.first << "\n"; + } + } return true; } diff --git a/clients/drcachesim/tools/syscall_mix.h b/clients/drcachesim/tools/syscall_mix.h index 7dc42a1a3c4..04cfb449f81 100644 --- a/clients/drcachesim/tools/syscall_mix.h +++ b/clients/drcachesim/tools/syscall_mix.h @@ -71,6 +71,7 @@ class syscall_mix_t : public analysis_tool_t { protected: struct shard_data_t { std::unordered_map syscall_counts; + std::unordered_map syscall_trace_counts; std::string error; }; diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 2c77e04d682..4eb829e29c2 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -65,15 +65,15 @@ const std::string view_t::TOOL_NAME = "View tool"; analysis_tool_t * view_tool_create(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose, - const std::string &alt_module_dir) + const std::string &alt_module_dir, bool ignore_decode_failure) { return new view_t(module_file_path, skip_refs, sim_refs, syntax, verbose, - alt_module_dir); + alt_module_dir, ignore_decode_failure); } view_t::view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose, - const std::string &alt_module_dir) + const std::string &alt_module_dir, bool ignore_decode_failure) : module_file_path_(module_file_path) , knob_verbose_(verbose) , trace_version_(-1) @@ -88,6 +88,7 @@ view_t::view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t , filetype_(-1) , timestamp_(0) , has_modules_(true) + , knob_ignore_decode_failure_(ignore_decode_failure) { } @@ -553,10 +554,17 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) /*show_bytes=*/true, buf, BUFFER_SIZE_ELEMENTS(buf), /*printed=*/nullptr); if (next_pc == nullptr) { - error_string_ = "Failed to disassemble " + to_hex_string(memref.instr.addr); - return false; + if (!knob_ignore_decode_failure_) { + error_string_ = + "Failed to disassemble " + to_hex_string(memref.instr.addr); + return false; + } + // We still print the limited bytes output and a string saying + // "". + disasm = buf; + } else { + disasm = buf; } - disasm = buf; disasm_cache_.insert({ orig_pc, disasm }); } // Add branch decoration, which varies and so can't be cached purely by PC. diff --git a/clients/drcachesim/tools/view.h b/clients/drcachesim/tools/view.h index 30d5f033e14..fb5581f6c01 100644 --- a/clients/drcachesim/tools/view.h +++ b/clients/drcachesim/tools/view.h @@ -60,7 +60,7 @@ class view_t : public analysis_tool_t { // std::optional here. view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose, - const std::string &alt_module_dir = ""); + const std::string &alt_module_dir = "", bool ignore_decode_failure = false); std::string initialize_stream(memtrace_stream_t *serial_stream) override; bool @@ -161,6 +161,7 @@ class view_t : public analysis_tool_t { int64_t filetype_record_ord_ = -1; bool has_modules_; memtrace_stream_t *serial_stream_ = nullptr; + bool knob_ignore_decode_failure_; private: static constexpr int RECORD_COLUMN_WIDTH = 12; diff --git a/clients/drcachesim/tools/view_create.h b/clients/drcachesim/tools/view_create.h index ebe8a811f6a..020198b5139 100644 --- a/clients/drcachesim/tools/view_create.h +++ b/clients/drcachesim/tools/view_create.h @@ -54,7 +54,8 @@ namespace drmemtrace { analysis_tool_t * view_tool_create(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose = 0, - const std::string &alt_module_dir = ""); + const std::string &alt_module_dir = "", + bool ignore_decode_failure = false); } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index c6dd6439d93..9de4fb5df49 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1016,12 +1016,16 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall std::to_string(pt2ir_convert_status) + "]"; return false; } - + if (!track_syscall_pt_encodings(tdata, drir.get_ilist())) { + return false; + } /* Convert the DR IR to trace entries. */ + addr_t sysnum = + pt_data->header[dynamorio::drmemtrace::PDB_HEADER_SYSNUM_IDX].sysnum.sysnum; std::vector entries; trace_entry_t start_entry = { .type = TRACE_TYPE_MARKER, .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_START, - .addr = 0 }; + .addr = sysnum }; entries.push_back(start_entry); ir2trace_convert_status_t ir2trace_convert_status = ir2trace_t::convert(drir, entries); @@ -1032,7 +1036,7 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } trace_entry_t end_entry = { .type = TRACE_TYPE_MARKER, .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_END, - .addr = 0 }; + .addr = sysnum }; entries.push_back(end_entry); if (entries.size() == 2) { tdata->error = "No trace entries generated from PT data"; @@ -1040,19 +1044,70 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } accumulate_to_statistic(tdata, RAW2TRACE_STAT_SYSCALL_TRACES_DECODED, 1); + std::vector decode_pcs; for (const auto &entry : entries) { - if (type_is_instr(static_cast(entry.type))) + if (type_is_instr(static_cast(entry.type))) { + app_pc instr_pc = reinterpret_cast(entry.addr); accumulate_to_statistic(tdata, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, 1); + if (tdata->syscall_pc_to_decode_pc_.find(instr_pc) == + tdata->syscall_pc_to_decode_pc_.end()) { + tdata->error = + "Unknown pc after ir2trace: did ir2trace insert new instr?"; + return false; + } + decode_pcs.push_back(tdata->syscall_pc_to_decode_pc_[instr_pc].first); + } } - - if (!tdata->out_file->write(reinterpret_cast(entries.data()), - sizeof(trace_entry_t) * entries.size())) { - tdata->error = "Failed to write to output file"; + if (!write(tdata, entries.data(), entries.data() + entries.size(), decode_pcs.data(), + decode_pcs.size())) { return false; } return true; } + +bool +raw2trace_t::track_syscall_pt_encodings(raw2trace_thread_data_t *tdata, + instrlist_t *ilist) +{ + instr_t *instr = instrlist_first(ilist); + while (instr != NULL) { + app_pc orig_pc = instr_get_app_pc(instr); + int instr_len = instr_length(dcontext_, instr); + if (tdata->syscall_instr_encodings_.empty() || + tdata->syscall_next_encoding_offset_ + instr_len >= + SYSCALL_PT_ENCODING_BUF_SIZE) { + // We allocate new memory to store kernel instruction encodings in + // increments of SYSCALL_PT_ENCODING_BUF_SIZE. Note that we cannot + // treat this like a cache and clear previously stored encodings + // because any re-use in the decode address will confuse our write() + // logic (remember that we key encodings using the decode pc). + tdata->syscall_instr_encodings_.emplace_back( + new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); + tdata->syscall_next_encoding_offset_ = 0; + } + app_pc encode_pc = + &tdata->syscall_instr_encodings_.back()[tdata->syscall_next_encoding_offset_]; + byte *ret = instr_encode_to_copy(dcontext_, instr, encode_pc, orig_pc); + if (ret == NULL) { + tdata->error = "Failed to encode a system call kernel instruction."; + return false; + } + DR_ASSERT(ret - encode_pc == instr_len); + auto it = tdata->syscall_pc_to_decode_pc_.find(orig_pc); + if (it == tdata->syscall_pc_to_decode_pc_.end() || + // We confirm that the instruction encoding has not changed. Just in case + // the kernel is doing JIT. + it->second.second != instr_len || + memcmp(it->second.first, encode_pc, it->second.second) != 0) { + tdata->syscall_next_encoding_offset_ += instr_len; + tdata->syscall_pc_to_decode_pc_[orig_pc] = + std::make_pair(encode_pc, instr_len); + } + instr = instr_get_next(instr); + } + return true; +} #endif bool diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index dfdbd333cad..cc969cb4cd3 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1080,6 +1080,11 @@ class raw2trace_t { std::vector rseq_decode_pcs_; #ifdef BUILD_PT_POST_PROCESSOR +# define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024 * 10) + std::unordered_map> syscall_pc_to_decode_pc_; + std::vector> syscall_instr_encodings_; + size_t syscall_next_encoding_offset_ = 0; + std::istream *kthread_file; bool pt_metadata_processed = false; pt2ir_t pt2ir; @@ -1182,6 +1187,14 @@ class raw2trace_t { */ bool process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall_idx); + + /** + * Records the encodings for all instructions in the given ilist. Note that + * kernel system call traces gathered using Intel-PT are decoded using libipt + * which also provides the instr encodings. + */ + bool + track_syscall_pt_encodings(raw2trace_thread_data_t *tdata, instrlist_t *ilist); #endif /** diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index f19742abd65..8a3d330f7e4 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4539,7 +4539,7 @@ if (BUILD_CLIENTS) if (proc_supports_pt) if (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) get_target_path_for_execution(drpt2trace_path drpt2trace "${location_suffix}") - macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args) + macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args sim_atops) set(testname_full "tool.drcacheoff.kernel.${testname}_SUDO") torunonly_ci(${testname_full} ${exetgt} drcachesim "offline-kernel-${testname}.c" # for templatex basename @@ -4556,13 +4556,16 @@ if (BUILD_CLIENTS) set(${testname_full}_precmd "foreach@${cmd_pfx}${CMAKE_COMMAND}@-E@remove_directory@${testname_full}.*.dir") set(${testname_full}_postcmd - "firstglob@${cmd_pfx}${drcachesim_path}@-simulator_type@basic_counts@-indir@${testname_full}.*.dir${sim_atops}") + "firstglob@${cmd_pfx}${drcachesim_path}@-indir@${testname_full}.*.dir${sim_atops}") endmacro () # We use '-raw_compress none' because when snappy or lz4 is used for raw traces, # the check that complains about malloc use in the client is disabled by invoking # dr_allow_unsafe_static_behavior. We want to perform this check on the kernel # tracing flow. - torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "") + torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@basic_counts@-ignore_decode_failure") + torunonly_drcacheoff_kernel(opcode_mix ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@opcode_mix@-ignore_decode_failure") endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (proc_supports_pt) From d89c89abf712dacd1b0b04d3182a5672ccbb9ffd Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Tue, 28 Nov 2023 15:18:44 -0500 Subject: [PATCH 02/16] append_encoding before passing entries to write. Add syscall_mix test. --- clients/drcachesim/reader/reader.cpp | 3 +-- ...ex => offline-kernel-opcode-mix.templatex} | 0 .../offline-kernel-syscall-mix.templatex | 6 +++++ .../tests/offline-syscall-mix.templatex | 2 +- .../drcachesim/tests/syscall-mix.templatex | 2 +- clients/drcachesim/tools/syscall_mix.cpp | 4 ++-- clients/drcachesim/tracer/raw2trace.cpp | 22 +++++++++++++------ suite/tests/CMakeLists.txt | 4 +++- 8 files changed, 29 insertions(+), 14 deletions(-) rename clients/drcachesim/tests/{offline-kernel-opcode_mix.templatex => offline-kernel-opcode-mix.templatex} (100%) create mode 100644 clients/drcachesim/tests/offline-kernel-syscall-mix.templatex diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index 1cd157ab44a..783d2a44bb8 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -321,8 +321,7 @@ reader_t::process_input_entry() version_ = cur_ref_.marker.marker_value; else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { filetype_ = cur_ref_.marker.marker_value; - if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_) && - !TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, filetype_)) { + if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) { expect_no_encodings_ = false; } } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE) diff --git a/clients/drcachesim/tests/offline-kernel-opcode_mix.templatex b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex similarity index 100% rename from clients/drcachesim/tests/offline-kernel-opcode_mix.templatex rename to clients/drcachesim/tests/offline-kernel-opcode-mix.templatex diff --git a/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex new file mode 100644 index 00000000000..82de6c517a0 --- /dev/null +++ b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex @@ -0,0 +1,6 @@ +Hello, world! +Syscall mix tool results: + syscall count : syscall_num +( *[1-9][0-9]* : *[0-9]*.*)+ + syscall trace count : syscall_num +( *[1-9][0-9]* : *[0-9]*.*)+ diff --git a/clients/drcachesim/tests/offline-syscall-mix.templatex b/clients/drcachesim/tests/offline-syscall-mix.templatex index 231b6bb93d2..a02a9ca3e1e 100644 --- a/clients/drcachesim/tests/offline-syscall-mix.templatex +++ b/clients/drcachesim/tests/offline-syscall-mix.templatex @@ -1,4 +1,4 @@ Hello, world! Syscall mix tool results: - count : syscall_num + syscall count : syscall_num ( *[1-9][0-9]* : *[0-9]*.*)+ diff --git a/clients/drcachesim/tests/syscall-mix.templatex b/clients/drcachesim/tests/syscall-mix.templatex index 59ae08aad9e..e58b2ec7da2 100644 --- a/clients/drcachesim/tests/syscall-mix.templatex +++ b/clients/drcachesim/tests/syscall-mix.templatex @@ -1,5 +1,5 @@ Hello, world! ---- ---- Syscall mix tool results: - count : syscall_num + syscall count : syscall_num ( *[1-9][0-9]* : *[0-9]*.*)+ diff --git a/clients/drcachesim/tools/syscall_mix.cpp b/clients/drcachesim/tools/syscall_mix.cpp index d2b5c90a309..1b9c1d47653 100644 --- a/clients/drcachesim/tools/syscall_mix.cpp +++ b/clients/drcachesim/tools/syscall_mix.cpp @@ -183,7 +183,7 @@ syscall_mix_t::print_results() << keyvals.first << "\n"; } if (!total.syscall_trace_counts.empty()) { - std::cerr << std::setw(15) << "syscall trace count" + std::cerr << std::setw(20) << "syscall trace count" << " : " << std::setw(9) << "syscall_num\n"; std::vector> sorted_trace( total.syscall_trace_counts.begin(), total.syscall_trace_counts.end()); @@ -191,7 +191,7 @@ syscall_mix_t::print_results() for (const auto &keyvals : sorted_trace) { // XXX: It would be nicer to print the system call name string instead // of its number. - std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) + std::cerr << std::setw(20) << keyvals.second << " : " << std::setw(9) << keyvals.first << "\n"; } } diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index 9de4fb5df49..3c65a61f33e 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1044,9 +1044,17 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } accumulate_to_statistic(tdata, RAW2TRACE_STAT_SYSCALL_TRACES_DECODED, 1); - std::vector decode_pcs; + app_pc saved_decode_pc; + trace_entry_t entries_with_encodings[WRITE_BUFFER_SIZE]; + trace_entry_t *buf = entries_with_encodings; for (const auto &entry : entries) { if (type_is_instr(static_cast(entry.type))) { + if (buf != entries_with_encodings) { + if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) { + return false; + } + buf = entries_with_encodings; + } app_pc instr_pc = reinterpret_cast(entry.addr); accumulate_to_statistic(tdata, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, 1); if (tdata->syscall_pc_to_decode_pc_.find(instr_pc) == @@ -1055,14 +1063,14 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall "Unknown pc after ir2trace: did ir2trace insert new instr?"; return false; } - decode_pcs.push_back(tdata->syscall_pc_to_decode_pc_[instr_pc].first); + saved_decode_pc = tdata->syscall_pc_to_decode_pc_[instr_pc].first; + if (!append_encoding(tdata, saved_decode_pc, entry.size, buf, + entries_with_encodings)) + return false; } + *buf = entry; + ++buf; } - if (!write(tdata, entries.data(), entries.data() + entries.size(), decode_pcs.data(), - decode_pcs.size())) { - return false; - } - return true; } diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 8a3d330f7e4..e2149088dd9 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4564,8 +4564,10 @@ if (BUILD_CLIENTS) # tracing flow. torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "" "@-simulator_type@basic_counts@-ignore_decode_failure") - torunonly_drcacheoff_kernel(opcode_mix ${ci_shared_app} "-raw_compress none" "" + torunonly_drcacheoff_kernel(opcode-mix ${ci_shared_app} "-raw_compress none" "" "@-simulator_type@opcode_mix@-ignore_decode_failure") + torunonly_drcacheoff_kernel(syscall-mix ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@syscall_mix") endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (proc_supports_pt) From d612b738f8b8f45c4f1763fdbcde04c7fd33868a Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Tue, 28 Nov 2023 16:15:59 -0500 Subject: [PATCH 03/16] Fix regex to reduce overhead. --- .../drcachesim/tests/offline-kernel-syscall-mix.templatex | 4 ++-- clients/drcachesim/tracer/raw2trace.cpp | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex index 82de6c517a0..8582336ef38 100644 --- a/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex +++ b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex @@ -1,6 +1,6 @@ Hello, world! Syscall mix tool results: syscall count : syscall_num -( *[1-9][0-9]* : *[0-9]*.*)+ +.* syscall trace count : syscall_num -( *[1-9][0-9]* : *[0-9]*.*)+ +.* diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index 3c65a61f33e..f76bd7ff224 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1071,6 +1071,11 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall *buf = entry; ++buf; } + if (buf != entries_with_encodings) { + if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) { + return false; + } + } return true; } From 01a1e492f7764baf87862f8e8f8271e20a7b96f5 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Tue, 28 Nov 2023 19:06:30 -0500 Subject: [PATCH 04/16] Fix comparator --- clients/drcachesim/tools/syscall_mix.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/drcachesim/tools/syscall_mix.cpp b/clients/drcachesim/tools/syscall_mix.cpp index 1b9c1d47653..a18c6f1e5ed 100644 --- a/clients/drcachesim/tools/syscall_mix.cpp +++ b/clients/drcachesim/tools/syscall_mix.cpp @@ -149,9 +149,9 @@ syscall_mix_t::process_memref(const memref_t &memref) static bool cmp_second_val(const std::pair &l, const std::pair &r) { - if (l.second > r.second) - return true; - return l.first > r.first; + if (l.second == r.second) + return l.first > r.first; + return l.second > r.second; } bool From 94d1ef0c72c92459cb3ebb612297dc9d91107797 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 20:27:30 -0500 Subject: [PATCH 05/16] Move encoding tracking to inside drir --- clients/drcachesim/drpt2trace/drir.h | 51 ++++++++++++++- clients/drcachesim/drpt2trace/drpt2trace.cpp | 7 ++- clients/drcachesim/drpt2trace/ir2trace.cpp | 6 +- clients/drcachesim/drpt2trace/ir2trace.h | 2 +- clients/drcachesim/drpt2trace/pt2ir.cpp | 28 +++------ clients/drcachesim/drpt2trace/pt2ir.h | 2 +- clients/drcachesim/tracer/raw2trace.cpp | 65 ++++---------------- clients/drcachesim/tracer/raw2trace.h | 6 +- 8 files changed, 78 insertions(+), 89 deletions(-) diff --git a/clients/drcachesim/drpt2trace/drir.h b/clients/drcachesim/drpt2trace/drir.h index eb4b133f20b..85eb2f52ca2 100644 --- a/clients/drcachesim/drpt2trace/drir.h +++ b/clients/drcachesim/drpt2trace/drir.h @@ -41,6 +41,10 @@ #include "dr_api.h" #include "utils.h" +#include +#include +#include + namespace dynamorio { namespace drmemtrace { @@ -63,7 +67,7 @@ class drir_t { } void - append(instr_t *instr) + append(instr_t *instr, app_pc orig_pc, int instr_length, uint8_t *encoding) { ASSERT(drcontext_ != nullptr, "drir_t: invalid drcontext_"); ASSERT(ilist_ != nullptr, "drir_t: invalid ilist_"); @@ -72,6 +76,7 @@ class drir_t { return; } instrlist_append(ilist_, instr); + record_encoding(orig_pc, instr_length, encoding); } void * @@ -86,9 +91,53 @@ class drir_t { return ilist_; } + void + clear_ilist() + { + instrlist_clear(drcontext_, ilist_); + } + + app_pc + get_decode_pc(app_pc orig_pc) + { + if (decode_pc_.find(orig_pc) == decode_pc_.end()) { + return nullptr; + } + return decode_pc_[orig_pc].first; + } + private: void *drcontext_; instrlist_t *ilist_; +#define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024) + std::unordered_map> decode_pc_; + std::vector> instr_encodings_; + size_t next_encoding_offset_ = 0; + + void + record_encoding(app_pc orig_pc, int instr_len, uint8_t *encoding) + { + if (instr_encodings_.empty() || + next_encoding_offset_ + instr_len >= SYSCALL_PT_ENCODING_BUF_SIZE) { + // We allocate new memory to store kernel instruction encodings in + // increments of SYSCALL_PT_ENCODING_BUF_SIZE. We do not treat this + // like a cache and clear previously stored encodings because we want to + // ensure decode_pc uniqueness to callers. + instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); + next_encoding_offset_ = 0; + } + app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_]; + memcpy(encode_pc, encoding, instr_len); + auto it = decode_pc_.find(orig_pc); + if (it == decode_pc_.end() || + // We confirm that the instruction encoding has not changed. Just in case + // the kernel is doing JIT. + it->second.second != instr_len || + memcmp(it->second.first, encode_pc, it->second.second) != 0) { + next_encoding_offset_ += instr_len; + decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len); + } + } }; } // namespace drmemtrace diff --git a/clients/drcachesim/drpt2trace/drpt2trace.cpp b/clients/drcachesim/drpt2trace/drpt2trace.cpp index e1961c3e5fa..654740376d1 100644 --- a/clients/drcachesim/drpt2trace/drpt2trace.cpp +++ b/clients/drcachesim/drpt2trace/drpt2trace.cpp @@ -461,7 +461,8 @@ main(int argc, const char *argv[]) uint8_t *pt_data = pt_raw_buffer.data(); size_t pt_data_size = pt_raw_buffer.size(); - pt2ir_convert_status_t status = ptconverter->convert(pt_data, pt_data_size, drir); + pt2ir_convert_status_t status = + ptconverter->convert(pt_data, pt_data_size, &drir); if (status != PT2IR_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR." << "[error status: " << status << "]" << std::endl; @@ -521,7 +522,7 @@ main(int argc, const char *argv[]) /* Convert the PT Data to DR IR. */ pt2ir_convert_status_t status = - ptconverter->convert(pt_data, pt_data_size, drir); + ptconverter->convert(pt_data, pt_data_size, &drir); if (status != PT2IR_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR." << "[error status: " << status << "]" << std::endl; @@ -542,7 +543,7 @@ main(int argc, const char *argv[]) /* Convert the DR IR to trace entries. */ std::vector entries; ir2trace_convert_status_t ir2trace_convert_status = - ir2trace_t::convert(drir, entries); + ir2trace_t::convert(&drir, entries); if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert DR IR to trace entries." << "[error status: " << ir2trace_convert_status << "]" << std::endl; diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp index 1f709eb5fb9..cf7376a815d 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.cpp +++ b/clients/drcachesim/drpt2trace/ir2trace.cpp @@ -55,14 +55,14 @@ namespace drmemtrace { #define ERRMSG_HEADER "[drpt2ir] " ir2trace_convert_status_t -ir2trace_t::convert(DR_PARAM_IN drir_t &drir, +ir2trace_t::convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector &trace, DR_PARAM_IN int verbosity) { - if (drir.get_ilist() == NULL) { + if (drir == nullptr || drir->get_ilist() == NULL) { return IR2TRACE_CONV_ERROR_INVALID_PARAMETER; } - instr_t *instr = instrlist_first(drir.get_ilist()); + instr_t *instr = instrlist_first(drir->get_ilist()); bool prev_was_repstr = false; while (instr != NULL) { trace_entry_t entry = {}; diff --git a/clients/drcachesim/drpt2trace/ir2trace.h b/clients/drcachesim/drpt2trace/ir2trace.h index 18458dc8e3c..13444881f22 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.h +++ b/clients/drcachesim/drpt2trace/ir2trace.h @@ -92,7 +92,7 @@ class ir2trace_t { * error code. */ static ir2trace_convert_status_t - convert(DR_PARAM_IN drir_t &drir, DR_PARAM_INOUT std::vector &trace, + convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector &trace, DR_PARAM_IN int verbosity = 0); }; diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp index 45397d96350..0f3b4cdc312 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.cpp +++ b/clients/drcachesim/drpt2trace/pt2ir.cpp @@ -257,13 +257,13 @@ pt2ir_t::init(DR_PARAM_IN pt2ir_config_t &pt2ir_config, DR_PARAM_IN int verbosit pt2ir_convert_status_t pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size, - DR_PARAM_INOUT drir_t &drir) + DR_PARAM_INOUT drir_t *drir) { if (!pt2ir_initialized_) { return PT2IR_CONV_ERROR_NOT_INITIALIZED; } - if (pt_data == nullptr || pt_data_size <= 0) { + if (pt_data == nullptr || pt_data_size <= 0 || drir == nullptr) { return PT2IR_CONV_ERROR_INVALID_INPUT; } @@ -379,27 +379,12 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } /* Use drdecode to decode insn(pt_insn) to instr_t. */ - instr_t *instr = instr_create(drir.get_drcontext()); - instr_init(drir.get_drcontext(), instr); + instr_t *instr = instr_create(drir->get_drcontext()); + instr_init(drir->get_drcontext(), instr); instr_set_isa_mode(instr, insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64); - bool instr_valid = false; - if (decode(drir.get_drcontext(), insn.raw, instr) != nullptr) - instr_valid = true; - instr_set_translation(instr, (app_pc)insn.ip); - instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size); - /* TODO i#2103: Currently, the PT raw data may contain 'STAC' and 'CLAC' - * instructions that are not supported by Dynamorio. - */ - if (!instr_valid) { - /* The decode() function will not correctly identify the raw bits for - * invalid instruction. So we need to set the raw bits of instr manually. - */ - instr_free_raw_bits(drir.get_drcontext(), instr); - instr_set_raw_bits(instr, insn.raw, insn.size); - instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size); + if (decode(drir->get_drcontext(), insn.raw, instr) == nullptr) { #ifdef DEBUG - /* Print the invalid instruction‘s PC and raw bytes in DEBUG builds. */ if (verbosity_ >= 1) { fprintf(stderr, @@ -412,7 +397,8 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } #endif } - drir.append(instr); + instr_set_translation(instr, (app_pc)insn.ip); + drir->append(instr, (app_pc)insn.ip, insn.size, insn.raw); } } return PT2IR_CONV_SUCCESS; diff --git a/clients/drcachesim/drpt2trace/pt2ir.h b/clients/drcachesim/drpt2trace/pt2ir.h index a1e2f49f01e..02ec9a0a4f6 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.h +++ b/clients/drcachesim/drpt2trace/pt2ir.h @@ -365,7 +365,7 @@ class pt2ir_t { */ pt2ir_convert_status_t convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size, - DR_PARAM_INOUT drir_t &drir); + DR_PARAM_INOUT drir_t *drir); private: /* Diagnose converting errors and output diagnostic results. diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index f76bd7ff224..d08b9847d9c 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1008,17 +1008,18 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } /* Convert the PT Data to DR IR. */ - drir_t drir(GLOBAL_DCONTEXT); - pt2ir_convert_status_t pt2ir_convert_status = - tdata->pt2ir.convert(pt_data->data.get(), pt_data_size, drir); + if (tdata->pt_decode_state_ == nullptr) { + tdata->pt_decode_state_ = std::unique_ptr(new drir_t(GLOBAL_DCONTEXT)); + } + tdata->pt_decode_state_->clear_ilist(); + pt2ir_convert_status_t pt2ir_convert_status = tdata->pt2ir.convert( + pt_data->data.get(), pt_data_size, tdata->pt_decode_state_.get()); if (pt2ir_convert_status != PT2IR_CONV_SUCCESS) { tdata->error = "Failed to convert PT raw trace to DR IR [error status: " + std::to_string(pt2ir_convert_status) + "]"; return false; } - if (!track_syscall_pt_encodings(tdata, drir.get_ilist())) { - return false; - } + /* Convert the DR IR to trace entries. */ addr_t sysnum = pt_data->header[dynamorio::drmemtrace::PDB_HEADER_SYSNUM_IDX].sysnum.sysnum; @@ -1028,7 +1029,7 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall .addr = sysnum }; entries.push_back(start_entry); ir2trace_convert_status_t ir2trace_convert_status = - ir2trace_t::convert(drir, entries); + ir2trace_t::convert(tdata->pt_decode_state_.get(), entries); if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) { tdata->error = "Failed to convert DR IR to trace entries [error status: " + std::to_string(ir2trace_convert_status) + "]"; @@ -1055,15 +1056,14 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } buf = entries_with_encodings; } - app_pc instr_pc = reinterpret_cast(entry.addr); accumulate_to_statistic(tdata, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, 1); - if (tdata->syscall_pc_to_decode_pc_.find(instr_pc) == - tdata->syscall_pc_to_decode_pc_.end()) { + saved_decode_pc = tdata->pt_decode_state_->get_decode_pc( + reinterpret_cast(entry.addr)); + if (saved_decode_pc == nullptr) { tdata->error = "Unknown pc after ir2trace: did ir2trace insert new instr?"; return false; } - saved_decode_pc = tdata->syscall_pc_to_decode_pc_[instr_pc].first; if (!append_encoding(tdata, saved_decode_pc, entry.size, buf, entries_with_encodings)) return false; @@ -1078,49 +1078,6 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } return true; } - -bool -raw2trace_t::track_syscall_pt_encodings(raw2trace_thread_data_t *tdata, - instrlist_t *ilist) -{ - instr_t *instr = instrlist_first(ilist); - while (instr != NULL) { - app_pc orig_pc = instr_get_app_pc(instr); - int instr_len = instr_length(dcontext_, instr); - if (tdata->syscall_instr_encodings_.empty() || - tdata->syscall_next_encoding_offset_ + instr_len >= - SYSCALL_PT_ENCODING_BUF_SIZE) { - // We allocate new memory to store kernel instruction encodings in - // increments of SYSCALL_PT_ENCODING_BUF_SIZE. Note that we cannot - // treat this like a cache and clear previously stored encodings - // because any re-use in the decode address will confuse our write() - // logic (remember that we key encodings using the decode pc). - tdata->syscall_instr_encodings_.emplace_back( - new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); - tdata->syscall_next_encoding_offset_ = 0; - } - app_pc encode_pc = - &tdata->syscall_instr_encodings_.back()[tdata->syscall_next_encoding_offset_]; - byte *ret = instr_encode_to_copy(dcontext_, instr, encode_pc, orig_pc); - if (ret == NULL) { - tdata->error = "Failed to encode a system call kernel instruction."; - return false; - } - DR_ASSERT(ret - encode_pc == instr_len); - auto it = tdata->syscall_pc_to_decode_pc_.find(orig_pc); - if (it == tdata->syscall_pc_to_decode_pc_.end() || - // We confirm that the instruction encoding has not changed. Just in case - // the kernel is doing JIT. - it->second.second != instr_len || - memcmp(it->second.first, encode_pc, it->second.second) != 0) { - tdata->syscall_next_encoding_offset_ += instr_len; - tdata->syscall_pc_to_decode_pc_[orig_pc] = - std::make_pair(encode_pc, instr_len); - } - instr = instr_get_next(instr); - } - return true; -} #endif bool diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index cc969cb4cd3..16e2dd4e41b 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1080,11 +1080,7 @@ class raw2trace_t { std::vector rseq_decode_pcs_; #ifdef BUILD_PT_POST_PROCESSOR -# define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024 * 10) - std::unordered_map> syscall_pc_to_decode_pc_; - std::vector> syscall_instr_encodings_; - size_t syscall_next_encoding_offset_ = 0; - + std::unique_ptr pt_decode_state_ = nullptr; std::istream *kthread_file; bool pt_metadata_processed = false; pt2ir_t pt2ir; From 8d99dd8a66eaaa855c51a08c2c2b29f46d81f4ac Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 20:35:13 -0500 Subject: [PATCH 06/16] Revert ignore_decode_failure flag --- clients/drcachesim/analyzer_multi.cpp | 6 ++--- clients/drcachesim/common/options.cpp | 8 ------ clients/drcachesim/common/options.h | 1 - clients/drcachesim/tools/opcode_mix.cpp | 27 +++++++------------- clients/drcachesim/tools/opcode_mix.h | 4 +-- clients/drcachesim/tools/opcode_mix_create.h | 3 +-- clients/drcachesim/tools/view.cpp | 20 +++++---------- clients/drcachesim/tools/view.h | 3 +-- clients/drcachesim/tools/view_create.h | 3 +-- 9 files changed, 21 insertions(+), 54 deletions(-) diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index 1bb4277938a..11ba9ef3b23 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -410,8 +410,7 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator return nullptr; } return opcode_mix_tool_create(module_file_path, op_verbose.get_value(), - op_alt_module_dir.get_value(), - op_ignore_decode_failure.get_value()); + op_alt_module_dir.get_value()); } else if (simulator_type == SYSCALL_MIX) { return syscall_mix_tool_create(op_verbose.get_value()); } else if (simulator_type == VIEW) { @@ -419,8 +418,7 @@ analyzer_multi_t::create_analysis_tool_from_options(const std::string &simulator // The module file is optional so we don't check for emptiness. return view_tool_create(module_file_path, op_skip_refs.get_value(), op_sim_refs.get_value(), op_view_syntax.get_value(), - op_verbose.get_value(), op_alt_module_dir.get_value(), - op_ignore_decode_failure.get_value()); + op_verbose.get_value(), op_alt_module_dir.get_value()); } else if (simulator_type == FUNC_VIEW) { std::string funclist_file_path = get_aux_file_path( op_funclist_file.get_value(), DRMEMTRACE_FUNCTION_LIST_FILENAME); diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 2aa6db8b56b..53fceb2c03d 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -117,14 +117,6 @@ droption_t op_alt_module_dir( "analysis tools, or in the raw modules file for post-prcoessing of offline " "raw trace files. This directory takes precedence over the recorded path."); -droption_t op_ignore_decode_failure( - DROPTION_SCOPE_FRONTEND, "ignore_decode_failure", false, - "Whether instruction decode failures should be ignored", - "Specifies whether failures to decode the instruction encodings should be ignored by " - "the analysis tools. This is useful especially when decoding kernel traces collected " - "by Intel-PT where decode failures may not indicate a critical issue and can be " - "moved past."); - droption_t op_chunk_instr_count( DROPTION_SCOPE_FRONTEND, "chunk_instr_count", bytesize_t(10 * 1000 * 1000U), // We do not support tiny chunks. We do not support disabling chunks with a 0 diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 4817ad03c8d..8deff88ea74 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -92,7 +92,6 @@ extern dynamorio::droption::droption_t op_infile; extern dynamorio::droption::droption_t op_indir; extern dynamorio::droption::droption_t op_module_file; extern dynamorio::droption::droption_t op_alt_module_dir; -extern dynamorio::droption::droption_t op_ignore_decode_failure; extern dynamorio::droption::droption_t op_chunk_instr_count; extern dynamorio::droption::droption_t op_instr_encodings; diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 94fe322f4d4..5389829c5ac 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -64,22 +64,19 @@ namespace dynamorio { namespace drmemtrace { const std::string opcode_mix_t::TOOL_NAME = "Opcode mix tool"; -constexpr int kInvalidOpcode = -1; analysis_tool_t * opcode_mix_tool_create(const std::string &module_file_path, unsigned int verbose, - const std::string &alt_module_dir, bool ignore_decode_failure) + const std::string &alt_module_dir) { - return new opcode_mix_t(module_file_path, verbose, alt_module_dir, - ignore_decode_failure); + return new opcode_mix_t(module_file_path, verbose, alt_module_dir); } opcode_mix_t::opcode_mix_t(const std::string &module_file_path, unsigned int verbose, - const std::string &alt_module_dir, bool ignore_decode_failure) + const std::string &alt_module_dir) : module_file_path_(module_file_path) , knob_verbose_(verbose) , knob_alt_module_dir_(alt_module_dir) - , knob_ignore_decode_failure_(ignore_decode_failure) { } @@ -222,16 +219,12 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) app_pc next_pc = decode_from_copy(dcontext_.dcontext, decode_pc, trace_pc, &instr); if (next_pc == NULL || !instr_valid(&instr)) { - if (!knob_ignore_decode_failure_) { - instr_free(dcontext_.dcontext, &instr); - shard->error = - "Failed to decode instruction " + to_hex_string(memref.instr.addr); - return false; - } - opcode = kInvalidOpcode; - } else { - opcode = instr_get_opcode(&instr); + instr_free(dcontext_.dcontext, &instr); + shard->error = + "Failed to decode instruction " + to_hex_string(memref.instr.addr); + return false; } + opcode = instr_get_opcode(&instr); shard->worker->opcode_cache[trace_pc] = opcode; instr_free(dcontext_.dcontext, &instr); } @@ -283,9 +276,7 @@ opcode_mix_t::print_results() std::sort(sorted.begin(), sorted.end(), cmp_val); for (const auto &keyvals : sorted) { std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) - << (keyvals.first == kInvalidOpcode ? "" - : decode_opcode_name(keyvals.first)) - << "\n"; + << decode_opcode_name(keyvals.first) << "\n"; } return true; } diff --git a/clients/drcachesim/tools/opcode_mix.h b/clients/drcachesim/tools/opcode_mix.h index 2d1d1989166..062de8e6a5b 100644 --- a/clients/drcachesim/tools/opcode_mix.h +++ b/clients/drcachesim/tools/opcode_mix.h @@ -58,8 +58,7 @@ class opcode_mix_t : public analysis_tool_t { // XXX: Once we update our toolchains to guarantee C++17 support we could use // std::optional here. opcode_mix_t(const std::string &module_file_path, unsigned int verbose, - const std::string &alt_module_dir = "", - bool ignore_decode_failure = false); + const std::string &alt_module_dir = ""); virtual ~opcode_mix_t(); std::string initialize() override; @@ -142,7 +141,6 @@ class opcode_mix_t : public analysis_tool_t { std::mutex shard_map_mutex_; unsigned int knob_verbose_; std::string knob_alt_module_dir_; - bool knob_ignore_decode_failure_; static const std::string TOOL_NAME; // For serial operation. worker_data_t serial_worker_; diff --git a/clients/drcachesim/tools/opcode_mix_create.h b/clients/drcachesim/tools/opcode_mix_create.h index 9bc38741f8a..d398d295b1c 100644 --- a/clients/drcachesim/tools/opcode_mix_create.h +++ b/clients/drcachesim/tools/opcode_mix_create.h @@ -54,8 +54,7 @@ namespace drmemtrace { */ analysis_tool_t * opcode_mix_tool_create(const std::string &module_file_path, unsigned int verbose = 0, - const std::string &alt_module_dir = "", - bool ignore_decode_failure = false); + const std::string &alt_module_dir = ""); } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 4eb829e29c2..2c77e04d682 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -65,15 +65,15 @@ const std::string view_t::TOOL_NAME = "View tool"; analysis_tool_t * view_tool_create(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose, - const std::string &alt_module_dir, bool ignore_decode_failure) + const std::string &alt_module_dir) { return new view_t(module_file_path, skip_refs, sim_refs, syntax, verbose, - alt_module_dir, ignore_decode_failure); + alt_module_dir); } view_t::view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose, - const std::string &alt_module_dir, bool ignore_decode_failure) + const std::string &alt_module_dir) : module_file_path_(module_file_path) , knob_verbose_(verbose) , trace_version_(-1) @@ -88,7 +88,6 @@ view_t::view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t , filetype_(-1) , timestamp_(0) , has_modules_(true) - , knob_ignore_decode_failure_(ignore_decode_failure) { } @@ -554,17 +553,10 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) /*show_bytes=*/true, buf, BUFFER_SIZE_ELEMENTS(buf), /*printed=*/nullptr); if (next_pc == nullptr) { - if (!knob_ignore_decode_failure_) { - error_string_ = - "Failed to disassemble " + to_hex_string(memref.instr.addr); - return false; - } - // We still print the limited bytes output and a string saying - // "". - disasm = buf; - } else { - disasm = buf; + error_string_ = "Failed to disassemble " + to_hex_string(memref.instr.addr); + return false; } + disasm = buf; disasm_cache_.insert({ orig_pc, disasm }); } // Add branch decoration, which varies and so can't be cached purely by PC. diff --git a/clients/drcachesim/tools/view.h b/clients/drcachesim/tools/view.h index fb5581f6c01..30d5f033e14 100644 --- a/clients/drcachesim/tools/view.h +++ b/clients/drcachesim/tools/view.h @@ -60,7 +60,7 @@ class view_t : public analysis_tool_t { // std::optional here. view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose, - const std::string &alt_module_dir = "", bool ignore_decode_failure = false); + const std::string &alt_module_dir = ""); std::string initialize_stream(memtrace_stream_t *serial_stream) override; bool @@ -161,7 +161,6 @@ class view_t : public analysis_tool_t { int64_t filetype_record_ord_ = -1; bool has_modules_; memtrace_stream_t *serial_stream_ = nullptr; - bool knob_ignore_decode_failure_; private: static constexpr int RECORD_COLUMN_WIDTH = 12; diff --git a/clients/drcachesim/tools/view_create.h b/clients/drcachesim/tools/view_create.h index 020198b5139..ebe8a811f6a 100644 --- a/clients/drcachesim/tools/view_create.h +++ b/clients/drcachesim/tools/view_create.h @@ -54,8 +54,7 @@ namespace drmemtrace { analysis_tool_t * view_tool_create(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose = 0, - const std::string &alt_module_dir = "", - bool ignore_decode_failure = false); + const std::string &alt_module_dir = ""); } // namespace drmemtrace } // namespace dynamorio From 4ca1dbf9b4312bc58350d650809a911e4f82587c Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 20:50:34 -0500 Subject: [PATCH 07/16] Other reviewer suggested edits. --- clients/drcachesim/common/trace_entry.h | 6 ++++-- .../drcachesim/tests/offline-kernel-opcode-mix.templatex | 3 +-- clients/drcachesim/tools/view.cpp | 6 ++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 166c5f32055..794472ed1a2 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -531,12 +531,14 @@ typedef enum { TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, /** - * Indicates a point in the trace where a syscall's kernel trace starts. + * Indicates a point in the trace where a syscall's kernel trace starts. The value + * of the marker is set to the syscall number. */ TRACE_MARKER_TYPE_SYSCALL_TRACE_START, /** - * Indicates a point in the trace where a syscall's trace end. + * Indicates a point in the trace where a syscall's trace ends. The value of the + * marker is set to the syscall number. */ TRACE_MARKER_TYPE_SYSCALL_TRACE_END, diff --git a/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex index c8949559467..ce75a56588d 100644 --- a/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex +++ b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex @@ -1,7 +1,6 @@ Hello, world! Opcode mix tool results: .*: total executed instructions -.*:.* .* -.*: .*syscall +.*: .*clac .* diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 2c77e04d682..3a07e944671 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -418,10 +418,12 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) // Handled above. break; case TRACE_MARKER_TYPE_SYSCALL_TRACE_START: - std::cerr << "\n"; + std::cerr << "\n"; break; case TRACE_MARKER_TYPE_SYSCALL_TRACE_END: - std::cerr << "\n"; + std::cerr << "\n"; break; case TRACE_MARKER_TYPE_BRANCH_TARGET: // These are not expected to be visible (since the reader adds them From 58aef7cdb047540a60a7222db29e34e783217441 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 20:52:22 -0500 Subject: [PATCH 08/16] Remove ignore_decode_failure from cmakelists --- suite/tests/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index e2149088dd9..5ea8e7a6865 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4563,9 +4563,9 @@ if (BUILD_CLIENTS) # dr_allow_unsafe_static_behavior. We want to perform this check on the kernel # tracing flow. torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "" - "@-simulator_type@basic_counts@-ignore_decode_failure") + "@-simulator_type@basic_counts") torunonly_drcacheoff_kernel(opcode-mix ${ci_shared_app} "-raw_compress none" "" - "@-simulator_type@opcode_mix@-ignore_decode_failure") + "@-simulator_type@opcode_mix") torunonly_drcacheoff_kernel(syscall-mix ${ci_shared_app} "-raw_compress none" "" "@-simulator_type@syscall_mix") endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) From 9c09cdf8b3075f6c41fb64a0b031f04d04981f8e Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 21:19:13 -0500 Subject: [PATCH 09/16] Do missing cleanup --- clients/drcachesim/drpt2trace/drir.h | 10 ++++++---- clients/drcachesim/tracer/raw2trace.h | 8 -------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/clients/drcachesim/drpt2trace/drir.h b/clients/drcachesim/drpt2trace/drir.h index 85eb2f52ca2..6c799b119d9 100644 --- a/clients/drcachesim/drpt2trace/drir.h +++ b/clients/drcachesim/drpt2trace/drir.h @@ -126,16 +126,18 @@ class drir_t { instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); next_encoding_offset_ = 0; } - app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_]; - memcpy(encode_pc, encoding, instr_len); auto it = decode_pc_.find(orig_pc); + // We record the encoding only if we don't already have the same encoding for + // the given orig_pc. if (it == decode_pc_.end() || // We confirm that the instruction encoding has not changed. Just in case // the kernel is doing JIT. it->second.second != instr_len || - memcmp(it->second.first, encode_pc, it->second.second) != 0) { - next_encoding_offset_ += instr_len; + memcmp(it->second.first, encoding, it->second.second) != 0) { + app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_]; + memcpy(encode_pc, encoding, instr_len); decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len); + next_encoding_offset_ += instr_len; } } }; diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index 16e2dd4e41b..6c451242284 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1183,14 +1183,6 @@ class raw2trace_t { */ bool process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall_idx); - - /** - * Records the encodings for all instructions in the given ilist. Note that - * kernel system call traces gathered using Intel-PT are decoded using libipt - * which also provides the instr encodings. - */ - bool - track_syscall_pt_encodings(raw2trace_thread_data_t *tdata, instrlist_t *ilist); #endif /** From b3aa55fd217e79b3192d76f2bc4b9a73e7689116 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 22:11:15 -0500 Subject: [PATCH 10/16] Fix conditional branch taken/not-taken detection --- clients/drcachesim/drpt2trace/ir2trace.cpp | 5 ++--- clients/drcachesim/drpt2trace/pt2ir.cpp | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp index cf7376a815d..3006748f8fc 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.cpp +++ b/clients/drcachesim/drpt2trace/ir2trace.cpp @@ -66,6 +66,8 @@ ir2trace_t::convert(DR_PARAM_IN drir_t *drir, bool prev_was_repstr = false; while (instr != NULL) { trace_entry_t entry = {}; + entry.size = instr_length(GLOBAL_DCONTEXT, instr); + entry.addr = (uintptr_t)instr_get_app_pc(instr); if (!trace.empty() && trace.back().type == TRACE_TYPE_INSTR_CONDITIONAL_JUMP) { if (instr_get_prev(instr) == nullptr || @@ -119,9 +121,6 @@ ir2trace_t::convert(DR_PARAM_IN drir_t *drir, VPRINT(1, "Trying to convert an invalid instruction.\n"); } - entry.size = instr_length(GLOBAL_DCONTEXT, instr); - entry.addr = (uintptr_t)instr_get_app_pc(instr); - trace.push_back(entry); instr = instr_get_next(instr); diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp index 0f3b4cdc312..40da57b8530 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.cpp +++ b/clients/drcachesim/drpt2trace/pt2ir.cpp @@ -383,7 +383,8 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ instr_init(drir->get_drcontext(), instr); instr_set_isa_mode(instr, insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64); - if (decode(drir->get_drcontext(), insn.raw, instr) == nullptr) { + if (decode_from_copy(drir->get_drcontext(), insn.raw, (app_pc)insn.ip, + instr) == nullptr) { #ifdef DEBUG /* Print the invalid instruction‘s PC and raw bytes in DEBUG builds. */ if (verbosity_ >= 1) { @@ -397,7 +398,6 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } #endif } - instr_set_translation(instr, (app_pc)insn.ip); drir->append(instr, (app_pc)insn.ip, insn.size, insn.raw); } } From da4776f213704ef211942527a72e82da92cfcc1c Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 22:28:08 -0500 Subject: [PATCH 11/16] Add clarifying comment. --- clients/drcachesim/tracer/raw2trace.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index d08b9847d9c..f38c9e079b2 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1057,6 +1057,9 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall buf = entries_with_encodings; } accumulate_to_statistic(tdata, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, 1); + // The per-thread drir_t object (pt_decode_state_) keeps instr encoding + // state across system calls. So different dynamic instances of the same + // instruction in system calls will have the same decode_pc. saved_decode_pc = tdata->pt_decode_state_->get_decode_pc( reinterpret_cast(entry.addr)); if (saved_decode_pc == nullptr) { From e2dd3a462c56c03291013c90cc84e83a8d786063 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Wed, 29 Nov 2023 22:41:56 -0500 Subject: [PATCH 12/16] Add clarifying comment. --- clients/drcachesim/drpt2trace/drir.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/clients/drcachesim/drpt2trace/drir.h b/clients/drcachesim/drpt2trace/drir.h index 6c799b119d9..6b6f0b0974a 100644 --- a/clients/drcachesim/drpt2trace/drir.h +++ b/clients/drcachesim/drpt2trace/drir.h @@ -66,6 +66,8 @@ class drir_t { } } + // Appends the given instr to the internal ilist, and records (replaces if + // one already exists) the given encoding for the orig_pc. void append(instr_t *instr, app_pc orig_pc, int instr_length, uint8_t *encoding) { @@ -79,24 +81,33 @@ class drir_t { record_encoding(orig_pc, instr_length, encoding); } + // Returns the opaque pointer to the dcontext_t used to construct this + // object. void * get_drcontext() { return drcontext_; } + // Returns the instrlist_t of instrs accumulated so far. instrlist_t * get_ilist() { return ilist_; } + // Clears the instrs accumulated in the ilist. Note that this does + // not clear the encodings accumulated. void clear_ilist() { instrlist_clear(drcontext_, ilist_); } + // Returns the address of the encoding recorded for the given orig_pc. + // Encodings are persisted across clear_ilist() calls, so we will + // return the same decode_pc for the same orig_pc unless a new encoding + // is added for the same orig_pc. app_pc get_decode_pc(app_pc orig_pc) { From a4e7572e8c4f37c31a08fc496dea2eab78ea6376 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 30 Nov 2023 00:55:25 -0500 Subject: [PATCH 13/16] Fix pt2trace tests. --- .../drcachesim/drpt2trace/test_simple.expect | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/clients/drcachesim/drpt2trace/test_simple.expect b/clients/drcachesim/drpt2trace/test_simple.expect index c55837c430b..fab610f2f06 100644 --- a/clients/drcachesim/drpt2trace/test_simple.expect +++ b/clients/drcachesim/drpt2trace/test_simple.expect @@ -1,13 +1,13 @@ TAG 0x0000000000000000 - +0 L2 b8 01 00 00 00 mov $0x00000001 -> %eax - +5 L2 bf 01 00 00 00 mov $0x00000001 -> %edi - +10 L2 48 be 00 20 40 00 00 mov $0x0000000000402000 -> %rsi - 00 00 00 - +20 L2 ba 0e 00 00 00 mov $0x0000000e -> %edx - +25 L2 0f 05 syscall -> %rcx %r11 - +27 L2 b8 3c 00 00 00 mov $0x0000003c -> %eax - +32 L2 bf 00 00 00 00 mov $0x00000000 -> %edi - +37 L2 0f 05 syscall -> %rcx %r11 + +0 L3 .* mov $0x00000001 -> %eax + +5 L3 .* mov $0x00000001 -> %edi + +10 L3 .* $0x0000000000402000 -> %rsi + .* + +20 L3 .* mov $0x0000000e -> %edx + +25 L3 .* syscall -> %rcx %r11 + +27 L3 .* mov $0x0000003c -> %eax + +32 L3 .* mov $0x00000000 -> %edi + +37 L3 .* syscall -> %rcx %r11 END 0x0000000000000000 Number of Instructions: 8 From ba2a3253dca9f6526c6993eed890dfcf951c8169 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 30 Nov 2023 01:18:52 -0500 Subject: [PATCH 14/16] Fix drpt2trace test expected output. --- clients/drcachesim/drpt2trace/test_simple.expect | 14 -------------- .../drcachesim/drpt2trace/test_simple.templatex | 14 ++++++++++++++ suite/tests/CMakeLists.txt | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) delete mode 100644 clients/drcachesim/drpt2trace/test_simple.expect create mode 100644 clients/drcachesim/drpt2trace/test_simple.templatex diff --git a/clients/drcachesim/drpt2trace/test_simple.expect b/clients/drcachesim/drpt2trace/test_simple.expect deleted file mode 100644 index fab610f2f06..00000000000 --- a/clients/drcachesim/drpt2trace/test_simple.expect +++ /dev/null @@ -1,14 +0,0 @@ -TAG 0x0000000000000000 - +0 L3 .* mov $0x00000001 -> %eax - +5 L3 .* mov $0x00000001 -> %edi - +10 L3 .* $0x0000000000402000 -> %rsi - .* - +20 L3 .* mov $0x0000000e -> %edx - +25 L3 .* syscall -> %rcx %r11 - +27 L3 .* mov $0x0000003c -> %eax - +32 L3 .* mov $0x00000000 -> %edi - +37 L3 .* syscall -> %rcx %r11 -END 0x0000000000000000 - -Number of Instructions: 8 -Number of Trace Entries: 8 diff --git a/clients/drcachesim/drpt2trace/test_simple.templatex b/clients/drcachesim/drpt2trace/test_simple.templatex new file mode 100644 index 00000000000..ab436a13d81 --- /dev/null +++ b/clients/drcachesim/drpt2trace/test_simple.templatex @@ -0,0 +1,14 @@ +TAG 0x0000000000000000 + \+0 L3 .* mov \$0x00000001 -> %eax + \+5 L3 .* mov \$0x00000001 -> %edi + \+10 L3 .* \$0x0000000000402000 -> %rsi + .* + \+20 L3 .* mov \$0x0000000e -> %edx + \+25 L3 .* syscall -> %rcx %r11 + \+27 L3 .* mov \$0x0000003c -> %eax + \+32 L3 .* mov \$0x00000000 -> %edi + \+37 L3 .* syscall -> %rcx %r11 +END 0x0000000000000000 +.* +Number of Instructions: 8 +Number of Trace Entries: 8 \ No newline at end of file diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 5ea8e7a6865..0ebfafa7789 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4694,7 +4694,7 @@ if (BUILD_CLIENTS) "-sb_sysroot" "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw") torunonly_api(tool.drpt2trace.sideband drpt2trace - "../../clients/drcachesim/drpt2trace/test_simple.expect" + "../../clients/drcachesim/drpt2trace/test_simple.templatex" "" "${drpt2trace_sideband_args}" ON OFF) set(drpt2trace_elf_args ${drpt2trace_commong} "-mode" "ELF" @@ -4702,7 +4702,7 @@ if (BUILD_CLIENTS) "-elf" "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw/hello") torunonly_api(tool.drpt2trace.elf drpt2trace - "../../clients/drcachesim/drpt2trace/test_simple.expect" + "../../clients/drcachesim/drpt2trace/test_simple.templatex" "" "${drpt2trace_elf_args}" ON OFF) endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (BUILD_CLIENTS) From c555355d943a031c3d3a8dd0b1d6242344748bfd Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 30 Nov 2023 19:01:30 -0500 Subject: [PATCH 15/16] Add missing newline --- clients/drcachesim/drpt2trace/test_simple.templatex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/drcachesim/drpt2trace/test_simple.templatex b/clients/drcachesim/drpt2trace/test_simple.templatex index ab436a13d81..7dc05ea81df 100644 --- a/clients/drcachesim/drpt2trace/test_simple.templatex +++ b/clients/drcachesim/drpt2trace/test_simple.templatex @@ -11,4 +11,4 @@ TAG 0x0000000000000000 END 0x0000000000000000 .* Number of Instructions: 8 -Number of Trace Entries: 8 \ No newline at end of file +Number of Trace Entries: 8 From 449647b693044423dabe07337a176048815ece87 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 30 Nov 2023 19:13:05 -0500 Subject: [PATCH 16/16] Address reviewer comments. --- clients/drcachesim/drpt2trace/drir.h | 41 +++++++++++++--------- clients/drcachesim/drpt2trace/ir2trace.cpp | 2 +- clients/drcachesim/drpt2trace/pt2ir.cpp | 7 ++-- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/clients/drcachesim/drpt2trace/drir.h b/clients/drcachesim/drpt2trace/drir.h index 6b6f0b0974a..a8f177a423c 100644 --- a/clients/drcachesim/drpt2trace/drir.h +++ b/clients/drcachesim/drpt2trace/drir.h @@ -121,35 +121,44 @@ class drir_t { void *drcontext_; instrlist_t *ilist_; #define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024) + // For each original app pc key, this stores a pair value: the first + // element is the address where the encoding is stored for the instruction + // at that app pc, the second element is the length of the encoding. std::unordered_map> decode_pc_; + // A vector of buffers of size SYSCALL_PT_ENCODING_BUF_SIZE. Each buffer + // stores some encoded instructions back-to-back. Note that each element + // in the buffer is a single byte, so one instr's encoding occupies possibly + // multiple consecutive elements. + // We allocate new memory to store kernel instruction encodings in + // increments of SYSCALL_PT_ENCODING_BUF_SIZE. We do not treat this like a + // cache and clear previously stored encodings because we want to ensure + // decode_pc uniqueness to callers of get_decode_pc. std::vector> instr_encodings_; + // Next available offset into instr_encodings_.back(). size_t next_encoding_offset_ = 0; void record_encoding(app_pc orig_pc, int instr_len, uint8_t *encoding) { - if (instr_encodings_.empty() || - next_encoding_offset_ + instr_len >= SYSCALL_PT_ENCODING_BUF_SIZE) { - // We allocate new memory to store kernel instruction encodings in - // increments of SYSCALL_PT_ENCODING_BUF_SIZE. We do not treat this - // like a cache and clear previously stored encodings because we want to - // ensure decode_pc uniqueness to callers. - instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); - next_encoding_offset_ = 0; - } auto it = decode_pc_.find(orig_pc); // We record the encoding only if we don't already have the same encoding for // the given orig_pc. - if (it == decode_pc_.end() || + if (it != decode_pc_.end() && // We confirm that the instruction encoding has not changed. Just in case // the kernel is doing JIT. - it->second.second != instr_len || - memcmp(it->second.first, encoding, it->second.second) != 0) { - app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_]; - memcpy(encode_pc, encoding, instr_len); - decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len); - next_encoding_offset_ += instr_len; + it->second.second == instr_len && + memcmp(it->second.first, encoding, it->second.second) == 0) { + return; + } + if (instr_encodings_.empty() || + next_encoding_offset_ + instr_len >= SYSCALL_PT_ENCODING_BUF_SIZE) { + instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); + next_encoding_offset_ = 0; } + app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_]; + memcpy(encode_pc, encoding, instr_len); + decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len); + next_encoding_offset_ += instr_len; } }; diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp index 3006748f8fc..57e27e931d1 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.cpp +++ b/clients/drcachesim/drpt2trace/ir2trace.cpp @@ -67,7 +67,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t *drir, while (instr != NULL) { trace_entry_t entry = {}; entry.size = instr_length(GLOBAL_DCONTEXT, instr); - entry.addr = (uintptr_t)instr_get_app_pc(instr); + entry.addr = reinterpret_cast(instr_get_app_pc(instr)); if (!trace.empty() && trace.back().type == TRACE_TYPE_INSTR_CONDITIONAL_JUMP) { if (instr_get_prev(instr) == nullptr || diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp index 40da57b8530..13ea7221daf 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.cpp +++ b/clients/drcachesim/drpt2trace/pt2ir.cpp @@ -383,8 +383,9 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ instr_init(drir->get_drcontext(), instr); instr_set_isa_mode(instr, insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64); - if (decode_from_copy(drir->get_drcontext(), insn.raw, (app_pc)insn.ip, - instr) == nullptr) { + app_pc instr_ip = reinterpret_cast(insn.ip); + if (decode_from_copy(drir->get_drcontext(), insn.raw, instr_ip, instr) == + nullptr) { #ifdef DEBUG /* Print the invalid instruction‘s PC and raw bytes in DEBUG builds. */ if (verbosity_ >= 1) { @@ -398,7 +399,7 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } #endif } - drir->append(instr, (app_pc)insn.ip, insn.size, insn.raw); + drir->append(instr, instr_ip, insn.size, insn.raw); } } return PT2IR_CONV_SUCCESS;