diff --git a/clients/drcachesim/tests/decode_cache_test.cpp b/clients/drcachesim/tests/decode_cache_test.cpp index a5274752431..5ccecaf87a2 100644 --- a/clients/drcachesim/tests/decode_cache_test.cpp +++ b/clients/drcachesim/tests/decode_cache_test.cpp @@ -50,6 +50,8 @@ class test_decode_info_t : public decode_info_base_t { public: bool is_nop_ = false; bool is_ret_ = false; + bool is_ipt_ = false; + bool decode_info_set_ = false; private: void @@ -57,8 +59,12 @@ class test_decode_info_t : public decode_info_base_t { const dynamorio::drmemtrace::_memref_instr_t &memref_instr, instr_t *instr) override { + // decode_cache_t should call set_decode_info only one time per object. + assert(!decode_info_set_); is_nop_ = instr_is_nop(instr); is_ret_ = instr_is_return(instr); + is_ipt_ = instr_is_interrupt(instr); + decode_info_set_ = true; } }; @@ -68,13 +74,16 @@ check_decode_caching(void *drcontext, bool persist_instrs, bool use_module_mappe static constexpr addr_t BASE_ADDR = 0x123450; instr_t *nop = XINST_CREATE_nop(drcontext); instr_t *ret = XINST_CREATE_return(drcontext); + instr_t *ipt = XINST_CREATE_interrupt(drcontext, OPND_CREATE_INT8(10)); instrlist_t *ilist = instrlist_create(drcontext); instrlist_append(ilist, nop); instrlist_append(ilist, ret); + instrlist_append(ilist, ipt); std::vector memref_setup = { { gen_instr(TID_A), nop }, { gen_instr(TID_A), ret }, { gen_instr(TID_A), nop }, + { gen_instr(TID_A), ipt }, }; std::vector memrefs; instrlist_t *ilist_for_test_decode_cache = nullptr; @@ -89,8 +98,6 @@ check_decode_caching(void *drcontext, bool persist_instrs, bool use_module_mappe module_file_for_test_decode_cache = "some_mod_file"; } else { memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR); - // Set up the second nop memref to reuse the same encoding as the first nop. - memrefs[2].instr.encoding_is_new = false; } test_decode_info_t test_decode_info; @@ -108,7 +115,9 @@ check_decode_caching(void *drcontext, bool persist_instrs, bool use_module_mappe /*persist_decoded_instr=*/true, ilist_for_test_decode_cache); decode_cache.init(ENCODING_FILE_TYPE, module_file_for_test_decode_cache, ""); for (const memref_t &memref : memrefs) { - std::string err = decode_cache.add_decode_info(memref.instr); + instr_decode_info_t *unused_cached_decode_info; + std::string err = + decode_cache.add_decode_info(memref.instr, unused_cached_decode_info); if (err != "") return err; } @@ -131,36 +140,75 @@ check_decode_caching(void *drcontext, bool persist_instrs, bool use_module_mappe drcontext, /*persist_decoded_instrs=*/false, ilist_for_test_decode_cache); decode_cache.init(ENCODING_FILE_TYPE, module_file_for_test_decode_cache, ""); + + // Test: Lookup non-existing pc. if (decode_cache.get_decode_info( reinterpret_cast(memrefs[0].instr.addr)) != nullptr) { return "Unexpected test_decode_info_t for never-seen pc"; } - std::string err = decode_cache.add_decode_info(memrefs[0].instr); + + test_decode_info_t *cached_decode_info; + + // Test: Lookup existing pc. + std::string err = + decode_cache.add_decode_info(memrefs[0].instr, cached_decode_info); if (err != "") return err; test_decode_info_t *decode_info_nop = decode_cache.get_decode_info(reinterpret_cast(memrefs[0].instr.addr)); - if (decode_info_nop == nullptr || !decode_info_nop->is_valid() || - !decode_info_nop->is_nop_) { + if (decode_info_nop == nullptr || decode_info_nop != cached_decode_info || + !decode_info_nop->is_valid() || !decode_info_nop->is_nop_) { return "Unexpected test_decode_info_t for nop instr"; } - err = decode_cache.add_decode_info(memrefs[1].instr); + + // Test: Lookup another existing pc. + err = decode_cache.add_decode_info(memrefs[1].instr, cached_decode_info); if (err != "") return err; test_decode_info_t *decode_info_ret = decode_cache.get_decode_info(reinterpret_cast(memrefs[1].instr.addr)); - if (decode_info_ret == nullptr || !decode_info_ret->is_valid() || - !decode_info_ret->is_ret_) { + if (decode_info_ret == nullptr || decode_info_ret != cached_decode_info || + !decode_info_ret->is_valid() || !decode_info_ret->is_ret_) { return "Unexpected test_decode_info_t for ret instr"; } - err = decode_cache.add_decode_info(memrefs[2].instr); + + // Test: Lookup existing pc but from a different memref. + // Set up the second nop memref to reuse the same encoding as the first nop. + memrefs[2].instr.encoding_is_new = false; + err = decode_cache.add_decode_info(memrefs[2].instr, cached_decode_info); if (err != "") return err; test_decode_info_t *decode_info_nop_2 = decode_cache.get_decode_info(reinterpret_cast(memrefs[2].instr.addr)); - if (decode_info_nop_2 != decode_info_nop) { - return "Did not see same decode info instance for second instance of nop"; + if (decode_info_nop_2 != decode_info_nop || + decode_info_nop_2 != cached_decode_info) { + return "Unexpected decode info instance for second instance of nop"; + } + + if (!use_module_mapper) { + // Test: Overwrite existing decode info for a pc. Works only with embedded + // encodings. + // Pretend the interrupt is at the same trace pc as the ret. + // Encodings have been added to the memref already so this still remains + // an interrupt instruction even though we've modified addr. + memrefs[3].instr.addr = memrefs[1].instr.addr; + err = decode_cache.add_decode_info(memrefs[3].instr, cached_decode_info); + if (err != "") + return err; + test_decode_info_t *decode_info_ipt = decode_cache.get_decode_info( + reinterpret_cast(memrefs[3].instr.addr)); + if (decode_info_ipt == nullptr || decode_info_ipt != cached_decode_info || + !decode_info_ipt->is_valid() || !decode_info_ipt->is_ipt_) { + return "Unexpected test_decode_info_t for ipt instr"; + } + decode_info_ret = decode_cache.get_decode_info( + reinterpret_cast(memrefs[1].instr.addr)); + if (decode_info_ret != decode_info_ipt) { + return "Expected ret and ipt memref pcs to return the same decode info"; + } } + + // Test: Verify all cached decode info gets cleared. decode_cache.clear_cache(); decode_info_nop = decode_cache.get_decode_info(reinterpret_cast(memrefs[0].instr.addr)); @@ -183,10 +231,16 @@ check_missing_module_mapper_and_no_encoding(void *drcontext) test_decode_cache_t decode_cache( drcontext, /*persist_decoded_instr=*/true, /*ilist_for_test_module_mapper=*/nullptr); - std::string err = decode_cache.add_decode_info(instr.instr); + instr_decode_info_t dummy; + // Initialize to non-nullptr for the test. + instr_decode_info_t *cached_decode_info = &dummy; + std::string err = decode_cache.add_decode_info(instr.instr, cached_decode_info); if (err == "") { return "Expected error at add_decode_info but did not get any"; } + if (cached_decode_info != nullptr) { + return "Expected returned reference cached_decode_info to be nullptr"; + } err = decode_cache.init( static_cast(OFFLINE_FILE_TYPE_SYSCALL_NUMBERS), "", ""); if (err == "") { diff --git a/clients/drcachesim/tools/common/decode_cache.h b/clients/drcachesim/tools/common/decode_cache.h index 63cd0ce4b08..db08f89203a 100644 --- a/clients/drcachesim/tools/common/decode_cache.h +++ b/clients/drcachesim/tools/common/decode_cache.h @@ -243,6 +243,10 @@ template class decode_cache_t : public decode_cache_base_t { * * Guaranteed to be non-nullptr if the prior add_decode_info for that pc * returned no error. + * + * When analyzing memref_t from a trace, it may be better to use + * add_decode_info() instead if it's possible that the instr at \p pc + * may have changed (e.g., JIT code). */ DecodeInfo * get_decode_info(app_pc pc) @@ -263,27 +267,48 @@ template class decode_cache_t : public decode_cache_base_t { * If there is a decoding failure, the default-constructed DecodeInfo that * returns is_valid() = false will be added to the cache. * - * Returns the error string, or an empty string if there was no error. + * Returns a pointer to whatever DecodeInfo is present in the cache in the + * \p cached_decode_info reference pointer parameter, or a nullptr if none + * is cached. This helps avoid a repeated lookup in a subsequent + * get_decode_info() call. + * + * Returns the error string, or an empty string if there was no error. A + * valid DecodeInfo is guaranteed to be added to the cache if there was no + * error. */ std::string - add_decode_info(const dynamorio::drmemtrace::_memref_instr_t &memref_instr) + add_decode_info(const dynamorio::drmemtrace::_memref_instr_t &memref_instr, + DecodeInfo *&cached_decode_info) { + cached_decode_info = nullptr; if (!init_done_) { return "init() must be called first"; } const app_pc trace_pc = reinterpret_cast(memref_instr.addr); - if (!use_module_mapper_ && memref_instr.encoding_is_new) { - // If the user asked to use the module mapper despite having - // embedded encodings in the trace, we don't invalidate the - // cached encoding. - decode_cache_.erase(trace_pc); - } - auto it = decode_cache_.find(trace_pc); - // If the prior cached info was invalid, we try again. - if (it != decode_cache_.end() && it->second.is_valid()) { + auto it_inserted = decode_cache_.emplace(trace_pc, DecodeInfo()); + typename std::unordered_map::iterator info = + it_inserted.first; + bool exists = !it_inserted.second; + if (exists && + // If the prior cached info was invalid, we try decoding it again. + info->second.is_valid() && + // We can return the cached decode info if: + // - we're using the module mapper, where we don't support JIT + // encodings that may change; or + // - we're using embedded encodings from the trace, and the + // current instr explicitly says the encoding isn't new. + (use_module_mapper_ || !memref_instr.encoding_is_new)) { + cached_decode_info = &info->second; return ""; + } else if (exists) { + // We may end up here if the existing DecodeInfo in the cache: + // - is invalid; or + // - is valid but now we have an instr with encoding_is_new set, + // and we're using the embedded encodings from the trace. + // In both these cases we create a new DecodeInfo. + info->second = DecodeInfo(); } - decode_cache_[trace_pc] = DecodeInfo(); + cached_decode_info = &info->second; instr_t *instr = nullptr; instr_noalloc_t noalloc; if (persist_decoded_instrs_) { @@ -309,7 +334,7 @@ template class decode_cache_t : public decode_cache_base_t { } return "decode_from_copy failed"; } - decode_cache_[trace_pc].set_decode_info(dcontext_, memref_instr, instr); + info->second.set_decode_info(dcontext_, memref_instr, instr); return ""; } diff --git a/clients/drcachesim/tools/invariant_checker.cpp b/clients/drcachesim/tools/invariant_checker.cpp index 4cb5a4b5abc..8a631772492 100644 --- a/clients/drcachesim/tools/invariant_checker.cpp +++ b/clients/drcachesim/tools/invariant_checker.cpp @@ -782,15 +782,14 @@ invariant_checker_t::parallel_shard_memref(void *shard_data, const memref_t &mem if (shard->error_ != "") return false; } - shard->error_ = shard->decode_cache_->add_decode_info(memref.instr); + per_shard_t::decoding_info_t *decode_info; + shard->error_ = + shard->decode_cache_->add_decode_info(memref.instr, decode_info); if (shard->error_ != "") { return false; } - // The decode_info returned here will never be nullptr since we - // return early if the prior add_decode_info returned an error. - per_shard_t::decoding_info_t *decode_info = - shard->decode_cache_->get_decode_info( - reinterpret_cast(memref.instr.addr)); + // The decode_info here will never be nullptr since we return + // early if the prior add_decode_info returned an error. cur_instr_info.decoding = *decode_info; #ifdef X86 if (cur_instr_info.decoding.opcode_ == OP_sti) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 3524631759c..07b84529cc8 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -212,14 +212,13 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) } ++shard->instr_count; - shard->error = shard->decode_cache->add_decode_info(memref.instr); + opcode_data_t *opcode_data; + shard->error = shard->decode_cache->add_decode_info(memref.instr, opcode_data); if (shard->error != "") { return false; } - // The opcode_data returned here will never be nullptr since we - // return early if the prior add_decode_info returned an error. - opcode_data_t *opcode_data = - shard->decode_cache->get_decode_info(reinterpret_cast(memref.instr.addr)); + // The opcode_data here will never be nullptr since we return + // early if the prior add_decode_info returned an error. ++shard->opcode_counts[opcode_data->opcode_]; ++shard->category_counts[opcode_data->category_]; return true;