From fc3a273fc2ddbac6adbf52e4ed2b3d796999c677 Mon Sep 17 00:00:00 2001 From: JerryYouxin Date: Fri, 22 Sep 2023 01:49:15 +0800 Subject: [PATCH 1/2] Add existing dynamorio-based external tools for value profilers (#6312) We develop a portable and efficient framework [VClinic](https://github.com/VClinic/VClinic) for fine-grained value profilers built atop DynamoRIO. We wish to add a short line of introduction with corresponding external link into the README and dynamorio.org home page. In short, the highlights of VClinic built upon DynamoRIO are shown as follows: - Decoupling implementation of value tracing and value profiling - Implement trace buffer with lightweight inline assembly for efficient value tracing - Support operand-centric view for development of fine-grained value profilers instead of instruction-centric view of DynamoRIO. - Currently support both ARM and X86 platforms. In the future, it can support all DynamoRIO-supported architectures. The detailed document of VClinic can be referred to [here](https://vclinic.readthedocs.io/en/latest/index.html) and the related technical [paper](https://dl.acm.org/doi/abs/10.1145/3575693.3576934) --- README.md | 1 + api/docs/home.dox | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 44e0ed5054b..ddd1bd0c230 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ DynamoRIO is the basis for some well-known external tools: - The [Arm Instruction Emulator (ArmIE)](https://developer.arm.com/tools-and-software/server-and-hpc/arm-architecture-tools/arm-instruction-emulator) - [WinAFL](https://github.com/googleprojectzero/winafl), the Windows fuzzing tool, as an instrumentation and code coverage engine - The fine-grained profiler for ARM [DrCCTProf](https://xl10.github.io/blog/drcctprof.html) +- The portable and efficient framework for fine-grained value profilers [VClinic](https://github.com/VClinic/VClinic) Tools built on DynamoRIO and available in the [release package](https://dynamorio.org/page_download) include: diff --git a/api/docs/home.dox b/api/docs/home.dox index 63a0b356906..97e62a4018a 100644 --- a/api/docs/home.dox +++ b/api/docs/home.dox @@ -62,6 +62,7 @@ DynamoRIO is the basis for some well-known external tools: - The [Arm Instruction Emulator (ArmIE)](https://developer.arm.com/tools-and-software/server-and-hpc/arm-architecture-tools/arm-instruction-emulator) - [WinAFL](https://github.com/googleprojectzero/winafl), the Windows fuzzing tool, as an instrumentation and code coverage engine - The fine-grained profiler for ARM [DrCCTProf](https://xl10.github.io/blog/drcctprof.html) +- The portable and efficient framework for fine-grained value profilers [VClinic](https://github.com/VClinic/VClinic) Tools built on DynamoRIO and available in the [release package](@ref page_download) include: From 10060710dec07f0f778f559f9bb0756ec28c9472 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 21 Sep 2023 19:19:15 -0400 Subject: [PATCH 2/2] i#6303: Avoid duplicate encoding entry on chunk-split branch pair (#6315) Fixes a case where a branch followed by an indirect branch with a chunk split in the middle results in an extra encoding entry after the indirect branch target marker. This extra entry is accumulated in the reader and results in a fatal error. Adds a unit test. Confirmed the fix on the drcacheoff.invariant_checker test as well where it was failing reliably on one machine. Fixes #6303 --- .../drcachesim/tests/raw2trace_unit_tests.cpp | 71 ++++++++++++++++--- clients/drcachesim/tracer/raw2trace.cpp | 57 +++++++++------ 2 files changed, 99 insertions(+), 29 deletions(-) diff --git a/clients/drcachesim/tests/raw2trace_unit_tests.cpp b/clients/drcachesim/tests/raw2trace_unit_tests.cpp index 9ac2daa0651..af8c5024935 100644 --- a/clients/drcachesim/tests/raw2trace_unit_tests.cpp +++ b/clients/drcachesim/tests/raw2trace_unit_tests.cpp @@ -771,26 +771,43 @@ test_chunk_encodings(void *drcontext) instr_t *nop = XINST_CREATE_nop(drcontext); // Test i#5724 where a chunk boundary between consecutive branches results // in a missing encoding entry. + // Also test i#6303 where a delayed indirect branch with tagalong encoding has its + // encoding repeated, causing a reader assert as it accumulates the duplicate. instr_t *move1 = XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); instr_t *move2 = XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); - instr_t *jmp2 = XINST_CREATE_jump(drcontext, opnd_create_instr(move2)); - instr_t *jmp1 = XINST_CREATE_jump(drcontext, opnd_create_instr(jmp2)); + instr_t *jmp_move2 = XINST_CREATE_jump(drcontext, opnd_create_instr(move2)); + instr_t *jmp_jmp = XINST_CREATE_jump(drcontext, opnd_create_instr(jmp_move2)); + instr_t *nop_start = XINST_CREATE_nop(drcontext); + instr_t *jcc_move1 = + XINST_CREATE_jump_cond(drcontext, DR_PRED_EQ, opnd_create_instr(move1)); + instr_t *ret = XINST_CREATE_return(drcontext); instrlist_append(ilist, nop); // Block 1. instrlist_append(ilist, move1); - instrlist_append(ilist, jmp1); + instrlist_append(ilist, jmp_jmp); // Block 2. - instrlist_append(ilist, jmp2); + instrlist_append(ilist, jmp_move2); // Block 3. instrlist_append(ilist, move2); + // Block 4. + instrlist_append(ilist, nop_start); + instrlist_append(ilist, XINST_CREATE_nop(drcontext)); + instrlist_append(ilist, XINST_CREATE_nop(drcontext)); + // i#6303 needs a direct branch before an indirect branch. + instrlist_append(ilist, jcc_move1); + // We should have a chunk boundary here. + instrlist_append(ilist, ret); size_t offs_nop = 0; size_t offs_move1 = offs_nop + instr_length(drcontext, nop); - size_t offs_jmp1 = offs_move1 + instr_length(drcontext, move1); - size_t offs_jmp2 = offs_jmp1 + instr_length(drcontext, jmp1); - size_t offs_move2 = offs_jmp2 + instr_length(drcontext, jmp2); + size_t offs_jmp_jmp = offs_move1 + instr_length(drcontext, move1); + size_t offs_jmp_move2 = offs_jmp_jmp + instr_length(drcontext, jmp_jmp); + size_t offs_move2 = offs_jmp_move2 + instr_length(drcontext, jmp_move2); + size_t offs_nop_start = offs_move2 + instr_length(drcontext, move2); + size_t offs_jcc_move1 = offs_nop_start + 3 * instr_length(drcontext, nop_start); + size_t offs_ret = offs_jcc_move1 + instr_length(drcontext, jcc_move1); // Now we synthesize our raw trace itself, including a valid header sequence. std::vector raw; @@ -801,11 +818,19 @@ test_chunk_encodings(void *drcontext) raw.push_back(make_timestamp()); raw.push_back(make_core()); raw.push_back(make_block(offs_move1, 2)); - raw.push_back(make_block(offs_jmp2, 1)); + raw.push_back(make_block(offs_jmp_move2, 1)); raw.push_back(make_block(offs_move2, 1)); // Repeat the jmp,jmp to test re-emitting encodings in new chunks. raw.push_back(make_block(offs_move1, 2)); - raw.push_back(make_block(offs_jmp2, 1)); + raw.push_back(make_block(offs_jmp_move2, 1)); + raw.push_back(make_block(offs_move2, 1)); + // Add a final chunk boundary right between a branch;ret pair. + raw.push_back(make_block(offs_nop_start, 4)); + raw.push_back(make_block(offs_ret, 1)); + // Test that we don't get another encoding for a 2nd instance of the ret + // (yes, nonsensical having the ret target itself: that's ok). + raw.push_back(make_block(offs_ret, 1)); + // Re-use move2 for the target of the 2nd ret to it isn't truncated. raw.push_back(make_block(offs_move2, 1)); raw.push_back(make_exit()); @@ -863,6 +888,34 @@ test_chunk_encodings(void *drcontext) // Block 3. check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && check_entry(entries, idx, TRACE_TYPE_INSTR, -1) && + // Block 4. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1, offs_nop_start) && + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1) && + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1) && + // The jcc_move1 instr. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +#endif + check_entry(entries, idx, TRACE_TYPE_INSTR_UNTAKEN_JUMP, -1, offs_jcc_move1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + // There should be just one encoding, before the branch target (i#6303). + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_BRANCH_TARGET) && + check_entry(entries, idx, TRACE_TYPE_INSTR_RETURN, -1, offs_ret) && + // There should be no encoding before the 2nd instance. + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_BRANCH_TARGET) && + check_entry(entries, idx, TRACE_TYPE_INSTR_RETURN, -1, offs_ret) && + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1) && + // Footer. check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) && check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); } diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index d6bf98af6c2..8157edff85a 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -3078,30 +3078,47 @@ raw2trace_t::write(raw2trace_thread_data_t *tdata, const trace_entry_t *start, if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, tdata->file_type) && type_is_instr(static_cast(it->type)) && // We don't want encodings for the PC-only i-filtered entries. - it->size > 0 && !prev_was_encoding && - record_encoding_emitted(tdata, *(decode_pcs + instr_ordinal))) { - // Write any data we were waiting until post-loop to write. - if (it > start && - !tdata->out_file->write(reinterpret_cast(start), - reinterpret_cast(it) - - reinterpret_cast(start))) { - tdata->error = "Failed to write to output file"; - return false; - } - if (!insert_post_chunk_encodings(tdata, it, - *(decode_pcs + instr_ordinal))) - return false; - if (!tdata->out_file->write(reinterpret_cast(it), - sizeof(*it))) { - tdata->error = "Failed to write to output file"; - return false; + it->size > 0) { + if (prev_was_encoding) { + // We've already emitted the encoding(s) for this instr. But if we + // opened a new chunk then we've cleared the hashtable record, so + // re-add it here. + record_encoding_emitted(tdata, *(decode_pcs + instr_ordinal)); + } else if + // Check whether this instr's encoding has already been emitted + // due to multiple instances of the same delayed branch (the encoding + // cache was cleared in open_new_chunk()). + (record_encoding_emitted(tdata, *(decode_pcs + instr_ordinal))) { + // Write any data we were waiting until post-loop to write. + if (it > start && + !tdata->out_file->write( + reinterpret_cast(start), + reinterpret_cast(it) - + reinterpret_cast(start))) { + tdata->error = "Failed to write to output file"; + return false; + } + if (!insert_post_chunk_encodings(tdata, it, + *(decode_pcs + instr_ordinal))) + return false; + if (!tdata->out_file->write(reinterpret_cast(it), + sizeof(*it))) { + tdata->error = "Failed to write to output file"; + return false; + } + start = it + 1; } - start = it + 1; } if (it->type == TRACE_TYPE_ENCODING) prev_was_encoding = true; - else - prev_was_encoding = false; + else { + // Do not clear across an indirect branch target, to avoid a duplicate + // encoding being emitted when the indirect branch instruction itself is + // reached. + if (it->type != TRACE_TYPE_MARKER || + it->size != TRACE_MARKER_TYPE_BRANCH_TARGET) + prev_was_encoding = false; + } if (it->type == TRACE_TYPE_MARKER) { if (it->size == TRACE_MARKER_TYPE_TIMESTAMP) tdata->last_timestamp_ = it->addr;