diff --git a/clients/drcachesim/tests/raw2trace_unit_tests.cpp b/clients/drcachesim/tests/raw2trace_unit_tests.cpp index 2c6a7006ce2..889dce30b7b 100644 --- a/clients/drcachesim/tests/raw2trace_unit_tests.cpp +++ b/clients/drcachesim/tests/raw2trace_unit_tests.cpp @@ -169,13 +169,13 @@ class archive_ostream_test_t : public archive_ostream_t { }; offline_entry_t -make_header(int version = OFFLINE_FILE_VERSION) +make_header(int version = OFFLINE_FILE_VERSION, uint64_t additional_file_types = 0) { offline_entry_t entry; entry.extended.type = OFFLINE_TYPE_EXTENDED; entry.extended.ext = OFFLINE_EXT_TYPE_HEADER; entry.extended.valueA = OFFLINE_FILE_TYPE_DEFAULT | OFFLINE_FILE_TYPE_ENCODINGS | - OFFLINE_FILE_TYPE_SYSCALL_NUMBERS; + OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | additional_file_types; entry.extended.valueB = version; return entry; } @@ -288,7 +288,7 @@ check_entry(std::vector &entries, int &idx, unsigned short expect int expected_size, addr_t expected_addr = 0) { if (expected_type != entries[idx].type || - (expected_size > 0 && + (expected_size >= 0 && static_cast(expected_size) != entries[idx].size) || (expected_addr > 0 && expected_addr != entries[idx].addr)) { std::cerr << "Entry " << idx << " has type " << entries[idx].type << " and size " @@ -2738,6 +2738,149 @@ test_is_maybe_blocking_syscall(void *drcontext) return true; } +bool +test_ifiltered(void *drcontext) +{ +#if defined(X86) || defined(ARM) + std::cerr << "\n===============\nTesting ifiltered trace\n"; + // Our synthetic test first constructs a list of instructions to be encoded into + // a buffer for decoding by raw2trace. + instrlist_t *ilist = instrlist_create(drcontext); + // raw2trace doesn't like offsets of 0 so we shift with a nop. + instr_t *nop = XINST_CREATE_nop(drcontext); + instr_t *move1 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *move2 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instr_t *jcc = + XINST_CREATE_jump_cond(drcontext, DR_PRED_EQ, opnd_create_instr(move1)); + // Control flow in the test assumes that memaddr stores address to jcc. + instr_t *jmp = + XINST_CREATE_jump_mem(drcontext, opnd_create_mem_instr(jcc, 0, OPSZ_PTR)); + instr_t *move3 = + XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2)); + instrlist_append(ilist, nop); + instrlist_append(ilist, move1); + instrlist_append(ilist, jmp); + instrlist_append(ilist, jcc); + instrlist_append(ilist, move2); + instrlist_append(ilist, move3); + size_t offs_nop = 0; + size_t offs_move1 = offs_nop + instr_length(drcontext, nop); + size_t offs_jmp = offs_move1 + instr_length(drcontext, move1); + int jmp_length = instr_length(drcontext, jmp); + size_t offs_jcc = offs_jmp + jmp_length; + size_t offs_move2 = offs_jcc + instr_length(drcontext, jcc); + size_t offs_move3 = offs_move2 + instr_length(drcontext, move2); + + // Now we synthesize our raw trace itself, including a valid header sequence. + std::vector raw; + raw.push_back(make_header(OFFLINE_FILE_VERSION, OFFLINE_FILE_TYPE_IFILTERED)); + raw.push_back(make_tid()); + raw.push_back(make_pid()); + raw.push_back(make_line_size()); + // First instance of the jmp instr is filtered out but its memref is not filtered + // out (indicated by the zero sized block), so no encoding will be emitted and + // it will not count towards the chunk instr count. But this will still be + // accumulated as a delayed branch. + raw.push_back(make_block(offs_jmp, 0)); + raw.push_back(make_memref(42)); + // Second accumulated delayed branch. + raw.push_back(make_block(offs_jcc, 1)); + // At this point, the jmp and jcc are accumulated as delayed branches. + // When writing the delayed branches, we want to make sure we correctly track + // the index into decode_pcs. If we don't increment the index at ifiltered + // instrs, the decode pc of jmp will be accidentally used when recording the + // encoding emitted for jcc. This will cause the jmp encoding to not be emitted + // in the next entry because raw2trace incorrectly tracked that it had + // already emitted it. + raw.push_back(make_block(offs_move1, 1)); + // Second instance of the jmp instr is not filtered out. Its encoding must be + // emitted by raw2trace, or else the reader (in memref_counter_t) will + // complain about a missing encoding. + raw.push_back(make_block(offs_jmp, 1)); + // The memref is also not filtered out. We have a separate pc entry with + // zero instr count just before the memref. + raw.push_back(make_block(offs_jmp, 0)); + raw.push_back(make_memref(42)); + raw.push_back(make_block(offs_jcc, 1)); + raw.push_back(make_block(offs_move2, 1)); + // End of first chunk. + raw.push_back(make_block(offs_move3, 1)); + raw.push_back(make_exit()); + + static const int CHUNK_INSTR_COUNT = 5; + std::vector entries; + if (!run_raw2trace(drcontext, raw, ilist, entries, nullptr, CHUNK_INSTR_COUNT)) + return false; + int idx = 0; + return ( + check_entry(entries, idx, TRACE_TYPE_HEADER, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) && + check_entry(entries, idx, TRACE_TYPE_THREAD, -1) && + check_entry(entries, idx, TRACE_TYPE_PID, -1) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE) && + check_entry(entries, idx, TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) && + // jmp + // No encoding for the i-filtered instr with 0-instr count. + check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, 0) && + check_entry(entries, idx, TRACE_TYPE_READ, -1) && + // jcc + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +# ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +# endif + // Since we cannot infer branch targets accurately for i-filtered traces, this + // has the generic conditional jump type (instead of the more specific + // TRACE_TYPE_INSTR_TAKEN_JUMP type). + check_entry(entries, idx, TRACE_TYPE_INSTR_CONDITIONAL_JUMP, -1) && + // move1 + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1) && + // jmp + // This has an encoding because the previous dynamic instance was actually + // i-filtered. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +# ifdef X86_32 + // An extra encoding entry is needed. + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && +# endif + // In filtered traces, we have one pc entry for the instr itself (if the + // instruction is not i-filtered out) which has the instr length, and another + // zero-length pc entry before each of the instr's memrefs (if the memref + // is not d-filtered out). + check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, jmp_length) && + check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, 0) && + check_entry(entries, idx, TRACE_TYPE_READ, -1) && + // jcc. No encoding because it has already been emitted above. + // Since we cannot infer branch targets accurately for i-filtered traces, this + // has the generic conditional jump type (instead of the more specific + // TRACE_TYPE_INSTR_UNTAKEN_JUMP type). + check_entry(entries, idx, TRACE_TYPE_INSTR_CONDITIONAL_JUMP, -1) && + // move2 + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1) && + // Chunk ends since we've seen exactly 5 instrs. + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) && + check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) && + // move3 + check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) && + check_entry(entries, idx, TRACE_TYPE_INSTR, -1) && + check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) && + check_entry(entries, idx, TRACE_TYPE_FOOTER, -1)); +#else + // This test requires a CTI (so that it gets accumulated as a delayed branch) that + // also reads from memory (so that it's possible to have a case with a zero-sized PC + // entry in the raw trace). AArch64 does not have such an instr. + return true; +#endif +} + int test_main(int argc, const char *argv[]) { @@ -2756,7 +2899,7 @@ test_main(int argc, const char *argv[]) !test_midrseq_end(drcontext) || !test_xfer_modoffs(drcontext) || !test_xfer_absolute(drcontext) || !test_branch_decoration(drcontext) || !test_stats_timestamp_instr_count(drcontext) || - !test_is_maybe_blocking_syscall(drcontext)) + !test_is_maybe_blocking_syscall(drcontext) || !test_ifiltered(drcontext)) return 1; return 0; } diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index b1375ef6a94..c6dd6439d93 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -3161,19 +3161,19 @@ raw2trace_t::write(raw2trace_thread_data_t *tdata, const trace_entry_t *start, start = it; DEBUG_ASSERT(tdata->cur_chunk_instr_count == 0); } - if (type_is_instr(static_cast(it->type)) && - // Do not count PC-only i-filtered instrs. - it->size > 0) { - accumulate_to_statistic(tdata, - RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT, 1); - ++tdata->cur_chunk_instr_count; + if (type_is_instr(static_cast(it->type))) { ++instr_ordinal; - if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, tdata->file_type) && - // We don't want encodings for the PC-only i-filtered entries. - it->size > 0 && instr_ordinal >= static_cast(decode_pcs_size)) { - tdata->error = "decode_pcs is missing entries for written " - "instructions"; - return false; + // Do not count PC-only i-filtered instrs. + if (it->size > 0) { + accumulate_to_statistic( + tdata, RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT, 1); + ++tdata->cur_chunk_instr_count; + if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, tdata->file_type) && + instr_ordinal >= static_cast(decode_pcs_size)) { + tdata->error = "decode_pcs is missing entries for written " + "instructions"; + return false; + } } } // Check for missing encodings after possibly opening a new chunk. @@ -3199,6 +3199,9 @@ raw2trace_t::write(raw2trace_thread_data_t *tdata, const trace_entry_t *start, // Check whether this instr's encoding has already been emitted // due to multiple instances of the same delayed branch (the encoding // cache was cleared in open_new_chunk()). + // XXX: Do we need to delay PC-only (i-filtered) instrs (the ones + // with it->size == 0)? We're anyway skipping over those entries here + // so maybe we could avoid adding them to decode_pcs. (record_encoding_emitted(tdata, *(decode_pcs + instr_ordinal))) { // Write any data we were waiting until post-loop to write. if (it > start &&