Skip to content

Commit

Permalink
Merge branch 'master' into i5036-aarch64-scatter-gather-multivector
Browse files Browse the repository at this point in the history
  • Loading branch information
jackgallagher-arm authored Nov 20, 2023
2 parents 482432d + 40e2edb commit 7eecb0e
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 16 deletions.
151 changes: 147 additions & 4 deletions clients/drcachesim/tests/raw2trace_unit_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,13 @@ class archive_ostream_test_t : public archive_ostream_t {
};

offline_entry_t
make_header(int version = OFFLINE_FILE_VERSION)
make_header(int version = OFFLINE_FILE_VERSION, uint64_t additional_file_types = 0)
{
offline_entry_t entry;
entry.extended.type = OFFLINE_TYPE_EXTENDED;
entry.extended.ext = OFFLINE_EXT_TYPE_HEADER;
entry.extended.valueA = OFFLINE_FILE_TYPE_DEFAULT | OFFLINE_FILE_TYPE_ENCODINGS |
OFFLINE_FILE_TYPE_SYSCALL_NUMBERS;
OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | additional_file_types;
entry.extended.valueB = version;
return entry;
}
Expand Down Expand Up @@ -288,7 +288,7 @@ check_entry(std::vector<trace_entry_t> &entries, int &idx, unsigned short expect
int expected_size, addr_t expected_addr = 0)
{
if (expected_type != entries[idx].type ||
(expected_size > 0 &&
(expected_size >= 0 &&
static_cast<unsigned short>(expected_size) != entries[idx].size) ||
(expected_addr > 0 && expected_addr != entries[idx].addr)) {
std::cerr << "Entry " << idx << " has type " << entries[idx].type << " and size "
Expand Down Expand Up @@ -2738,6 +2738,149 @@ test_is_maybe_blocking_syscall(void *drcontext)
return true;
}

bool
test_ifiltered(void *drcontext)
{
#if defined(X86) || defined(ARM)
std::cerr << "\n===============\nTesting ifiltered trace\n";
// Our synthetic test first constructs a list of instructions to be encoded into
// a buffer for decoding by raw2trace.
instrlist_t *ilist = instrlist_create(drcontext);
// raw2trace doesn't like offsets of 0 so we shift with a nop.
instr_t *nop = XINST_CREATE_nop(drcontext);
instr_t *move1 =
XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2));
instr_t *move2 =
XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2));
instr_t *jcc =
XINST_CREATE_jump_cond(drcontext, DR_PRED_EQ, opnd_create_instr(move1));
// Control flow in the test assumes that memaddr stores address to jcc.
instr_t *jmp =
XINST_CREATE_jump_mem(drcontext, opnd_create_mem_instr(jcc, 0, OPSZ_PTR));
instr_t *move3 =
XINST_CREATE_move(drcontext, opnd_create_reg(REG1), opnd_create_reg(REG2));
instrlist_append(ilist, nop);
instrlist_append(ilist, move1);
instrlist_append(ilist, jmp);
instrlist_append(ilist, jcc);
instrlist_append(ilist, move2);
instrlist_append(ilist, move3);
size_t offs_nop = 0;
size_t offs_move1 = offs_nop + instr_length(drcontext, nop);
size_t offs_jmp = offs_move1 + instr_length(drcontext, move1);
int jmp_length = instr_length(drcontext, jmp);
size_t offs_jcc = offs_jmp + jmp_length;
size_t offs_move2 = offs_jcc + instr_length(drcontext, jcc);
size_t offs_move3 = offs_move2 + instr_length(drcontext, move2);

// Now we synthesize our raw trace itself, including a valid header sequence.
std::vector<offline_entry_t> raw;
raw.push_back(make_header(OFFLINE_FILE_VERSION, OFFLINE_FILE_TYPE_IFILTERED));
raw.push_back(make_tid());
raw.push_back(make_pid());
raw.push_back(make_line_size());
// First instance of the jmp instr is filtered out but its memref is not filtered
// out (indicated by the zero sized block), so no encoding will be emitted and
// it will not count towards the chunk instr count. But this will still be
// accumulated as a delayed branch.
raw.push_back(make_block(offs_jmp, 0));
raw.push_back(make_memref(42));
// Second accumulated delayed branch.
raw.push_back(make_block(offs_jcc, 1));
// At this point, the jmp and jcc are accumulated as delayed branches.
// When writing the delayed branches, we want to make sure we correctly track
// the index into decode_pcs. If we don't increment the index at ifiltered
// instrs, the decode pc of jmp will be accidentally used when recording the
// encoding emitted for jcc. This will cause the jmp encoding to not be emitted
// in the next entry because raw2trace incorrectly tracked that it had
// already emitted it.
raw.push_back(make_block(offs_move1, 1));
// Second instance of the jmp instr is not filtered out. Its encoding must be
// emitted by raw2trace, or else the reader (in memref_counter_t) will
// complain about a missing encoding.
raw.push_back(make_block(offs_jmp, 1));
// The memref is also not filtered out. We have a separate pc entry with
// zero instr count just before the memref.
raw.push_back(make_block(offs_jmp, 0));
raw.push_back(make_memref(42));
raw.push_back(make_block(offs_jcc, 1));
raw.push_back(make_block(offs_move2, 1));
// End of first chunk.
raw.push_back(make_block(offs_move3, 1));
raw.push_back(make_exit());

static const int CHUNK_INSTR_COUNT = 5;
std::vector<trace_entry_t> entries;
if (!run_raw2trace(drcontext, raw, ilist, entries, nullptr, CHUNK_INSTR_COUNT))
return false;
int idx = 0;
return (
check_entry(entries, idx, TRACE_TYPE_HEADER, -1) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_VERSION) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_FILETYPE) &&
check_entry(entries, idx, TRACE_TYPE_THREAD, -1) &&
check_entry(entries, idx, TRACE_TYPE_PID, -1) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CACHE_LINE_SIZE) &&
check_entry(entries, idx, TRACE_TYPE_MARKER,
TRACE_MARKER_TYPE_CHUNK_INSTR_COUNT) &&
// jmp
// No encoding for the i-filtered instr with 0-instr count.
check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, 0) &&
check_entry(entries, idx, TRACE_TYPE_READ, -1) &&
// jcc
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# ifdef X86_32
// An extra encoding entry is needed.
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# endif
// Since we cannot infer branch targets accurately for i-filtered traces, this
// has the generic conditional jump type (instead of the more specific
// TRACE_TYPE_INSTR_TAKEN_JUMP type).
check_entry(entries, idx, TRACE_TYPE_INSTR_CONDITIONAL_JUMP, -1) &&
// move1
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
check_entry(entries, idx, TRACE_TYPE_INSTR, -1) &&
// jmp
// This has an encoding because the previous dynamic instance was actually
// i-filtered.
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# ifdef X86_32
// An extra encoding entry is needed.
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
# endif
// In filtered traces, we have one pc entry for the instr itself (if the
// instruction is not i-filtered out) which has the instr length, and another
// zero-length pc entry before each of the instr's memrefs (if the memref
// is not d-filtered out).
check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, jmp_length) &&
check_entry(entries, idx, TRACE_TYPE_INSTR_INDIRECT_JUMP, 0) &&
check_entry(entries, idx, TRACE_TYPE_READ, -1) &&
// jcc. No encoding because it has already been emitted above.
// Since we cannot infer branch targets accurately for i-filtered traces, this
// has the generic conditional jump type (instead of the more specific
// TRACE_TYPE_INSTR_UNTAKEN_JUMP type).
check_entry(entries, idx, TRACE_TYPE_INSTR_CONDITIONAL_JUMP, -1) &&
// move2
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
check_entry(entries, idx, TRACE_TYPE_INSTR, -1) &&
// Chunk ends since we've seen exactly 5 instrs.
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CHUNK_FOOTER) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_RECORD_ORDINAL) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_TIMESTAMP) &&
check_entry(entries, idx, TRACE_TYPE_MARKER, TRACE_MARKER_TYPE_CPU_ID) &&
// move3
check_entry(entries, idx, TRACE_TYPE_ENCODING, -1) &&
check_entry(entries, idx, TRACE_TYPE_INSTR, -1) &&
check_entry(entries, idx, TRACE_TYPE_THREAD_EXIT, -1) &&
check_entry(entries, idx, TRACE_TYPE_FOOTER, -1));
#else
// This test requires a CTI (so that it gets accumulated as a delayed branch) that
// also reads from memory (so that it's possible to have a case with a zero-sized PC
// entry in the raw trace). AArch64 does not have such an instr.
return true;
#endif
}

int
test_main(int argc, const char *argv[])
{
Expand All @@ -2756,7 +2899,7 @@ test_main(int argc, const char *argv[])
!test_midrseq_end(drcontext) || !test_xfer_modoffs(drcontext) ||
!test_xfer_absolute(drcontext) || !test_branch_decoration(drcontext) ||
!test_stats_timestamp_instr_count(drcontext) ||
!test_is_maybe_blocking_syscall(drcontext))
!test_is_maybe_blocking_syscall(drcontext) || !test_ifiltered(drcontext))
return 1;
return 0;
}
Expand Down
27 changes: 15 additions & 12 deletions clients/drcachesim/tracer/raw2trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3161,19 +3161,19 @@ raw2trace_t::write(raw2trace_thread_data_t *tdata, const trace_entry_t *start,
start = it;
DEBUG_ASSERT(tdata->cur_chunk_instr_count == 0);
}
if (type_is_instr(static_cast<trace_type_t>(it->type)) &&
// Do not count PC-only i-filtered instrs.
it->size > 0) {
accumulate_to_statistic(tdata,
RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT, 1);
++tdata->cur_chunk_instr_count;
if (type_is_instr(static_cast<trace_type_t>(it->type))) {
++instr_ordinal;
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, tdata->file_type) &&
// We don't want encodings for the PC-only i-filtered entries.
it->size > 0 && instr_ordinal >= static_cast<int>(decode_pcs_size)) {
tdata->error = "decode_pcs is missing entries for written "
"instructions";
return false;
// Do not count PC-only i-filtered instrs.
if (it->size > 0) {
accumulate_to_statistic(
tdata, RAW2TRACE_STAT_FINAL_TRACE_INSTRUCTION_COUNT, 1);
++tdata->cur_chunk_instr_count;
if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, tdata->file_type) &&
instr_ordinal >= static_cast<int>(decode_pcs_size)) {
tdata->error = "decode_pcs is missing entries for written "
"instructions";
return false;
}
}
}
// Check for missing encodings after possibly opening a new chunk.
Expand All @@ -3199,6 +3199,9 @@ raw2trace_t::write(raw2trace_thread_data_t *tdata, const trace_entry_t *start,
// Check whether this instr's encoding has already been emitted
// due to multiple instances of the same delayed branch (the encoding
// cache was cleared in open_new_chunk()).
// XXX: Do we need to delay PC-only (i-filtered) instrs (the ones
// with it->size == 0)? We're anyway skipping over those entries here
// so maybe we could avoid adding them to decode_pcs.
(record_encoding_emitted(tdata, *(decode_pcs + instr_ordinal))) {
// Write any data we were waiting until post-loop to write.
if (it > start &&
Expand Down

0 comments on commit 7eecb0e

Please sign in to comment.