From 8cdf1b1fa6aa4d323eb3baafa93aff387f8380ae Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Thu, 21 Mar 2024 00:21:20 -0400 Subject: [PATCH] i#6712: Add record_scheduler_t support for replay-as-traced (#6714) Adds record_scheduler_t (for the record_filter tool) support for replaying as-traced. The key change here is having record_reader_t's instruction ordinal, record_scheduler_t's output stream instruction ordinal, and record_scheduler_t's switch boundaries all consider the first in any sequence of encoding records or TRACE_MARKER_TYPE_BRANCH_TARGET markers to start an instruction instead of waiting for the instruction record. (Previously the scheduler switch point did consider encodings, but not branch targets, and the ordinals only considered instructions.) Moving the boundary back avoids splitting encodings from instructions when operating at instruction boundaries in the scheduler (encountered during replay, but this would also affect skipping for scheduler regions of interest). Adds tests of all 3 boundary types to the record_scheduler_t unit test. Adds a test of record_filter on the checked-in threadsig trace in as-traced replay mode. 
Fixes #6712 --- .../drcachesim/reader/record_file_reader.h | 22 ++++++++++++- clients/drcachesim/scheduler/scheduler.cpp | 32 +++++++++++-------- clients/drcachesim/scheduler/scheduler.h | 5 +++ .../tests/record_filter_as_traced.templatex | 10 ++++++ .../drcachesim/tests/scheduler_unit_tests.cpp | 24 +++++++++++++- suite/tests/CMakeLists.txt | 18 ++++++++++- 6 files changed, 95 insertions(+), 16 deletions(-) create mode 100644 clients/drcachesim/tests/record_filter_as_traced.templatex diff --git a/clients/drcachesim/reader/record_file_reader.h b/clients/drcachesim/reader/record_file_reader.h index a80cdeba581..e966bc51f4e 100644 --- a/clients/drcachesim/reader/record_file_reader.h +++ b/clients/drcachesim/reader/record_file_reader.h @@ -141,6 +141,15 @@ class record_reader_t : public std::iteratortype == TRACE_TYPE_ENCODING || + // The branch target marker sits between any encodings and the instr. + (record->type == TRACE_TYPE_MARKER && + record->size == TRACE_MARKER_TYPE_BRANCH_TARGET); + } + record_reader_t & operator++() { @@ -149,7 +158,13 @@ class record_reader_t : public std::iterator(cur_entry_.type))) + // We increment the instr count at the encoding as that avoids multiple + // problems with separating encodings from instrs when skipping (including + // for scheduler regions of interest) and when replaying schedules: anything + // using instr ordinals as boundaries. + if (!prev_record_was_pre_instr_ && + (record_is_pre_instr(&cur_entry_) || + type_is_instr(static_cast(cur_entry_.type)))) ++cur_instr_count_; else if (cur_entry_.type == TRACE_TYPE_MARKER) { switch (cur_entry_.size) { @@ -177,6 +192,7 @@ class record_reader_t : public std::iterator::record_type_is_encoding( return static_cast(record.type) == TRACE_TYPE_ENCODING; } -template <> -bool -scheduler_tmpl_t::record_type_is_instr_boundary( - trace_entry_t record, trace_entry_t prev_record) -{ - // Don't advance past encodings and split them from their associated instr. 
- return (record_type_is_instr(record) || record_type_is_encoding(record)) && - !record_type_is_encoding(prev_record); -} - template <> typename scheduler_tmpl_t::stream_status_t scheduler_tmpl_t::unread_last_record( @@ -447,6 +437,18 @@ scheduler_tmpl_t::record_type_is_marker( return true; } +template <> +bool +scheduler_tmpl_t::record_type_is_instr_boundary( + trace_entry_t record, trace_entry_t prev_record) +{ + // Don't advance past encodings or target markers and split them from their + // associated instr. + return (record_type_is_instr(record) || + record_reader_t::record_is_pre_instr(&record)) && + !record_reader_t::record_is_pre_instr(&prev_record); +} + template <> bool scheduler_tmpl_t::record_type_is_timestamp( @@ -563,7 +565,7 @@ scheduler_tmpl_t::stream_t::next_record(RecordType &reco std::lock_guard guard(*input->lock); if (!input->reader->is_record_synthetic()) ++cur_ref_count_; - if (scheduler_->record_type_is_instr(record)) + if (scheduler_->record_type_is_instr_boundary(record, prev_record_)) ++cur_instr_count_; VPRINT(scheduler_, 4, "stream record#=%" PRId64 ", instr#=%" PRId64 " (cur input %" PRId64 @@ -597,6 +599,7 @@ scheduler_tmpl_t::stream_t::next_record(RecordType &reco break; } } + prev_record_ = record; return sched_type_t::STATUS_OK; } @@ -1789,7 +1792,9 @@ scheduler_tmpl_t::clear_input_queue(input_info_t &input) // skip it all when skipping ahead in the input stream. int i = 0; while (!input.queue.empty()) { - assert(i == 0 || !record_type_is_instr(input.queue.front())); + assert(i == 0 || + (!record_type_is_instr(input.queue.front()) && + !record_type_is_encoding(input.queue.front()))); ++i; input.queue.pop_front(); } @@ -1809,7 +1814,8 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out // For a skip of 0 we still need to clear non-instrs from the queue, but // should not have an instr in there. 
assert(skip_amount > 0 || input.queue.empty() || - !record_type_is_instr(input.queue.front())); + (!record_type_is_instr(input.queue.front()) && + !record_type_is_encoding(input.queue.front()))); clear_input_queue(input); input.reader->skip_instructions(skip_amount); if (*input.reader == *input.reader_end) { diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 72c4ad9d4fd..fea81cc71ec 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -792,6 +792,10 @@ template class scheduler_tmpl_t { } /** * Returns the count of instructions from the start of the trace to this point. + * For record_scheduler_t, if any encoding records or the internal record + * TRACE_MARKER_TYPE_BRANCH_TARGET records are present prior to an instruction + * marker, the count will increase at the first of those records as they are + * considered part of the instruction. * If #SCHEDULER_USE_INPUT_ORDINALS is set, then this value matches the * instruction ordinal for the current input stream (and thus might decrease or * not change across records if the input changed). Otherwise, if multiple input @@ -1039,6 +1043,7 @@ template class scheduler_tmpl_t { uint64_t cache_line_size_ = 0; uint64_t chunk_instr_count_ = 0; uint64_t page_size_ = 0; + RecordType prev_record_ = {}; // Let the outer class update our state. friend class scheduler_tmpl_t; diff --git a/clients/drcachesim/tests/record_filter_as_traced.templatex b/clients/drcachesim/tests/record_filter_as_traced.templatex new file mode 100644 index 00000000000..e9e8f316122 --- /dev/null +++ b/clients/drcachesim/tests/record_filter_as_traced.templatex @@ -0,0 +1,10 @@ +Output .* entries from .* entries. 
+Schedule stats tool results: +.* +Core #0 schedule: .* +Core #1 schedule: .* +Core #2 schedule: .* +Core #3 schedule: .* +Core #4 schedule: .* +Core #5 schedule: .* +Core #6 schedule: .* diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 5801bce250c..1b77dc5f113 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -3955,6 +3955,8 @@ test_record_scheduler() make_encoding(ENCODING_SIZE, ENCODING_IGNORE), make_instr(40), make_encoding(ENCODING_SIZE, ENCODING_IGNORE), + // Test a target marker between the encoding and the instr. + make_marker(TRACE_MARKER_TYPE_BRANCH_TARGET, 42), make_instr(60), // No encoding for repeated instr. make_instr(20), @@ -4006,8 +4008,17 @@ test_record_scheduler() check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_A); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + // Test ordinals. + assert(stream0->get_instruction_ordinal() == 0); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 0); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + // The encoding should have incremented the ordinal. + assert(stream0->get_instruction_ordinal() == 1); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 1); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + // The instr should not have further incremented it. 
+ assert(stream0->get_instruction_ordinal() == 1); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 1); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); @@ -4022,7 +4033,8 @@ test_record_scheduler() check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); - // Ensure the switch is *before* the encoding. + // Ensure the switch is *before* the encoding and target marker. + assert(stream0->get_input_interface()->get_instruction_ordinal() == 2); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_A); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_A); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); @@ -4030,8 +4042,18 @@ test_record_scheduler() check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_B); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_B); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_ENCODING); + assert(stream0->get_instruction_ordinal() == 5); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 3); + check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_MARKER); + assert(stream0->get_instruction_ordinal() == 5); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 3); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + // Should still be at the same count after the encoding, marker, and instr. 
+ assert(stream0->get_instruction_ordinal() == 5); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 3); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_INSTR); + assert(stream0->get_instruction_ordinal() == 6); + assert(stream0->get_input_interface()->get_instruction_ordinal() == 4); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD_EXIT); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_THREAD, TID_A); check_next(stream0, record_scheduler_t::STATUS_OK, TRACE_TYPE_PID, PID_A); diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 9855b91fe77..16e49335c25 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4661,8 +4661,8 @@ if (BUILD_CLIENTS) "schedule_stats") endif () - # Test the trim filter. if (X86 AND X64 AND ZLIB_FOUND) + # Test the trim filter. set(zip_path "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.allasm_x86_64.trace.zip") set(outdir ${PROJECT_BINARY_DIR}/trim_filter) @@ -4680,6 +4680,22 @@ if (BUILD_CLIENTS) "${drcachesim_path}@-simulator_type@record_filter@-trim_before_timestamp@13352268558646120@-trim_after_timestamp@13352268558646661@-indir@${srcdir}@-outdir@${outdir}") set(tool.drcacheoff.trim_basedir "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests") set(tool.drcacheoff.trim_rawtemp ON) # no preprocessor + + # Test the record_filter in as-traced mode. 
+ set(trace_dir + "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/drmemtrace.threadsig.x64.tracedir") + set(sched_file "${trace_dir}/cpu_schedule.bin.zip") + set(outdir ${CMAKE_CURRENT_BINARY_DIR}/filter_as_traced) + file(MAKE_DIRECTORY ${outdir}) + torunonly_api(tool.record_filter_as_traced "${drcachesim_path}" + "record_filter_as_traced" + "" "-simulator_type;schedule_stats;-indir;${outdir}" OFF OFF) + set(tool.record_filter_as_traced_runcmp "${CMAKE_CURRENT_SOURCE_DIR}/runmulti.cmake") + set(tool.record_filter_as_traced_precmd + "${drcachesim_path}@-simulator_type@record_filter@-cpu_schedule_file@${sched_file}@-core_sharded@-cores@7@-indir@${trace_dir}@-outdir@${outdir}") + set(tool.record_filter_as_traced_basedir + "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests") + set(tool.record_filter_as_traced_rawtemp ON) # no preprocessor endif () if (AARCH64)