Skip to content

Commit

Permalink
Merge branch 'master' into i6417-restore-output-register-before-syscall
Browse files Browse the repository at this point in the history
  • Loading branch information
ivankyluk authored Dec 15, 2023
2 parents d0f8a79 + 6d1912f commit 438bca2
Show file tree
Hide file tree
Showing 51 changed files with 2,969 additions and 885 deletions.
6 changes: 6 additions & 0 deletions api/docs/release.dox
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,14 @@ changes:
to include direct switch dependencies. This is not a binary compatibility change
as the old value still refers purely to timestamps, but on a recompile it
refers to timestamps and direct switches, which is what most users should want.
- Rename the macro INSTR_CREATE_mul_sve to INSTR_CREATE_mul_sve_imm to
differentiate it from the other SVE MUL instructions.

Further non-compatibility-affecting changes include:
- Added raw2trace support to inject system call kernel trace templates collected from
elsewhere (e.g., QEMU, Gem5) into the user-space drmemtrace traces at the
corresponding system call number marker. This is done by specifying the path to the
template file via the new -syscall_template_file option.
- Added a new scheme for the modoffs field in the PC trace entry which allows L0
filtering of non-module code; see
#dynamorio::drmemtrace::ENCODING_FILE_TYPE_SEPARATE_NON_MOD_INSTRS. Also added
Expand Down
2 changes: 1 addition & 1 deletion api/samples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ add_sample_client(memtrace_simple "memtrace_simple.c;utils.c" "drmgr;drreg;druti
add_sample_client(memval_simple "memval_simple.c;utils.c" "drmgr;drreg;drutil;drx")
add_sample_client(instrace_simple "instrace_simple.c;utils.c" "drmgr;drreg;drx")
add_sample_client(opcode_count "opcode_count.cpp" "drmgr;drreg;drx;droption")
if (X86) # FIXME i#1551, i#1569: port to ARM and AArch64
if (X86) # FIXME i#1551, i#1569, i#3544: port to ARM/AArch64/RISCV64
add_sample_client(cbr "cbr.c" "drmgr")
add_sample_client(countcalls "countcalls.c" "drmgr;drreg")
add_sample_client(inc2add "inc2add.c" "drmgr;drreg")
Expand Down
3 changes: 2 additions & 1 deletion api/samples/bbcount.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
* here won't be used: drreg's slots will be.
*/
SPILL_SLOT_MAX + 1,
IF_AARCHXX_(SPILL_SLOT_MAX + 1) & global_count, 1, 0);
IF_AARCHXX_OR_RISCV64_(SPILL_SLOT_MAX + 1) & global_count,
1, 0);

#if defined(VERBOSE) && defined(VERBOSE_VERBOSE)
dr_printf("Finished instrumenting dynamorio_basic_block(tag=" PFX ")\n", tag);
Expand Down
4 changes: 2 additions & 2 deletions api/samples/opcode_count.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ event_opcode_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *i
* here won't be used: drreg's slots will be.
*/
static_cast<dr_spill_slot_t>(SPILL_SLOT_MAX + 1),
IF_AARCHXX_(static_cast<dr_spill_slot_t>(SPILL_SLOT_MAX + 1)) &
IF_AARCHXX_OR_RISCV64_(static_cast<dr_spill_slot_t>(SPILL_SLOT_MAX + 1)) &
global_opcode_count,
1,
/* TODO i#4215: DRX_COUNTER_LOCK is not yet supported on ARM. */
Expand Down Expand Up @@ -139,7 +139,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
* here won't be used: drreg's slots will be.
*/
static_cast<dr_spill_slot_t>(SPILL_SLOT_MAX + 1),
IF_AARCHXX_(static_cast<dr_spill_slot_t>(SPILL_SLOT_MAX + 1)) &
IF_AARCHXX_OR_RISCV64_(static_cast<dr_spill_slot_t>(SPILL_SLOT_MAX + 1)) &
global_total_count,
(int)bb_size,
/* TODO i#4215: DRX_COUNTER_LOCK is not yet supported on ARM. */
Expand Down
2 changes: 1 addition & 1 deletion api/samples/opcodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ event_app_instruction(void *drcontext, void *tag, instrlist_t *bb, instr_t *inst
* here won't be used: drreg's slots will be.
*/
SPILL_SLOT_MAX + 1,
IF_AARCHXX_(SPILL_SLOT_MAX + 1) &
IF_AARCHXX_OR_RISCV64_(SPILL_SLOT_MAX + 1) &
count[isa_idx][instr_get_opcode(ins)],
1,
/* DRX_COUNTER_LOCK is not yet supported on ARM */
Expand Down
11 changes: 11 additions & 0 deletions clients/drcachesim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ set(raw2trace_srcs
tracer/instru_offline.cpp
reader/reader.cpp
common/trace_entry.cpp
reader/record_file_reader.cpp
${zlib_reader}
)
if (libsnappy)
set(raw2trace_srcs ${raw2trace_srcs}
Expand Down Expand Up @@ -1022,6 +1024,15 @@ if (BUILD_TESTS)
target_link_libraries(tool.drcacheoff.burst_maps test_helpers)
add_win32_flags(tool.drcacheoff.burst_maps)
endif ()
# Linux-only test executable built from tests/burst_syscall_inject.cpp.
# It is configured as a statically-linked DynamoRIO application that uses the
# drmemtrace_static client, and links the raw2trace, analyzer, test helper and
# basic_counts libraries.
if (LINUX)
add_executable(tool.drcacheoff.burst_syscall_inject tests/burst_syscall_inject.cpp)
configure_DynamoRIO_static(tool.drcacheoff.burst_syscall_inject)
use_DynamoRIO_static_client(tool.drcacheoff.burst_syscall_inject drmemtrace_static)
target_link_libraries(tool.drcacheoff.burst_syscall_inject drmemtrace_raw2trace
drmemtrace_analyzer test_helpers drmemtrace_basic_counts)
# NOTE(review): add_win32_flags is invoked inside a LINUX-only block; presumably it is
# a no-op here and is kept for symmetry with the neighboring burst tests -- confirm.
add_win32_flags(tool.drcacheoff.burst_syscall_inject)
use_DynamoRIO_drmemtrace_tracer(tool.drcacheoff.burst_syscall_inject)
endif ()

if (UNIX)
if (X86 AND NOT APPLE) # This test is x86-specific.
Expand Down
8 changes: 6 additions & 2 deletions clients/drcachesim/analyzer_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ analyzer_multi_t::analyzer_multi_t()
if (needs_processing) {
raw2trace_directory_t dir(op_verbose.get_value());
std::string dir_err =
dir.initialize(op_indir.get_value(), "", op_trace_compress.get_value());
dir.initialize(op_indir.get_value(), "", op_trace_compress.get_value(),
op_syscall_template_file.get_value());
if (!dir_err.empty()) {
success_ = false;
error_string_ = "Directory setup failed: " + dir_err;
Expand All @@ -171,7 +172,8 @@ analyzer_multi_t::analyzer_multi_t()
dir.encoding_file_, dir.serial_schedule_file_, dir.cpu_schedule_file_,
nullptr, op_verbose.get_value(), op_jobs.get_value(),
op_alt_module_dir.get_value(), op_chunk_instr_count.get_value(),
dir.in_kfiles_map_, dir.kcoredir_, dir.kallsymsdir_);
dir.in_kfiles_map_, dir.kcoredir_, dir.kallsymsdir_,
std::move(dir.syscall_template_file_reader_));
std::string error = raw2trace.do_conversion();
if (!error.empty()) {
success_ = false;
Expand Down Expand Up @@ -259,6 +261,8 @@ analyzer_multi_t::init_dynamic_schedule()
sched_ops.quantum_unit = scheduler_t::QUANTUM_TIME;
sched_ops.syscall_switch_threshold = op_sched_syscall_switch_us.get_value();
sched_ops.blocking_switch_threshold = op_sched_blocking_switch_us.get_value();
sched_ops.block_time_scale = op_sched_block_scale.get_value();
sched_ops.block_time_max = op_sched_block_max_us.get_value();
#ifdef HAS_ZIP
if (!op_record_file.get_value().empty()) {
record_schedule_zip_.reset(new zipfile_ostream_t(op_record_file.get_value()));
Expand Down
32 changes: 30 additions & 2 deletions clients/drcachesim/common/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,27 @@ droption_t<uint64_t> op_sched_blocking_switch_us(
"maybe-blocking to incur a context switch. Applies to -core_sharded and "
"-core_serial. ");

// -sched_block_scale: multiplier applied to the microsecond latency of blocking system
// calls to decide how long an input stays unscheduled. Defaults to 1000; larger values
// keep blocked inputs unscheduled for longer. The scaled result is capped by
// -sched_block_max_us.
droption_t<double> op_sched_block_scale(
DROPTION_SCOPE_ALL, "sched_block_scale", 1000., "Input block time scale factor",
"The scale applied to the microsecond latency of blocking system calls. A higher "
"value here results in blocking syscalls keeping inputs unscheduled for longer. "
"This should roughly equal the slowdown of instruction record processing versus the "
"original (untraced) application execution.");

// -sched_block_max_us: upper bound, in microseconds, on the blocked time of an input
// after it has been scaled by -sched_block_scale.
// We have a max to avoid outlier latencies that are already a second or more from
// scaling up to tens of minutes. We assume a cap is representative as the outliers
// likely were not part of key dependence chains. Without a cap the other threads all
// finish and the simulation waits for tens of minutes further for a couple of outliers.
// The cap remains a flag and not a constant as different-length traces and
// different-speed simulators need different idle time ranges, so we need to be able to
// tune this to achieve desired cpu usage targets. The default value (25000000 us, i.e.,
// 25 seconds) was selected while tuning a 1-minute-long schedule_stats run on a
// 112-core 500-thread large application to produce good cpu usage without unduly
// increasing tool runtime.
droption_t<uint64_t> op_sched_block_max_us(DROPTION_SCOPE_ALL, "sched_block_max_us",
25000000,
"Maximum blocked input time, in microseconds",
"The maximum blocked time, after scaling with "
"-sched_block_scale.");
#ifdef HAS_ZIP
droption_t<std::string> op_record_file(DROPTION_SCOPE_FRONTEND, "record_file", "",
"Path for storing record of schedule",
Expand All @@ -870,9 +891,16 @@ droption_t<std::string>

// Schedule_stats options.
droption_t<uint64_t>
op_schedule_stats_print_every(DROPTION_SCOPE_ALL, "schedule_stats_print_every", 5000,
"A letter is printed every N instrs",
op_schedule_stats_print_every(DROPTION_SCOPE_ALL, "schedule_stats_print_every",
500000, "A letter is printed every N instrs",
"A letter is printed every N instrs or N waits");

// -syscall_template_file: frontend-scoped path to a file holding system call trace
// templates. Defaults to the empty string; per the option's own description, when it
// is set the templates are injected into the resulting trace.
droption_t<std::string> op_syscall_template_file(
DROPTION_SCOPE_FRONTEND, "syscall_template_file", "",
"Path to the file that contains system call trace templates.",
"Path to the file that contains system call trace templates. "
"If set, system call traces will be injected from the file "
"into the resulting trace.");

} // namespace drmemtrace
} // namespace dynamorio
3 changes: 3 additions & 0 deletions clients/drcachesim/common/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,15 @@ extern dynamorio::droption::droption_t<bool> op_sched_time;
extern dynamorio::droption::droption_t<bool> op_sched_order_time;
extern dynamorio::droption::droption_t<uint64_t> op_sched_syscall_switch_us;
extern dynamorio::droption::droption_t<uint64_t> op_sched_blocking_switch_us;
extern dynamorio::droption::droption_t<double> op_sched_block_scale;
extern dynamorio::droption::droption_t<uint64_t> op_sched_block_max_us;
#ifdef HAS_ZIP
extern dynamorio::droption::droption_t<std::string> op_record_file;
extern dynamorio::droption::droption_t<std::string> op_replay_file;
extern dynamorio::droption::droption_t<std::string> op_cpu_schedule_file;
#endif
extern dynamorio::droption::droption_t<uint64_t> op_schedule_stats_print_every;
extern dynamorio::droption::droption_t<std::string> op_syscall_template_file;

} // namespace drmemtrace
} // namespace dynamorio
Expand Down
21 changes: 20 additions & 1 deletion clients/drcachesim/common/trace_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -895,7 +895,10 @@ typedef enum {
OFFLINE_FILE_TYPE_BLOCKING_SYSCALLS = 0x800,
/**
* Kernel traces of syscalls are included.
* The included kernel traces are in the Intel® Processor Trace format.
* The included kernel traces are provided either by the -syscall_template_file to
* raw2trace (see #OFFLINE_FILE_TYPE_KERNEL_SYSCALL_TRACE_TEMPLATES), or on x86 using
* the -enable_kernel_tracing option that uses Intel® Processor Trace to collect a
* trace for system call execution.
*/
OFFLINE_FILE_TYPE_KERNEL_SYSCALLS = 0x1000,
/**
Expand All @@ -906,6 +909,22 @@ typedef enum {
* The initial part can be used by a simulator for warmup.
*/
OFFLINE_FILE_TYPE_BIMODAL_FILTERED_WARMUP = 0x2000,
/**
* Indicates an offline trace that contains trace templates for some system calls.
* The individual traces are separated by a #TRACE_MARKER_TYPE_SYSCALL marker which
* also specifies what system call the following trace belongs to. This file can be
* used with -syscall_template_file to raw2trace to create a
* #OFFLINE_FILE_TYPE_KERNEL_SYSCALLS trace. See the sample file written by the
* burst_syscall_inject.cpp test for more details on the expected format for the
* system call template file.
* TODO i#6495: Add support for reading a zipfile where each trace template is in
* a separate component. This will make it easier to manually append, update, or
* inspect the individual templates, and also allow streaming the component with the
* required template when needed instead of reading the complete file into memory
* ahead of time. Note that we may drop support for non-zipfile template files in
* the future.
*/
OFFLINE_FILE_TYPE_KERNEL_SYSCALL_TRACE_TEMPLATES = 0x4000,
} offline_file_type_t;

static inline const char *
Expand Down
5 changes: 3 additions & 2 deletions clients/drcachesim/reader/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,14 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
std::queue<trace_entry_t> queue_;
trace_entry_t entry_copy_; // For use in returning a queue entry.

private:
struct encoding_info_t {
size_t size = 0;
unsigned char bits[MAX_ENCODING_LENGTH];
};

std::unordered_map<addr_t, encoding_info_t> encodings_;

private:
memref_t cur_ref_;
memref_tid_t cur_tid_ = 0;
memref_pid_t cur_pid_ = 0;
Expand All @@ -277,7 +279,6 @@ class reader_t : public std::iterator<std::input_iterator_tag, memref_t>,
std::unordered_map<memref_tid_t, memref_pid_t> tid2pid_;
bool expect_no_encodings_ = true;
encoding_info_t last_encoding_;
std::unordered_map<addr_t, encoding_info_t> encodings_;
addr_t last_branch_target_ = 0;
};

Expand Down
6 changes: 5 additions & 1 deletion clients/drcachesim/reader/record_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,12 @@ template <>
bool
record_file_reader_t<std::ifstream>::read_next_entry()
{
if (!input_file_->read((char *)&cur_entry_, sizeof(cur_entry_)))
if (!input_file_->read((char *)&cur_entry_, sizeof(cur_entry_))) {
if (input_file_->eof()) {
eof_ = true;
}
return false;
}
VPRINT(this, 4, "Read from file: type=%s (%d), size=%d, addr=%zu\n",
trace_type_names[cur_entry_.type], cur_entry_.type, cur_entry_.size,
cur_entry_.addr);
Expand Down
Loading

0 comments on commit 438bca2

Please sign in to comment.