From a1e13cfd72420684bd2802b5e01fcfb181507a80 Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Wed, 8 Nov 2023 15:42:47 -0500 Subject: [PATCH 1/2] i#5843 scheduler: Add flexible_queue_t and use it in scheduler_t (#6414) Adds a flexible priority queue class which tracks indices and so supports asking whether an entry is in the queue and removing an entry from anywhere in the queue. Adds a simple unit test. Changes the scheduler to use this new queue class, in anticipation of needing both new features to handle direct targeted thread switches. Issue: #5843 --- clients/drcachesim/CMakeLists.txt | 9 + clients/drcachesim/scheduler/flexible_queue.h | 231 ++++++++++++++++++ clients/drcachesim/scheduler/scheduler.h | 5 +- .../drcachesim/tests/flexible_queue_tests.cpp | 83 +++++++ 4 files changed, 325 insertions(+), 3 deletions(-) create mode 100644 clients/drcachesim/scheduler/flexible_queue.h create mode 100644 clients/drcachesim/tests/flexible_queue_tests.cpp diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 9807dd3a42b..c7eaf52e95e 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -357,6 +357,7 @@ install_client_nonDR_header(drmemtrace tools/func_view_create.h) install_client_nonDR_header(drmemtrace tracer/raw2trace.h) install_client_nonDR_header(drmemtrace tracer/raw2trace_shared.h) install_client_nonDR_header(drmemtrace scheduler/scheduler.h) +install_client_nonDR_header(drmemtrace scheduler/flexible_queue.h) install_client_nonDR_header(drmemtrace scheduler/speculator.h) add_library(test_helpers STATIC tests/test_helpers.cpp) @@ -791,6 +792,14 @@ if (BUILD_TESTS) ${PROJECT_SOURCE_DIR}/clients/drcachesim/tests) set_tests_properties(tool.scheduler.unit_tests PROPERTIES TIMEOUT ${test_seconds}) + add_executable(tool.drcacheoff.flexible_queue_tests tests/flexible_queue_tests.cpp) + add_win32_flags(tool.drcacheoff.flexible_queue_tests) + 
target_link_libraries(tool.drcacheoff.flexible_queue_tests test_helpers)
+  add_test(NAME tool.drcacheoff.flexible_queue_tests
+    COMMAND tool.drcacheoff.flexible_queue_tests)
+  set_tests_properties(tool.drcacheoff.flexible_queue_tests PROPERTIES TIMEOUT
+    ${test_seconds})
+
   add_executable(tool.drcachesim.core_sharded tests/core_sharded_test.cpp
     # XXX: Better to put these into libraries but that requires a bigger cleanup:
     analyzer_multi.cpp ${client_and_sim_srcs} reader/ipc_reader.cpp
diff --git a/clients/drcachesim/scheduler/flexible_queue.h b/clients/drcachesim/scheduler/flexible_queue.h
new file mode 100644
index 00000000000..699d70ff94d
--- /dev/null
+++ b/clients/drcachesim/scheduler/flexible_queue.h
@@ -0,0 +1,257 @@
+/* **********************************************************
+ * Copyright (c) 2023 Google, Inc.  All rights reserved.
+ * **********************************************************/
+
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Google, Inc. nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without
+ *   specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/* A priority queue with constant-time search and O(log n) removal from the middle. */
+
+#ifndef _DRMEMTRACE_FLEXIBLE_QUEUE_H_
+#define _DRMEMTRACE_FLEXIBLE_QUEUE_H_ 1
+
+/**
+ * @file drmemtrace/flexible_queue.h
+ * @brief DrMemtrace flexible priority queue.
+ */
+
+#define NOMINMAX // Avoid windows.h messing up std::max.
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "utils.h"
+
+namespace dynamorio {  /**< General DynamoRIO namespace. */
+namespace drmemtrace { /**< DrMemtrace tracing + simulation infrastructure namespace. */
+
+/**
+ * A priority queue with constant-time search and O(log n) removal from the middle.
+ * Entries are kept in a binary heap stored in a vector, alongside a hash map from
+ * each entry to its current heap index; duplicate entries are thus not allowed.
+ * The type T must support the << operator (used for verbose printing), equality
+ * comparison, and hashing via hash_t.
+ * We follow std::priority_queue convention where comparator_t(a,b) returning true
+ * means that a is lower priority (worse) than b.
+ */
+template <typename T, class comparator_t = std::less<T>, class hash_t = std::hash<T>>
+class flexible_queue_t {
+public:
+    typedef typename std::vector<T>::size_type index_t;
+    // Wrap max in parens to work around Visual Studio compiler issues with the
+    // max macro (even despite NOMINMAX defined above).
+    static constexpr index_t INVALID_INDEX = (std::numeric_limits<index_t>::max)();
+
+    flexible_queue_t() = default;
+    // A verbose level of 1 or higher prints the queue contents to stdout after each
+    // successful push and after each erase that does not hit the final heap slot.
+    explicit flexible_queue_t(int verbose)
+        : verbose_(verbose)
+    {
+    }
+    // Inserts entry.  Returns false and leaves the queue untouched if entry is
+    // already present, as the index map cannot track duplicates.
+    bool
+    push(T entry)
+    {
+        if (entry2index_.find(entry) != entry2index_.end())
+            return false; // Duplicates not allowed.
+        entries_.push_back(entry);
+        index_t node = entries_.size() - 1;
+        entry2index_[entry] = node;
+        percolate_up(node);
+        vprint(1, "after push");
+        return true;
+    }
+
+    // Removes the highest-priority entry.  Undefined if the queue is empty.
+    void
+    pop()
+    {
+        erase(top());
+    }
+
+    // Returns a copy of the highest-priority entry.
+    T
+    top() const
+    {
+        return entries_[0]; // Undefined if empty.
+    }
+
+    // Returns whether the queue holds no entries.
+    bool
+    empty() const
+    {
+        return entries_.empty();
+    }
+
+    // Returns the number of entries in the queue.
+    size_t
+    size() const
+    {
+        return entries_.size();
+    }
+
+    // Returns whether entry is in the queue, via a hash lookup.
+    bool
+    find(T entry) const
+    {
+        auto it = entry2index_.find(entry);
+        if (it == entry2index_.end())
+            return false;
+        return true;
+    }
+
+    // Removes entry from wherever it sits in the heap and restores heap order.
+    // Returns false if entry is not in the queue.
+    bool
+    erase(T entry)
+    {
+        auto it = entry2index_.find(entry);
+        if (it == entry2index_.end())
+            return false;
+        index_t node = it->second;
+        if (node == entries_.size() - 1) {
+            entry2index_.erase(entry);
+            entries_.pop_back();
+            return true;
+        }
+        swap(node, entries_.size() - 1);
+        entry2index_.erase(entry);
+        entries_.pop_back();
+        percolate_down(node);
+        percolate_up(node);
+        vprint(1, "after erase");
+        return true;
+    }
+
+private:
+    // Prints message and the queue contents if verbose_ is at least verbose_threshold.
+    void
+    vprint(int verbose_threshold, const std::string &message)
+    {
+        if (verbose_ < verbose_threshold)
+            return;
+        std::cout << message << "\n";
+        print();
+    }
+
+    // Prints each heap slot's index, entry, and the index recorded for it in the map.
+    void
+    print()
+    {
+        for (index_t i = 0; i < entries_.size(); ++i) {
+            std::cout << i << ": " << entries_[i] << " @ " << entry2index_[entries_[i]]
+                      << "\n";
+        }
+    }
+
+    // The heap is stored 0-based in entries_: node i has children 2i+1 and 2i+2 and
+    // parent (i-1)/2.  These helpers return INVALID_INDEX when no such node exists.
+    inline index_t
+    parent_node(index_t node)
+    {
+        if (node == 0)
+            return INVALID_INDEX;
+        return (node - 1) / 2;
+    }
+    inline index_t
+    left_child(index_t node)
+    {
+        index_t child = node * 2 + 1;
+        if (child >= entries_.size())
+            return INVALID_INDEX;
+        return child;
+    }
+    inline index_t
+    right_child(index_t node)
+    {
+        index_t child = node * 2 + 2;
+        if (child >= entries_.size())
+            return INVALID_INDEX;
+        return child;
+    }
+    // Exchanges the entries in heap slots a and b and updates their recorded indices.
+    void
+    swap(index_t a, index_t b)
+    {
+        T temp = entries_[a];
+        entries_[a] = entries_[b];
+        entries_[b] = temp;
+        entry2index_[temp] = b;
+        entry2index_[entries_[a]] = a;
+    }
+    // Sinks the entry at node, repeatedly swapping it with its better child, until
+    // all of its children are lower priority than it.
+    void
+    percolate_down(index_t node)
+    {
+        index_t should_be_parent = node;
+        index_t left = left_child(node);
+        index_t right = right_child(node);
+        if (left != INVALID_INDEX && !compare_(entries_[left], entries_[node]))
+            should_be_parent = left;
+        if (right != INVALID_INDEX && !compare_(entries_[right], entries_[node])) {
+            if (should_be_parent != left || !compare_(entries_[right], entries_[left]))
+                should_be_parent = right;
+        }
+        if (should_be_parent == node)
+            return;
+        swap(node, should_be_parent);
+        percolate_down(should_be_parent);
+    }
+
+    // Raises the entry at node, swapping it with its parent for as long as the
+    // parent is lower priority than it.
+    void
+    percolate_up(index_t node)
+    {
+        index_t parent = parent_node(node);
+        if (parent == INVALID_INDEX || !compare_(entries_[parent], entries_[node]))
+            return;
+        swap(node, parent);
+        percolate_up(parent);
+    }
+
+    // The heap itself, with the highest-priority entry at index 0.
+    std::vector<T> entries_;
+    // We follow std::priority_queue convention where compare_(a,b) returning true
+    // means that a is lower priority (worse) than b.
+    comparator_t compare_;
+    // Maps each entry to its current index in entries_.
+    std::unordered_map<T, index_t, hash_t> entry2index_;
+    int verbose_ = 0;
+};
+
+} // namespace drmemtrace
+} // namespace dynamorio
+
+#endif /* _DRMEMTRACE_FLEXIBLE_QUEUE_H_ */
diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
index 9d5113011f9..ea6d897841c 100644
--- a/clients/drcachesim/scheduler/scheduler.h
+++ b/clients/drcachesim/scheduler/scheduler.h
@@ -59,6 +59,7 @@
 
 #include "archive_istream.h"
 #include "archive_ostream.h"
+#include "flexible_queue.h"
 #include "memref.h"
 #include "memtrace_stream.h"
 #include "reader.h"
@@ -1278,9 +1279,7 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
     // dependencies are requested. We use the timestamp delta from the first observed
     // timestamp in each workload in order to mix inputs from different workloads in the
     // same queue. FIFO ordering is used for same-priority entries.
-    std::priority_queue<input_info_t *, std::vector<input_info_t *>,
-                        InputTimestampComparator>
-        ready_priority_;
+    flexible_queue_t<input_info_t *, InputTimestampComparator> ready_priority_;
     // Global ready queue counter used to provide FIFO for same-priority inputs.
uint64_t ready_counter_ = 0;
 };
diff --git a/clients/drcachesim/tests/flexible_queue_tests.cpp b/clients/drcachesim/tests/flexible_queue_tests.cpp
new file mode 100644
index 00000000000..1d8e0d40ce6
--- /dev/null
+++ b/clients/drcachesim/tests/flexible_queue_tests.cpp
@@ -0,0 +1,127 @@
+/* **********************************************************
+ * Copyright (c) 2023 Google, Inc.  All rights reserved.
+ * **********************************************************/
+
+/*
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Google, Inc. nor the names of its contributors may be
+ *   used to endorse or promote products derived from this software without
+ *   specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE, INC. OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/* Unit tests for flexible_queue. */
+
+#include "scheduler/flexible_queue.h"
+
+// Every check below is an assert, so keep asserts enabled in release builds too.
+#undef NDEBUG
+#include <assert.h>
+
+#include <functional>
+
+namespace dynamorio {
+namespace drmemtrace {
+namespace {
+
+// Tests push, pop, top, find, erase, empty, and size on a small min-heap.
+bool
+test_basics()
+{
+    // Create a min-heap queue.
+    flexible_queue_t<int, std::greater<int>> q(/*verbose=*/1);
+    assert(q.empty());
+    assert(!q.find(4));
+    q.push(4);
+    assert(q.find(4));
+    assert(q.top() == 4);
+    q.push(3);
+    assert(q.top() == 3);
+    q.push(5);
+    assert(q.top() == 3);
+    assert(q.size() == 3);
+    q.pop();
+    assert(q.top() == 4);
+    assert(!q.find(3));
+    q.push(6);
+    assert(q.find(5));
+    q.erase(5);
+    assert(!q.find(5));
+    assert(q.top() == 4);
+    q.pop();
+    assert(!q.find(4));
+    assert(q.top() == 6);
+    assert(q.size() == 1);
+    return true;
+}
+
+// Tests that a duplicate push and an erase of an absent entry both return false
+// and leave the queue contents alone.
+bool
+test_rejections()
+{
+    flexible_queue_t<int, std::greater<int>> q;
+    bool res = q.push(7);
+    assert(res);
+    res = q.push(7);
+    assert(!res);
+    assert(q.size() == 1);
+    res = q.erase(8);
+    assert(!res);
+    res = q.erase(7);
+    assert(res);
+    assert(q.empty());
+    return true;
+}
+
+// Tests that entries still come out in priority order after many erasures from
+// throughout a larger heap.
+bool
+test_ordering()
+{
+    flexible_queue_t<int, std::greater<int>> q;
+    for (int i = 0; i < 32; ++i)
+        q.push((i * 7) % 32); // A permutation of 0..31.
+    for (int i = 1; i < 32; i += 2)
+        q.erase(i); // Drop every odd value.
+    for (int i = 0; i < 32; i += 2) {
+        assert(q.top() == i);
+        q.pop();
+    }
+    assert(q.empty());
+    return true;
+}
+
+} // namespace
+
+// Runs every test; returns 0 if all pass and 1 otherwise.
+int
+test_main(int argc, const char *argv[])
+{
+    if (!test_basics() || !test_rejections() || !test_ordering())
+        return 1;
+    return 0;
+}
+
+} // namespace drmemtrace
+} // namespace dynamorio

From e945bf33301b3e03b47430caec0e955759557333 Mon Sep 17 00:00:00 2001
From: Edmund Grimley Evans
Date: Thu, 9 Nov 2023 08:25:02 +0000
Subject: [PATCH 2/2] i#4393 AArch64 codec: Move FCVT from v8.2 to v8.0. (#6421)

Move FCVT from codec_v82.txt to codec_v80.txt and replace FP16 with
BASE as these instructions work without the FP16 feature.
--- core/ir/aarch64/codec_v80.txt | 4 ++++ core/ir/aarch64/codec_v82.txt | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/ir/aarch64/codec_v80.txt b/core/ir/aarch64/codec_v80.txt index e361199b49f..76429ea1940 100644 --- a/core/ir/aarch64/codec_v80.txt +++ b/core/ir/aarch64/codec_v80.txt @@ -297,6 +297,10 @@ x1001010xx0xxxxxxxxxxxxxxxxxxxxx n 90 BASE eor wx0 : wx5 000111100x1xxxxxxxxx11xxxxxxxxxx r 109 BASE fcsel fcsel_sd 0001111000100010110000xxxxxxxxxx n 110 BASE fcvt d0 : s5 0001111001100010010000xxxxxxxxxx n 110 BASE fcvt s0 : d5 +0001111000100011110000xxxxxxxxxx n 110 BASE fcvt h0 : s5 +0001111001100011110000xxxxxxxxxx n 110 BASE fcvt h0 : d5 +0001111011100010010000xxxxxxxxxx n 110 BASE fcvt s0 : h5 +0001111011100010110000xxxxxxxxxx n 110 BASE fcvt d0 : h5 0001111000100100000000xxxxxxxxxx n 111 BASE fcvtas w0 : s5 1001111000100100000000xxxxxxxxxx n 111 BASE fcvtas x0 : s5 0001111001100100000000xxxxxxxxxx n 111 BASE fcvtas w0 : d5 diff --git a/core/ir/aarch64/codec_v82.txt b/core/ir/aarch64/codec_v82.txt index d319bd18c8f..7c84851af1b 100644 --- a/core/ir/aarch64/codec_v82.txt +++ b/core/ir/aarch64/codec_v82.txt @@ -72,10 +72,6 @@ 0001111011100000001000xxxxx11000 w 108 FP16 fcmpe : h5 zero_fp_const 00011110111xxxxx001000xxxxx10000 w 108 FP16 fcmpe : h5 h16 00011110111xxxxxxxxx11xxxxxxxxxx r 109 FP16 fcsel fcsel_h -0001111000100011110000xxxxxxxxxx n 110 FP16 fcvt h0 : s5 -0001111001100011110000xxxxxxxxxx n 110 FP16 fcvt h0 : d5 -0001111011100010010000xxxxxxxxxx n 110 FP16 fcvt s0 : h5 -0001111011100010110000xxxxxxxxxx n 110 FP16 fcvt d0 : h5 0x00111001111001110010xxxxxxxxxx n 111 FP16 fcvtas dq0 : dq5 h_sz 0101111001111001110010xxxxxxxxxx n 111 FP16 fcvtas h0 : h5 0001111011100100000000xxxxxxxxxx n 111 FP16 fcvtas w0 : h5