From 30031e06005a839e5b923d046f6b56fe6d07f361 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 30 Nov 2023 11:44:48 -0500 Subject: [PATCH 1/2] i#5505 kernel tracing: Support clac and stac in DR decoder (#6484) Adds encoding-decoding support for clac and stac which are observed in kernel traces. decode_fast does not need any change and already should get the 3-byte size correctly. Adds EFLAGS_READ_AC and EFLAGS_WRITE_AC to track reads/writes to the Alignment Check flag. Currently we try to avoid a major binary compatibility break by adding them to the end. This makes some code more complicated. Added a TODO with #6485 to re-number when we're about to do the next major release. Issue: #5505 Fixes: #2103 --- clients/drcachesim/drpt2trace/pt2ir.cpp | 3 --- core/ir/instr_api.h | 22 +++++++++++++++---- core/ir/x86/decode_fast.c | 2 +- core/ir/x86/decode_table.c | 10 +++++++-- core/ir/x86/instr_create_api.h | 4 +++- core/ir/x86/opcode_api.h | 5 ++++- suite/tests/api/ir_x86_0args.h | 5 ++++- .../test_decenc/drdecode_decenc_x86.expect | 2 ++ .../test_decenc/drdecode_decenc_x86_64.expect | 2 ++ .../binutils/test_decenc/test_decenc_x86.asm | 14 ++++++++---- .../test_decenc/test_decenc_x86_64.asm | 13 +++++++---- 11 files changed, 61 insertions(+), 21 deletions(-) diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp index 45397d96350..4a5ad88f293 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.cpp +++ b/clients/drcachesim/drpt2trace/pt2ir.cpp @@ -388,9 +388,6 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ instr_valid = true; instr_set_translation(instr, (app_pc)insn.ip); instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size); - /* TODO i#2103: Currently, the PT raw data may contain 'STAC' and 'CLAC' - * instructions that are not supported by Dynamorio. - */ if (!instr_valid) { /* The decode() function will not correctly identify the raw bits for * invalid instruction. 
So we need to set the raw bits of instr manually. diff --git a/core/ir/instr_api.h b/core/ir/instr_api.h index 148190886e9..9fc140d7a36 100644 --- a/core/ir/instr_api.h +++ b/core/ir/instr_api.h @@ -2550,6 +2550,7 @@ instr_is_reg_spill_or_restore(void *drcontext, instr_t *instr, bool *tls DR_PARA # define EFLAGS_READ_OF 0x00000100 /**< Reads OF (Overflow Flag). */ # define EFLAGS_READ_NT 0x00000200 /**< Reads NT (Nested Task). */ # define EFLAGS_READ_RF 0x00000400 /**< Reads RF (Resume Flag). */ + # define EFLAGS_WRITE_CF 0x00000800 /**< Writes CF (Carry Flag). */ # define EFLAGS_WRITE_PF 0x00001000 /**< Writes PF (Parity Flag). */ # define EFLAGS_WRITE_AF 0x00002000 /**< Writes AF (Auxiliary Carry Flag). */ @@ -2562,9 +2563,18 @@ instr_is_reg_spill_or_restore(void *drcontext, instr_t *instr, bool *tls DR_PARA # define EFLAGS_WRITE_NT 0x00100000 /**< Writes NT (Nested Task). */ # define EFLAGS_WRITE_RF 0x00200000 /**< Writes RF (Resume Flag). */ -# define EFLAGS_READ_ALL 0x000007ff /**< Reads all flags. */ +/* TODO i#6485: Re-number the following when a major binary compatibility break + * is more convenient. + */ +/* OP_clac and OP_stac both write the AC flag. Even though we do not have an + * opcode that reads it, we still add EFLAGS_READ_AC for parity. + */ +# define EFLAGS_READ_AC 0x00400000 /**< Reads AC (Alignment Check Flag). */ +# define EFLAGS_WRITE_AC 0x00800000 /**< Writes AC (Alignment Check Flag). */ + +# define EFLAGS_READ_ALL 0x004007ff /**< Reads all flags. */ # define EFLAGS_READ_NON_PRED EFLAGS_READ_ALL /**< Flags not read by predicates. */ -# define EFLAGS_WRITE_ALL 0x003ff800 /**< Writes all flags. */ +# define EFLAGS_WRITE_ALL 0x00bff800 /**< Writes all flags. */ /* 6 most common flags ("arithmetic flags"): CF, PF, AF, ZF, SF, OF */ /** Reads all 6 arithmetic flags (CF, PF, AF, ZF, SF, OF). 
*/ # define EFLAGS_READ_6 0x0000011f @@ -2577,9 +2587,13 @@ instr_is_reg_spill_or_restore(void *drcontext, instr_t *instr, bool *tls DR_PARA # define EFLAGS_WRITE_ARITH EFLAGS_WRITE_6 /** Converts an EFLAGS_WRITE_* value to the corresponding EFLAGS_READ_* value. */ -# define EFLAGS_WRITE_TO_READ(x) ((x) >> 11) +# define EFLAGS_WRITE_TO_READ(x) \ + ((((x) & ((EFLAGS_WRITE_ALL) & ~(EFLAGS_WRITE_AC))) >> 11) | \ + (((x) & (EFLAGS_WRITE_AC)) >> 1)) /** Converts an EFLAGS_READ_* value to the corresponding EFLAGS_WRITE_* value. */ -# define EFLAGS_READ_TO_WRITE(x) ((x) << 11) +# define EFLAGS_READ_TO_WRITE(x) \ + ((((x) & ((EFLAGS_READ_ALL) & ~(EFLAGS_READ_AC))) << 11) | \ + (((x) & (EFLAGS_READ_AC)) << 1)) /** * The actual bits in the eflags register that we care about:\n
diff --git a/core/ir/x86/decode_fast.c b/core/ir/x86/decode_fast.c
index 7662bb15df4..0a2686e3d00 100644
--- a/core/ir/x86/decode_fast.c
+++ b/core/ir/x86/decode_fast.c
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2023 Google, Inc.  All rights reserved.
  * Copyright (c) 2001-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
diff --git a/core/ir/x86/decode_table.c b/core/ir/x86/decode_table.c
index 408d529e9f3..0c505cb13ec 100644
--- a/core/ir/x86/decode_table.c
+++ b/core/ir/x86/decode_table.c
@@ -1623,6 +1623,10 @@ const instr_info_t * const op_instr[] =
     /* AVX512 VPOPCNTDQ */
     /* OP_vpopcntd, */ &evex_Wb_extensions[274][0],
     /* OP_vpopcntq, */ &evex_Wb_extensions[274][2],
+
+    /* Supervisor Mode Access Prevention (SMAP) */
+    /* OP_clac */ &rm_extensions[1][2],
+    /* OP_stac */ &rm_extensions[1][3]
 };
 
 
@@ -2079,6 +2083,7 @@ const instr_info_t * const op_instr[] =
 #define fRO   EFLAGS_READ_OF
 #define fRN   EFLAGS_READ_NT
 #define fRR   EFLAGS_READ_RF
+#define fRAC   EFLAGS_READ_AC
 #define fRX   EFLAGS_READ_ALL
 #define fR6   EFLAGS_READ_6
 #define fWC   EFLAGS_WRITE_CF
@@ -2092,6 +2097,7 @@ const instr_info_t * const op_instr[] =
 #define fWO   EFLAGS_WRITE_OF
 #define fWN   EFLAGS_WRITE_NT
 #define fWR   EFLAGS_WRITE_RF
+#define fWAC   EFLAGS_WRITE_AC
 #define fWX   EFLAGS_WRITE_ALL
 #define fW6   EFLAGS_WRITE_6
 /* flags affected by OP_int*
@@ -7019,8 +7025,8 @@ const instr_info_t rm_extensions[][8] = {
     /* XXX i#4013: Treat address in xax as IR memref? */
     {OP_monitor, 0xc80f0171, catUncategorized, "monitor",  xx, xx, axAX, ecx, edx, mrm, x, END_LIST},
     {OP_mwait,   0xc90f0171, catUncategorized, "mwait",  xx, xx, eax, ecx, xx, mrm, x, END_LIST},
-    {INVALID,   0x0f0131, catUncategorized, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
-    {INVALID,   0x0f0131, catUncategorized, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
+    {OP_clac,   0xca0f0171, catUncategorized, "clac", xx, xx, xx, xx, xx, no, fWAC, NA},
+    {OP_stac,   0xcb0f0171, catUncategorized, "stac", xx, xx, xx, xx, xx, no, fWAC, NA},
     {INVALID,   0x0f0131, catUncategorized, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
     {INVALID,   0x0f0131, catUncategorized, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
     {INVALID,   0x0f0131, catUncategorized, "(bad)", xx, xx, xx, xx, xx, no, x, NA},
diff --git a/core/ir/x86/instr_create_api.h b/core/ir/x86/instr_create_api.h
index 7afdbee971f..8444dd6b8c8 100644
--- a/core/ir/x86/instr_create_api.h
+++ b/core/ir/x86/instr_create_api.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2022 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2023 Google, Inc.  All rights reserved.
  * Copyright (c) 2002-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -527,6 +527,8 @@
 #define INSTR_CREATE_vzeroupper(dc) instr_create_0dst_0src((dc), OP_vzeroupper)
 #define INSTR_CREATE_vzeroall(dc) instr_create_0dst_0src((dc), OP_vzeroall)
 #define INSTR_CREATE_xtest(dc) instr_create_0dst_0src((dc), OP_xtest)
+#define INSTR_CREATE_clac(dc) instr_create_0dst_0src((dc), OP_clac)
+#define INSTR_CREATE_stac(dc) instr_create_0dst_0src((dc), OP_stac)
 /** @} */ /* end doxygen group */
 
 /* no destination, 1 source */
diff --git a/core/ir/x86/opcode_api.h b/core/ir/x86/opcode_api.h
index 58bf150d868..4272edd1976 100644
--- a/core/ir/x86/opcode_api.h
+++ b/core/ir/x86/opcode_api.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2021 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2023 Google, Inc.  All rights reserved.
  * Copyright (c) 2000-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -1614,6 +1614,9 @@ enum {
     /* 1433 */ OP_vpopcntd, /**< IA-32/AMD64 vpopcntd opcode. */
     /* 1434 */ OP_vpopcntq, /**< IA-32/AMD64 vpopcntd opcode. */
 
+    /* Supervisor Mode Access Prevention (SMAP) */
+    /* 1435 */ OP_clac, /**< IA-32/AMD64 clac opcode. */
+    /* 1436 */ OP_stac, /**< IA-32/AMD64 stac opcode. */
     OP_AFTER_LAST,
     OP_FIRST = OP_add,           /**< First real opcode. */
     OP_LAST = OP_AFTER_LAST - 1, /**< Last real opcode. */
diff --git a/suite/tests/api/ir_x86_0args.h b/suite/tests/api/ir_x86_0args.h
index 6c98022acc2..34a544ab536 100644
--- a/suite/tests/api/ir_x86_0args.h
+++ b/suite/tests/api/ir_x86_0args.h
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2011-2016 Google, Inc.  All rights reserved.
+ * Copyright (c) 2011-2023 Google, Inc.  All rights reserved.
  * Copyright (c) 2008-2010 VMware, Inc.  All rights reserved.
  * **********************************************************/
 
@@ -197,3 +197,6 @@ OPCODE(xtest, xtest, xtest, 0)
 
 OPCODE(rdpkru, rdpkru, rdpkru, 0)
 OPCODE(wrpkru, wrpkru, wrpkru, 0)
+
+OPCODE(clac, clac, clac, 0)
+OPCODE(stac, stac, stac, 0)
diff --git a/third_party/binutils/test_decenc/drdecode_decenc_x86.expect b/third_party/binutils/test_decenc/drdecode_decenc_x86.expect
index c9059243d82..1801e89ca5f 100644
--- a/third_party/binutils/test_decenc/drdecode_decenc_x86.expect
+++ b/third_party/binutils/test_decenc/drdecode_decenc_x86.expect
@@ -138762,6 +138762,8 @@ test_s:
  7b 00 00 00
  62 f2 7d 29 91 b4 f5 vpgatherqd 0x0000007b(%ebp,%ymm6,8), %ymm6 {%k1} {%k1}
  7b 00 00 00
+ 0f 01 ca             clac
+ 0f 01 cb             stac
  90                   nop
  90                   nop
  90                   nop
diff --git a/third_party/binutils/test_decenc/drdecode_decenc_x86_64.expect b/third_party/binutils/test_decenc/drdecode_decenc_x86_64.expect
index 2b19bc9c36e..04fa5247dd0 100644
--- a/third_party/binutils/test_decenc/drdecode_decenc_x86_64.expect
+++ b/third_party/binutils/test_decenc/drdecode_decenc_x86_64.expect
@@ -102826,6 +102826,8 @@ test_x86_64_s:
  c4 e2 59 53 11       vpdpwssds %xmm4, (%rcx), %xmm2
  62 b2 5d 08 53 d6    vpdpwssds %xmm4, %xmm22, %xmm2 {%k0}
  62 d2 5d 08 50 d4    vpdpbusd %xmm4, %xmm12, %xmm2 {%k0}
+ 0f 01 ca             clac
+ 0f 01 cb             stac
  90                   nop
  90                   nop
  90                   nop
diff --git a/third_party/binutils/test_decenc/test_decenc_x86.asm b/third_party/binutils/test_decenc/test_decenc_x86.asm
index 1aa50c20c99..1a0ee210522 100644
--- a/third_party/binutils/test_decenc/test_decenc_x86.asm
+++ b/third_party/binutils/test_decenc/test_decenc_x86.asm
@@ -298,10 +298,6 @@ GLOBAL_LABEL(FUNCNAME:)
 
         /* arch_13.s */
 
-        /* clac, CPL 0 instruction. */
-        /* RAW(0f) RAW(01) RAW(ca) */
-        /* stac, CPL 0 instruction. */
-        /* RAW(0f) RAW(01) RAW(cb) */
         RAW(66) RAW(0f) RAW(38) RAW(f6) RAW(ca)
         RAW(f3) RAW(0f) RAW(38) RAW(f6) RAW(ca)
         RAW(0f) RAW(c7) RAW(f8)
@@ -139592,6 +139588,16 @@ GLOBAL_LABEL(FUNCNAME:)
         RAW(7b) RAW(00) RAW(00) RAW(00)
         RAW(62) RAW(f2) RAW(7d) RAW(29) RAW(91) RAW(b4) RAW(f5)
         RAW(7b) RAW(00) RAW(00) RAW(00)
+
+        /* TODO i#5505: Move the following back under
+         * arch_13.s in a separate PR to keep the huge
+         * diff isolated from PR #6484.
+         */
+        /* clac, CPL 0 instruction. */
+        RAW(0f) RAW(01) RAW(ca)
+        /* stac, CPL 0 instruction. */
+        RAW(0f) RAW(01) RAW(cb)
+
         END_OF_SUBTEST_MARKER
 
 #ifdef DISABLED_UNTIL_BUG_3577_IS_FIXED
diff --git a/third_party/binutils/test_decenc/test_decenc_x86_64.asm b/third_party/binutils/test_decenc/test_decenc_x86_64.asm
index d2be83ac5e0..3527f297855 100644
--- a/third_party/binutils/test_decenc/test_decenc_x86_64.asm
+++ b/third_party/binutils/test_decenc/test_decenc_x86_64.asm
@@ -220,10 +220,6 @@ GLOBAL_LABEL(FUNCNAME:)
 
         /* x86_64_arch_3.s */
 
-        /* clac, CPL 0 instruction. */
-        /* RAW(0f) RAW(01) RAW(ca) */
-        /* stac, CPL 0 instruction. */
-        /* RAW(0f) RAW(01) RAW(cb) */
         RAW(66) RAW(0f) RAW(38) RAW(f6) RAW(ca)
         RAW(f3) RAW(0f) RAW(38) RAW(f6) RAW(ca)
         RAW(0f) RAW(c7) RAW(f8)
@@ -106655,5 +106651,14 @@ GLOBAL_LABEL(FUNCNAME:)
         RAW(62) RAW(b2) RAW(5d) RAW(08) RAW(53) RAW(d6)
         RAW(62) RAW(d2) RAW(5d) RAW(08) RAW(50) RAW(d4)
 
+        /* TODO i#5505: Move the following back under
+         * x86_64_arch_3.s in a separate PR to keep the huge
+         * diff isolated from PR #6484.
+         */
+        /* clac, CPL 0 instruction. */
+        RAW(0f) RAW(01) RAW(ca)
+        /* stac, CPL 0 instruction. */
+        RAW(0f) RAW(01) RAW(cb)
+
         END_OF_FUNCTION_MARKER
 END_FUNC(FUNCNAME)

From b9441b340bdd9977f74c06257bffde559b8d1835 Mon Sep 17 00:00:00 2001
From: Abhinav Anil Sharma 
Date: Thu, 30 Nov 2023 21:54:56 -0500
Subject: [PATCH 2/2] i#5505 kernel tracing: Add syscall instr encodings
 (#6479)

Adds encodings for kernel system call instructions to the trace in
raw2trace. Kernel system call traces are decoded using libipt which also
provides the instruction encodings. We add support to drir_t to write
these encodings to a new buffer which is re-used for all dynamic
instances of that instr even across multiple system call traces.

Fixes taken/not-taken detection for conditional branches in the syscall
trace.

Adds support in the syscall_mix tool to report the counts of each system
call's traces also. Adds sysnum to system call trace start and end
markers to achieve this.

Ran all Intel-PT tests locally:

```
$ ctest -VV -R 'SUDO'
...
The following tests passed:
	code_api|client.drpttracer_SUDO-test
	code_api|tool.drcachesim.phys_SUDO # not really PT. Just included because of ctest -R.
	code_api|tool.drcachesim.phys-threads_SUDO # not really PT. Just included because of ctest -R.
	code_api|tool.drcacheoff.phys_SUDO # not really PT. Just included because of ctest -R.
	code_api|tool.drcacheoff.kernel.simple_SUDO
	code_api|tool.drcacheoff.kernel.opcode-mix_SUDO
	code_api|tool.drcacheoff.kernel.syscall-mix_SUDO

100% tests passed, 0 tests failed out of 7

```

Found some flakiness due to #6486 in local runs of the kernel sudo
tests, which will be addressed separately.

Issue: #5505
---
 clients/drcachesim/common/trace_entry.h       |  6 +-
 clients/drcachesim/drpt2trace/drir.h          | 73 ++++++++++++++++++-
 clients/drcachesim/drpt2trace/drpt2trace.cpp  |  7 +-
 clients/drcachesim/drpt2trace/ir2trace.cpp    | 23 ++++--
 clients/drcachesim/drpt2trace/ir2trace.h      |  2 +-
 clients/drcachesim/drpt2trace/pt2ir.cpp       | 26 ++-----
 clients/drcachesim/drpt2trace/pt2ir.h         |  2 +-
 .../drcachesim/drpt2trace/test_simple.expect  | 14 ----
 .../drpt2trace/test_simple.templatex          | 14 ++++
 clients/drcachesim/reader/reader.cpp          |  3 +-
 .../tests/offline-kernel-opcode-mix.templatex |  6 ++
 .../offline-kernel-syscall-mix.templatex      |  6 ++
 .../tests/offline-syscall-mix.templatex       |  2 +-
 .../drcachesim/tests/syscall-mix.templatex    |  2 +-
 clients/drcachesim/tools/syscall_mix.cpp      | 39 ++++++++--
 clients/drcachesim/tools/syscall_mix.h        |  1 +
 clients/drcachesim/tools/view.cpp             |  6 +-
 clients/drcachesim/tracer/raw2trace.cpp       | 54 ++++++++++----
 clients/drcachesim/tracer/raw2trace.h         |  1 +
 suite/tests/CMakeLists.txt                    | 15 ++--
 20 files changed, 224 insertions(+), 78 deletions(-)
 delete mode 100644 clients/drcachesim/drpt2trace/test_simple.expect
 create mode 100644 clients/drcachesim/drpt2trace/test_simple.templatex
 create mode 100644 clients/drcachesim/tests/offline-kernel-opcode-mix.templatex
 create mode 100644 clients/drcachesim/tests/offline-kernel-syscall-mix.templatex

diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h
index 166c5f32055..794472ed1a2 100644
--- a/clients/drcachesim/common/trace_entry.h
+++ b/clients/drcachesim/common/trace_entry.h
@@ -531,12 +531,14 @@ typedef enum {
     TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL,
 
     /**
-     * Indicates a point in the trace where a syscall's kernel trace starts.
+     * Indicates a point in the trace where a syscall's kernel trace starts. The value
+     * of the marker is set to the syscall number.
      */
     TRACE_MARKER_TYPE_SYSCALL_TRACE_START,
 
     /**
-     * Indicates a point in the trace where a syscall's trace end.
+     * Indicates a point in the trace where a syscall's trace ends. The value of the
+     * marker is set to the syscall number.
      */
     TRACE_MARKER_TYPE_SYSCALL_TRACE_END,
 
diff --git a/clients/drcachesim/drpt2trace/drir.h b/clients/drcachesim/drpt2trace/drir.h
index eb4b133f20b..a8f177a423c 100644
--- a/clients/drcachesim/drpt2trace/drir.h
+++ b/clients/drcachesim/drpt2trace/drir.h
@@ -41,6 +41,10 @@
 #include "dr_api.h"
 #include "utils.h"
 
+#include <cstring>
+#include <memory>
+#include <unordered_map>
+
 namespace dynamorio {
 namespace drmemtrace {
 
@@ -62,8 +66,10 @@ class drir_t {
         }
     }
 
+    // Appends the given instr to the internal ilist, and records (replaces if
+    // one already exists) the given encoding for the orig_pc.
     void
-    append(instr_t *instr)
+    append(instr_t *instr, app_pc orig_pc, int instr_length, uint8_t *encoding)
     {
         ASSERT(drcontext_ != nullptr, "drir_t: invalid drcontext_");
         ASSERT(ilist_ != nullptr, "drir_t: invalid ilist_");
@@ -72,23 +78,88 @@ class drir_t {
             return;
         }
         instrlist_append(ilist_, instr);
+        record_encoding(orig_pc, instr_length, encoding);
     }
 
+    // Returns the opaque pointer to the dcontext_t used to construct this
+    // object.
     void *
     get_drcontext()
     {
         return drcontext_;
     }
 
+    // Returns the instrlist_t of instrs accumulated so far.
     instrlist_t *
     get_ilist()
     {
         return ilist_;
     }
 
+    // Clears the instrs accumulated in the ilist. Note that this does
+    // not clear the encodings accumulated.
+    void
+    clear_ilist()
+    {
+        instrlist_clear(drcontext_, ilist_);
+    }
+
+    // Returns the address of the encoding recorded for the given orig_pc.
+    // Encodings are persisted across clear_ilist() calls, so we will
+    // return the same decode_pc for the same orig_pc unless a new encoding
+    // is added for the same orig_pc.
+    app_pc
+    get_decode_pc(app_pc orig_pc)
+    {
+        if (decode_pc_.find(orig_pc) == decode_pc_.end()) {
+            return nullptr;
+        }
+        return decode_pc_[orig_pc].first;
+    }
+
 private:
     void *drcontext_;
     instrlist_t *ilist_;
+#define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024)
+    // For each original app pc key, this stores a pair value: the first
+    // element is the address where the encoding is stored for the instruction
+    // at that app pc, the second element is the length of the encoding.
+    std::unordered_map<app_pc, std::pair<app_pc, int>> decode_pc_;
+    // A vector of buffers of size SYSCALL_PT_ENCODING_BUF_SIZE. Each buffer
+    // stores some encoded instructions back-to-back. Note that each element
+    // in the buffer is a single byte, so one instr's encoding occupies possibly
+    // multiple consecutive elements.
+    // We allocate new memory to store kernel instruction encodings in
+    // increments of SYSCALL_PT_ENCODING_BUF_SIZE. Unlike a cache, we never
+    // clear or overwrite previously stored encodings, because we want to ensure
+    // decode_pc uniqueness to callers of get_decode_pc.
+    std::vector<std::unique_ptr<uint8_t[]>> instr_encodings_;
+    // Next available offset into instr_encodings_.back().
+    size_t next_encoding_offset_ = 0;
+
+    void
+    record_encoding(app_pc orig_pc, int instr_len, uint8_t *encoding)
+    {
+        auto it = decode_pc_.find(orig_pc);
+        // We record the encoding only if we don't already have the same encoding for
+        // the given orig_pc.
+        if (it != decode_pc_.end() &&
+            // We confirm that the instruction encoding has not changed. Just in case
+            // the kernel is doing JIT.
+            it->second.second == instr_len &&
+            memcmp(it->second.first, encoding, it->second.second) == 0) {
+            return;
+        }
+        if (instr_encodings_.empty() ||
+            next_encoding_offset_ + instr_len >= SYSCALL_PT_ENCODING_BUF_SIZE) {
+            instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]);
+            next_encoding_offset_ = 0;
+        }
+        app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_];
+        memcpy(encode_pc, encoding, instr_len);
+        decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len);
+        next_encoding_offset_ += instr_len;
+    }
 };
 
 } // namespace drmemtrace
diff --git a/clients/drcachesim/drpt2trace/drpt2trace.cpp b/clients/drcachesim/drpt2trace/drpt2trace.cpp
index e1961c3e5fa..654740376d1 100644
--- a/clients/drcachesim/drpt2trace/drpt2trace.cpp
+++ b/clients/drcachesim/drpt2trace/drpt2trace.cpp
@@ -461,7 +461,8 @@ main(int argc, const char *argv[])
 
         uint8_t *pt_data = pt_raw_buffer.data();
         size_t pt_data_size = pt_raw_buffer.size();
-        pt2ir_convert_status_t status = ptconverter->convert(pt_data, pt_data_size, drir);
+        pt2ir_convert_status_t status =
+            ptconverter->convert(pt_data, pt_data_size, &drir);
         if (status != PT2IR_CONV_SUCCESS) {
             std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR."
                       << "[error status: " << status << "]" << std::endl;
@@ -521,7 +522,7 @@ main(int argc, const char *argv[])
 
             /* Convert the PT Data to DR IR. */
             pt2ir_convert_status_t status =
-                ptconverter->convert(pt_data, pt_data_size, drir);
+                ptconverter->convert(pt_data, pt_data_size, &drir);
             if (status != PT2IR_CONV_SUCCESS) {
                 std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR."
                           << "[error status: " << status << "]" << std::endl;
@@ -542,7 +543,7 @@ main(int argc, const char *argv[])
     /* Convert the DR IR to trace entries. */
     std::vector<trace_entry_t> entries;
     ir2trace_convert_status_t ir2trace_convert_status =
-        ir2trace_t::convert(drir, entries);
+        ir2trace_t::convert(&drir, entries);
     if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) {
         std::cerr << CLIENT_NAME << ": failed to convert DR IR to trace entries."
                   << "[error status: " << ir2trace_convert_status << "]" << std::endl;
diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp
index 62e36621343..57e27e931d1 100644
--- a/clients/drcachesim/drpt2trace/ir2trace.cpp
+++ b/clients/drcachesim/drpt2trace/ir2trace.cpp
@@ -55,16 +55,19 @@ namespace drmemtrace {
 #define ERRMSG_HEADER "[drpt2ir] "
 
 ir2trace_convert_status_t
-ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
+ir2trace_t::convert(DR_PARAM_IN drir_t *drir,
                     DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
                     DR_PARAM_IN int verbosity)
 {
-    if (drir.get_ilist() == NULL) {
+    if (drir == nullptr || drir->get_ilist() == NULL) {
         return IR2TRACE_CONV_ERROR_INVALID_PARAMETER;
     }
-    instr_t *instr = instrlist_first(drir.get_ilist());
+    instr_t *instr = instrlist_first(drir->get_ilist());
+    bool prev_was_repstr = false;
     while (instr != NULL) {
         trace_entry_t entry = {};
+        entry.size = instr_length(GLOBAL_DCONTEXT, instr);
+        entry.addr = reinterpret_cast<uintptr_t>(instr_get_app_pc(instr));
 
         if (!trace.empty() && trace.back().type == TRACE_TYPE_INSTR_CONDITIONAL_JUMP) {
             if (instr_get_prev(instr) == nullptr ||
@@ -87,6 +90,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
          */
         entry.type = TRACE_TYPE_INSTR;
         if (instr_opcode_valid(instr)) {
+            bool cur_is_repstr = false;
             if (instr_is_call_direct(instr)) {
                 entry.type = TRACE_TYPE_INSTR_DIRECT_CALL;
             } else if (instr_is_call_indirect(instr)) {
@@ -103,15 +107,20 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir,
             } else if (instr_get_opcode(instr) == OP_sysenter) {
                 entry.type = TRACE_TYPE_INSTR_SYSENTER;
             } else if (instr_is_rep_string_op(instr)) {
-                entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
+                cur_is_repstr = true;
+                if (prev_was_repstr) {
+                    entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH;
+                } else {
+                    prev_was_repstr = true;
+                }
+            }
+            if (!cur_is_repstr) {
+                prev_was_repstr = false;
             }
         } else {
             VPRINT(1, "Trying to convert an invalid instruction.\n");
         }
 
-        entry.size = instr_length(GLOBAL_DCONTEXT, instr);
-        entry.addr = (uintptr_t)instr_get_app_pc(instr);
-
         trace.push_back(entry);
 
         instr = instr_get_next(instr);
diff --git a/clients/drcachesim/drpt2trace/ir2trace.h b/clients/drcachesim/drpt2trace/ir2trace.h
index 18458dc8e3c..13444881f22 100644
--- a/clients/drcachesim/drpt2trace/ir2trace.h
+++ b/clients/drcachesim/drpt2trace/ir2trace.h
@@ -92,7 +92,7 @@ class ir2trace_t {
      * error code.
      */
     static ir2trace_convert_status_t
-    convert(DR_PARAM_IN drir_t &drir, DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
+    convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector<trace_entry_t> &trace,
             DR_PARAM_IN int verbosity = 0);
 };
 
diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp
index 4a5ad88f293..13ea7221daf 100644
--- a/clients/drcachesim/drpt2trace/pt2ir.cpp
+++ b/clients/drcachesim/drpt2trace/pt2ir.cpp
@@ -257,13 +257,13 @@ pt2ir_t::init(DR_PARAM_IN pt2ir_config_t &pt2ir_config, DR_PARAM_IN int verbosit
 
 pt2ir_convert_status_t
 pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size,
-                 DR_PARAM_INOUT drir_t &drir)
+                 DR_PARAM_INOUT drir_t *drir)
 {
     if (!pt2ir_initialized_) {
         return PT2IR_CONV_ERROR_NOT_INITIALIZED;
     }
 
-    if (pt_data == nullptr || pt_data_size <= 0) {
+    if (pt_data == nullptr || pt_data_size <= 0 || drir == nullptr) {
         return PT2IR_CONV_ERROR_INVALID_INPUT;
     }
 
@@ -379,24 +379,14 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_
             }
 
             /* Use drdecode to decode insn(pt_insn) to instr_t. */
-            instr_t *instr = instr_create(drir.get_drcontext());
-            instr_init(drir.get_drcontext(), instr);
+            instr_t *instr = instr_create(drir->get_drcontext());
+            instr_init(drir->get_drcontext(), instr);
             instr_set_isa_mode(instr,
                                insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64);
-            bool instr_valid = false;
-            if (decode(drir.get_drcontext(), insn.raw, instr) != nullptr)
-                instr_valid = true;
-            instr_set_translation(instr, (app_pc)insn.ip);
-            instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size);
-            if (!instr_valid) {
-                /* The decode() function will not correctly identify the raw bits for
-                 * invalid instruction. So we need to set the raw bits of instr manually.
-                 */
-                instr_free_raw_bits(drir.get_drcontext(), instr);
-                instr_set_raw_bits(instr, insn.raw, insn.size);
-                instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size);
+            app_pc instr_ip = reinterpret_cast<app_pc>(insn.ip);
+            if (decode_from_copy(drir->get_drcontext(), insn.raw, instr_ip, instr) ==
+                nullptr) {
 #ifdef DEBUG
-
                 /* Print the invalid instruction‘s PC and raw bytes in DEBUG builds. */
                 if (verbosity_ >= 1) {
                     fprintf(stderr,
@@ -409,7 +399,7 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_
                 }
 #endif
             }
-            drir.append(instr);
+            drir->append(instr, instr_ip, insn.size, insn.raw);
         }
     }
     return PT2IR_CONV_SUCCESS;
diff --git a/clients/drcachesim/drpt2trace/pt2ir.h b/clients/drcachesim/drpt2trace/pt2ir.h
index a1e2f49f01e..02ec9a0a4f6 100644
--- a/clients/drcachesim/drpt2trace/pt2ir.h
+++ b/clients/drcachesim/drpt2trace/pt2ir.h
@@ -365,7 +365,7 @@ class pt2ir_t {
      */
     pt2ir_convert_status_t
     convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size,
-            DR_PARAM_INOUT drir_t &drir);
+            DR_PARAM_INOUT drir_t *drir);
 
 private:
     /* Diagnose converting errors and output diagnostic results.
diff --git a/clients/drcachesim/drpt2trace/test_simple.expect b/clients/drcachesim/drpt2trace/test_simple.expect
deleted file mode 100644
index c55837c430b..00000000000
--- a/clients/drcachesim/drpt2trace/test_simple.expect
+++ /dev/null
@@ -1,14 +0,0 @@
-TAG  0x0000000000000000
- +0    L2                      b8 01 00 00 00       mov    $0x00000001 -> %eax
- +5    L2                      bf 01 00 00 00       mov    $0x00000001 -> %edi
- +10   L2                      48 be 00 20 40 00 00 mov    $0x0000000000402000 -> %rsi
-                                00 00 00
- +20   L2                      ba 0e 00 00 00       mov    $0x0000000e -> %edx
- +25   L2                      0f 05                syscall  -> %rcx %r11
- +27   L2                      b8 3c 00 00 00       mov    $0x0000003c -> %eax
- +32   L2                      bf 00 00 00 00       mov    $0x00000000 -> %edi
- +37   L2                      0f 05                syscall  -> %rcx %r11
-END 0x0000000000000000
-
-Number of Instructions: 8
-Number of Trace Entries: 8
diff --git a/clients/drcachesim/drpt2trace/test_simple.templatex b/clients/drcachesim/drpt2trace/test_simple.templatex
new file mode 100644
index 00000000000..7dc05ea81df
--- /dev/null
+++ b/clients/drcachesim/drpt2trace/test_simple.templatex
@@ -0,0 +1,14 @@
+TAG  0x0000000000000000
+ \+0    L3 .* mov    \$0x00000001 -> %eax
+ \+5    L3 .* mov    \$0x00000001 -> %edi
+ \+10   L3 .* \$0x0000000000402000 -> %rsi
+          .*
+ \+20   L3 .* mov    \$0x0000000e -> %edx
+ \+25   L3 .* syscall  -> %rcx %r11
+ \+27   L3 .* mov    \$0x0000003c -> %eax
+ \+32   L3 .* mov    \$0x00000000 -> %edi
+ \+37   L3 .* syscall  -> %rcx %r11
+END 0x0000000000000000
+.*
+Number of Instructions: 8
+Number of Trace Entries: 8
diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp
index 1cd157ab44a..783d2a44bb8 100644
--- a/clients/drcachesim/reader/reader.cpp
+++ b/clients/drcachesim/reader/reader.cpp
@@ -321,8 +321,7 @@ reader_t::process_input_entry()
             version_ = cur_ref_.marker.marker_value;
         else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
             filetype_ = cur_ref_.marker.marker_value;
-            if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_) &&
-                !TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, filetype_)) {
+            if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_)) {
                 expect_no_encodings_ = false;
             }
         } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE)
diff --git a/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex
new file mode 100644
index 00000000000..ce75a56588d
--- /dev/null
+++ b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex
@@ -0,0 +1,6 @@
+Hello, world!
+Opcode mix tool results:
+.*: total executed instructions
+.*
+.*: .*clac
+.*
diff --git a/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex
new file mode 100644
index 00000000000..8582336ef38
--- /dev/null
+++ b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex
@@ -0,0 +1,6 @@
+Hello, world!
+Syscall mix tool results:
+  syscall count : syscall_num
+.*
+ syscall trace count : syscall_num
+.*
diff --git a/clients/drcachesim/tests/offline-syscall-mix.templatex b/clients/drcachesim/tests/offline-syscall-mix.templatex
index 231b6bb93d2..a02a9ca3e1e 100644
--- a/clients/drcachesim/tests/offline-syscall-mix.templatex
+++ b/clients/drcachesim/tests/offline-syscall-mix.templatex
@@ -1,4 +1,4 @@
 Hello, world!
 Syscall mix tool results:
-          count : syscall_num
+  syscall count : syscall_num
 (  *[1-9][0-9]* : *[0-9]*.*)+
diff --git a/clients/drcachesim/tests/syscall-mix.templatex b/clients/drcachesim/tests/syscall-mix.templatex
index 59ae08aad9e..e58b2ec7da2 100644
--- a/clients/drcachesim/tests/syscall-mix.templatex
+++ b/clients/drcachesim/tests/syscall-mix.templatex
@@ -1,5 +1,5 @@
 Hello, world!
 ----  ----
 Syscall mix tool results:
-          count : syscall_num
+  syscall count : syscall_num
 (  *[1-9][0-9]* : *[0-9]*.*)+
diff --git a/clients/drcachesim/tools/syscall_mix.cpp b/clients/drcachesim/tools/syscall_mix.cpp
index 2885e37e364..a18c6f1e5ed 100644
--- a/clients/drcachesim/tools/syscall_mix.cpp
+++ b/clients/drcachesim/tools/syscall_mix.cpp
@@ -111,14 +111,21 @@ bool
 syscall_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
 {
     shard_data_t *shard = reinterpret_cast<shard_data_t *>(shard_data);
-    if (memref.marker.type != TRACE_TYPE_MARKER ||
-        memref.marker.marker_type != TRACE_MARKER_TYPE_SYSCALL)
-        return true;
-    int syscall_num = static_cast<int>(memref.marker.marker_value);
+    if (memref.marker.type == TRACE_TYPE_MARKER &&
+        memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL) {
+        int syscall_num = static_cast<int>(memref.marker.marker_value);
 #ifdef X64
-    assert(static_cast<uintptr_t>(syscall_num) == memref.marker.marker_value);
+        assert(static_cast<uintptr_t>(syscall_num) == memref.marker.marker_value);
 #endif
-    ++shard->syscall_counts[syscall_num];
+        ++shard->syscall_counts[syscall_num];
+    } else if (memref.marker.type == TRACE_TYPE_MARKER &&
+               memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START) {
+        int syscall_num = static_cast<int>(memref.marker.marker_value);
+#ifdef X64
+        assert(static_cast<uintptr_t>(syscall_num) == memref.marker.marker_value);
+#endif
+        ++shard->syscall_trace_counts[syscall_num];
+    }
     return true;
 }
 
@@ -142,6 +149,8 @@ syscall_mix_t::process_memref(const memref_t &memref)
 static bool
 cmp_second_val(const std::pair &l, const std::pair &r)
 {
+    if (l.second == r.second)
+        return l.first > r.first;
     return l.second > r.second;
 }
 
@@ -156,10 +165,13 @@ syscall_mix_t::print_results()
             for (const auto &keyvals : shard.second->syscall_counts) {
                 total.syscall_counts[keyvals.first] += keyvals.second;
             }
+            for (const auto &keyvals : shard.second->syscall_trace_counts) {
+                total.syscall_trace_counts[keyvals.first] += keyvals.second;
+            }
         }
     }
     std::cerr << TOOL_NAME << " results:\n";
-    std::cerr << std::setw(15) << "count"
+    std::cerr << std::setw(15) << "syscall count"
               << " : " << std::setw(9) << "syscall_num\n";
     std::vector> sorted(total.syscall_counts.begin(),
                                                 total.syscall_counts.end());
@@ -170,6 +182,19 @@ syscall_mix_t::print_results()
         std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9)
                   << keyvals.first << "\n";
     }
+    if (!total.syscall_trace_counts.empty()) {
+        std::cerr << std::setw(20) << "syscall trace count"
+                  << " : " << std::setw(9) << "syscall_num\n";
+        std::vector> sorted_trace(
+            total.syscall_trace_counts.begin(), total.syscall_trace_counts.end());
+        std::sort(sorted_trace.begin(), sorted_trace.end(), cmp_second_val);
+        for (const auto &keyvals : sorted_trace) {
+            // XXX: It would be nicer to print the system call name string instead
+            // of its number.
+            std::cerr << std::setw(20) << keyvals.second << " : " << std::setw(9)
+                      << keyvals.first << "\n";
+        }
+    }
     return true;
 }
 
diff --git a/clients/drcachesim/tools/syscall_mix.h b/clients/drcachesim/tools/syscall_mix.h
index 7dc42a1a3c4..04cfb449f81 100644
--- a/clients/drcachesim/tools/syscall_mix.h
+++ b/clients/drcachesim/tools/syscall_mix.h
@@ -71,6 +71,7 @@ class syscall_mix_t : public analysis_tool_t {
 protected:
     struct shard_data_t {
         std::unordered_map syscall_counts;
+        std::unordered_map syscall_trace_counts;
         std::string error;
     };
 
diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp
index 2c77e04d682..3a07e944671 100644
--- a/clients/drcachesim/tools/view.cpp
+++ b/clients/drcachesim/tools/view.cpp
@@ -418,10 +418,12 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
             // Handled above.
             break;
         case TRACE_MARKER_TYPE_SYSCALL_TRACE_START:
-            std::cerr << "\n";
+            std::cerr << "\n";
             break;
         case TRACE_MARKER_TYPE_SYSCALL_TRACE_END:
-            std::cerr << "\n";
+            std::cerr << "\n";
             break;
         case TRACE_MARKER_TYPE_BRANCH_TARGET:
             // These are not expected to be visible (since the reader adds them
diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp
index c6dd6439d93..f38c9e079b2 100644
--- a/clients/drcachesim/tracer/raw2trace.cpp
+++ b/clients/drcachesim/tracer/raw2trace.cpp
@@ -1008,9 +1008,12 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall
     }
 
     /* Convert the PT Data to DR IR. */
-    drir_t drir(GLOBAL_DCONTEXT);
-    pt2ir_convert_status_t pt2ir_convert_status =
-        tdata->pt2ir.convert(pt_data->data.get(), pt_data_size, drir);
+    if (tdata->pt_decode_state_ == nullptr) {
+        tdata->pt_decode_state_ = std::unique_ptr<drir_t>(new drir_t(GLOBAL_DCONTEXT));
+    }
+    tdata->pt_decode_state_->clear_ilist();
+    pt2ir_convert_status_t pt2ir_convert_status = tdata->pt2ir.convert(
+        pt_data->data.get(), pt_data_size, tdata->pt_decode_state_.get());
     if (pt2ir_convert_status != PT2IR_CONV_SUCCESS) {
         tdata->error = "Failed to convert PT raw trace to DR IR [error status: " +
             std::to_string(pt2ir_convert_status) + "]";
@@ -1018,13 +1021,15 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall
     }
 
     /* Convert the DR IR to trace entries. */
+    addr_t sysnum =
+        pt_data->header[dynamorio::drmemtrace::PDB_HEADER_SYSNUM_IDX].sysnum.sysnum;
     std::vector<trace_entry_t> entries;
     trace_entry_t start_entry = { .type = TRACE_TYPE_MARKER,
                                   .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_START,
-                                  .addr = 0 };
+                                  .addr = sysnum };
     entries.push_back(start_entry);
     ir2trace_convert_status_t ir2trace_convert_status =
-        ir2trace_t::convert(drir, entries);
+        ir2trace_t::convert(tdata->pt_decode_state_.get(), entries);
     if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) {
         tdata->error = "Failed to convert DR IR to trace entries [error status: " +
             std::to_string(ir2trace_convert_status) + "]";
@@ -1032,7 +1037,7 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall
     }
     trace_entry_t end_entry = { .type = TRACE_TYPE_MARKER,
                                 .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_END,
-                                .addr = 0 };
+                                .addr = sysnum };
     entries.push_back(end_entry);
     if (entries.size() == 2) {
         tdata->error = "No trace entries generated from PT data";
@@ -1040,17 +1045,40 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall
     }
 
     accumulate_to_statistic(tdata, RAW2TRACE_STAT_SYSCALL_TRACES_DECODED, 1);
+    app_pc saved_decode_pc;
+    trace_entry_t entries_with_encodings[WRITE_BUFFER_SIZE];
+    trace_entry_t *buf = entries_with_encodings;
     for (const auto &entry : entries) {
-        if (type_is_instr(static_cast<trace_type_t>(entry.type)))
+        if (type_is_instr(static_cast<trace_type_t>(entry.type))) {
+            if (buf != entries_with_encodings) {
+                if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) {
+                    return false;
+                }
+                buf = entries_with_encodings;
+            }
             accumulate_to_statistic(tdata, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, 1);
+            // The per-thread drir_t object (pt_decode_state_) keeps instr encoding
+            // state across system calls. So different dynamic instances of the same
+            // instruction in system calls will have the same decode_pc.
+            saved_decode_pc = tdata->pt_decode_state_->get_decode_pc(
+                reinterpret_cast<app_pc>(entry.addr));
+            if (saved_decode_pc == nullptr) {
+                tdata->error =
+                    "Unknown pc after ir2trace: did ir2trace insert new instr?";
+                return false;
+            }
+            if (!append_encoding(tdata, saved_decode_pc, entry.size, buf,
+                                 entries_with_encodings))
+                return false;
+        }
+        *buf = entry;
+        ++buf;
     }
-
-    if (!tdata->out_file->write(reinterpret_cast<const char *>(entries.data()),
-                                sizeof(trace_entry_t) * entries.size())) {
-        tdata->error = "Failed to write to output file";
-        return false;
+    if (buf != entries_with_encodings) {
+        if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) {
+            return false;
+        }
     }
-
     return true;
 }
 #endif
diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h
index dfdbd333cad..6c451242284 100644
--- a/clients/drcachesim/tracer/raw2trace.h
+++ b/clients/drcachesim/tracer/raw2trace.h
@@ -1080,6 +1080,7 @@ class raw2trace_t {
         std::vector rseq_decode_pcs_;
 
 #ifdef BUILD_PT_POST_PROCESSOR
+        std::unique_ptr<drir_t> pt_decode_state_ = nullptr;
         std::istream *kthread_file;
         bool pt_metadata_processed = false;
         pt2ir_t pt2ir;
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index 1d60cb6d480..1ecdc74f66d 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -4554,7 +4554,7 @@ if (BUILD_CLIENTS)
   if (proc_supports_pt)
     if (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR)
       get_target_path_for_execution(drpt2trace_path drpt2trace "${location_suffix}")
-      macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args)
+      macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args sim_atops)
         set(testname_full "tool.drcacheoff.kernel.${testname}_SUDO")
         torunonly_ci(${testname_full} ${exetgt} drcachesim
           "offline-kernel-${testname}.c" # for templatex basename
@@ -4571,13 +4571,18 @@ if (BUILD_CLIENTS)
         set(${testname_full}_precmd
           "foreach@${cmd_pfx}${CMAKE_COMMAND}@-E@remove_directory@${testname_full}.*.dir")
         set(${testname_full}_postcmd
-          "firstglob@${cmd_pfx}${drcachesim_path}@-simulator_type@basic_counts@-indir@${testname_full}.*.dir${sim_atops}")
+          "firstglob@${cmd_pfx}${drcachesim_path}@-indir@${testname_full}.*.dir${sim_atops}")
       endmacro ()
       # We use '-raw_compress none' because when snappy or lz4 is used for raw traces,
       # the check that complains about malloc use in the client is disabled by invoking
       # dr_allow_unsafe_static_behavior. We want to perform this check on the kernel
       # tracing flow.
-      torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "")
+      torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" ""
+                                  "@-simulator_type@basic_counts")
+      torunonly_drcacheoff_kernel(opcode-mix ${ci_shared_app} "-raw_compress none" ""
+                                  "@-simulator_type@opcode_mix")
+      torunonly_drcacheoff_kernel(syscall-mix ${ci_shared_app} "-raw_compress none" ""
+                                  "@-simulator_type@syscall_mix")
     endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR)
   endif (proc_supports_pt)
 
@@ -4704,7 +4709,7 @@ if (BUILD_CLIENTS)
       "-sb_sysroot"
       "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw")
     torunonly_api(tool.drpt2trace.sideband drpt2trace
-      "../../clients/drcachesim/drpt2trace/test_simple.expect"
+      "../../clients/drcachesim/drpt2trace/test_simple.templatex"
       "" "${drpt2trace_sideband_args}" ON OFF)
     set(drpt2trace_elf_args ${drpt2trace_commong}
       "-mode" "ELF"
@@ -4712,7 +4717,7 @@ if (BUILD_CLIENTS)
       "-elf"
       "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw/hello")
     torunonly_api(tool.drpt2trace.elf drpt2trace
-      "../../clients/drcachesim/drpt2trace/test_simple.expect"
+      "../../clients/drcachesim/drpt2trace/test_simple.templatex"
       "" "${drpt2trace_elf_args}" ON OFF)
   endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR)
 endif (BUILD_CLIENTS)