From 3a91038877504a08c7e805a69522a25293709f10 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Fri, 22 Sep 2023 10:23:07 +0000 Subject: [PATCH] i#3544 RV64: Add preliminary stolen registration support This patch adds preliminary support for stolen register. Similar to AArch64, when inside a basic block, we need to steal a register to hold os_local_state. All instructions in the basic block that use stolen register and/or tp need to be mangled. --- core/arch/arch.h | 4 ++ core/arch/arch_exports.h | 7 +-- core/arch/emit_utils_shared.c | 83 ++++++++++++++++++++++++++++------- core/arch/interp.c | 2 + core/arch/x86_code.c | 2 + core/fcache.c | 10 +++-- core/ir/instr_shared.c | 6 +-- core/ir/opnd.h | 7 ++- core/ir/riscv64/opnd.c | 11 +++-- core/lib/instrument.c | 14 ++++-- core/options.c | 4 +- core/optionsx.h | 8 +++- 12 files changed, 115 insertions(+), 43 deletions(-) diff --git a/core/arch/arch.h b/core/arch/arch.h index 6f24bb53cd8..b8f8b9b563b 100644 --- a/core/arch/arch.h +++ b/core/arch/arch.h @@ -159,6 +159,9 @@ mixed_mode_enabled(void) # define MC_RETVAL_REG r0 # define SS_RETVAL_REG r0 #elif defined(RISCV64) +# define X0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x0))) +# define X1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x1))) +# define F0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, f0))) # define REG0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, a0))) # define REG1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, a1))) # define REG2_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, a2))) @@ -177,6 +180,7 @@ mixed_mode_enabled(void) # define SCRATCH_REG3_OFFS REG3_OFFSET # define SCRATCH_REG4_OFFS REG4_OFFSET # define SCRATCH_REG5_OFFS REG5_OFFSET +# define REG_OFFSET(reg) (X0_OFFSET + ((reg)-DR_REG_X0) * sizeof(reg_t)) /* FIXME i#3544: Check is T6 safe to use */ # define CALL_SCRATCH_REG DR_REG_T6 # define MC_IBL_REG a2 diff --git a/core/arch/arch_exports.h b/core/arch/arch_exports.h index 77b9ce203a3..8f1ffb65801 100644 --- a/core/arch/arch_exports.h +++ b/core/arch/arch_exports.h @@ -157,9 +157,8 @@ typedef struct _spill_state_t { reg_t reg_stolen; /* slot for the stolen register */ #elif defined(RISCV64) reg_t a0, a1, a2, a3; - /* Slot for the stolen register, which is tp. - * Note that on RISC-V, tp is the thread pointer, and it is also a general-purpose - * register, so we steal tp to store DR's tls. + /* Slots for the stolen register. Similar to AArch64, we use reg_stolen slot to hold + * the guest's stolen reg value. */ reg_t reg_stolen; #endif @@ -235,10 +234,12 @@ typedef struct _local_state_extended_t { # define TLS_REG2_SLOT ((ushort)offsetof(spill_state_t, a2)) # define TLS_REG3_SLOT ((ushort)offsetof(spill_state_t, a3)) # define TLS_REG_STOLEN_SLOT ((ushort)offsetof(spill_state_t, reg_stolen)) +# define TLS_REG_TP_SLOT os_get_app_tls_base_offset(TLS_REG_LIB) # define SCRATCH_REG0 DR_REG_A0 # define SCRATCH_REG1 DR_REG_A1 # define SCRATCH_REG2 DR_REG_A2 # define SCRATCH_REG3 DR_REG_A3 +# define SCRATCH_REG_LAST DR_REG_A3 #endif /* X86/ARM */ #define IBL_TARGET_REG SCRATCH_REG2 #define IBL_TARGET_SLOT TLS_REG2_SLOT diff --git a/core/arch/emit_utils_shared.c b/core/arch/emit_utils_shared.c index 045c3ff9ae1..98ec8f7e637 100644 --- a/core/arch/emit_utils_shared.c +++ b/core/arch/emit_utils_shared.c @@ -1298,9 +1298,9 @@ update_indirect_exit_stub(dcontext_t *dcontext, fragment_t *f, linkstub_t *l) int fragment_prefix_size(uint flags) { -#ifdef AARCH64 - /* For AArch64, there is no need to save the flags - * so we always have the same ibt prefix. */ +#if defined(AARCH64) || defined(RISCV64) + /* For AArch64/RISC-V, there is no need to save the flags so we always have the same + * ibt prefix. */ return fragment_ibt_prefix_size(flags); #else if (use_ibt_prefix(flags)) { @@ -2159,10 +2159,10 @@ emit_fcache_enter_common(dcontext_t *dcontext, generated_code_t *code, byte *pc, SCRATCH_REG0 /*scratch*/, false /*to app*/); #endif -#ifdef AARCH64 - /* Put app's X0, X1 in TLS_REG0_SLOT, TLS_REG1_SLOT; this is required by + /* Put app's X0/A0, X1/A1 in TLS_REG0_SLOT, TLS_REG1_SLOT; this is required by * the fragment prefix. */ +#ifdef AARCH64 /* ldp x0, x1, [x5] */ APP(&ilist, XINST_CREATE_load_pair( @@ -2174,6 +2174,27 @@ emit_fcache_enter_common(dcontext_t *dcontext, generated_code_t *code, byte *pc, XINST_CREATE_store_pair( dcontext, opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, 0, OPSZ_16), opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1))); +#elif defined(RISCV64) + + APP(&ilist, + INSTR_CREATE_ld(dcontext, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(REG_DCXT, DR_REG_NULL, 0, + REG_OFFSET(DR_REG_A0), OPSZ_8))); + APP(&ilist, + INSTR_CREATE_ld(dcontext, opnd_create_reg(DR_REG_A1), + opnd_create_base_disp(REG_DCXT, DR_REG_NULL, 0, + REG_OFFSET(DR_REG_A1), OPSZ_8))); + + APP(&ilist, + INSTR_CREATE_sd( + dcontext, + opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG0_SLOT, OPSZ_8), + opnd_create_reg(DR_REG_A0))); + APP(&ilist, + INSTR_CREATE_sd( + dcontext, + opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG1_SLOT, OPSZ_8), + opnd_create_reg(DR_REG_A1))); #endif /* restore the original register state */ @@ -4813,14 +4834,24 @@ emit_do_syscall_common(dcontext_t *dcontext, generated_code_t *code, byte *pc, /* initialize the ilist */ instrlist_init(&ilist); + /* We will call this from handle_system_call, so need prefix on AArch64/RISCV64. */ #ifdef AARCH64 - /* We will call this from handle_system_call, so need prefix on AArch64. */ APP(&ilist, XINST_CREATE_load_pair( dcontext, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1), opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, 0, OPSZ_16))); /* XXX: should have a proper patch list entry */ *syscall_offs += AARCH64_INSTR_SIZE; +#elif defined(RISCV64) + APP(&ilist, + INSTR_CREATE_ld( + dcontext, opnd_create_reg(DR_REG_A0), + opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG0_SLOT, OPSZ_8))); + APP(&ilist, + INSTR_CREATE_ld( + dcontext, opnd_create_reg(DR_REG_A1), + opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG1_SLOT, OPSZ_8))); + *syscall_offs += RISCV64_INSTR_SIZE * 2; #endif #if defined(ARM) @@ -4830,16 +4861,28 @@ emit_do_syscall_common(dcontext_t *dcontext, generated_code_t *code, byte *pc, APP(&ilist, instr_create_save_to_tls(dcontext, DR_REG_R0, TLS_REG0_SLOT)); /* XXX: should have a proper patch list entry */ *syscall_offs += THUMB_LONG_INSTR_SIZE; -#elif defined(AARCH64) - /* For AArch64, we need to save both x0 and x1 into SLOT 0 and SLOT 1 + + /* For AArch64/RISCV64, we need to save both x0/a0 and x1/a1 into SLOT 0 and SLOT 1 * in case the syscall is interrupted. See append_save_gpr. - * stp x0, x1, [x28] */ +#elif defined(AARCH64) APP(&ilist, INSTR_CREATE_stp(dcontext, opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, 0, OPSZ_16), opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1))); *syscall_offs += AARCH64_INSTR_SIZE; +#elif defined(RISCV64) + APP(&ilist, + INSTR_CREATE_sd( + dcontext, + opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG0_SLOT, OPSZ_8), + opnd_create_reg(DR_REG_A0))); + APP(&ilist, + INSTR_CREATE_sd( + dcontext, + opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG1_SLOT, OPSZ_8), + opnd_create_reg(DR_REG_A1))); + *syscall_offs += RISCV64_INSTR_SIZE * 2; #endif /* system call itself -- using same method we've observed OS using */ @@ -4876,8 +4919,10 @@ emit_do_syscall_common(dcontext_t *dcontext, generated_code_t *code, byte *pc, instr_create_save_to_dcontext(dcontext, SCRATCH_REG0, SCRATCH_REG0_OFFS)); } + /* Save SCRATCH_REG1 as this is used for the indirect branch in the exit stub. */ #ifdef AARCH64 - /* Save X1 as this is used for the indirect branch in the exit stub. */ + APP(&ilist, instr_create_save_to_tls(dcontext, SCRATCH_REG1, TLS_REG1_SLOT)); +#elif defined(RISCV64) APP(&ilist, instr_create_save_to_tls(dcontext, SCRATCH_REG1, TLS_REG1_SLOT)); #endif @@ -5226,7 +5271,7 @@ byte * emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc) { instrlist_t ilist; - IF_NOT_AARCH64(uint offset;) + IF_NOT_AARCH64(IF_NOT_RISCV64(uint offset)); /* initialize the ilist */ instrlist_init(&ilist); @@ -5241,7 +5286,7 @@ emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc) * new_thread_setup() will restore real app xsp. * We emulate x86.asm's PUSH_DR_MCONTEXT(SCRATCH_REG0) (for priv_mcontext_t.pc). */ - IF_NOT_AARCH64(offset =) + IF_NOT_AARCH64(IF_NOT_RISCV64(offset =)) insert_push_all_registers(dcontext, NULL, &ilist, NULL, IF_X64_ELSE(16, 4), opnd_create_reg(SCRATCH_REG0), /* we have to pass in scratch to prevent @@ -5249,7 +5294,7 @@ emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc) * a race w/ the parent's use of it! */ SCRATCH_REG0 _IF_AARCH64(false)); -# ifndef AARCH64 +# if !defined AARCH64 && !defined(RISCV64) /* put pre-push xsp into priv_mcontext_t.xsp slot */ ASSERT(offset == get_clean_call_switch_stack_size()); APP(&ilist, @@ -5281,14 +5326,18 @@ emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc) APP(&ilist, XINST_CREATE_move(dcontext, opnd_create_reg(SCRATCH_REG0), opnd_create_reg(REG_XSP))); -# else - /* For AArch64, SP was already saved by insert_push_all_registers and - * pointing to priv_mcontext_t. Move sp to the first argument: - * mov x0, sp + + /* For AArch64/RISCV64, SP was already saved by insert_push_all_registers and + * pointing to priv_mcontext_t. Move sp to the first argument. */ +# elif defined(AARCH64) APP(&ilist, XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_XSP))); +# elif defined(RISCV64) + APP(&ilist, + XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_A0), + opnd_create_reg(DR_REG_XSP))); # endif dr_insert_call_noreturn(dcontext, &ilist, NULL, (void *)new_thread_setup, 1, opnd_create_reg(SCRATCH_REG0)); diff --git a/core/arch/interp.c b/core/arch/interp.c index bc66991ecd9..825c07dd4e1 100644 --- a/core/arch/interp.c +++ b/core/arch/interp.c @@ -6439,6 +6439,8 @@ fixup_last_cti(dcontext_t *dcontext, instrlist_t *trace, app_pc next_tag, uint n /* remove unnecessary ubr at end of block */ #ifdef AARCH64 delete_after = fixup_cbr_on_stolen_reg(dcontext, trace, targeter); +#elif defined(RISCV64) + ASSERT_NOT_IMPLEMENTED(false); #else delete_after = instr_get_prev(targeter); #endif diff --git a/core/arch/x86_code.c b/core/arch/x86_code.c index e9d978a9cc6..b754693fab6 100644 --- a/core/arch/x86_code.c +++ b/core/arch/x86_code.c @@ -299,6 +299,8 @@ new_thread_setup(priv_mcontext_t *mc) set_stolen_reg_val(mc, get_clone_record_stolen_value(crec)); /* set the thread register if necessary */ set_thread_register_from_clone_record(crec); +# elif defined(RISCV64) + ASSERT_NOT_IMPLEMENTED(false); # endif DEBUG_DECLARE(int rc =) diff --git a/core/fcache.c b/core/fcache.c index c3696b43cc9..d7ff07e563c 100644 --- a/core/fcache.c +++ b/core/fcache.c @@ -4244,10 +4244,12 @@ fcache_reset_all_caches_proactively(uint target) /* no lock needed */ dynamo_resetting = true; - IF_AARCHXX({ - if (INTERNAL_OPTION(steal_reg_at_reset) != 0) - arch_reset_stolen_reg(); - }); +#ifdef AARCHXX + if (INTERNAL_OPTION(steal_reg_at_reset) != 0) + arch_reset_stolen_reg(); +#elif defined(RISCV64) + ASSERT_NOT_IMPLEMENTED(false); +#endif /* We free everything before re-init so we can free all heap units. * For everything to be freed, it must either be individually freed, diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index fcbbf5e6513..c36f369c843 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -3794,13 +3794,9 @@ instr_check_tls_spill_restore(instr_t *instr, bool *spill, reg_id_t *reg, int *o # ifdef X86 opnd_is_far_base_disp(memop) && opnd_get_segment(memop) == SEG_TLS && opnd_is_abs_base_disp(memop) -# elif defined(AARCHXX) +# elif defined(AARCHXX) || defined(RISCV64) opnd_is_base_disp(memop) && opnd_get_base(memop) == dr_reg_stolen && opnd_get_index(memop) == DR_REG_NULL -# elif defined(RISCV64) - /* FIXME i#3544: Check if valid. */ - opnd_is_base_disp(memop) && opnd_get_base(memop) == DR_REG_TP && - opnd_get_index(memop) == DR_REG_NULL # endif ) { if (reg != NULL) diff --git a/core/ir/opnd.h b/core/ir/opnd.h index d1906f49fe7..533f2b2216f 100644 --- a/core/ir/opnd.h +++ b/core/ir/opnd.h @@ -325,7 +325,12 @@ opnd_immed_float_arch(uint opcode); #ifdef AARCHXX # define DR_REG_STOLEN_MIN IF_X64_ELSE(DR_REG_X9, DR_REG_R8) /* DR_REG_SYSNUM + 1 */ # define DR_REG_STOLEN_MAX IF_X64_ELSE(DR_REG_X29, DR_REG_R12) -/* DR's stolen register for TLS access */ +/* DR's stolen register for TLS access. */ +extern reg_id_t dr_reg_stolen; +#elif defined(RISCV64) +# define DR_REG_STOLEN_MIN DR_REG_X18 /* DR_REG_SYSNUM + 1 */ +# define DR_REG_STOLEN_MAX DR_REG_X31 +/* DR's stolen register for TLS access. */ extern reg_id_t dr_reg_stolen; #endif diff --git a/core/ir/riscv64/opnd.c b/core/ir/riscv64/opnd.c index eb15636653d..d8c0cdbec55 100644 --- a/core/ir/riscv64/opnd.c +++ b/core/ir/riscv64/opnd.c @@ -34,6 +34,8 @@ #include "instr.h" #include "arch.h" +reg_id_t dr_reg_stolen = DR_REG_NULL; + uint opnd_immed_float_arch(uint opcode) { @@ -46,13 +48,11 @@ DR_API bool reg_is_stolen(reg_id_t reg) { + if (dr_reg_fixer[reg] == dr_reg_stolen && dr_reg_fixer[reg] != DR_REG_NULL) + return true; return false; } -#define X0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x0))) -#define X1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x1))) -#define F0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, f0))) - int opnd_get_reg_dcontext_offs(reg_id_t reg) { @@ -69,8 +69,7 @@ opnd_get_reg_dcontext_offs(reg_id_t reg) opnd_t opnd_create_sized_tls_slot(int offs, opnd_size_t size) { - /* FIXME i#3544: Check if this is actual TP or one stolen by DynamoRIO? */ - return opnd_create_base_disp(DR_REG_TP, REG_NULL, 0, offs, size); + return opnd_create_base_disp(dr_reg_stolen, REG_NULL, 0, offs, size); } #endif /* !STANDALONE_DECODER */ diff --git a/core/lib/instrument.c b/core/lib/instrument.c index 00686214ed2..1bb9258687d 100644 --- a/core/lib/instrument.c +++ b/core/lib/instrument.c @@ -4633,7 +4633,7 @@ dr_set_tls_field(void *drcontext, void *value) DR_API void * dr_get_dr_segment_base(IN reg_id_t seg) { -#ifdef AARCHXX +#if defined(AARCHXX) || defined(RISCV64) if (seg == dr_reg_stolen) return os_get_dr_tls_base(get_thread_private_dcontext()); else @@ -4650,7 +4650,7 @@ dr_raw_tls_calloc(OUT reg_id_t *tls_register, OUT uint *offset, IN uint num_slot { CLIENT_ASSERT(tls_register != NULL, "dr_raw_tls_calloc: tls_register cannot be NULL"); CLIENT_ASSERT(offset != NULL, "dr_raw_tls_calloc: offset cannot be NULL"); - *tls_register = IF_X86_ELSE(SEG_TLS, IF_RISCV64_ELSE(DR_REG_TP, dr_reg_stolen)); + *tls_register = IF_X86_ELSE(SEG_TLS, dr_reg_stolen); if (num_slots == 0) return true; return os_tls_calloc(offset, num_slots, alignment); @@ -6503,6 +6503,8 @@ dr_get_mcontext_priv(dcontext_t *dcontext, dr_mcontext_t *dmc, priv_mcontext_t * (reg_t)d_r_get_tls(os_tls_offset(TLS_REG_STOLEN_SLOT))); } } +#elif defined(RISCV64) + ASSERT_NOT_IMPLEMENTED(false); #endif /* XXX: should we set the pc field? @@ -6566,6 +6568,8 @@ dr_set_mcontext(void *drcontext, dr_mcontext_t *context) /* save the reg val on the stack to be clobbered by the the copy below */ reg_val = get_stolen_reg_val(state); } +#elif defined(RISCV64) + CLIENT_ASSERT(false, "NYI on RISCV64"); #endif if (!dr_mcontext_to_priv_mcontext(state, context)) return false; @@ -6574,6 +6578,8 @@ dr_set_mcontext(void *drcontext, dr_mcontext_t *context) /* restore the reg val on the stack clobbered by the copy above */ set_stolen_reg_val(state, reg_val); } +#elif defined(RISCV64) + CLIENT_ASSERT(false, "NYI on RISCV64"); #endif if (TEST(DR_MC_CONTROL, context->flags)) { @@ -7352,7 +7358,7 @@ DR_API reg_id_t dr_get_stolen_reg() { - return IF_AARCHXX_ELSE(dr_reg_stolen, REG_NULL); + return IF_X86_ELSE(DR_REG_NULL, dr_reg_stolen); } DR_API @@ -7368,6 +7374,8 @@ dr_insert_get_stolen_reg_value(void *drcontext, instrlist_t *ilist, instr_t *ins #ifdef AARCHXX instrlist_meta_preinsert( ilist, instr, instr_create_restore_from_tls(drcontext, reg, TLS_REG_STOLEN_SLOT)); +#elif defined(RISCV64) + CLIENT_ASSERT(false, "NYI on RISCV64"); #endif return true; } diff --git a/core/options.c b/core/options.c index 0d6f8e35c7e..55d8674933b 100644 --- a/core/options.c +++ b/core/options.c @@ -1233,9 +1233,9 @@ static bool check_option_compatibility_helper(int recurse_count) { bool changed_options = false; -# ifdef AARCH64 +# if defined(AARCH64) || defined(RISCV64) if (!DYNAMO_OPTION(bb_prefixes)) { - USAGE_ERROR("bb_prefixes must be true on AArch64"); + USAGE_ERROR("bb_prefixes must be true on AArch64/RISCV64"); dynamo_options.bb_prefixes = true; changed_options = true; } diff --git a/core/optionsx.h b/core/optionsx.h index 848d31d9adb..5296e8a2f86 100644 --- a/core/optionsx.h +++ b/core/optionsx.h @@ -521,7 +521,8 @@ OPTION_COMMAND( }, "enable memory savings at potential loss in performance", STATIC, OP_PCACHE_NOP) -PC_OPTION_DEFAULT_INTERNAL(bool, bb_prefixes, IF_AARCH64_ELSE(true, false), +PC_OPTION_DEFAULT_INTERNAL(bool, bb_prefixes, + IF_AARCH64_ELSE(true, IF_RISCV64_ELSE(true, false)), "give all bbs a prefix") /* If a client registers a bb hook, we force a full decode. This option * requests a full decode regardless of whether there is a bb hook. @@ -642,7 +643,10 @@ OPTION_DEFAULT_INTERNAL(bool, ldstex2cas, true, "replace exclusive load/store with compare-and-swap to " "allow instrumentation, at the risk of ABA errors") #endif - +#ifdef RISCV64 +/* We only allow register between x18 and x31 to be used. */ +OPTION_DEFAULT_INTERNAL(uint, steal_reg, 28, "The register stolen/used by DynamoRIO") +#endif #ifdef WINDOWS_PC_SAMPLE OPTION_DEFAULT(uint, prof_pcs_DR, 2, "PC profile dynamorio.dll, value is bit shift to use, < 2 or > 32 "