Skip to content

Commit

Permalink
i#3544 RV64: Add preliminary stolen registration support
Browse files Browse the repository at this point in the history
This patch adds preliminary support for stolen register. Similar to AArch64, when inside a basic block, we need to steal a register to hold os_local_state. All instructions in the basic block that use stolen register and/or tp need to be mangled.
  • Loading branch information
ksco committed Sep 22, 2023
1 parent 1006071 commit 3a91038
Show file tree
Hide file tree
Showing 12 changed files with 115 additions and 43 deletions.
4 changes: 4 additions & 0 deletions core/arch/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@ mixed_mode_enabled(void)
# define MC_RETVAL_REG r0
# define SS_RETVAL_REG r0
#elif defined(RISCV64)
# define X0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x0)))
# define X1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x1)))
# define F0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, f0)))
# define REG0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, a0)))
# define REG1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, a1)))
# define REG2_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, a2)))
Expand All @@ -177,6 +180,7 @@ mixed_mode_enabled(void)
# define SCRATCH_REG3_OFFS REG3_OFFSET
# define SCRATCH_REG4_OFFS REG4_OFFSET
# define SCRATCH_REG5_OFFS REG5_OFFSET
# define REG_OFFSET(reg) (X0_OFFSET + ((reg)-DR_REG_X0) * sizeof(reg_t))
/* FIXME i#3544: Check is T6 safe to use */
# define CALL_SCRATCH_REG DR_REG_T6
# define MC_IBL_REG a2
Expand Down
7 changes: 4 additions & 3 deletions core/arch/arch_exports.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,8 @@ typedef struct _spill_state_t {
reg_t reg_stolen; /* slot for the stolen register */
#elif defined(RISCV64)
reg_t a0, a1, a2, a3;
/* Slot for the stolen register, which is tp.
* Note that on RISC-V, tp is the thread pointer, and it is also a general-purpose
* register, so we steal tp to store DR's tls.
/* Slots for the stolen register. Similar to AArch64, we use reg_stolen slot to hold
* the guest's stolen reg value.
*/
reg_t reg_stolen;
#endif
Expand Down Expand Up @@ -235,10 +234,12 @@ typedef struct _local_state_extended_t {
# define TLS_REG2_SLOT ((ushort)offsetof(spill_state_t, a2))
# define TLS_REG3_SLOT ((ushort)offsetof(spill_state_t, a3))
# define TLS_REG_STOLEN_SLOT ((ushort)offsetof(spill_state_t, reg_stolen))
# define TLS_REG_TP_SLOT os_get_app_tls_base_offset(TLS_REG_LIB)
# define SCRATCH_REG0 DR_REG_A0
# define SCRATCH_REG1 DR_REG_A1
# define SCRATCH_REG2 DR_REG_A2
# define SCRATCH_REG3 DR_REG_A3
# define SCRATCH_REG_LAST DR_REG_A3
#endif /* X86/ARM */
#define IBL_TARGET_REG SCRATCH_REG2
#define IBL_TARGET_SLOT TLS_REG2_SLOT
Expand Down
83 changes: 66 additions & 17 deletions core/arch/emit_utils_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -1298,9 +1298,9 @@ update_indirect_exit_stub(dcontext_t *dcontext, fragment_t *f, linkstub_t *l)
int
fragment_prefix_size(uint flags)
{
#ifdef AARCH64
/* For AArch64, there is no need to save the flags
* so we always have the same ibt prefix. */
#if defined(AARCH64) || defined(RISCV64)
/* For AArch64/RISC-V, there is no need to save the flags so we always have the same
* ibt prefix. */
return fragment_ibt_prefix_size(flags);
#else
if (use_ibt_prefix(flags)) {
Expand Down Expand Up @@ -2159,10 +2159,10 @@ emit_fcache_enter_common(dcontext_t *dcontext, generated_code_t *code, byte *pc,
SCRATCH_REG0 /*scratch*/, false /*to app*/);
#endif

#ifdef AARCH64
/* Put app's X0, X1 in TLS_REG0_SLOT, TLS_REG1_SLOT; this is required by
/* Put app's X0/A0, X1/A1 in TLS_REG0_SLOT, TLS_REG1_SLOT; this is required by
* the fragment prefix.
*/
#ifdef AARCH64
/* ldp x0, x1, [x5] */
APP(&ilist,
XINST_CREATE_load_pair(
Expand All @@ -2174,6 +2174,27 @@ emit_fcache_enter_common(dcontext_t *dcontext, generated_code_t *code, byte *pc,
XINST_CREATE_store_pair(
dcontext, opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, 0, OPSZ_16),
opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1)));
#elif defined(RISCV64)

APP(&ilist,
INSTR_CREATE_ld(dcontext, opnd_create_reg(DR_REG_A0),
opnd_create_base_disp(REG_DCXT, DR_REG_NULL, 0,
REG_OFFSET(DR_REG_A0), OPSZ_8)));
APP(&ilist,
INSTR_CREATE_ld(dcontext, opnd_create_reg(DR_REG_A1),
opnd_create_base_disp(REG_DCXT, DR_REG_NULL, 0,
REG_OFFSET(DR_REG_A1), OPSZ_8)));

APP(&ilist,
INSTR_CREATE_sd(
dcontext,
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG0_SLOT, OPSZ_8),
opnd_create_reg(DR_REG_A0)));
APP(&ilist,
INSTR_CREATE_sd(
dcontext,
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG1_SLOT, OPSZ_8),
opnd_create_reg(DR_REG_A1)));
#endif

/* restore the original register state */
Expand Down Expand Up @@ -4813,14 +4834,24 @@ emit_do_syscall_common(dcontext_t *dcontext, generated_code_t *code, byte *pc,
/* initialize the ilist */
instrlist_init(&ilist);

/* We will call this from handle_system_call, so need prefix on AArch64/RISCV64. */
#ifdef AARCH64
/* We will call this from handle_system_call, so need prefix on AArch64. */
APP(&ilist,
XINST_CREATE_load_pair(
dcontext, opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1),
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, 0, OPSZ_16)));
/* XXX: should have a proper patch list entry */
*syscall_offs += AARCH64_INSTR_SIZE;
#elif defined(RISCV64)
APP(&ilist,
INSTR_CREATE_ld(
dcontext, opnd_create_reg(DR_REG_A0),
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG0_SLOT, OPSZ_8)));
APP(&ilist,
INSTR_CREATE_ld(
dcontext, opnd_create_reg(DR_REG_A1),
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG1_SLOT, OPSZ_8)));
*syscall_offs += RISCV64_INSTR_SIZE * 2;
#endif

#if defined(ARM)
Expand All @@ -4830,16 +4861,28 @@ emit_do_syscall_common(dcontext_t *dcontext, generated_code_t *code, byte *pc,
APP(&ilist, instr_create_save_to_tls(dcontext, DR_REG_R0, TLS_REG0_SLOT));
/* XXX: should have a proper patch list entry */
*syscall_offs += THUMB_LONG_INSTR_SIZE;
#elif defined(AARCH64)
/* For AArch64, we need to save both x0 and x1 into SLOT 0 and SLOT 1

/* For AArch64/RISCV64, we need to save both x0/a0 and x1/a1 into SLOT 0 and SLOT 1
* in case the syscall is interrupted. See append_save_gpr.
* stp x0, x1, [x28]
*/
#elif defined(AARCH64)
APP(&ilist,
INSTR_CREATE_stp(dcontext,
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, 0, OPSZ_16),
opnd_create_reg(DR_REG_X0), opnd_create_reg(DR_REG_X1)));
*syscall_offs += AARCH64_INSTR_SIZE;
#elif defined(RISCV64)
APP(&ilist,
INSTR_CREATE_sd(
dcontext,
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG0_SLOT, OPSZ_8),
opnd_create_reg(DR_REG_A0)));
APP(&ilist,
INSTR_CREATE_sd(
dcontext,
opnd_create_base_disp(dr_reg_stolen, DR_REG_NULL, 0, TLS_REG1_SLOT, OPSZ_8),
opnd_create_reg(DR_REG_A1)));
*syscall_offs += RISCV64_INSTR_SIZE * 2;
#endif

/* system call itself -- using same method we've observed OS using */
Expand Down Expand Up @@ -4876,8 +4919,10 @@ emit_do_syscall_common(dcontext_t *dcontext, generated_code_t *code, byte *pc,
instr_create_save_to_dcontext(dcontext, SCRATCH_REG0, SCRATCH_REG0_OFFS));
}

/* Save SCRATCH_REG1 as this is used for the indirect branch in the exit stub. */
#ifdef AARCH64
/* Save X1 as this is used for the indirect branch in the exit stub. */
APP(&ilist, instr_create_save_to_tls(dcontext, SCRATCH_REG1, TLS_REG1_SLOT));
#elif defined(RISCV64)
APP(&ilist, instr_create_save_to_tls(dcontext, SCRATCH_REG1, TLS_REG1_SLOT));
#endif

Expand Down Expand Up @@ -5226,7 +5271,7 @@ byte *
emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc)
{
instrlist_t ilist;
IF_NOT_AARCH64(uint offset;)
IF_NOT_AARCH64(IF_NOT_RISCV64(uint offset));

/* initialize the ilist */
instrlist_init(&ilist);
Expand All @@ -5241,15 +5286,15 @@ emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc)
* new_thread_setup() will restore real app xsp.
* We emulate x86.asm's PUSH_DR_MCONTEXT(SCRATCH_REG0) (for priv_mcontext_t.pc).
*/
IF_NOT_AARCH64(offset =)
IF_NOT_AARCH64(IF_NOT_RISCV64(offset =))
insert_push_all_registers(dcontext, NULL, &ilist, NULL, IF_X64_ELSE(16, 4),
opnd_create_reg(SCRATCH_REG0),
/* we have to pass in scratch to prevent
* use of the stolen reg, which would be
* a race w/ the parent's use of it!
*/
SCRATCH_REG0 _IF_AARCH64(false));
# ifndef AARCH64
# if !defined AARCH64 && !defined(RISCV64)
/* put pre-push xsp into priv_mcontext_t.xsp slot */
ASSERT(offset == get_clean_call_switch_stack_size());
APP(&ilist,
Expand Down Expand Up @@ -5281,14 +5326,18 @@ emit_new_thread_dynamo_start(dcontext_t *dcontext, byte *pc)
APP(&ilist,
XINST_CREATE_move(dcontext, opnd_create_reg(SCRATCH_REG0),
opnd_create_reg(REG_XSP)));
# else
/* For AArch64, SP was already saved by insert_push_all_registers and
* pointing to priv_mcontext_t. Move sp to the first argument:
* mov x0, sp

/* For AArch64/RISCV64, SP was already saved by insert_push_all_registers and
* pointing to priv_mcontext_t. Move sp to the first argument.
*/
# elif defined(AARCH64)
APP(&ilist,
XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_X0),
opnd_create_reg(DR_REG_XSP)));
# elif defined(RISCV64)
APP(&ilist,
XINST_CREATE_move(dcontext, opnd_create_reg(DR_REG_A0),
opnd_create_reg(DR_REG_XSP)));
# endif
dr_insert_call_noreturn(dcontext, &ilist, NULL, (void *)new_thread_setup, 1,
opnd_create_reg(SCRATCH_REG0));
Expand Down
2 changes: 2 additions & 0 deletions core/arch/interp.c
Original file line number Diff line number Diff line change
Expand Up @@ -6439,6 +6439,8 @@ fixup_last_cti(dcontext_t *dcontext, instrlist_t *trace, app_pc next_tag, uint n
/* remove unnecessary ubr at end of block */
#ifdef AARCH64
delete_after = fixup_cbr_on_stolen_reg(dcontext, trace, targeter);
#elif defined(RISCV64)
ASSERT_NOT_IMPLEMENTED(false);
#else
delete_after = instr_get_prev(targeter);
#endif
Expand Down
2 changes: 2 additions & 0 deletions core/arch/x86_code.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ new_thread_setup(priv_mcontext_t *mc)
set_stolen_reg_val(mc, get_clone_record_stolen_value(crec));
/* set the thread register if necessary */
set_thread_register_from_clone_record(crec);
# elif defined(RISCV64)
ASSERT_NOT_IMPLEMENTED(false);
# endif

DEBUG_DECLARE(int rc =)
Expand Down
10 changes: 6 additions & 4 deletions core/fcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -4244,10 +4244,12 @@ fcache_reset_all_caches_proactively(uint target)
/* no lock needed */
dynamo_resetting = true;

IF_AARCHXX({
if (INTERNAL_OPTION(steal_reg_at_reset) != 0)
arch_reset_stolen_reg();
});
#ifdef AARCHXX
if (INTERNAL_OPTION(steal_reg_at_reset) != 0)
arch_reset_stolen_reg();
#elif defined(RISCV64)
ASSERT_NOT_IMPLEMENTED(false);
#endif

/* We free everything before re-init so we can free all heap units.
* For everything to be freed, it must either be individually freed,
Expand Down
6 changes: 1 addition & 5 deletions core/ir/instr_shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -3794,13 +3794,9 @@ instr_check_tls_spill_restore(instr_t *instr, bool *spill, reg_id_t *reg, int *o
# ifdef X86
opnd_is_far_base_disp(memop) && opnd_get_segment(memop) == SEG_TLS &&
opnd_is_abs_base_disp(memop)
# elif defined(AARCHXX)
# elif defined(AARCHXX) || defined(RISCV64)
opnd_is_base_disp(memop) && opnd_get_base(memop) == dr_reg_stolen &&
opnd_get_index(memop) == DR_REG_NULL
# elif defined(RISCV64)
/* FIXME i#3544: Check if valid. */
opnd_is_base_disp(memop) && opnd_get_base(memop) == DR_REG_TP &&
opnd_get_index(memop) == DR_REG_NULL
# endif
) {
if (reg != NULL)
Expand Down
7 changes: 6 additions & 1 deletion core/ir/opnd.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,12 @@ opnd_immed_float_arch(uint opcode);
#ifdef AARCHXX
# define DR_REG_STOLEN_MIN IF_X64_ELSE(DR_REG_X9, DR_REG_R8) /* DR_REG_SYSNUM + 1 */
# define DR_REG_STOLEN_MAX IF_X64_ELSE(DR_REG_X29, DR_REG_R12)
/* DR's stolen register for TLS access */
/* DR's stolen register for TLS access. */
extern reg_id_t dr_reg_stolen;
#elif defined(RISCV64)
# define DR_REG_STOLEN_MIN DR_REG_X18 /* DR_REG_SYSNUM + 1 */
# define DR_REG_STOLEN_MAX DR_REG_X31
/* DR's stolen register for TLS access. */
extern reg_id_t dr_reg_stolen;
#endif

Expand Down
11 changes: 5 additions & 6 deletions core/ir/riscv64/opnd.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "instr.h"
#include "arch.h"

reg_id_t dr_reg_stolen = DR_REG_NULL;

uint
opnd_immed_float_arch(uint opcode)
{
Expand All @@ -46,13 +48,11 @@ DR_API
bool
reg_is_stolen(reg_id_t reg)
{
if (dr_reg_fixer[reg] == dr_reg_stolen && dr_reg_fixer[reg] != DR_REG_NULL)
return true;
return false;
}

#define X0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x0)))
#define X1_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, x1)))
#define F0_OFFSET ((MC_OFFS) + (offsetof(priv_mcontext_t, f0)))

int
opnd_get_reg_dcontext_offs(reg_id_t reg)
{
Expand All @@ -69,8 +69,7 @@ opnd_get_reg_dcontext_offs(reg_id_t reg)
opnd_t
opnd_create_sized_tls_slot(int offs, opnd_size_t size)
{
/* FIXME i#3544: Check if this is actual TP or one stolen by DynamoRIO? */
return opnd_create_base_disp(DR_REG_TP, REG_NULL, 0, offs, size);
return opnd_create_base_disp(dr_reg_stolen, REG_NULL, 0, offs, size);
}

#endif /* !STANDALONE_DECODER */
14 changes: 11 additions & 3 deletions core/lib/instrument.c
Original file line number Diff line number Diff line change
Expand Up @@ -4633,7 +4633,7 @@ dr_set_tls_field(void *drcontext, void *value)
DR_API void *
dr_get_dr_segment_base(IN reg_id_t seg)
{
#ifdef AARCHXX
#if defined(AARCHXX) || defined(RISCV64)
if (seg == dr_reg_stolen)
return os_get_dr_tls_base(get_thread_private_dcontext());
else
Expand All @@ -4650,7 +4650,7 @@ dr_raw_tls_calloc(OUT reg_id_t *tls_register, OUT uint *offset, IN uint num_slot
{
CLIENT_ASSERT(tls_register != NULL, "dr_raw_tls_calloc: tls_register cannot be NULL");
CLIENT_ASSERT(offset != NULL, "dr_raw_tls_calloc: offset cannot be NULL");
*tls_register = IF_X86_ELSE(SEG_TLS, IF_RISCV64_ELSE(DR_REG_TP, dr_reg_stolen));
*tls_register = IF_X86_ELSE(SEG_TLS, dr_reg_stolen);
if (num_slots == 0)
return true;
return os_tls_calloc(offset, num_slots, alignment);
Expand Down Expand Up @@ -6503,6 +6503,8 @@ dr_get_mcontext_priv(dcontext_t *dcontext, dr_mcontext_t *dmc, priv_mcontext_t *
(reg_t)d_r_get_tls(os_tls_offset(TLS_REG_STOLEN_SLOT)));
}
}
#elif defined(RISCV64)
ASSERT_NOT_IMPLEMENTED(false);
#endif

/* XXX: should we set the pc field?
Expand Down Expand Up @@ -6566,6 +6568,8 @@ dr_set_mcontext(void *drcontext, dr_mcontext_t *context)
/* save the reg val on the stack to be clobbered by the the copy below */
reg_val = get_stolen_reg_val(state);
}
#elif defined(RISCV64)
CLIENT_ASSERT(false, "NYI on RISCV64");
#endif
if (!dr_mcontext_to_priv_mcontext(state, context))
return false;
Expand All @@ -6574,6 +6578,8 @@ dr_set_mcontext(void *drcontext, dr_mcontext_t *context)
/* restore the reg val on the stack clobbered by the copy above */
set_stolen_reg_val(state, reg_val);
}
#elif defined(RISCV64)
CLIENT_ASSERT(false, "NYI on RISCV64");
#endif

if (TEST(DR_MC_CONTROL, context->flags)) {
Expand Down Expand Up @@ -7352,7 +7358,7 @@ DR_API
reg_id_t
dr_get_stolen_reg()
{
return IF_AARCHXX_ELSE(dr_reg_stolen, REG_NULL);
return IF_X86_ELSE(DR_REG_NULL, dr_reg_stolen);
}

DR_API
Expand All @@ -7368,6 +7374,8 @@ dr_insert_get_stolen_reg_value(void *drcontext, instrlist_t *ilist, instr_t *ins
#ifdef AARCHXX
instrlist_meta_preinsert(
ilist, instr, instr_create_restore_from_tls(drcontext, reg, TLS_REG_STOLEN_SLOT));
#elif defined(RISCV64)
CLIENT_ASSERT(false, "NYI on RISCV64");
#endif
return true;
}
Expand Down
4 changes: 2 additions & 2 deletions core/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -1233,9 +1233,9 @@ static bool
check_option_compatibility_helper(int recurse_count)
{
bool changed_options = false;
# ifdef AARCH64
# if defined(AARCH64) || defined(RISCV64)
if (!DYNAMO_OPTION(bb_prefixes)) {
USAGE_ERROR("bb_prefixes must be true on AArch64");
USAGE_ERROR("bb_prefixes must be true on AArch64/RISCV64");
dynamo_options.bb_prefixes = true;
changed_options = true;
}
Expand Down
Loading

0 comments on commit 3a91038

Please sign in to comment.