Skip to content

Commit

Permalink
Fix AArch64 fp reg state save/restore
Browse files Browse the repository at this point in the history
Fixes the slot used to save and restore FP regs at fcache enter and
return events. PR #6725 adjusted the slots used during signal handling
in core/unix/signal_linux_aarch64.c but did not adjust the same in
fcache enter/exit and attach events. Prior to that the FP regs were
simply stored in a continuous manner in signal handling code, and
fcache enter/exit routines.

The mismatch between slot usage caused failueres in the signalNNN1
tests on some systems.
  • Loading branch information
abhinav92003 committed Apr 5, 2024
1 parent 2587672 commit 8c44158
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 22 deletions.
48 changes: 37 additions & 11 deletions core/arch/aarch64/aarch64.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2019-2023 Google, Inc. All rights reserved.
* Copyright (c) 2019-2024 Google, Inc. All rights reserved.
* Copyright (c) 2016 ARM Limited. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -226,16 +226,42 @@ save_priv_mcontext_helper:
str w2, [x0, #(16 * ARG_SZ*2 + 12)]
str w3, [x0, #(16 * ARG_SZ*2 + 16)]
add x4, x0, #simd_OFFSET
st1 {v0.2d-v3.2d}, [x4], #64
st1 {v4.2d-v7.2d}, [x4], #64
st1 {v8.2d-v11.2d}, [x4], #64
st1 {v12.2d-v15.2d}, [x4], #64
st1 {v16.2d-v19.2d}, [x4], #64
st1 {v20.2d-v23.2d}, [x4], #64
st1 {v24.2d-v27.2d}, [x4], #64
st1 {v28.2d-v31.2d}, [x4], #64
/* TODO i#5365: Save Z/P regs as well? Will require runtime check of
* ID_AA64PFR0_EL1 for FEAT_SVE.

/* Registers Q0-Q31 map directly to registers V0-V31. */
str q0, [x4], #64
str q1, [x4], #64
str q2, [x4], #64
str q3, [x4], #64
str q4, [x4], #64
str q5, [x4], #64
str q6, [x4], #64
str q7, [x4], #64
str q8, [x4], #64
str q9, [x4], #64
str q10, [x4], #64
str q11, [x4], #64
str q12, [x4], #64
str q13, [x4], #64
str q14, [x4], #64
str q15, [x4], #64
str q16, [x4], #64
str q17, [x4], #64
str q18, [x4], #64
str q19, [x4], #64
str q20, [x4], #64
str q21, [x4], #64
str q22, [x4], #64
str q23, [x4], #64
str q24, [x4], #64
str q25, [x4], #64
str q26, [x4], #64
str q27, [x4], #64
str q28, [x4], #64
str q29, [x4], #64
str q30, [x4], #64
str q31, [x4], #64
/* TODO i#5365, i#5036: Save Z/P regs as well? Will require runtime
* check of ID_AA64PFR0_EL1 for FEAT_SVE.
*/
ret

Expand Down
29 changes: 18 additions & 11 deletions core/arch/aarch64/emit_utils.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2014-2021 Google, Inc. All rights reserved.
* Copyright (c) 2014-2024 Google, Inc. All rights reserved.
* Copyright (c) 2016 ARM Limited. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -567,15 +567,18 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1),
opnd_create_reg(REG_DCXT),
OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd))));
for (i = 0; i < 32; i += 2) {
/* ldp q(i), q(i + 1), [x1, #(i * 16)] */
ASSERT(sizeof(dr_simd_t) == 64);
for (i = 0; i < 32; i ++) {
/* ldr q(i), [x1, #(i * sizeof(dr_simd_t))] */
APP(ilist,
INSTR_CREATE_ldp(
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_Q0 + i),
opnd_create_reg(DR_REG_Q0 + i + 1),
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32)));
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 64, OPSZ_16)));
}
if (proc_has_feature(FEATURE_SVE)) {
/* TODO i#5036: Ensure that the following uses the correct offset for the
* dr_simd_t element corresponding to each vector reg.
*/
for (i = 0; i < 32; i++) {
/* ldr z(i), [x1, #(i mul vl)]
* From the SVE manual:
Expand Down Expand Up @@ -778,20 +781,24 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1),
opnd_create_reg(REG_DCXT),
OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd))));
for (i = 0; i < 32; i += 2) {
/* stp q(i), q(i + 1), [x1, #(i * 16)]
ASSERT(sizeof(dr_simd_t) == 64);
for (i = 0; i < 32; i++) {
/* str q(i), [x1, #(i * sizeof(dr_simd_t))]
* From the AArch64 manual:
* "The signed immediate byte offset is a multiple of 16 in the range
* -1024 to 1008, defaulting to 0 and encoded in the imm7 field as
* <imm>/16."
*/
APP(ilist,
INSTR_CREATE_stp(
INSTR_CREATE_str(
dcontext,
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32),
opnd_create_reg(DR_REG_Q0 + i), opnd_create_reg(DR_REG_Q0 + i + 1)));
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 64, OPSZ_16),
opnd_create_reg(DR_REG_Q0 + i)));
}
if (proc_has_feature(FEATURE_SVE)) {
/* TODO i#5036: Ensure that the following uses the correct offset for the
* dr_simd_t element corresponding to each vector reg.
*/
for (i = 0; i < 32; i++) {
/* str z(i), [x1, #(i mul vl)]
* "Store a vector register to a memory address generated by a
Expand Down

0 comments on commit 8c44158

Please sign in to comment.