Skip to content

Commit

Permalink
i#6758: Fix AArch64 FP state at fcache events and attach (#6757)
Browse files Browse the repository at this point in the history
Fixes the slot used to save and restore FP regs at fcache enter and
return events. PR #6725 adjusted the slots used during signal handling
in core/unix/signal_linux_aarch64.c but did not adjust the same in
fcache enter/return and attach events. Prior to that PR, the FP regs
were simply stored in a contiguous manner in signal handling code and
fcache enter/return routines (instead of in their respective dr_simd_t
struct member), which was a bit confusing.

The mismatch between slot usage in signal handling and fcache
enter/return code caused failures in the signalNNN1 tests on some
systems. Verified that those tests pass with this fix.

Also fixes the same issue in save_priv_mcontext_helper which is used in
the dr_app_start API. Unit tests for this scenario will be added as part
of #6759.

Issue: #5036, #6755, #5365
Fixes #6758
  • Loading branch information
abhinav92003 authored Apr 5, 2024
1 parent 367b127 commit a0b86cc
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 32 deletions.
48 changes: 37 additions & 11 deletions core/arch/aarch64/aarch64.asm
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2019-2023 Google, Inc. All rights reserved.
* Copyright (c) 2019-2024 Google, Inc. All rights reserved.
* Copyright (c) 2016 ARM Limited. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -226,16 +226,42 @@ save_priv_mcontext_helper:
str w2, [x0, #(16 * ARG_SZ*2 + 12)]
str w3, [x0, #(16 * ARG_SZ*2 + 16)]
add x4, x0, #simd_OFFSET
st1 {v0.2d-v3.2d}, [x4], #64
st1 {v4.2d-v7.2d}, [x4], #64
st1 {v8.2d-v11.2d}, [x4], #64
st1 {v12.2d-v15.2d}, [x4], #64
st1 {v16.2d-v19.2d}, [x4], #64
st1 {v20.2d-v23.2d}, [x4], #64
st1 {v24.2d-v27.2d}, [x4], #64
st1 {v28.2d-v31.2d}, [x4], #64
/* TODO i#5365: Save Z/P regs as well? Will require runtime check of
* ID_AA64PFR0_EL1 for FEAT_SVE.

/* Registers Q0-Q31 map directly to registers V0-V31. */
str q0, [x4], #64
str q1, [x4], #64
str q2, [x4], #64
str q3, [x4], #64
str q4, [x4], #64
str q5, [x4], #64
str q6, [x4], #64
str q7, [x4], #64
str q8, [x4], #64
str q9, [x4], #64
str q10, [x4], #64
str q11, [x4], #64
str q12, [x4], #64
str q13, [x4], #64
str q14, [x4], #64
str q15, [x4], #64
str q16, [x4], #64
str q17, [x4], #64
str q18, [x4], #64
str q19, [x4], #64
str q20, [x4], #64
str q21, [x4], #64
str q22, [x4], #64
str q23, [x4], #64
str q24, [x4], #64
str q25, [x4], #64
str q26, [x4], #64
str q27, [x4], #64
str q28, [x4], #64
str q29, [x4], #64
str q30, [x4], #64
str q31, [x4], #64
/* TODO i#5365, i#5036: Save Z/P regs as well? Will require runtime
* check of ID_AA64PFR0_EL1 for FEAT_SVE.
*/
ret

Expand Down
39 changes: 18 additions & 21 deletions core/arch/aarch64/emit_utils.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2014-2021 Google, Inc. All rights reserved.
* Copyright (c) 2014-2024 Google, Inc. All rights reserved.
* Copyright (c) 2016 ARM Limited. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -567,13 +567,12 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1),
opnd_create_reg(REG_DCXT),
OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd))));
for (i = 0; i < 32; i += 2) {
/* ldp q(i), q(i + 1), [x1, #(i * 16)] */
for (i = 0; i < 32; i++) {
/* ldr q(i), [x1, #(i * sizeof(dr_simd_t))] */
APP(ilist,
INSTR_CREATE_ldp(
dcontext, opnd_create_reg(DR_REG_Q0 + i),
opnd_create_reg(DR_REG_Q0 + i + 1),
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32)));
INSTR_CREATE_ldr(dcontext, opnd_create_reg(DR_REG_Q0 + i),
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
i * sizeof(dr_simd_t), OPSZ_16)));
}
if (proc_has_feature(FEATURE_SVE)) {
for (i = 0; i < 32; i++) {
Expand All @@ -588,7 +587,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_Z0 + i),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(),
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes()))));
}
/* add x1, x(dcxt), #(offset svep) */
Expand All @@ -605,8 +604,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_P0 + i),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0,
i * (proc_get_vector_length_bytes() / 8),
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8))));
}
/* There is no load instruction for the first-fault register (FFR). Use
Expand All @@ -633,7 +631,7 @@ append_restore_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8),
DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8))));
}
}
Expand Down Expand Up @@ -778,18 +776,18 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
XINST_CREATE_add_2src(dcontext, opnd_create_reg(DR_REG_X1),
opnd_create_reg(REG_DCXT),
OPND_CREATE_INTPTR(offsetof(priv_mcontext_t, simd))));
for (i = 0; i < 32; i += 2) {
/* stp q(i), q(i + 1), [x1, #(i * 16)]
for (i = 0; i < 32; i++) {
/* str q(i), [x1, #(i * sizeof(dr_simd_t))]
* From the AArch64 manual:
* "The signed immediate byte offset is a multiple of 16 in the range
* -1024 to 1008, defaulting to 0 and encoded in the imm7 field as
* <imm>/16."
*/
APP(ilist,
INSTR_CREATE_stp(
dcontext,
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0, i * 16, OPSZ_32),
opnd_create_reg(DR_REG_Q0 + i), opnd_create_reg(DR_REG_Q0 + i + 1)));
INSTR_CREATE_str(dcontext,
opnd_create_base_disp(DR_REG_X1, DR_REG_NULL, 0,
i * sizeof(dr_simd_t), OPSZ_16),
opnd_create_reg(DR_REG_Q0 + i)));
}
if (proc_has_feature(FEATURE_SVE)) {
for (i = 0; i < 32; i++) {
Expand All @@ -803,7 +801,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
INSTR_CREATE_str(
dcontext,
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, i * proc_get_vector_length_bytes(),
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes())),
opnd_create_reg(DR_REG_Z0 + i)));
}
Expand All @@ -818,8 +816,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
INSTR_CREATE_str(
dcontext,
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0,
i * (proc_get_vector_length_bytes() / 8),
DR_REG_X1, DR_REG_NULL, 0, i * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8)),
opnd_create_reg(DR_REG_P0 + i)));
}
Expand Down Expand Up @@ -848,7 +845,7 @@ append_save_simd_reg(dcontext_t *dcontext, instrlist_t *ilist, bool absolute)
INSTR_CREATE_ldr(
dcontext, opnd_create_reg(DR_REG_P15),
opnd_create_base_disp(
DR_REG_X1, DR_REG_NULL, 0, 15 * (proc_get_vector_length_bytes() / 8),
DR_REG_X1, DR_REG_NULL, 0, 15 * sizeof(dr_simd_t),
opnd_size_from_bytes(proc_get_vector_length_bytes() / 8))));
}
}
Expand Down

0 comments on commit a0b86cc

Please sign in to comment.