i#3044 AArch64 SVE codec: fix scalar+immediate LD/ST offsets (#6390)
All SVE scalar+immediate LD[1234]/ST[1234] instructions have a signed 4-bit immediate
value that encodes a vector index offset from the base register. This value was being
used directly in the IR for these instructions; however, base+disp memory operands
should always use a byte displacement.

This changes the codec to use byte displacements in the IR and updates
the codec unit tests accordingly.
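
A minimal sketch of the scaling rule as a standalone function (illustrative only;
this helper is not part of the codec):

    /* Illustrative only: convert the signed 4-bit index immediate of an SVE
     * scalar+immediate LD/ST into the byte displacement the IR should carry.
     * One index step covers one whole transfer: register_count vectors of
     * vector_length_bits / 8 bytes each.
     */
    static int
    sve_index_to_byte_disp(int simm4 /* -8 .. 7 */, unsigned int vector_length_bits,
                           unsigned int register_count)
    {
        const unsigned int transfer_bytes = register_count * vector_length_bits / 8;
        /* E.g. LD2D with a 256-bit VL: #3, MUL VL => 3 * (2 * 32) = 192 bytes. */
        return simm4 * (int)transfer_bytes;
    }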

The following instructions are updated:

LD1B    { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1B    { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1B    { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1B    { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1D    { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1H    { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1H    { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1H    { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1SB   { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1SB   { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1SB   { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1SH   { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1SH   { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1SW   { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1W    { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD1W    { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD2B    { <Zt1>.B, <Zt2>.B }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD2D    { <Zt1>.D, <Zt2>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD2H    { <Zt1>.H, <Zt2>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD2W    { <Zt1>.S, <Zt2>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD3B    { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD3D    { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD3H    { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD3W    { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD4B    { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD4D    { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD4H    { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LD4W    { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1B  { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1B  { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1B  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1B  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1D  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1H  { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1H  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1H  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1SB { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1SB { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1SB { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1SH { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1SH { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1SW { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1W  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNF1W  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNT1B  { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNT1D  { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNT1H  { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
LDNT1W  { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<simm>, MUL VL}]
ST1B    { <Zt>.<T> }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST1D    { <Zt>.D }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST1H    { <Zt>.<T> }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST1W    { <Zt>.<T> }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST2B    { <Zt1>.B, <Zt2>.B }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST2D    { <Zt1>.D, <Zt2>.D }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST2H    { <Zt1>.H, <Zt2>.H }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST2W    { <Zt1>.S, <Zt2>.S }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST3B    { <Zt1>.B, <Zt2>.B, <Zt3>.B }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST3D    { <Zt1>.D, <Zt2>.D, <Zt3>.D }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST3H    { <Zt1>.H, <Zt2>.H, <Zt3>.H }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST3W    { <Zt1>.S, <Zt2>.S, <Zt3>.S }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST4B    { <Zt1>.B, <Zt2>.B, <Zt3>.B, <Zt4>.B }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST4D    { <Zt1>.D, <Zt2>.D, <Zt3>.D, <Zt4>.D }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST4H    { <Zt1>.H, <Zt2>.H, <Zt3>.H, <Zt4>.H }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
ST4W    { <Zt1>.S, <Zt2>.S, <Zt3>.S, <Zt4>.S }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
STNT1B  { <Zt>.B }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
STNT1D  { <Zt>.D }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
STNT1H  { <Zt>.H }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]
STNT1W  { <Zt>.S }, <Pg>, [<Xn|SP>{, #<simm>, MUL VL}]

Issue: #3044
jackgallagher-arm authored Oct 25, 2023
1 parent 196979b commit 352e496
Showing 4 changed files with 1,658 additions and 1,637 deletions.
10 changes: 10 additions & 0 deletions api/docs/release.dox
@@ -138,6 +138,16 @@ changes:
     was added to indicate failure.
 
 Further non-compatibility-affecting changes include:
+ - Fixed a bug in the AArch64 codec with the way that SVE scalar+immediate predicated
+   contiguous load and store instructions represented the immediate offset in the IR.
+   In 10.0.0 the memory operand in these instructions used the immediate value from the
+   instruction (which is an index to be scaled by the vector length) as the displacement,
+   whereas the displacement value in a DynamoRIO memory operand should always be a byte
+   offset. This has now been corrected.
+   Traces and other tool results created with DynamoRIO prior to this fix may have
+   incorrect results if the application contained these instructions.
+   See <a href="https://github.com/DynamoRIO/dynamorio/pull/6390">PR #6390</a> for the
+   full list of affected instructions.
 - Added core-sharded analysis tool support where traces are sharded by
   core instead of by thread, with the thread schedules onto the cores
   either following how they were traced or using a dynamic schedule.
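
To make the release note concrete, a worked example (the 256-bit vector length is an
assumed value for illustration; SVE allows 128 to 2048 bits):

    LD1D { Z0.D }, P0/Z, [X0, #2, MUL VL]

    vector length (VL)         = 256 bits = 32 bytes   (assumed)
    transfer size              = 1 register * 32 bytes = 32 bytes
    IR displacement before fix = 2            (raw index, incorrect)
    IR displacement after fix  = 2 * 32 = 64  (byte offset, correct)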
29 changes: 20 additions & 9 deletions core/ir/aarch64/codec.c
@@ -5511,21 +5511,29 @@ static inline bool
 decode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
     const uint register_count = BITS(enc, 22, 21) + 1;
-    const opnd_size_t transfer_size =
-        opnd_size_from_bytes((register_count * dr_get_sve_vector_length()) / 8);
+    const uint transfer_bytes = (register_count * dr_get_sve_vector_length()) / 8;
+    /* The offset is scaled by the size of the vector in memory.
+     * This is the same as the transfer size.
+     */
+    const uint scale = transfer_bytes;
 
-    return decode_svemem_gpr_simm4(enc, transfer_size, register_count, opnd);
+    return decode_svemem_gpr_simm4(enc, opnd_size_from_bytes(transfer_bytes), scale,
+                                   opnd);
 }
 
 static inline bool
 encode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, opnd_t opnd,
                                      OUT uint *enc_out)
 {
     const uint register_count = BITS(enc, 22, 21) + 1;
-    const opnd_size_t transfer_size =
-        opnd_size_from_bytes((register_count * dr_get_sve_vector_length()) / 8);
+    const uint transfer_bytes = (register_count * dr_get_sve_vector_length()) / 8;
+    /* The offset is scaled by the size of the vector in memory.
+     * This is the same as the transfer size.
+     */
+    const uint scale = transfer_bytes;
 
-    return encode_svemem_gpr_simm4(enc, transfer_size, register_count, opnd, enc_out);
+    return encode_svemem_gpr_simm4(enc, opnd_size_from_bytes(transfer_bytes), scale, opnd,
+                                   enc_out);
 }
 
 /* hsd_immh_sz: The element size of a vector mediated by immh with possible values h, s

@@ -7931,15 +7939,18 @@ memory_transfer_size_from_dtype(uint enc)
 static inline bool
 decode_opnd_svemem_gpr_simm4_vl_1reg(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    return decode_svemem_gpr_simm4(enc, memory_transfer_size_from_dtype(enc), 1, opnd);
+    const opnd_size_t transfer_size = memory_transfer_size_from_dtype(enc);
+    return decode_svemem_gpr_simm4(enc, transfer_size, opnd_size_in_bytes(transfer_size),
+                                   opnd);
 }
 
 static inline bool
 encode_opnd_svemem_gpr_simm4_vl_1reg(uint enc, int opcode, byte *pc, opnd_t opnd,
                                      OUT uint *enc_out)
 {
-    return encode_svemem_gpr_simm4(enc, memory_transfer_size_from_dtype(enc), 1, opnd,
-                                   enc_out);
+    const opnd_size_t transfer_size = memory_transfer_size_from_dtype(enc);
+    return encode_svemem_gpr_simm4(enc, transfer_size, opnd_size_in_bytes(transfer_size),
+                                   opnd, enc_out);
 }
 
 /* SVE memory operand [<Xn|SP>, <Xm> LSL #x], mem transfer size based on ssz */
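
For reference, a hypothetical sketch of how a simm4 decoder can apply the scale when
building the operand. This is not the real decode_svemem_gpr_simm4(); the bit positions
of the immediate and base register fields are assumptions for illustration, and only
opnd_create_base_disp() is the actual DynamoRIO API:

    /* Hypothetical sketch: sign-extend the assumed 4-bit index field, scale it
     * to bytes, and build the base+disp operand the IR expects.
     */
    static bool
    decode_svemem_gpr_simm4_sketch(uint enc, opnd_size_t transfer_size, uint scale,
                                   OUT opnd_t *opnd)
    {
        int index = (int)((enc >> 16) & 0xf); /* assumed field position 19..16 */
        if (index >= 8)
            index -= 16; /* sign-extend the 4-bit value to -8 .. 7 */
        /* Simplified base register mapping; ignores the XZR/SP special case. */
        const reg_id_t base = DR_REG_X0 + ((enc >> 5) & 0x1f);
        /* The IR stores a byte displacement, so the index is scaled here. */
        *opnd = opnd_create_base_disp(base, DR_REG_NULL, /*scale=*/0,
                                      index * (int)scale, transfer_size);
        return true;
    }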