From 3f5b478b9127026d79a2034683f54891500336a8 Mon Sep 17 00:00:00 2001 From: Phil Ramsey Date: Tue, 12 Dec 2023 09:46:38 +0000 Subject: [PATCH] i#3044 AArch64 SVE codec: Add SVE2 MUL variants (#6501) This patch adds the appropriate macros, tests and codec entries to decode and encode the following instructions: MUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> MUL <Zd>.D, <Zn>.D, <Zm>.D[<index>] MUL <Zd>.H, <Zn>.H, <Zm>.H[<index>] MUL <Zd>.S, <Zn>.S, <Zm>.S[<index>] Issue: #3044 --- api/docs/release.dox | 2 + core/ir/aarch64/codec_sve2.txt | 22 +++--- core/ir/aarch64/instr_create_api.h | 35 ++++++++- suite/tests/api/dis-a64-sve2.txt | 120 +++++++++++++++++++++++++++++ suite/tests/api/ir_aarch64_sve.c | 12 +-- suite/tests/api/ir_aarch64_sve2.c | 98 +++++++++++++++++++++++ 6 files changed, 273 insertions(+), 16 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index 445e4567fba..1fa147bd9c7 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -140,6 +140,8 @@ changes: to include direct switch dependencies. This is not a binary compatibility change as the old value still refers purely to timestamps, but on a recompile it refers to timestamps and direct switches, which is what most users should want. + - Rename the macro INSTR_CREATE_mul_sve to INSTR_CREATE_mul_sve_imm to + differentiate it from the other SVE MUL instructions. 
Further non-compatibility-affecting changes include: - Added raw2trace support to inject system call kernel trace templates collected from diff --git a/core/ir/aarch64/codec_sve2.txt b/core/ir/aarch64/codec_sve2.txt index 767c4a95275..093e99cf4f0 100644 --- a/core/ir/aarch64/codec_sve2.txt +++ b/core/ir/aarch64/codec_sve2.txt @@ -88,7 +88,7 @@ 01000101001xxxxx101000xxxxxxxxxx n 1071 SVE2 histseg z_b_0 : z_b_5 z_b_16 11000100000xxxxx110xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo 10000100000xxxxx101xxxxxxxxxxxxx n 950 SVE2 ldnt1b z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo -11000101100xxxxx110xxxxxxxxxxxxx n 992 SVE2 ldnt1d z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo +11000101100xxxxx110xxxxxxxxxxxxx n 992 SVE2 ldnt1d z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo 11000100100xxxxx110xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo 10000100100xxxxx101xxxxxxxxxxxxx n 993 SVE2 ldnt1h z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo 11000100000xxxxx100xxxxxxxxxxxxx n 1186 SVE2 ldnt1sb z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo @@ -97,8 +97,12 @@ 10000100100xxxxx100xxxxxxxxxxxxx n 1187 SVE2 ldnt1sh z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo 11000101000xxxxx100xxxxxxxxxxxxx n 1188 SVE2 ldnt1sw z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo 11000101000xxxxx110xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_d_0 : svemem_vec_30sd_gpr16 p10_zer_lo -10000101000xxxxx101xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo +10000101000xxxxx101xxxxxxxxxxxxx n 994 SVE2 ldnt1w z_s_0 : svemem_vec_30sd_gpr16 p10_zer_lo 01000101xx1xxxxx100xxxxxxxx0xxxx w 1189 SVE2 match p_size_bh_0 : p10_zer_lo z_size_bh_5 z_size_bh_16 +00000100xx1xxxxx011000xxxxxxxxxx n 321 SVE2 mul z_size_bhsd_0 : z_size_bhsd_5 z_size_bhsd_16 +01000100111xxxxx111110xxxxxxxxxx n 321 SVE2 mul z_d_0 : z_d_5 z4_d_16 i1_index_20 +010001000x1xxxxx111110xxxxxxxxxx n 321 SVE2 mul z_h_0 : z_h_5 z3_h_16 i3_index_19 +01000100101xxxxx111110xxxxxxxxxx n 321 SVE2 mul z_s_0 : z_s_5 z3_s_16 i2_index_19 
00000100111xxxxx001111xxxxxxxxxx n 1072 SVE2 nbsl z_d_0 : z_d_0 z_d_16 z_d_5 01000101xx1xxxxx100xxxxxxxx1xxxx w 1190 SVE2 nmatch p_size_bh_0 : p10_zer_lo z_size_bh_5 z_size_bh_16 00000100001xxxxx011001xxxxxxxxxx n 328 SVE2 pmul z_msz_bhsd_0 : z_msz_bhsd_5 z_msz_bhsd_16 @@ -230,13 +234,13 @@ 01000101xx0xxxxx100011xxxxxxxxxx n 1116 SVE2 ssubltb z_size_hsd_0 : z_sizep1_bhs_5 z_sizep1_bhs_16 01000101xx0xxxxx010100xxxxxxxxxx n 1117 SVE2 ssubwb z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16 01000101xx0xxxxx010101xxxxxxxxxx n 1118 SVE2 ssubwt z_size_hsd_0 : z_size_hsd_5 z_sizep1_bhs_16 -11100100000xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_d_0 p10_lo -11100100010xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_s_0 p10_lo -11100101100xxxxx001xxxxxxxxxxxxx n 1004 SVE2 stnt1d svemem_vec_30sd_gpr16 : z_d_0 p10_lo -11100100100xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_d_0 p10_lo -11100100110xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_s_0 p10_lo -11100101000xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_d_0 p10_lo -11100101010xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_s_0 p10_lo +11100100000xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_d_0 p10_lo +11100100010xxxxx001xxxxxxxxxxxxx n 952 SVE2 stnt1b svemem_vec_22sd_gpr16 : z_s_0 p10_lo +11100101100xxxxx001xxxxxxxxxxxxx n 1004 SVE2 stnt1d svemem_vec_30sd_gpr16 : z_d_0 p10_lo +11100100100xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_d_0 p10_lo +11100100110xxxxx001xxxxxxxxxxxxx n 1005 SVE2 stnt1h svemem_vec_22sd_gpr16 : z_s_0 p10_lo +11100101000xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_d_0 p10_lo +11100101010xxxxx001xxxxxxxxxxxxx n 1006 SVE2 stnt1w svemem_vec_22sd_gpr16 : z_s_0 p10_lo 01000101xx1xxxxx011100xxxxxxxxxx n 1119 SVE2 subhnb z_sizep1_bhs_0 : z_size_hsd_5 z_size_hsd_16 01000101xx1xxxxx011101xxxxxxxxxx n 1120 SVE2 subhnt 
z_sizep1_bhs_0 : z_sizep1_bhs_0 z_size_hsd_5 z_size_hsd_16 01000100xx011100100xxxxxxxxxxxxx n 474 SVE2 suqadd z_size_bhsd_0 : p10_mrg_lo z_size_bhsd_0 z_size_bhsd_5 diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index 6a4e2e97168..ccee4a382f1 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -5727,7 +5727,7 @@ * \param Zdn The first source and destination vector register, Z (Scalable) * \param simm The signed immediate imm */ -#define INSTR_CREATE_mul_sve(dc, Zdn, simm) \ +#define INSTR_CREATE_mul_sve_imm(dc, Zdn, simm) \ instr_create_1dst_2src(dc, OP_mul, Zdn, Zdn, simm) /** @@ -18150,4 +18150,37 @@ dc, OP_dc_gzva, \ opnd_create_base_disp(opnd_get_reg(Rn), DR_REG_NULL, 0, 0, OPSZ_sys)) +/** + * Creates a MUL instruction. + * + * This macro is used to encode the forms: + \verbatim + MUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> + \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zd The destination vector register. Can be Z.b, Z.h, Z.s or Z.d. + * \param Zn The first source vector register. Can be Z.b, Z.h, Z.s or Z.d. + * \param Zm The second source vector register. Can be Z.b, Z.h, Z.s or Z.d. + */ +#define INSTR_CREATE_mul_sve_vector(dc, Zd, Zn, Zm) \ + instr_create_1dst_2src(dc, OP_mul, Zd, Zn, Zm) + +/** + * Creates a MUL instruction. + * + * This macro is used to encode the forms: + \verbatim + MUL <Zd>.D, <Zn>.D, <Zm>.D[<index>] + MUL <Zd>.H, <Zn>.H, <Zm>.H[<index>] + MUL <Zd>.S, <Zn>.S, <Zm>.S[<index>] + \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zd The destination vector register. Can be Z.h, Z.s or Z.d. + * \param Zn The first source vector register. Can be Z.h, Z.s or Z.d. + * \param Zm The second source vector register. Can be Z.h or Z.s (Z0-Z7), or Z.d (Z0-Z15). + * \param index The immediate index for Zm: 0-7 for Z.h, 0-3 for Z.s, 0-1 for Z.d. 
+ */ +#define INSTR_CREATE_mul_sve_idx(dc, Zd, Zn, Zm, index) \ + instr_create_1dst_3src(dc, OP_mul, Zd, Zn, Zm, index) + #endif /* DR_IR_MACROS_AARCH64_H */ diff --git a/suite/tests/api/dis-a64-sve2.txt b/suite/tests/api/dis-a64-sve2.txt index 00a8e7375b5..7871cead7e0 100644 --- a/suite/tests/api/dis-a64-sve2.txt +++ b/suite/tests/api/dis-a64-sve2.txt @@ -1850,6 +1850,126 @@ c51edfff : ldnt1w z31.d, p7/Z, [z31.d, x30] : ldnt1w (%z31.d,%x30)[16by 457e9fad : match p13.h, p7/Z, z29.h, z30.h : match %p7/z %z29.h %z30.h -> %p13.h 457f9fef : match p15.h, p7/Z, z31.h, z31.h : match %p7/z %z31.h %z31.h -> %p15.h +# MUL ., ., . (MUL-Z.ZZ-_) +04206000 : mul z0.b, z0.b, z0.b : mul %z0.b %z0.b -> %z0.b +04246062 : mul z2.b, z3.b, z4.b : mul %z3.b %z4.b -> %z2.b +042660a4 : mul z4.b, z5.b, z6.b : mul %z5.b %z6.b -> %z4.b +042860e6 : mul z6.b, z7.b, z8.b : mul %z7.b %z8.b -> %z6.b +042a6128 : mul z8.b, z9.b, z10.b : mul %z9.b %z10.b -> %z8.b +042c616a : mul z10.b, z11.b, z12.b : mul %z11.b %z12.b -> %z10.b +042e61ac : mul z12.b, z13.b, z14.b : mul %z13.b %z14.b -> %z12.b +043061ee : mul z14.b, z15.b, z16.b : mul %z15.b %z16.b -> %z14.b +04326230 : mul z16.b, z17.b, z18.b : mul %z17.b %z18.b -> %z16.b +04336251 : mul z17.b, z18.b, z19.b : mul %z18.b %z19.b -> %z17.b +04356293 : mul z19.b, z20.b, z21.b : mul %z20.b %z21.b -> %z19.b +043762d5 : mul z21.b, z22.b, z23.b : mul %z22.b %z23.b -> %z21.b +04396317 : mul z23.b, z24.b, z25.b : mul %z24.b %z25.b -> %z23.b +043b6359 : mul z25.b, z26.b, z27.b : mul %z26.b %z27.b -> %z25.b +043d639b : mul z27.b, z28.b, z29.b : mul %z28.b %z29.b -> %z27.b +043f63ff : mul z31.b, z31.b, z31.b : mul %z31.b %z31.b -> %z31.b +04606000 : mul z0.h, z0.h, z0.h : mul %z0.h %z0.h -> %z0.h +04646062 : mul z2.h, z3.h, z4.h : mul %z3.h %z4.h -> %z2.h +046660a4 : mul z4.h, z5.h, z6.h : mul %z5.h %z6.h -> %z4.h +046860e6 : mul z6.h, z7.h, z8.h : mul %z7.h %z8.h -> %z6.h +046a6128 : mul z8.h, z9.h, z10.h : mul %z9.h %z10.h -> %z8.h +046c616a : mul z10.h, 
z11.h, z12.h : mul %z11.h %z12.h -> %z10.h +046e61ac : mul z12.h, z13.h, z14.h : mul %z13.h %z14.h -> %z12.h +047061ee : mul z14.h, z15.h, z16.h : mul %z15.h %z16.h -> %z14.h +04726230 : mul z16.h, z17.h, z18.h : mul %z17.h %z18.h -> %z16.h +04736251 : mul z17.h, z18.h, z19.h : mul %z18.h %z19.h -> %z17.h +04756293 : mul z19.h, z20.h, z21.h : mul %z20.h %z21.h -> %z19.h +047762d5 : mul z21.h, z22.h, z23.h : mul %z22.h %z23.h -> %z21.h +04796317 : mul z23.h, z24.h, z25.h : mul %z24.h %z25.h -> %z23.h +047b6359 : mul z25.h, z26.h, z27.h : mul %z26.h %z27.h -> %z25.h +047d639b : mul z27.h, z28.h, z29.h : mul %z28.h %z29.h -> %z27.h +047f63ff : mul z31.h, z31.h, z31.h : mul %z31.h %z31.h -> %z31.h +04a06000 : mul z0.s, z0.s, z0.s : mul %z0.s %z0.s -> %z0.s +04a46062 : mul z2.s, z3.s, z4.s : mul %z3.s %z4.s -> %z2.s +04a660a4 : mul z4.s, z5.s, z6.s : mul %z5.s %z6.s -> %z4.s +04a860e6 : mul z6.s, z7.s, z8.s : mul %z7.s %z8.s -> %z6.s +04aa6128 : mul z8.s, z9.s, z10.s : mul %z9.s %z10.s -> %z8.s +04ac616a : mul z10.s, z11.s, z12.s : mul %z11.s %z12.s -> %z10.s +04ae61ac : mul z12.s, z13.s, z14.s : mul %z13.s %z14.s -> %z12.s +04b061ee : mul z14.s, z15.s, z16.s : mul %z15.s %z16.s -> %z14.s +04b26230 : mul z16.s, z17.s, z18.s : mul %z17.s %z18.s -> %z16.s +04b36251 : mul z17.s, z18.s, z19.s : mul %z18.s %z19.s -> %z17.s +04b56293 : mul z19.s, z20.s, z21.s : mul %z20.s %z21.s -> %z19.s +04b762d5 : mul z21.s, z22.s, z23.s : mul %z22.s %z23.s -> %z21.s +04b96317 : mul z23.s, z24.s, z25.s : mul %z24.s %z25.s -> %z23.s +04bb6359 : mul z25.s, z26.s, z27.s : mul %z26.s %z27.s -> %z25.s +04bd639b : mul z27.s, z28.s, z29.s : mul %z28.s %z29.s -> %z27.s +04bf63ff : mul z31.s, z31.s, z31.s : mul %z31.s %z31.s -> %z31.s +04e06000 : mul z0.d, z0.d, z0.d : mul %z0.d %z0.d -> %z0.d +04e46062 : mul z2.d, z3.d, z4.d : mul %z3.d %z4.d -> %z2.d +04e660a4 : mul z4.d, z5.d, z6.d : mul %z5.d %z6.d -> %z4.d +04e860e6 : mul z6.d, z7.d, z8.d : mul %z7.d %z8.d -> %z6.d +04ea6128 : mul z8.d, z9.d, 
z10.d : mul %z9.d %z10.d -> %z8.d +04ec616a : mul z10.d, z11.d, z12.d : mul %z11.d %z12.d -> %z10.d +04ee61ac : mul z12.d, z13.d, z14.d : mul %z13.d %z14.d -> %z12.d +04f061ee : mul z14.d, z15.d, z16.d : mul %z15.d %z16.d -> %z14.d +04f26230 : mul z16.d, z17.d, z18.d : mul %z17.d %z18.d -> %z16.d +04f36251 : mul z17.d, z18.d, z19.d : mul %z18.d %z19.d -> %z17.d +04f56293 : mul z19.d, z20.d, z21.d : mul %z20.d %z21.d -> %z19.d +04f762d5 : mul z21.d, z22.d, z23.d : mul %z22.d %z23.d -> %z21.d +04f96317 : mul z23.d, z24.d, z25.d : mul %z24.d %z25.d -> %z23.d +04fb6359 : mul z25.d, z26.d, z27.d : mul %z26.d %z27.d -> %z25.d +04fd639b : mul z27.d, z28.d, z29.d : mul %z28.d %z29.d -> %z27.d +04ff63ff : mul z31.d, z31.d, z31.d : mul %z31.d %z31.d -> %z31.d + +# MUL .H, .H, .H[] (MUL-Z.ZZi-H) +4420f800 : mul z0.h, z0.h, z0.h[0] : mul %z0.h %z0.h $0x00 -> %z0.h +4422f862 : mul z2.h, z3.h, z2.h[0] : mul %z3.h %z2.h $0x00 -> %z2.h +442bf8a4 : mul z4.h, z5.h, z3.h[1] : mul %z5.h %z3.h $0x01 -> %z4.h +442bf8e6 : mul z6.h, z7.h, z3.h[1] : mul %z7.h %z3.h $0x01 -> %z6.h +4434f928 : mul z8.h, z9.h, z4.h[2] : mul %z9.h %z4.h $0x02 -> %z8.h +4434f96a : mul z10.h, z11.h, z4.h[2] : mul %z11.h %z4.h $0x02 -> %z10.h +443df9ac : mul z12.h, z13.h, z5.h[3] : mul %z13.h %z5.h $0x03 -> %z12.h +443df9ee : mul z14.h, z15.h, z5.h[3] : mul %z15.h %z5.h $0x03 -> %z14.h +4466fa30 : mul z16.h, z17.h, z6.h[4] : mul %z17.h %z6.h $0x04 -> %z16.h +4466fa51 : mul z17.h, z18.h, z6.h[4] : mul %z18.h %z6.h $0x04 -> %z17.h +4466fa93 : mul z19.h, z20.h, z6.h[4] : mul %z20.h %z6.h $0x04 -> %z19.h +446ffad5 : mul z21.h, z22.h, z7.h[5] : mul %z22.h %z7.h $0x05 -> %z21.h +446ffb17 : mul z23.h, z24.h, z7.h[5] : mul %z24.h %z7.h $0x05 -> %z23.h +4470fb59 : mul z25.h, z26.h, z0.h[6] : mul %z26.h %z0.h $0x06 -> %z25.h +4470fb9b : mul z27.h, z28.h, z0.h[6] : mul %z28.h %z0.h $0x06 -> %z27.h +447ffbff : mul z31.h, z31.h, z7.h[7] : mul %z31.h %z7.h $0x07 -> %z31.h + +# MUL .S, .S, .S[] (MUL-Z.ZZi-S) +44a0f800 : mul 
z0.s, z0.s, z0.s[0] : mul %z0.s %z0.s $0x00 -> %z0.s +44a2f862 : mul z2.s, z3.s, z2.s[0] : mul %z3.s %z2.s $0x00 -> %z2.s +44a3f8a4 : mul z4.s, z5.s, z3.s[0] : mul %z5.s %z3.s $0x00 -> %z4.s +44abf8e6 : mul z6.s, z7.s, z3.s[1] : mul %z7.s %z3.s $0x01 -> %z6.s +44acf928 : mul z8.s, z9.s, z4.s[1] : mul %z9.s %z4.s $0x01 -> %z8.s +44acf96a : mul z10.s, z11.s, z4.s[1] : mul %z11.s %z4.s $0x01 -> %z10.s +44adf9ac : mul z12.s, z13.s, z5.s[1] : mul %z13.s %z5.s $0x01 -> %z12.s +44adf9ee : mul z14.s, z15.s, z5.s[1] : mul %z15.s %z5.s $0x01 -> %z14.s +44b6fa30 : mul z16.s, z17.s, z6.s[2] : mul %z17.s %z6.s $0x02 -> %z16.s +44b6fa51 : mul z17.s, z18.s, z6.s[2] : mul %z18.s %z6.s $0x02 -> %z17.s +44b6fa93 : mul z19.s, z20.s, z6.s[2] : mul %z20.s %z6.s $0x02 -> %z19.s +44b7fad5 : mul z21.s, z22.s, z7.s[2] : mul %z22.s %z7.s $0x02 -> %z21.s +44b7fb17 : mul z23.s, z24.s, z7.s[2] : mul %z24.s %z7.s $0x02 -> %z23.s +44b0fb59 : mul z25.s, z26.s, z0.s[2] : mul %z26.s %z0.s $0x02 -> %z25.s +44b8fb9b : mul z27.s, z28.s, z0.s[3] : mul %z28.s %z0.s $0x03 -> %z27.s +44bffbff : mul z31.s, z31.s, z7.s[3] : mul %z31.s %z7.s $0x03 -> %z31.s + +# MUL .D, .D, .D[] (MUL-Z.ZZi-D) +44e0f800 : mul z0.d, z0.d, z0.d[0] : mul %z0.d %z0.d $0x00 -> %z0.d +44e3f862 : mul z2.d, z3.d, z3.d[0] : mul %z3.d %z3.d $0x00 -> %z2.d +44e4f8a4 : mul z4.d, z5.d, z4.d[0] : mul %z5.d %z4.d $0x00 -> %z4.d +44e5f8e6 : mul z6.d, z7.d, z5.d[0] : mul %z7.d %z5.d $0x00 -> %z6.d +44e6f928 : mul z8.d, z9.d, z6.d[0] : mul %z9.d %z6.d $0x00 -> %z8.d +44e7f96a : mul z10.d, z11.d, z7.d[0] : mul %z11.d %z7.d $0x00 -> %z10.d +44e8f9ac : mul z12.d, z13.d, z8.d[0] : mul %z13.d %z8.d $0x00 -> %z12.d +44e9f9ee : mul z14.d, z15.d, z9.d[0] : mul %z15.d %z9.d $0x00 -> %z14.d +44eafa30 : mul z16.d, z17.d, z10.d[0] : mul %z17.d %z10.d $0x00 -> %z16.d +44fafa51 : mul z17.d, z18.d, z10.d[1] : mul %z18.d %z10.d $0x01 -> %z17.d +44fbfa93 : mul z19.d, z20.d, z11.d[1] : mul %z20.d %z11.d $0x01 -> %z19.d +44fcfad5 : mul z21.d, z22.d, z12.d[1] : 
mul %z22.d %z12.d $0x01 -> %z21.d +44fdfb17 : mul z23.d, z24.d, z13.d[1] : mul %z24.d %z13.d $0x01 -> %z23.d +44fefb59 : mul z25.d, z26.d, z14.d[1] : mul %z26.d %z14.d $0x01 -> %z25.d +44fffb9b : mul z27.d, z28.d, z15.d[1] : mul %z28.d %z15.d $0x01 -> %z27.d +44fffbff : mul z31.d, z31.d, z15.d[1] : mul %z31.d %z15.d $0x01 -> %z31.d + # NBSL .D, .D, .D, .D (NBSL-Z.ZZZ-_) 04e03c00 : nbsl z0.d, z0.d, z0.d, z0.d : nbsl %z0.d %z0.d %z0.d -> %z0.d 04e33c82 : nbsl z2.d, z2.d, z3.d, z4.d : nbsl %z2.d %z3.d %z4.d -> %z2.d diff --git a/suite/tests/api/ir_aarch64_sve.c b/suite/tests/api/ir_aarch64_sve.c index 49b6cb7d283..dfb18841b36 100644 --- a/suite/tests/api/ir_aarch64_sve.c +++ b/suite/tests/api/ir_aarch64_sve.c @@ -1505,7 +1505,7 @@ TEST_INSTR(mul_sve_pred) opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_8)); } -TEST_INSTR(mul_sve) +TEST_INSTR(mul_sve_imm) { /* Testing MUL ., ., # */ int imm8_0_0[6] = { -128, -85, -42, 1, 43, 127 }; @@ -1514,7 +1514,7 @@ TEST_INSTR(mul_sve) "mul %z10.b $0xd6 -> %z10.b", "mul %z16.b $0x01 -> %z16.b", "mul %z21.b $0x2b -> %z21.b", "mul %z31.b $0x7f -> %z31.b", }; - TEST_LOOP(mul, mul_sve, 6, expected_0_0[i], + TEST_LOOP(mul, mul_sve_imm, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_immed_int(imm8_0_0[i], OPSZ_1)); @@ -1524,7 +1524,7 @@ TEST_INSTR(mul_sve) "mul %z10.h $0xd6 -> %z10.h", "mul %z16.h $0x01 -> %z16.h", "mul %z21.h $0x2b -> %z21.h", "mul %z31.h $0x7f -> %z31.h", }; - TEST_LOOP(mul, mul_sve, 6, expected_0_1[i], + TEST_LOOP(mul, mul_sve_imm, 6, expected_0_1[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), opnd_create_immed_int(imm8_0_1[i], OPSZ_1)); @@ -1534,7 +1534,7 @@ TEST_INSTR(mul_sve) "mul %z10.s $0xd6 -> %z10.s", "mul %z16.s $0x01 -> %z16.s", "mul %z21.s $0x2b -> %z21.s", "mul %z31.s $0x7f -> %z31.s", }; - TEST_LOOP(mul, mul_sve, 6, expected_0_2[i], + TEST_LOOP(mul, mul_sve_imm, 6, expected_0_2[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], 
OPSZ_4), opnd_create_immed_int(imm8_0_2[i], OPSZ_1)); @@ -1544,7 +1544,7 @@ TEST_INSTR(mul_sve) "mul %z10.d $0xd6 -> %z10.d", "mul %z16.d $0x01 -> %z16.d", "mul %z21.d $0x2b -> %z21.d", "mul %z31.d $0x7f -> %z31.d", }; - TEST_LOOP(mul, mul_sve, 6, expected_0_3[i], + TEST_LOOP(mul, mul_sve_imm, 6, expected_0_3[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), opnd_create_immed_int(imm8_0_3[i], OPSZ_1)); } @@ -20571,7 +20571,7 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(mls_sve_pred); RUN_INSTR_TEST(msb_sve_pred); RUN_INSTR_TEST(mul_sve_pred); - RUN_INSTR_TEST(mul_sve); + RUN_INSTR_TEST(mul_sve_imm); RUN_INSTR_TEST(smulh_sve_pred); RUN_INSTR_TEST(umulh_sve_pred); diff --git a/suite/tests/api/ir_aarch64_sve2.c b/suite/tests/api/ir_aarch64_sve2.c index 025683de45c..1fb8f6248cf 100644 --- a/suite/tests/api/ir_aarch64_sve2.c +++ b/suite/tests/api/ir_aarch64_sve2.c @@ -8615,6 +8615,101 @@ TEST_INSTR(stnt1w_sve_pred) OPSZ_4, DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32, 0)); } + +TEST_INSTR(mul_sve_vector) +{ + + /* Testing MUL <Zd>.<T>, <Zn>.<T>, <Zm>.<T> once per element size: .b, .h, .s and .d 
*/ + const char *const expected_0_0[6] = { + "mul %z0.b %z0.b -> %z0.b", "mul %z6.b %z7.b -> %z5.b", + "mul %z11.b %z12.b -> %z10.b", "mul %z17.b %z18.b -> %z16.b", + "mul %z22.b %z23.b -> %z21.b", "mul %z31.b %z31.b -> %z31.b", + }; + TEST_LOOP(mul, mul_sve_vector, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_1), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_1)); + + const char *const expected_1_0[6] = { + "mul %z0.h %z0.h -> %z0.h", "mul %z6.h %z7.h -> %z5.h", + "mul %z11.h %z12.h -> %z10.h", "mul %z17.h %z18.h -> %z16.h", + "mul %z22.h %z23.h -> %z21.h", "mul %z31.h %z31.h -> %z31.h", + }; + TEST_LOOP(mul, mul_sve_vector, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_2), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_2)); + + const char *const expected_2_0[6] = { + "mul %z0.s %z0.s -> %z0.s", "mul %z6.s %z7.s -> %z5.s", + "mul %z11.s %z12.s -> %z10.s", "mul %z17.s %z18.s -> %z16.s", + "mul %z22.s %z23.s -> %z21.s", "mul %z31.s %z31.s -> %z31.s", + }; + TEST_LOOP(mul, mul_sve_vector, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_4), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_4)); + + const char *const expected_3_0[6] = { + "mul %z0.d %z0.d -> %z0.d", "mul %z6.d %z7.d -> %z5.d", + "mul %z11.d %z12.d -> %z10.d", "mul %z17.d %z18.d -> %z16.d", + "mul %z22.d %z23.d -> %z21.d", "mul %z31.d %z31.d -> %z31.d", + }; + TEST_LOOP(mul, mul_sve_vector, 6, expected_3_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_8), + opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_8)); +} + +TEST_INSTR(mul_sve_idx) +{ + + /* Testing MUL <Zd>.D, <Zn>.D, <Zm>.D[<index>]; the index is passed as an OPSZ_1b 
immediate (values 0 and 1) */ + static const reg_id_t Zm_0_0[6] = { 
DR_REG_Z0, DR_REG_Z4, DR_REG_Z7, + DR_REG_Z10, DR_REG_Z12, DR_REG_Z15 }; + static const uint i1_0_0[6] = { 0, 1, 1, 1, 0, 1 }; + const char *const expected_0_0[6] = { + "mul %z0.d %z0.d $0x00 -> %z0.d", "mul %z6.d %z4.d $0x01 -> %z5.d", + "mul %z11.d %z7.d $0x01 -> %z10.d", "mul %z17.d %z10.d $0x01 -> %z16.d", + "mul %z22.d %z12.d $0x00 -> %z21.d", "mul %z31.d %z15.d $0x01 -> %z31.d", + }; + TEST_LOOP(mul, mul_sve_idx, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_8), + opnd_create_reg_element_vector(Zm_0_0[i], OPSZ_8), + opnd_create_immed_uint(i1_0_0[i], OPSZ_1b)); + + /* Testing MUL <Zd>.H, <Zn>.H, <Zm>.H[<index>]; the index is passed as an OPSZ_3b immediate (values 0 to 7) */ + static const reg_id_t Zm_1_0[6] = { DR_REG_Z0, DR_REG_Z3, DR_REG_Z4, + DR_REG_Z6, DR_REG_Z7, DR_REG_Z7 }; + static const uint i3_0_0[6] = { 0, 4, 5, 7, 0, 7 }; + const char *const expected_1_0[6] = { + "mul %z0.h %z0.h $0x00 -> %z0.h", "mul %z6.h %z3.h $0x04 -> %z5.h", + "mul %z11.h %z4.h $0x05 -> %z10.h", "mul %z17.h %z6.h $0x07 -> %z16.h", + "mul %z22.h %z7.h $0x00 -> %z21.h", "mul %z31.h %z7.h $0x07 -> %z31.h", + }; + TEST_LOOP(mul, mul_sve_idx, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_2), + opnd_create_reg_element_vector(Zm_1_0[i], OPSZ_2), + opnd_create_immed_uint(i3_0_0[i], OPSZ_3b)); + + /* Testing MUL <Zd>.S, <Zn>.S, <Zm>.S[<index>]; the index is passed as an OPSZ_2b immediate (values 0 to 3) */ + static const reg_id_t Zm_2_0[6] = { DR_REG_Z0, DR_REG_Z3, DR_REG_Z4, + DR_REG_Z6, DR_REG_Z7, DR_REG_Z7 }; + static const uint i2_0_0[6] = { 0, 3, 0, 1, 1, 3 }; + const char *const expected_2_0[6] = { + "mul %z0.s %z0.s $0x00 -> %z0.s", "mul %z6.s %z3.s $0x03 -> %z5.s", + "mul %z11.s %z4.s $0x00 -> %z10.s", "mul %z17.s %z6.s $0x01 -> %z16.s", + "mul %z22.s %z7.s $0x01 -> %z21.s", "mul %z31.s %z7.s $0x03 -> %z31.s", + }; + TEST_LOOP(mul, mul_sve_idx, 6, expected_2_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + 
opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_4), + opnd_create_reg_element_vector(Zm_2_0[i], OPSZ_4), + opnd_create_immed_uint(i2_0_0[i], OPSZ_2b)); +} + int main(int argc, char *argv[]) { @@ -8868,6 +8963,9 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(stnt1h_sve_pred); RUN_INSTR_TEST(stnt1w_sve_pred); + RUN_INSTR_TEST(mul_sve_vector); + RUN_INSTR_TEST(mul_sve_idx); + print("All SVE2 tests complete.\n"); #ifndef STANDALONE_DECODER dr_standalone_exit();