Unverified Commit c3253fc6 authored by Cam Mannett's avatar Cam Mannett Committed by GitHub
Browse files

i#2626 Finish AArch64 encoder/decoder: BF16 instructions (#5881)

This patch adds the appropriate macros, tests and codec entries to
encode the following variants:
```
BFCVT   <Hd>, <Sn>
BFCVTN  <Vd>.4H, <Vn>.4S
BFCVTN2 <Vd>.8H, <Vn>.4S
BFDOT   <Vd>.<Ts>, <Vn>.<Tb>, <Vm>.<Tb>
BFDOT   <Vd>.<Ts>, <Vn>.<Tb>, <Vm>.2H[<index>]
BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H
BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>]
BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H
BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>]
BFMMLA  <Vd>.4S, <Vn>.8H, <Vm>.8H
```

Issue #2626
Showing with 629 additions and 1 deletion
+629 -1
......@@ -3753,6 +3753,28 @@ encode_opnd_z4_d_16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_o
return encode_single_sized(OPSZ_SCALABLE, 16, DOUBLE_REG, 0, opnd, enc_out);
}
/* q4_16: Q0-15 register at position 16.
 * Only four bits are read, so the decoded register number is at most 15.
 */
static inline bool
decode_opnd_q4_16(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    /* Register number is the 4-bit field starting at bit 16. */
    const uint reg_number = extract_uint(enc, 16, 4);

    *opnd = decode_vreg(QUAD_REG, reg_number);
    return true;
}
static inline bool
encode_opnd_q4_16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    /* Encoder for q4_16: writes the register number at bit 16.
     * Fails if encode_vreg() rejects the operand, if the reported size is not
     * OPSZ_16, or if the register number does not fit the 4-bit field.
     */
    opnd_size_t reg_size = OPSZ_NA;
    uint reg_number;

    if (!encode_vreg(&reg_size, &reg_number, opnd))
        return false;
    if (reg_size != OPSZ_16)
        return false;
    if (reg_number > 15)
        return false;

    *enc_out = reg_number << 16;
    return true;
}
/* sysreg: system register, operand of MRS/MSR */
static inline bool
......@@ -4677,6 +4699,26 @@ encode_opnd_x16immvs(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_
return false;
}
/* vindex_S: 2-bit vector element index.
 * Encoding bit 11 supplies the high bit of the index and encoding bit 21 the
 * low bit; the result is returned as an OPSZ_2b immediate.
 */
static inline bool
decode_opnd_vindex_S(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
{
    const uint high_bit = extract_uint(enc, 11, 1);
    const uint low_bit = extract_uint(enc, 21, 1);

    *opnd = opnd_create_immed_int((high_bit << 1) | low_bit, OPSZ_2b);
    return true;
}
static inline bool
encode_opnd_vindex_S(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
{
    /* Encoder for vindex_S: index bit 1 is written to encoding bit 11 and
     * index bit 0 to encoding bit 21 (the inverse of decode_opnd_vindex_S).
     * Returns false, without writing *enc_out, unless opnd is an OPSZ_2b
     * immediate integer whose value is in the range 0-3.
     */
    IF_RETURN_FALSE(!opnd_is_immed_int(opnd) || (opnd_get_size(opnd) != OPSZ_2b))
    /* The operand's size tag does not constrain the stored value, so check
     * the value itself before narrowing it.  Without this, an out-of-range
     * index would be masked down to its low two bits and silently encoded as
     * a different element.
     */
    IF_RETURN_FALSE(opnd_get_immed_int(opnd) < 0 || opnd_get_immed_int(opnd) > 3)
    const uint val = (uint)opnd_get_immed_int(opnd);
    *enc_out = (BITS(val, 1, 1) << 11) | (BITS(val, 0, 0) << 21);
    return true;
}
/* vindex_H: Index for vector with half elements (0-7). */
static inline bool
......
......@@ -691,7 +691,7 @@ def main():
# The Arm AArch64's architecture versions supported by the DynamoRIO codec.
# Currently, v8.0 is fully supported, while v8.1, v8.2, v8.3, v8.6, SVE,
# and SVE2 are partially supported. The null terminator element at the end
# is required by some generator functions to correctly generate links
# between each version's decode/encode logic.
isa_versions = ['v80', 'v81', 'v82', 'v83', 'v86', 'sve', 'sve2', '']
......
......@@ -35,3 +35,14 @@
# generation.
# Instruction definitions:
0001111001100011010000xxxxxxxxxx n 953 BF16 bfcvt h0 : s5
0000111010100001011010xxxxxxxxxx n 973 BF16 bfcvtn d0 : q5 s_const_sz
0100111010100001011010xxxxxxxxxx n 974 BF16 bfcvtn2 q0 : q5 s_const_sz
0x101110010xxxxx111111xxxxxxxxxx n 954 BF16 bfdot dq0 : dq0 dq5 dq16 h_sz
0x00111101xxxxxx1111x0xxxxxxxxxx n 954 BF16 bfdot dq0 : dq0 dq5 q16 vindex_S h_sz
00101110110xxxxx111111xxxxxxxxxx n 955 BF16 bfmlalb q0 : q0 q5 q16 h_sz
0000111111xxxxxx1111x0xxxxxxxxxx n 955 BF16 bfmlalb q0 : q0 q5 q4_16 vindex_H h_sz
01101110110xxxxx111111xxxxxxxxxx n 956 BF16 bfmlalt q0 : q0 q5 q16 h_sz
0100111111xxxxxx1111x0xxxxxxxxxx n 956 BF16 bfmlalt q0 : q0 q5 q4_16 vindex_H h_sz
01101110010xxxxx111011xxxxxxxxxx n 957 BF16 bfmmla q0 : q0 q5 q16 h_sz
......@@ -4929,6 +4929,165 @@
*/
#define INSTR_CREATE_xpaci(dc, Rd) instr_create_0dst_1src((dc), OP_xpaci, (Rd))
/**
 * Creates a BFCVT instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFCVT <Hd>, <Sn>
 * \endverbatim
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd   The destination register, H (halfword, 16 bits).
 * \param Rn   The source register, S (singleword, 32 bits).
 */
#define INSTR_CREATE_bfcvt(dc, Rd, Rn) \
    instr_create_1dst_1src((dc), OP_bfcvt, (Rd), (Rn))
/**
 * Creates a BFCVTN2 instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFCVTN2 <Vd>.8H, <Vn>.4S
 * \endverbatim
 * The macro appends OPND_CREATE_SINGLE() as a second source operand; callers
 * do not pass it.
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd   The destination vector register, Q (quadword, 128 bits).
 * \param Rn   The source vector register, Q (quadword, 128 bits).
 */
#define INSTR_CREATE_bfcvtn2_vector(dc, Rd, Rn) \
    instr_create_1dst_2src((dc), OP_bfcvtn2, (Rd), (Rn), OPND_CREATE_SINGLE())
/**
 * Creates a BFCVTN instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFCVTN <Vd>.4H, <Vn>.4S
 * \endverbatim
 * The macro appends OPND_CREATE_SINGLE() as a second source operand; callers
 * do not pass it.
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd   The destination vector register, D (doubleword, 64 bits).
 * \param Rn   The source vector register, Q (quadword, 128 bits).
 */
#define INSTR_CREATE_bfcvtn_vector(dc, Rd, Rn) \
    instr_create_1dst_2src((dc), OP_bfcvtn, (Rd), (Rn), OPND_CREATE_SINGLE())
/**
 * Creates a BFDOT instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFDOT <Vd>.<Ts>, <Vn>.<Tb>, <Vm>.<Tb>
 * \endverbatim
 * \p Rd is passed as both the destination and the first source, and the macro
 * appends OPND_CREATE_HALF() as the final source operand.
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd   The source and destination vector register. Can be D
 *             (doubleword, 64 bits) or Q (quadword, 128 bits).
 * \param Rn   The second source vector register. Can be D (doubleword,
 *             64 bits) or Q (quadword, 128 bits).
 * \param Rm   The third source vector register. Can be D (doubleword,
 *             64 bits) or Q (quadword, 128 bits).
 */
#define INSTR_CREATE_bfdot_vector(dc, Rd, Rn, Rm) \
    instr_create_1dst_4src((dc), OP_bfdot, (Rd), (Rd), (Rn), (Rm), OPND_CREATE_HALF())
/**
 * Creates a BFDOT instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFDOT <Vd>.<Ts>, <Vn>.<Tb>, <Vm>.2H[<index>]
 * \endverbatim
 * \p Rd is passed as both the destination and the first source, and the macro
 * appends OPND_CREATE_HALF() as the final source operand.
 * \param dc      The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd      The source and destination vector register. Can be D
 *                (doubleword, 64 bits) or Q (quadword, 128 bits).
 * \param Rn      The second source vector register. Can be D (doubleword,
 *                64 bits) or Q (quadword, 128 bits).
 * \param Rm      The third source vector register, Q (quadword, 128 bits).
 * \param index   The immediate index for Rm, in the range 0-3.
 */
#define INSTR_CREATE_bfdot_vector_idx(dc, Rd, Rn, Rm, index)                  \
    instr_create_1dst_5src((dc), OP_bfdot, (Rd), (Rd), (Rn), (Rm), (index), \
                           OPND_CREATE_HALF())
/**
 * Creates a BFMLALB instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.8H
 * \endverbatim
 * \p Rd is passed as both the destination and the first source, and the macro
 * appends OPND_CREATE_HALF() as the final source operand.
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd   The source and destination vector register, Q (quadword,
 *             128 bits).
 * \param Rn   The second source vector register, Q (quadword, 128 bits).
 * \param Rm   The third source vector register, Q (quadword, 128 bits).
 */
#define INSTR_CREATE_bfmlalb_vector(dc, Rd, Rn, Rm) \
    instr_create_1dst_4src((dc), OP_bfmlalb, (Rd), (Rd), (Rn), (Rm), OPND_CREATE_HALF())
/**
 * Creates a BFMLALB instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFMLALB <Vd>.4S, <Vn>.8H, <Vm>.H[<index>]
 * \endverbatim
 * \p Rd is passed as both the destination and the first source, and the macro
 * appends OPND_CREATE_HALF() as the final source operand.
 * \param dc      The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd      The source and destination vector register, Q (quadword,
 *                128 bits).
 * \param Rn      The second source vector register, Q (quadword, 128 bits).
 * \param Rm      The third source vector register, Q (quadword, 128 bits).
 * \param index   The immediate index for Rm, in the range 0-7.
 */
#define INSTR_CREATE_bfmlalb_vector_idx(dc, Rd, Rn, Rm, index)                  \
    instr_create_1dst_5src((dc), OP_bfmlalb, (Rd), (Rd), (Rn), (Rm), (index), \
                           OPND_CREATE_HALF())
/**
 * Creates a BFMLALT instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.8H
 * \endverbatim
 * \p Rd is passed as both the destination and the first source, and the macro
 * appends OPND_CREATE_HALF() as the final source operand.
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd   The source and destination vector register, Q (quadword,
 *             128 bits).
 * \param Rn   The second source vector register, Q (quadword, 128 bits).
 * \param Rm   The third source vector register, Q (quadword, 128 bits).
 */
#define INSTR_CREATE_bfmlalt_vector(dc, Rd, Rn, Rm) \
    instr_create_1dst_4src((dc), OP_bfmlalt, (Rd), (Rd), (Rn), (Rm), OPND_CREATE_HALF())
/**
 * Creates a BFMLALT instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFMLALT <Vd>.4S, <Vn>.8H, <Vm>.H[<index>]
 * \endverbatim
 * \p Rd is passed as both the destination and the first source, and the macro
 * appends OPND_CREATE_HALF() as the final source operand.
 * \param dc      The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd      The source and destination vector register, Q (quadword,
 *                128 bits).
 * \param Rn      The second source vector register, Q (quadword, 128 bits).
 * \param Rm      The third source vector register, Q (quadword, 128 bits).
 * \param index   The immediate index for Rm, in the range 0-7.
 */
#define INSTR_CREATE_bfmlalt_vector_idx(dc, Rd, Rn, Rm, index)                  \
    instr_create_1dst_5src((dc), OP_bfmlalt, (Rd), (Rd), (Rn), (Rm), (index), \
                           OPND_CREATE_HALF())
/**
 * Creates a BFMMLA instruction.
 *
 * This macro is used to encode the forms:
 * \verbatim
 *    BFMMLA <Vd>.4S, <Vn>.8H, <Vm>.8H
 * \endverbatim
 * \p Rd is passed as both the destination and the first source, and the macro
 * appends OPND_CREATE_HALF() as the final source operand.
 * \param dc   The void * dcontext used to allocate memory for the #instr_t.
 * \param Rd   The source and destination vector register, Q (quadword,
 *             128 bits).
 * \param Rn   The second source vector register, Q (quadword, 128 bits).
 * \param Rm   The third source vector register, Q (quadword, 128 bits).
 */
#define INSTR_CREATE_bfmmla_vector(dc, Rd, Rn, Rm) \
    instr_create_1dst_4src((dc), OP_bfmmla, (Rd), (Rd), (Rn), (Rm), OPND_CREATE_HALF())
/****************************************************************************
* SVE Instructions *
****************************************************************************/
......
......@@ -176,6 +176,7 @@
------------xxxx---------------- imm4_16p1 # 4bit imm at 19-16, plus 1
------------xxxx---------------- z4_s_16 # Z0-15 register with s size elements at position 16
------------xxxx---------------- z4_d_16 # Z0-15 register with d size elements at position 16
------------xxxx---------------- q4_16 # Q0-15 register at position 16
------------xxxxxxxxxxxxxxx----- sysreg # operand of MRS
-----------?????---------------- bh_imm5_sz # Size encoded as least significant bit in imm5
-----------?????---------------- bhs_imm5_sz # Size encoded as least significant bit in imm5
......@@ -221,6 +222,7 @@
----------?-----???-??xxxxx----- memvs # gets size from 21, 15:13 and 11:10
----------?xxxxx--?-??---------- x16immvr # computes immed from 21, 13 and 11:10
----------?xxxxx???-??---------- x16immvs # computes immed from 21, 15:13 and 11:10
----------x---------x----------- vindex_S # Index for vector with single elements (0-3)
----------xx--------x----------- vindex_H # Index for vector with half elements (0-7)
----------xxxxxx------xxxxx----- svemem_gpr_simm6_vl # imm offset and base reg for SVE
----------xxxxxx------xxxxx----- svememx6_b_5 # vector memory reg with 6 bit imm for byte value
......
......@@ -32,3 +32,214 @@
# See dis-a64-sve.txt for the formatting.
# Tests:
# BFCVT <Hd>, <Sn> (BFCVT-V.V-H_floatdp1)
1e634000 : bfcvt h0, s0 : bfcvt %s0 -> %h0
1e634062 : bfcvt h2, s3 : bfcvt %s3 -> %h2
1e6340a4 : bfcvt h4, s5 : bfcvt %s5 -> %h4
1e6340e6 : bfcvt h6, s7 : bfcvt %s7 -> %h6
1e634128 : bfcvt h8, s9 : bfcvt %s9 -> %h8
1e63416a : bfcvt h10, s11 : bfcvt %s11 -> %h10
1e6341ac : bfcvt h12, s13 : bfcvt %s13 -> %h12
1e6341ee : bfcvt h14, s15 : bfcvt %s15 -> %h14
1e634230 : bfcvt h16, s17 : bfcvt %s17 -> %h16
1e634251 : bfcvt h17, s18 : bfcvt %s18 -> %h17
1e634293 : bfcvt h19, s20 : bfcvt %s20 -> %h19
1e6342d5 : bfcvt h21, s22 : bfcvt %s22 -> %h21
1e634317 : bfcvt h23, s24 : bfcvt %s24 -> %h23
1e634359 : bfcvt h25, s26 : bfcvt %s26 -> %h25
1e63439b : bfcvt h27, s28 : bfcvt %s28 -> %h27
1e6343ff : bfcvt h31, s31 : bfcvt %s31 -> %h31
# BFCVTN <Hd>.4H, <Sn>.4S (BFCVTN-Q.Q-Vec)
0ea16800 : bfcvtn v0.4h, v0.4s : bfcvtn %q0 $0x02 -> %d0
0ea16862 : bfcvtn v2.4h, v3.4s : bfcvtn %q3 $0x02 -> %d2
0ea168a4 : bfcvtn v4.4h, v5.4s : bfcvtn %q5 $0x02 -> %d4
0ea168e6 : bfcvtn v6.4h, v7.4s : bfcvtn %q7 $0x02 -> %d6
0ea16928 : bfcvtn v8.4h, v9.4s : bfcvtn %q9 $0x02 -> %d8
0ea1696a : bfcvtn v10.4h, v11.4s : bfcvtn %q11 $0x02 -> %d10
0ea169ac : bfcvtn v12.4h, v13.4s : bfcvtn %q13 $0x02 -> %d12
0ea169ee : bfcvtn v14.4h, v15.4s : bfcvtn %q15 $0x02 -> %d14
0ea16a30 : bfcvtn v16.4h, v17.4s : bfcvtn %q17 $0x02 -> %d16
0ea16a51 : bfcvtn v17.4h, v18.4s : bfcvtn %q18 $0x02 -> %d17
0ea16a93 : bfcvtn v19.4h, v20.4s : bfcvtn %q20 $0x02 -> %d19
0ea16ad5 : bfcvtn v21.4h, v22.4s : bfcvtn %q22 $0x02 -> %d21
0ea16b17 : bfcvtn v23.4h, v24.4s : bfcvtn %q24 $0x02 -> %d23
0ea16b59 : bfcvtn v25.4h, v26.4s : bfcvtn %q26 $0x02 -> %d25
0ea16b9b : bfcvtn v27.4h, v28.4s : bfcvtn %q28 $0x02 -> %d27
0ea16bff : bfcvtn v31.4h, v31.4s : bfcvtn %q31 $0x02 -> %d31
# BFCVTN2 <Hd>.8H, <Sn>.4S (BFCVTN2-Q.Q-Vec)
4ea16800 : bfcvtn2 v0.8h, v0.4s : bfcvtn2 %q0 $0x02 -> %q0
4ea16862 : bfcvtn2 v2.8h, v3.4s : bfcvtn2 %q3 $0x02 -> %q2
4ea168a4 : bfcvtn2 v4.8h, v5.4s : bfcvtn2 %q5 $0x02 -> %q4
4ea168e6 : bfcvtn2 v6.8h, v7.4s : bfcvtn2 %q7 $0x02 -> %q6
4ea16928 : bfcvtn2 v8.8h, v9.4s : bfcvtn2 %q9 $0x02 -> %q8
4ea1696a : bfcvtn2 v10.8h, v11.4s : bfcvtn2 %q11 $0x02 -> %q10
4ea169ac : bfcvtn2 v12.8h, v13.4s : bfcvtn2 %q13 $0x02 -> %q12
4ea169ee : bfcvtn2 v14.8h, v15.4s : bfcvtn2 %q15 $0x02 -> %q14
4ea16a30 : bfcvtn2 v16.8h, v17.4s : bfcvtn2 %q17 $0x02 -> %q16
4ea16a51 : bfcvtn2 v17.8h, v18.4s : bfcvtn2 %q18 $0x02 -> %q17
4ea16a93 : bfcvtn2 v19.8h, v20.4s : bfcvtn2 %q20 $0x02 -> %q19
4ea16ad5 : bfcvtn2 v21.8h, v22.4s : bfcvtn2 %q22 $0x02 -> %q21
4ea16b17 : bfcvtn2 v23.8h, v24.4s : bfcvtn2 %q24 $0x02 -> %q23
4ea16b59 : bfcvtn2 v25.8h, v26.4s : bfcvtn2 %q26 $0x02 -> %q25
4ea16b9b : bfcvtn2 v27.8h, v28.4s : bfcvtn2 %q28 $0x02 -> %q27
4ea16bff : bfcvtn2 v31.8h, v31.4s : bfcvtn2 %q31 $0x02 -> %q31
# BFDOT <Sd>.<T>, <Hn>.<Tb>, <Hm>.<Tb> (BFDOT-Q.QQ-Vec)
2e40fc00 : bfdot v0.2s, v0.4h, v0.4h : bfdot %d0 %d0 %d0 $0x01 -> %d0
2e44fc62 : bfdot v2.2s, v3.4h, v4.4h : bfdot %d2 %d3 %d4 $0x01 -> %d2
2e46fca4 : bfdot v4.2s, v5.4h, v6.4h : bfdot %d4 %d5 %d6 $0x01 -> %d4
2e48fce6 : bfdot v6.2s, v7.4h, v8.4h : bfdot %d6 %d7 %d8 $0x01 -> %d6
2e4afd28 : bfdot v8.2s, v9.4h, v10.4h : bfdot %d8 %d9 %d10 $0x01 -> %d8
2e4cfd6a : bfdot v10.2s, v11.4h, v12.4h : bfdot %d10 %d11 %d12 $0x01 -> %d10
2e4efdac : bfdot v12.2s, v13.4h, v14.4h : bfdot %d12 %d13 %d14 $0x01 -> %d12
2e50fdee : bfdot v14.2s, v15.4h, v16.4h : bfdot %d14 %d15 %d16 $0x01 -> %d14
2e52fe30 : bfdot v16.2s, v17.4h, v18.4h : bfdot %d16 %d17 %d18 $0x01 -> %d16
2e53fe51 : bfdot v17.2s, v18.4h, v19.4h : bfdot %d17 %d18 %d19 $0x01 -> %d17
2e55fe93 : bfdot v19.2s, v20.4h, v21.4h : bfdot %d19 %d20 %d21 $0x01 -> %d19
2e57fed5 : bfdot v21.2s, v22.4h, v23.4h : bfdot %d21 %d22 %d23 $0x01 -> %d21
2e59ff17 : bfdot v23.2s, v24.4h, v25.4h : bfdot %d23 %d24 %d25 $0x01 -> %d23
2e5bff59 : bfdot v25.2s, v26.4h, v27.4h : bfdot %d25 %d26 %d27 $0x01 -> %d25
2e5dff9b : bfdot v27.2s, v28.4h, v29.4h : bfdot %d27 %d28 %d29 $0x01 -> %d27
2e5fffff : bfdot v31.2s, v31.4h, v31.4h : bfdot %d31 %d31 %d31 $0x01 -> %d31
6e40fc00 : bfdot v0.4s, v0.8h, v0.8h : bfdot %q0 %q0 %q0 $0x01 -> %q0
6e44fc62 : bfdot v2.4s, v3.8h, v4.8h : bfdot %q2 %q3 %q4 $0x01 -> %q2
6e46fca4 : bfdot v4.4s, v5.8h, v6.8h : bfdot %q4 %q5 %q6 $0x01 -> %q4
6e48fce6 : bfdot v6.4s, v7.8h, v8.8h : bfdot %q6 %q7 %q8 $0x01 -> %q6
6e4afd28 : bfdot v8.4s, v9.8h, v10.8h : bfdot %q8 %q9 %q10 $0x01 -> %q8
6e4cfd6a : bfdot v10.4s, v11.8h, v12.8h : bfdot %q10 %q11 %q12 $0x01 -> %q10
6e4efdac : bfdot v12.4s, v13.8h, v14.8h : bfdot %q12 %q13 %q14 $0x01 -> %q12
6e50fdee : bfdot v14.4s, v15.8h, v16.8h : bfdot %q14 %q15 %q16 $0x01 -> %q14
6e52fe30 : bfdot v16.4s, v17.8h, v18.8h : bfdot %q16 %q17 %q18 $0x01 -> %q16
6e53fe51 : bfdot v17.4s, v18.8h, v19.8h : bfdot %q17 %q18 %q19 $0x01 -> %q17
6e55fe93 : bfdot v19.4s, v20.8h, v21.8h : bfdot %q19 %q20 %q21 $0x01 -> %q19
6e57fed5 : bfdot v21.4s, v22.8h, v23.8h : bfdot %q21 %q22 %q23 $0x01 -> %q21
6e59ff17 : bfdot v23.4s, v24.8h, v25.8h : bfdot %q23 %q24 %q25 $0x01 -> %q23
6e5bff59 : bfdot v25.4s, v26.8h, v27.8h : bfdot %q25 %q26 %q27 $0x01 -> %q25
6e5dff9b : bfdot v27.4s, v28.8h, v29.8h : bfdot %q27 %q28 %q29 $0x01 -> %q27
6e5fffff : bfdot v31.4s, v31.8h, v31.8h : bfdot %q31 %q31 %q31 $0x01 -> %q31
# BFDOT <Sd>.<T>, <Hn>.<Tb>, <Hm>.2H[<imm>] (BFDOT-Q.QQi-asimdelem_L)
0f40f000 : bfdot v0.2s, v0.4h, v0.2h[0] : bfdot %d0 %d0 %q0 $0x00 $0x01 -> %d0
0f44f062 : bfdot v2.2s, v3.4h, v4.2h[0] : bfdot %d2 %d3 %q4 $0x00 $0x01 -> %d2
0f46f0a4 : bfdot v4.2s, v5.4h, v6.2h[0] : bfdot %d4 %d5 %q6 $0x00 $0x01 -> %d4
0f68f0e6 : bfdot v6.2s, v7.4h, v8.2h[1] : bfdot %d6 %d7 %q8 $0x01 $0x01 -> %d6
0f6af128 : bfdot v8.2s, v9.4h, v10.2h[1] : bfdot %d8 %d9 %q10 $0x01 $0x01 -> %d8
0f6cf16a : bfdot v10.2s, v11.4h, v12.2h[1] : bfdot %d10 %d11 %q12 $0x01 $0x01 -> %d10
0f6ef1ac : bfdot v12.2s, v13.4h, v14.2h[1] : bfdot %d12 %d13 %q14 $0x01 $0x01 -> %d12
0f70f1ee : bfdot v14.2s, v15.4h, v16.2h[1] : bfdot %d14 %d15 %q16 $0x01 $0x01 -> %d14
0f52fa30 : bfdot v16.2s, v17.4h, v18.2h[2] : bfdot %d16 %d17 %q18 $0x02 $0x01 -> %d16
0f53fa51 : bfdot v17.2s, v18.4h, v19.2h[2] : bfdot %d17 %d18 %q19 $0x02 $0x01 -> %d17
0f55fa93 : bfdot v19.2s, v20.4h, v21.2h[2] : bfdot %d19 %d20 %q21 $0x02 $0x01 -> %d19
0f57fad5 : bfdot v21.2s, v22.4h, v23.2h[2] : bfdot %d21 %d22 %q23 $0x02 $0x01 -> %d21
0f59fb17 : bfdot v23.2s, v24.4h, v25.2h[2] : bfdot %d23 %d24 %q25 $0x02 $0x01 -> %d23
0f5bfb59 : bfdot v25.2s, v26.4h, v27.2h[2] : bfdot %d25 %d26 %q27 $0x02 $0x01 -> %d25
0f7dfb9b : bfdot v27.2s, v28.4h, v29.2h[3] : bfdot %d27 %d28 %q29 $0x03 $0x01 -> %d27
0f7ffbff : bfdot v31.2s, v31.4h, v31.2h[3] : bfdot %d31 %d31 %q31 $0x03 $0x01 -> %d31
4f40f000 : bfdot v0.4s, v0.8h, v0.2h[0] : bfdot %q0 %q0 %q0 $0x00 $0x01 -> %q0
4f44f062 : bfdot v2.4s, v3.8h, v4.2h[0] : bfdot %q2 %q3 %q4 $0x00 $0x01 -> %q2
4f46f0a4 : bfdot v4.4s, v5.8h, v6.2h[0] : bfdot %q4 %q5 %q6 $0x00 $0x01 -> %q4
4f68f0e6 : bfdot v6.4s, v7.8h, v8.2h[1] : bfdot %q6 %q7 %q8 $0x01 $0x01 -> %q6
4f6af128 : bfdot v8.4s, v9.8h, v10.2h[1] : bfdot %q8 %q9 %q10 $0x01 $0x01 -> %q8
4f6cf16a : bfdot v10.4s, v11.8h, v12.2h[1] : bfdot %q10 %q11 %q12 $0x01 $0x01 -> %q10
4f6ef1ac : bfdot v12.4s, v13.8h, v14.2h[1] : bfdot %q12 %q13 %q14 $0x01 $0x01 -> %q12
4f70f1ee : bfdot v14.4s, v15.8h, v16.2h[1] : bfdot %q14 %q15 %q16 $0x01 $0x01 -> %q14
4f52fa30 : bfdot v16.4s, v17.8h, v18.2h[2] : bfdot %q16 %q17 %q18 $0x02 $0x01 -> %q16
4f53fa51 : bfdot v17.4s, v18.8h, v19.2h[2] : bfdot %q17 %q18 %q19 $0x02 $0x01 -> %q17
4f55fa93 : bfdot v19.4s, v20.8h, v21.2h[2] : bfdot %q19 %q20 %q21 $0x02 $0x01 -> %q19
4f57fad5 : bfdot v21.4s, v22.8h, v23.2h[2] : bfdot %q21 %q22 %q23 $0x02 $0x01 -> %q21
4f59fb17 : bfdot v23.4s, v24.8h, v25.2h[2] : bfdot %q23 %q24 %q25 $0x02 $0x01 -> %q23
4f5bfb59 : bfdot v25.4s, v26.8h, v27.2h[2] : bfdot %q25 %q26 %q27 $0x02 $0x01 -> %q25
4f7dfb9b : bfdot v27.4s, v28.8h, v29.2h[3] : bfdot %q27 %q28 %q29 $0x03 $0x01 -> %q27
4f7ffbff : bfdot v31.4s, v31.8h, v31.2h[3] : bfdot %q31 %q31 %q31 $0x03 $0x01 -> %q31
# BFMLALB <Sd>.4S, <Hn>.8H, <Hm>.8H (BFMLALB-Q.QQ-Vec)
2ec0fc00 : bfmlalb v0.4s, v0.8h, v0.8h : bfmlalb %q0 %q0 %q0 $0x01 -> %q0
2ec4fc62 : bfmlalb v2.4s, v3.8h, v4.8h : bfmlalb %q2 %q3 %q4 $0x01 -> %q2
2ec6fca4 : bfmlalb v4.4s, v5.8h, v6.8h : bfmlalb %q4 %q5 %q6 $0x01 -> %q4
2ec8fce6 : bfmlalb v6.4s, v7.8h, v8.8h : bfmlalb %q6 %q7 %q8 $0x01 -> %q6
2ecafd28 : bfmlalb v8.4s, v9.8h, v10.8h : bfmlalb %q8 %q9 %q10 $0x01 -> %q8
2eccfd6a : bfmlalb v10.4s, v11.8h, v12.8h : bfmlalb %q10 %q11 %q12 $0x01 -> %q10
2ecefdac : bfmlalb v12.4s, v13.8h, v14.8h : bfmlalb %q12 %q13 %q14 $0x01 -> %q12
2ed0fdee : bfmlalb v14.4s, v15.8h, v16.8h : bfmlalb %q14 %q15 %q16 $0x01 -> %q14
2ed2fe30 : bfmlalb v16.4s, v17.8h, v18.8h : bfmlalb %q16 %q17 %q18 $0x01 -> %q16
2ed3fe51 : bfmlalb v17.4s, v18.8h, v19.8h : bfmlalb %q17 %q18 %q19 $0x01 -> %q17
2ed5fe93 : bfmlalb v19.4s, v20.8h, v21.8h : bfmlalb %q19 %q20 %q21 $0x01 -> %q19
2ed7fed5 : bfmlalb v21.4s, v22.8h, v23.8h : bfmlalb %q21 %q22 %q23 $0x01 -> %q21
2ed9ff17 : bfmlalb v23.4s, v24.8h, v25.8h : bfmlalb %q23 %q24 %q25 $0x01 -> %q23
2edbff59 : bfmlalb v25.4s, v26.8h, v27.8h : bfmlalb %q25 %q26 %q27 $0x01 -> %q25
2eddff9b : bfmlalb v27.4s, v28.8h, v29.8h : bfmlalb %q27 %q28 %q29 $0x01 -> %q27
2edfffff : bfmlalb v31.4s, v31.8h, v31.8h : bfmlalb %q31 %q31 %q31 $0x01 -> %q31
# BFMLALB <Sd>.4S, <Hn>.8H, <Hm>.H[<imm>] (BFMLALB-Q.QQi-asimdelem_L)
0fc0f000 : bfmlalb v0.4s, v0.8h, v0.h[0] : bfmlalb %q0 %q0 %q0 $0x00 $0x01 -> %q0
0fc3f062 : bfmlalb v2.4s, v3.8h, v3.h[0] : bfmlalb %q2 %q3 %q3 $0x00 $0x01 -> %q2
0fd4f0a4 : bfmlalb v4.4s, v5.8h, v4.h[1] : bfmlalb %q4 %q5 %q4 $0x01 $0x01 -> %q4
0fd5f0e6 : bfmlalb v6.4s, v7.8h, v5.h[1] : bfmlalb %q6 %q7 %q5 $0x01 $0x01 -> %q6
0fe6f128 : bfmlalb v8.4s, v9.8h, v6.h[2] : bfmlalb %q8 %q9 %q6 $0x02 $0x01 -> %q8
0fe7f16a : bfmlalb v10.4s, v11.8h, v7.h[2] : bfmlalb %q10 %q11 %q7 $0x02 $0x01 -> %q10
0ff8f1ac : bfmlalb v12.4s, v13.8h, v8.h[3] : bfmlalb %q12 %q13 %q8 $0x03 $0x01 -> %q12
0ff9f1ee : bfmlalb v14.4s, v15.8h, v9.h[3] : bfmlalb %q14 %q15 %q9 $0x03 $0x01 -> %q14
0fcafa30 : bfmlalb v16.4s, v17.8h, v10.h[4] : bfmlalb %q16 %q17 %q10 $0x04 $0x01 -> %q16
0fcafa51 : bfmlalb v17.4s, v18.8h, v10.h[4] : bfmlalb %q17 %q18 %q10 $0x04 $0x01 -> %q17
0fcbfa93 : bfmlalb v19.4s, v20.8h, v11.h[4] : bfmlalb %q19 %q20 %q11 $0x04 $0x01 -> %q19
0fdcfad5 : bfmlalb v21.4s, v22.8h, v12.h[5] : bfmlalb %q21 %q22 %q12 $0x05 $0x01 -> %q21
0fddfb17 : bfmlalb v23.4s, v24.8h, v13.h[5] : bfmlalb %q23 %q24 %q13 $0x05 $0x01 -> %q23
0feefb59 : bfmlalb v25.4s, v26.8h, v14.h[6] : bfmlalb %q25 %q26 %q14 $0x06 $0x01 -> %q25
0feffb9b : bfmlalb v27.4s, v28.8h, v15.h[6] : bfmlalb %q27 %q28 %q15 $0x06 $0x01 -> %q27
0ffffbff : bfmlalb v31.4s, v31.8h, v15.h[7] : bfmlalb %q31 %q31 %q15 $0x07 $0x01 -> %q31
# BFMLALT <Sd>.4S, <Hn>.8H, <Hm>.8H (BFMLALT-Q.QQ-Vec)
6ec0fc00 : bfmlalt v0.4s, v0.8h, v0.8h : bfmlalt %q0 %q0 %q0 $0x01 -> %q0
6ec4fc62 : bfmlalt v2.4s, v3.8h, v4.8h : bfmlalt %q2 %q3 %q4 $0x01 -> %q2
6ec6fca4 : bfmlalt v4.4s, v5.8h, v6.8h : bfmlalt %q4 %q5 %q6 $0x01 -> %q4
6ec8fce6 : bfmlalt v6.4s, v7.8h, v8.8h : bfmlalt %q6 %q7 %q8 $0x01 -> %q6
6ecafd28 : bfmlalt v8.4s, v9.8h, v10.8h : bfmlalt %q8 %q9 %q10 $0x01 -> %q8
6eccfd6a : bfmlalt v10.4s, v11.8h, v12.8h : bfmlalt %q10 %q11 %q12 $0x01 -> %q10
6ecefdac : bfmlalt v12.4s, v13.8h, v14.8h : bfmlalt %q12 %q13 %q14 $0x01 -> %q12
6ed0fdee : bfmlalt v14.4s, v15.8h, v16.8h : bfmlalt %q14 %q15 %q16 $0x01 -> %q14
6ed2fe30 : bfmlalt v16.4s, v17.8h, v18.8h : bfmlalt %q16 %q17 %q18 $0x01 -> %q16
6ed3fe51 : bfmlalt v17.4s, v18.8h, v19.8h : bfmlalt %q17 %q18 %q19 $0x01 -> %q17
6ed5fe93 : bfmlalt v19.4s, v20.8h, v21.8h : bfmlalt %q19 %q20 %q21 $0x01 -> %q19
6ed7fed5 : bfmlalt v21.4s, v22.8h, v23.8h : bfmlalt %q21 %q22 %q23 $0x01 -> %q21
6ed9ff17 : bfmlalt v23.4s, v24.8h, v25.8h : bfmlalt %q23 %q24 %q25 $0x01 -> %q23
6edbff59 : bfmlalt v25.4s, v26.8h, v27.8h : bfmlalt %q25 %q26 %q27 $0x01 -> %q25
6eddff9b : bfmlalt v27.4s, v28.8h, v29.8h : bfmlalt %q27 %q28 %q29 $0x01 -> %q27
6edfffff : bfmlalt v31.4s, v31.8h, v31.8h : bfmlalt %q31 %q31 %q31 $0x01 -> %q31
# BFMLALT <Sd>.4S, <Hn>.8H, <Hm>.H[<imm>] (BFMLALT-Q.QQi-asimdelem_L)
4fc0f000 : bfmlalt v0.4s, v0.8h, v0.h[0] : bfmlalt %q0 %q0 %q0 $0x00 $0x01 -> %q0
4fc3f062 : bfmlalt v2.4s, v3.8h, v3.h[0] : bfmlalt %q2 %q3 %q3 $0x00 $0x01 -> %q2
4fd4f0a4 : bfmlalt v4.4s, v5.8h, v4.h[1] : bfmlalt %q4 %q5 %q4 $0x01 $0x01 -> %q4
4fd5f0e6 : bfmlalt v6.4s, v7.8h, v5.h[1] : bfmlalt %q6 %q7 %q5 $0x01 $0x01 -> %q6
4fe6f128 : bfmlalt v8.4s, v9.8h, v6.h[2] : bfmlalt %q8 %q9 %q6 $0x02 $0x01 -> %q8
4fe7f16a : bfmlalt v10.4s, v11.8h, v7.h[2] : bfmlalt %q10 %q11 %q7 $0x02 $0x01 -> %q10
4ff8f1ac : bfmlalt v12.4s, v13.8h, v8.h[3] : bfmlalt %q12 %q13 %q8 $0x03 $0x01 -> %q12
4ff9f1ee : bfmlalt v14.4s, v15.8h, v9.h[3] : bfmlalt %q14 %q15 %q9 $0x03 $0x01 -> %q14
4fcafa30 : bfmlalt v16.4s, v17.8h, v10.h[4] : bfmlalt %q16 %q17 %q10 $0x04 $0x01 -> %q16
4fcafa51 : bfmlalt v17.4s, v18.8h, v10.h[4] : bfmlalt %q17 %q18 %q10 $0x04 $0x01 -> %q17
4fcbfa93 : bfmlalt v19.4s, v20.8h, v11.h[4] : bfmlalt %q19 %q20 %q11 $0x04 $0x01 -> %q19
4fdcfad5 : bfmlalt v21.4s, v22.8h, v12.h[5] : bfmlalt %q21 %q22 %q12 $0x05 $0x01 -> %q21
4fddfb17 : bfmlalt v23.4s, v24.8h, v13.h[5] : bfmlalt %q23 %q24 %q13 $0x05 $0x01 -> %q23
4feefb59 : bfmlalt v25.4s, v26.8h, v14.h[6] : bfmlalt %q25 %q26 %q14 $0x06 $0x01 -> %q25
4feffb9b : bfmlalt v27.4s, v28.8h, v15.h[6] : bfmlalt %q27 %q28 %q15 $0x06 $0x01 -> %q27
4ffffbff : bfmlalt v31.4s, v31.8h, v15.h[7] : bfmlalt %q31 %q31 %q15 $0x07 $0x01 -> %q31
# BFMMLA <Sd>.4S, <Hn>.8H, <Hm>.8H (BFMMLA-Q.QQ-Vec)
6e40ec00 : bfmmla v0.4s, v0.8h, v0.8h : bfmmla %q0 %q0 %q0 $0x01 -> %q0
6e44ec62 : bfmmla v2.4s, v3.8h, v4.8h : bfmmla %q2 %q3 %q4 $0x01 -> %q2
6e46eca4 : bfmmla v4.4s, v5.8h, v6.8h : bfmmla %q4 %q5 %q6 $0x01 -> %q4
6e48ece6 : bfmmla v6.4s, v7.8h, v8.8h : bfmmla %q6 %q7 %q8 $0x01 -> %q6
6e4aed28 : bfmmla v8.4s, v9.8h, v10.8h : bfmmla %q8 %q9 %q10 $0x01 -> %q8
6e4ced6a : bfmmla v10.4s, v11.8h, v12.8h : bfmmla %q10 %q11 %q12 $0x01 -> %q10
6e4eedac : bfmmla v12.4s, v13.8h, v14.8h : bfmmla %q12 %q13 %q14 $0x01 -> %q12
6e50edee : bfmmla v14.4s, v15.8h, v16.8h : bfmmla %q14 %q15 %q16 $0x01 -> %q14
6e52ee30 : bfmmla v16.4s, v17.8h, v18.8h : bfmmla %q16 %q17 %q18 $0x01 -> %q16
6e53ee51 : bfmmla v17.4s, v18.8h, v19.8h : bfmmla %q17 %q18 %q19 $0x01 -> %q17
6e55ee93 : bfmmla v19.4s, v20.8h, v21.8h : bfmmla %q19 %q20 %q21 $0x01 -> %q19
6e57eed5 : bfmmla v21.4s, v22.8h, v23.8h : bfmmla %q21 %q22 %q23 $0x01 -> %q21
6e59ef17 : bfmmla v23.4s, v24.8h, v25.8h : bfmmla %q23 %q24 %q25 $0x01 -> %q23
6e5bef59 : bfmmla v25.4s, v26.8h, v27.8h : bfmmla %q25 %q26 %q27 $0x01 -> %q25
6e5def9b : bfmmla v27.4s, v28.8h, v29.8h : bfmmla %q27 %q28 %q29 $0x01 -> %q27
6e5fefff : bfmmla v31.4s, v31.8h, v31.8h : bfmmla %q31 %q31 %q31 $0x01 -> %q31
......@@ -173,5 +173,17 @@ const reg_id_t Vdn_h_six_offset_0[6] = { DR_REG_H0, DR_REG_H5, DR_REG_H10,
DR_REG_H16, DR_REG_H21, DR_REG_H31 };
/* Six-entry register tables, one per register width (S, D, Q) and offset.
 * The *_offset_0 tables list registers 0, 5, 10, 16, 21, 31; the *_offset_1
 * tables list 0, 6, 11, 17, 22, 31; and the *_offset_2 tables list
 * 0, 7, 12, 18, 23, 31.  Indexing tables of different offsets with the same
 * position therefore yields distinct register numbers for every entry except
 * the first and the last.
 */
const reg_id_t Vdn_s_six_offset_0[6] = { DR_REG_S0, DR_REG_S5, DR_REG_S10,
DR_REG_S16, DR_REG_S21, DR_REG_S31 };
const reg_id_t Vdn_s_six_offset_1[6] = { DR_REG_S0, DR_REG_S6, DR_REG_S11,
DR_REG_S17, DR_REG_S22, DR_REG_S31 };
/* D (64-bit) register tables. */
const reg_id_t Vdn_d_six_offset_0[6] = { DR_REG_D0, DR_REG_D5, DR_REG_D10,
DR_REG_D16, DR_REG_D21, DR_REG_D31 };
const reg_id_t Vdn_d_six_offset_1[6] = { DR_REG_D0, DR_REG_D6, DR_REG_D11,
DR_REG_D17, DR_REG_D22, DR_REG_D31 };
const reg_id_t Vdn_d_six_offset_2[6] = { DR_REG_D0, DR_REG_D7, DR_REG_D12,
DR_REG_D18, DR_REG_D23, DR_REG_D31 };
/* Q (128-bit) register tables. */
const reg_id_t Vdn_q_six_offset_0[6] = { DR_REG_Q0, DR_REG_Q5, DR_REG_Q10,
DR_REG_Q16, DR_REG_Q21, DR_REG_Q31 };
const reg_id_t Vdn_q_six_offset_1[6] = { DR_REG_Q0, DR_REG_Q6, DR_REG_Q11,
DR_REG_Q17, DR_REG_Q22, DR_REG_Q31 };
const reg_id_t Vdn_q_six_offset_2[6] = { DR_REG_Q0, DR_REG_Q7, DR_REG_Q12,
DR_REG_Q18, DR_REG_Q23, DR_REG_Q31 };
......@@ -47,10 +47,201 @@
#include "ir_aarch64.h"
TEST_INSTR(bfcvt)
{
    /* BFCVT <Hd>, <Sn>: H destinations come from the offset_0 table and S
     * sources from the offset_1 table.
     */
    const char *const want[6] = {
        "bfcvt %s0 -> %h0",
        "bfcvt %s6 -> %h5",
        "bfcvt %s11 -> %h10",
        "bfcvt %s17 -> %h16",
        "bfcvt %s22 -> %h21",
        "bfcvt %s31 -> %h31",
    };

    TEST_LOOP(bfcvt, bfcvt, 6, want[i],
              opnd_create_reg(Vdn_h_six_offset_0[i]),
              opnd_create_reg(Vdn_s_six_offset_1[i]));
}
TEST_INSTR(bfcvtn2_vector)
{
    /* BFCVTN2 <Hd>.8H, <Sn>.4S: Q destination, Q source. */
    const char *const want[6] = {
        "bfcvtn2 %q0 $0x02 -> %q0",
        "bfcvtn2 %q6 $0x02 -> %q5",
        "bfcvtn2 %q11 $0x02 -> %q10",
        "bfcvtn2 %q17 $0x02 -> %q16",
        "bfcvtn2 %q22 $0x02 -> %q21",
        "bfcvtn2 %q31 $0x02 -> %q31",
    };

    TEST_LOOP(bfcvtn2, bfcvtn2_vector, 6, want[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]));
}
TEST_INSTR(bfcvtn_vector)
{
    /* BFCVTN <Hd>.4H, <Sn>.4S: D destination, Q source. */
    const char *const want[6] = {
        "bfcvtn %q0 $0x02 -> %d0",
        "bfcvtn %q6 $0x02 -> %d5",
        "bfcvtn %q11 $0x02 -> %d10",
        "bfcvtn %q17 $0x02 -> %d16",
        "bfcvtn %q22 $0x02 -> %d21",
        "bfcvtn %q31 $0x02 -> %d31",
    };

    TEST_LOOP(bfcvtn, bfcvtn_vector, 6, want[i],
              opnd_create_reg(Vdn_d_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]));
}
TEST_INSTR(bfdot_vector)
{
    /* BFDOT <Sd>.<Ts>, <Hn>.<Tb>, <Hm>.<Tb>: first with all-D operands,
     * then with all-Q operands.
     */
    const char *const want_d_form[6] = {
        "bfdot %d0 %d0 %d0 $0x01 -> %d0",
        "bfdot %d5 %d6 %d7 $0x01 -> %d5",
        "bfdot %d10 %d11 %d12 $0x01 -> %d10",
        "bfdot %d16 %d17 %d18 $0x01 -> %d16",
        "bfdot %d21 %d22 %d23 $0x01 -> %d21",
        "bfdot %d31 %d31 %d31 $0x01 -> %d31",
    };
    TEST_LOOP(bfdot, bfdot_vector, 6, want_d_form[i],
              opnd_create_reg(Vdn_d_six_offset_0[i]),
              opnd_create_reg(Vdn_d_six_offset_1[i]),
              opnd_create_reg(Vdn_d_six_offset_2[i]));

    const char *const want_q_form[6] = {
        "bfdot %q0 %q0 %q0 $0x01 -> %q0",
        "bfdot %q5 %q6 %q7 $0x01 -> %q5",
        "bfdot %q10 %q11 %q12 $0x01 -> %q10",
        "bfdot %q16 %q17 %q18 $0x01 -> %q16",
        "bfdot %q21 %q22 %q23 $0x01 -> %q21",
        "bfdot %q31 %q31 %q31 $0x01 -> %q31",
    };
    TEST_LOOP(bfdot, bfdot_vector, 6, want_q_form[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]),
              opnd_create_reg(Vdn_q_six_offset_2[i]));
}
TEST_INSTR(bfdot_vector_idx)
{
    /* BFDOT <Sd>.<Ts>, <Hn>.<Tb>, <Hm>.2H[<index>]: the same index list
     * drives both the D-register and the Q-register form; Rm is taken from
     * the Q table in both.
     */
    static const uint elem_index[6] = { 0, 3, 0, 1, 1, 3 };

    const char *const want_d_form[6] = {
        "bfdot %d0 %d0 %q0 $0x00 $0x01 -> %d0",
        "bfdot %d5 %d6 %q7 $0x03 $0x01 -> %d5",
        "bfdot %d10 %d11 %q12 $0x00 $0x01 -> %d10",
        "bfdot %d16 %d17 %q18 $0x01 $0x01 -> %d16",
        "bfdot %d21 %d22 %q23 $0x01 $0x01 -> %d21",
        "bfdot %d31 %d31 %q31 $0x03 $0x01 -> %d31",
    };
    TEST_LOOP(bfdot, bfdot_vector_idx, 6, want_d_form[i],
              opnd_create_reg(Vdn_d_six_offset_0[i]),
              opnd_create_reg(Vdn_d_six_offset_1[i]),
              opnd_create_reg(Vdn_q_six_offset_2[i]),
              opnd_create_immed_uint(elem_index[i], OPSZ_2b));

    const char *const want_q_form[6] = {
        "bfdot %q0 %q0 %q0 $0x00 $0x01 -> %q0",
        "bfdot %q5 %q6 %q7 $0x03 $0x01 -> %q5",
        "bfdot %q10 %q11 %q12 $0x00 $0x01 -> %q10",
        "bfdot %q16 %q17 %q18 $0x01 $0x01 -> %q16",
        "bfdot %q21 %q22 %q23 $0x01 $0x01 -> %q21",
        "bfdot %q31 %q31 %q31 $0x03 $0x01 -> %q31",
    };
    TEST_LOOP(bfdot, bfdot_vector_idx, 6, want_q_form[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]),
              opnd_create_reg(Vdn_q_six_offset_2[i]),
              opnd_create_immed_uint(elem_index[i], OPSZ_2b));
}
TEST_INSTR(bfmlalb_vector)
{
    /* BFMLALB <Sd>.4S, <Hn>.8H, <Hm>.8H: all operands are Q registers. */
    const char *const want[6] = {
        "bfmlalb %q0 %q0 %q0 $0x01 -> %q0",
        "bfmlalb %q5 %q6 %q7 $0x01 -> %q5",
        "bfmlalb %q10 %q11 %q12 $0x01 -> %q10",
        "bfmlalb %q16 %q17 %q18 $0x01 -> %q16",
        "bfmlalb %q21 %q22 %q23 $0x01 -> %q21",
        "bfmlalb %q31 %q31 %q31 $0x01 -> %q31",
    };

    TEST_LOOP(bfmlalb, bfmlalb_vector, 6, want[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]),
              opnd_create_reg(Vdn_q_six_offset_2[i]));
}
TEST_INSTR(bfmlalb_vector_idx)
{
    /* BFMLALB <Sd>.4S, <Hn>.8H, <Hm>.H[<index>]: Rm uses a dedicated list
     * that stays within Q0-Q15, paired with indices from 0 to 7.
     */
    static const reg_id_t rm_regs[6] = {
        DR_REG_Q0, DR_REG_Q4, DR_REG_Q7, DR_REG_Q10, DR_REG_Q12, DR_REG_Q15,
    };
    static const uint elem_index[6] = { 0, 4, 5, 7, 0, 7 };

    const char *const want[6] = {
        "bfmlalb %q0 %q0 %q0 $0x00 $0x01 -> %q0",
        "bfmlalb %q5 %q6 %q4 $0x04 $0x01 -> %q5",
        "bfmlalb %q10 %q11 %q7 $0x05 $0x01 -> %q10",
        "bfmlalb %q16 %q17 %q10 $0x07 $0x01 -> %q16",
        "bfmlalb %q21 %q22 %q12 $0x00 $0x01 -> %q21",
        "bfmlalb %q31 %q31 %q15 $0x07 $0x01 -> %q31",
    };

    TEST_LOOP(bfmlalb, bfmlalb_vector_idx, 6, want[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]),
              opnd_create_reg(rm_regs[i]),
              opnd_create_immed_uint(elem_index[i], OPSZ_3b));
}
TEST_INSTR(bfmlalt_vector)
{
    /* BFMLALT <Sd>.4S, <Hn>.8H, <Hm>.8H: all operands are Q registers. */
    const char *const want[6] = {
        "bfmlalt %q0 %q0 %q0 $0x01 -> %q0",
        "bfmlalt %q5 %q6 %q7 $0x01 -> %q5",
        "bfmlalt %q10 %q11 %q12 $0x01 -> %q10",
        "bfmlalt %q16 %q17 %q18 $0x01 -> %q16",
        "bfmlalt %q21 %q22 %q23 $0x01 -> %q21",
        "bfmlalt %q31 %q31 %q31 $0x01 -> %q31",
    };

    TEST_LOOP(bfmlalt, bfmlalt_vector, 6, want[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]),
              opnd_create_reg(Vdn_q_six_offset_2[i]));
}
TEST_INSTR(bfmlalt_vector_idx)
{
    /* BFMLALT <Sd>.4S, <Hn>.8H, <Hm>.H[<index>]: Rm uses a dedicated list
     * that stays within Q0-Q15, paired with indices from 0 to 7.
     */
    static const reg_id_t rm_regs[6] = {
        DR_REG_Q0, DR_REG_Q4, DR_REG_Q7, DR_REG_Q10, DR_REG_Q12, DR_REG_Q15,
    };
    static const uint elem_index[6] = { 0, 4, 5, 7, 0, 7 };

    const char *const want[6] = {
        "bfmlalt %q0 %q0 %q0 $0x00 $0x01 -> %q0",
        "bfmlalt %q5 %q6 %q4 $0x04 $0x01 -> %q5",
        "bfmlalt %q10 %q11 %q7 $0x05 $0x01 -> %q10",
        "bfmlalt %q16 %q17 %q10 $0x07 $0x01 -> %q16",
        "bfmlalt %q21 %q22 %q12 $0x00 $0x01 -> %q21",
        "bfmlalt %q31 %q31 %q15 $0x07 $0x01 -> %q31",
    };

    TEST_LOOP(bfmlalt, bfmlalt_vector_idx, 6, want[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]),
              opnd_create_reg(rm_regs[i]),
              opnd_create_immed_uint(elem_index[i], OPSZ_3b));
}
TEST_INSTR(bfmmla_vector)
{
    /* BFMMLA <Sd>.4S, <Hn>.8H, <Hm>.8H: all operands are Q registers. */
    const char *const want[6] = {
        "bfmmla %q0 %q0 %q0 $0x01 -> %q0",
        "bfmmla %q5 %q6 %q7 $0x01 -> %q5",
        "bfmmla %q10 %q11 %q12 $0x01 -> %q10",
        "bfmmla %q16 %q17 %q18 $0x01 -> %q16",
        "bfmmla %q21 %q22 %q23 $0x01 -> %q21",
        "bfmmla %q31 %q31 %q31 $0x01 -> %q31",
    };

    TEST_LOOP(bfmmla, bfmmla_vector, 6, want[i],
              opnd_create_reg(Vdn_q_six_offset_0[i]),
              opnd_create_reg(Vdn_q_six_offset_1[i]),
              opnd_create_reg(Vdn_q_six_offset_2[i]));
}
int
main(int argc, char *argv[])
{
#ifdef STANDALONE_DECODER
void *dcontext = GLOBAL_DCONTEXT;
#else
void *dcontext = dr_standalone_init();
#endif
bool result = true;
bool test_result;
instr_t *instr;
RUN_INSTR_TEST(bfcvt);
RUN_INSTR_TEST(bfcvtn2_vector);
RUN_INSTR_TEST(bfcvtn_vector);
RUN_INSTR_TEST(bfdot_vector);
RUN_INSTR_TEST(bfdot_vector_idx);
RUN_INSTR_TEST(bfmlalb_vector);
RUN_INSTR_TEST(bfmlalb_vector_idx);
RUN_INSTR_TEST(bfmlalt_vector);
RUN_INSTR_TEST(bfmlalt_vector_idx);
RUN_INSTR_TEST(bfmmla_vector);
print("All v8.6 tests complete.\n");
#ifndef STANDALONE_DECODER
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment