summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 015aedb)
raw | patch | inline | side by side (parent: 015aedb)
author | Robert Khasanov <rob.khasanov@gmail.com> | |
Thu, 30 Oct 2014 14:21:47 +0000 (14:21 +0000) | ||
committer | Robert Khasanov <rob.khasanov@gmail.com> | |
Thu, 30 Oct 2014 14:21:47 +0000 (14:21 +0000) |
Refactored through AVX512_maskable
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220908 91177308-0d34-0410-b5e6-96231b3b80d8
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220908 91177308-0d34-0410-b5e6-96231b3b80d8
lib/Target/X86/X86InstrAVX512.td | patch | blob | history | |
test/MC/X86/avx512-encodings.s | patch | blob | history | |
test/MC/X86/x86-64-avx512f_vl.s | patch | blob | history |
index 61e6bc502c17d4cab81849e6a922e2caba467dca..3ff37d4537673e60dfb6307e2766191ff8b3fde1 100644 (file)
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
+def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
+ v4f32x_info>;
+def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
+ v2f64x_info>;
// This multiclass generates the masking variants from the non-masking
// variant. It only provides the assembly pieces for the masking variants.
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
-multiclass avx512_fp_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass DestRC,
- RegisterClass SrcRC, X86MemOperand x86memop> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs DestRC:$dst), (ins SrcRC:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
- []>, EVEX;
- def rm : AVX5128I<opc, MRMSrcMem, (outs DestRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),[]>, EVEX;
+multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
+ ValueType svt, X86VectorVTInfo _> {
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
+ "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
+ T8PD, EVEX;
+
+ let mayLoad = 1 in {
+ defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src),
+ "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
+ (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
+ T8PD, EVEX;
+ }
}
+
+multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+ EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
+ EVEX_V256;
+ }
+}
+
let ExeDomain = SSEPackedSingle in {
- defm VBROADCASTSSZ : avx512_fp_broadcast<0x18, "vbroadcastss", VR512,
- VR128X, f32mem>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+ let Predicates = [HasVLX] in {
+ defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
+ v4f32, v4f32x_info>, EVEX_V128,
+ EVEX_CD8<32, CD8VT1>;
+ }
}
let ExeDomain = SSEPackedDouble in {
- defm VBROADCASTSDZ : avx512_fp_broadcast<0x19, "vbroadcastsd", VR512,
- VR128X, f64mem>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
}
def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDZrm addr:$src)>;
+ (VBROADCASTSDZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
- (VBROADCASTSSZrm addr:$src)>;
+ (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
- (VBROADCASTSDZrm addr:$src)>;
+ (VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
RegisterClass SrcRC, RegisterClass KRC> {
(VPBROADCASTQZrr VR128X:$src)>;
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
+ (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
+ (VBROADCASTSDZr VR128X:$src)>;
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
- (VBROADCASTSSZrr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
+ (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
- (VBROADCASTSDZrr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
+ (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
(VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
(VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
- (VBROADCASTSSZrr VR128X:$src)>;
+ (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
- (VBROADCASTSDZrr VR128X:$src)>;
+ (VBROADCASTSDZr VR128X:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
- (VBROADCASTSSZrr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+ (VBROADCASTSSZr (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
- (VBROADCASTSDZrr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+ (VBROADCASTSDZr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
let Predicates = [HasAVX512] in {
index 231226f347ea9b5c8acc06660c0663a4a314de6e..c734da8fddf08faf28a85aafad345f1be3553aee 100644 (file)
// CHECK: encoding: [0x62,0xe1,0x14,0x58,0x58,0x92,0xfc,0xfd,0xff,0xff]
vaddps -516(%rdx){1to16}, %zmm13, %zmm18
+// CHECK: vbroadcastsd (%rcx), %zmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x31]
+ vbroadcastsd (%rcx), %zmm30
+
+// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4}
+// CHECK: encoding: [0x62,0x62,0xfd,0x4c,0x19,0x31]
+ vbroadcastsd (%rcx), %zmm30 {%k4}
+
+// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x62,0xfd,0xcc,0x19,0x31]
+ vbroadcastsd (%rcx), %zmm30 {%k4} {z}
+
+// CHECK: vbroadcastsd 291(%rax,%r14,8), %zmm30
+// CHECK: encoding: [0x62,0x22,0xfd,0x48,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcastsd 291(%rax,%r14,8), %zmm30
+
+// CHECK: vbroadcastsd 1016(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x7f]
+ vbroadcastsd 1016(%rdx), %zmm30
+
+// CHECK: vbroadcastsd 1024(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0x00,0x04,0x00,0x00]
+ vbroadcastsd 1024(%rdx), %zmm30
+
+// CHECK: vbroadcastsd -1024(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x80]
+ vbroadcastsd -1024(%rdx), %zmm30
+
+// CHECK: vbroadcastsd -1032(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0xf8,0xfb,0xff,0xff]
+ vbroadcastsd -1032(%rdx), %zmm30
+
+// CHECK: vbroadcastsd %xmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x19,0xee]
+ vbroadcastsd %xmm22, %zmm21
+
+// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4f,0x19,0xee]
+ vbroadcastsd %xmm22, %zmm21 {%k7}
+
+// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xcf,0x19,0xee]
+ vbroadcastsd %xmm22, %zmm21 {%k7} {z}
+
+// CHECK: vbroadcastss (%rcx), %zmm3
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x19]
+ vbroadcastss (%rcx), %zmm3
+
+// CHECK: vbroadcastss (%rcx), %zmm3 {%k4}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4c,0x18,0x19]
+ vbroadcastss (%rcx), %zmm3 {%k4}
+
+// CHECK: vbroadcastss (%rcx), %zmm3 {%k4} {z}
+// CHECK: encoding: [0x62,0xf2,0x7d,0xcc,0x18,0x19]
+ vbroadcastss (%rcx), %zmm3 {%k4} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %zmm3
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcastss 291(%rax,%r14,8), %zmm3
+
+// CHECK: vbroadcastss 508(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x7f]
+ vbroadcastss 508(%rdx), %zmm3
+
+// CHECK: vbroadcastss 512(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0x00,0x02,0x00,0x00]
+ vbroadcastss 512(%rdx), %zmm3
+
+// CHECK: vbroadcastss -512(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x80]
+ vbroadcastss -512(%rdx), %zmm3
+
+// CHECK: vbroadcastss -516(%rdx), %zmm3
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0xfc,0xfd,0xff,0xff]
+ vbroadcastss -516(%rdx), %zmm3
+
+// CHECK: vbroadcastss %xmm18, %zmm18
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x18,0xd2]
+ vbroadcastss %xmm18, %zmm18
+
+// CHECK: vbroadcastss %xmm18, %zmm18 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x18,0xd2]
+ vbroadcastss %xmm18, %zmm18 {%k2}
+
+// CHECK: vbroadcastss %xmm18, %zmm18 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0xca,0x18,0xd2]
+ vbroadcastss %xmm18, %zmm18 {%k2} {z}
+
// CHECK: vdivpd %zmm11, %zmm6, %zmm18
// CHECK: encoding: [0x62,0xc1,0xcd,0x48,0x5e,0xd3]
vdivpd %zmm11, %zmm6, %zmm18
index a0ba3b365b218e5979f2cf0e56709733b457b649..973a553a8abb37bfa1f635185b4acf286f1cd143 100644 (file)
// CHECK: encoding: [0x62,0x61,0x2c,0x30,0x58,0x8a,0xfc,0xfd,0xff,0xff]
vaddps -516(%rdx){1to8}, %ymm26, %ymm25
+// CHECK: vbroadcastsd (%rcx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x31]
+ vbroadcastsd (%rcx), %ymm22
+
+// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2d,0x19,0x31]
+ vbroadcastsd (%rcx), %ymm22 {%k5}
+
+// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5} {z}
+// CHECK: encoding: [0x62,0xe2,0xfd,0xad,0x19,0x31]
+ vbroadcastsd (%rcx), %ymm22 {%k5} {z}
+
+// CHECK: vbroadcastsd 291(%rax,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcastsd 291(%rax,%r14,8), %ymm22
+
+// CHECK: vbroadcastsd 1016(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x7f]
+ vbroadcastsd 1016(%rdx), %ymm22
+
+// CHECK: vbroadcastsd 1024(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0x00,0x04,0x00,0x00]
+ vbroadcastsd 1024(%rdx), %ymm22
+
+// CHECK: vbroadcastsd -1024(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x80]
+ vbroadcastsd -1024(%rdx), %ymm22
+
+// CHECK: vbroadcastsd -1032(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0xf8,0xfb,0xff,0xff]
+ vbroadcastsd -1032(%rdx), %ymm22
+
+// CHECK: vbroadcastsd %xmm17, %ymm19
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x19,0xd9]
+ vbroadcastsd %xmm17, %ymm19
+
+// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2e,0x19,0xd9]
+ vbroadcastsd %xmm17, %ymm19 {%k6}
+
+// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xae,0x19,0xd9]
+ vbroadcastsd %xmm17, %ymm19 {%k6} {z}
+
+// CHECK: vbroadcastss (%rcx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x29]
+ vbroadcastss (%rcx), %xmm21
+
+// CHECK: vbroadcastss (%rcx), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x18,0x29]
+ vbroadcastss (%rcx), %xmm21 {%k2}
+
+// CHECK: vbroadcastss (%rcx), %xmm21 {%k2} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x8a,0x18,0x29]
+ vbroadcastss (%rcx), %xmm21 {%k2} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x18,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcastss 291(%rax,%r14,8), %xmm21
+
+// CHECK: vbroadcastss 508(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x7f]
+ vbroadcastss 508(%rdx), %xmm21
+
+// CHECK: vbroadcastss 512(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0x00,0x02,0x00,0x00]
+ vbroadcastss 512(%rdx), %xmm21
+
+// CHECK: vbroadcastss -512(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x80]
+ vbroadcastss -512(%rdx), %xmm21
+
+// CHECK: vbroadcastss -516(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0xfc,0xfd,0xff,0xff]
+ vbroadcastss -516(%rdx), %xmm21
+
+// CHECK: vbroadcastss (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x31]
+ vbroadcastss (%rcx), %ymm30
+
+// CHECK: vbroadcastss (%rcx), %ymm30 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x29,0x18,0x31]
+ vbroadcastss (%rcx), %ymm30 {%k1}
+
+// CHECK: vbroadcastss (%rcx), %ymm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xa9,0x18,0x31]
+ vbroadcastss (%rcx), %ymm30 {%k1} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x18,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcastss 291(%rax,%r14,8), %ymm30
+
+// CHECK: vbroadcastss 508(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x7f]
+ vbroadcastss 508(%rdx), %ymm30
+
+// CHECK: vbroadcastss 512(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0x00,0x02,0x00,0x00]
+ vbroadcastss 512(%rdx), %ymm30
+
+// CHECK: vbroadcastss -512(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x80]
+ vbroadcastss -512(%rdx), %ymm30
+
+// CHECK: vbroadcastss -516(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0xfc,0xfd,0xff,0xff]
+ vbroadcastss -516(%rdx), %ymm30
+
+// CHECK: vbroadcastss %xmm24, %xmm24
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x18,0xc0]
+ vbroadcastss %xmm24, %xmm24
+
+// CHECK: vbroadcastss %xmm24, %xmm24 {%k2}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0a,0x18,0xc0]
+ vbroadcastss %xmm24, %xmm24 {%k2}
+
+// CHECK: vbroadcastss %xmm24, %xmm24 {%k2} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8a,0x18,0xc0]
+ vbroadcastss %xmm24, %xmm24 {%k2} {z}
+
+// CHECK: vbroadcastss %xmm28, %ymm24
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x18,0xc4]
+ vbroadcastss %xmm28, %ymm24
+
+// CHECK: vbroadcastss %xmm28, %ymm24 {%k6}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x18,0xc4]
+ vbroadcastss %xmm28, %ymm24 {%k6}
+
+// CHECK: vbroadcastss %xmm28, %ymm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x18,0xc4]
+ vbroadcastss %xmm28, %ymm24 {%k6} {z}
+
// CHECK: vdivpd %xmm27, %xmm18, %xmm19
// CHECK: encoding: [0x62,0x81,0xed,0x00,0x5e,0xdb]
vdivpd %xmm27, %xmm18, %xmm19