summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: b727b13)
raw | patch | inline | side by side (parent: b727b13)
author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | |
Wed, 21 Jan 2015 17:07:06 +0000 (17:07 +0000) | ||
committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | |
Wed, 21 Jan 2015 17:07:06 +0000 (17:07 +0000) |
Now that we can fully specify extload legality, we can declare them
legal for the PMOVSX/PMOVZX instructions. This for instance enables
a DAGCombine to fire on code such as
(and (<zextload-equivalent> ...), <redundant mask>)
to turn it into:
(zextload ...)
as seen in the testcase changes.
There is one regression, in widen_load-2.ll: we're no longer able
to do store-to-load forwarding with illegal extload memory types.
This will be addressed separately.
Differential Revision: http://reviews.llvm.org/D6533
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226676 91177308-0d34-0410-b5e6-96231b3b80d8
legal for the PMOVSX/PMOVZX instructions. This for instance enables
a DAGCombine to fire on code such as
(and (<zextload-equivalent> ...), <redundant mask>)
to turn it into:
(zextload ...)
as seen in the testcase changes.
There is one regression, in widen_load-2.ll: we're no longer able
to do store-to-load forwarding with illegal extload memory types.
This will be addressed separately.
Differential Revision: http://reviews.llvm.org/D6533
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226676 91177308-0d34-0410-b5e6-96231b3b80d8
index 40609a6df379e174baf93a06612003d27534e534..d887fd5dbf7386c8b33e16c1d7c3a8cead9340c8 100644 (file)
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
}
+ // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
+
// i8 and i16 vectors are custom because the source register and source
// source memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
// Custom CTPOP always performs better on natively supported v8i32
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
+
+ // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
+
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
index af51ced3397cf07d2d630e40a1a995a4875d4d44..953aa281cf377e55860b8e34cb3965415f355cad 100644 (file)
defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;
// AVX2 Patterns
-multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
+multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> {
// Register-Register patterns
def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
(!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
+ // Simple Register-Memory patterns
+ def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
+ def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
+
+ def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
+
+ def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+ (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
+
// AVX2 Register-Memory patterns
def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
}
let Predicates = [HasAVX2] in {
- defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
- defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
+ defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
}
// SSE4.1/AVX patterns.
-multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
- PatFrag ExtLoad16> {
+multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, PatFrag ExtLoad16> {
def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
(!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
(!cast<I>(OpcPrefix#DQrr) VR128:$src)>;
+ def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
+ def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
+ (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
+
+ def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
+ (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
+
+ def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
+ (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
+
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWrm) addr:$src)>;
def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
}
let Predicates = [HasAVX] in {
- defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
+ defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
}
let Predicates = [UseSSE41] in {
- defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
+ defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
}
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll b/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll
index 44eb42adb9f821d55e4544a8f926f12f1e483fd3..4054931be3798622d354bb6fea94f250063d1862 100644 (file)
; CHECK-LABEL: test_avx2_pmovx_256
; We really don't care about the generated code.
; CHECK: vpmovzxbd
-; CHECK: vpbroadcastd
-; CHECK: vpand
; CHECK: vcvtdq2ps
; CHECK: vmovups
; CHECK: vzeroupper
index 0ee99875264f29e43867cb21089759ace3b8a5d7..5e0c2dae0834378c09bc1a7cc9860911273d9319 100644 (file)
entry:
%G = load <4 x i8>* %p
;CHECK: movl
-;CHECK: pmovzxbd
-;CHECK: pand
+;CHECK: pmovzxbd (%
%K = inttoptr <4 x i8> %G to <4 x i32*>
;CHECK: ret
ret <4 x i32*> %K
index a3cc407911797aff6af186c12042150f2a76b8b7..c6bd96421d751a4743588b14117707b9ae198488 100644 (file)
@@ -191,8 +191,9 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; CHECK-NEXT: movd %[[CONSTANT1]], %e[[R1:[abcd]]]x
; CHECK-NEXT: movw %[[R1]]x, (%[[PTR1:.*]])
; CHECK-NEXT: movb $1, 2(%[[PTR1]])
-; CHECK-NEXT: pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
-; CHECK-NEXT: pand {{.*}}, %[[X0]]
+; CHECK-NEXT: movl (%[[PTR0]]), [[TMP1:%e[abcd]+x]]
+; CHECK-NEXT: movl [[TMP1]], [[TMP2:.*]]
+; CHECK-NEXT: pmovzxbd [[TMP2]], %[[X0:xmm[0-9]+]]
; CHECK-NEXT: pextrd $1, %[[X0]], %e[[R0:[abcd]]]x
; CHECK-NEXT: shrl %e[[R0]]x
; CHECK-NEXT: movd %[[X0]], %e[[R1:[abcd]]]x