[DAGCombine] Produce better code for constant splats

author Michael Kuperstein <michael.m.kuperstein@intel.com>

Thu, 22 Jan 2015 12:37:23 +0000 (12:37 +0000)

committer Michael Kuperstein <michael.m.kuperstein@intel.com>

Thu, 22 Jan 2015 12:37:23 +0000 (12:37 +0000)
author Michael Kuperstein <michael.m.kuperstein@intel.com>
Thu, 22 Jan 2015 12:37:23 +0000 (12:37 +0000)
committer Michael Kuperstein <michael.m.kuperstein@intel.com>
Thu, 22 Jan 2015 12:37:23 +0000 (12:37 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index a06f35ee9378a3c27a4ab37184277fdb8c7f3708..a9bcefc5cd2db096f3920d7da86ecc92db497bcf 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11490,7 +11490,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
    }
  
    // If it is a splat, check if the argument vector is another splat or a
-  // build_vector with all scalar elements the same.
+  // build_vector.
    if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
      SDNode *V = N0.getNode();
  
@@ -11527,6 +11527,24 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        // Splat of <x, x, x, x>, return <x, x, x, x>
        if (AllSame)
          return N0;
+
+      // If the splatted element is a constant, just build the vector out of
+      // constants directly.
+      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+      if (isa<ConstantSDNode>(Splatted) || isa<ConstantFPSDNode>(Splatted)) {
+        SmallVector<SDValue, 8> Ops(NumElts, Splatted);
+        // Hold the node by value: getNode() returns a temporary SDValue, and
+        // binding it to a non-const reference only compiles as an MSVC
+        // extension.
+        SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+                                    V->getValueType(0), Ops);
+
+        // We may have jumped through bitcasts, so the type of the
+        // BUILD_VECTOR may not match the type of the shuffle.
+        if (V->getValueType(0) != VT)
+          NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+        return NewBV;
+      }
      }
    }
  
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index f75d5f4b2bd200a8ef1466af8193fc86c5a21a3e..c5ef77a3195287a8980c7b2d4c33b94cc0b0a0de 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1513,9 +1513,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
      return getUNDEF(VT);
  
    // If Identity shuffle return that node.
-  bool Identity = true;
+  bool Identity = true, AllSame = true;
    for (unsigned i = 0; i != NElts; ++i) {
      if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
+    if (MaskVec[i] != MaskVec[0]) AllSame = false;
    }
    if (Identity && NElts)
      return N1;
@@ -1549,6 +1550,26 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
            if (C->isNullValue())
              return N1;
        }
+
+      // If the shuffle itself creates a constant splat, build the vector
+      // directly.
+      if (AllSame) {
+        const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+        if (isa<ConstantSDNode>(Splatted) || isa<ConstantFPSDNode>(Splatted)) {
+          SmallVector<SDValue, 8> Ops(NElts, Splatted);
+          // Hold the node by value: getNode() returns a temporary SDValue,
+          // and binding it to a non-const reference only compiles as an
+          // MSVC extension.
+          SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl,
+                                  BV->getValueType(0), Ops);
+
+          // We may have jumped through bitcasts, so the type of the
+          // BUILD_VECTOR may not match the type of the shuffle.
+          if (BV->getValueType(0) != VT)
+            NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+          return NewBV;
+        }
+      }
      }
    }
  
diff --git a/test/CodeGen/X86/splat-const.ll b/test/CodeGen/X86/splat-const.ll

new file mode 100644 (file)

index 0000000..19997b0
--- /dev/null
+++ b/test/CodeGen/X86/splat-const.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mcpu=penryn | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mcpu=sandybridge | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=AVX2
+; This checks that lowering for creation of constant vectors is sane and
+; doesn't use redundant shuffles. (fixes PR22276)
+target triple = "x86_64-unknown-unknown"
+
+define <4 x i32> @zero_vector() {
+; SSE-LABEL: zero_vector:
+; SSE: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
+; AVX-LABEL: zero_vector:
+; AVX: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+; AVX2-LABEL: zero_vector:
+; AVX2: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: retq
+  %zero = insertelement <4 x i32> undef, i32 0, i32 0
+  %splat = shufflevector <4 x i32> %zero, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat
+}
+
+; Note that for the "const_vector" versions, lowering that uses a shuffle
+; instead of a load would be legitimate, if it's a single broadcast shuffle.
+; (as opposed to the previous mess)
+; However, this is not the current preferred lowering.
+define <4 x i32> @const_vector() {
+; SSE-LABEL: const_vector:
+; SSE: movaps {{.*}}, %xmm0 # xmm0 = [42,42,42,42]
+; SSE-NEXT: retq
+; AVX-LABEL: const_vector:
+; AVX: vmovaps {{.*}}, %xmm0 # xmm0 = [42,42,42,42]
+; AVX-NEXT: retq
+; AVX2-LABEL: const_vector:
+; AVX2: vbroadcastss {{[^%].*}}, %xmm0
+; AVX2-NEXT: retq
+  %const = insertelement <4 x i32> undef, i32 42, i32 0
+  %splat = shufflevector <4 x i32> %const, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll

index 3295e2b206bb10d09cda4341bf5dd895df1625de..23b97f002a0e561e1b9e3df0d7b3d2ce746c4a08 100644 (file)
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1003,14 +1003,14 @@ define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
  ; X32-LABEL: insertps_pr20411:
  ; X32:       ## BB#0:
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
+; X32-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
  ; X32-NEXT:    insertps $-36, LCPI49_1+12, %xmm0
  ; X32-NEXT:    movups %xmm0, (%eax)
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: insertps_pr20411:
  ; X64:       ## BB#0:
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
+; X64-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
  ; X64-NEXT:    insertps $-36, LCPI49_1+{{.*}}(%rip), %xmm0
  ; X64-NEXT:    movups %xmm0, (%rdi)
  ; X64-NEXT:    retq
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll

index 70fdbb7c9c82c46debc73f24db3c58baa23f00b6..2aa870f16eb4a6f557a112204c6bde70ba58e51b 100644 (file)
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -82,8 +82,8 @@ define void @shuf5(<8 x i8>* %p) nounwind {
  ; CHECK-LABEL: shuf5:
  ; CHECK:       # BB#0:
  ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movdqa {{.*#+}} xmm0 = <4,33,u,u,u,u,u,u>
-; CHECK-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; CHECK-NEXT:    movdqa {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33]
+; CHECK-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
  ; CHECK-NEXT:    movlpd %xmm0, (%eax)
  ; CHECK-NEXT:    retl
    %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
author	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Thu, 22 Jan 2015 12:37:23 +0000 (12:37 +0000)
committer	Michael Kuperstein <michael.m.kuperstein@intel.com>
	Thu, 22 Jan 2015 12:37:23 +0000 (12:37 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
test/CodeGen/X86/splat-const.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/sse41.ll		patch \| blob \| history
test/CodeGen/X86/widen_shuffle-1.ll		patch \| blob \| history