From: Tim Northover Date: Wed, 21 Jan 2015 23:17:19 +0000 (+0000) Subject: DAGCombine: fold (or (and X, M), (and X, N)) -> (and X, (or M, N)) X-Git-Url: https://git.ti.com/gitweb?p=opencl%2Fllvm.git;a=commitdiff_plain;h=f5f8a3e6a6ed8f72d1ed57c29e5fc6686bbcbedd DAGCombine: fold (or (and X, M), (and X, N)) -> (and X, (or M, N)) It can help with argument juggling on some targets, and is generally a good idea. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226740 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3bde991879..849508891d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3527,6 +3527,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X); + } + // See if this is some rotate idiom. 
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); diff --git a/test/CodeGen/AArch64/or-combine.ll b/test/CodeGen/AArch64/or-combine.ll new file mode 100644 index 0000000000..c6c343a3f7 --- /dev/null +++ b/test/CodeGen/AArch64/or-combine.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +define i32 @test_consts(i32 %in) { +; CHECK-LABEL: test_consts: +; CHECK-NOT: bfxil +; CHECK-NOT: and +; CHECK-NOT: orr +; CHECK: ret + + %lo = and i32 %in, 65535 + %hi = and i32 %in, -65536 + %res = or i32 %lo, %hi + ret i32 %res +} + +define i32 @test_generic(i32 %in, i32 %mask1, i32 %mask2) { +; CHECK-LABEL: test_generic: +; CHECK: orr [[FULL_MASK:w[0-9]+]], w1, w2 +; CHECK: and w0, w0, [[FULL_MASK]] + + %lo = and i32 %in, %mask1 + %hi = and i32 %in, %mask2 + %res = or i32 %lo, %hi + ret i32 %res +} + +; In this case the transformation isn't profitable, since %lo and %hi +; are used more than once. +define [3 x i32] @test_reuse(i32 %in, i32 %mask1, i32 %mask2) { +; CHECK-LABEL: test_reuse: +; CHECK-DAG: and w1, w0, w1 +; CHECK-DAG: and w2, w0, w2 +; CHECK-DAG: orr w0, w1, w2 + + %lo = and i32 %in, %mask1 + %hi = and i32 %in, %mask2 + %recombine = or i32 %lo, %hi + + %res.tmp0 = insertvalue [3 x i32] undef, i32 %recombine, 0 + %res.tmp1 = insertvalue [3 x i32] %res.tmp0, i32 %lo, 1 + %res = insertvalue [3 x i32] %res.tmp1, i32 %hi, 2 + + ret [3 x i32] %res +} diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll index 4a94acaba0..45c5d44992 100644 --- a/test/CodeGen/R600/extload.ll +++ b/test/CodeGen/R600/extload.ll @@ -2,8 +2,9 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}anyext_load_i8: -; EG: AND_INT -; EG: 255 +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], +; EG: VTX_READ_32 [[VAL]] + define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind { 
%cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)* %load = load i32 addrspace(1)* %cast, align 1 @@ -14,10 +15,9 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac } ; FUNC-LABEL: {{^}}anyext_load_i16: -; EG: AND_INT -; EG: AND_INT -; EG-DAG: 65535 -; EG-DAG: -65536 +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], +; EG: VTX_READ_32 [[VAL]] + define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind { %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)* %load = load i32 addrspace(1)* %cast, align 1 @@ -28,8 +28,8 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs } ; FUNC-LABEL: {{^}}anyext_load_lds_i8: -; EG: AND_INT -; EG: 255 +; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]] +; EG: LDS_WRITE * [[VAL]] define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind { %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)* %load = load i32 addrspace(3)* %cast, align 1 @@ -40,10 +40,8 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr } ; FUNC-LABEL: {{^}}anyext_load_lds_i16: -; EG: AND_INT -; EG: AND_INT -; EG-DAG: 65535 -; EG-DAG: -65536 +; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]] +; EG: LDS_WRITE * [[VAL]] define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind { %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)* %load = load i32 addrspace(3)* %cast, align 1 diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll index 3bd1dc4cb9..ad734100f7 100644 --- a/test/CodeGen/X86/vselect.ll +++ b/test/CodeGen/X86/vselect.ll @@ -53,10 +53,9 @@ define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test6: ; CHECK: # BB#0: -; CHECK-NEXT: movaps {{.*#+}} 
xmm1 = [0,65535,0,65535,0,65535,0,65535] -; CHECK-NEXT: andps %xmm0, %xmm1 -; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 -; CHECK-NEXT: orps %xmm1, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0] +; CHECK-NEXT: orps {{.*}}(%rip), %xmm1 +; CHECK-NEXT: andps %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a ret <8 x i16> %1