summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: b315be2)
raw | patch | inline | side by side (parent: b315be2)
author | Simon Pilgrim <llvm-dev@redking.me.uk> | |
Tue, 14 Oct 2014 22:31:34 +0000 (22:31 +0000) | ||
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | |
Tue, 14 Oct 2014 22:31:34 +0000 (22:31 +0000) |
Patch to provide shuffle decodes and asm comments for the SSE2/AVX2 pslldq/psrldq byte shift instructions.
Differential Revision: http://reviews.llvm.org/D5598
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219738 91177308-0d34-0410-b5e6-96231b3b80d8
Differential Revision: http://reviews.llvm.org/D5598
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219738 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 432cf930b4ac63e7aab7c3bc5dc344e057e647b8..a8f15e6b9485da663c6c8e414a4ddf7661370913 100644 (file)
DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
break;
+ case X86::PSLLDQri:
+ case X86::VPSLLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSLLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSLLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSRLDQri:
+ case X86::VPSRLDQri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v16i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::VPSRLDQYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if(MI->getOperand(MI->getNumOperands()-1).isImm())
+ DecodePSRLDQMask(MVT::v32i8,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:
Src1Name = getRegName(MI->getOperand(2).getReg());
index a3f45233454761f88f6a56a1c914707b551ef87e..ba6cbc8bc21098a8b852e46c7486a75e7c09b84f 100644 (file)
}
}
+/// Append the byte shuffle mask equivalent to a per-lane left byte shift.
+///
+/// One entry is pushed for every byte of \p VT. The vector is processed in
+/// lanes of (size in bits / 128) equal parts; inside each lane the first
+/// \p Imm entries are SM_SentinelZero and every later entry names the source
+/// byte that sits \p Imm positions lower in the same lane.
+///
+/// \param VT          Vector type whose bit width determines the mask length.
+/// \param Imm         Shift amount in bytes.
+/// \param ShuffleMask Output mask; entries are appended, nothing is cleared.
+void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+  const unsigned SizeInBits = VT.getSizeInBits();
+  const unsigned TotalBytes = SizeInBits / 8;
+  const unsigned LaneCount = SizeInBits / 128;
+  const unsigned LaneBytes = TotalBytes / LaneCount;
+
+  for (unsigned LaneBase = 0; LaneBase < TotalBytes; LaneBase += LaneBytes) {
+    unsigned Byte = 0;
+
+    // The low end of each lane is filled with the zeros that were shifted in.
+    for (; Byte < LaneBytes && Byte < Imm; ++Byte)
+      ShuffleMask.push_back(SM_SentinelZero);
+
+    // The remaining bytes come from Imm positions lower within this lane.
+    for (; Byte < LaneBytes; ++Byte)
+      ShuffleMask.push_back(static_cast<int>(Byte - Imm + LaneBase));
+  }
+}
+
+/// Append the byte shuffle mask equivalent to a per-lane right byte shift.
+///
+/// One entry is pushed for every byte of \p VT. The vector is processed in
+/// lanes of (size in bits / 128) equal parts; each entry names the source
+/// byte \p Imm positions higher in the same lane, or SM_SentinelZero when
+/// that position falls at or beyond the end of the lane.
+///
+/// \param VT          Vector type whose bit width determines the mask length.
+/// \param Imm         Shift amount in bytes.
+/// \param ShuffleMask Output mask; entries are appended, nothing is cleared.
+void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+  const unsigned SizeInBits = VT.getSizeInBits();
+  const unsigned TotalBytes = SizeInBits / 8;
+  const unsigned LaneCount = SizeInBits / 128;
+  const unsigned LaneBytes = TotalBytes / LaneCount;
+
+  for (unsigned LaneBase = 0; LaneBase < TotalBytes; LaneBase += LaneBytes) {
+    for (unsigned Dst = 0; Dst < LaneBytes; ++Dst) {
+      // Position within the lane that this destination byte reads from.
+      const unsigned Src = Dst + Imm;
+
+      if (Src < LaneBytes)
+        ShuffleMask.push_back(static_cast<int>(Src + LaneBase));
+      else
+        ShuffleMask.push_back(SM_SentinelZero); // Read past the lane: zero.
+    }
+  }
+}
+
void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
index af26d0a509b618dccad61ec6eef0984f68dcfa5d..6ba3c64f8ec3de7de00bcc901fc09b224d855219 100644 (file)
void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
+void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
index a957d10cc2b282ea32fc8cef6fc1f70062b2676b..ef3e83fc7ad43b8de4aca3461fbaa634bc0afa24 100644 (file)
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
- ; CHECK: vpslldq
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
- ; CHECK: vpslldq
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
+\r
+\r
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {\r
+ ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]\r
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone\r
+\r
+\r
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {\r
+ ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]\r
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
- ; CHECK: vpsrldq
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
- ; CHECK: vpsrldq
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
+\r
+\r
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {\r
+ ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]\r
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone\r
+\r
+\r
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {\r
+ ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero\r
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
index 31635de3400d08a1e2c5b26d628bb4524827d6ee..84b22b76bf6de5bb6a56682e9097e667f9025376 100644 (file)
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
- ; CHECK: vpslldq
- %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
- ; CHECK: vpslldq
- %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
+\r
+\r
+define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {\r
+ ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]\r
+ %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]\r
+ ret <4 x i64> %res\r
+}\r
+declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone\r
+\r
+\r
+define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {\r
+ ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]\r
+ %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]\r
+ ret <4 x i64> %res\r
+}\r
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
- ; CHECK: vpsrldq
- %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
- ; CHECK: vpsrldq
- %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
+\r
+\r
+define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {\r
+ ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]\r
+ %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]\r
+ ret <4 x i64> %res\r
+}\r
+declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone\r
+\r
+\r
+define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {\r
+ ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero\r
+ %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]\r
+ ret <4 x i64> %res\r
+}\r
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
index c906ecdd60c10e44b27061d7ab9db7621d5bd0fe..c4d9e6d7e28f8e013215031caf4cfb8bef36a376 100644 (file)
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
- ; CHECK: pslldq
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
- ; CHECK: pslldq
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
+\r
+\r
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {\r
+ ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]\r
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone\r
+\r
+\r
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {\r
+ ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]\r
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
- ; CHECK: psrldq
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
- ; CHECK: psrldq
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
+\r
+\r
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {\r
+ ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]\r
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone\r
+\r
+\r
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {\r
+ ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero\r
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]\r
+ ret <2 x i64> %res\r
+}\r
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
index 7d6e2bb11249900e77c5495eee199249548353c8..12051db85a82c8b7ef1ea1577d450e7691a15d5a 100644 (file)
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
-; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq $2, %xmm0
-; SSE-NEXT: retq
-;
-; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
-; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq $2, %xmm0, %xmm0
-; AVX-NEXT: retq
- %a = insertelement <8 x i16> undef, i16 %i, i32 0
- %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
+; SSE: # BB#0:\r
+; SSE-NEXT: movzwl %di, %eax\r
+; SSE-NEXT: movd %eax, %xmm0\r
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]\r
+; SSE-NEXT: retq\r
+;\r
+; AVX-LABEL: shuffle_v8i16_z8zzzzzz:\r
+; AVX: # BB#0:\r
+; AVX-NEXT: movzwl %di, %eax\r
+; AVX-NEXT: vmovd %eax, %xmm0\r
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]\r
+; AVX-NEXT: retq\r
+ %a = insertelement <8 x i16> undef, i16 %i, i32 0\r
+ %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>\r
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
-; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq $10, %xmm0
-; SSE-NEXT: retq
-;
-; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
-; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq $10, %xmm0, %xmm0
-; AVX-NEXT: retq
- %a = insertelement <8 x i16> undef, i16 %i, i32 0
- %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
+; SSE: # BB#0:\r
+; SSE-NEXT: movzwl %di, %eax\r
+; SSE-NEXT: movd %eax, %xmm0\r
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]\r
+; SSE-NEXT: retq\r
+;\r
+; AVX-LABEL: shuffle_v8i16_zzzzz8zz:\r
+; AVX: # BB#0:\r
+; AVX-NEXT: movzwl %di, %eax\r
+; AVX-NEXT: vmovd %eax, %xmm0\r
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]\r
+; AVX-NEXT: retq\r
+ %a = insertelement <8 x i16> undef, i16 %i, i32 0\r
+ %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>\r
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
-; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq $14, %xmm0
-; SSE-NEXT: retq
-;
-; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
-; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq $14, %xmm0, %xmm0
-; AVX-NEXT: retq
- %a = insertelement <8 x i16> undef, i16 %i, i32 0
- %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
+; SSE: # BB#0:\r
+; SSE-NEXT: movzwl %di, %eax\r
+; SSE-NEXT: movd %eax, %xmm0\r
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]\r
+; SSE-NEXT: retq\r
+;\r
+; AVX-LABEL: shuffle_v8i16_zuuzuuz8:\r
+; AVX: # BB#0:\r
+; AVX-NEXT: movzwl %di, %eax\r
+; AVX-NEXT: vmovd %eax, %xmm0\r
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]\r
+; AVX-NEXT: retq\r
+ %a = insertelement <8 x i16> undef, i16 %i, i32 0\r
+ %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>\r
ret <8 x i16> %shuffle
}
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
-; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq $4, %xmm0
-; SSE-NEXT: retq
-;
-; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
-; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq $4, %xmm0, %xmm0
-; AVX-NEXT: retq
- %a = insertelement <8 x i16> undef, i16 %i, i32 3
- %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SSE: # BB#0:\r
+; SSE-NEXT: movzwl %di, %eax\r
+; SSE-NEXT: movd %eax, %xmm0\r
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]\r
+; SSE-NEXT: retq\r
+;\r
+; AVX-LABEL: shuffle_v8i16_zzBzzzzz:\r
+; AVX: # BB#0:\r
+; AVX-NEXT: movzwl %di, %eax\r
+; AVX-NEXT: vmovd %eax, %xmm0\r
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]\r
+; AVX-NEXT: retq\r
+ %a = insertelement <8 x i16> undef, i16 %i, i32 3\r
+ %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>\r
ret <8 x i16> %shuffle
}