; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; 512-bit floating-point add tests (zmm registers, KNL).

; reg-reg fadd; IR operands are commuted (%x + %y) so the add reads its
; first source from zmm1.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

; fadd with a constant vector: should fold the constant pool entry as a
; RIP-relative memory operand. Note lane 5 (3.8) differs from lane 1
; (3.4), so the constant is not a repeating pattern.
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

; reg-reg fadd on 16 x float.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

; fadd of 16 x float with a constant-pool operand folded from memory.
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}
; 512-bit floating-point subtract tests.

; reg-reg fsub; fsub is non-commutative, so operand order in the checked
; vsubpd (%zmm0 is the subtrahend source) matters.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

; fsub with a loaded operand: the load should fold into vsubpd as a
; (%rdi) memory operand instead of a separate vmovupd.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

; reg-reg fsub on 16 x float.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; fsub of 16 x float with the load folded into the instruction.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %tmp2 = load <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}
; 64-bit integer multiply: KNL has no packed 64x64 multiply, so the
; legalizer expands it into three vpmuludq (32x32->64) partial products
; combined with shifts and adds.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; CHECK-LABEL: imulq512:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
; CHECK-NEXT: vpsrlq $32, %zmm0, %zmm3
; CHECK-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
; CHECK-NEXT: vpsllq $32, %zmm3, %zmm3
; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2
; CHECK-NEXT: vpsrlq $32, %zmm1, %zmm1
; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}
; 512-bit floating-point multiply tests.

; reg-reg fmul on 8 x double.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

; fmul with a constant-pool operand folded RIP-relative.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

; reg-reg fmul on 16 x float.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

; fmul of 16 x float with the constant folded from memory.
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}
; 512-bit floating-point divide tests.

; reg-reg fdiv on 8 x double; fdiv is non-commutative, operand order
; in the checked vdivpd is significant.
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

; fdiv with a constant-pool divisor folded RIP-relative.
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

; reg-reg fdiv on 16 x float.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

; fdiv of 16 x float with the constant folded from memory.
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}
; Integer add tests: register, memory-fold, and embedded-broadcast forms.

; reg-reg vpaddq on 8 x i64.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

; add of a loaded vector: load folds into vpaddq as (%rdi).
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

; add of a splat constant: should use an AVX-512 embedded broadcast
; ({1to8}) of a single scalar from the constant pool.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

; a scalar load splatted via 8 insertelements should also become an
; embedded broadcast from memory: (%rdi){1to8}.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

; reg-reg vpaddd on 16 x i32.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

; 32-bit add with the vector load folded into vpaddd.
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
  %tmp = load <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

; splat-constant add via embedded broadcast {1to16}.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}
; Masked vpaddd tests. The icmp-ne/select pattern should lower to a
; k-register compare plus a masked add: select-to-%i gives merge masking
; ({%k1}); select-to-zero gives zeroing masking ({%k1} {z}).

; merge-masked reg-reg add.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; zeroing-masked reg-reg add.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; merge masking combined with a folded memory operand.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; merge masking combined with an embedded broadcast constant.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; zeroing masking combined with a folded memory operand.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; zeroing masking combined with an embedded broadcast constant.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}
; Integer subtract and 32-bit multiply, register forms only.

; reg-reg vpsubq on 8 x i64.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

; reg-reg vpsubd on 16 x i32.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

; 32-bit multiply has a native single instruction (vpmulld), unlike the
; 64-bit case tested in imulq512.
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK: ## BB#0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}
; Square-root lowering: both libm calls (sqrtf/sqrt, readnone) and the
; llvm.sqrt intrinsics should select the AVX vsqrt* instructions instead
; of emitting a call.

declare float @sqrtf(float) readnone
; scalar float sqrt via the readnone libcall -> vsqrtss.
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
; scalar double sqrt via the readnone libcall -> vsqrtsd.
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
; scalar float sqrt via the intrinsic -> vsqrtss.
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
; 512-bit packed float sqrt via the vector intrinsic -> vsqrtps.
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
; 512-bit packed double sqrt via the vector intrinsic -> vsqrtpd.
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}
; Embedded-broadcast and logical-op folding tests.

; splat float constant folds as a {1to16} broadcast into vaddps.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

; splat i64 constant folds as a {1to8} broadcast into vpaddq.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; bitwise or with a splat constant uses vporq with a {1to8} broadcast.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK: ## BB#0:
; CHECK-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; bitwise and with a loaded vector folds the load into vpandd.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; a scalar load splatted via insertelement + shufflevector becomes an
; embedded broadcast from memory: vpandq (%rdi){1to8}.
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
  %a = load i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}
; Masked floating-point arithmetic: each test builds a k-mask from an
; integer compare and expects the arithmetic instruction itself to carry
; the {%k} write mask (regex-matched, register allocation not pinned).

; CHECK-LABEL: test_mask_vaddps
; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; merge-masked fadd.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vmulps
; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; merge-masked fmul.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vminps
; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; fcmp-olt + select (min pattern) should become a masked vminps.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vminpd
; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; double-precision variant of the masked min pattern. Note the mask
; source is <8 x i32>, exercising a narrower compare feeding the k-reg.
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
                                      <8 x double> %j, <8 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_mask_vmaxps
; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; fcmp-ogt + select (max pattern) should become a masked vmaxps.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vmaxpd
; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; double-precision variant of the masked max pattern.
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
                                      <8 x double> %j, <8 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_mask_vsubps
; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; merge-masked fsub.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; CHECK-LABEL: test_mask_vdivps
; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; merge-masked fdiv.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
                                      <16 x float> %j, <16 x i32> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
; Masked vaddpd matrix: merge vs. zeroing masking crossed with
; register, folded-load, and embedded-broadcast operands. These use an
; <8 x i64> mask source (same element count as the data).

; CHECK-LABEL: test_mask_vaddpd
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
; merge-masked reg-reg fadd on doubles.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
                                      <8 x double> %j, <8 x i64> %mask1)
                                      nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_maskz_vaddpd
; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
; zeroing-masked reg-reg fadd.
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
                                       <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; CHECK-LABEL: test_mask_fold_vaddpd
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
; merge masking with the vector load folded into the add.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
                                           <8 x double>* %j, <8 x i64> %mask1)
                                           nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; CHECK-LABEL: test_maskz_fold_vaddpd
; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
; zeroing masking with the vector load folded into the add.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
                                            <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; CHECK-LABEL: test_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
; CHECK: ret
; unmasked baseline: scalar-load splat becomes (%rdi){1to8}.
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
  %tmp = load double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

; CHECK-LABEL: test_mask_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
; CHECK: ret
; merge masking combined with the embedded memory broadcast.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
                                                double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

; CHECK-LABEL: test_maskz_broadcast_vaddpd
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
; zeroing masking combined with the embedded memory broadcast.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
                                                 <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}