; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=SKX %s

define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

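; Generic v8i64 multiply: without AVX512DQ there is no 512-bit vpmullq, so the
; multiply is expanded into vpmuludq partial products combined with shifts and
; adds; AVX512DQ (and SKX) select a single vpmullq.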
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64> %x, %y
  ret <8 x i64> %z
}

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

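; Integer adds: plain register forms, folded full-width loads, and splat
; operands folded into embedded broadcasts ({1to8}/{1to16}).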
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

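; Masked integer adds: an icmp-ne mask feeding a select lowers to merge
; masking ({%k1}) when the false operand is the destination, and to zero
; masking ({%k1} {z}) when it is zeroinitializer.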
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

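; Square roots: calls to libm sqrtf/sqrt and to the llvm.sqrt intrinsics
; should all lower to the native vsqrt* instructions.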
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

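; Splat constant operands should fold into embedded broadcast memory operands
; instead of being materialized in a register.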
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64> %d
}

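; Masked FP arithmetic: the mask vector is compared against zero to produce
; %k1, and the arithmetic writes %zmm0 under merge masking.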
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

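; fcmp olt/ogt feeding a select is recognized as min/max and lowered to
; vminps/vminpd/vmaxps/vmaxpd; for the v8i32 masks below, VL-capable targets
; (AVX512VL, SKX) compare in ymm registers instead of widening to zmm.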
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

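; Double-precision masked adds use a v8i64 mask, so the compare is vpcmpneqq.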
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double>* %j, <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

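; A splatted scalar load folds into a (%rdi){1to8} broadcast operand, also
; under merge and zero masking.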
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                       <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

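; fsub from -0.0 is a negation, lowered as an xor of the sign bits; AVX512DQ
; targets use the FP-domain vxorps form.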
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       ## BB#0:
; SKX-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float> %res
}

define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float> %res
}

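; llvm.fabs lowers to an and that clears the sign bits; AVX512DQ targets use
; the FP-domain vandpd/vandps forms.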
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
