; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=SKX %s

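; Each RUN line compiles the same IR for a different AVX-512 feature set and
; pipes the output to FileCheck under a distinct prefix: CHECK lines apply to
; every configuration, while the AVX512F/AVX512VL/AVX512BW/AVX512DQ/SKX lines
; are checked only for the matching RUN.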
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

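; The *fold tests expect the vector constant to be materialized in the
; constant pool and folded into the arithmetic instruction as a RIP-relative
; memory operand, e.g. vaddpd {{.*}}(%rip).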
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

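; When one operand is loaded from a pointer argument, the load should fold
; into the instruction's memory operand, e.g. vsubpd (%rdi).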
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

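; There is no packed 64-bit multiply before AVX512DQ, so the i64 multiplies
; below are expected to expand into 32-bit partial products (vpmuludq plus
; shift/add); with +avx512dq a single vpmullq should be emitted instead.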
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512F-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512F-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512VL-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512BW-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512F-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512VL-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512BW-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512DQ-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512DQ-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512DQ-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512DQ-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

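; Unmasked integer add/sub/mul: each test should select a single
; vpaddq/vpaddd/vpsubq/vpsubd/vpmulld, folding loads and broadcasts into the
; memory operand where possible.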
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

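; A splat, whether of an immediate constant or of a scalar built up with
; insertelement, should use the EVEX embedded-broadcast form, e.g.
; vpaddq {{.*}}(%rip){1to8} or vpaddq (%rdi){1to8}.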
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

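; Masking pattern: icmp ne against zero plus a select is expected to become a
; kmask compare (vpcmpneqd ... %k1) feeding a masked instruction. Selecting
; the original destination gives merge masking ({%k1}); selecting
; zeroinitializer gives zero masking ({%k1} {z}).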
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

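; Scalar sqrt libcalls (sqrtf/sqrt) and the llvm.sqrt intrinsics should all
; lower directly to vsqrtss/vsqrtsd, and the 512-bit vector intrinsics to
; vsqrtps/vsqrtpd.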
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

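; The masked FP arithmetic tests below use the same icmp+select masking
; pattern; min/max are written as fcmp olt/ogt plus select and should match
; the vmin*/vmax* instructions. Because these defines span several lines, the
; autogenerated CHECK lines sit between the first line of each define and the
; rest of its parameter list.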
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double>* %j,  <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

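; A scalar double loaded from memory and splatted with insertelement plus
; shufflevector should likewise fold as an embedded broadcast, (%rdi){1to8},
; in unmasked, merge-masked, and zero-masked forms.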
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                       <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

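; fsub from -0.0 is a sign flip, so these should compile to an XOR with a
; sign-bit mask: vpxord on subtargets without 512-bit FP logic ops, vxorps
; where AVX512DQ provides them.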
define <16 x float>  @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       ## BB#0:
; SKX-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq

  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}

define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}

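; Similarly, llvm.fabs should become an AND that clears the sign bit:
; vpandq/vpandd without AVX512DQ, vandpd/vandps with it.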
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)