; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; 512-bit fadd on register operands: expect a single vaddpd.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

; fadd with a non-splat constant vector: the constant is folded as a RIP-relative memory operand.
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

; 512-bit single-precision fadd: expect a single vaddps.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

; fadd with a non-splat float constant vector folded from memory.
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000,  float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

; 512-bit fsub on register operands: expect a single vsubpd.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

; fsub with the RHS loaded from memory: the load folds into vsubpd (%rdi).
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

; 512-bit single-precision fsub: expect a single vsubps.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; fsub with the RHS loaded from memory: the load folds into vsubps (%rdi).
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; 64-bit integer multiply. Without AVX512DQ the i64 mul is expanded into the
; pmuludq/shift/add sequence; with DQ (AVX512DQ, SKX) it is a single vpmullq.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512F-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512VL-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

; 256-bit i64 multiply. AVX512DQ without VL must widen to zmm (see the kill
; comments); DQ+VL (SKX) uses the 256-bit vpmullq directly.
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512BW-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

; 128-bit i64 multiply. The widened AVX512DQ path additionally needs
; vzeroupper before returning since it dirtied the upper zmm state.
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512F-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

; 512-bit fmul on register operands: expect a single vmulpd.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

; fmul with a non-splat constant vector folded as a RIP-relative memory operand.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

; 512-bit single-precision fmul: expect a single vmulps.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

; fmul with a non-splat float constant vector folded from memory.
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

; 512-bit fdiv on register operands: expect a single vdivpd.
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

; fdiv by a non-splat constant vector folded as a RIP-relative memory operand.
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

; 512-bit single-precision fdiv: expect a single vdivps.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

; fdiv by a non-splat float constant vector folded from memory.
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

; i64 vector add in registers: expect a single vpaddq.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

; add with a full-width load: the load folds into vpaddq (%rdi).
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

; add of a splat constant: uses the embedded-broadcast {1to8} form.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %x
}

; A scalar load splatted by insertelements is recognized as a (%rdi){1to8} broadcast.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

; i32 vector add in registers: expect a single vpaddd.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

; add with a full-width load folded into vpaddd (%rdi).
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

; add of a splat i32 constant: uses the embedded-broadcast {1to16} form.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i32> %x
}

; icmp-ne mask + select(x, i) becomes a merge-masked vpaddd {%k1}.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; select against zeroinitializer becomes a zero-masked vpaddd {%k1} {z}.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Merge masking combined with a folded memory operand.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Merge masking combined with an embedded broadcast constant.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Zero masking combined with a folded memory operand.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Zero masking combined with an embedded broadcast constant.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; i64 vector sub: expect a single vpsubq.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

; i32 vector sub: expect a single vpsubd.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

; i32 vector mul: expect a single vpmulld.
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Calls to the libm sqrtf/sqrt (readnone) and to the llvm.sqrt intrinsics
; should all lower to the corresponding vsqrt* instruction, not a call.
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

; fadd of a splat float constant uses the embedded-broadcast {1to16} form.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

; add of a splat i64 constant uses the embedded-broadcast {1to8} form.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; i64 or with a broadcast constant: vporq without DQ, the FP-domain vorpd with DQ.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; AVX512F-LABEL: orq_broadcast:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: orq_broadcast:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: orq_broadcast:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: orq_broadcast:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: orq_broadcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; i32 and with a folded load: vpandq without DQ, FP-domain vandps with DQ.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; AVX512F-LABEL: andd512fold:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andd512fold:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andd512fold:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andd512fold:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandps (%rdi), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandps (%rdi), %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; i64 and with a scalar-load splat: expect the {1to8} embedded broadcast form
; (vpandq without DQ, FP-domain vandpd with DQ).
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; AVX512F-LABEL: andqbrst:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andqbrst:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andqbrst:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andqbrst:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andqbrst:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

; Masked FP add: the select(mask, x, dst) merges into vaddps {%k1}.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Masked FP multiply: merges into vmulps {%k1}.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; fcmp-olt + select (min pattern) lowers to masked vminps.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

; Masked vminpd with a <8 x i32> mask source: VL targets test the ymm mask
; directly; non-VL targets must widen it to zmm first (kill comment).
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

; fcmp-ogt + select (max pattern) lowers to masked vmaxps.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

; Masked vmaxpd with a <8 x i32> mask: ymm vptestmd on VL targets, widened
; to zmm on non-VL targets (kill comment).
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

; Masked fsub: lanes where %mask1 is nonzero get %i - %j; other lanes keep %dst.
; Expected to compile to a single vptestmd + masked vsubps on all AVX-512 configs.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  ; Per-lane predicate from the nonzero elements of %mask1 (maps to vptestmd).
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  ; select with %dst as passthrough folds into the {%k1} merge-masked vsubps.
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
820
; Masked fdiv: lanes where %mask1 is nonzero get %i / %j; other lanes keep %dst.
; Expected to fold into a merge-masked vdivps via vptestmd-produced %k1.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  ; Per-lane predicate from the nonzero elements of %mask1.
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  ; Merge-masking: unselected lanes pass %dst through.
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}
834
; Masked fadd (f64 lanes, i64 mask elements): nonzero %mask1 lanes get %i + %j,
; others keep %dst. Uses vptestmq since the mask vector is <8 x i64>.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm3, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  ; Per-lane predicate from the nonzero i64 elements of %mask1.
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  ; Merge-masking: unselected lanes pass %dst through.
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}
848
; Zero-masked fadd: nonzero %mask1 lanes get %i + %j, other lanes are zeroed.
; The zeroinitializer passthrough selects the {%k1} {z} form of vaddpd.
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  ; Zeroing semantics: false lanes become 0.0 rather than a passthrough value.
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
861
; Masked fadd with a memory operand: the load of *%j should fold into the
; masked vaddpd as (%rdi) rather than being emitted as a separate vmovupd.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double>* %j,  <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  ; align 8 (under-aligned for 64-byte vectors) still allows operand folding.
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}
876
; Zero-masked fadd with a folded memory operand: combines the {z} zeroing form
; with the (%rdi) load folding checked in test_mask_fold_vaddpd above.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  ; Zeroing semantics: false lanes become 0.0.
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
890
; Scalar-load splat + fadd: the insertelement/shufflevector splat of *%j should
; fold into the embedded-broadcast form (%rdi){1to8} of vaddpd.
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  ; Canonical splat idiom: insert into lane 0, then all-zeros shuffle mask.
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}
903
; Masked fadd with an embedded broadcast: splat of *%j folds to (%rdi){1to8}.
; Note the passthrough is %i (also an add operand), so codegen updates %zmm1
; in place and then moves it to the return register.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  ; Canonical splat idiom: insert into lane 0, then all-zeros shuffle mask.
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ; Passthrough is %i, not %dst — %dst is intentionally unused here.
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}
921
; Zero-masked fadd with an embedded broadcast: combines (%rdi){1to8} folding
; with the {z} zeroing form.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                       <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  ; Canonical splat idiom: insert into lane 0, then all-zeros shuffle mask.
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ; Zeroing semantics: false lanes become 0.0.
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}
938
; fsub from a -0.0 splat is recognized as sign-bit flip (fneg) and lowered to a
; broadcast XOR. Targets without AVX512DQ use integer vpxord; DQ-capable
; targets (AVX512DQ, SKX) use the FP-domain vxorps.
define <16 x float>  @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq

  ; (-0.0 ... -0.0) - %a == -%a: only the sign bit of each lane changes.
  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}
968
; 256-bit variant of test_fxor. Only VL-capable configs (AVX512VL, SKX) can use
; an embedded broadcast on ymm; the others materialize the -0.0 splat with
; vbroadcastss and then vxorps.
define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512F-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor_8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512BW-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512DQ-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor_8f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT:    retq
  ; (-0.0 ... -0.0) - %a == -%a: only the sign bit of each lane changes.
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}
1000
; llvm.fabs lowers to clearing the sign bit: an AND with a broadcast constant.
; Non-DQ configs use integer vpandq; DQ-capable targets use FP-domain vandpd.
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
1030declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
1031
; f32 counterpart of fabs_v8f64: sign-bit clear via broadcast AND, with dword
; granularity (vpandd / vandps) instead of qword.
define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
1061declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
1062