; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s

; Register-register fadd on <8 x double>: the output must contain a vaddpd
; followed by the return.
; CHECK-LABEL: addpd512
; CHECK: vaddpd
; CHECK: ret
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

; fadd of <8 x double> with a non-splat constant vector: the constant must be
; folded into vaddpd as a RIP-relative operand on an LCP label.
; CHECK-LABEL: addpd512fold
; CHECK: vaddpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @addpd512fold(<8 x double> %y) {
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

; Register-register fadd on <16 x float>: the output must contain a vaddps
; followed by the return.
; CHECK-LABEL: addps512
; CHECK: vaddps
; CHECK: ret
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

; fadd of <16 x float> with a non-splat constant vector: the constant must be
; folded into vaddps as a RIP-relative operand on an LCP label.
; CHECK-LABEL: addps512fold
; CHECK: vaddps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @addps512fold(<16 x float> %y) {
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

; Register-register fsub on <8 x double>: the output must contain a vsubpd
; followed by the return.
; CHECK-LABEL: subpd512
; CHECK: vsubpd
; CHECK: ret
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

; fsub of <8 x double> whose right-hand operand is loaded from %x: the load
; must be folded into vsubpd as a register-indirect memory operand.
; CHECK-LABEL: @subpd512fold
; CHECK: vsubpd (%
; CHECK: ret
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
entry:
  %tmp2 = load <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

; Register-register fsub on <16 x float>: the output must contain a vsubps
; followed by the return.
; CHECK-LABEL: @subps512
; CHECK: vsubps
; CHECK: ret
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; fsub of <16 x float> whose right-hand operand is loaded from %x: the load
; must be folded into vsubps as a register-indirect memory operand.
; CHECK-LABEL: subps512fold
; CHECK: vsubps (%
; CHECK: ret
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
entry:
  %tmp2 = load <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; Integer mul on <8 x i64>: the output must contain at least two vpmuludq
; instructions before the return.
; CHECK-LABEL: imulq512
; CHECK: vpmuludq
; CHECK: vpmuludq
; CHECK: ret
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

; Register-register fmul on <8 x double>: the output must contain a vmulpd
; followed by the return.
; CHECK-LABEL: mulpd512
; CHECK: vmulpd
; CHECK: ret
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

; fmul of <8 x double> with a non-splat constant vector: the constant must be
; folded into vmulpd as a RIP-relative operand on an LCP label.
; CHECK-LABEL: mulpd512fold
; CHECK: vmulpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @mulpd512fold(<8 x double> %y) {
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

; Register-register fmul on <16 x float>: the output must contain a vmulps
; followed by the return.
; CHECK-LABEL: mulps512
; CHECK: vmulps
; CHECK: ret
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

; fmul of <16 x float> with a non-splat constant vector: the constant must be
; folded into vmulps as a RIP-relative operand on an LCP label.
; CHECK-LABEL: mulps512fold
; CHECK: vmulps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @mulps512fold(<16 x float> %y) {
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

; Register-register fdiv on <8 x double>: the output must contain a vdivpd
; followed by the return.
; CHECK-LABEL: divpd512
; CHECK: vdivpd
; CHECK: ret
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

; fdiv of <8 x double> by a non-splat constant vector: the constant must be
; folded into vdivpd as a RIP-relative operand on an LCP label.
; CHECK-LABEL: divpd512fold
; CHECK: vdivpd LCP{{.*}}(%rip)
; CHECK: ret
define <8 x double> @divpd512fold(<8 x double> %y) {
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

; Register-register fdiv on <16 x float>: the output must contain a vdivps
; followed by the return.
; CHECK-LABEL: divps512
; CHECK: vdivps
; CHECK: ret
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

; fdiv of <16 x float> by a non-splat constant vector: the constant must be
; folded into vdivps as a RIP-relative operand on an LCP label.
; CHECK-LABEL: divps512fold
; CHECK: vdivps LCP{{.*}}(%rip)
; CHECK: ret
define <16 x float> @divps512fold(<16 x float> %y) {
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

; Register-register add on <8 x i64>: the output must contain a vpaddq whose
; first printed operand is a zmm register.
; CHECK-LABEL: vpaddq_test
; CHECK: vpaddq %zmm
; CHECK: ret
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

; add on <8 x i64> with one operand loaded from %j: the load must be folded
; into vpaddq as a register-indirect memory operand.
; CHECK-LABEL: vpaddq_fold_test
; CHECK: vpaddq (%
; CHECK: ret
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
  %tmp = load <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

; add on <8 x i64> with an all-ones-valued splat constant (i64 1 in every
; lane): the constant must be folded into vpaddq as a {1to8} broadcast of a
; RIP-relative LCP operand.
; CHECK-LABEL: vpaddq_broadcast_test
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}
; CHECK: ret
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

; add on <8 x i64> where the second operand is a splat assembled by eight
; insertelement operations from one scalar loaded through %j: the scalar load
; must be folded into vpaddq as a {1to8} broadcast from (%rdi).
; CHECK-LABEL: vpaddq_broadcast2_test
; CHECK: vpaddq (%rdi){1to8}
; CHECK: ret
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
  %tmp = load i64* %j
  ; Place the same loaded scalar into every lane, one lane per insertelement.
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

; Register-register add on <16 x i32>: the output must contain a vpaddd whose
; first printed operand is a zmm register.
; CHECK-LABEL: vpaddd_test
; CHECK: vpaddd %zmm
; CHECK: ret
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

; add on <16 x i32> with one operand loaded from %j: the load must be folded
; into vpaddd as a register-indirect memory operand.
; CHECK-LABEL: vpaddd_fold_test
; CHECK: vpaddd (%
; CHECK: ret
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
  %tmp = load <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

; add on <16 x i32> with a splat constant (i32 1 in every lane): the constant
; must be folded into vpaddd as a {1to16} broadcast of a RIP-relative LCP
; operand.
; CHECK-LABEL: vpaddd_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
; CHECK: ret
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

; Per-lane select between the sum %i + %j and the original %i, controlled by
; (%mask1 != 0): the output must contain a three-register vpaddd carrying a
; {%k} write mask and no {z}.
; CHECK-LABEL: vpaddd_mask_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Per-lane select between the sum %i + %j and zero, controlled by
; (%mask1 != 0): the output must contain a three-register vpaddd carrying a
; {%k} write mask together with {z}.
; CHECK-LABEL: vpaddd_maskz_test
; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Per-lane select between %i + load(%j.ptr) and the original %i, controlled by
; (%mask1 != 0): the load must be folded into a vpaddd from (%rdi) carrying a
; {%k} write mask and no {z}.
; The function dereferences %j.ptr, so it is not marked readnone; only
; nounwind is kept, as on the unmasked memory-operand tests in this file.
; CHECK-LABEL: vpaddd_mask_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Per-lane select between %i + splat(i32 1) and the original %i, controlled by
; (%mask1 != 0): the constant must be folded into a vpaddd as a {1to16}
; broadcast of a RIP-relative LCP operand, carrying a {%k} write mask and
; no {z}.
; CHECK-LABEL: vpaddd_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
; CHECK: ret
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Per-lane select between %i + load(%j.ptr) and zero, controlled by
; (%mask1 != 0): the load must be folded into a vpaddd from (%rdi) carrying a
; {%k} write mask together with {z}.
; The function dereferences %j.ptr, so it is not marked readnone; only
; nounwind is kept, as on the unmasked memory-operand tests in this file.
; CHECK-LABEL: vpaddd_maskz_fold_test
; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Per-lane select between %i + splat(i32 1) and zero, controlled by
; (%mask1 != 0): the constant must be folded into a vpaddd as a {1to16}
; broadcast of a RIP-relative LCP operand, carrying a {%k} write mask
; together with {z}.
; CHECK-LABEL: vpaddd_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
; CHECK: ret
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Register-register sub on <8 x i64>: the output must contain a vpsubq whose
; first printed operand is a zmm register.
; CHECK-LABEL: vpsubq_test
; CHECK: vpsubq %zmm
; CHECK: ret
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

; Register-register sub on <16 x i32>: the output must contain a vpsubd
; followed by the return.
; CHECK-LABEL: vpsubd_test
; CHECK: vpsubd
; CHECK: ret
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Register-register mul on <16 x i32>: the output must contain a vpmulld whose
; first printed operand is a zmm register.
; CHECK-LABEL: vpmulld_test
; CHECK: vpmulld %zmm
; CHECK: ret
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Tail call to the readnone-declared library function sqrtf on a float: the
; output must contain a vsqrtss whose printed machine encoding begins with
; byte 0x62 (the EVEX prefix byte per the Intel SDM). The encoding comment is
; matched on its "##" marker, the same way the sqrtB and sqrtC checks do.
; CHECK-LABEL: sqrtA
; CHECK: vsqrtss {{.*}}## encoding: [0x62
; CHECK: ret
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

; Tail call to the readnone-declared library function sqrt on a double: the
; output must contain a vsqrtsd whose printed machine encoding begins with
; byte 0x62 (the EVEX prefix byte per the Intel SDM).
; CHECK-LABEL: sqrtB
; CHECK: vsqrtsd {{.*}}## encoding: [0x62
; CHECK: ret
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

; Call to the llvm.sqrt.f32 intrinsic: the output must contain a vsqrtss whose
; printed machine encoding begins with byte 0x62 (the EVEX prefix byte per the
; Intel SDM).
; CHECK-LABEL: sqrtC
; CHECK: vsqrtss {{.*}}## encoding: [0x62
; CHECK: ret
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

; Call to the llvm.sqrt.v16f32 intrinsic on <16 x float>: the output must
; contain a vsqrtps followed by the return.
; CHECK-LABEL: sqrtD
; CHECK: vsqrtps {{.*}}
; CHECK: ret
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

; Call to the llvm.sqrt.v8f64 intrinsic on <8 x double>: the output must
; contain a vsqrtpd followed by the return.
; CHECK-LABEL: sqrtE
; CHECK: vsqrtpd {{.*}}
; CHECK: ret
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

; fadd on <16 x float> with a splat constant (the same float in all 16 lanes):
; the constant must be folded as a {1to16} broadcast of a RIP-relative LCP
; operand, with %zmm0 as both source and destination. The check names the
; vaddps mnemonic so that a different instruction with the same operands
; cannot satisfy it, matching how the sibling broadcast tests are written.
; CHECK-LABEL: fadd_broadcast
; CHECK: vaddps LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK: ret
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

; add on <8 x i64> with a splat constant (i64 2 in every lane): the constant
; must be folded into vpaddq as a {1to8} broadcast of a RIP-relative LCP
; operand, with %zmm0 as both source and destination.
; CHECK-LABEL: addq_broadcast
; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; or on <8 x i64> with a splat constant (i64 2 in every lane): the constant
; must be folded into vporq as a {1to8} broadcast of a RIP-relative LCP
; operand, with %zmm0 as both source and destination.
; CHECK-LABEL: orq_broadcast
; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; and on <16 x i32> with one operand loaded from %x: the load must be folded
; into vpandd as a register-indirect memory operand.
; CHECK-LABEL: andd512fold
; CHECK: vpandd (%
; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
entry:
  %a = load <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; and on <8 x i64> where the second operand is a splat of one scalar loaded
; through %ap (insertelement into lane 0, then a shufflevector with an
; all-zero mask): the scalar load must be folded into vpandq as a {1to8}
; broadcast from (%rdi).
; CHECK-LABEL: andqbrst
; CHECK: vpandq  (%rdi){1to8}, %zmm
; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
entry:
  %a = load i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  ; An all-zero shuffle mask copies lane 0 of %b into every result lane.
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}
