; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -basic-aa -slp-vectorizer -slp-threshold=-999 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

declare double @llvm.fabs.f64(double) nounwind readnone

; Two scalar fabs(a[i]*b[i]) lanes must merge into one <2 x double>
; llvm.fabs.v2f64 call (pinned by the CHECK lines below).
define void @vec_fabs_f64(double* %a, double* %b, double* %c) {
; CHECK-LABEL: @vec_fabs_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load double, double* %a, align 8
  %i1 = load double, double* %b, align 8
  %mul = fmul double %i0, %i1
  %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone
  %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
  %i3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
  %i4 = load double, double* %arrayidx4, align 8
  %mul5 = fmul double %i3, %i4
  %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone
  store double %call, double* %c, align 8
  %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
  store double %call5, double* %arrayidx5, align 8
  ret void
}

declare float @llvm.copysign.f32(float, float) nounwind readnone

; Four scalar copysign(a[i], b[i]) lanes must merge into one <4 x float>
; llvm.copysign.v4f32 call; %c is noalias so the stores cannot clobber the loads.
define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) {
; CHECK-LABEL: @vec_copysign_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %0 = load float, float* %a, align 4
  %1 = load float, float* %b, align 4
  %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
  store float %call0, float* %c, align 4

  %ix2 = getelementptr inbounds float, float* %a, i64 1
  %2 = load float, float* %ix2, align 4
  %ix3 = getelementptr inbounds float, float* %b, i64 1
  %3 = load float, float* %ix3, align 4
  %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone
  %c1 = getelementptr inbounds float, float* %c, i64 1
  store float %call1, float* %c1, align 4

  %ix4 = getelementptr inbounds float, float* %a, i64 2
  %4 = load float, float* %ix4, align 4
  %ix5 = getelementptr inbounds float, float* %b, i64 2
  %5 = load float, float* %ix5, align 4
  %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone
  %c2 = getelementptr inbounds float, float* %c, i64 2
  store float %call2, float* %c2, align 4

  %ix6 = getelementptr inbounds float, float* %a, i64 3
  %6 = load float, float* %ix6, align 4
  %ix7 = getelementptr inbounds float, float* %b, i64 3
  %7 = load float, float* %ix7, align 4
  %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone
  %c3 = getelementptr inbounds float, float* %c, i64 3
  store float %call3, float* %c3, align 4

  ret void
}

declare i32 @llvm.bswap.i32(i32) nounwind readnone

; Four scalar bswap(a[i]+b[i]) lanes must merge into one <4 x i32>
; llvm.bswap.v4i32 call (pinned by the CHECK lines below).
define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) {
; CHECK-LABEL: @vec_bswap_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, i32* %a, align 4
  %i1 = load i32, i32* %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.bswap.i32(i32 %add1) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
  %i2 = load i32, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
  %i3 = load i32, i32* %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.bswap.i32(i32 %add2) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
  %i4 = load i32, i32* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
  %i5 = load i32, i32* %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.bswap.i32(i32 %add3) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
  %i6 = load i32, i32* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
  %i7 = load i32, i32* %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.bswap.i32(i32 %add4) nounwind readnone

  store i32 %call1, i32* %c, align 4
  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %call2, i32* %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %call3, i32* %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %call4, i32* %arrayidx10, align 4
  ret void

}

declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone

; All four ctlz lanes pass the same i1 immediate (true), so they must merge
; into one <4 x i32> llvm.ctlz.v4i32 call with the flag preserved.
define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
; CHECK-LABEL: @vec_ctlz_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP5]], i1 true)
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, i32* %a, align 4
  %i1 = load i32, i32* %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
  %i2 = load i32, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
  %i3 = load i32, i32* %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
  %i4 = load i32, i32* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
  %i5 = load i32, i32* %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
  %i6 = load i32, i32* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
  %i7 = load i32, i32* %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone

  store i32 %call1, i32* %c, align 4
  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %call2, i32* %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %call3, i32* %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %call4, i32* %arrayidx10, align 4
  ret void

}
; Negative test: the lanes mix i1 true and i1 false for ctlz's
; is-zero-poison flag, so the calls must NOT be vectorized — the CHECK
; lines require the four scalar calls to survive.
define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
; CHECK-LABEL: @vec_ctlz_i32_neg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[B:%.*]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD1]], i1 true) #3
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 1
; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 1
; CHECK-NEXT:    [[I3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD2]], i1 false) #3
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
; CHECK-NEXT:    [[I4:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD3]], i1 true) #3
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
; CHECK-NEXT:    [[I6:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
; CHECK-NEXT:    [[I7:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[ADD4]], i1 false) #3
; CHECK-NEXT:    store i32 [[CALL1]], i32* [[C:%.*]], align 4
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 1
; CHECK-NEXT:    store i32 [[CALL2]], i32* [[ARRAYIDX8]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2
; CHECK-NEXT:    store i32 [[CALL3]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3
; CHECK-NEXT:    store i32 [[CALL4]], i32* [[ARRAYIDX10]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, i32* %a, align 4
  %i1 = load i32, i32* %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
  %i2 = load i32, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
  %i3 = load i32, i32* %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
  %i4 = load i32, i32* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
  %i5 = load i32, i32* %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
  %i6 = load i32, i32* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
  %i7 = load i32, i32* %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone

  store i32 %call1, i32* %c, align 4
  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %call2, i32* %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %call3, i32* %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %call4, i32* %arrayidx10, align 4
  ret void


}


declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone

; All four cttz lanes pass the same i1 immediate (true), so they must merge
; into one <4 x i32> llvm.cttz.v4i32 call with the flag preserved.
define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
; CHECK-LABEL: @vec_cttz_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP5]], i1 true)
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[C:%.*]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, i32* %a, align 4
  %i1 = load i32, i32* %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
  %i2 = load i32, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
  %i3 = load i32, i32* %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
  %i4 = load i32, i32* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
  %i5 = load i32, i32* %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
  %i6 = load i32, i32* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
  %i7 = load i32, i32* %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone

  store i32 %call1, i32* %c, align 4
  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %call2, i32* %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %call3, i32* %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %call4, i32* %arrayidx10, align 4
  ret void

}
; Negative test: the lanes mix i1 true and i1 false for cttz's
; is-zero-poison flag, so the calls must NOT be vectorized — the CHECK
; lines require the four scalar calls to survive.
define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
; CHECK-LABEL: @vec_cttz_i32_neg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I0:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT:    [[I1:%.*]] = load i32, i32* [[B:%.*]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[I0]], [[I1]]
; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD1]], i1 true) #3
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 1
; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 1
; CHECK-NEXT:    [[I3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[I2]], [[I3]]
; CHECK-NEXT:    [[CALL2:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD2]], i1 false) #3
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 2
; CHECK-NEXT:    [[I4:%.*]] = load i32, i32* [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 2
; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[I4]], [[I5]]
; CHECK-NEXT:    [[CALL3:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD3]], i1 true) #3
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
; CHECK-NEXT:    [[I6:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 3
; CHECK-NEXT:    [[I7:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[I6]], [[I7]]
; CHECK-NEXT:    [[CALL4:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[ADD4]], i1 false) #3
; CHECK-NEXT:    store i32 [[CALL1]], i32* [[C:%.*]], align 4
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 1
; CHECK-NEXT:    store i32 [[CALL2]], i32* [[ARRAYIDX8]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 2
; CHECK-NEXT:    store i32 [[CALL3]], i32* [[ARRAYIDX9]], align 4
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 3
; CHECK-NEXT:    store i32 [[CALL4]], i32* [[ARRAYIDX10]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load i32, i32* %a, align 4
  %i1 = load i32, i32* %b, align 4
  %add1 = add i32 %i0, %i1
  %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone

  %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
  %i2 = load i32, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
  %i3 = load i32, i32* %arrayidx3, align 4
  %add2 = add i32 %i2, %i3
  %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone

  %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
  %i4 = load i32, i32* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
  %i5 = load i32, i32* %arrayidx5, align 4
  %add3 = add i32 %i4, %i5
  %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone

  %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
  %i6 = load i32, i32* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
  %i7 = load i32, i32* %arrayidx7, align 4
  %add4 = add i32 %i6, %i7
  %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone

  store i32 %call1, i32* %c, align 4
  %arrayidx8 = getelementptr inbounds i32, i32* %c, i32 1
  store i32 %call2, i32* %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds i32, i32* %c, i32 2
  store i32 %call3, i32* %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds i32, i32* %c, i32 3
  store i32 %call4, i32* %arrayidx10, align 4
  ret void

}


declare float @llvm.powi.f32(float, i32)
; All four powi lanes share the same exponent %P, so they must merge into
; one <4 x float> llvm.powi.v4f32 call with %P as the scalar exponent.
define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
; CHECK-LABEL: @vec_powi_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[B:%.*]] to <4 x float>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.powi.v4f32(<4 x float> [[TMP4]], i32 [[P:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[C:%.*]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load float, float* %a, align 4
  %i1 = load float, float* %b, align 4
  %add1 = fadd float %i0, %i1
  %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone

  %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
  %i2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
  %i3 = load float, float* %arrayidx3, align 4
  %add2 = fadd float %i2, %i3
  %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone

  %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
  %i4 = load float, float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
  %i5 = load float, float* %arrayidx5, align 4
  %add3 = fadd float %i4, %i5
  %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone

  %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
  %i6 = load float, float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
  %i7 = load float, float* %arrayidx7, align 4
  %add4 = fadd float %i6, %i7
  %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone

  store float %call1, float* %c, align 4
  %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
  store float %call2, float* %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
  store float %call3, float* %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
  store float %call4, float* %arrayidx10, align 4
  ret void

}


; Negative test: the lanes alternate between exponents %P and %Q, so the
; powi calls must NOT be vectorized — the CHECK lines require the four
; scalar calls to survive.
define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
; CHECK-LABEL: @vec_powi_f32_neg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[I0:%.*]] = load float, float* [[A:%.*]], align 4
; CHECK-NEXT:    [[I1:%.*]] = load float, float* [[B:%.*]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[I0]], [[I1]]
; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @llvm.powi.f32(float [[ADD1]], i32 [[P:%.*]]) #3
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i32 1
; CHECK-NEXT:    [[I2:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i32 1
; CHECK-NEXT:    [[I3:%.*]] = load float, float* [[ARRAYIDX3]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[I2]], [[I3]]
; CHECK-NEXT:    [[CALL2:%.*]] = tail call float @llvm.powi.f32(float [[ADD2]], i32 [[Q:%.*]]) #3
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[A]], i32 2
; CHECK-NEXT:    [[I4:%.*]] = load float, float* [[ARRAYIDX4]], align 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[B]], i32 2
; CHECK-NEXT:    [[I5:%.*]] = load float, float* [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[I4]], [[I5]]
; CHECK-NEXT:    [[CALL3:%.*]] = tail call float @llvm.powi.f32(float [[ADD3]], i32 [[P]]) #3
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[A]], i32 3
; CHECK-NEXT:    [[I6:%.*]] = load float, float* [[ARRAYIDX6]], align 4
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[B]], i32 3
; CHECK-NEXT:    [[I7:%.*]] = load float, float* [[ARRAYIDX7]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[I6]], [[I7]]
; CHECK-NEXT:    [[CALL4:%.*]] = tail call float @llvm.powi.f32(float [[ADD4]], i32 [[Q]]) #3
; CHECK-NEXT:    store float [[CALL1]], float* [[C:%.*]], align 4
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, float* [[C]], i32 1
; CHECK-NEXT:    store float [[CALL2]], float* [[ARRAYIDX8]], align 4
; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[C]], i32 2
; CHECK-NEXT:    store float [[CALL3]], float* [[ARRAYIDX9]], align 4
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[C]], i32 3
; CHECK-NEXT:    store float [[CALL4]], float* [[ARRAYIDX10]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %i0 = load float, float* %a, align 4
  %i1 = load float, float* %b, align 4
  %add1 = fadd float %i0, %i1
  %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone

  %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
  %i2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
  %i3 = load float, float* %arrayidx3, align 4
  %add2 = fadd float %i2, %i3
  %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone

  %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
  %i4 = load float, float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
  %i5 = load float, float* %arrayidx5, align 4
  %add3 = fadd float %i4, %i5
  %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone

  %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
  %i6 = load float, float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
  %i7 = load float, float* %arrayidx7, align 4
  %add4 = fadd float %i6, %i7
  %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone

  store float %call1, float* %c, align 4
  %arrayidx8 = getelementptr inbounds float, float* %c, i32 1
  store float %call2, float* %arrayidx8, align 4
  %arrayidx9 = getelementptr inbounds float, float* %c, i32 2
  store float %call3, float* %arrayidx9, align 4
  %arrayidx10 = getelementptr inbounds float, float* %c, i32 3
  store float %call4, float* %arrayidx10, align 4
  ret void

}