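; Verifies that compare/select idioms for unsigned saturating subtraction on
; i8/i16 vectors are lowered to PSUBUSB/PSUBUSW (VPSUBUSB/VPSUBUSW under AVX).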
; Three llc runs, one per CPU level; each run is matched against the check
; lines carrying its own prefix.
; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; test1: per-lane `x <s 0 ? x ^ 0x8000 : 0` on <8 x i16>, which is an unsigned
; saturating subtract of 32768. All three runs expect a (v)psubusw whose
; subtrahend comes from the constant pool.
define void @test1(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <8 x i16>*
  %2 = load <8 x i16>* %1, align 2
  ; Sign bit set <=> the lane is >= 32768 when read as unsigned.
  %3 = icmp slt <8 x i16> %2, zeroinitializer
  ; For such lanes, flipping the sign bit equals subtracting 32768.
  %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
  ; The result overwrites the input in place.
  store <8 x i16> %5, <8 x i16>* %1, align 2
  ; Advance by one 8-lane vector.
  %index.next = add i64 %index, 8
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test1
; SSE2: psubusw LCPI0_0(%rip), %xmm0

; AVX1: @test1
; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0

; AVX2: @test1
; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
}

; test2: per-lane `x >u 32766 ? x + (-32767) : 0` on <8 x i16>, which is an
; unsigned saturating subtract of 32767. All three runs expect a (v)psubusw
; whose subtrahend comes from the constant pool.
define void @test2(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <8 x i16>*
  %2 = load <8 x i16>* %1, align 2
  ; Lanes >= 32767 (unsigned) are the ones where x - 32767 does not underflow.
  %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
  ; Adding -32767 is the subtraction of 32767.
  %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
  %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
  ; The result overwrites the input in place.
  store <8 x i16> %5, <8 x i16>* %1, align 2
  ; Advance by one 8-lane vector.
  %index.next = add i64 %index, 8
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test2
; SSE2: psubusw LCPI1_0(%rip), %xmm0

; AVX1: @test2
; AVX1: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0

; AVX2: @test2
; AVX2: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
}

; test3: per-lane `x <u w ? 0 : x - w` on <8 x i16>, with the scalar %w
; splatted to every lane: an unsigned saturating subtract of a variable.
; All three runs expect the register-register form of (v)psubusw.
define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
vector.ph:
  ; Splat %w: insert into lane 0, then shuffle with an all-zero mask.
  %0 = insertelement <8 x i16> undef, i16 %w, i32 0
  %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i16* %head, i64 %index
  %2 = bitcast i16* %1 to <8 x i16>*
  %3 = load <8 x i16>* %2, align 2
  ; Lanes below %w would underflow, so they are clamped to zero by the select.
  %4 = icmp ult <8 x i16> %3, %broadcast15
  %5 = sub <8 x i16> %3, %broadcast15
  %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
  ; The result overwrites the input in place.
  store <8 x i16> %6, <8 x i16>* %2, align 2
  ; Advance by one 8-lane vector.
  %index.next = add i64 %index, 8
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test3
; SSE2: psubusw %xmm0, %xmm1

; AVX1: @test3
; AVX1: vpsubusw %xmm0, %xmm1, %xmm1

; AVX2: @test3
; AVX2: vpsubusw %xmm0, %xmm1, %xmm1
}

; test4: per-lane `x <s 0 ? x ^ 0x80 : 0` on <16 x i8>, which is an unsigned
; saturating subtract of 128. All three runs expect a (v)psubusb whose
; subtrahend comes from the constant pool.
define void @test4(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <16 x i8>*
  %2 = load <16 x i8>* %1, align 1
  ; Sign bit set <=> the lane is >= 128 when read as unsigned.
  %3 = icmp slt <16 x i8> %2, zeroinitializer
  ; For such lanes, flipping the sign bit equals subtracting 128.
  %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
  ; The result overwrites the input in place.
  store <16 x i8> %5, <16 x i8>* %1, align 1
  ; Advance by one 16-lane vector.
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test4
; SSE2: psubusb LCPI3_0(%rip), %xmm0

; AVX1: @test4
; AVX1: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0

; AVX2: @test4
; AVX2: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
}

; test5: per-lane `x >u 126 ? x + (-127) : 0` on <16 x i8>, which is an
; unsigned saturating subtract of 127. All three runs expect a (v)psubusb
; whose subtrahend comes from the constant pool.
define void @test5(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <16 x i8>*
  %2 = load <16 x i8>* %1, align 1
  ; Lanes >= 127 (unsigned) are the ones where x - 127 does not underflow.
  %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
  ; Adding -127 is the subtraction of 127.
  %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
  %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
  ; The result overwrites the input in place.
  store <16 x i8> %5, <16 x i8>* %1, align 1
  ; Advance by one 16-lane vector.
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test5
; SSE2: psubusb LCPI4_0(%rip), %xmm0

; AVX1: @test5
; AVX1: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0

; AVX2: @test5
; AVX2: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
}

; test6: per-lane `x <u w ? 0 : x - w` on <16 x i8>, with the scalar %w
; splatted to every lane: an unsigned saturating subtract of a variable.
; All three runs expect the register-register form of (v)psubusb.
define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
vector.ph:
  ; Splat %w: insert into lane 0, then shuffle with an all-zero mask.
  %0 = insertelement <16 x i8> undef, i8 %w, i32 0
  %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i8* %head, i64 %index
  %2 = bitcast i8* %1 to <16 x i8>*
  %3 = load <16 x i8>* %2, align 1
  ; Lanes below %w would underflow, so they are clamped to zero by the select.
  %4 = icmp ult <16 x i8> %3, %broadcast15
  %5 = sub <16 x i8> %3, %broadcast15
  %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
  ; The result overwrites the input in place.
  store <16 x i8> %6, <16 x i8>* %2, align 1
  ; Advance by one 16-lane vector.
  %index.next = add i64 %index, 16
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; SSE2: @test6
; SSE2: psubusb %xmm0, %xmm1

; AVX1: @test6
; AVX1: vpsubusb %xmm0, %xmm1, %xmm1

; AVX2: @test6
; AVX2: vpsubusb %xmm0, %xmm1, %xmm1
}

; test7: 256-bit variant of the sign-bit idiom, `x <s 0 ? x ^ 0x8000 : 0` on
; <16 x i16> (unsigned saturating subtract of 32768). Only the AVX2 run has
; check lines here; it expects a ymm vpsubusw with a constant-pool operand.
define void @test7(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <16 x i16>*
  %2 = load <16 x i16>* %1, align 2
  ; Sign bit set <=> the lane is >= 32768 when read as unsigned.
  %3 = icmp slt <16 x i16> %2, zeroinitializer
  ; For such lanes, flipping the sign bit equals subtracting 32768.
  %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
  ; The result overwrites the input in place.
  store <16 x i16> %5, <16 x i16>* %1, align 2
  ; Step by the full 16-lane vector width so consecutive iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test7
; AVX2: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0
}

; test8: 256-bit variant of `x >u 32766 ? x + (-32767) : 0` on <16 x i16>
; (unsigned saturating subtract of 32767). Only the AVX2 run has check lines
; here; it expects a ymm vpsubusw with a constant-pool operand.
define void @test8(i16* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16* %head, i64 %index
  %1 = bitcast i16* %0 to <16 x i16>*
  %2 = load <16 x i16>* %1, align 2
  ; Lanes >= 32767 (unsigned) are the ones where x - 32767 does not underflow.
  %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
  ; Adding -32767 is the subtraction of 32767.
  %4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
  %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
  ; The result overwrites the input in place.
  store <16 x i16> %5, <16 x i16>* %1, align 2
  ; Step by the full 16-lane vector width so consecutive iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test8
; AVX2: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0
}

; test9: 256-bit variant of `x <u w ? 0 : x - w` on <16 x i16>, with the
; scalar %w splatted to every lane. Only the AVX2 run has check lines here;
; it expects the register-register ymm form of vpsubusw.
define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
vector.ph:
  ; Splat %w: insert into lane 0, then shuffle with an all-zero mask.
  %0 = insertelement <16 x i16> undef, i16 %w, i32 0
  %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i16* %head, i64 %index
  %2 = bitcast i16* %1 to <16 x i16>*
  %3 = load <16 x i16>* %2, align 2
  ; Lanes below %w would underflow, so they are clamped to zero by the select.
  %4 = icmp ult <16 x i16> %3, %broadcast15
  %5 = sub <16 x i16> %3, %broadcast15
  %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
  ; The result overwrites the input in place.
  store <16 x i16> %6, <16 x i16>* %2, align 2
  ; Step by the full 16-lane vector width so consecutive iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 16
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test9
; AVX2: vpsubusw %ymm0, %ymm1, %ymm1
}

; test10: 256-bit variant of the sign-bit idiom, `x <s 0 ? x ^ 0x80 : 0` on
; <32 x i8> (unsigned saturating subtract of 128). Only the AVX2 run has
; check lines here; it expects a ymm vpsubusb with a constant-pool operand.
define void @test10(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <32 x i8>*
  %2 = load <32 x i8>* %1, align 1
  ; Sign bit set <=> the lane is >= 128 when read as unsigned.
  %3 = icmp slt <32 x i8> %2, zeroinitializer
  ; For such lanes, flipping the sign bit equals subtracting 128.
  %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
  ; The result overwrites the input in place.
  store <32 x i8> %5, <32 x i8>* %1, align 1
  ; Step by the full 32-lane vector width so consecutive iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 32
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test10
; AVX2: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0
}

; test11: 256-bit variant of `x >u 126 ? x + (-127) : 0` on <32 x i8>
; (unsigned saturating subtract of 127). Only the AVX2 run has check lines
; here; it expects a ymm vpsubusb with a constant-pool operand.
define void @test11(i8* nocapture %head) nounwind {
vector.ph:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8* %head, i64 %index
  %1 = bitcast i8* %0 to <32 x i8>*
  %2 = load <32 x i8>* %1, align 1
  ; Lanes >= 127 (unsigned) are the ones where x - 127 does not underflow.
  %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
  ; Adding -127 is the subtraction of 127.
  %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
  %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
  ; The result overwrites the input in place.
  store <32 x i8> %5, <32 x i8>* %1, align 1
  ; Step by the full 32-lane vector width so consecutive iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 32
  %6 = icmp eq i64 %index.next, 16384
  br i1 %6, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test11
; AVX2: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0
}

; test12: 256-bit variant of `x <u w ? 0 : x - w` on <32 x i8>, with the
; scalar %w splatted to every lane. Only the AVX2 run has check lines here;
; it expects the register-register ymm form of vpsubusb.
define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
vector.ph:
  ; Splat %w: insert into lane 0, then shuffle with an all-zero mask.
  %0 = insertelement <32 x i8> undef, i8 %w, i32 0
  %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %1 = getelementptr inbounds i8* %head, i64 %index
  %2 = bitcast i8* %1 to <32 x i8>*
  %3 = load <32 x i8>* %2, align 1
  ; Lanes below %w would underflow, so they are clamped to zero by the select.
  %4 = icmp ult <32 x i8> %3, %broadcast15
  %5 = sub <32 x i8> %3, %broadcast15
  %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
  ; The result overwrites the input in place.
  store <32 x i8> %6, <32 x i8>* %2, align 1
  ; Step by the full 32-lane vector width so consecutive iterations do not
  ; overlap and the last access ends at element 16384.
  %index.next = add i64 %index, 32
  %7 = icmp eq i64 %index.next, 16384
  br i1 %7, label %for.end, label %vector.body

for.end:                                          ; preds = %vector.body
  ret void

; AVX2: @test12
; AVX2: vpsubusb %ymm0, %ymm1, %ymm1
}
