// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
// Pre-defining its include guard makes the header expand to nothing.
#define __MM_MALLOC_H

#include <x86intrin.h>
8
// Immediate 42 (0b00101010) takes lanes 1, 3 and 5 from V2; the expected
// shuffle mask encodes those as indices 9, 11 and 13.
__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
  // CHECK-LABEL: test_mm_blend_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  return _mm_blend_epi16(V1, V2, 42);
}
14
// Immediate 2 (0b10) takes lane 1 from V2; the expected shuffle mask encodes
// that as index 3.
__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
  // CHECK-LABEL: test_mm_blend_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
  return _mm_blend_pd(V1, V2, 2);
}
20
// Immediate 6 (0b0110) takes lanes 1 and 2 from V2; the expected shuffle mask
// encodes those as indices 5 and 6.
__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
  // CHECK-LABEL: test_mm_blend_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  return _mm_blend_ps(V1, V2, 6);
}
26
// Variable byte blend: expected to be emitted as a call to the pblendvb
// intrinsic.
__m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
  // CHECK-LABEL: test_mm_blendv_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb
  return _mm_blendv_epi8(V1, V2, V3);
}
32
// Variable double-precision blend: expected to be emitted as a call to the
// blendvpd intrinsic.
__m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
  // CHECK-LABEL: test_mm_blendv_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.blendvpd
  return _mm_blendv_pd(V1, V2, V3);
}
38
// Variable single-precision blend: expected to be emitted as a call to the
// blendvps intrinsic.
__m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
  // CHECK-LABEL: test_mm_blendv_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.blendvps
  return _mm_blendv_ps(V1, V2, V3);
}
44
// _mm_ceil_pd is expected to be emitted as a call to the round.pd intrinsic.
__m128d test_mm_ceil_pd(__m128d x) {
  // CHECK-LABEL: test_mm_ceil_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_ceil_pd(x);
}
50
// _mm_ceil_ps is expected to be emitted as a call to the round.ps intrinsic.
__m128 test_mm_ceil_ps(__m128 x) {
  // CHECK-LABEL: test_mm_ceil_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_ceil_ps(x);
}
56
// _mm_ceil_sd is expected to be emitted as a call to the round.sd intrinsic.
__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_ceil_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_ceil_sd(x, y);
}
62
// _mm_ceil_ss is expected to be emitted as a call to the round.ss intrinsic.
__m128 test_mm_ceil_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_ceil_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_ceil_ss(x, y);
}
68
// 64-bit lane equality is expected to be emitted as a generic vector icmp
// rather than a target intrinsic call.
__m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi64
  // CHECK: icmp eq <2 x i64>
  return _mm_cmpeq_epi64(A, B);
}
74
// Signed i8 -> i16 widening is expected to be emitted as a vector sext.
__m128i test_mm_cvtepi8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi16
  // CHECK: sext <8 x i8> {{.*}} to <8 x i16>
  return _mm_cvtepi8_epi16(a);
}
80
// Signed i8 -> i32 widening is expected to be emitted as a vector sext.
__m128i test_mm_cvtepi8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi32
  // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
  return _mm_cvtepi8_epi32(a);
}
86
// Signed i8 -> i64 widening is expected to be emitted as a vector sext.
__m128i test_mm_cvtepi8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi64
  // CHECK: sext <2 x i8> {{.*}} to <2 x i64>
  return _mm_cvtepi8_epi64(a);
}
92
// Signed i16 -> i32 widening is expected to be emitted as a vector sext.
__m128i test_mm_cvtepi16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi32
  // CHECK: sext <4 x i16> {{.*}} to <4 x i32>
  return _mm_cvtepi16_epi32(a);
}
98
// Signed i16 -> i64 widening is expected to be emitted as a vector sext.
__m128i test_mm_cvtepi16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi64
  // CHECK: sext <2 x i16> {{.*}} to <2 x i64>
  return _mm_cvtepi16_epi64(a);
}
104
// Signed i32 -> i64 widening is expected to be emitted as a vector sext.
__m128i test_mm_cvtepi32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi32_epi64
  // CHECK: sext <2 x i32> {{.*}} to <2 x i64>
  return _mm_cvtepi32_epi64(a);
}
110
// Unsigned i8 -> i16 widening is expected to be emitted as a call to the
// pmovzxbw intrinsic.
__m128i test_mm_cvtepu8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi16(a);
}
116
// Unsigned i8 -> i32 widening is expected to be emitted as a call to the
// pmovzxbd intrinsic.
__m128i test_mm_cvtepu8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi32(a);
}
122
// Unsigned i8 -> i64 widening is expected to be emitted as a call to the
// pmovzxbq intrinsic.
__m128i test_mm_cvtepu8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi64(a);
}
128
// Unsigned i16 -> i32 widening is expected to be emitted as a call to the
// pmovzxwd intrinsic.
__m128i test_mm_cvtepu16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
  return _mm_cvtepu16_epi32(a);
}
134
// Unsigned i16 -> i64 widening is expected to be emitted as a call to the
// pmovzxwq intrinsic.
__m128i test_mm_cvtepu16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
  return _mm_cvtepu16_epi64(a);
}
140
// Unsigned i32 -> i64 widening is expected to be emitted as a call to the
// pmovzxdq intrinsic.
__m128i test_mm_cvtepu32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu32_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
  return _mm_cvtepu32_epi64(a);
}
146
// Double-precision dot product with immediate 2: expected to be emitted as a
// call to the dppd intrinsic.
__m128d test_mm_dp_pd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_dp_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.dppd
  return _mm_dp_pd(x, y, 2);
}
152
// Single-precision dot product with immediate 2: expected to be emitted as a
// call to the dpps intrinsic.
__m128 test_mm_dp_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_dp_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.dpps
  return _mm_dp_ps(x, y, 2);
}
158
// Index 16 is deliberately one past the last of the 16 byte lanes; the
// expected IR extracts lane 0, i.e. the immediate is expected to wrap.
int test_mm_extract_epi8(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi8
  // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
  return _mm_extract_epi8(x, 16);
}
164
// Extracting 32-bit lane 1 is expected to be emitted as an extractelement at
// index 1.
int test_mm_extract_epi32(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 1
  return _mm_extract_epi32(x, 1);
}
170
// Extracting 64-bit lane 1 is expected to be emitted as an extractelement at
// index 1.
long long test_mm_extract_epi64(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 1
  return _mm_extract_epi64(x, 1);
}
176
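// TODO: add coverage for _mm_extract_ps (no FileCheck pattern written yet).
// Sketch; the parameter is __m128 because it is passed to _mm_add_ps:
// int test_mm_extract_ps(__m128 x) {
//   return _mm_extract_ps(_mm_add_ps(x, x), 1);
// }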
181
// _mm_floor_pd is expected to be emitted as a call to the round.pd intrinsic.
__m128d test_mm_floor_pd(__m128d x) {
  // CHECK-LABEL: test_mm_floor_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_floor_pd(x);
}
187
// _mm_floor_ps is expected to be emitted as a call to the round.ps intrinsic.
__m128 test_mm_floor_ps(__m128 x) {
  // CHECK-LABEL: test_mm_floor_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_floor_ps(x);
}
193
// _mm_floor_sd is expected to be emitted as a call to the round.sd intrinsic.
__m128d test_mm_floor_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_floor_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_floor_sd(x, y);
}
199
// _mm_floor_ss is expected to be emitted as a call to the round.ss intrinsic.
__m128 test_mm_floor_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_floor_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_floor_ss(x, y);
}
205
// Index 16 is deliberately one past the last of the 16 byte lanes; the
// expected IR inserts into lane 0, i.e. the immediate is expected to wrap.
__m128i test_mm_insert_epi8(__m128i x, char b) {
  // CHECK-LABEL: test_mm_insert_epi8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0
  return _mm_insert_epi8(x, b, 16);
}
211
// Index 4 is deliberately one past the last of the 4 dword lanes; the
// expected IR inserts into lane 0, i.e. the immediate is expected to wrap.
__m128i test_mm_insert_epi32(__m128i x, int b) {
  // CHECK-LABEL: test_mm_insert_epi32
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
  return _mm_insert_epi32(x, b, 4);
}
217
// Index 2 is deliberately one past the last of the 2 qword lanes; the
// expected IR inserts into lane 0, i.e. the immediate is expected to wrap.
__m128i test_mm_insert_epi64(__m128i x, long long b) {
  // CHECK-LABEL: test_mm_insert_epi64
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0
  return _mm_insert_epi64(x, b, 2);
}
223
// _mm_insert_ps with immediate 5 is expected to be emitted as a call to the
// insertps intrinsic.
__m128 test_mm_insert_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_insert_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.insertps
  return _mm_insert_ps(x, y, 5);
}
229
// Signed byte maximum: expected to be emitted as a call to the pmaxsb
// intrinsic.
__m128i test_mm_max_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pmaxsb
  return _mm_max_epi8(x, y);
}
235
// Unsigned word maximum: expected to be emitted as a call to the pmaxuw
// intrinsic.
__m128i test_mm_max_epu16(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pmaxuw
  return _mm_max_epu16(x, y);
}
241
// Signed dword maximum: expected to be emitted as a call to the pmaxsd
// intrinsic.
__m128i test_mm_max_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxsd
  return _mm_max_epi32(x, y);
}
247
// Unsigned dword maximum: expected to be emitted as a call to the pmaxud
// intrinsic.
__m128i test_mm_max_epu32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epu32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxud
  return _mm_max_epu32(x, y);
}
253
// Signed byte minimum: expected to be emitted as a call to the pminsb
// intrinsic.
__m128i test_mm_min_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pminsb
  return _mm_min_epi8(x, y);
}
259
// Unsigned word minimum: expected to be emitted as a call to the pminuw
// intrinsic.
__m128i test_mm_min_epu16(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pminuw
  return _mm_min_epu16(x, y);
}
265
// Signed dword minimum: expected to be emitted as a call to the pminsd
// intrinsic.
__m128i test_mm_min_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pminsd
  return _mm_min_epi32(x, y);
}
271
// Unsigned dword minimum: expected to be emitted as a call to the pminud
// intrinsic.
__m128i test_mm_min_epu32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epu32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pminud
  return _mm_min_epu32(x, y);
}
277
// _mm_minpos_epu16 is expected to be emitted as a call to the phminposuw
// intrinsic.
__m128i test_mm_minpos_epu16(__m128i x) {
  // CHECK-LABEL: test_mm_minpos_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw
  return _mm_minpos_epu16(x);
}
283
// _mm_mpsadbw_epu8 with immediate 1 is expected to be emitted as a call to
// the mpsadbw intrinsic.
__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mpsadbw_epu8
  // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw
  return _mm_mpsadbw_epu8(x, y, 1);
}
289
// _mm_mul_epi32 is expected to be emitted as a call to the pmuldq intrinsic,
// producing a <2 x i64> result.
__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mul_epi32
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmuldq
  return _mm_mul_epi32(x, y);
}
295
// _mm_mullo_epi32 is expected to be emitted as a generic vector mul rather
// than a target intrinsic call.
__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mullo_epi32
  // CHECK: mul <4 x i32>
  return _mm_mullo_epi32(x, y);
}
301
// _mm_packus_epi32 is expected to be emitted as a call to the packusdw
// intrinsic, producing a <8 x i16> result.
__m128i test_mm_packus_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_packus_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw
  return _mm_packus_epi32(x, y);
}
307
// _mm_round_pd with immediate 2 is expected to be emitted as a call to the
// round.pd intrinsic.
__m128d test_mm_round_pd(__m128d x) {
  // CHECK-LABEL: test_mm_round_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_round_pd(x, 2);
}
313
// _mm_round_ps with immediate 2 is expected to be emitted as a call to the
// round.ps intrinsic.
__m128 test_mm_round_ps(__m128 x) {
  // CHECK-LABEL: test_mm_round_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_round_ps(x, 2);
}
319
// _mm_round_sd with immediate 2 is expected to be emitted as a call to the
// round.sd intrinsic.
__m128d test_mm_round_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_round_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_round_sd(x, y, 2);
}
325
// _mm_round_ss with immediate 2 is expected to be emitted as a call to the
// round.ss intrinsic.
__m128 test_mm_round_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_round_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_round_ss(x, y, 2);
}
331
// _mm_stream_load_si128 is expected to be emitted as a call to the movntdqa
// intrinsic.
__m128i test_mm_stream_load_si128(__m128i const *a) {
  // CHECK-LABEL: test_mm_stream_load_si128
  // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa
  return _mm_stream_load_si128(a);
}
337
// _mm_test_all_ones is expected to be emitted as a call to the ptestc
// intrinsic.
int test_mm_test_all_ones(__m128i x) {
  // CHECK-LABEL: test_mm_test_all_ones
  // CHECK: call i32 @llvm.x86.sse41.ptestc
  return _mm_test_all_ones(x);
}
343
// _mm_test_all_zeros is expected to be emitted as a call to the ptestz
// intrinsic.
int test_mm_test_all_zeros(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_test_all_zeros
  // CHECK: call i32 @llvm.x86.sse41.ptestz
  return _mm_test_all_zeros(x, y);
}
349
// _mm_test_mix_ones_zeros is expected to be emitted as a call to the ptestnzc
// intrinsic.
int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_test_mix_ones_zeros
  // CHECK: call i32 @llvm.x86.sse41.ptestnzc
  return _mm_test_mix_ones_zeros(x, y);
}
355
// _mm_testc_si128 is expected to be emitted as a call to the ptestc
// intrinsic.
int test_mm_testc_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testc_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestc
  return _mm_testc_si128(x, y);
}
361
// _mm_testnzc_si128 is expected to be emitted as a call to the ptestnzc
// intrinsic.
int test_mm_testnzc_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testnzc_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestnzc
  return _mm_testnzc_si128(x, y);
}
367
// _mm_testz_si128 is expected to be emitted as a call to the ptestz
// intrinsic.
int test_mm_testz_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testz_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestz
  return _mm_testz_si128(x, y);
}
373