// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Werror | FileCheck %s
// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
3 
// Don't include mm_malloc.h, it's system specific. Defining its guard macro
// up front keeps <x86intrin.h> from pulling that header in.
#define __MM_MALLOC_H

#include <x86intrin.h>
8 
// Verifies that _mm_blend_epi16 with immediate 42 is emitted as a
// shufflevector over the two <8 x i16> operands with the lane mask
// <0, 9, 2, 11, 4, 13, 6, 7>.
__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
  // CHECK-LABEL: test_mm_blend_epi16
  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
  return _mm_blend_epi16(V1, V2, 42);
}
14 
// Verifies that _mm_blend_pd with immediate 2 is emitted as a shufflevector
// over the two <2 x double> operands with the lane mask <0, 3>.
__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
  // CHECK-LABEL: test_mm_blend_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
  return _mm_blend_pd(V1, V2, 2);
}
20 
// Verifies that _mm_blend_ps with immediate 6 is emitted as a shufflevector
// over the two <4 x float> operands with the lane mask <0, 5, 6, 3>.
__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
  // CHECK-LABEL: test_mm_blend_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  return _mm_blend_ps(V1, V2, 6);
}
26 
// Verifies that _mm_blendv_epi8 is emitted as a call to the
// llvm.x86.sse41.pblendvb intrinsic returning <16 x i8>.
__m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
  // CHECK-LABEL: test_mm_blendv_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb
  return _mm_blendv_epi8(V1, V2, V3);
}
32 
// Verifies that _mm_blendv_pd is emitted as a call to the
// llvm.x86.sse41.blendvpd intrinsic returning <2 x double>.
__m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
  // CHECK-LABEL: test_mm_blendv_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.blendvpd
  return _mm_blendv_pd(V1, V2, V3);
}
38 
// Verifies that _mm_blendv_ps is emitted as a call to the
// llvm.x86.sse41.blendvps intrinsic returning <4 x float>.
__m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
  // CHECK-LABEL: test_mm_blendv_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.blendvps
  return _mm_blendv_ps(V1, V2, V3);
}
44 
// Verifies that _mm_ceil_pd is emitted as a call to the
// llvm.x86.sse41.round.pd intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128d test_mm_ceil_pd(__m128d x) {
  // CHECK-LABEL: test_mm_ceil_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_ceil_pd(x);
}
50 
// Verifies that _mm_ceil_ps is emitted as a call to the
// llvm.x86.sse41.round.ps intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128 test_mm_ceil_ps(__m128 x) {
  // CHECK-LABEL: test_mm_ceil_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_ceil_ps(x);
}
56 
// Verifies that _mm_ceil_sd is emitted as a call to the
// llvm.x86.sse41.round.sd intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128d test_mm_ceil_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_ceil_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_ceil_sd(x, y);
}
62 
// Verifies that _mm_ceil_ss is emitted as a call to the
// llvm.x86.sse41.round.ss intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128 test_mm_ceil_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_ceil_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_ceil_ss(x, y);
}
68 
// Verifies that _mm_cmpeq_epi64 is emitted as a generic `icmp eq` on
// <2 x i64> rather than as a target-specific intrinsic call.
__m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpeq_epi64
  // CHECK: icmp eq <2 x i64>
  return _mm_cmpeq_epi64(A, B);
}
74 
// Verifies that _mm_cvtepi8_epi16 is emitted as a sign extension from
// <8 x i8> to <8 x i16>.
__m128i test_mm_cvtepi8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi16
  // CHECK: sext <8 x i8> {{.*}} to <8 x i16>
  return _mm_cvtepi8_epi16(a);
}
80 
// Verifies that _mm_cvtepi8_epi32 is emitted as a sign extension from
// <4 x i8> to <4 x i32>.
__m128i test_mm_cvtepi8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi32
  // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
  return _mm_cvtepi8_epi32(a);
}
86 
// Verifies that _mm_cvtepi8_epi64 is emitted as a sign extension from
// <2 x i8> to <2 x i64>.
__m128i test_mm_cvtepi8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi8_epi64
  // CHECK: sext <2 x i8> {{.*}} to <2 x i64>
  return _mm_cvtepi8_epi64(a);
}
92 
// Verifies that _mm_cvtepi16_epi32 is emitted as a sign extension from
// <4 x i16> to <4 x i32>.
__m128i test_mm_cvtepi16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi32
  // CHECK: sext <4 x i16> {{.*}} to <4 x i32>
  return _mm_cvtepi16_epi32(a);
}
98 
// Verifies that _mm_cvtepi16_epi64 is emitted as a sign extension from
// <2 x i16> to <2 x i64>.
__m128i test_mm_cvtepi16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi16_epi64
  // CHECK: sext <2 x i16> {{.*}} to <2 x i64>
  return _mm_cvtepi16_epi64(a);
}
104 
// Verifies that _mm_cvtepi32_epi64 is emitted as a sign extension from
// <2 x i32> to <2 x i64>.
__m128i test_mm_cvtepi32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepi32_epi64
  // CHECK: sext <2 x i32> {{.*}} to <2 x i64>
  return _mm_cvtepi32_epi64(a);
}
110 
// Verifies that _mm_cvtepu8_epi16 is emitted as a call to the
// llvm.x86.sse41.pmovzxbw intrinsic taking <16 x i8> and returning <8 x i16>.
__m128i test_mm_cvtepu8_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi16(a);
}
116 
// Verifies that _mm_cvtepu8_epi32 is emitted as a call to the
// llvm.x86.sse41.pmovzxbd intrinsic taking <16 x i8> and returning <4 x i32>.
__m128i test_mm_cvtepu8_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi32(a);
}
122 
// Verifies that _mm_cvtepu8_epi64 is emitted as a call to the
// llvm.x86.sse41.pmovzxbq intrinsic taking <16 x i8> and returning <2 x i64>.
__m128i test_mm_cvtepu8_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu8_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
  return _mm_cvtepu8_epi64(a);
}
128 
// Verifies that _mm_cvtepu16_epi32 is emitted as a call to the
// llvm.x86.sse41.pmovzxwd intrinsic taking <8 x i16> and returning <4 x i32>.
__m128i test_mm_cvtepu16_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
  return _mm_cvtepu16_epi32(a);
}
134 
// Verifies that _mm_cvtepu16_epi64 is emitted as a call to the
// llvm.x86.sse41.pmovzxwq intrinsic taking <8 x i16> and returning <2 x i64>.
__m128i test_mm_cvtepu16_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu16_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
  return _mm_cvtepu16_epi64(a);
}
140 
// Verifies that _mm_cvtepu32_epi64 is emitted as a call to the
// llvm.x86.sse41.pmovzxdq intrinsic taking <4 x i32> and returning <2 x i64>.
__m128i test_mm_cvtepu32_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_cvtepu32_epi64
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
  return _mm_cvtepu32_epi64(a);
}
146 
// Verifies that _mm_dp_pd (called with immediate 2) is emitted as a call to
// the llvm.x86.sse41.dppd intrinsic. The immediate operand is not matched.
__m128d test_mm_dp_pd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_dp_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.dppd
  return _mm_dp_pd(x, y, 2);
}
152 
// Verifies that _mm_dp_ps (called with immediate 2) is emitted as a call to
// the llvm.x86.sse41.dpps intrinsic. The immediate operand is not matched.
__m128 test_mm_dp_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_dp_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.dpps
  return _mm_dp_ps(x, y, 2);
}
158 
// Verifies that _mm_extract_epi8 is emitted as an extractelement on
// <16 x i8>. The lane index passed is 16, one past the last lane, and the
// expected IR extracts lane 0, so the index is expected to wrap.
int test_mm_extract_epi8(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi8
  // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
  return _mm_extract_epi8(x, 16);
}
164 
// Verifies that _mm_extract_epi32 with lane index 1 is emitted as an
// extractelement of lane 1 from <4 x i32>.
int test_mm_extract_epi32(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi32
  // CHECK: extractelement <4 x i32> %{{.*}}, i32 1
  return _mm_extract_epi32(x, 1);
}
170 
// Verifies that _mm_extract_epi64 with lane index 1 is emitted as an
// extractelement of lane 1 from <2 x i64>.
long long test_mm_extract_epi64(__m128i x) {
  // CHECK-LABEL: test_mm_extract_epi64
  // CHECK: extractelement <2 x i64> %{{.*}}, i32 1
  return _mm_extract_epi64(x, 1);
}
176 
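// TODO: add coverage for _mm_extract_ps. The draft below is disabled; note
// that it declares x as __m128i although _mm_add_ps takes __m128 operands.
//int test_mm_extract_ps(__m128i x) {
//  return _mm_extract_ps(_mm_add_ps(x,x), 1);
//}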
181 
// Verifies that _mm_floor_pd is emitted as a call to the
// llvm.x86.sse41.round.pd intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128d test_mm_floor_pd(__m128d x) {
  // CHECK-LABEL: test_mm_floor_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_floor_pd(x);
}
187 
// Verifies that _mm_floor_ps is emitted as a call to the
// llvm.x86.sse41.round.ps intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128 test_mm_floor_ps(__m128 x) {
  // CHECK-LABEL: test_mm_floor_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_floor_ps(x);
}
193 
// Verifies that _mm_floor_sd is emitted as a call to the
// llvm.x86.sse41.round.sd intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128d test_mm_floor_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_floor_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_floor_sd(x, y);
}
199 
// Verifies that _mm_floor_ss is emitted as a call to the
// llvm.x86.sse41.round.ss intrinsic. Only the callee and return type are
// matched; the rounding-mode operand is not checked.
__m128 test_mm_floor_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_floor_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_floor_ss(x, y);
}
205 
// Verifies that _mm_insert_epi8 is emitted as an insertelement on <16 x i8>.
// The lane index passed is 16, one past the last lane, and the expected IR
// inserts at lane 0, so the index is expected to wrap.
__m128i test_mm_insert_epi8(__m128i x, char b) {
  // CHECK-LABEL: test_mm_insert_epi8
  // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0
  return _mm_insert_epi8(x, b, 16);
}
211 
// Verifies that _mm_insert_epi32 is emitted as an insertelement on <4 x i32>.
// The lane index passed is 4, one past the last lane, and the expected IR
// inserts at lane 0, so the index is expected to wrap.
__m128i test_mm_insert_epi32(__m128i x, int b) {
  // CHECK-LABEL: test_mm_insert_epi32
  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
  return _mm_insert_epi32(x, b, 4);
}
217 
// Verifies that _mm_insert_epi64 is emitted as an insertelement on <2 x i64>.
// The lane index passed is 2, one past the last lane, and the expected IR
// inserts at lane 0, so the index is expected to wrap.
__m128i test_mm_insert_epi64(__m128i x, long long b) {
  // CHECK-LABEL: test_mm_insert_epi64
  // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0
  return _mm_insert_epi64(x, b, 2);
}
223 
// Verifies that _mm_insert_ps (called with immediate 5) is emitted as a call
// to the llvm.x86.sse41.insertps intrinsic. The immediate is not matched.
__m128 test_mm_insert_ps(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_insert_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.insertps
  return _mm_insert_ps(x, y, 5);
}
229 
// Verifies that _mm_max_epi8 is emitted as a call to the
// llvm.x86.sse41.pmaxsb intrinsic returning <16 x i8>.
__m128i test_mm_max_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pmaxsb
  return _mm_max_epi8(x, y);
}
235 
// Verifies that _mm_max_epu16 is emitted as a call to the
// llvm.x86.sse41.pmaxuw intrinsic returning <8 x i16>.
__m128i test_mm_max_epu16(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pmaxuw
  return _mm_max_epu16(x, y);
}
241 
// Verifies that _mm_max_epi32 is emitted as a call to the
// llvm.x86.sse41.pmaxsd intrinsic returning <4 x i32>.
__m128i test_mm_max_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxsd
  return _mm_max_epi32(x, y);
}
247 
// Verifies that _mm_max_epu32 is emitted as a call to the
// llvm.x86.sse41.pmaxud intrinsic returning <4 x i32>.
__m128i test_mm_max_epu32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_max_epu32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxud
  return _mm_max_epu32(x, y);
}
253 
// Verifies that _mm_min_epi8 is emitted as a call to the
// llvm.x86.sse41.pminsb intrinsic returning <16 x i8>.
__m128i test_mm_min_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pminsb
  return _mm_min_epi8(x, y);
}
259 
// Verifies that _mm_min_epu16 is emitted as a call to the
// llvm.x86.sse41.pminuw intrinsic returning <8 x i16>.
__m128i test_mm_min_epu16(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.pminuw
  return _mm_min_epu16(x, y);
}
265 
// Verifies that _mm_min_epi32 is emitted as a call to the
// llvm.x86.sse41.pminsd intrinsic returning <4 x i32>.
__m128i test_mm_min_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pminsd
  return _mm_min_epi32(x, y);
}
271 
// Verifies that _mm_min_epu32 is emitted as a call to the
// llvm.x86.sse41.pminud intrinsic returning <4 x i32>.
__m128i test_mm_min_epu32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epu32
  // CHECK: call <4 x i32> @llvm.x86.sse41.pminud
  return _mm_min_epu32(x, y);
}
277 
// Verifies that _mm_minpos_epu16 is emitted as a call to the
// llvm.x86.sse41.phminposuw intrinsic returning <8 x i16>.
__m128i test_mm_minpos_epu16(__m128i x) {
  // CHECK-LABEL: test_mm_minpos_epu16
  // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw
  return _mm_minpos_epu16(x);
}
283 
// Verifies that _mm_mpsadbw_epu8 (called with immediate 1) is emitted as a
// call to the llvm.x86.sse41.mpsadbw intrinsic. The immediate is not matched.
__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mpsadbw_epu8
  // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw
  return _mm_mpsadbw_epu8(x, y, 1);
}
289 
// Verifies that _mm_mul_epi32 is emitted as a call to the
// llvm.x86.sse41.pmuldq intrinsic returning <2 x i64>.
__m128i test_mm_mul_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mul_epi32
  // CHECK: call <2 x i64> @llvm.x86.sse41.pmuldq
  return _mm_mul_epi32(x, y);
}
295 
// Verifies that _mm_mullo_epi32 is emitted as a generic `mul` on <4 x i32>
// rather than as a target-specific intrinsic call.
__m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mullo_epi32
  // CHECK: mul <4 x i32>
  return _mm_mullo_epi32(x, y);
}
301 
// Verifies that _mm_packus_epi32 is emitted as a call to the
// llvm.x86.sse41.packusdw intrinsic returning <8 x i16>.
__m128i test_mm_packus_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_packus_epi32
  // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw
  return _mm_packus_epi32(x, y);
}
307 
// Verifies that _mm_round_pd (called with rounding immediate 2) is emitted as
// a call to the llvm.x86.sse41.round.pd intrinsic. The immediate is not
// matched.
__m128d test_mm_round_pd(__m128d x) {
  // CHECK-LABEL: test_mm_round_pd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
  return _mm_round_pd(x, 2);
}
313 
// Verifies that _mm_round_ps (called with rounding immediate 2) is emitted as
// a call to the llvm.x86.sse41.round.ps intrinsic. The immediate is not
// matched.
__m128 test_mm_round_ps(__m128 x) {
  // CHECK-LABEL: test_mm_round_ps
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
  return _mm_round_ps(x, 2);
}
319 
// Verifies that _mm_round_sd (called with rounding immediate 2) is emitted as
// a call to the llvm.x86.sse41.round.sd intrinsic. The immediate is not
// matched.
__m128d test_mm_round_sd(__m128d x, __m128d y) {
  // CHECK-LABEL: test_mm_round_sd
  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
  return _mm_round_sd(x, y, 2);
}
325 
// Verifies that _mm_round_ss (called with rounding immediate 2) is emitted as
// a call to the llvm.x86.sse41.round.ss intrinsic. The immediate is not
// matched.
__m128 test_mm_round_ss(__m128 x, __m128 y) {
  // CHECK-LABEL: test_mm_round_ss
  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
  return _mm_round_ss(x, y, 2);
}
331 
// Verifies that _mm_stream_load_si128 is emitted as a call to the
// llvm.x86.sse41.movntdqa intrinsic returning <2 x i64>.
__m128i test_mm_stream_load_si128(__m128i const *a) {
  // CHECK-LABEL: test_mm_stream_load_si128
  // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa
  return _mm_stream_load_si128(a);
}
337 
// Verifies that _mm_test_all_ones is emitted as a call to the
// llvm.x86.sse41.ptestc intrinsic returning i32.
int test_mm_test_all_ones(__m128i x) {
  // CHECK-LABEL: test_mm_test_all_ones
  // CHECK: call i32 @llvm.x86.sse41.ptestc
  return _mm_test_all_ones(x);
}
343 
// Verifies that _mm_test_all_zeros is emitted as a call to the
// llvm.x86.sse41.ptestz intrinsic returning i32.
int test_mm_test_all_zeros(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_test_all_zeros
  // CHECK: call i32 @llvm.x86.sse41.ptestz
  return _mm_test_all_zeros(x, y);
}
349 
// Verifies that _mm_test_mix_ones_zeros is emitted as a call to the
// llvm.x86.sse41.ptestnzc intrinsic returning i32.
int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_test_mix_ones_zeros
  // CHECK: call i32 @llvm.x86.sse41.ptestnzc
  return _mm_test_mix_ones_zeros(x, y);
}
355 
// Verifies that _mm_testc_si128 is emitted as a call to the
// llvm.x86.sse41.ptestc intrinsic returning i32.
int test_mm_testc_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testc_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestc
  return _mm_testc_si128(x, y);
}
361 
// Verifies that _mm_testnzc_si128 is emitted as a call to the
// llvm.x86.sse41.ptestnzc intrinsic returning i32.
int test_mm_testnzc_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testnzc_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestnzc
  return _mm_testnzc_si128(x, y);
}
367 
// Verifies that _mm_testz_si128 is emitted as a call to the
// llvm.x86.sse41.ptestz intrinsic returning i32.
int test_mm_testz_si128(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_testz_si128
  // CHECK: call i32 @llvm.x86.sse41.ptestz
  return _mm_testz_si128(x, y);
}
373